# lm_code/交易/bitmart-AI最佳回测.py

"""
AI最佳配置回测 — 基于之前扫描结果

最佳: AI-v4: 10bar前瞻, 方向阈值0.3%, 概率阈值0.45, SL=0.5%, TP=0.8%
该配置在100U时年净利+5544, 月均+462, 935笔交易, 8/12月盈利
"""
import datetime, sqlite3, time as _time
import numpy as np
import pandas as pd
import lightgbm as lgb
import warnings
from pathlib import Path
from collections import defaultdict
warnings.filterwarnings('ignore')

def _load_bars(db, start_ms, end_ms):
    """Read 1-minute OHLC rows with ``start_ms <= id < end_ms`` from SQLite.

    Returns a DataFrame with columns ts/open/high/low/close, indexed by a
    ``datetime`` column obtained by parsing ``ts`` as epoch milliseconds.
    """
    conn = sqlite3.connect(str(db))
    try:
        df = pd.read_sql_query(
            f"SELECT id as ts,open,high,low,close FROM bitmart_eth_1m WHERE id>={start_ms} AND id<{end_ms} ORDER BY id", conn)
    finally:
        # Release the handle even when the query raises.
        conn.close()
    df['datetime'] = pd.to_datetime(df['ts'], unit='ms')
    df.set_index('datetime', inplace=True)
    return df


def _add_features(df):
    """Add the indicator columns to ``df`` in place and return the model feature names.

    The order in which columns are created is kept stable on purpose: it
    defines the order of the feature matrix handed to LightGBM.
    """
    close = df['close']; high = df['high']; low = df['low']; open_ = df['open']

    # Exponential moving averages and relations between them.
    for span in [5, 8, 13, 21, 50, 120]:
        df[f'ema_{span}'] = close.ewm(span=span, adjust=False).mean()
    df['ema_fast_slow'] = (df['ema_8'] - df['ema_21']) / close
    df['ema_slow_big'] = (df['ema_21'] - df['ema_120']) / close
    df['price_vs_ema120'] = (close - df['ema_120']) / close
    df['price_vs_ema50'] = (close - df['ema_50']) / close
    df['ema8_slope'] = df['ema_8'].pct_change(5)
    df['ema21_slope'] = df['ema_21'].pct_change(5)

    # RSI built from simple rolling means of gains and losses.
    delta = close.diff()
    gain = delta.clip(lower=0)
    loss = (-delta).clip(lower=0)
    for window in [7, 14, 21]:
        avg_gain = gain.rolling(window).mean()
        avg_loss = loss.rolling(window).mean()
        # A zero average loss becomes NaN so the ratio never divides by zero.
        df[f'rsi_{window}'] = 100 - 100 / (1 + avg_gain / avg_loss.replace(0, np.nan))

    # Bollinger bands (20 bars, 2 standard deviations).
    mid = close.rolling(20).mean()
    std = close.rolling(20).std()
    df['bb_pct'] = (close - (mid - 2 * std)) / ((mid + 2 * std) - (mid - 2 * std)).replace(0, np.nan)
    df['bb_width'] = 4 * std / mid

    # MACD, normalised by price.
    ema12 = close.ewm(span=12, adjust=False).mean()
    ema26 = close.ewm(span=26, adjust=False).mean()
    df['macd'] = (ema12 - ema26) / close
    df['macd_signal'] = df['macd'].ewm(span=9, adjust=False).mean()
    df['macd_hist'] = df['macd'] - df['macd_signal']

    # True range and its rolling means, normalised by price.
    prev_close = close.shift(1)
    true_range = pd.concat(
        [high - low, (high - prev_close).abs(), (low - prev_close).abs()], axis=1).max(axis=1)
    df['atr_pct'] = true_range.rolling(14).mean() / close
    df['atr_7'] = true_range.rolling(7).mean() / close

    # Stochastic oscillator.
    low14 = low.rolling(14).min()
    high14 = high.rolling(14).max()
    df['stoch_k'] = (close - low14) / (high14 - low14).replace(0, np.nan) * 100
    df['stoch_d'] = df['stoch_k'].rolling(3).mean()

    # Trailing returns over several horizons.
    for lag in [1, 3, 5, 10, 20, 60]:
        df[f'ret_{lag}'] = close.pct_change(lag)

    # Short/long realised volatility and their ratio.
    df['vol_5'] = close.pct_change().rolling(5).std()
    df['vol_20'] = close.pct_change().rolling(20).std()
    df['vol_ratio'] = df['vol_5'] / df['vol_20'].replace(0, np.nan)

    # Candle shape.
    body = (close - open_).abs()
    df['body_pct'] = body / close
    df['upper_shadow'] = (high - pd.concat([open_, close], axis=1).max(axis=1)) / close
    df['lower_shadow'] = (pd.concat([open_, close], axis=1).min(axis=1) - low) / close
    df['body_vs_range'] = body / (high - low).replace(0, np.nan)
    df['is_bullish'] = (close > open_).astype(float)

    # Position of the close inside the 20-bar high/low range.
    df['high_20'] = high.rolling(20).max()
    df['low_20'] = low.rolling(20).min()
    df['price_position'] = (close - df['low_20']) / (df['high_20'] - df['low_20']).replace(0, np.nan)

    # Time-of-day features.
    df['hour'] = df.index.hour
    df['minute'] = df.index.minute
    df['hour_sin'] = np.sin(2 * np.pi * df['hour'] / 24)
    df['hour_cos'] = np.cos(2 * np.pi * df['hour'] / 24)

    # Current candle body relative to the previous one.
    df['engulf_ratio'] = body / body.shift(1).replace(0, np.nan)

    # Raw prices and absolute-level helper columns are not model inputs.
    exclude = {'ts', 'open', 'high', 'low', 'close', 'label',
               'high_20', 'low_20', 'ema_5', 'ema_8', 'ema_13', 'ema_21', 'ema_50', 'ema_120'}
    return [name for name in df.columns if name not in exclude
            and df[name].dtype in ('float64', 'float32', 'int64', 'int32')]


def _add_labels(df, horizon, thresh):
    """Add a ``label`` column: +1 / -1 when the ``horizon``-bar forward return
    is above ``thresh`` / below ``-thresh``, otherwise 0.

    Rows whose forward return is NaN (the last ``horizon`` rows) keep label 0,
    because both comparisons are False for NaN.
    """
    future_ret = df['close'].shift(-horizon) / df['close'] - 1
    df['label'] = 0
    df.loc[future_ret > thresh, 'label'] = 1
    df.loc[future_ret < -thresh, 'label'] = -1


def _walk_forward(df, fcols, prob_th):
    """Train on the three preceding calendar months and predict the next one.

    Adds a ``month`` column to ``df``. Returns ``(p_long, p_short)``: two
    Series aligned with ``df.index`` holding the predicted probability of the
    +1 and -1 classes; bars that never receive a prediction stay at 0.0.
    """
    df['month'] = df.index.to_period('M')
    months = sorted(df['month'].unique())

    p_long = pd.Series(0.0, index=df.index)
    p_short = pd.Series(0.0, index=df.index)

    params = {
        'objective': 'multiclass', 'num_class': 3, 'metric': 'multi_logloss',
        'learning_rate': 0.05, 'num_leaves': 31, 'max_depth': 6,
        'min_child_samples': 50, 'subsample': 0.8, 'colsample_bytree': 0.8,
        'reg_alpha': 0.1, 'reg_lambda': 0.1, 'verbose': -1, 'n_jobs': -1, 'seed': 42
    }

    # Pair every month with the month three positions later: train on
    # [train_start, test_month), predict test_month.
    for train_start, test_month in zip(months, months[3:]):
        train_mask = (df['month'] >= train_start) & (df['month'] < test_month)
        train_df = df[train_mask].dropna(subset=fcols + ['label'])
        test_df = df[df['month'] == test_month].dropna(subset=fcols)
        if len(train_df) < 1000 or len(test_df) < 100:
            continue
        # Labels -1/0/+1 are shifted to class ids 0/1/2.
        train_set = lgb.Dataset(train_df[fcols].values, label=train_df['label'].values + 1)
        model = lgb.train(params, train_set, num_boost_round=200)
        proba = model.predict(test_df[fcols].values)
        p_long.loc[test_df.index] = proba[:, 2]
        p_short.loc[test_df.index] = proba[:, 0]
        n_long = (proba[:, 2] > prob_th).sum()
        n_short = (proba[:, 0] > prob_th).sum()
        print(f"  {test_month}: long={n_long} short={n_short}", flush=True)
    return p_long, p_short


def _run_backtest(index, close, p_long, p_short, notional, prob_th, sl_pct, tp_pct):
    """Simulate one position at a time on bar closes.

    ``index`` yields one timestamp per bar; ``close``, ``p_long`` and
    ``p_short`` are equally long arrays. Returns a list of tuples
    ``(side, open_px, close_px, pnl, hold_sec, reason, open_time, close_time)``
    where ``pnl`` is ``notional`` times the signed return, before fees.
    """
    side = 0          # +1 long, -1 short, 0 flat
    entry_px = 0.0
    entry_t = None
    trades = []
    for now, px, up, down in zip(index, close, p_long, p_short):
        if side != 0 and entry_t is not None:
            ret = (px - entry_px) / entry_px if side == 1 else (entry_px - px) / entry_px
            held = (now - entry_t).total_seconds()
            reason = None
            if -ret >= sl_pct * 1.5:
                # The hard stop applies regardless of holding time.
                reason = 'hard_sl'
            elif held >= 200:
                # All other exits are only considered after 200 seconds.
                if -ret >= sl_pct:
                    reason = 'sl'
                elif ret >= tp_pct:
                    reason = 'tp'
                elif held >= 1800:
                    reason = 'timeout'
                elif side == 1 and down > prob_th + 0.05:
                    reason = 'ai_rev'
                elif side == -1 and up > prob_th + 0.05:
                    reason = 'ai_rev'
            if reason is not None:
                trades.append((side, entry_px, px, notional * ret, held, reason, entry_t, now))
                side = 0
                if reason != 'ai_rev':
                    # Only a model-reversal exit may re-enter on the same bar.
                    continue
        if side == 0:
            if up > prob_th and up > down:
                side = 1; entry_px = px; entry_t = now
            elif down > prob_th and down > up:
                side = -1; entry_px = px; entry_t = now
    if side != 0:
        # Mark the still-open position to the last close.
        px = close[-1]; now = index[-1]
        ret = (px - entry_px) / entry_px if side == 1 else (entry_px - px) / entry_px
        trades.append((side, entry_px, px, notional * ret,
                       (now - entry_t).total_seconds(), 'end', entry_t, now))
    return trades


def _print_report(trades, n_features, t0, notional, rebate, net_fee):
    """Print totals, exit reasons, the monthly breakdown and the comparison lines.

    ``rebate`` and ``net_fee`` are per-trade amounts; ``t0`` is the
    ``time.time()`` value taken at start-up, used for the elapsed-time line.
    """
    n = len(trades)
    tpnl = sum(t[3] for t in trades)
    net = tpnl - net_fee * n
    treb = rebate * n
    wins = sum(1 for t in trades if t[3] > 0)
    wr = wins / n * 100 if n else 0

    monthly = defaultdict(lambda: {'n': 0, 'net': 0, 'w': 0})
    reasons = defaultdict(int)
    cum = 0; peak = 0; dd = 0
    for t in trades:
        # Trades are bucketed by the month of their close time.
        bucket = monthly[t[7].strftime('%Y-%m')]
        bucket['n'] += 1
        bucket['net'] += t[3] - net_fee
        if t[3] > 0:
            bucket['w'] += 1
        reasons[t[5]] += 1
        # Running max drawdown of cumulative net PnL.
        cum += t[3] - net_fee
        if cum > peak:
            peak = cum
        if peak - cum > dd:
            dd = peak - cum

    elapsed = _time.time() - t0

    print(f"\n{'='*70}", flush=True)
    # The feature count is taken from the actual feature list instead of a
    # hard-coded number, so the header cannot drift from the code.
    print(f"  AI策略最佳配置 (LightGBM + {n_features}特征)", flush=True)
    print(f"  10bar前瞻 | 阈值0.3% | 概率>0.45 | SL=0.5% TP=0.8%", flush=True)
    print(f"  100U保证金 x 100倍杠杆 = 10,000U名义 | 耗时{elapsed:.0f}s", flush=True)
    print(f"{'='*70}", flush=True)
    print(f"  方向盈亏:     {tpnl:>+10.0f} USDT", flush=True)
    print(f"  返佣(90%):    {treb:>+10.0f} USDT", flush=True)
    print(f"  净手续费(10%):{net_fee*n:>10.0f} USDT", flush=True)
    print(f"  ================================", flush=True)
    print(f"  年净利:       {net:>+10.0f} USDT", flush=True)
    print(f"  月均:         {net/12:>+10.0f} USDT", flush=True)
    print(f"  最大回撤:     {dd:>10.0f} USDT", flush=True)
    print(f"  交易笔数:     {n:>10}", flush=True)
    print(f"  胜率:         {wr:>9.1f}%", flush=True)

    if wins > 0 and wins < n:
        aw = sum(t[3] for t in trades if t[3] > 0) / wins
        al = sum(t[3] for t in trades if t[3] <= 0) / (n - wins)
        print(f"  平均盈利:     {aw:>+10.1f} USDT", flush=True)
        print(f"  平均亏损:     {al:>+10.1f} USDT", flush=True)
        print(f"  盈亏比:       {abs(aw/al):>10.2f}", flush=True)

    print(f"\n  平仓原因:", flush=True)
    for r, cnt in sorted(reasons.items(), key=lambda x: -x[1]):
        print(f"    {r:<10} {cnt:>5}笔 ({cnt/n*100:.1f}%)", flush=True)

    print(f"\n  月度明细:", flush=True)
    pm = 0
    for m in sorted(monthly.keys()):
        d = monthly[m]
        wr_m = d['w'] / d['n'] * 100 if d['n'] > 0 else 0
        status = "盈利" if d['net'] > 0 else "亏损"
        print(f"    {m}: {d['n']:>4}笔 {d['net']:>+8.0f}U  胜率{wr_m:.0f}%  [{status}]", flush=True)
        if d['net'] > 0:
            pm += 1
    print(f"    合计: {n:>4}笔 {net:>+8.0f}U  盈利月: {pm}/12", flush=True)

    # Linear scaling of the result to other margin sizes (100x leverage).
    print(f"\n  --- 不同保证金下的月均收入 ---", flush=True)
    for margin in [100, 200, 300, 500, 800, 1000]:
        scale = margin * 100 / notional
        mn = net * scale / 12
        ok = " <<< 达标!" if mn >= 1000 else ""
        print(f"    {margin:>5}U保证金: 月均 {mn:>+6.0f} USDT{ok}", flush=True)

    # Comparison against a fixed EMA-baseline figure from an earlier run.
    print(f"\n  --- 对比: AI vs 纯EMA策略 ---", flush=True)
    ema_net = 1196
    print(f"    纯EMA: {ema_net:>+6.0f}/年 = {ema_net/12:>+4.0f}/月 (227笔)", flush=True)
    print(f"    AI策略: {net:>+6.0f}/年 = {net/12:>+4.0f}/月 ({n}笔)", flush=True)
    if net > ema_net:
        print(f"    AI提升: {(net/ema_net-1)*100:>+.0f}% ({net-ema_net:>+.0f} USDT)", flush=True)

    print(f"\n{'='*70}", flush=True)


def _save_trades(trades, out_path):
    """Write the trade list to ``out_path`` as CSV (UTF-8 with BOM)."""
    with open(out_path, 'w', encoding='utf-8-sig') as f:
        f.write("dir,open_px,close_px,pnl,hold_sec,reason,open_time,close_time\n")
        for side, open_px, close_px, pnl, held, reason, opened, closed in trades:
            d = 'long' if side == 1 else 'short'
            f.write(f"{d},{open_px:.2f},{close_px:.2f},{pnl:.2f},{held:.0f},{reason},{opened},{closed}\n")


def main():
    """Run the full pipeline: load bars, build features and labels, train
    walk-forward LightGBM models, backtest the signals, print the report and
    save the trades to ``ai_trades.csv``.
    """
    t0 = _time.time()
    print("Loading...", flush=True)
    db = Path(__file__).parent.parent / 'models' / 'database.db'
    # NOTE(review): these datetimes are naive, so .timestamp() interprets them
    # in the machine's local timezone, while the bar timestamps are parsed
    # below as plain epoch milliseconds. On a non-UTC machine the window is
    # therefore shifted relative to the calendar months used for the
    # walk-forward split - confirm which boundary is intended.
    s = int(datetime.datetime(2025, 1, 1).timestamp()) * 1000
    e = int(datetime.datetime(2026, 1, 1).timestamp()) * 1000
    df = _load_bars(db, s, e)
    print(f"  {len(df):,} bars", flush=True)

    # ===== Features =====
    print("Features...", flush=True)
    fcols = _add_features(df)
    print(f"  {len(fcols)} features", flush=True)

    # ===== Labels: 10-bar look-ahead, 0.3% threshold =====
    _add_labels(df, horizon=10, thresh=0.003)

    prob_th = 0.45; sl_pct = 0.005; tp_pct = 0.008

    # ===== Walk-forward training =====
    print("Walk-forward training...", flush=True)
    p_long, p_short = _walk_forward(df, fcols, prob_th)

    # ===== Backtest =====
    print("\nBacktest...", flush=True)
    NOTIONAL = 10000.0
    # Round-trip fee at 0.06% per side; 90% of it is treated as rebate.
    FEE = NOTIONAL * 0.0006 * 2; REB = FEE * 0.9; NFEE = FEE - REB
    # Arrays are extracted once so the per-bar loop avoids pandas indexing.
    trades = _run_backtest(df.index, df['close'].to_numpy(),
                           p_long.to_numpy(), p_short.to_numpy(),
                           NOTIONAL, prob_th, sl_pct, tp_pct)

    # ===== Results =====
    _print_report(trades, len(fcols), t0, NOTIONAL, REB, NFEE)

    # Save
    out_path = Path(__file__).parent.parent / 'ai_trades.csv'
    _save_trades(trades, out_path)
    print(f"  Saved: {out_path}", flush=True)

# Run the backtest only when this file is executed as a script.
if __name__ == "__main__":
    main()