# lm_code/交易/bitmart-AI优化回测.py

"""
AI策略优化 v2 — 目标: 100U保证金达到1000U/月

优化方向:
    1. 多时间框架特征: 加入5分钟/15分钟聚合K线指标
    2. Ensemble: LightGBM + GradientBoosting 概率加权平均 (0.6 / 0.4)
    3. 更长训练窗口: 4个月 vs 3个月
    4. 高置信度过滤: 仅当集成概率超过阈值且领先反向概率3%以上时交易
    5. 动态止盈: 用ATR倍数而非固定比例
    6. 更多K线形态特征: 连续涨跌、缺口、波动率变化率
    7. 扫描最优参数组合
"""
import datetime, sqlite3, time as _time
import numpy as np
import pandas as pd
import lightgbm as lgb
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
import warnings
from pathlib import Path
from collections import defaultdict
warnings.filterwarnings('ignore')

def load_data(db_path=None, start=None, end=None):
    """Load 1-minute ETH candles from the SQLite table ``bitmart_eth_1m``.

    Args:
        db_path: Path to the SQLite file. Defaults to
            ``<parent of this file's directory>/models/database.db``.
        start: Inclusive lower bound as a ``datetime.datetime``.
            Defaults to 2025-01-01.
        end: Exclusive upper bound as a ``datetime.datetime``.
            Defaults to 2026-01-01.

    Returns:
        DataFrame with columns ``ts, open, high, low, close`` ordered by
        ``ts`` and indexed by a ``datetime`` index derived from ``ts``
        (interpreted as epoch milliseconds).

    Raises:
        FileNotFoundError: If the database file does not exist.
    """
    if db_path is None:
        db_path = Path(__file__).parent.parent / 'models' / 'database.db'
    db_path = Path(db_path)
    # sqlite3.connect() silently creates an empty database for a missing
    # path, which would surface later as a confusing "no such table" error.
    if not db_path.is_file():
        raise FileNotFoundError(f"database not found: {db_path}")

    if start is None:
        start = datetime.datetime(2025, 1, 1)
    if end is None:
        end = datetime.datetime(2026, 1, 1)
    # NOTE(review): naive datetimes are converted with the machine's local
    # timezone here, while the index below is built from raw epoch ms (UTC
    # wall clock). Confirm the intended timezone for the window bounds.
    s = int(start.timestamp()) * 1000
    e = int(end.timestamp()) * 1000

    conn = sqlite3.connect(str(db_path))
    try:
        df = pd.read_sql_query(
            "SELECT id as ts,open,high,low,close FROM bitmart_eth_1m "
            "WHERE id>=? AND id<? ORDER BY id",
            conn, params=(s, e))
    finally:
        # Release the handle even when the query fails.
        conn.close()

    df['datetime'] = pd.to_datetime(df['ts'], unit='ms')
    df.set_index('datetime', inplace=True)
    return df

def add_features(df):
    """Append technical-indicator feature columns to a 1-minute OHLC frame.

    The frame is modified in place and also returned. It must have a
    monotonically increasing ``DatetimeIndex`` and the columns ``open``,
    ``high``, ``low`` and ``close``.

    Feature groups, in insertion order: EMAs and EMA spreads/slopes, RSI,
    Bollinger position/width, MACD, ATR, stochastic, momentum returns,
    realised volatility, candle-shape ratios, direction streaks,
    engulfing/hammer flags, rolling price position, 5-minute and 15-minute
    aggregated indicators, and calendar fields.

    Higher-timeframe buckets are labelled with their *right* edge before
    being forward-filled onto the 1-minute index, so a 5m/15m value only
    becomes visible once every bar of its bucket has been seen. With the
    pandas default (left label) the bucket's final close would be exposed
    up to 4 (resp. 14) minutes early, leaking future prices into features.

    Infinite values (e.g. a percentage change from a zero base) are turned
    into NaN at the end so that a downstream ``dropna`` removes them.

    Args:
        df: 1-minute OHLC DataFrame indexed by timestamp.

    Returns:
        The same DataFrame with feature columns added.
    """
    c = df['close']
    h = df['high']
    l = df['low']
    o = df['open']

    def _to_base(series):
        # Broadcast a coarser-timeframe series onto the 1-minute index.
        return series.reindex(df.index, method='ffill')

    # Right-labelled, left-closed buckets: [t, t+rule) is stamped t+rule.
    bucket = {'closed': 'left', 'label': 'right'}

    # === 1-minute base indicators ===
    for p in [5, 8, 13, 21, 50, 120]:
        df[f'ema_{p}'] = c.ewm(span=p, adjust=False).mean()
    df['ema_fast_slow'] = (df['ema_8'] - df['ema_21']) / c
    df['ema_slow_big'] = (df['ema_21'] - df['ema_120']) / c
    df['price_vs_ema120'] = (c - df['ema_120']) / c
    df['price_vs_ema50'] = (c - df['ema_50']) / c
    df['ema8_slope'] = df['ema_8'].pct_change(5)
    df['ema21_slope'] = df['ema_21'].pct_change(5)
    df['ema120_slope'] = df['ema_120'].pct_change(20)

    # Triple-EMA alignment flags
    df['triple_bull'] = ((df['ema_8'] > df['ema_21']) & (df['ema_21'] > df['ema_120'])).astype(float)
    df['triple_bear'] = ((df['ema_8'] < df['ema_21']) & (df['ema_21'] < df['ema_120'])).astype(float)

    # RSI (simple rolling means; NaN when the average loss is zero)
    delta = c.diff()
    gain = delta.clip(lower=0)
    loss = (-delta).clip(lower=0)
    for p in [7, 14, 21]:
        ag = gain.rolling(p).mean()
        al = loss.rolling(p).mean()
        df[f'rsi_{p}'] = 100 - 100 / (1 + ag / al.replace(0, np.nan))
    df['rsi_14_slope'] = df['rsi_14'].diff(5)  # RSI rate of change

    # Bollinger bands
    mid = c.rolling(20).mean()
    std = c.rolling(20).std()
    df['bb_pct'] = (c - (mid - 2 * std)) / ((mid + 2 * std) - (mid - 2 * std)).replace(0, np.nan)
    df['bb_width'] = 4 * std / mid
    df['bb_width_change'] = df['bb_width'].pct_change(10)  # band-width change

    # MACD (normalised by price)
    ema12 = c.ewm(span=12, adjust=False).mean()
    ema26 = c.ewm(span=26, adjust=False).mean()
    df['macd'] = (ema12 - ema26) / c
    df['macd_signal'] = df['macd'].ewm(span=9, adjust=False).mean()
    df['macd_hist'] = df['macd'] - df['macd_signal']
    df['macd_hist_slope'] = df['macd_hist'].diff(3)  # histogram change

    # ATR (as a fraction of price)
    prev_c = c.shift(1)
    tr = pd.concat([h - l, (h - prev_c).abs(), (l - prev_c).abs()], axis=1).max(axis=1)
    df['atr_pct'] = tr.rolling(14).mean() / c
    df['atr_7'] = tr.rolling(7).mean() / c
    df['atr_ratio'] = df['atr_7'] / df['atr_pct'].replace(0, np.nan)  # short/long ATR

    # Stochastic
    for p in [14, 28]:
        low_p = l.rolling(p).min()
        high_p = h.rolling(p).max()
        df[f'stoch_k_{p}'] = (c - low_p) / (high_p - low_p).replace(0, np.nan) * 100
    df['stoch_d_14'] = df['stoch_k_14'].rolling(3).mean()

    # Momentum
    for p in [1, 3, 5, 10, 20, 60, 120]:
        df[f'ret_{p}'] = c.pct_change(p)

    # Realised volatility
    ret_1 = c.pct_change()
    df['vol_5'] = ret_1.rolling(5).std()
    df['vol_20'] = ret_1.rolling(20).std()
    df['vol_60'] = ret_1.rolling(60).std()
    df['vol_ratio'] = df['vol_5'] / df['vol_20'].replace(0, np.nan)
    df['vol_trend'] = df['vol_20'].pct_change(20)  # volatility trend

    # Candle shape
    body = (c - o).abs()
    df['body_pct'] = body / c
    df['upper_shadow'] = (h - pd.concat([o, c], axis=1).max(axis=1)) / c
    df['lower_shadow'] = (pd.concat([o, c], axis=1).min(axis=1) - l) / c
    df['body_vs_range'] = body / (h - l).replace(0, np.nan)
    df['is_bullish'] = (c > o).astype(float)
    df['range_pct'] = (h - l) / c  # candle range

    # Consecutive-direction streak: positive = bullish run, negative = otherwise
    bullish = (c > o).astype(int)
    df['streak'] = bullish.groupby((bullish != bullish.shift()).cumsum()).cumcount() + 1
    df['streak'] = df['streak'] * bullish - df['streak'] * (1 - bullish)

    # Engulfing ratio / hammer / shooting star
    prev_body = body.shift(1)
    df['engulf_ratio'] = body / prev_body.replace(0, np.nan)
    df['hammer'] = (df['lower_shadow'] > df['body_pct'] * 2).astype(float)
    df['shooting_star'] = (df['upper_shadow'] > df['body_pct'] * 2).astype(float)

    # Position of the close inside the rolling high/low channel
    for p in [20, 60]:
        df[f'high_{p}'] = h.rolling(p).max()
        df[f'low_{p}'] = l.rolling(p).min()
        df[f'pos_{p}'] = (c - df[f'low_{p}']) / (df[f'high_{p}'] - df[f'low_{p}']).replace(0, np.nan)

    # === Multi-timeframe: 5 minutes ===
    c5 = c.resample('5min', **bucket).last()
    h5 = h.resample('5min', **bucket).max()
    l5 = l.resample('5min', **bucket).min()

    ema5_8 = c5.ewm(span=8, adjust=False).mean()
    ema5_21 = c5.ewm(span=21, adjust=False).mean()
    delta5 = c5.diff()
    rsi5_g = delta5.clip(lower=0).rolling(14).mean()
    rsi5_l = (-delta5).clip(lower=0).rolling(14).mean()
    rsi5 = 100 - 100 / (1 + rsi5_g / rsi5_l.replace(0, np.nan))

    df['ema5m_fast_slow'] = _to_base((ema5_8 - ema5_21) / c5)
    df['rsi5m_14'] = _to_base(rsi5)
    prev_c5 = c5.shift(1)
    tr5 = pd.concat([h5 - l5, (h5 - prev_c5).abs(), (l5 - prev_c5).abs()], axis=1).max(axis=1)
    df['atr5m'] = _to_base(tr5.rolling(14).mean() / c5)
    df['ret5m_1'] = _to_base(c5.pct_change(1))
    df['ret5m_5'] = _to_base(c5.pct_change(5))
    df['ret5m_20'] = _to_base(c5.pct_change(20))

    # === Multi-timeframe: 15 minutes ===
    c15 = c.resample('15min', **bucket).last()
    ema15_21 = c15.ewm(span=21, adjust=False).mean()
    df['ema15m_trend'] = _to_base((c15 - ema15_21) / c15)
    df['ret15m_5'] = _to_base(c15.pct_change(5))

    # Calendar
    df['hour'] = df.index.hour
    df['minute'] = df.index.minute
    df['hour_sin'] = np.sin(2 * np.pi * df['hour'] / 24)
    df['hour_cos'] = np.cos(2 * np.pi * df['hour'] / 24)
    df['weekday'] = df.index.weekday

    # pct_change from a zero base yields +/-inf, which survives dropna();
    # map it to NaN so such rows are filtered like any other gap.
    df.replace([np.inf, -np.inf], np.nan, inplace=True)

    return df

def get_feature_cols(df):
    """Return the names of the columns that are used as model inputs.

    A column qualifies when it is not raw price data, a target/bookkeeping
    column, or an absolute-price helper (EMAs and rolling high/low), and
    its dtype is one of float64, float32, int64 or int32. Column order
    follows ``df.columns``.
    """
    skipped = {
        'ts', 'open', 'high', 'low', 'close', 'label', 'month',
        'ema_5', 'ema_8', 'ema_13', 'ema_21', 'ema_50', 'ema_120',
        'high_20', 'low_20', 'high_60', 'low_60',
    }
    numeric_kinds = ('float64', 'float32', 'int64', 'int32')

    selected = []
    for name in df.columns:
        if name in skipped:
            continue
        if df[name].dtype in numeric_kinds:
            selected.append(name)
    return selected

def train_ensemble(X_tr, y_tr, X_te, fcols):
    """Train a LightGBM + sklearn GradientBoosting ensemble and score X_te.

    Args:
        X_tr: Training feature matrix.
        y_tr: Training labels in {-1, 0, 1}.
        X_te: Feature matrix to score.
        fcols: Feature names. Currently unused; kept for interface
            compatibility with existing callers.

    Returns:
        ``(proba, m1)`` where ``proba`` has shape ``(len(X_te), 3)`` with
        columns ordered as labels -1, 0, 1 (weighted average of both
        models, 0.6 LightGBM / 0.4 GradientBoosting) and ``m1`` is the
        trained LightGBM booster.
    """
    n_classes = 3
    y_cls = y_tr + 1  # -1 -> 0, 0 -> 1, 1 -> 2

    # Model 1: LightGBM. With num_class=3 it always emits three columns.
    # NOTE(review): 'subsample' probably has no effect here because no
    # bagging frequency is set -- confirm against the LightGBM parameter docs.
    params = {
        'objective':'multiclass','num_class':n_classes,'metric':'multi_logloss',
        'learning_rate':0.03,'num_leaves':63,'max_depth':8,
        'min_child_samples':100,'subsample':0.7,'colsample_bytree':0.7,
        'reg_alpha':0.5,'reg_lambda':0.5,'verbose':-1,'n_jobs':-1,'seed':42
    }
    dt_ = lgb.Dataset(X_tr, label=y_cls)
    m1 = lgb.train(params, dt_, num_boost_round=300)
    p1 = m1.predict(X_te)  # (n, 3)

    # Model 2: GradientBoosting (sklearn)
    m2 = GradientBoostingClassifier(
        n_estimators=150, max_depth=5, learning_rate=0.05,
        subsample=0.8, min_samples_leaf=50, random_state=42
    )
    m2.fit(X_tr, y_cls)
    p2_seen = m2.predict_proba(X_te)

    # predict_proba only has columns for classes present in the training
    # window (ordered as m2.classes_). Scatter them into a full 3-column
    # matrix so they line up with LightGBM even if a class was missing.
    p2 = np.zeros((p2_seen.shape[0], n_classes))
    p2[:, np.asarray(m2.classes_, dtype=int)] = p2_seen

    # Ensemble: weighted average, LightGBM weighted higher.
    proba = p1 * 0.6 + p2 * 0.4
    return proba, m1

def backtest(df, pl, ps, notional, prob_th, sl_pct, tp_pct, max_hold, use_atr_tp=False):
    """Simulate a single-position long/short strategy on bar closes.

    At most one position is open at a time; entries and exits are filled
    at the close of the bar on which the rule fires.

    Exit rules, checked in this order on every bar while in a position:
      1. hard stop: loss >= max(1.5 * sl_pct, 0.6%) -- active immediately;
      2. after a 200-second minimum hold: stop-loss (loss >= sl_pct),
         take-profit (gain >= target), time-out (held >= max_hold seconds);
      3. model reversal: opposite-side probability > prob_th + 0.08.
    After exits 1-2 no new position is opened on the same bar; after a
    model reversal the entry rule is evaluated on the same bar.

    Entry rule: the side's probability exceeds ``prob_th`` and beats the
    other side by more than 0.03.

    Args:
        df: Frame indexed by bar timestamp with a ``close`` column and,
            optionally, ``atr_pct`` (0.002 is assumed when absent).
        pl: Per-bar long probability, positionally aligned with ``df``.
        ps: Per-bar short probability, positionally aligned with ``df``.
        notional: Position notional used to convert returns to PnL.
        prob_th: Minimum probability to open a position.
        sl_pct: Stop-loss as a fraction of entry price.
        tp_pct: Fixed take-profit as a fraction of entry price.
        max_hold: Maximum holding time in seconds.
        use_atr_tp: If true, the take-profit target is
            ``max(2.5 * atr_pct at entry, tp_pct)``.

    Returns:
        List of tuples ``(side, entry_price, exit_price, gross_pnl,
        held_seconds, reason, entry_time, exit_time)``. Fees are not
        deducted here. A position still open at the last bar is closed
        there with reason ``'end'``.
    """
    MIN_HOLD_SEC = 200       # soft exits are suppressed before this
    REVERSAL_MARGIN = 0.08   # extra confidence needed to exit on a flip
    ENTRY_MARGIN = 0.03      # required lead over the opposite side
    ATR_TP_MULT = 2.5
    DEFAULT_ATR = 0.002

    # Pull everything into arrays once; per-row .iloc lookups dominate the
    # runtime of a Python loop over hundreds of thousands of bars.
    times = df.index
    close = df['close'].to_numpy()
    long_p = np.asarray(pl)
    short_p = np.asarray(ps)
    atr = df['atr_pct'].to_numpy() if 'atr_pct' in df.columns else None
    hard_sl = max(sl_pct*1.5, 0.006)

    pos = 0
    op = 0.0
    ot = None
    atr_at_open = 0
    trades = []

    for i, dt in enumerate(times):
        p = close[i]
        p_l = long_p[i]
        p_s = short_p[i]

        if pos != 0 and ot is not None:
            pp = (p-op)/op if pos == 1 else (op-p)/op
            hsec = (dt-ot).total_seconds()

            # Dynamic take-profit: ATR multiple, never below the fixed TP.
            if use_atr_tp and atr_at_open > 0:
                dyn_tp = max(atr_at_open * ATR_TP_MULT, tp_pct)
            else:
                dyn_tp = tp_pct

            reason = None
            if -pp >= hard_sl:
                reason = '硬止损'
            elif hsec >= MIN_HOLD_SEC:
                if -pp >= sl_pct:
                    reason = '止损'
                elif pp >= dyn_tp:
                    reason = '止盈'
                elif hsec >= max_hold:
                    reason = '超时'
                elif pos == 1 and p_s > prob_th + REVERSAL_MARGIN:
                    reason = 'AI反转'
                elif pos == -1 and p_l > prob_th + REVERSAL_MARGIN:
                    reason = 'AI反转'

            if reason is not None:
                trades.append((pos, op, p, notional*pp, hsec, reason, ot, dt))
                pos = 0
                # Only a model reversal may re-enter on the same bar.
                if reason != 'AI反转':
                    continue

        if pos == 0:
            if p_l > prob_th and p_l > p_s + ENTRY_MARGIN:
                pos = 1
            elif p_s > prob_th and p_s > p_l + ENTRY_MARGIN:
                pos = -1
            if pos != 0:
                op = p
                ot = dt
                atr_at_open = atr[i] if atr is not None else DEFAULT_ATR

    if pos != 0:
        # Mark the still-open position to the final close.
        p = close[-1]
        dt = times[-1]
        pp = (p-op)/op if pos == 1 else (op-p)/op
        trades.append((pos, op, p, notional*pp, (dt-ot).total_seconds(), 'end', ot, dt))
    return trades

def analyze(trades, notional, label):
    """Summarise a trade list net of fees.

    Each trade is charged a round-trip fee of ``notional * 0.0006 * 2``,
    of which 90% is treated as a rebate; only the remaining 10% is
    deducted from PnL. Trades are bucketed by the month of their exit time.

    Args:
        trades: Tuples as produced by ``backtest`` (index 3 = gross PnL,
            index 7 = exit time).
        notional: Position notional used for the fee calculation.
        label: Name printed when there are no trades.

    Returns:
        ``(net, info)``. ``info`` holds trade count ``n``, win rate ``wr``
        (percent, on gross PnL), profitable-month count ``pm``, maximum
        drawdown ``dd`` of the cumulative net PnL, total rebate ``treb``,
        worst/best monthly net ``min_m``/``max_m`` and the per-month
        breakdown ``monthly``. Returns ``(0, {})`` for an empty list.
    """
    if not trades:
        print(f"  [{label}] No trades")
        return 0, {}

    round_trip_fee = notional*0.0006*2
    rebate_per_trade = round_trip_fee*0.9
    net_fee_per_trade = round_trip_fee-rebate_per_trade

    count = len(trades)
    gross_total = sum(trade[3] for trade in trades)
    net_total = gross_total-net_fee_per_trade*count
    rebate_total = rebate_per_trade*count
    winners = sum(1 for trade in trades if trade[3] > 0)
    win_rate = winners/count*100 if count else 0

    by_month = defaultdict(lambda: {'n': 0, 'net': 0, 'w': 0})
    equity = 0
    high_water = 0
    max_drawdown = 0
    for trade in trades:
        gross = trade[3]

        bucket = by_month[trade[7].strftime('%Y-%m')]
        bucket['n'] += 1
        bucket['net'] += gross-net_fee_per_trade
        if gross > 0:
            bucket['w'] += 1

        # Running equity curve and its deepest peak-to-trough drop.
        equity += gross-net_fee_per_trade
        if equity > high_water:
            high_water = equity
        if high_water-equity > max_drawdown:
            max_drawdown = high_water-equity

    profitable_months = sum(1 for stats in by_month.values() if stats['net'] > 0)
    if by_month:
        worst_month = min(by_month.values(), key=lambda stats: stats['net'])['net']
        best_month = max(by_month.values(), key=lambda stats: stats['net'])['net']
    else:
        worst_month = 0
        best_month = 0

    summary = {
        'n': count,
        'wr': win_rate,
        'pm': profitable_months,
        'dd': max_drawdown,
        'treb': rebate_total,
        'min_m': worst_month,
        'max_m': best_month,
        'monthly': by_month,
    }
    return net_total, summary

def _walk_forward_proba(df, fcols, fb, thresh, train_m):
    """Produce out-of-sample long/short probabilities month by month.

    Bars are labelled +1 / -1 when the close ``fb`` bars ahead is more than
    ``thresh`` above / below the current close, else 0. For every calendar
    month after the first ``train_m`` months, an ensemble is trained on the
    preceding ``train_m`` months and used to score that month.

    Returns:
        ``(pl, ps, n_tested)``: per-bar long and short probabilities
        (0.0 where no prediction exists) and the number of months that
        were actually scored.
    """
    df_t = df.copy()
    future_ret = df_t['close'].shift(-fb)/df_t['close'] - 1
    df_t['label'] = 0
    df_t.loc[future_ret > thresh, 'label'] = 1
    df_t.loc[future_ret < -thresh, 'label'] = -1

    df_t['month'] = df_t.index.to_period('M')
    months = sorted(df_t['month'].unique())

    pl = pd.Series(0.0, index=df_t.index)
    ps = pd.Series(0.0, index=df_t.index)
    n_tested = 0

    for mi in range(train_m, len(months)):
        tm = months[mi]
        ts_ = months[mi-train_m]
        tr_mask = (df_t['month'] >= ts_) & (df_t['month'] < tm)
        te_mask = df_t['month'] == tm
        tr_df = df_t[tr_mask].dropna(subset=fcols+['label'])
        te_df = df_t[te_mask].dropna(subset=fcols)
        if len(tr_df) < 1000 or len(te_df) < 100:
            continue

        proba, _ = train_ensemble(tr_df[fcols].values, tr_df['label'].values, te_df[fcols].values, fcols)
        pl.loc[te_df.index] = proba[:, 2]
        ps.loc[te_df.index] = proba[:, 0]
        n_tested += 1

    return pl, ps, n_tested


def main():
    """Run every strategy configuration and print a ranked comparison.

    For each configuration the walk-forward probabilities are computed (or
    reused when another configuration shares the same labelling and
    training window), the trading rules are back-tested, and the result is
    summarised. Monthly averages and "profitable months" are reported
    against the number of months that were actually traded out-of-sample,
    not against a fixed 12, because the first ``train_m`` months are only
    used for training.
    """
    t0 = _time.time()
    print("="*70, flush=True)
    print("  AI策略优化 v2 — Ensemble + 多时间框架 + 60+特征", flush=True)
    print("  100U保证金 × 100倍 = 10,000U名义", flush=True)
    print("="*70, flush=True)

    df = load_data()
    print(f"  {len(df):,} bars", flush=True)

    df = add_features(df)
    fcols = get_feature_cols(df)
    print(f"  {len(fcols)} features", flush=True)

    NOTIONAL = 10000.0

    # Configurations to compare
    configs = [
        # (fb, thresh, prob_th, sl, tp, max_hold, use_atr_tp, train_m, label)
        (10, 0.003, 0.45, 0.005, 0.008, 1800, False, 3, "v1: 基线(上轮最佳)"),
        (10, 0.003, 0.48, 0.005, 0.008, 1800, False, 3, "v2: 高置信0.48"),
        (10, 0.003, 0.50, 0.005, 0.010, 2400, False, 3, "v3: 超高置信0.50 大TP"),
        (10, 0.003, 0.45, 0.005, 0.010, 2400, True,  3, "v4: ATR动态止盈"),
        (10, 0.003, 0.48, 0.006, 0.010, 2400, True,  3, "v5: 高置信+ATR+宽SL"),
        (15, 0.004, 0.45, 0.006, 0.010, 2400, True,  3, "v6: 15bar前瞻 大波动"),
        (10, 0.003, 0.45, 0.005, 0.008, 1800, False, 4, "v7: 4月训练窗口"),
        (10, 0.003, 0.48, 0.005, 0.010, 2400, True,  4, "v8: 4月+高置信+ATR"),
    ]

    results = []
    # The models depend only on (fb, thresh, train_m); the remaining
    # parameters affect trading rules only, so predictions are shared.
    proba_cache = {}

    for fb, thresh, prob_th, sl, tp, mh, use_atr, train_m, label in configs:
        print(f"\n--- {label} ---", flush=True)
        print(f"  前瞻={fb} 阈值={thresh*100:.1f}% prob>{prob_th} SL={sl*100:.1f}% TP={tp*100:.1f}% MH={mh}s ATR_TP={use_atr} train={train_m}m", flush=True)

        key = (fb, thresh, train_m)
        if key not in proba_cache:
            proba_cache[key] = _walk_forward_proba(df, fcols, fb, thresh, train_m)
        pl, ps, n_test = proba_cache[key]

        # Back-test
        trades = backtest(df, pl, ps, NOTIONAL, prob_th, sl, tp, mh, use_atr)
        net, info = analyze(trades, NOTIONAL, label)

        if info:
            print(f"  净利={net:+.0f} 交易={info['n']} 胜率={info['wr']:.1f}% 盈利月={info['pm']}/{n_test} 回撤={info['dd']:.0f}", flush=True)
            # Per-month overview
            for m in sorted(info['monthly'].keys()):
                d = info['monthly'][m]
                s = "+" if d['net']>0 else "-"
                print(f"    {m}: {d['net']:>+6.0f} ({d['n']}笔) {s}", flush=True)
        results.append((label, net, info, n_test))

    # === Summary ===
    elapsed = _time.time()-t0
    results.sort(key=lambda x: x[1], reverse=True)

    print(f"\n\n{'='*80}", flush=True)
    print(f"  总览 | 100U保证金 × 100倍 | 耗时 {elapsed:.0f}s", flush=True)
    print(f"{'='*80}", flush=True)
    print(f"  {'方案':<30} {'净利':>8} {'月均':>6} {'交易':>5} {'胜率':>5} {'盈月':>4} {'回撤':>6}", flush=True)
    print(f"  {'-'*72}", flush=True)

    for label, net, info, n_test in results:
        if not info: continue
        mavg = net/n_test if n_test else 0.0
        print(f"  {label:<30} {net:>+8.0f} {mavg:>+6.0f} {info['n']:>5} {info['wr']:>4.1f}% {info['pm']:>2}/{n_test:<2} {info['dd']:>6.0f}", flush=True)

    best = results[0]
    best_months = best[3]
    best_avg = best[1]/best_months if best_months else 0.0
    print(f"\n  最佳: {best[0]}", flush=True)
    print(f"  净利: {best[1]:+.0f} USDT / {best_months}个样本外月 = 月均 {best_avg:+.0f} USDT", flush=True)

    if best[2]:
        print(f"\n  最佳方案月度:", flush=True)
        for m in sorted(best[2]['monthly'].keys()):
            d = best[2]['monthly'][m]
            print(f"    {m}: {d['n']:>4}笔 {d['net']:>+8.0f}U [{('盈利' if d['net']>0 else '亏损')}]", flush=True)

    print(f"\n  对比基线(LightGBM v1): +4801/年 = +400/月", flush=True)
    if best[1] > 4801:
        print(f"  优化提升: {(best[1]/4801-1)*100:+.0f}%", flush=True)
    print(f"{'='*80}", flush=True)

    # NOTE(review): the original intent appears to be exporting the best
    # configuration's trades to 'ai_v2_best.csv', but no file was ever
    # written; only this console message is produced.
    if best[2]:
        print(f"  Results saved summary to console.", flush=True)

# Run the full optimisation sweep only when executed as a script,
# not when this module is imported.
if __name__=='__main__':
    main()