Files
lm_code/交易/bitmart-AI优化回测.py
Your Name b5af5b07f3 哈哈
2026-02-15 02:16:45 +08:00

377 lines
15 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
AI策略优化 v2 — 目标: 100U保证金达到1000U/月
优化方向:
1. 多时间框架特征: 加入5分钟/15分钟聚合K线指标
2. Ensemble: LightGBM + RandomForest 投票
3. 更长训练窗口: 4个月 vs 3个月
4. 高置信度过滤: 只在双模型一致时交易
5. 动态止盈: 用ATR倍数而非固定比例
6. 更多K线形态特征: 连续涨跌、缺口、波动率变化率
7. 扫描最优参数组合
"""
import datetime, sqlite3, time as _time
import numpy as np
import pandas as pd
import lightgbm as lgb
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
import warnings
from pathlib import Path
from collections import defaultdict
warnings.filterwarnings('ignore')
def load_data():
    """Load the 2025 one-minute ETH candles from the project SQLite database.

    The database is expected at ``<parent of this file's directory>/models/database.db``
    and must contain a ``bitmart_eth_1m`` table whose ``id`` column is used as an
    epoch-millisecond timestamp.  Rows with ``id`` in [2025-01-01, 2026-01-01) are
    returned; the bounds are built from naive ``datetime`` objects, so they are
    interpreted in the machine's local timezone.

    Returns:
        pandas.DataFrame with columns ``ts, open, high, low, close`` ordered by
        ``ts`` and indexed by ``datetime`` (``ts`` converted from milliseconds).
    """
    db = Path(__file__).parent.parent / 'models' / 'database.db'
    start_ms = int(datetime.datetime(2025, 1, 1).timestamp()) * 1000
    end_ms = int(datetime.datetime(2026, 1, 1).timestamp()) * 1000
    conn = sqlite3.connect(str(db))
    try:
        # Bound parameters instead of string interpolation; the connection is
        # closed even when the query raises (e.g. missing table).
        df = pd.read_sql_query(
            "SELECT id as ts,open,high,low,close FROM bitmart_eth_1m "
            "WHERE id>=? AND id<? ORDER BY id",
            conn,
            params=(start_ms, end_ms),
        )
    finally:
        conn.close()
    df['datetime'] = pd.to_datetime(df['ts'], unit='ms')
    df.set_index('datetime', inplace=True)
    return df
def add_features(df):
    """Add technical-indicator feature columns to a 1-minute OHLC frame.

    ``df`` must have ``open``, ``high``, ``low``, ``close`` columns and a
    ``DatetimeIndex``.  The frame is modified in place and also returned.

    Feature groups: EMA spreads/slopes, RSI, Bollinger position/width, MACD,
    ATR, stochastic, momentum returns, realised volatility, candle shape,
    direction streaks, rolling price position, 5-minute and 15-minute
    aggregated indicators, and time-of-day / weekday columns.

    The higher-timeframe candles are built with ``closed='right',
    label='right'`` so the value stamped at time T only contains 1-minute bars
    up to and including T.  With pandas' default left-labelled bins, the bar at
    10:00 would receive the 10:04 close after forward-filling, leaking future
    prices into the features.
    """
    c = df['close']
    h = df['high']
    l = df['low']
    o = df['open']

    # === 1-minute base indicators ===
    for p in [5, 8, 13, 21, 50, 120]:
        df[f'ema_{p}'] = c.ewm(span=p, adjust=False).mean()
    df['ema_fast_slow'] = (df['ema_8'] - df['ema_21']) / c
    df['ema_slow_big'] = (df['ema_21'] - df['ema_120']) / c
    df['price_vs_ema120'] = (c - df['ema_120']) / c
    df['price_vs_ema50'] = (c - df['ema_50']) / c
    df['ema8_slope'] = df['ema_8'].pct_change(5)
    df['ema21_slope'] = df['ema_21'].pct_change(5)
    df['ema120_slope'] = df['ema_120'].pct_change(20)

    # Triple-EMA alignment flags
    df['triple_bull'] = ((df['ema_8'] > df['ema_21']) & (df['ema_21'] > df['ema_120'])).astype(float)
    df['triple_bear'] = ((df['ema_8'] < df['ema_21']) & (df['ema_21'] < df['ema_120'])).astype(float)

    # RSI (simple rolling means of gains/losses; zero loss yields NaN)
    delta = c.diff()
    gain = delta.clip(lower=0)
    loss = (-delta).clip(lower=0)
    for p in [7, 14, 21]:
        ag = gain.rolling(p).mean()
        al = loss.rolling(p).mean()
        df[f'rsi_{p}'] = 100 - 100 / (1 + ag / al.replace(0, np.nan))
    df['rsi_14_slope'] = df['rsi_14'].diff(5)  # RSI rate of change

    # Bollinger bands
    mid = c.rolling(20).mean()
    std = c.rolling(20).std()
    df['bb_pct'] = (c - (mid - 2 * std)) / ((mid + 2 * std) - (mid - 2 * std)).replace(0, np.nan)
    df['bb_width'] = 4 * std / mid
    df['bb_width_change'] = df['bb_width'].pct_change(10)  # volatility change

    # MACD (normalised by price)
    ema12 = c.ewm(span=12, adjust=False).mean()
    ema26 = c.ewm(span=26, adjust=False).mean()
    df['macd'] = (ema12 - ema26) / c
    df['macd_signal'] = df['macd'].ewm(span=9, adjust=False).mean()
    df['macd_hist'] = df['macd'] - df['macd_signal']
    df['macd_hist_slope'] = df['macd_hist'].diff(3)  # histogram change

    # ATR
    tr = pd.concat([h - l, (h - c.shift(1)).abs(), (l - c.shift(1)).abs()], axis=1).max(axis=1)
    df['atr_pct'] = tr.rolling(14).mean() / c
    df['atr_7'] = tr.rolling(7).mean() / c
    df['atr_ratio'] = df['atr_7'] / df['atr_pct'].replace(0, np.nan)  # short / long ATR

    # Stochastic
    for p in [14, 28]:
        low_p = l.rolling(p).min()
        high_p = h.rolling(p).max()
        df[f'stoch_k_{p}'] = (c - low_p) / (high_p - low_p).replace(0, np.nan) * 100
    df['stoch_d_14'] = df['stoch_k_14'].rolling(3).mean()

    # Momentum
    for p in [1, 3, 5, 10, 20, 60, 120]:
        df[f'ret_{p}'] = c.pct_change(p)

    # Volatility
    df['vol_5'] = c.pct_change().rolling(5).std()
    df['vol_20'] = c.pct_change().rolling(20).std()
    df['vol_60'] = c.pct_change().rolling(60).std()
    df['vol_ratio'] = df['vol_5'] / df['vol_20'].replace(0, np.nan)
    df['vol_trend'] = df['vol_20'].pct_change(20)  # volatility trend

    # Candle shape
    body = (c - o).abs()
    df['body_pct'] = body / c
    df['upper_shadow'] = (h - pd.concat([o, c], axis=1).max(axis=1)) / c
    df['lower_shadow'] = (pd.concat([o, c], axis=1).min(axis=1) - l) / c
    df['body_vs_range'] = body / (h - l).replace(0, np.nan)
    df['is_bullish'] = (c > o).astype(float)
    df['range_pct'] = (h - l) / c  # candle range

    # Consecutive direction: positive = run of bullish bars, negative = run of non-bullish bars
    bullish = (c > o).astype(int)
    df['streak'] = bullish.groupby((bullish != bullish.shift()).cumsum()).cumcount() + 1
    df['streak'] = df['streak'] * bullish - df['streak'] * (1 - bullish)

    # Engulfing / hammer style ratios
    prev_body = body.shift(1)
    df['engulf_ratio'] = body / prev_body.replace(0, np.nan)
    df['hammer'] = (df['lower_shadow'] > df['body_pct'] * 2).astype(float)
    df['shooting_star'] = (df['upper_shadow'] > df['body_pct'] * 2).astype(float)

    # Price position inside the rolling high/low range
    for p in [20, 60]:
        df[f'high_{p}'] = h.rolling(p).max()
        df[f'low_{p}'] = l.rolling(p).min()
        df[f'pos_{p}'] = (c - df[f'low_{p}']) / (df[f'high_{p}'] - df[f'low_{p}']).replace(0, np.nan)

    # Right-closed, right-labelled bins: the candle stamped T ends at bar T,
    # so forward-filling onto the 1-minute index never exposes future bars.
    mtf = {'closed': 'right', 'label': 'right'}

    # === Multi-timeframe: 5 minutes ===
    c5 = c.resample('5min', **mtf).last()
    h5 = h.resample('5min', **mtf).max()
    l5 = l.resample('5min', **mtf).min()
    ema5_8 = c5.ewm(span=8, adjust=False).mean()
    ema5_21 = c5.ewm(span=21, adjust=False).mean()
    rsi5_14_delta = c5.diff()
    rsi5_g = rsi5_14_delta.clip(lower=0).rolling(14).mean()
    rsi5_l = (-rsi5_14_delta).clip(lower=0).rolling(14).mean()
    rsi5 = 100 - 100 / (1 + rsi5_g / rsi5_l.replace(0, np.nan))
    # Project the 5-minute indicators back onto the 1-minute index
    df['ema5m_fast_slow'] = ((ema5_8 - ema5_21) / c5).reindex(df.index, method='ffill')
    df['rsi5m_14'] = rsi5.reindex(df.index, method='ffill')
    tr5 = pd.concat([h5 - l5, (h5 - c5.shift(1)).abs(), (l5 - c5.shift(1)).abs()], axis=1).max(axis=1)
    df['atr5m'] = (tr5.rolling(14).mean() / c5).reindex(df.index, method='ffill')
    df['ret5m_1'] = c5.pct_change(1).reindex(df.index, method='ffill')
    df['ret5m_5'] = c5.pct_change(5).reindex(df.index, method='ffill')
    df['ret5m_20'] = c5.pct_change(20).reindex(df.index, method='ffill')

    # === Multi-timeframe: 15 minutes ===
    c15 = c.resample('15min', **mtf).last()
    ema15_21 = c15.ewm(span=21, adjust=False).mean()
    df['ema15m_trend'] = ((c15 - ema15_21) / c15).reindex(df.index, method='ffill')
    df['ret15m_5'] = c15.pct_change(5).reindex(df.index, method='ffill')

    # Time features
    df['hour'] = df.index.hour
    df['minute'] = df.index.minute
    df['hour_sin'] = np.sin(2 * np.pi * df['hour'] / 24)
    df['hour_cos'] = np.cos(2 * np.pi * df['hour'] / 24)
    df['weekday'] = df.index.weekday
    return df
def get_feature_cols(df):
    """Return the names of the columns usable as model features.

    A column qualifies when it is not one of the raw price / label / helper
    columns listed below and its dtype is float64, float32, int64 or int32.
    Column order follows ``df.columns``.
    """
    non_features = {
        'ts', 'open', 'high', 'low', 'close', 'label', 'month',
        'ema_5', 'ema_8', 'ema_13', 'ema_21', 'ema_50', 'ema_120',
        'high_20', 'low_20', 'high_60', 'low_60',
    }
    numeric_dtypes = ('float64', 'float32', 'int64', 'int32')
    selected = []
    for name in df.columns:
        if name in non_features:
            continue
        if df[name].dtype in numeric_dtypes:
            selected.append(name)
    return selected
def train_ensemble(X_tr, y_tr, X_te, fcols):
    """Train a LightGBM + GradientBoosting ensemble and score the test rows.

    Args:
        X_tr: training feature matrix.
        y_tr: training labels in {-1, 0, 1}; shifted to {0, 1, 2} for the models.
        X_te: feature matrix to score.
        fcols: feature names; accepted for interface compatibility, not used here.

    Returns:
        (proba, m1): ``proba`` has shape (n_test, 3) with columns ordered
        short / flat / long (original labels -1 / 0 / 1), computed as
        0.6 * LightGBM + 0.4 * GradientBoosting; ``m1`` is the LightGBM booster.
    """
    y_cls = y_tr + 1  # -1 -> 0, 0 -> 1, 1 -> 2

    # Model 1: LightGBM
    params = {
        'objective': 'multiclass', 'num_class': 3, 'metric': 'multi_logloss',
        'learning_rate': 0.03, 'num_leaves': 63, 'max_depth': 8,
        'min_child_samples': 100, 'subsample': 0.7, 'colsample_bytree': 0.7,
        'reg_alpha': 0.5, 'reg_lambda': 0.5, 'verbose': -1, 'n_jobs': -1, 'seed': 42,
    }
    dt_ = lgb.Dataset(X_tr, label=y_cls)
    m1 = lgb.train(params, dt_, num_boost_round=300)
    p1 = m1.predict(X_te)  # (n, 3)

    # Model 2: GradientBoosting (sklearn)
    m2 = GradientBoostingClassifier(
        n_estimators=150, max_depth=5, learning_rate=0.05,
        subsample=0.8, min_samples_leaf=50, random_state=42,
    )
    m2.fit(X_tr, y_cls)
    p2_seen = m2.predict_proba(X_te)
    # predict_proba only has columns for classes present in the training
    # window; spread them over all three classes so the blend below never
    # hits a shape mismatch (missing classes get probability 0).
    p2 = np.zeros((p2_seen.shape[0], 3))
    p2[:, m2.classes_.astype(int)] = p2_seen

    # Ensemble: weighted average, LightGBM weighted higher
    proba = p1 * 0.6 + p2 * 0.4
    return proba, m1
def backtest(df, pl, ps, notional, prob_th, sl_pct, tp_pct, max_hold, use_atr_tp=False):
    """Simulate the long/short strategy bar by bar on close prices.

    Args:
        df: frame with a ``close`` column and a datetime index; ``atr_pct`` is
            used when present (0.002 is assumed otherwise).
        pl, ps: per-bar long / short probabilities, positionally aligned with ``df``.
        notional: position size used to convert returns into PnL.
        prob_th: minimum probability to open; the chosen side must also lead
            the other by more than 0.03.
        sl_pct, tp_pct: stop-loss / take-profit as fractional returns.
        max_hold: maximum holding time in seconds.
        use_atr_tp: when True, take-profit is max(2.5 * ATR at entry, tp_pct).

    Exit rules, in order: hard stop (max(1.5 * sl_pct, 0.006)) at any time;
    after 200 s held: stop-loss, take-profit; then timeout; then model reversal
    (opposite probability above prob_th + 0.08), after which a new position may
    open on the same bar.  Any position still open at the end is closed on the
    last bar with reason ``'end'``.

    Returns:
        List of tuples ``(direction, entry_price, exit_price, gross_pnl,
        hold_seconds, reason, open_time, close_time)``.  Fees are not deducted.
    """
    # Pull columns into arrays once; per-bar .iloc lookups dominate runtime
    # on a year of 1-minute bars.
    closes = df['close'].to_numpy()
    long_probs = pl.to_numpy()
    short_probs = ps.to_numpy()
    atrs = df['atr_pct'].to_numpy() if 'atr_pct' in df.columns else None
    times = df.index
    hard_sl = max(sl_pct * 1.5, 0.006)
    reversal_th = prob_th + 0.08

    pos = 0
    op = 0.0
    ot = None
    atr_at_open = 0
    trades = []

    for i, dt in enumerate(times):
        p = closes[i]
        p_l = long_probs[i]
        p_s = short_probs[i]
        atr_val = atrs[i] if atrs is not None else 0.002

        if pos != 0 and ot is not None:
            pp = (p - op) / op if pos == 1 else (op - p) / op
            hsec = (dt - ot).total_seconds()

            # Dynamic take-profit: ATR multiple, never below the fixed TP.
            # A NaN ATR fails the "> 0" test and falls back to the fixed TP.
            if use_atr_tp and atr_at_open > 0:
                dyn_tp = max(atr_at_open * 2.5, tp_pct)
            else:
                dyn_tp = tp_pct

            reason = None
            if -pp >= hard_sl:
                reason = '硬止损'
            elif hsec >= 200 and -pp >= sl_pct:
                reason = '止损'
            elif hsec >= 200 and pp >= dyn_tp:
                reason = '止盈'
            elif hsec >= max_hold:
                reason = '超时'
            if reason is not None:
                trades.append((pos, op, p, notional * pp, hsec, reason, ot, dt))
                pos = 0
                continue  # no re-entry on the bar of a stop/target/timeout exit

            # Model reversal: close, then fall through so the opposite side
            # can be opened on this same bar.
            if (pos == 1 and p_s > reversal_th) or (pos == -1 and p_l > reversal_th):
                trades.append((pos, op, p, notional * pp, hsec, 'AI反转', ot, dt))
                pos = 0

        if pos == 0:
            if p_l > prob_th and p_l > p_s + 0.03:  # require a > 3% probability lead
                pos = 1
                op = p
                ot = dt
                atr_at_open = atr_val
            elif p_s > prob_th and p_s > p_l + 0.03:
                pos = -1
                op = p
                ot = dt
                atr_at_open = atr_val

    if pos != 0:
        p = closes[-1]
        dt = times[-1]
        pp = (p - op) / op if pos == 1 else (op - p) / op
        trades.append((pos, op, p, notional * pp, (dt - ot).total_seconds(), 'end', ot, dt))
    return trades
def analyze(trades, notional, label):
    """Summarise a list of trades produced by the backtest.

    Each trade is a tuple whose element 3 is the gross PnL and element 7 is the
    close time.  A round-trip fee of ``notional * 0.0006 * 2`` is assumed, of
    which 90% is treated as rebate; the remaining 10% is charged per trade.

    Returns:
        ``(net, info)``.  ``net`` is total PnL after net fees.  ``info`` holds
        ``n`` (trade count), ``wr`` (win rate in percent), ``pm`` (profitable
        months), ``dd`` (max drawdown of cumulative net PnL), ``treb`` (total
        rebate), ``min_m`` / ``max_m`` (worst / best monthly net) and
        ``monthly`` (per ``YYYY-MM`` dict with ``n``, ``net``, ``w``).
        With no trades a message is printed and ``(0, {})`` is returned.
    """
    if not trades:
        print(f" [{label}] No trades")
        return 0, {}

    count = len(trades)
    gross_fee = notional * 0.0006 * 2
    rebate = gross_fee * 0.9
    net_fee = gross_fee - rebate

    gross_pnl = sum(trade[3] for trade in trades)
    net = gross_pnl - net_fee * count
    total_rebate = rebate * count
    winners = sum(1 for trade in trades if trade[3] > 0)
    win_rate = winners / count * 100

    monthly = defaultdict(lambda: {'n': 0, 'net': 0, 'w': 0})
    running = 0
    high_water = 0
    max_dd = 0
    for trade in trades:
        after_fee = trade[3] - net_fee

        # Per-month aggregation keyed by the close time
        bucket = monthly[trade[7].strftime('%Y-%m')]
        bucket['n'] += 1
        bucket['net'] += after_fee
        if trade[3] > 0:
            bucket['w'] += 1

        # Drawdown of the cumulative net curve
        running += after_fee
        if running > high_water:
            high_water = running
        if high_water - running > max_dd:
            max_dd = high_water - running

    month_nets = [bucket['net'] for bucket in monthly.values()]
    profitable_months = sum(1 for value in month_nets if value > 0)
    worst_month = min(month_nets) if month_nets else 0
    best_month = max(month_nets) if month_nets else 0

    return net, {
        'n': count, 'wr': win_rate, 'pm': profitable_months, 'dd': max_dd,
        'treb': total_rebate, 'min_m': worst_month, 'max_m': best_month,
        'monthly': monthly,
    }
def _walk_forward_probs(df, fcols, fb, thresh, train_m):
    """Return walk-forward long/short probabilities for one labelling setup.

    Labels are +1 / -1 when the close ``fb`` bars ahead moves more than
    ``thresh`` up / down, else 0.  For each calendar month after the first
    ``train_m`` months, an ensemble is trained on the preceding ``train_m``
    months and used to score that month.  Bars that are never scored keep
    probability 0.0.
    """
    df_t = df.copy()
    future_ret = df_t['close'].shift(-fb) / df_t['close'] - 1
    df_t['label'] = 0
    df_t.loc[future_ret > thresh, 'label'] = 1
    df_t.loc[future_ret < -thresh, 'label'] = -1
    df_t['month'] = df_t.index.to_period('M')
    months = sorted(df_t['month'].unique())

    pl = pd.Series(index=df_t.index, dtype=float)
    pl[:] = 0.0
    ps = pd.Series(index=df_t.index, dtype=float)
    ps[:] = 0.0

    for mi in range(train_m, len(months)):
        tm = months[mi]
        ts_ = months[mi - train_m]
        tr_mask = (df_t['month'] >= ts_) & (df_t['month'] < tm)
        te_mask = df_t['month'] == tm
        tr_df = df_t[tr_mask].dropna(subset=fcols + ['label'])
        te_df = df_t[te_mask].dropna(subset=fcols)
        if len(tr_df) < 1000 or len(te_df) < 100:
            continue
        proba, _ = train_ensemble(tr_df[fcols].values, tr_df['label'].values, te_df[fcols].values, fcols)
        pl.loc[te_df.index] = proba[:, 2]
        ps.loc[te_df.index] = proba[:, 0]
    return pl, ps


def main():
    """Run every strategy configuration, print per-config and overall summaries.

    Loads the data, builds features, then for each configuration obtains
    walk-forward probabilities, backtests, and prints results.  Probabilities
    depend only on (look-ahead bars, label threshold, training months), so they
    are computed once per distinct combination and reused by configurations
    that differ only in trading parameters.
    """
    t0 = _time.time()
    print("="*70, flush=True)
    print(" AI策略优化 v2 — Ensemble + 多时间框架 + 60+特征", flush=True)
    print(" 100U保证金 × 100倍 = 10,000U名义", flush=True)
    print("="*70, flush=True)
    df = load_data()
    print(f" {len(df):,} bars", flush=True)
    df = add_features(df)
    fcols = get_feature_cols(df)
    print(f" {len(fcols)} features", flush=True)
    NOTIONAL = 10000.0

    # Configurations to compare
    configs = [
        # (fb, thresh, prob_th, sl, tp, max_hold, use_atr_tp, train_m, label)
        (10, 0.003, 0.45, 0.005, 0.008, 1800, False, 3, "v1: 基线(上轮最佳)"),
        (10, 0.003, 0.48, 0.005, 0.008, 1800, False, 3, "v2: 高置信0.48"),
        (10, 0.003, 0.50, 0.005, 0.010, 2400, False, 3, "v3: 超高置信0.50 大TP"),
        (10, 0.003, 0.45, 0.005, 0.010, 2400, True, 3, "v4: ATR动态止盈"),
        (10, 0.003, 0.48, 0.006, 0.010, 2400, True, 3, "v5: 高置信+ATR+宽SL"),
        (15, 0.004, 0.45, 0.006, 0.010, 2400, True, 3, "v6: 15bar前瞻 大波动"),
        (10, 0.003, 0.45, 0.005, 0.008, 1800, False, 4, "v7: 4月训练窗口"),
        (10, 0.003, 0.48, 0.005, 0.010, 2400, True, 4, "v8: 4月+高置信+ATR"),
    ]
    results = []
    prob_cache = {}  # (fb, thresh, train_m) -> (pl, ps)
    for fb, thresh, prob_th, sl, tp, mh, use_atr, train_m, label in configs:
        print(f"\n--- {label} ---", flush=True)
        print(f" 前瞻={fb} 阈值={thresh*100:.1f}% prob>{prob_th} SL={sl*100:.1f}% TP={tp*100:.1f}% MH={mh}s ATR_TP={use_atr} train={train_m}m", flush=True)

        # Labels + walk-forward training, shared across configs with the same key
        key = (fb, thresh, train_m)
        if key not in prob_cache:
            prob_cache[key] = _walk_forward_probs(df, fcols, fb, thresh, train_m)
        pl, ps = prob_cache[key]

        # Backtest (only reads close / atr_pct / index, none of which are config-specific)
        trades = backtest(df, pl, ps, NOTIONAL, prob_th, sl, tp, mh, use_atr)
        net, info = analyze(trades, NOTIONAL, label)
        if info:
            print(f" 净利={net:+.0f} 交易={info['n']} 胜率={info['wr']:.1f}% 盈利月={info['pm']}/12 回撤={info['dd']:.0f}", flush=True)
            # Monthly overview
            for m in sorted(info['monthly'].keys()):
                d = info['monthly'][m]
                s = "+" if d['net']>0 else "-"
                print(f" {m}: {d['net']:>+6.0f} ({d['n']}笔) {s}", flush=True)
        results.append((label, net, info))

    # === Overview ===
    elapsed = _time.time()-t0
    results.sort(key=lambda x: x[1], reverse=True)
    print(f"\n\n{'='*80}", flush=True)
    print(f" 总览 | 100U保证金 × 100倍 | 耗时 {elapsed:.0f}s", flush=True)
    print(f"{'='*80}", flush=True)
    print(f" {'方案':<30} {'年净利':>8} {'月均':>6} {'交易':>5} {'胜率':>5} {'盈月':>4} {'回撤':>6}", flush=True)
    print(f" {'-'*72}", flush=True)
    for label, net, info in results:
        if not info:
            continue
        mavg = net/12
        print(f" {label:<30} {net:>+8.0f} {mavg:>+6.0f} {info['n']:>5} {info['wr']:>4.1f}% {info['pm']:>2}/12 {info['dd']:>6.0f}", flush=True)
    best = results[0]
    print(f"\n 最佳: {best[0]}", flush=True)
    print(f" 年净利: {best[1]:+.0f} USDT = 月均 {best[1]/12:+.0f} USDT", flush=True)
    if best[2]:
        print(f"\n 最佳方案月度:", flush=True)
        for m in sorted(best[2]['monthly'].keys()):
            d = best[2]['monthly'][m]
            print(f" {m}: {d['n']:>4}{d['net']:>+8.0f}U [{('盈利' if d['net']>0 else '亏损')}]", flush=True)
    print(f"\n 对比基线(LightGBM v1): +4801/年 = +400/月", flush=True)
    if best[1] > 4801:
        print(f" 优化提升: {(best[1]/4801-1)*100:+.0f}%", flush=True)
    print(f"{'='*80}", flush=True)
    # NOTE: exporting the best configuration's trades to CSV is not implemented;
    # results are only reported on the console.
    if best[2]:
        print(f" Results saved summary to console.", flush=True)
# Script entry point: run the full optimisation sweep when executed directly.
if __name__ == "__main__":
    main()