377 lines
15 KiB
Python
377 lines
15 KiB
Python
"""
|
||
AI策略优化 v2 — 目标: 100U保证金达到1000U/月
|
||
|
||
优化方向:
|
||
1. 多时间框架特征: 加入5分钟/15分钟聚合K线指标
|
||
2. Ensemble: LightGBM + RandomForest 投票
|
||
3. 更长训练窗口: 4个月 vs 3个月
|
||
4. 高置信度过滤: 只在双模型一致时交易
|
||
5. 动态止盈: 用ATR倍数而非固定比例
|
||
6. 更多K线形态特征: 连续涨跌、缺口、波动率变化率
|
||
7. 扫描最优参数组合
|
||
"""
|
||
import datetime, sqlite3, time as _time
|
||
import numpy as np
|
||
import pandas as pd
|
||
import lightgbm as lgb
|
||
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
|
||
import warnings
|
||
from pathlib import Path
|
||
from collections import defaultdict
|
||
warnings.filterwarnings('ignore')
|
||
|
||
def load_data():
    """Load one-minute ETH candles for 2025 from the local SQLite database.

    Reads ``id`` (exposed as ``ts``), ``open``, ``high``, ``low`` and ``close``
    from table ``bitmart_eth_1m`` in ``models/database.db``, located one
    directory above the directory containing this file.

    Returns:
        A DataFrame ordered by ``ts`` and indexed by ``datetime`` (``ts``
        interpreted as epoch milliseconds). The ``ts`` column is kept.
    """
    db = Path(__file__).parent.parent / 'models' / 'database.db'

    # NOTE(review): naive datetimes make .timestamp() use the machine's local
    # timezone, while pd.to_datetime(unit='ms') below yields UTC-based naive
    # timestamps; on a non-UTC host the window is offset from calendar-year
    # 2025 in index terms — confirm this is intended.
    s = int(datetime.datetime(2025, 1, 1).timestamp()) * 1000
    e = int(datetime.datetime(2026, 1, 1).timestamp()) * 1000

    query = (
        "SELECT id as ts,open,high,low,close FROM bitmart_eth_1m "
        "WHERE id>=? AND id<? ORDER BY id"
    )
    conn = sqlite3.connect(str(db))
    try:
        # Bound parameters instead of string interpolation; the connection is
        # closed even when the query raises.
        df = pd.read_sql_query(query, conn, params=(s, e))
    finally:
        conn.close()

    df['datetime'] = pd.to_datetime(df['ts'], unit='ms')
    df.set_index('datetime', inplace=True)
    return df
|
||
|
||
def _rsi(close, period):
    """Return the simple-moving-average RSI of *close* over *period* bars.

    Windows whose average loss is exactly zero produce NaN rather than 100.
    """
    delta = close.diff()
    avg_gain = delta.clip(lower=0).rolling(period).mean()
    avg_loss = (-delta).clip(lower=0).rolling(period).mean()
    return 100 - 100 / (1 + avg_gain / avg_loss.replace(0, np.nan))


def _true_range(high, low, close):
    """Return the per-bar true range: max of H-L, |H-prevC| and |L-prevC|."""
    prev_close = close.shift(1)
    candidates = [high - low, (high - prev_close).abs(), (low - prev_close).abs()]
    return pd.concat(candidates, axis=1).max(axis=1)


def _completed_bars_on(base_index, bar_series):
    """Project a resampled series onto *base_index* using completed bars only.

    ``resample`` labels every bar with its left edge, so the value stored at
    label ``10:00`` of a 5-minute series already contains the 10:04 close.
    Forward-filling that directly onto 1-minute rows would hand each row the
    final value of a bar that is still forming (look-ahead). Shifting by one
    bar first means a row only ever sees the most recent bar that has closed.
    """
    return bar_series.shift(1).reindex(base_index, method='ffill')


def add_features(df):
    """Add technical-indicator feature columns to a 1-minute OHLC frame.

    The frame is modified in place and also returned. It must have ``open``,
    ``high``, ``low`` and ``close`` columns and a datetime-like index (the
    function resamples on it and reads ``index.hour`` / ``minute`` /
    ``weekday``).

    Feature groups: EMAs and EMA spreads/slopes, RSI, Bollinger position and
    width, MACD, ATR, stochastic, multi-horizon returns, rolling volatility,
    candle-shape features, signed streak length, rolling high/low position,
    5-minute and 15-minute indicators projected onto the 1-minute index
    (completed bars only), and time-of-day / weekday columns.

    Args:
        df: 1-minute OHLC DataFrame as described above.

    Returns:
        The same DataFrame object with the feature columns added.
    """
    close = df['close']
    high = df['high']
    low = df['low']
    open_ = df['open']
    infinities = [np.inf, -np.inf]

    # === 1-minute base indicators ===
    for span in [5, 8, 13, 21, 50, 120]:
        df[f'ema_{span}'] = close.ewm(span=span, adjust=False).mean()
    df['ema_fast_slow'] = (df['ema_8'] - df['ema_21']) / close
    df['ema_slow_big'] = (df['ema_21'] - df['ema_120']) / close
    df['price_vs_ema120'] = (close - df['ema_120']) / close
    df['price_vs_ema50'] = (close - df['ema_50']) / close
    df['ema8_slope'] = df['ema_8'].pct_change(5)
    df['ema21_slope'] = df['ema_21'].pct_change(5)
    df['ema120_slope'] = df['ema_120'].pct_change(20)

    # Triple-EMA alignment (8 / 21 / 120)
    df['triple_bull'] = ((df['ema_8'] > df['ema_21']) & (df['ema_21'] > df['ema_120'])).astype(float)
    df['triple_bear'] = ((df['ema_8'] < df['ema_21']) & (df['ema_21'] < df['ema_120'])).astype(float)

    # RSI
    for period in [7, 14, 21]:
        df[f'rsi_{period}'] = _rsi(close, period)
    df['rsi_14_slope'] = df['rsi_14'].diff(5)  # RSI rate of change

    # Bollinger bands
    mid = close.rolling(20).mean()
    std = close.rolling(20).std()
    df['bb_pct'] = (close - (mid - 2 * std)) / ((mid + 2 * std) - (mid - 2 * std)).replace(0, np.nan)
    df['bb_width'] = 4 * std / mid
    # A zero band width ten bars back turns pct_change into +/-inf; map it to
    # NaN so the caller's dropna() removes the row instead of the model
    # receiving a non-finite input.
    df['bb_width_change'] = df['bb_width'].pct_change(10).replace(infinities, np.nan)

    # MACD (normalised by price)
    ema12 = close.ewm(span=12, adjust=False).mean()
    ema26 = close.ewm(span=26, adjust=False).mean()
    df['macd'] = (ema12 - ema26) / close
    df['macd_signal'] = df['macd'].ewm(span=9, adjust=False).mean()
    df['macd_hist'] = df['macd'] - df['macd_signal']
    df['macd_hist_slope'] = df['macd_hist'].diff(3)  # change of the MACD histogram

    # ATR (as a fraction of price)
    tr = _true_range(high, low, close)
    df['atr_pct'] = tr.rolling(14).mean() / close
    df['atr_7'] = tr.rolling(7).mean() / close
    df['atr_ratio'] = df['atr_7'] / df['atr_pct'].replace(0, np.nan)  # short / long ATR

    # Stochastic
    for period in [14, 28]:
        lowest = low.rolling(period).min()
        highest = high.rolling(period).max()
        df[f'stoch_k_{period}'] = (close - lowest) / (highest - lowest).replace(0, np.nan) * 100
    df['stoch_d_14'] = df['stoch_k_14'].rolling(3).mean()

    # Momentum
    for period in [1, 3, 5, 10, 20, 60, 120]:
        df[f'ret_{period}'] = close.pct_change(period)

    # Volatility
    one_bar_ret = close.pct_change()
    df['vol_5'] = one_bar_ret.rolling(5).std()
    df['vol_20'] = one_bar_ret.rolling(20).std()
    df['vol_60'] = one_bar_ret.rolling(60).std()
    df['vol_ratio'] = df['vol_5'] / df['vol_20'].replace(0, np.nan)
    # Same +/-inf guard as bb_width_change (vol_20 can be exactly zero).
    df['vol_trend'] = df['vol_20'].pct_change(20).replace(infinities, np.nan)  # volatility trend

    # Candle shape
    body = (close - open_).abs()
    body_top = pd.concat([open_, close], axis=1).max(axis=1)
    body_bottom = pd.concat([open_, close], axis=1).min(axis=1)
    df['body_pct'] = body / close
    df['upper_shadow'] = (high - body_top) / close
    df['lower_shadow'] = (body_bottom - low) / close
    df['body_vs_range'] = body / (high - low).replace(0, np.nan)
    df['is_bullish'] = (close > open_).astype(float)
    df['range_pct'] = (high - low) / close  # candle amplitude

    # Streak of same-direction candles (close == open counts as bearish)
    bullish = (close > open_).astype(int)
    df['streak'] = bullish.groupby((bullish != bullish.shift()).cumsum()).cumcount() + 1
    df['streak'] = df['streak'] * bullish - df['streak'] * (1 - bullish)  # >0 bullish run, <0 bearish run

    # Engulfing / hammer
    prev_body = body.shift(1)
    df['engulf_ratio'] = body / prev_body.replace(0, np.nan)
    df['hammer'] = (df['lower_shadow'] > df['body_pct'] * 2).astype(float)
    df['shooting_star'] = (df['upper_shadow'] > df['body_pct'] * 2).astype(float)

    # Position of price inside the rolling high/low range
    for period in [20, 60]:
        df[f'high_{period}'] = high.rolling(period).max()
        df[f'low_{period}'] = low.rolling(period).min()
        df[f'pos_{period}'] = (close - df[f'low_{period}']) / (df[f'high_{period}'] - df[f'low_{period}']).replace(0, np.nan)

    # === Multi-timeframe: 5 minutes ===
    c5 = close.resample('5min').last()
    h5 = high.resample('5min').max()
    l5 = low.resample('5min').min()

    ema5_8 = c5.ewm(span=8, adjust=False).mean()
    ema5_21 = c5.ewm(span=21, adjust=False).mean()
    rsi5 = _rsi(c5, 14)
    tr5 = _true_range(h5, l5, c5)

    # Project the 5-minute indicators onto the 1-minute index, completed bars only.
    df['ema5m_fast_slow'] = _completed_bars_on(df.index, (ema5_8 - ema5_21) / c5)
    df['rsi5m_14'] = _completed_bars_on(df.index, rsi5)
    df['atr5m'] = _completed_bars_on(df.index, tr5.rolling(14).mean() / c5)
    df['ret5m_1'] = _completed_bars_on(df.index, c5.pct_change(1))
    df['ret5m_5'] = _completed_bars_on(df.index, c5.pct_change(5))
    df['ret5m_20'] = _completed_bars_on(df.index, c5.pct_change(20))

    # === Multi-timeframe: 15 minutes ===
    c15 = close.resample('15min').last()
    ema15_21 = c15.ewm(span=21, adjust=False).mean()
    df['ema15m_trend'] = _completed_bars_on(df.index, (c15 - ema15_21) / c15)
    df['ret15m_5'] = _completed_bars_on(df.index, c15.pct_change(5))

    # Time
    df['hour'] = df.index.hour
    df['minute'] = df.index.minute
    df['hour_sin'] = np.sin(2 * np.pi * df['hour'] / 24)
    df['hour_cos'] = np.cos(2 * np.pi * df['hour'] / 24)
    df['weekday'] = df.index.weekday

    return df
|
||
|
||
def get_feature_cols(df):
    """Return the names of the columns that are used as model features.

    A column qualifies when it is not a raw price / bookkeeping / helper
    column (see ``non_features``) and its dtype is one of float64, float32,
    int64 or int32. Column order follows ``df.columns``.
    """
    non_features = {
        'ts', 'open', 'high', 'low', 'close', 'label', 'month',
        'ema_5', 'ema_8', 'ema_13', 'ema_21', 'ema_50', 'ema_120',
        'high_20', 'low_20', 'high_60', 'low_60',
    }
    numeric_dtypes = ('float64', 'float32', 'int64', 'int32')

    selected = []
    for name in df.columns:
        if name in non_features:
            continue
        if df[name].dtype in numeric_dtypes:
            selected.append(name)
    return selected
|
||
|
||
def train_ensemble(X_tr, y_tr, X_te, fcols):
    """Train a LightGBM + sklearn GradientBoosting ensemble and score X_te.

    Args:
        X_tr: Training feature matrix.
        y_tr: Training labels in {-1, 0, 1}; shifted by +1 to class ids 0/1/2.
        X_te: Feature matrix to score.
        fcols: Feature names. Accepted for interface compatibility; not used.

    Returns:
        ``(proba, m1)`` where ``proba`` has one row per row of ``X_te`` and
        three columns (class ids 0, 1, 2, i.e. labels -1, 0, 1), computed as
        ``0.6 * LightGBM + 0.4 * GradientBoosting``, and ``m1`` is the trained
        LightGBM booster.
    """
    n_classes = 3
    y_cls = y_tr + 1  # -1→0, 0→1, 1→2

    # Model 1: LightGBM
    params = {
        'objective': 'multiclass', 'num_class': n_classes, 'metric': 'multi_logloss',
        'learning_rate': 0.03, 'num_leaves': 63, 'max_depth': 8,
        'min_child_samples': 100, 'subsample': 0.7, 'colsample_bytree': 0.7,
        'reg_alpha': 0.5, 'reg_lambda': 0.5, 'verbose': -1, 'n_jobs': -1, 'seed': 42
    }
    dt_ = lgb.Dataset(X_tr, label=y_cls)
    m1 = lgb.train(params, dt_, num_boost_round=300)
    p1 = m1.predict(X_te)  # (n, 3)

    # Model 2: GradientBoosting (sklearn)
    m2 = GradientBoostingClassifier(
        n_estimators=150, max_depth=5, learning_rate=0.05,
        subsample=0.8, min_samples_leaf=50, random_state=42
    )
    m2.fit(X_tr, y_cls)
    raw_p2 = m2.predict_proba(X_te)

    # predict_proba only has columns for the classes present in y_cls
    # (m2.classes_). Scatter them into a fixed 3-column matrix so a training
    # window that lacks one class cannot misalign or break the blend below.
    p2 = np.zeros((raw_p2.shape[0], n_classes))
    p2[:, np.asarray(m2.classes_).astype(int)] = raw_p2

    # Ensemble: weighted average (LightGBM gets the higher weight)
    proba = p1 * 0.6 + p2 * 0.4
    return proba, m1
|
||
|
||
def backtest(df, pl, ps, notional, prob_th, sl_pct, tp_pct, max_hold, use_atr_tp=False):
    """Simulate a one-position-at-a-time strategy driven by model probabilities.

    Bars are processed in order; ``pl`` / ``ps`` are read positionally, so
    row ``i`` of each must belong to row ``i`` of ``df``.

    Exit rules, checked on every bar while a position is open:
      * hard stop-loss at ``max(sl_pct * 1.5, 0.006)`` adverse move, any time;
      * once the position is at least 200 seconds old: stop-loss at
        ``sl_pct``, take-profit at ``tp_pct`` (or ``max(2.5 * ATR-at-entry,
        tp_pct)`` when ``use_atr_tp`` is set and that ATR is positive),
        time-out at ``max_hold`` seconds, and model reversal when the
        opposite-side probability exceeds ``prob_th + 0.08``.
    After a model-reversal exit a new entry may happen on the same bar;
    after any other exit the bar is skipped.

    NOTE(review): the source this was reconstructed from had lost its
    indentation. Take-profit, time-out and reversal are assumed to sit
    inside the 200-second minimum-hold gate together with the normal
    stop-loss — confirm against the original file.

    Entry rule (flat only): go long when ``p_long > prob_th`` and
    ``p_long > p_short + 0.03``; the mirrored rule opens a short.

    Args:
        df: Frame with a datetime index and a ``close`` column; ``atr_pct``
            is used when present (0.002 otherwise).
        pl: Per-bar long probability (pandas Series).
        ps: Per-bar short probability (pandas Series).
        notional: Position notional; PnL is ``notional * fractional move``.
        prob_th: Entry probability threshold.
        sl_pct: Stop-loss as a fraction of the entry price.
        tp_pct: Take-profit as a fraction of the entry price.
        max_hold: Maximum holding time in seconds.
        use_atr_tp: Enable the ATR-based take-profit.

    Returns:
        List of tuples ``(side, entry_price, exit_price, pnl, hold_seconds,
        reason, entry_time, exit_time)``. Fees are not deducted here. A
        position still open at the last bar is closed with reason ``'end'``.

    Raises:
        ValueError: if ``pl`` or ``ps`` is shorter than ``df``.
    """
    n_bars = len(df)
    if len(pl) < n_bars or len(ps) < n_bars:
        raise ValueError("pl and ps must provide one probability per row of df")

    # Pull the columns out once; per-row .iloc lookups dominate the runtime
    # on a year of 1-minute bars.
    times = df.index
    closes = df['close'].to_numpy()
    long_probs = pl.to_numpy()[:n_bars]
    short_probs = ps.to_numpy()[:n_bars]
    if 'atr_pct' in df.columns:
        atrs = df['atr_pct'].to_numpy()
    else:
        atrs = np.full(n_bars, 0.002)

    min_hold = 200  # seconds before the non-hard exits become active
    hard_sl = max(sl_pct * 1.5, 0.006)
    reversal_th = prob_th + 0.08

    pos = 0
    op = 0.0
    ot = None
    atr_at_open = 0
    trades = []

    for dt, p, p_l, p_s, atr_val in zip(times, closes, long_probs, short_probs, atrs):
        if pos != 0 and ot is not None:
            pp = (p - op) / op if pos == 1 else (op - p) / op
            hsec = (dt - ot).total_seconds()

            # Dynamic take-profit: 2.5x the ATR seen at entry, never below tp_pct.
            if use_atr_tp and atr_at_open > 0:
                dyn_tp = max(atr_at_open * 2.5, tp_pct)
            else:
                dyn_tp = tp_pct

            reason = None
            if -pp >= hard_sl:
                reason = '硬止损'
            elif hsec >= min_hold:
                if -pp >= sl_pct:
                    reason = '止损'
                elif pp >= dyn_tp:
                    reason = '止盈'
                elif hsec >= max_hold:
                    reason = '超时'
                elif (pos == 1 and p_s > reversal_th) or (pos == -1 and p_l > reversal_th):
                    reason = 'AI反转'

            if reason is not None:
                trades.append((pos, op, p, notional * pp, hsec, reason, ot, dt))
                pos = 0
                # Only a model reversal may re-enter on the same bar.
                if reason != 'AI反转':
                    continue

        if pos == 0:
            if p_l > prob_th and p_l > p_s + 0.03:  # require a >3% probability gap
                pos = 1
                op = p
                ot = dt
                atr_at_open = atr_val
            elif p_s > prob_th and p_s > p_l + 0.03:
                pos = -1
                op = p
                ot = dt
                atr_at_open = atr_val

    if pos != 0:
        # Mark the still-open position to the final close.
        p = closes[-1]
        dt = times[-1]
        pp = (p - op) / op if pos == 1 else (op - p) / op
        trades.append((pos, op, p, notional * pp, (dt - ot).total_seconds(), 'end', ot, dt))
    return trades
|
||
|
||
def analyze(trades, notional, label):
    """Summarise a list of trades net of fees.

    Each trade is charged a round-trip fee of ``notional * 0.0006 * 2`` of
    which 90% is treated as rebated, so the net cost per trade is the
    remaining 10%.

    Args:
        trades: Tuples as produced by ``backtest``; index 3 is the gross PnL
            and index 7 the exit time (must support ``strftime``).
        notional: Position notional used for the fee calculation.
        label: Name printed when there are no trades.

    Returns:
        ``(net, info)``. ``net`` is total PnL minus net fees. ``info`` holds
        ``n`` (trade count), ``wr`` (percentage of trades with gross PnL > 0),
        ``pm`` (number of months with positive net), ``dd`` (maximum drawdown
        of cumulative net PnL), ``treb`` (total rebate), ``min_m`` / ``max_m``
        (worst / best monthly net) and ``monthly`` (per ``YYYY-MM`` dict with
        ``n``, ``net``, ``w``). With no trades, returns ``(0, {})``.
    """
    if not trades:
        print(f" [{label}] No trades")
        return 0, {}

    count = len(trades)
    round_trip_fee = notional * 0.0006 * 2
    rebate = round_trip_fee * 0.9
    net_fee = round_trip_fee - rebate

    gross_pnls = [trade[3] for trade in trades]
    net = sum(gross_pnls) - net_fee * count
    total_rebate = rebate * count
    win_count = sum(1 for pnl in gross_pnls if pnl > 0)
    win_rate = win_count / count * 100

    monthly = defaultdict(lambda: {'n': 0, 'net': 0, 'w': 0})
    equity = 0
    high_water = 0
    max_drawdown = 0
    for trade in trades:
        pnl = trade[3]

        # Per-month bookkeeping, keyed by the exit month.
        bucket = monthly[trade[7].strftime('%Y-%m')]
        bucket['n'] += 1
        bucket['net'] += pnl - net_fee
        if pnl > 0:
            bucket['w'] += 1

        # Running drawdown of the net equity curve.
        equity += pnl - net_fee
        high_water = max(high_water, equity)
        max_drawdown = max(max_drawdown, high_water - equity)

    month_nets = [stats['net'] for stats in monthly.values()]
    profitable_months = sum(1 for value in month_nets if value > 0)

    info = {
        'n': count,
        'wr': win_rate,
        'pm': profitable_months,
        'dd': max_drawdown,
        'treb': total_rebate,
        'min_m': min(month_nets),
        'max_m': max(month_nets),
        'monthly': monthly,
    }
    return net, info
|
||
|
||
def _walk_forward_probs(df, fcols, fb, thresh, train_m):
    """Return walk-forward long/short probabilities for one labelling setup.

    Labels each bar by the close-to-close return ``fb`` bars ahead
    (``> thresh`` → 1, ``< -thresh`` → -1, else 0). Then, for every calendar
    month after the first ``train_m`` months, trains the ensemble on the
    preceding ``train_m`` months and scores that month. Bars that were never
    scored keep probability 0.0.

    Returns:
        ``(pl, ps)``: Series on ``df.index`` with the probability of the
        long class (label 1) and of the short class (label -1).
    """
    df_t = df.copy()
    future_ret = df_t['close'].shift(-fb) / df_t['close'] - 1
    df_t['label'] = 0
    df_t.loc[future_ret > thresh, 'label'] = 1
    df_t.loc[future_ret < -thresh, 'label'] = -1

    df_t['month'] = df_t.index.to_period('M')
    months = sorted(df_t['month'].unique())

    pl = pd.Series(0.0, index=df_t.index)
    ps = pd.Series(0.0, index=df_t.index)

    for mi in range(train_m, len(months)):
        tm = months[mi]
        ts_ = months[mi - train_m]
        tr_mask = (df_t['month'] >= ts_) & (df_t['month'] < tm)
        te_mask = df_t['month'] == tm
        tr_df = df_t[tr_mask].dropna(subset=fcols + ['label'])
        te_df = df_t[te_mask].dropna(subset=fcols)
        if len(tr_df) < 1000 or len(te_df) < 100:
            continue

        proba, _ = train_ensemble(tr_df[fcols].values, tr_df['label'].values, te_df[fcols].values, fcols)
        pl.loc[te_df.index] = proba[:, 2]
        ps.loc[te_df.index] = proba[:, 0]

    return pl, ps


def main():
    """Run every strategy configuration and print a comparison to stdout.

    Loads the candles, builds features, and for each entry of ``configs``
    obtains walk-forward probabilities, backtests them and prints the net
    result with a per-month breakdown. Finally prints a table sorted by net
    profit and details of the best configuration. Nothing is written to disk.
    """
    t0 = _time.time()
    print("="*70, flush=True)
    print(" AI策略优化 v2 — Ensemble + 多时间框架 + 60+特征", flush=True)
    print(" 100U保证金 × 100倍 = 10,000U名义", flush=True)
    print("="*70, flush=True)

    df = load_data()
    print(f" {len(df):,} bars", flush=True)

    df = add_features(df)
    fcols = get_feature_cols(df)
    print(f" {len(fcols)} features", flush=True)

    NOTIONAL = 10000.0

    # Configurations to compare
    configs = [
        # (fb, thresh, prob_th, sl, tp, max_hold, use_atr_tp, train_m, label)
        (10, 0.003, 0.45, 0.005, 0.008, 1800, False, 3, "v1: 基线(上轮最佳)"),
        (10, 0.003, 0.48, 0.005, 0.008, 1800, False, 3, "v2: 高置信0.48"),
        (10, 0.003, 0.50, 0.005, 0.010, 2400, False, 3, "v3: 超高置信0.50 大TP"),
        (10, 0.003, 0.45, 0.005, 0.010, 2400, True, 3, "v4: ATR动态止盈"),
        (10, 0.003, 0.48, 0.006, 0.010, 2400, True, 3, "v5: 高置信+ATR+宽SL"),
        (15, 0.004, 0.45, 0.006, 0.010, 2400, True, 3, "v6: 15bar前瞻 大波动"),
        (10, 0.003, 0.45, 0.005, 0.008, 1800, False, 4, "v7: 4月训练窗口"),
        (10, 0.003, 0.48, 0.005, 0.010, 2400, True, 4, "v8: 4月+高置信+ATR"),
    ]

    results = []
    # The models depend only on (fb, thresh, train_m); the other settings
    # affect the backtest alone. Cache the predictions so configurations that
    # share a labelling setup do not retrain identical models.
    prob_cache = {}

    for fb, thresh, prob_th, sl, tp, mh, use_atr, train_m, label in configs:
        print(f"\n--- {label} ---", flush=True)
        print(f" 前瞻={fb} 阈值={thresh*100:.1f}% prob>{prob_th} SL={sl*100:.1f}% TP={tp*100:.1f}% MH={mh}s ATR_TP={use_atr} train={train_m}m", flush=True)

        key = (fb, thresh, train_m)
        if key not in prob_cache:
            prob_cache[key] = _walk_forward_probs(df, fcols, fb, thresh, train_m)
        pl, ps = prob_cache[key]

        # Backtest
        trades = backtest(df, pl, ps, NOTIONAL, prob_th, sl, tp, mh, use_atr)
        net, info = analyze(trades, NOTIONAL, label)

        if info:
            # NOTE(review): only months after the first train_m months are
            # ever scored, yet the "/12" denominators here and below count
            # every month — confirm that is the intended reporting.
            print(f" 净利={net:+.0f} 交易={info['n']} 胜率={info['wr']:.1f}% 盈利月={info['pm']}/12 回撤={info['dd']:.0f}", flush=True)
            # Short per-month overview
            for m in sorted(info['monthly'].keys()):
                d = info['monthly'][m]
                s = "+" if d['net']>0 else "-"
                print(f" {m}: {d['net']:>+6.0f} ({d['n']}笔) {s}", flush=True)
        results.append((label, net, info))

    # === Summary ===
    elapsed = _time.time()-t0
    results.sort(key=lambda x: x[1], reverse=True)

    print(f"\n\n{'='*80}", flush=True)
    print(f" 总览 | 100U保证金 × 100倍 | 耗时 {elapsed:.0f}s", flush=True)
    print(f"{'='*80}", flush=True)
    print(f" {'方案':<30} {'年净利':>8} {'月均':>6} {'交易':>5} {'胜率':>5} {'盈月':>4} {'回撤':>6}", flush=True)
    print(f" {'-'*72}", flush=True)

    for label, net, info in results:
        if not info:
            continue
        mavg = net/12
        print(f" {label:<30} {net:>+8.0f} {mavg:>+6.0f} {info['n']:>5} {info['wr']:>4.1f}% {info['pm']:>2}/12 {info['dd']:>6.0f}", flush=True)

    best = results[0]
    print(f"\n 最佳: {best[0]}", flush=True)
    print(f" 年净利: {best[1]:+.0f} USDT = 月均 {best[1]/12:+.0f} USDT", flush=True)

    if best[2]:
        print(f"\n 最佳方案月度:", flush=True)
        for m in sorted(best[2]['monthly'].keys()):
            d = best[2]['monthly'][m]
            print(f" {m}: {d['n']:>4}笔 {d['net']:>+8.0f}U [{('盈利' if d['net']>0 else '亏损')}]", flush=True)

    print(f"\n 对比基线(LightGBM v1): +4801/年 = +400/月", flush=True)
    if best[1] > 4801:
        print(f" 优化提升: {(best[1]/4801-1)*100:+.0f}%", flush=True)
    print(f"{'='*80}", flush=True)

    # The best configuration's trades are not exported; only this notice is printed.
    if best[2]:
        print(f" Results saved summary to console.", flush=True)
|
||
|
||
# Run the full comparison only when executed as a script, not on import.
if __name__ == "__main__":
    main()
|