259 lines
10 KiB
Python
259 lines
10 KiB
Python
"""
|
|
AI最佳配置回测 — 基于之前扫描结果
|
|
|
|
最佳: AI-v4: 10bar前瞻, 方向阈值0.3%, 概率阈值0.45, SL=0.5%, TP=0.8%
|
|
该配置在100U时年净利+5544, 月均+462, 935笔交易, 8/12月盈利
|
|
"""
|
|
import datetime, sqlite3, time as _time
|
|
import numpy as np
|
|
import pandas as pd
|
|
import lightgbm as lgb
|
|
import warnings
|
|
from pathlib import Path
|
|
from collections import defaultdict
|
|
warnings.filterwarnings('ignore')
|
|
|
|
def _load_bars(db_path, start_ms, end_ms):
    """Load 1-minute OHLC bars from the ``bitmart_eth_1m`` table.

    Args:
        db_path: Path of the SQLite database file.
        start_ms: Inclusive lower bound on the ``id`` column.
        end_ms: Exclusive upper bound on the ``id`` column.

    Returns:
        DataFrame with columns ts/open/high/low/close, indexed by ``datetime``
        (``ts`` parsed as epoch milliseconds, which pandas treats as UTC).
    """
    conn = sqlite3.connect(str(db_path))
    try:
        # Bound parameters instead of string interpolation.
        bars = pd.read_sql_query(
            "SELECT id as ts,open,high,low,close FROM bitmart_eth_1m "
            "WHERE id>=? AND id<? ORDER BY id",
            conn, params=(start_ms, end_ms))
    finally:
        # Release the connection even when the query raises.
        conn.close()
    bars['datetime'] = pd.to_datetime(bars['ts'], unit='ms')
    bars.set_index('datetime', inplace=True)
    return bars


def _add_features(df):
    """Add indicator columns to ``df`` in place and return the feature names.

    Column creation order is kept stable because it defines the column order
    of the feature matrix handed to the model.
    """
    c = df['close']
    h = df['high']
    l = df['low']
    o = df['open']

    # EMA levels are intermediates only; the model sees the relative spreads.
    for p in [5, 8, 13, 21, 50, 120]:
        df[f'ema_{p}'] = c.ewm(span=p, adjust=False).mean()
    df['ema_fast_slow'] = (df['ema_8'] - df['ema_21']) / c
    df['ema_slow_big'] = (df['ema_21'] - df['ema_120']) / c
    df['price_vs_ema120'] = (c - df['ema_120']) / c
    df['price_vs_ema50'] = (c - df['ema_50']) / c
    df['ema8_slope'] = df['ema_8'].pct_change(5)
    df['ema21_slope'] = df['ema_21'].pct_change(5)

    # RSI from simple rolling means of gains/losses; zero loss -> NaN.
    delta = c.diff()
    gain = delta.clip(lower=0)
    loss = (-delta).clip(lower=0)
    for p in [7, 14, 21]:
        ag = gain.rolling(p).mean()
        al = loss.rolling(p).mean()
        df[f'rsi_{p}'] = 100 - 100 / (1 + ag / al.replace(0, np.nan))

    # Bollinger band position and width (20 bars, 2 standard deviations).
    mid = c.rolling(20).mean()
    std = c.rolling(20).std()
    df['bb_pct'] = (c - (mid - 2 * std)) / ((mid + 2 * std) - (mid - 2 * std)).replace(0, np.nan)
    df['bb_width'] = 4 * std / mid

    # MACD, normalised by price.
    ema12 = c.ewm(span=12, adjust=False).mean()
    ema26 = c.ewm(span=26, adjust=False).mean()
    df['macd'] = (ema12 - ema26) / c
    df['macd_signal'] = df['macd'].ewm(span=9, adjust=False).mean()
    df['macd_hist'] = df['macd'] - df['macd_signal']

    # True range / ATR as a fraction of price.
    tr = pd.concat([h - l, (h - c.shift(1)).abs(), (l - c.shift(1)).abs()], axis=1).max(axis=1)
    df['atr_pct'] = tr.rolling(14).mean() / c
    df['atr_7'] = tr.rolling(7).mean() / c

    # Stochastic oscillator.
    low14 = l.rolling(14).min()
    high14 = h.rolling(14).max()
    df['stoch_k'] = (c - low14) / (high14 - low14).replace(0, np.nan) * 100
    df['stoch_d'] = df['stoch_k'].rolling(3).mean()

    # Trailing returns over several horizons.
    for p in [1, 3, 5, 10, 20, 60]:
        df[f'ret_{p}'] = c.pct_change(p)

    # Realised volatility and its short/long ratio.
    df['vol_5'] = c.pct_change().rolling(5).std()
    df['vol_20'] = c.pct_change().rolling(20).std()
    df['vol_ratio'] = df['vol_5'] / df['vol_20'].replace(0, np.nan)

    # Candle shape.
    body = (c - o).abs()
    df['body_pct'] = body / c
    df['upper_shadow'] = (h - pd.concat([o, c], axis=1).max(axis=1)) / c
    df['lower_shadow'] = (pd.concat([o, c], axis=1).min(axis=1) - l) / c
    df['body_vs_range'] = body / (h - l).replace(0, np.nan)
    df['is_bullish'] = (c > o).astype(float)

    # Position of the close inside the 20-bar high/low range.
    df['high_20'] = h.rolling(20).max()
    df['low_20'] = l.rolling(20).min()
    df['price_position'] = (c - df['low_20']) / (df['high_20'] - df['low_20']).replace(0, np.nan)

    # Time-of-day features (raw and cyclic encoding of the hour).
    df['hour'] = df.index.hour
    df['minute'] = df.index.minute
    df['hour_sin'] = np.sin(2 * np.pi * df['hour'] / 24)
    df['hour_cos'] = np.cos(2 * np.pi * df['hour'] / 24)

    prev_body = body.shift(1)
    df['engulf_ratio'] = body / prev_body.replace(0, np.nan)

    # Raw prices, absolute levels and the target are not model inputs.
    exclude = {'ts', 'open', 'high', 'low', 'close', 'label',
               'high_20', 'low_20', 'ema_5', 'ema_8', 'ema_13', 'ema_21', 'ema_50', 'ema_120'}
    return [name for name in df.columns if name not in exclude
            and df[name].dtype in ('float64', 'float32', 'int64', 'int32')]


def _walk_forward(df, fcols, params, prob_th):
    """Train on the previous three calendar months and predict the next one.

    Adds a ``month`` column to ``df``. Returns ``(prob_long, prob_short)``
    Series aligned to ``df.index``; bars that were never predicted stay 0.0.
    """
    df['month'] = df.index.to_period('M')
    months = sorted(df['month'].unique())

    prob_long = pd.Series(0.0, index=df.index)
    prob_short = pd.Series(0.0, index=df.index)

    for i in range(3, len(months)):
        test_month = months[i]
        first_train_month = months[i - 3]
        tr_mask = (df['month'] >= first_train_month) & (df['month'] < test_month)
        te_mask = df['month'] == test_month
        tr_df = df[tr_mask].dropna(subset=fcols + ['label'])
        te_df = df[te_mask].dropna(subset=fcols)
        if len(tr_df) < 1000 or len(te_df) < 100:
            continue
        # Labels -1/0/1 are shifted to class ids 0/1/2 (short/flat/long).
        X_tr = tr_df[fcols].values
        y_tr = tr_df['label'].values + 1
        model = lgb.train(params, lgb.Dataset(X_tr, label=y_tr), num_boost_round=200)
        proba = model.predict(te_df[fcols].values)
        prob_long.loc[te_df.index] = proba[:, 2]
        prob_short.loc[te_df.index] = proba[:, 0]
        lc = (proba[:, 2] > prob_th).sum()
        sc = (proba[:, 0] > prob_th).sum()
        print(f" {test_month}: long={lc} short={sc}", flush=True)

    return prob_long, prob_short


def _run_backtest(index, closes, prob_long, prob_short, notional, prob_th,
                  sl_pct, tp_pct, min_hold_sec=200, max_hold_sec=1800,
                  rev_margin=0.05):
    """Simulate one-position-at-a-time trading on bar closes.

    Returns a list of tuples
    ``(direction, open_px, close_px, pnl, hold_sec, reason, open_time, close_time)``
    where ``pnl`` is the gross directional P&L on ``notional`` (fees excluded).
    """
    hard_sl = sl_pct * 1.5
    rev_th = prob_th + rev_margin
    pos = 0
    op = 0.0
    ot = None
    trades = []

    # Iterate pre-extracted arrays; per-bar ``.iloc`` lookups are far slower.
    for dt, p, p_l, p_s in zip(index, closes, prob_long, prob_short):
        if pos != 0 and ot is not None:
            pp = (p - op) / op if pos == 1 else (op - p) / op
            hsec = (dt - ot).total_seconds()
            # Emergency stop applies regardless of holding time.
            if -pp >= hard_sl:
                trades.append((pos, op, p, notional * pp, hsec, 'hard_sl', ot, dt))
                pos = 0
                continue
            # NOTE(review): the source lost its indentation. All regular
            # exits are assumed to sit behind the minimum-hold gate; only the
            # SL nesting is syntactically forced. Confirm against the original.
            if hsec >= min_hold_sec:
                if -pp >= sl_pct:
                    trades.append((pos, op, p, notional * pp, hsec, 'sl', ot, dt))
                    pos = 0
                    continue
                if pp >= tp_pct:
                    trades.append((pos, op, p, notional * pp, hsec, 'tp', ot, dt))
                    pos = 0
                    continue
                if hsec >= max_hold_sec:
                    trades.append((pos, op, p, notional * pp, hsec, 'timeout', ot, dt))
                    pos = 0
                    continue
                # Opposite-side signal: close now; the entry check below may
                # open the reverse position on this same bar.
                if pos == 1 and p_s > rev_th:
                    trades.append((pos, op, p, notional * pp, hsec, 'ai_rev', ot, dt))
                    pos = 0
                elif pos == -1 and p_l > rev_th:
                    trades.append((pos, op, p, notional * pp, hsec, 'ai_rev', ot, dt))
                    pos = 0
        if pos == 0:
            if p_l > prob_th and p_l > p_s:
                pos = 1
                op = p
                ot = dt
            elif p_s > prob_th and p_s > p_l:
                pos = -1
                op = p
                ot = dt

    # Mark any still-open position to the last close.
    if pos != 0:
        p = closes[-1]
        dt = index[-1]
        pp = (p - op) / op if pos == 1 else (op - p) / op
        trades.append((pos, op, p, notional * pp, (dt - ot).total_seconds(), 'end', ot, dt))
    return trades


def main():
    """Run the walk-forward LightGBM backtest and print/save the results.

    Loads 2025 1-minute bars from ``../models/database.db`` relative to this
    file, builds features and 10-bar-ahead labels, trains month by month,
    simulates trading, prints a report and writes ``../ai_trades.csv``.
    """
    t0 = _time.time()
    print("Loading...", flush=True)
    db = Path(__file__).parent.parent / 'models' / 'database.db'
    # Window bounds are taken in UTC to match pd.to_datetime(unit='ms');
    # a naive datetime's timestamp() would depend on the host timezone.
    utc = datetime.timezone.utc
    s = int(datetime.datetime(2025, 1, 1, tzinfo=utc).timestamp()) * 1000
    e = int(datetime.datetime(2026, 1, 1, tzinfo=utc).timestamp()) * 1000
    df = _load_bars(db, s, e)
    print(f" {len(df):,} bars", flush=True)

    # ===== Features =====
    print("Features...", flush=True)
    fcols = _add_features(df)
    print(f" {len(fcols)} features", flush=True)

    # ===== Labels: 10-bar look-ahead, 0.3% threshold =====
    fb = 10
    thresh = 0.003
    future_ret = df['close'].shift(-fb) / df['close'] - 1
    # Rows whose future return is NaN (the last fb bars) keep label 0.
    df['label'] = 0
    df.loc[future_ret > thresh, 'label'] = 1
    df.loc[future_ret < -thresh, 'label'] = -1

    # ===== Walk-forward training =====
    print("Walk-forward training...", flush=True)
    prob_th = 0.45
    sl_pct = 0.005
    tp_pct = 0.008
    params = {
        'objective': 'multiclass', 'num_class': 3, 'metric': 'multi_logloss',
        'learning_rate': 0.05, 'num_leaves': 31, 'max_depth': 6,
        'min_child_samples': 50, 'subsample': 0.8, 'colsample_bytree': 0.8,
        'reg_alpha': 0.1, 'reg_lambda': 0.1, 'verbose': -1, 'n_jobs': -1, 'seed': 42,
    }
    pl, ps = _walk_forward(df, fcols, params, prob_th)

    # ===== Backtest =====
    print("\nBacktest...", flush=True)
    NOTIONAL = 10000.0
    # Round-trip fee, the rebated part, and the fee actually paid per trade.
    FEE = NOTIONAL * 0.0006 * 2
    REB = FEE * 0.9
    NFEE = FEE - REB
    trades = _run_backtest(df.index, df['close'].to_numpy(), pl.to_numpy(),
                           ps.to_numpy(), NOTIONAL, prob_th, sl_pct, tp_pct)

    # ===== Results =====
    n = len(trades)
    tpnl = sum(t[3] for t in trades)
    net = tpnl - NFEE * n
    treb = REB * n
    wins = len([t for t in trades if t[3] > 0])
    wr = wins / n * 100 if n else 0

    # Per-month stats keyed by the trade's closing time.
    monthly = defaultdict(lambda: {'n': 0, 'net': 0, 'w': 0})
    for t in trades:
        k = t[7].strftime('%Y-%m')
        monthly[k]['n'] += 1
        monthly[k]['net'] += t[3] - NFEE
        if t[3] > 0:
            monthly[k]['w'] += 1

    # Maximum drawdown of the cumulative net P&L curve.
    cum = 0
    peak = 0
    dd = 0
    for t in trades:
        cum += t[3] - NFEE
        if cum > peak:
            peak = cum
        if peak - cum > dd:
            dd = peak - cum

    reasons = defaultdict(int)
    for t in trades:
        reasons[t[5]] += 1

    elapsed = _time.time() - t0

    print(f"\n{'='*70}", flush=True)
    # Report the actual feature count instead of a hard-coded number.
    print(f" AI策略最佳配置 (LightGBM + {len(fcols)}特征)", flush=True)
    print(f" 10bar前瞻 | 阈值0.3% | 概率>0.45 | SL=0.5% TP=0.8%", flush=True)
    print(f" 100U保证金 x 100倍杠杆 = 10,000U名义 | 耗时{elapsed:.0f}s", flush=True)
    print(f"{'='*70}", flush=True)
    print(f" 方向盈亏: {tpnl:>+10.0f} USDT", flush=True)
    print(f" 返佣(90%): {treb:>+10.0f} USDT", flush=True)
    print(f" 净手续费(10%):{NFEE*n:>10.0f} USDT", flush=True)
    print(f" ================================", flush=True)
    print(f" 年净利: {net:>+10.0f} USDT", flush=True)
    print(f" 月均: {net/12:>+10.0f} USDT", flush=True)
    print(f" 最大回撤: {dd:>10.0f} USDT", flush=True)
    print(f" 交易笔数: {n:>10}", flush=True)
    print(f" 胜率: {wr:>9.1f}%", flush=True)

    if wins > 0 and wins < n:
        aw = sum(t[3] for t in trades if t[3] > 0) / wins
        al = sum(t[3] for t in trades if t[3] <= 0) / (n - wins)
        print(f" 平均盈利: {aw:>+10.1f} USDT", flush=True)
        print(f" 平均亏损: {al:>+10.1f} USDT", flush=True)
        print(f" 盈亏比: {abs(aw/al):>10.2f}", flush=True)

    print(f"\n 平仓原因:", flush=True)
    for r, cnt in sorted(reasons.items(), key=lambda x: -x[1]):
        print(f" {r:<10} {cnt:>5}笔 ({cnt/n*100:.1f}%)", flush=True)

    print(f"\n 月度明细:", flush=True)
    pm = 0
    for m in sorted(monthly.keys()):
        d = monthly[m]
        wr_m = d['w'] / d['n'] * 100 if d['n'] > 0 else 0
        status = "盈利" if d['net'] > 0 else "亏损"
        print(f" {m}: {d['n']:>4}笔 {d['net']:>+8.0f}U 胜率{wr_m:.0f}% [{status}]", flush=True)
        if d['net'] > 0:
            pm += 1
    print(f" 合计: {n:>4}笔 {net:>+8.0f}U 盈利月: {pm}/12", flush=True)

    # Linear scaling of the result to other margin sizes at 100x leverage.
    print(f"\n --- 不同保证金下的月均收入 ---", flush=True)
    for margin in [100, 200, 300, 500, 800, 1000]:
        sc = margin * 100 / NOTIONAL
        mn = net * sc / 12
        ok = " <<< 达标!" if mn >= 1000 else ""
        print(f" {margin:>5}U保证金: 月均 {mn:>+6.0f} USDT{ok}", flush=True)

    # Comparison against the EMA baseline.
    print(f"\n --- 对比: AI vs 纯EMA策略 ---", flush=True)
    ema_net = 1196  # annual net profit of the earlier EMA baseline at 100U
    print(f" 纯EMA: {ema_net:>+6.0f}/年 = {ema_net/12:>+4.0f}/月 (227笔)", flush=True)
    print(f" AI策略: {net:>+6.0f}/年 = {net/12:>+4.0f}/月 ({n}笔)", flush=True)
    if net > ema_net:
        print(f" AI提升: {(net/ema_net-1)*100:>+.0f}% ({net-ema_net:>+.0f} USDT)", flush=True)

    print(f"\n{'='*70}", flush=True)

    # Save the trade list.
    csv_path = Path(__file__).parent.parent / 'ai_trades.csv'
    with open(csv_path, 'w', encoding='utf-8-sig') as f:
        f.write("dir,open_px,close_px,pnl,hold_sec,reason,open_time,close_time\n")
        for t in trades:
            d = 'long' if t[0] == 1 else 'short'
            f.write(f"{d},{t[1]:.2f},{t[2]:.2f},{t[3]:.2f},{t[4]:.0f},{t[5]},{t[6]},{t[7]}\n")
    print(f" Saved: {csv_path}", flush=True)
# Script entry point: run the backtest only when this file is executed directly.
if __name__ == "__main__":
    main()