Files
lm_code/交易/bitmart-AI策略回测.py
Your Name b5af5b07f3 哈哈
2026-02-15 02:16:45 +08:00

443 lines
17 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
AI/ML 交易策略回测 — LightGBM + 30+技术指标
核心思路:
1. 用30+种技术指标作为特征 (EMA/RSI/BB/MACD/ATR/K线形态/动量/波动率等)
2. 标签: 未来N根K线的收益方向 (涨>阈值=做多, 跌>阈值=做空, 否则=不交易)
3. 滚动训练: 每月用过去3个月数据训练, 预测下一个月
4. 只在模型高置信度时开仓(概率>阈值)
5. 同一时间只持1个仓
条件: 100U保证金, 100x杠杆, 90%返佣, >3分钟持仓
"""
import datetime
import sqlite3
import time as _time
from pathlib import Path
from collections import defaultdict
import numpy as np
import pandas as pd
import lightgbm as lgb
from sklearn.model_selection import TimeSeriesSplit
import warnings
warnings.filterwarnings('ignore')
# ==================== 数据加载 ====================
def load_data():
    """Load the 1-minute ETH candles for 2025 from the local SQLite database.

    Reads ``id`` (aliased to ``ts``), ``open``, ``high``, ``low`` and ``close``
    from table ``bitmart_eth_1m`` in ``../models/database.db`` (relative to
    this file's directory), keeping rows with ``start <= id < end`` ordered
    by ``id``.

    Returns:
        A DataFrame indexed by ``datetime`` (``ts`` interpreted as epoch
        milliseconds) that still carries the raw ``ts`` column next to the
        OHLC columns.
    """
    db = Path(__file__).parent.parent / 'models' / 'database.db'
    # NOTE(review): these are naive datetimes, so ``timestamp()`` interprets
    # them in the machine's local timezone; confirm that matches how ``id``
    # was stored.
    s = int(datetime.datetime(2025, 1, 1).timestamp()) * 1000
    e = int(datetime.datetime(2026, 1, 1).timestamp()) * 1000
    conn = sqlite3.connect(str(db))
    try:
        # Bound parameters instead of string interpolation; the finally
        # clause guarantees the connection is released if the query fails.
        df = pd.read_sql_query(
            "SELECT id as ts, open, high, low, close FROM bitmart_eth_1m "
            "WHERE id >= ? AND id < ? ORDER BY id",
            conn, params=(s, e))
    finally:
        conn.close()
    df['datetime'] = pd.to_datetime(df['ts'], unit='ms')
    df.set_index('datetime', inplace=True)
    return df
# ==================== 特征工程 ====================
def add_features(df):
    """Append the technical-indicator columns to ``df`` and return it.

    ``df`` must have ``open``/``high``/``low``/``close`` columns and a
    datetime-like index (``.hour`` / ``.minute`` are read from it). The frame
    is modified in place; the returned object is the same frame.

    Column groups, in creation order: EMAs and EMA-relative ratios, RSI,
    Bollinger bands, MACD, ATR, stochastic, momentum returns, volatility,
    candle shape, run lengths of same-direction candles, engulfing patterns,
    position inside the 20-bar range, and time-of-day features.
    """
    close = df['close']
    high = df['high']
    low = df['low']
    open_ = df['open']

    # --- Exponential moving averages ---
    for span in (5, 8, 13, 21, 50, 120):
        df[f'ema_{span}'] = close.ewm(span=span, adjust=False).mean()

    # EMA spreads and price distance, all normalised by the close.
    df['ema_fast_slow'] = (df['ema_8'] - df['ema_21']) / close
    df['ema_slow_big'] = (df['ema_21'] - df['ema_120']) / close
    df['price_vs_ema120'] = (close - df['ema_120']) / close
    df['price_vs_ema50'] = (close - df['ema_50']) / close
    # EMA slope expressed as the 5-bar relative change.
    df['ema8_slope'] = df['ema_8'].pct_change(5)
    df['ema21_slope'] = df['ema_21'].pct_change(5)

    # --- RSI (simple rolling means of gains and losses) ---
    change = close.diff()
    ups = change.clip(lower=0)
    downs = (-change).clip(lower=0)
    for window in (7, 14, 21):
        mean_up = ups.rolling(window).mean()
        mean_down = downs.rolling(window).mean()
        # A zero average loss becomes NaN so the ratio is NaN, not inf.
        strength = mean_up / mean_down.replace(0, np.nan)
        df[f'rsi_{window}'] = 100 - 100 / (1 + strength)

    # --- Bollinger bands (20 bars, 2 standard deviations) ---
    mid = close.rolling(20).mean()
    std = close.rolling(20).std()
    df['bb_upper'] = mid + 2 * std
    df['bb_lower'] = mid - 2 * std
    df['bb_mid'] = mid
    band = df['bb_upper'] - df['bb_lower']
    df['bb_pct'] = (close - df['bb_lower']) / band.replace(0, np.nan)
    df['bb_width'] = band / mid

    # --- MACD, normalised by the close ---
    fast = close.ewm(span=12, adjust=False).mean()
    slow = close.ewm(span=26, adjust=False).mean()
    df['macd'] = (fast - slow) / close
    df['macd_signal'] = df['macd'].ewm(span=9, adjust=False).mean()
    df['macd_hist'] = df['macd'] - df['macd_signal']

    # --- ATR ---
    prev_close = close.shift(1)
    true_range = pd.concat(
        [high - low, (high - prev_close).abs(), (low - prev_close).abs()],
        axis=1,
    ).max(axis=1)
    df['atr_14'] = true_range.rolling(14).mean()
    df['atr_pct'] = df['atr_14'] / close
    df['atr_7'] = true_range.rolling(7).mean() / close

    # --- Stochastic oscillator ---
    lowest_14 = low.rolling(14).min()
    highest_14 = high.rolling(14).max()
    df['stoch_k'] = (close - lowest_14) / (highest_14 - lowest_14).replace(0, np.nan) * 100
    df['stoch_d'] = df['stoch_k'].rolling(3).mean()

    # --- Momentum: return over the past N bars ---
    for lag in (1, 3, 5, 10, 20, 60):
        df[f'ret_{lag}'] = close.pct_change(lag)

    # --- Volatility of 1-bar returns ---
    bar_ret = close.pct_change()
    df['vol_5'] = bar_ret.rolling(5).std()
    df['vol_20'] = bar_ret.rolling(20).std()
    df['vol_ratio'] = df['vol_5'] / df['vol_20'].replace(0, np.nan)

    # --- Candle shape ---
    body = (close - open_).abs()
    open_close = pd.concat([open_, close], axis=1)
    df['body_pct'] = body / close
    df['upper_shadow'] = (high - open_close.max(axis=1)) / close
    df['lower_shadow'] = (open_close.min(axis=1) - low) / close
    df['body_vs_range'] = body / (high - low).replace(0, np.nan)
    df['is_bullish'] = (close > open_).astype(float)

    def run_length(flags):
        """Length of the current run of 1s in ``flags``; 0 where the flag is 0."""
        run_id = (flags != flags.shift()).cumsum()
        return (flags.groupby(run_id).cumcount() + 1) * flags

    # Consecutive same-direction candles.
    df['consec_bull'] = run_length((close > open_).astype(int))
    df['consec_bear'] = run_length((close < open_).astype(int))

    # --- Engulfing patterns ---
    prev_open = open_.shift(1)
    df['engulf_ratio'] = body / body.shift(1).replace(0, np.nan)
    bull_engulf = (
        (prev_close < prev_open) & (close > open_)
        & (close > prev_open) & (open_ <= prev_close)
    )
    df['bullish_engulf'] = bull_engulf.astype(float)
    bear_engulf = (
        (prev_close > prev_open) & (close < open_)
        & (close < prev_open) & (open_ >= prev_close)
    )
    df['bearish_engulf'] = bear_engulf.astype(float)

    # --- Position of the close inside the 20-bar high/low range ---
    df['high_20'] = high.rolling(20).max()
    df['low_20'] = low.rolling(20).min()
    df['price_position'] = (
        (close - df['low_20']) / (df['high_20'] - df['low_20']).replace(0, np.nan)
    )

    # --- Time of day, raw and cyclically encoded ---
    df['hour'] = df.index.hour
    df['minute'] = df.index.minute
    hour_angle = 2 * np.pi * df['hour'] / 24
    df['hour_sin'] = np.sin(hour_angle)
    df['hour_cos'] = np.cos(hour_angle)
    return df
# ==================== 标签生成 ====================
def add_labels(df, forward_bars=10, threshold=0.002):
    """Attach a ``label`` column describing the return ``forward_bars`` ahead.

    The forward return is ``close[t + forward_bars] / close[t] - 1``:

    * ``>  threshold``  ->  1.0 (long opportunity)
    * ``< -threshold``  -> -1.0 (short opportunity)
    * otherwise         ->  0.0 (no trade)
    * unknown (the last ``forward_bars`` rows, or a NaN close) -> NaN

    Rows whose future price is unknown are left as NaN instead of being
    labelled neutral, so a ``dropna(subset=['label'])`` in the training step
    removes them rather than training on invented "no trade" samples. As a
    consequence the column is float-typed.

    Args:
        df: Frame with a ``close`` column; modified in place.
        forward_bars: Look-ahead horizon in rows.
        threshold: Minimum absolute forward return for a directional label.

    Returns:
        The same ``df`` object with the ``label`` column set.
    """
    future_ret = df['close'].shift(-forward_bars) / df['close'] - 1
    # The NaN check comes first: comparisons against NaN are False and would
    # otherwise fall through to the neutral default.
    df['label'] = np.select(
        [future_ret.isna(), future_ret > threshold, future_ret < -threshold],
        [np.nan, 1.0, -1.0],
        default=0.0,
    )
    return df
# ==================== 模型训练 + 预测 ====================
def get_feature_cols(df):
    """Return the names of the columns of ``df`` that are used as model inputs.

    A column qualifies when it is not one of the raw price, label or
    absolute-level indicator columns listed below and its dtype is float64,
    float32, int64 or int32. The order of ``df.columns`` is preserved.
    """
    non_features = {
        'ts', 'open', 'high', 'low', 'close', 'label',
        'bb_upper', 'bb_lower', 'bb_mid', 'high_20', 'low_20',
        'atr_14', 'ema_5', 'ema_8', 'ema_13', 'ema_21', 'ema_50', 'ema_120',
    }
    numeric_dtypes = ('float64', 'float32', 'int64', 'int32')

    selected = []
    for name in df.columns:
        if name in non_features:
            continue
        if df[name].dtype in numeric_dtypes:
            selected.append(name)
    return selected
def train_predict_walkforward(df, feature_cols, train_months=3):
    """Walk-forward LightGBM training: fit on past months, predict the next.

    For every calendar month from index ``train_months`` onward, a 3-class
    model is trained on the preceding ``train_months`` months and used to
    predict that month. Labels -1/0/1 are shifted to classes 0/1/2, so
    predicted column 0 is P(short) and column 2 is P(long).

    Args:
        df: Frame with a datetime index, the ``feature_cols`` columns and a
            ``label`` column. A ``month`` period column is added in place.
        feature_cols: List of feature column names.
        train_months: Number of months in each training window.

    Returns:
        ``(proba_long, proba_short)``: two float Series aligned to
        ``df.index``. Rows without a prediction (the first ``train_months``
        months, skipped months, test rows with NaN features) stay at 0.0.
    """
    # Hyper-parameters are identical for every fold, so define them once.
    lgb_params = {
        'objective': 'multiclass',
        'num_class': 3,
        'metric': 'multi_logloss',
        'learning_rate': 0.05,
        'num_leaves': 31,
        'max_depth': 6,
        'min_child_samples': 50,
        'subsample': 0.8,
        'colsample_bytree': 0.8,
        'reg_alpha': 0.1,
        'reg_lambda': 0.1,
        'verbose': -1,
        'n_jobs': -1,
        'seed': 42,
    }

    df['month'] = df.index.to_period('M')
    months = sorted(df['month'].unique())

    # 0.0 means "no signal", so rows that are never predicted do not trade.
    all_proba_long = pd.Series(0.0, index=df.index, dtype=float)
    all_proba_short = pd.Series(0.0, index=df.index, dtype=float)

    print(f"\n Walk-forward training ({len(months)} months, train={train_months}m):", flush=True)
    for i in range(train_months, len(months)):
        test_month = months[i]
        train_start = months[i - train_months]

        # NOTE(review): the last rows of the training window carry labels
        # computed from prices inside the test month; consider purging
        # ``forward_bars`` rows at the boundary.
        train_mask = (df['month'] >= train_start) & (df['month'] < test_month)
        test_mask = df['month'] == test_month
        train_df = df[train_mask].dropna(subset=feature_cols + ['label'])
        test_df = df[test_mask].dropna(subset=feature_cols)
        if len(train_df) < 1000 or len(test_df) < 100:
            print(f" {test_month}: skip (data insufficient)", flush=True)
            continue

        X_train = train_df[feature_cols].values
        y_train = train_df['label'].values
        X_test = test_df[feature_cols].values
        # Map -1, 0, 1 to class ids 0, 1, 2 for the multiclass objective.
        y_train_cls = y_train + 1

        dtrain = lgb.Dataset(X_train, label=y_train_cls)
        # Pass a copy so the shared parameter dict cannot be modified by a fit.
        model = lgb.train(dict(lgb_params), dtrain, num_boost_round=200)

        # proba has shape (n, 3): [P(short), P(neutral), P(long)].
        proba = model.predict(X_test)
        test_idx = test_df.index
        all_proba_short.loc[test_idx] = proba[:, 0]
        all_proba_long.loc[test_idx] = proba[:, 2]

        # Feature importance is printed for the final month only.
        if i == len(months) - 1:
            importance = model.feature_importance(importance_type='gain')
            feat_imp = sorted(zip(feature_cols, importance), key=lambda x: -x[1])
            print(f"\n Top 10 features:", flush=True)
            for fname, imp in feat_imp[:10]:
                print(f" {fname:<20} {imp:.0f}", flush=True)

        # Diagnostic counts use a fixed 0.45 cut-off, independent of the
        # probability threshold the backtest is later run with.
        long_cnt = (proba[:, 2] > 0.45).sum()
        short_cnt = (proba[:, 0] > 0.45).sum()
        print(f" {test_month}: train={len(train_df):,} test={len(test_df):,} "
              f"signals: long={long_cnt} short={short_cnt}", flush=True)
    return all_proba_long, all_proba_short
# ==================== 回测引擎 ====================
def backtest(df, proba_long, proba_short, notional=10000.0,
             prob_threshold=0.45, min_hold=200, max_hold=1800,
             sl_pct=0.004, tp_pct=0.006):
    """Simulate a single-position strategy driven by model probabilities.

    Bars are processed in order using the ``close`` price. At most one
    position is open at a time.

    Exit rules, checked in this order on every bar while a position is open:

    1. Hard stop: loss >= ``1.5 * sl_pct`` (applies even before ``min_hold``).
    2. Once held for at least ``min_hold`` seconds: stop-loss at ``sl_pct``,
       then take-profit at ``tp_pct``, then time-out at ``max_hold`` seconds.
    3. Model reversal: the opposite-side probability exceeds
       ``prob_threshold + 0.05``. After this exit a new position may be
       opened on the same bar.

    Entry: when flat, go long if ``P(long) > prob_threshold`` and
    ``P(long) > P(short)``; the mirrored condition opens a short. A position
    still open after the last bar is closed at the final close.

    Args:
        df: Frame with a ``close`` column and a datetime index.
        proba_long, proba_short: Series read positionally, bar by bar.
        notional: Position size used to convert a return into PnL.
        prob_threshold: Minimum probability to open a position.
        min_hold, max_hold: Holding-time limits in seconds.
        sl_pct, tp_pct: Stop-loss and take-profit as fractions of entry price.

    Returns:
        List of tuples ``(side, entry_price, exit_price, pnl, held_seconds,
        reason, entry_time, exit_time)`` with ``side`` 1 for long and -1 for
        short. ``pnl`` is ``notional * return``; fees are not deducted here.
    """
    # Pull the columns out once instead of paying for a pandas ``.iloc``
    # lookup three times per bar.
    closes = df['close'].to_numpy()
    p_long = proba_long.to_numpy()
    p_short = proba_short.to_numpy()
    reverse_threshold = prob_threshold + 0.05

    pos = 0
    op = 0.0
    ot = None
    trades = []
    for i, dt in enumerate(df.index):
        p = closes[i]
        pl = p_long[i]
        ps = p_short[i]

        if pos != 0:
            pp = (p - op) / op if pos == 1 else (op - p) / op
            hsec = (dt - ot).total_seconds()

            # Pick the first exit rule that fires, in priority order.
            reason = None
            if -pp >= sl_pct * 1.5:
                reason = "硬止损"
            elif hsec >= min_hold:
                if -pp >= sl_pct:
                    reason = "止损"
                elif pp >= tp_pct:
                    reason = "止盈"
                elif hsec >= max_hold:
                    reason = "超时"
            if reason is not None:
                trades.append((pos, op, p, notional * pp, hsec, reason, ot, dt))
                pos = 0
                # No re-entry on the bar of a stop, target or time-out exit.
                continue

            # Model flipped to the other side: close, then fall through so
            # the entry logic below can open a new position on this bar.
            if (pos == 1 and ps > reverse_threshold) or \
                    (pos == -1 and pl > reverse_threshold):
                trades.append((pos, op, p, notional * pp, hsec, "AI反转", ot, dt))
                pos = 0

        if pos == 0:
            if pl > prob_threshold and pl > ps:
                pos = 1
                op = p
                ot = dt
            elif ps > prob_threshold and ps > pl:
                pos = -1
                op = p
                ot = dt

    # Close whatever is still open at the last bar.
    if pos != 0:
        p = df['close'].iloc[-1]
        dt = df.index[-1]
        pp = (p - op) / op if pos == 1 else (op - p) / op
        trades.append((pos, op, p, notional * pp, (dt - ot).total_seconds(), "结束", ot, dt))
    return trades
# ==================== 结果分析 ====================
def analyze(trades, notional, label):
    """Print a performance report for ``trades`` and return the net profit.

    Each trade is a tuple whose index 3 is the directional PnL, index 5 the
    exit reason and index 7 the exit time (used for the monthly breakdown).
    Per trade, the fee is ``notional * 0.0006 * 2``, of which 90% is treated
    as rebate; the remaining net fee is charged against every trade.

    Returns:
        Total PnL minus net fees, or 0 when ``trades`` is empty.
    """
    if not trades:
        print(f" [{label}] No trades", flush=True)
        return 0

    n = len(trades)
    FEE = notional * 0.0006 * 2
    REB = FEE * 0.9
    NFEE = FEE - REB

    total_pnl = sum(t[3] for t in trades)
    net = total_pnl - NFEE * n
    wins = sum(1 for t in trades if t[3] > 0)
    wr = wins / n * 100
    total_reb = REB * n

    # One pass collects per-month statistics, exit reasons and max drawdown.
    monthly = {}
    reasons = {}
    cum = 0
    peak = 0
    dd = 0
    for t in trades:
        trade_net = t[3] - NFEE

        bucket = monthly.setdefault(t[7].strftime('%Y-%m'), {'n': 0, 'net': 0, 'w': 0})
        bucket['n'] += 1
        bucket['net'] += trade_net
        if t[3] > 0:
            bucket['w'] += 1

        reasons[t[5]] = reasons.get(t[5], 0) + 1

        cum += trade_net
        if cum > peak:
            peak = cum
        if peak - cum > dd:
            dd = peak - cum

    pm = sum(1 for stats in monthly.values() if stats['net'] > 0)

    print(f"\n{'='*75}", flush=True)
    print(f" {label}", flush=True)
    print(f"{'='*75}", flush=True)
    print(f" 方向盈亏: {total_pnl:>+10.0f} USDT", flush=True)
    print(f" 返佣: {total_reb:>+10.0f} USDT", flush=True)
    print(f" 净手续费: {NFEE*n:>10.0f} USDT", flush=True)
    print(f" ================================", flush=True)
    print(f" 年净利: {net:>+10.0f} USDT (月均 {net/12:>+.0f})", flush=True)
    print(f" 交易: {n}笔 | 胜率: {wr:.1f}% | 盈利月: {pm}/12", flush=True)
    print(f" 最大回撤: {dd:>.0f} USDT", flush=True)

    if wins > 0:
        avg_win = sum(t[3] for t in trades if t[3] > 0) / wins
        if n > wins:
            avg_loss = sum(t[3] for t in trades if t[3] <= 0) / (n - wins)
        else:
            avg_loss = 0
        print(f" 均赢: {avg_win:>+.2f} | 均亏: {avg_loss:>+.2f} | 盈亏比: {abs(avg_win/avg_loss) if avg_loss!=0 else 999:.2f}", flush=True)

    print(f"\n 平仓原因:", flush=True)
    for r, cnt in sorted(reasons.items(), key=lambda item: -item[1]):
        print(f" {r:<10} {cnt:>5}笔 ({cnt/n*100:.1f}%)", flush=True)

    print(f"\n 月度:", flush=True)
    for m in sorted(monthly):
        d = monthly[m]
        wr_m = d['w'] / d['n'] * 100 if d['n'] > 0 else 0
        # One '+' or '-' per 100 USDT of monthly net, capped at 30 characters.
        gain_marks = "+" * min(30, max(0, int(d['net'] / 100)))
        loss_marks = "-" * min(30, max(0, int(-d['net'] / 100)))
        bar = gain_marks + loss_marks
        print(f" {m} {d['n']:>4}{d['net']:>+8.0f} {wr_m:>4.0f}% {bar}", flush=True)
    print(f" {'合计':>7} {n:>4}{net:>+8.0f}", flush=True)

    # Scale the result to other margin sizes at 100x leverage.
    print(f"\n 仓位放大:", flush=True)
    for margin in (100, 300, 500, 800, 1000):
        scale = margin * 100 / notional
        print(f" {margin}U: 月均 {net*scale/12:>+.0f} USDT {'<<< 达标' if net*scale/12>=1000 else ''}", flush=True)
    print(f"{'='*75}", flush=True)
    return net
# ==================== 主函数 ====================
def main():
    """Run the whole experiment: load data, build features, sweep configs.

    Features are built once. For every configuration the labels are
    regenerated on a copy of the feature frame, the walk-forward model is
    retrained, the backtest is run and analysed, and the configuration with
    the highest net profit is reported at the end.
    """
    started = _time.time()
    rule = "=" * 75
    notional = 10000.0

    print(rule, flush=True)
    print(" AI/ML 交易策略 — LightGBM + 30+技术指标", flush=True)
    print(rule, flush=True)

    print("\n[1/4] 加载数据...", flush=True)
    df = load_data()
    print(f" {len(df):,} 根 1分钟K线", flush=True)

    print("\n[2/4] 特征工程 (30+指标)...", flush=True)
    df = add_features(df)
    feature_cols = get_feature_cols(df)
    print(f" 生成 {len(feature_cols)} 个特征", flush=True)

    # Look-ahead horizons and thresholds to compare:
    # (forward_bars, threshold, prob_threshold, sl, tp, label)
    configs = [
        (5, 0.001, 0.42, 0.003, 0.004, "AI-v1: 5bar前瞻 阈值0.1%"),
        (10, 0.002, 0.42, 0.004, 0.006, "AI-v2: 10bar前瞻 阈值0.2%"),
        (10, 0.002, 0.45, 0.004, 0.006, "AI-v3: 10bar 高置信0.45"),
        (10, 0.003, 0.45, 0.005, 0.008, "AI-v4: 10bar 阈值0.3% 宽SL"),
        (20, 0.003, 0.42, 0.005, 0.008, "AI-v5: 20bar前瞻 阈值0.3%"),
        (20, 0.004, 0.45, 0.005, 0.010, "AI-v6: 20bar 阈值0.4% 大TP"),
    ]

    best_net = -999999
    best_label = ""
    for horizon, ret_threshold, prob_th, stop_loss, take_profit, label in configs:
        print(f"\n{rule}", flush=True)
        print(f" [{label}]", flush=True)
        print(f" 前瞻={horizon}bar 方向阈值={ret_threshold*100:.1f}% 概率阈值={prob_th} SL={stop_loss*100:.1f}% TP={take_profit*100:.1f}%", flush=True)
        print(rule, flush=True)

        print("\n[3/4] 生成标签...", flush=True)
        # Label a copy so each configuration starts from the unlabeled frame.
        df_labeled = add_labels(df.copy(), forward_bars=horizon, threshold=ret_threshold)
        labels = df_labeled['label']
        n_long = int((labels == 1).sum())
        n_short = int((labels == -1).sum())
        n_flat = int((labels == 0).sum())
        print(f" 多={n_long:,} 空={n_short:,} 中性={n_flat:,}", flush=True)

        print("\n[4/4] 滚动训练+预测...", flush=True)
        proba_long, proba_short = train_predict_walkforward(
            df_labeled, feature_cols, train_months=3)

        print("\n 回测...", flush=True)
        trades = backtest(
            df_labeled, proba_long, proba_short,
            notional=notional, prob_threshold=prob_th,
            sl_pct=stop_loss, tp_pct=take_profit,
        )
        net = analyze(trades, notional, label)
        if net > best_net:
            best_net = net
            best_label = label

    elapsed = _time.time() - started
    print(f"\n\n{rule}", flush=True)
    print(f" 总结 | 耗时 {elapsed:.0f}s", flush=True)
    print(rule, flush=True)
    print(f" 最佳: {best_label}", flush=True)
    print(f" 年净利: {best_net:+.0f} USDT = 月均 {best_net/12:+.0f} USDT", flush=True)
    if best_net > 0:
        # Margin needed for 12000 USDT/year, scaling the 100 U baseline linearly.
        needed = int(12000 / best_net * 100) + 1
        print(f" 达到1000U/月需保证金: ~{needed}U", flush=True)
    print(rule, flush=True)
# Run the full experiment only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()