Files
jyx_code4/ensemble_meta.py
Your Name 50d8260437 haha
2026-02-16 22:43:53 +08:00

229 lines
8.6 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
集成学习 + Meta 模型 + 动态风控模块
"""
import numpy as np
import pandas as pd
import lightgbm as lgb
import datetime
# Hyper-parameter sets for the base LightGBM learners. Each entry is merged
# over the shared defaults in ``_train_one``; ``train_ensemble_with_meta``
# uses the first ``n_models`` entries. Every entry carries its own seed.
ENSEMBLE_CONFIGS = [
    dict(
        num_leaves=31, max_depth=6, learning_rate=0.05,
        subsample=0.8, colsample_bytree=0.8,
        reg_alpha=0.1, reg_lambda=0.1, seed=42,
    ),
    dict(
        num_leaves=63, max_depth=8, learning_rate=0.03,
        subsample=0.7, colsample_bytree=0.7,
        reg_alpha=0.05, reg_lambda=0.05, seed=123,
    ),
    dict(
        num_leaves=15, max_depth=5, learning_rate=0.08,
        subsample=0.9, colsample_bytree=0.9,
        reg_alpha=0.5, reg_lambda=0.5, seed=456,
    ),
    dict(
        num_leaves=31, max_depth=7, learning_rate=0.04,
        subsample=0.75, colsample_bytree=0.6,
        reg_alpha=1.0, reg_lambda=1.0, seed=789,
    ),
    dict(
        num_leaves=47, max_depth=6, learning_rate=0.06,
        subsample=0.85, colsample_bytree=0.85,
        reg_alpha=0.2, reg_lambda=0.3, seed=2024,
    ),
]

# Feature columns that are additionally passed to the meta model, but only
# those that also appear in the caller's ``feature_cols``.
META_EXTRA = [
    'bb_pct',
    'rsi',
    'zscore',
    'atr_pct',
    'vol_20',
    'stoch_k',
    'cci',
]
def _train_one(X, y, cfg, rounds=250):
    """Train a single 3-class LightGBM booster.

    Args:
        X: Feature matrix handed to ``lgb.Dataset``.
        y: Integer class labels (``num_class`` is fixed to 3).
        cfg: Per-model parameter overrides; merged on top of the shared
            defaults, so keys in ``cfg`` win.
        rounds: Number of boosting rounds.

    Returns:
        The booster returned by ``lgb.train``.
    """
    params = {
        'objective': 'multiclass',
        'num_class': 3,
        'metric': 'multi_logloss',
        'min_child_samples': 50,
        'verbose': -1,
        'n_jobs': -1,
        **cfg,
    }

    # With the native API, LightGBM only applies row bagging
    # (subsample / bagging_fraction) when the bagging frequency is non-zero,
    # and the default frequency is 0. Without this, the different
    # ``subsample`` values in the configs would be silently ignored.
    # A frequency supplied by the caller is left untouched.
    fraction_keys = ('subsample', 'bagging_fraction', 'sub_row', 'bagging')
    freq_keys = ('subsample_freq', 'bagging_freq')
    wants_bagging = any(key in params for key in fraction_keys)
    has_freq = any(key in params for key in freq_keys)
    if wants_bagging and not has_freq:
        params['subsample_freq'] = 1

    ds = lgb.Dataset(X, label=y)
    return lgb.train(params, ds, num_boost_round=rounds)
def _fit_ensemble(X_fit, y_fit, X_eval, cfgs, rounds):
    """Train one base model per config; return models plus mean/std of their predictions on X_eval."""
    models = []
    probas = []
    for cfg in cfgs:
        model = _train_one(X_fit, y_fit, cfg, rounds)
        models.append(model)
        probas.append(model.predict(X_eval))
    return models, np.mean(probas, axis=0), np.std(probas, axis=0)


def _meta_inputs(frame, avg_proba, std_proba, extra_cols):
    """Stack ensemble mean/std probabilities with the extra meta feature columns of ``frame``."""
    if extra_cols:
        extra = frame[extra_cols].values
    else:
        # Keep the matrix shape stable when none of META_EXTRA is available.
        extra = np.zeros((len(frame), 1))
    return np.hstack([avg_proba, std_proba, extra])


def train_ensemble_with_meta(
    df_train, df_test, feature_cols,
    forward_bars, label_threshold, lgb_rounds=250, n_models=5,
):
    """Train an ensemble of LightGBM base models plus a meta filter model.

    Steps:
      1. Train the first ``n_models`` configs from ``ENSEMBLE_CONFIGS`` on the
         full training set and average their class probabilities on the test
         set (mean and standard deviation across models).
      2. Split the training set chronologically by position (first 70% /
         last 30%), retrain the base models on the first part, predict the
         second part, and train a binary meta model that learns whether the
         ensemble's predicted class equals the true class. Only rows where
         the ensemble predicts a non-middle class (class != 1) are used, and
         only if there are more than 500 of them.
      3. Score the test set with the meta model. If no meta model was
         trained, the confidence falls back to ``1 - mean(std of probas)``.

    Args:
        df_train: Training frame containing ``feature_cols`` and ``'label'``.
            ``label + 1`` is used as the class id, so labels are expected to
            be -1/0/1.
        df_test: Test frame containing ``feature_cols``.
        feature_cols: List of feature column names.
        forward_bars: Not used inside this function.
        label_threshold: Not used inside this function.
        lgb_rounds: Boosting rounds for every base model.
        n_models: How many entries of ``ENSEMBLE_CONFIGS`` to use.

    Returns:
        ``(pl, ps, mc, models, meta_model)`` where ``pl`` / ``ps`` are Series
        indexed like ``df_test`` holding the averaged probability of class 2
        (long) / class 0 (short), ``mc`` is the meta confidence Series,
        ``models`` is the list of base boosters trained on the full training
        set and ``meta_model`` is the meta booster or ``None``. Test rows
        with NaN features keep the defaults 0.0 / 0.0 / 0.5.
        Returns five ``None`` values when fewer than 2000 clean training rows
        or fewer than 100 clean test rows are available.

    Raises:
        ValueError: If ``n_models`` is smaller than 1.
    """
    no_result = (None, None, None, None, None)

    train_df = df_train.dropna(subset=feature_cols + ['label'])
    if len(train_df) < 2000:
        return no_result
    test_df = df_test.dropna(subset=feature_cols)
    if len(test_df) < 100:
        return no_result

    # An empty config list would otherwise fail later with an opaque
    # shape error inside numpy.
    if n_models < 1:
        raise ValueError(f"n_models must be >= 1, got {n_models}")
    cfgs = ENSEMBLE_CONFIGS[:n_models]

    X_full = train_df[feature_cols].values
    y_full = (train_df['label'].values + 1).astype(int)
    X_test = test_df[feature_cols].values

    # Step 1: base models on the full training set, ensemble prediction on test.
    models, avg_proba, std_proba = _fit_ensemble(
        X_full, y_full, X_test, cfgs, lgb_rounds)

    # Step 2: meta model, using a chronological split inside the training set.
    # NOTE(review): if labels look `forward_bars` bars ahead, the last rows of
    # the base part overlap the meta part; consider purging them - confirm
    # against the label construction.
    split_idx = int(len(train_df) * 0.7)
    base_part = train_df.iloc[:split_idx]
    meta_part = train_df.iloc[split_idx:]
    X_base = base_part[feature_cols].values
    y_base = (base_part['label'].values + 1).astype(int)
    X_meta_f = meta_part[feature_cols].values
    _, m_avg, m_std = _fit_ensemble(X_base, y_base, X_meta_f, cfgs, lgb_rounds)

    me_cols = [c for c in META_EXTRA if c in feature_cols]
    X_mi = _meta_inputs(meta_part, m_avg, m_std, me_cols)

    # Meta label: 1 when the ensemble's predicted class matches the true
    # class (good signal), otherwise 0.
    pred_dir = np.argmax(m_avg, axis=1)
    true_dir = (meta_part['label'].values + 1).astype(int)
    ml = (pred_dir == true_dir).astype(int)
    has_sig = pred_dir != 1

    meta_model = None
    if has_sig.sum() > 500:
        mp = {
            'objective': 'binary', 'metric': 'binary_logloss',
            'num_leaves': 15, 'max_depth': 4, 'learning_rate': 0.05,
            'min_child_samples': 100,
            # LightGBM applies `subsample` only when the bagging frequency
            # is non-zero, so it is enabled explicitly here.
            'subsample': 0.8, 'subsample_freq': 1,
            'colsample_bytree': 0.8,
            'reg_alpha': 1.0, 'reg_lambda': 1.0,
            'verbose': -1, 'n_jobs': -1, 'seed': 42,
        }
        dm = lgb.Dataset(X_mi[has_sig], label=ml[has_sig])
        meta_model = lgb.train(mp, dm, num_boost_round=150)

    # Step 3: meta confidence on the test set.
    X_tm = _meta_inputs(test_df, avg_proba, std_proba, me_cols)
    if meta_model is not None:
        mc_arr = meta_model.predict(X_tm)
    else:
        # Fallback: low disagreement between base models -> high confidence.
        mc_arr = 1.0 - std_proba.mean(axis=1)

    pl = pd.Series(0.0, index=df_test.index)
    ps = pd.Series(0.0, index=df_test.index)
    mc = pd.Series(0.5, index=df_test.index)
    ps.loc[test_df.index] = avg_proba[:, 0]
    pl.loc[test_df.index] = avg_proba[:, 2]
    mc.loc[test_df.index] = mc_arr
    return pl, ps, mc, models, meta_model
def backtest_with_risk_control(
    df, proba_long, proba_short,
    notional=10000.0,
    prob_threshold=0.45,
    min_hold_seconds=180,
    max_hold_seconds=1800,
    sl_pct=0.004,
    tp_pct=0.006,
    meta_conf=None,
    meta_threshold=0.55,
    daily_loss_limit=80.0,
    dd_circuit_breaker=400.0,
    cooldown_bars=30,
    max_consec_losses=5,
):
    """Bar-by-bar backtest with dynamic risk controls.

    Risk controls applied before opening a position: consecutive-loss
    cooldown, daily loss limit, drawdown circuit breaker and meta-confidence
    filter.

    Args:
        df: Frame with a ``'close'`` column and a datetime-like index
            (``.date()`` and timestamp subtraction are used on index values).
        proba_long: Per-bar long probability, aligned with ``df`` by position.
        proba_short: Per-bar short probability, aligned with ``df`` by position.
        notional: Position size used to convert percentage PnL to currency.
        prob_threshold: Minimum probability to open; an opposite-side
            probability above ``prob_threshold + 0.05`` closes a position.
        min_hold_seconds: Regular exits are only evaluated after this hold time.
        max_hold_seconds: Positions held at least this long are closed.
        sl_pct: Stop-loss fraction; the hard stop fires at ``1.5 * sl_pct``
            regardless of hold time.
        tp_pct: Take-profit fraction.
        meta_conf: Optional per-bar meta confidence; 1.0 is assumed when None.
        meta_threshold: Minimum meta confidence required to open.
        daily_loss_limit: No new positions once the day's net PnL is at or
            below ``-daily_loss_limit``.
        dd_circuit_breaker: No new positions once drawdown from the net PnL
            peak reaches this amount.
        cooldown_bars: Number of bars to pause after a loss streak.
        max_consec_losses: Loss-streak length that triggers the cooldown.

    Returns:
        List of trade tuples ``(pos, open_price, close_price, pnl_usdt,
        hold_sec, reason, open_time, close_time)``. ``pnl_usdt`` is gross
        (fees are only deducted in the internal risk accounting). A position
        still open on the last bar is closed there with reason ``'结束'``.
    """
    # Pull per-bar values out once; per-iteration ``.iloc`` lookups are slow.
    closes = np.asarray(df['close'])
    long_p = np.asarray(proba_long)
    short_p = np.asarray(proba_short)
    conf = np.asarray(meta_conf) if meta_conf is not None else None

    pos = 0
    open_price = 0.0
    open_time = None
    trades = []

    # Risk-control state.
    daily_pnl = 0.0
    current_day = None
    cum_pnl = 0.0
    peak_pnl = 0.0
    consec_losses = 0
    cooldown_until = 0
    # Net fee per round trip: 0.06% per side, two sides, times 0.10
    # (presumably the share left after rebates - confirm).
    net_fee = notional * 0.0006 * 2 * 0.10

    def _settle(bar, exit_price, exit_time, pnl_pct, hold_sec, reason):
        """Record a closed trade and update all risk-control state."""
        nonlocal pos, cum_pnl, daily_pnl, peak_pnl, consec_losses, cooldown_until
        pnl_usdt = notional * pnl_pct
        trades.append((pos, open_price, exit_price, pnl_usdt, hold_sec,
                       reason, open_time, exit_time))
        cum_pnl += pnl_usdt - net_fee
        daily_pnl += pnl_usdt - net_fee
        consec_losses = consec_losses + 1 if pnl_usdt < 0 else 0
        if cum_pnl > peak_pnl:
            peak_pnl = cum_pnl
        # Arm the cooldown for every kind of exit, including hard stops, so
        # a streak of hard stops also pauses trading.
        if consec_losses >= max_consec_losses:
            cooldown_until = bar + cooldown_bars
        pos = 0

    for i, dt in enumerate(df.index):
        price = closes[i]
        pl = long_p[i]
        ps = short_p[i]
        mc = conf[i] if conf is not None else 1.0

        # Reset the daily PnL when the calendar day changes.
        day = dt.date()
        if current_day != day:
            current_day = day
            daily_pnl = 0.0

        # In a position: check exit conditions.
        if pos != 0 and open_time is not None:
            if pos == 1:
                pnl_pct = (price - open_price) / open_price
            else:
                pnl_pct = (open_price - price) / open_price
            hold_sec = (dt - open_time).total_seconds()

            # Hard stop: fires regardless of the minimum hold time and
            # blocks re-entry on the same bar.
            if -pnl_pct >= sl_pct * 1.5:
                _settle(i, price, dt, pnl_pct, hold_sec, '硬止损')
                continue

            if hold_sec >= min_hold_seconds:
                reason = ''
                if -pnl_pct >= sl_pct:
                    reason = '止损'
                elif pnl_pct >= tp_pct:
                    reason = '止盈'
                elif hold_sec >= max_hold_seconds:
                    reason = '超时'
                elif pos == 1 and ps > prob_threshold + 0.05:
                    reason = 'AI反转'
                elif pos == -1 and pl > prob_threshold + 0.05:
                    reason = 'AI反转'
                if reason:
                    _settle(i, price, dt, pnl_pct, hold_sec, reason)

        # Flat: check risk controls, then entry signals.
        if pos == 0:
            if i < cooldown_until:
                continue
            if daily_pnl <= -daily_loss_limit:
                continue
            if peak_pnl - cum_pnl >= dd_circuit_breaker:
                continue
            # Meta filter: skip when confidence is too low.
            if mc < meta_threshold:
                continue
            if pl > prob_threshold and pl > ps:
                pos = 1
                open_price = price
                open_time = dt
            elif ps > prob_threshold and ps > pl:
                pos = -1
                open_price = price
                open_time = dt

    # Force-close whatever is still open on the final bar.
    if pos != 0:
        price = closes[-1]
        dt = df.index[-1]
        if pos == 1:
            pnl_pct = (price - open_price) / open_price
        else:
            pnl_pct = (open_price - price) / open_price
        hold_sec = (dt - open_time).total_seconds()
        trades.append((pos, open_price, price, notional * pnl_pct, hold_sec,
                       '结束', open_time, dt))
    return trades