optimize_comfort_zone.py 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Comfort-zone rule optimization.

Steps:
1. Work only on trades outside the "death zone" market states.
2. 2023-2024 -> training set: enumerate indicator combinations and keep the
   rules that pass the filter.
3. 2025 -> validation set: tune the admission threshold for combinations.
4. 2026 -> blind-test set: final check (no look-back adjustments allowed).
5. Output the new rules and run a full back-test comparison.
"""
import sys, io
if sys.platform == 'win32':
    # Re-wrap the standard streams as UTF-8 text streams on Windows
    # (presumably so the Chinese report text prints correctly there).
    sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
    sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding='utf-8')
import os
import pandas as pd
import numpy as np
from itertools import combinations
import warnings
# Suppresses every warning category for the whole process.
warnings.filterwarnings('ignore')
# Horizontal rule printed around the section headers of the report.
SEP = '=' * 70
# Default starting capital for simulate_equity / stats / print_yearly.
INITIAL = 1_000_000
# Values of the '市场状态' column whose trades main() filters out.
DEATH_ZONES = {'下跌趋势低波', '震荡低波'}
  25. # ── 加载数据 ────────────────────────────────────────────────────
  26. def load_trades():
  27. csv = os.path.join(os.path.dirname(__file__),
  28. 't1_trades_with_environment_20260327_141655.csv')
  29. df = pd.read_csv(csv, encoding='utf-8-sig')
  30. cols = [
  31. '交易方向','开仓时间','平仓时间','开仓价格','平仓价格','仓位',
  32. '盈亏金额','盈亏百分比','退出原因','持仓周期数','持仓小时数',
  33. 'T1调整','原平仓时间','原平仓价格','原盈亏','盈亏变化',
  34. '入场信号','开仓市值','平仓时资金','市场状态',
  35. '趋势短期','趋势中期','趋势强度','波动率分位','波动率水平',
  36. '成交量分位','布林带位置','布林带区域','RSI分位','RSI区域',
  37. '1日动量','入场价格'
  38. ]
  39. df.columns = cols
  40. df['开仓时间'] = pd.to_datetime(df['开仓时间'])
  41. for c in ['盈亏金额','盈亏百分比','波动率分位','RSI分位','趋势强度']:
  42. df[c] = pd.to_numeric(df[c], errors='coerce')
  43. df['盈利'] = df['盈亏金额'] > 0
  44. df['年份'] = df['开仓时间'].dt.year
  45. return df.sort_values('开仓时间').reset_index(drop=True)
  46. # ── 权益曲线模拟 ─────────────────────────────────────────────────
  47. def simulate_equity(df, initial=INITIAL):
  48. df = df.copy().reset_index(drop=True)
  49. cap = float(initial)
  50. caps = []
  51. for _, r in df.iterrows():
  52. cap += float(r['盈亏金额'])
  53. caps.append(cap)
  54. df['资金余额'] = caps
  55. df['盈利'] = df['盈亏金额'] > 0
  56. return df
  57. # ── 指标分箱(将连续指标离散化用于组合搜索)────────────────────────
  58. def add_bins(df):
  59. df = df.copy()
  60. # 波动率分位数分箱
  61. def vol_bin(v):
  62. if pd.isna(v): return 'unknown'
  63. if v < 0.20: return 'vol极低'
  64. if v < 0.40: return 'vol低'
  65. if v < 0.60: return 'vol中'
  66. if v < 0.80: return 'vol高'
  67. return 'vol极高'
  68. # RSI分位数分箱
  69. def rsi_bin(v):
  70. if pd.isna(v): return 'unknown'
  71. if v < 0.05: return 'rsi极底'
  72. if v < 0.10: return 'rsi底'
  73. if v < 0.20: return 'rsi低'
  74. if v < 0.40: return 'rsi偏低'
  75. if v < 0.60: return 'rsi中'
  76. if v < 0.80: return 'rsi偏高'
  77. return 'rsi高'
  78. # 趋势强度分箱
  79. def ts_bin(v):
  80. if pd.isna(v): return 'unknown'
  81. if v < 1.0: return 'ts弱'
  82. if v < 1.5: return 'ts中弱'
  83. if v < 2.5: return 'ts中'
  84. if v < 4.0: return 'ts强'
  85. return 'ts极强'
  86. df['vol_bin'] = df['波动率分位'].apply(vol_bin)
  87. df['rsi_bin'] = df['RSI分位'].apply(rsi_bin)
  88. df['ts_bin'] = df['趋势强度'].apply(ts_bin)
  89. df['t1_flag'] = df['T1调整'].apply(lambda x: 'T1是' if 'T0' in str(x) else 'T1否')
  90. return df
  91. # ── 组合穷举(在指定数据集上) ──────────────────────────────────────
  92. CAT_COLS = ['市场状态', '波动率水平', 'RSI区域', 'vol_bin', 'rsi_bin', 'ts_bin']
  93. def scan_combos(df, min_trades=6, min_wr=0.55, max_combos=2):
  94. """穷举双指标组合,返回满足条件的规则列表"""
  95. results = []
  96. for n in range(1, max_combos + 1):
  97. for cols in combinations(CAT_COLS, n):
  98. groups = df.groupby(list(cols))
  99. for key, sub in groups:
  100. if len(sub) < min_trades:
  101. continue
  102. wr = sub['盈利'].mean()
  103. avg = sub['盈亏金额'].mean()
  104. if wr >= min_wr:
  105. cond = dict(zip(cols, key if n > 1 else [key]))
  106. results.append({
  107. 'n_cols': n,
  108. 'conditions': cond,
  109. 'cond_str': ' & '.join(f'{k}={v}' for k, v in cond.items()),
  110. 'trades': len(sub),
  111. 'win_rate': wr,
  112. 'avg_pnl': avg,
  113. 'total_pnl': sub['盈亏金额'].sum(),
  114. })
  115. return pd.DataFrame(results) if results else pd.DataFrame()
  116. def apply_combo_rule(row, combo_rules):
  117. """判断一笔交易是否命中任意一条组合规则"""
  118. for rule in combo_rules:
  119. match = all(str(row.get(col, '')) == str(val)
  120. for col, val in rule['conditions'].items())
  121. if match:
  122. return True
  123. return False
  124. # ── 绩效统计 ─────────────────────────────────────────────────────
  125. def stats(df, initial=INITIAL):
  126. if len(df) == 0:
  127. return None
  128. wr = df['盈利'].mean()
  129. pnl = df['盈亏金额'].sum()
  130. cap = df['资金余额'].iloc[-1]
  131. ret = (cap - initial) / initial
  132. win = df[df['盈利']]['盈亏金额']
  133. los = df[~df['盈利']]['盈亏金额']
  134. plr = abs(win.mean() / los.mean()) if len(los) > 0 and los.mean() != 0 else float('inf')
  135. eq = df['资金余额'].values
  136. pk = np.maximum.accumulate(np.append([initial], eq))
  137. dd = ((eq - pk[1:]) / pk[1:]).min() if len(eq) > 0 else 0
  138. return dict(n=len(df), wr=wr, pnl=pnl, cap=cap, ret=ret, plr=plr, dd=dd)
  139. def print_yearly(df, initial=INITIAL):
  140. df = df.copy()
  141. df['年份'] = pd.to_datetime(df['开仓时间']).dt.year
  142. prev = initial
  143. for y in sorted(df['年份'].unique()):
  144. sy = df[df['年份'] == y]
  145. pnl = sy['盈亏金额'].sum()
  146. wr = sy['盈利'].mean()
  147. end = sy['资金余额'].iloc[-1]
  148. print(f" {y}年: {len(sy):>3}笔 胜率{wr:.1%} | {pnl:>+12,.0f}元 ({pnl/prev:>+.2%}) → {end:,.0f}元")
  149. prev = end
  150. # ═══════════════════════════════════════════════════════════════
  151. # 主流程
  152. # ═══════════════════════════════════════════════════════════════
def main():
    """Run the whole optimization pipeline and print the report to stdout.

    Steps, in order:
      1. Load the trades, add the bin columns, drop the DEATH_ZONES market
         states and split the rest by year into train (2023-2024),
         validation (2025) and blind test (2026).
      2. Enumerate candidate rules on the training set with scan_combos.
      3. Re-evaluate each candidate on the validation set and keep those
         with at least 3 validation trades and a validation win rate >= 50%.
      4. Report how the kept rules perform on the blind-test set.
      5. Compare two full back-tests: Version B (death-zone filter only)
         and Version E (death-zone filter plus the kept rules).
      6. Print the final rule set.

    Returns early (after printing a message) when no candidate rule is found
    or when no candidate has enough validation samples.
    """
    print(SEP)
    print(' 舒适区规则优化 — 时间分层法')
    print(SEP)
    raw = load_trades()
    raw = add_bins(raw)
    # ── Step 1: split the data by period ─────────────────────
    print(f'\n{SEP}')
    print(' Step 1: 数据分层')
    print(SEP)
    all_nondead = raw[~raw['市场状态'].isin(DEATH_ZONES)].copy()
    train = all_nondead[all_nondead['年份'].isin([2023, 2024])].copy()
    valid = all_nondead[all_nondead['年份'] == 2025].copy()
    test = all_nondead[all_nondead['年份'] == 2026].copy()
    print(f'\n 全量非死亡区: {len(all_nondead)}笔 | 胜率{all_nondead["盈利"].mean():.1%}')
    print(f' 训练集(23-24): {len(train)}笔 | 胜率{train["盈利"].mean():.1%}')
    print(f' 验证集(2025): {len(valid)}笔 | 胜率{valid["盈利"].mean():.1%}')
    print(f' 盲测集(2026): {len(test)}笔 | 胜率{test["盈利"].mean():.1%}')
    # ── Step 2: enumerate combination rules on the training set ──
    print(f'\n{SEP}')
    print(' Step 2: 训练集(2023-2024) 组合规则穷举')
    print(f' 筛选条件: 笔数≥6, 胜率≥55%')
    print(SEP)
    combos_df = scan_combos(train, min_trades=6, min_wr=0.55, max_combos=2)
    if combos_df.empty:
        print(' 未找到满足条件的组合')
        return
    combos_df = combos_df.sort_values('win_rate', ascending=False).reset_index(drop=True)
    print(f'\n 共找到 {len(combos_df)} 条候选规则:')
    print(f' {"条件":<45} {"笔数":>5} {"胜率":>7} {"均盈亏":>10}')
    print(f' {"-"*45} {"-"*5} {"-"*7} {"-"*10}')
    for _, r in combos_df.iterrows():
        print(f' {r["cond_str"]:<45} {r["trades"]:>5} {r["win_rate"]:>7.1%} {r["avg_pnl"]:>+10,.0f}')
    # ── Step 3: evaluate every rule on the validation set ────
    print(f'\n{SEP}')
    print(' Step 3: 验证集(2025) 规则命中率与胜率')
    print(SEP)
    rule_valid_stats = []
    for _, rule_row in combos_df.iterrows():
        # Wrap the single rule in a list because apply_combo_rule takes a
        # list of rules.
        rule = [{'conditions': rule_row['conditions'], 'cond_str': rule_row['cond_str']}]
        mask = valid.apply(lambda r: apply_combo_rule(r, rule), axis=1)
        sub = valid[mask]
        # Rules hitting fewer than 3 validation trades are not evaluated.
        if len(sub) < 3:
            continue
        wr = sub['盈利'].mean()
        avg = sub['盈亏金额'].mean()
        rule_valid_stats.append({
            'cond_str': rule_row['cond_str'],
            'train_wr': rule_row['win_rate'],
            'train_n': rule_row['trades'],
            'valid_n': len(sub),
            'valid_wr': wr,
            'valid_avg': avg,
            'stable': abs(wr - rule_row['win_rate']) < 0.20,  # validation and training win rates differ by less than 0.20
        })
    valid_rules_df = pd.DataFrame(rule_valid_stats)
    if len(valid_rules_df) == 0:
        print(' 无规则在验证集有足够样本')
        return
    valid_rules_df = valid_rules_df.sort_values('valid_wr', ascending=False)
    print(f'\n {"条件":<45} {"训练":>8} {"验证":>8} {"稳定?":>6}')
    print(f' {"-"*45} {"-"*8} {"-"*8} {"-"*6}')
    for _, r in valid_rules_df.iterrows():
        stab = '✓' if r['stable'] else '✗偏移'
        print(f' {r["cond_str"]:<45} '
              f'{r["train_n"]}笔{r["train_wr"]:.0%} '
              f'{r["valid_n"]}笔{r["valid_wr"]:.0%} '
              f'{stab:>6}')
    # Keep only rules with a validation win rate >= 50% and at least 3
    # validation samples.
    # NOTE(review): the original comment called these "stable" rules, but the
    # 'stable' flag computed above is only displayed and is not part of this
    # filter — confirm whether it should be.
    stable_rules = valid_rules_df[
        (valid_rules_df['valid_wr'] >= 0.50) &
        (valid_rules_df['valid_n'] >= 3)
    ].copy()
    print(f'\n 通过验证的规则: {len(stable_rules)}条')
    passed_rule_list = []
    for _, r in stable_rules.iterrows():
        # Recover the conditions dict from combos_df via the rule's cond_str.
        orig = combos_df[combos_df['cond_str'] == r['cond_str']].iloc[0]
        passed_rule_list.append({
            'conditions': orig['conditions'],
            'cond_str': r['cond_str'],
            'train_wr': r['train_wr'],
            'valid_wr': r['valid_wr'],
        })
        print(f' ✓ {r["cond_str"]} | 训练{r["train_wr"]:.0%} → 验证{r["valid_wr"]:.0%}')
    if not passed_rule_list:
        print('\n 无规则通过验证,将仅使用死亡区过滤(Version B)')
        # The list is already empty here; this reassignment changes nothing.
        passed_rule_list = []
    # ── Step 4: blind-test set (2026) check ──────────────────
    print(f'\n{SEP}')
    print(' Step 4: 盲测集(2026) — 规则锁定后不允许回看调整')
    print(SEP)
    if passed_rule_list:
        test_mask = test.apply(lambda r: apply_combo_rule(r, passed_rule_list), axis=1)
        test_pass = test[test_mask]
        test_skip = test[~test_mask]
        print(f'\n 规则命中: {len(test_pass)}笔, 胜率{test_pass["盈利"].mean():.1%}, '
              f'均盈亏{test_pass["盈亏金额"].mean():+,.0f}元')
        print(f' 规则未命中: {len(test_skip)}笔, 胜率{test_skip["盈利"].mean():.1%}, '
              f'均盈亏{test_skip["盈亏金额"].mean():+,.0f}元')
        print()
        # Per-trade listing; the rule match is re-evaluated for every row.
        for _, r in test.iterrows():
            hit = '★命中' if apply_combo_rule(r, passed_rule_list) else ' 跳过'
            win = 'WIN ' if r['盈利'] else 'LOSS'
            print(f' {hit} {win} {str(r["开仓时间"])[:10]} | '
                  f'{r["市场状态"]:<12} {r["波动率水平"]:<5} {r["RSI区域"]:<10} | '
                  f'{r["盈亏金额"]:>+9,.0f}元')
    else:
        # test_pass is not read anywhere after this point.
        test_pass = test
    # ── Step 5: full back-test comparison ────────────────────
    print(f'\n{SEP}')
    print(' Step 5: 完整回测对比')
    print(SEP)
    # Version B: death-zone exclusion only.
    vB_df = raw[~raw['市场状态'].isin(DEATH_ZONES)].copy()
    vB = simulate_equity(vB_df)
    # Version E: death-zone exclusion plus the new combination rules derived
    # from the time-split procedure.  The mask is evaluated over every year
    # in raw, including the years the rules were derived from.
    if passed_rule_list:
        mask_E = raw.apply(
            lambda r: (r['市场状态'] not in DEATH_ZONES) and apply_combo_rule(r, passed_rule_list),
            axis=1
        )
        vE_df = raw[mask_E].copy()
    else:
        vE_df = vB_df.copy()
        print(' (无有效组合规则,Version E = Version B)')
    vE = simulate_equity(vE_df)
    sB = stats(vB)
    sE = stats(vE)
    print(f'\n {"指标":<14} {"Version B(排死亡区)":>20} {"Version E(新组合规则)":>22}')
    print(f' {"-"*14} {"-"*20} {"-"*22}')
    # (display label, key in the stats dict, format code understood by fv)
    rows = [
        ('交易笔数', 'n', 'd'),
        ('胜率', 'wr', 'pct'),
        ('盈亏比', 'plr', 'f2'),
        ('总收益率', 'ret', 'pct2'),
        ('最终资金', 'cap', 'cap'),
        ('总盈亏', 'pnl', 'money'),
        ('最大回撤', 'dd', 'pct'),
    ]
    def fv(s, k, fmt):
        # Format one stats value; stats() returns None for an empty trade
        # set, which is shown as 'N/A'.
        if s is None: return 'N/A'
        v = s[k]
        if fmt == 'd': return str(int(v))
        if fmt == 'pct': return f'{v:.1%}'
        if fmt == 'pct2': return f'{v:+.2%}'
        if fmt == 'f2': return f'{v:.2f}'
        if fmt == 'money':return f'{v:+,.0f}'
        if fmt == 'cap': return f'{v:,.0f}'
        return str(v)
    for name, k, fmt in rows:
        print(f' {name:<14} {fv(sB,k,fmt):>20} {fv(sE,k,fmt):>22}')
    print(f'\n Version B 年度明细:')
    print_yearly(vB)
    print(f'\n Version E 年度明细:')
    print_yearly(vE)
    # ── Step 6: final rule output ────────────────────────────
    print(f'\n{SEP}')
    print(' Step 6: 最终规则(可直接用于生产)')
    print(SEP)
    print(f'\n [必要条件] 市场状态 NOT IN {sorted(DEATH_ZONES)}')
    if passed_rule_list:
        print(f' [充分条件] 满足以下任意一条组合规则:')
        for i, rule in enumerate(passed_rule_list, 1):
            print(f' 规则{i}: {rule["cond_str"]}')
            print(f' 训练胜率{rule["train_wr"]:.0%} → 验证胜率{rule["valid_wr"]:.0%}')
    else:
        print(f' [充分条件] 无(仅死亡区过滤即为最优)')
    print()
    print(SEP)
    print(' 优化完成')
    print(SEP)


if __name__ == '__main__':
    main()