#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
check_second_layer.py

Second-layer analysis: after excluding the dead zone, find the indicators
that actually separate 2023-2024 from 2025-2026.
"""
  6. import sys, io
  7. if sys.platform == 'win32':
  8. sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
  9. sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding='utf-8')
  10. import pandas as pd
  11. import numpy as np
  12. import warnings
  13. warnings.filterwarnings('ignore')
  14. df = pd.read_csv(
  15. 'D:/work/project/cyb50-quant/cat-fly/t1/t1_trades_with_environment_20260327_141655.csv',
  16. encoding='utf-8-sig'
  17. )
  18. cols = [
  19. '交易方向','开仓时间','平仓时间','开仓价格','平仓价格','仓位',
  20. '盈亏金额','盈亏百分比','退出原因','持仓周期数','持仓小时数',
  21. 'T1调整','原平仓时间','原平仓价格','原盈亏','盈亏变化',
  22. '入场信号','开仓市值','平仓时资金','市场状态',
  23. '趋势短期','趋势中期','趋势强度','波动率分位','波动率水平',
  24. '成交量分位','布林带位置','布林带区域','RSI分位','RSI区域',
  25. '1日动量','入场价格'
  26. ]
  27. df.columns = cols
  28. df['开仓时间'] = pd.to_datetime(df['开仓时间'])
  29. df['年份'] = df['开仓时间'].dt.year
  30. df['盈利'] = df['盈亏金额'] > 0
  31. df['好年份'] = df['年份'].isin([2025, 2026])
  32. # 排除死亡区
  33. gz = df[df['市场状态'] != '下跌趋势低波'].copy()
  34. SEP = '=' * 70
  35. def show(t):
  36. print(f'\n{SEP}\n {t}\n{SEP}')
  37. # ─── 在非死亡区,比较好年份和差年份各指标分布 ───────────────────
  38. show('非死亡区内:好年份(2025-2026) vs 差年份(2023-2024) 指标对比')
  39. good_gz = gz[gz['好年份']]
  40. bad_gz = gz[~gz['好年份']]
  41. print(f'\n 差年份(非死亡区): {len(bad_gz)}笔, 胜率{bad_gz["盈利"].mean():.1%}')
  42. print(f' 好年份(非死亡区): {len(good_gz)}笔, 胜率{good_gz["盈利"].mean():.1%}')
  43. print()
  44. print(' 各市场状态分布:')
  45. for ms in gz['市场状态'].value_counts().index:
  46. b = (bad_gz['市场状态'] == ms).mean()
  47. g = (good_gz['市场状态'] == ms).mean()
  48. b_wr = bad_gz[bad_gz['市场状态'] == ms]['盈利'].mean() if (bad_gz['市场状态'] == ms).any() else float('nan')
  49. g_wr = good_gz[good_gz['市场状态'] == ms]['盈利'].mean() if (good_gz['市场状态'] == ms).any() else float('nan')
  50. marker = ' ←' if abs(g - b) > 0.08 else ''
  51. print(f' {ms:<15} | 差:{b:.1%}(胜率{b_wr:.1%}) → 好:{g:.1%}(胜率{g_wr:.1%}){marker}')
  52. print()
  53. print(' 各波动率水平分布:')
  54. for vl in ['极低', '低', '中等', '高', '极高']:
  55. b = (bad_gz['波动率水平'] == vl).mean()
  56. g = (good_gz['波动率水平'] == vl).mean()
  57. b_wr = bad_gz[bad_gz['波动率水平'] == vl]['盈利'].mean() if (bad_gz['波动率水平'] == vl).any() else float('nan')
  58. g_wr = good_gz[good_gz['波动率水平'] == vl]['盈利'].mean() if (good_gz['波动率水平'] == vl).any() else float('nan')
  59. marker = ' ←' if abs(g - b) > 0.05 else ''
  60. print(f' {vl:<8} | 差:{b:.1%}(胜率{b_wr:.1%}) → 好:{g:.1%}(胜率{g_wr:.1%}){marker}')
  61. print()
  62. print(' 各RSI区域分布:')
  63. for rsi in gz['RSI区域'].value_counts().index:
  64. b = (bad_gz['RSI区域'] == rsi).mean()
  65. g = (good_gz['RSI区域'] == rsi).mean()
  66. b_wr = bad_gz[bad_gz['RSI区域'] == rsi]['盈利'].mean() if (bad_gz['RSI区域'] == rsi).any() else float('nan')
  67. g_wr = good_gz[good_gz['RSI区域'] == rsi]['盈利'].mean() if (good_gz['RSI区域'] == rsi).any() else float('nan')
  68. marker = ' ←' if abs(g - b) > 0.05 else ''
  69. print(f' {rsi:<10} | 差:{b:.1%}(胜率{b_wr:.1%}) → 好:{g:.1%}(胜率{g_wr:.1%}){marker}')
  70. # ─── 在非死亡区,找出在差年份里最集中的"毒药"组合 ──────────────
  71. show('非死亡区内:差年份的主要亏损来自哪些组合')
  72. combos = []
  73. for ms in gz['市场状态'].unique():
  74. for vl in gz['波动率水平'].unique():
  75. sub = bad_gz[(bad_gz['市场状态'] == ms) & (bad_gz['波动率水平'] == vl)]
  76. if len(sub) < 4:
  77. continue
  78. wr = sub['盈利'].mean()
  79. avg = sub['盈亏金额'].mean()
  80. total = sub['盈亏金额'].sum()
  81. combos.append((f'{ms} × {vl}', len(sub), wr, avg, total))
  82. combos.sort(key=lambda x: x[4]) # 按总盈亏升序(最亏排前)
  83. print(f'\n 差年份非死亡区内各组合(按总盈亏升序):')
  84. print(f' {"组合":<28} 笔数 胜率 均盈亏 总盈亏')
  85. for cond, n, wr, avg, total in combos[:12]:
  86. print(f' {cond:<28} {n:>4} {wr:.1%} {avg:>+8,.0f}元 {total:>+10,.0f}元')
  87. # ─── 逐步叠加过滤条件,看能否让差年份也变好 ───────────────────
  88. show('逐步叠加过滤:能否让差年份胜率提升至可接受水平')
  89. # 基础: 非死亡区
  90. print(f'\n [基础] 非死亡区: {len(gz)}笔, 胜率{gz["盈利"].mean():.1%}')
  91. # 过滤1: 再排除震荡低波
  92. f1 = gz[gz['市场状态'] != '震荡低波']
  93. print(f' [+过滤震荡低波] {len(f1)}笔, 胜率{f1["盈利"].mean():.1%}')
  94. for y in [2023, 2024, 2025, 2026]:
  95. sy = f1[f1['年份'] == y]
  96. print(f' {y}年: {len(sy)}笔, 胜率{sy["盈利"].mean():.1%}, {sy["盈亏金额"].sum():+,.0f}元')
  97. # 过滤2: 再排除 下跌趋势高波×极高波动率
  98. f2 = f1[~((f1['市场状态'] == '下跌趋势高波') & (f1['波动率水平'] == '极高'))]
  99. print(f'\n [+过滤 下跌趋势高波×极高] {len(f2)}笔, 胜率{f2["盈利"].mean():.1%}')
  100. for y in [2023, 2024, 2025, 2026]:
  101. sy = f2[f2['年份'] == y]
  102. if len(sy) == 0:
  103. continue
  104. print(f' {y}年: {len(sy)}笔, 胜率{sy["盈利"].mean():.1%}, {sy["盈亏金额"].sum():+,.0f}元')
  105. # 过滤3: 再排除T+1
  106. f3 = f2[~f2['T1调整'].str.contains('T0', na=False)]
  107. print(f'\n [+过滤T+1调整] {len(f3)}笔, 胜率{f3["盈利"].mean():.1%}')
  108. for y in [2023, 2024, 2025, 2026]:
  109. sy = f3[f3['年份'] == y]
  110. if len(sy) == 0:
  111. continue
  112. print(f' {y}年: {len(sy)}笔, 胜率{sy["盈利"].mean():.1%}, {sy["盈亏金额"].sum():+,.0f}元')
  113. # 过滤4: 再排除 波动率分位>0.7(非死亡区内高波动仍然不好)
  114. f4 = f3[f3['波动率分位'].fillna(0) <= 0.70]
  115. print(f'\n [+过滤 波动率分位>0.70] {len(f4)}笔, 胜率{f4["盈利"].mean():.1%}')
  116. for y in [2023, 2024, 2025, 2026]:
  117. sy = f4[f4['年份'] == y]
  118. if len(sy) == 0:
  119. continue
  120. print(f' {y}年: {len(sy)}笔, 胜率{sy["盈利"].mean():.1%}, {sy["盈亏金额"].sum():+,.0f}元')
  121. # ─── 检查各层过滤对2026年的影响 ─────────────────────────────
  122. show('关键检查:各过滤条件对2026年17笔的影响')
  123. y26 = df[df['年份'] == 2026]
  124. print()
  125. c1 = y26['市场状态'] == '下跌趋势低波'
  126. c2 = y26['市场状态'] == '震荡低波'
  127. c3 = (y26['市场状态'] == '下跌趋势高波') & (y26['波动率水平'] == '极高')
  128. c4 = y26['T1调整'].str.contains('T0', na=False)
  129. c5 = y26['波动率分位'].fillna(0) > 0.70
  130. print(f' 过滤条件 2026年命中 胜率 盈亏')
  131. for name, cond in [
  132. ('下跌趋势低波', c1),
  133. ('震荡低波', c2),
  134. ('下跌趋势高波×极高', c3),
  135. ('T+1调整', c4),
  136. ('波动率分位>0.70', c5),
  137. ]:
  138. hits = y26[cond]
  139. missed = y26[~cond]
  140. if len(hits) == 0:
  141. print(f' {name:<30} 0笔 (不影响)')
  142. else:
  143. wr = hits['盈利'].mean()
  144. total = hits['盈亏金额'].sum()
  145. print(f' {name:<30} {len(hits)}笔 | 胜率{wr:.1%} | 盈亏{total:+,.0f}元 ← 误杀!')
  146. # 所有条件组合对2026的影响
  147. all_filter = c1 | c2 | c3 | c4 | c5
  148. kept_26 = y26[~all_filter]
  149. lost_26 = y26[all_filter]
  150. print(f'\n 全部条件叠加: 保留{len(kept_26)}笔(胜率{kept_26["盈利"].mean():.1%}, {kept_26["盈亏金额"].sum():+,.0f}元) | 过滤{len(lost_26)}笔(胜率{lost_26["盈利"].mean():.1%}, {lost_26["盈亏金额"].sum():+,.0f}元)')
  151. # ─── 找到不误杀2026且能改善差年份的最优组合 ──────────────────
  152. show('目标:找出 不误杀2026,且差年份胜率最高 的过滤策略')
  153. # 策略A: 只排除死亡区
  154. sA = df[df['市场状态'] != '下跌趋势低波']
  155. # 策略B: 排除死亡区 + 震荡低波
  156. sB = df[~df['市场状态'].isin(['下跌趋势低波', '震荡低波'])]
  157. # 策略C: 排除死亡区 + 下跌趋势高波×极高
  158. sC = df[~(df['市场状态'] == '下跌趋势低波') &
  159. ~((df['市场状态'] == '下跌趋势高波') & (df['波动率水平'] == '极高'))]
  160. # 策略D: 排除死亡区 + T+1
  161. sD = df[(df['市场状态'] != '下跌趋势低波') & (~df['T1调整'].str.contains('T0', na=False))]
  162. # 策略E: 排除死亡区 + 震荡低波 + T+1
  163. sE = df[~df['市场状态'].isin(['下跌趋势低波', '震荡低波']) &
  164. (~df['T1调整'].str.contains('T0', na=False))]
  165. print()
  166. for name, s in [('A:仅排死亡区', sA), ('B:+排震荡低波', sB), ('C:+排下跌高波×极高', sC),
  167. ('D:+排T+1', sD), ('E:+排震荡低波&T+1', sE)]:
  168. y26s = s[s['年份'] == 2026]
  169. y25s = s[s['年份'] == 2025]
  170. y24s = s[s['年份'] == 2024]
  171. y23s = s[s['年份'] == 2023]
  172. total_pnl = s['盈亏金额'].sum()
  173. print(f'\n 策略{name}: {len(s)}笔, 总盈亏{total_pnl:+,.0f}元')
  174. for yr, sy in [('2023', y23s), ('2024', y24s), ('2025', y25s), ('2026', y26s)]:
  175. if len(sy) == 0:
  176. continue
  177. print(f' {yr}年: {len(sy)}笔, 胜率{sy["盈利"].mean():.1%}, {sy["盈亏金额"].sum():+,.0f}元')