| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204 |
- #!/usr/bin/env python3
- # -*- coding: utf-8 -*-
- """
- 第二层分析:在排除死亡区后,找出真正把2023-2024和2025-2026分开的指标
- """
- import sys, io
# On Windows, force UTF-8 on both standard streams (presumably so the Chinese
# report text prints regardless of the console code page — confirm).
if sys.platform == 'win32':
    for _stream_name in ('stdout', 'stderr'):
        _stream = getattr(sys, _stream_name)
        if hasattr(_stream, 'reconfigure'):
            # Switch the encoding in place; the stream object, its error
            # handler and its buffering settings are kept.
            _stream.reconfigure(encoding='utf-8')
        elif hasattr(_stream, 'buffer'):
            # Fallback for text streams without reconfigure(): re-wrap the
            # underlying binary buffer, as the script originally did.
            setattr(sys, _stream_name,
                    io.TextIOWrapper(_stream.buffer, encoding='utf-8'))
        # Streams that are None or expose neither attribute are left untouched
        # instead of raising AttributeError.
- import pandas as pd
- import numpy as np
- import warnings
- warnings.filterwarnings('ignore')
# Load the trades-with-environment CSV (utf-8-sig strips a leading BOM if present).
df = pd.read_csv(
    'D:/work/project/cyb50-quant/cat-fly/t1/t1_trades_with_environment_20260327_141655.csv',
    encoding='utf-8-sig',
)

# Column names are assigned positionally; pandas raises if the file's column
# count differs from this list.
cols = [
    '交易方向', '开仓时间', '平仓时间', '开仓价格', '平仓价格', '仓位',
    '盈亏金额', '盈亏百分比', '退出原因', '持仓周期数', '持仓小时数',
    'T1调整', '原平仓时间', '原平仓价格', '原盈亏', '盈亏变化',
    '入场信号', '开仓市值', '平仓时资金', '市场状态',
    '趋势短期', '趋势中期', '趋势强度', '波动率分位', '波动率水平',
    '成交量分位', '布林带位置', '布林带区域', 'RSI分位', 'RSI区域',
    '1日动量', '入场价格',
]
df.columns = cols

# Derived columns: entry year, win flag (P&L > 0) and "good year" flag (2025/2026).
_open_time = pd.to_datetime(df['开仓时间'])
df['开仓时间'] = _open_time
df['年份'] = _open_time.dt.year
_pnl = df['盈亏金额']
df['盈利'] = _pnl > 0
df['好年份'] = df['年份'].isin([2025, 2026])

# Drop the "dead zone" market state; gz is an independent copy of the remaining trades.
_outside_dead_zone = df['市场状态'] != '下跌趋势低波'
gz = df[_outside_dead_zone].copy()
# Horizontal rule used to frame section titles in the report.
SEP = '=' * 70


def show(t):
    """Print section title *t* framed by a ``SEP`` rule above and below.

    The output starts with a blank line so consecutive sections are separated.
    """
    banner_lines = ('', SEP, f' {t}', SEP)
    print('\n'.join(banner_lines))
# ─── Outside the dead zone: compare how each indicator is distributed in the
# ─── good years versus the bad years ────────────────────────────────────────
show('非死亡区内:好年份(2025-2026) vs 差年份(2023-2024) 指标对比')
good_gz = gz[gz['好年份']]
bad_gz = gz[~gz['好年份']]

# Headline trade count and win rate for each group (bad years first).
for _lead, _label, _part in (('\n', '差年份', bad_gz), ('', '好年份', good_gz)):
    print(f'{_lead} {_label}(非死亡区): {len(_part)}笔, 胜率{_part["盈利"].mean():.1%}')


def _print_distribution(title, column, categories, width, threshold):
    """Print, per category of *column*, its share and win rate in bad vs good years.

    title      -- heading line printed after a blank line
    column     -- categorical column of bad_gz / good_gz to break down
    categories -- category values to report, in print order
    width      -- left-aligned field width for the category label
    threshold  -- share difference above which the row is flagged with ' ←'
    """
    print()
    print(title)
    for category in categories:
        in_bad = bad_gz[column] == category
        in_good = good_gz[column] == category
        bad_share = in_bad.mean()
        good_share = in_good.mean()
        # Win rate is reported as NaN when the group has no trade in this category.
        if in_bad.any():
            bad_wr = bad_gz.loc[in_bad, '盈利'].mean()
        else:
            bad_wr = float('nan')
        if in_good.any():
            good_wr = good_gz.loc[in_good, '盈利'].mean()
        else:
            good_wr = float('nan')
        flag = ' ←' if abs(good_share - bad_share) > threshold else ''
        print(f' {category:<{width}} | 差:{bad_share:.1%}(胜率{bad_wr:.1%}) → 好:{good_share:.1%}(胜率{good_wr:.1%}){flag}')


# Market state and RSI zone are listed by descending frequency in gz;
# volatility levels use a fixed low-to-high order.
_print_distribution(' 各市场状态分布:', '市场状态',
                    gz['市场状态'].value_counts().index, 15, 0.08)
_print_distribution(' 各波动率水平分布:', '波动率水平',
                    ['极低', '低', '中等', '高', '极高'], 8, 0.05)
_print_distribution(' 各RSI区域分布:', 'RSI区域',
                    gz['RSI区域'].value_counts().index, 10, 0.05)
# ─── Outside the dead zone: which (market state × volatility level)
# ─── combinations concentrate the bad-year losses ───────────────────────────
show('非死亡区内:差年份的主要亏损来自哪些组合')

_vol_levels = gz['波动率水平'].unique()
combos = []
for ms in gz['市场状态'].unique():
    state_rows = bad_gz[bad_gz['市场状态'] == ms]
    for vl in _vol_levels:
        sub = state_rows[state_rows['波动率水平'] == vl]
        # Combinations with fewer than 4 bad-year trades are ignored.
        if len(sub) >= 4:
            pnl = sub['盈亏金额']
            combos.append(
                (f'{ms} × {vl}', len(sub), sub['盈利'].mean(), pnl.mean(), pnl.sum())
            )

# Ascending by total P&L, so the biggest losers come first.
combos = sorted(combos, key=lambda row: row[4])

print('\n 差年份非死亡区内各组合(按总盈亏升序):')
print(f' {"组合":<28} 笔数 胜率 均盈亏 总盈亏')
# Only the 12 worst combinations are shown.
for cond, n, wr, avg, total in combos[:12]:
    print(f' {cond:<28} {n:>4} {wr:.1%} {avg:>+8,.0f}元 {total:>+10,.0f}元')
# ─── Stack filters one at a time and check whether the bad years improve ────
show('逐步叠加过滤:能否让差年份胜率提升至可接受水平')


def _print_yearly(trades, years=(2023, 2024, 2025, 2026)):
    """Print trade count, win rate and total P&L of *trades* for each year.

    Years with no trades are skipped, so no "0笔 / nan%" rows are printed.
    Every filter step uses this same rule; previously the first step printed
    empty years while the later steps skipped them.
    """
    for y in years:
        sy = trades[trades['年份'] == y]
        if len(sy) == 0:
            continue
        print(f' {y}年: {len(sy)}笔, 胜率{sy["盈利"].mean():.1%}, {sy["盈亏金额"].sum():+,.0f}元')


# Baseline: everything outside the dead zone.
print(f'\n [基础] 非死亡区: {len(gz)}笔, 胜率{gz["盈利"].mean():.1%}')

# Filter 1: additionally drop the '震荡低波' market state.
f1 = gz[gz['市场状态'] != '震荡低波']
print(f' [+过滤震荡低波] {len(f1)}笔, 胜率{f1["盈利"].mean():.1%}')
_print_yearly(f1)

# Filter 2: additionally drop '下跌趋势高波' trades whose volatility level is '极高'.
f2 = f1[~((f1['市场状态'] == '下跌趋势高波') & (f1['波动率水平'] == '极高'))]
print(f'\n [+过滤 下跌趋势高波×极高] {len(f2)}笔, 胜率{f2["盈利"].mean():.1%}')
_print_yearly(f2)

# Filter 3: additionally drop rows whose 'T1调整' text contains 'T0'
# (missing values are kept).
# NOTE(review): the step is labelled "T+1调整" but matches the substring 'T0' —
# confirm that 'T0' is the marker of a T+1-adjusted trade.
f3 = f2[~f2['T1调整'].str.contains('T0', na=False)]
print(f'\n [+过滤T+1调整] {len(f3)}笔, 胜率{f3["盈利"].mean():.1%}')
_print_yearly(f3)

# Filter 4: additionally drop trades with volatility percentile above 0.70;
# a missing percentile is treated as 0 and therefore kept.
f4 = f3[f3['波动率分位'].fillna(0) <= 0.70]
print(f'\n [+过滤 波动率分位>0.70] {len(f4)}笔, 胜率{f4["盈利"].mean():.1%}')
_print_yearly(f4)
# ─── Check how each filter layer affects the 2026 trades ────────────────────
y26 = df[df['年份'] == 2026]
# The trade count in the header comes from the data instead of a hard-coded 17,
# so the title stays correct if the input file changes.
show(f'关键检查:各过滤条件对2026年{len(y26)}笔的影响')
print()

# One boolean mask over the 2026 trades per filter condition.
c1 = y26['市场状态'] == '下跌趋势低波'
c2 = y26['市场状态'] == '震荡低波'
c3 = (y26['市场状态'] == '下跌趋势高波') & (y26['波动率水平'] == '极高')
# NOTE(review): labelled "T+1调整" below but matches the substring 'T0' in
# 'T1调整' — confirm that 'T0' is the intended marker.
c4 = y26['T1调整'].str.contains('T0', na=False)
# A missing volatility percentile is treated as 0, i.e. never filtered.
c5 = y26['波动率分位'].fillna(0) > 0.70

print(' 过滤条件 2026年命中 胜率 盈亏')
for name, cond in [
    ('下跌趋势低波', c1),
    ('震荡低波', c2),
    ('下跌趋势高波×极高', c3),
    ('T+1调整', c4),
    ('波动率分位>0.70', c5),
]:
    hits = y26[cond]
    if len(hits) == 0:
        print(f' {name:<30} 0笔 (不影响)')
        continue
    # Any 2026 trade removed by a filter is reported with its win rate and P&L.
    wr = hits['盈利'].mean()
    total = hits['盈亏金额'].sum()
    print(f' {name:<30} {len(hits)}笔 | 胜率{wr:.1%} | 盈亏{total:+,.0f}元 ← 误杀!')


def _summarize(trades):
    """Return the 'N笔(胜率x%, ±y元)' summary fragment for *trades*."""
    return f'{len(trades)}笔(胜率{trades["盈利"].mean():.1%}, {trades["盈亏金额"].sum():+,.0f}元)'


# Combined effect of all five conditions on 2026.
all_filter = c1 | c2 | c3 | c4 | c5
kept_26 = y26[~all_filter]
lost_26 = y26[all_filter]
print(f'\n 全部条件叠加: 保留{_summarize(kept_26)} | 过滤{_summarize(lost_26)}')
# ─── Compare candidate filter strategies: aim to keep the 2026 trades while
# ─── getting the best bad-year win rate ─────────────────────────────────────
show('目标:找出 不误杀2026,且差年份胜率最高 的过滤策略')

# Building-block masks over the full trade table.
_dead_zone = df['市场状态'] == '下跌趋势低波'
_dead_or_range_low = df['市场状态'].isin(['下跌趋势低波', '震荡低波'])
_down_high_extreme = (df['市场状态'] == '下跌趋势高波') & (df['波动率水平'] == '极高')
# NOTE(review): used as the "T+1" exclusion but matches the substring 'T0' in
# 'T1调整' — confirm that 'T0' is the intended marker.
_t1_flag = df['T1调整'].str.contains('T0', na=False)

# Strategy A: drop the dead zone only.
sA = df[~_dead_zone]
# Strategy B: dead zone + '震荡低波'.
sB = df[~_dead_or_range_low]
# Strategy C: dead zone + ('下跌趋势高波' with '极高' volatility).
sC = df[~_dead_zone & ~_down_high_extreme]
# Strategy D: dead zone + T+1-flagged rows.
sD = df[~_dead_zone & ~_t1_flag]
# Strategy E: dead zone + '震荡低波' + T+1-flagged rows.
sE = df[~_dead_or_range_low & ~_t1_flag]

_strategies = [
    ('A:仅排死亡区', sA),
    ('B:+排震荡低波', sB),
    ('C:+排下跌高波×极高', sC),
    ('D:+排T+1', sD),
    ('E:+排震荡低波&T+1', sE),
]

print()
for name, s in _strategies:
    total_pnl = s['盈亏金额'].sum()
    print(f'\n 策略{name}: {len(s)}笔, 总盈亏{total_pnl:+,.0f}元')
    # Per-year breakdown; years with no remaining trades are skipped.
    for yr in (2023, 2024, 2025, 2026):
        sy = s[s['年份'] == yr]
        if len(sy) == 0:
            continue
        print(f' {yr}年: {len(sy)}笔, 胜率{sy["盈利"].mean():.1%}, {sy["盈亏金额"].sum():+,.0f}元')
|