#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Second-layer analysis of the T1 trade log.

After excluding the "dead zone" market state ('下跌趋势低波'), compare the
2023-2024 trades with the 2025-2026 trades to find the indicators that
separate the two periods, then evaluate candidate filters year by year.

Run as a script. An optional first command-line argument overrides the
default CSV path.
"""
import io
import sys
import warnings

import numpy as np  # not used below; kept because other tooling may rely on it
import pandas as pd

DEFAULT_CSV = 'D:/work/project/cyb50-quant/cat-fly/t1/t1_trades_with_environment_20260327_141655.csv'

# Column names are assigned positionally, so this list must match the CSV's
# column order exactly (pandas raises on a length mismatch).
COLUMNS = [
    '交易方向', '开仓时间', '平仓时间', '开仓价格', '平仓价格', '仓位',
    '盈亏金额', '盈亏百分比', '退出原因', '持仓周期数', '持仓小时数',
    'T1调整', '原平仓时间', '原平仓价格', '原盈亏', '盈亏变化',
    '入场信号', '开仓市值', '平仓时资金', '市场状态',
    '趋势短期', '趋势中期', '趋势强度', '波动率分位', '波动率水平',
    '成交量分位', '布林带位置', '布林带区域', 'RSI分位', 'RSI区域',
    '1日动量', '入场价格',
]

YEARS = (2023, 2024, 2025, 2026)
GOOD_YEARS = [2025, 2026]

DEAD_ZONE = '下跌趋势低波'
RANGE_LOW_VOL = '震荡低波'
DOWN_HIGH_VOL = '下跌趋势高波'
EXTREME_VOL = '极高'
VOL_LEVELS = ['极低', '低', '中等', '高', '极高']
VOL_PCTL_CAP = 0.70
MIN_COMBO_TRADES = 4
TOP_COMBOS = 12

SEP = '=' * 70


def show(t):
    """Print section title *t* framed by two separator rules."""
    print(f'\n{SEP}\n {t}\n{SEP}')


def load_trades(path=DEFAULT_CSV):
    """Read the trade CSV and add the derived '年份', '盈利', '好年份' columns."""
    df = pd.read_csv(path, encoding='utf-8-sig')
    df.columns = COLUMNS
    df['开仓时间'] = pd.to_datetime(df['开仓时间'])
    df['年份'] = df['开仓时间'].dt.year
    df['盈利'] = df['盈亏金额'] > 0
    df['好年份'] = df['年份'].isin(GOOD_YEARS)
    return df


def t0_mask(frame):
    """Return a boolean mask of rows whose 'T1调整' text contains 'T0'.

    Missing values count as "no match".
    NOTE(review): the report labels this filter "T+1调整"; confirm that a
    'T0' marker in this column is what identifies T+1-adjusted trades.
    """
    return frame['T1调整'].str.contains('T0', na=False)


def high_vol_downtrend_mask(frame):
    """Return a mask of rows in '下跌趋势高波' with volatility level '极高'."""
    return (frame['市场状态'] == DOWN_HIGH_VOL) & (frame['波动率水平'] == EXTREME_VOL)


def count_and_win_rate(frame):
    """Format the trade count and win rate of *frame* as '<n>笔, 胜率<pct>'."""
    win_rate = frame['盈利'].mean()
    return f'{len(frame)}笔, 胜率{win_rate:.1%}'


def year_lines(frame, years=YEARS):
    """Return one formatted summary line per year that has trades.

    Years with no trades are skipped, so the report never shows a
    '0笔, 胜率nan%' row.
    """
    lines = []
    for year in years:
        trades = frame[frame['年份'] == year]
        if len(trades) == 0:
            continue
        pnl = trades['盈亏金额'].sum()
        lines.append(f' {year}年: {count_and_win_rate(trades)}, {pnl:+,.0f}元')
    return lines


def share_and_win_rate(frame, column, value):
    """Return (share of rows where column == value, win rate of those rows).

    The win rate is NaN when no row matches.
    """
    hit = frame[column] == value
    win_rate = frame[hit]['盈利'].mean() if hit.any() else float('nan')
    return hit.mean(), win_rate


def distribution_lines(bad, good, column, categories, width, threshold):
    """Return formatted bad-vs-good comparison lines for each category.

    A trailing ' ←' marks categories whose share differs between the two
    groups by more than *threshold*.
    """
    lines = []
    for category in categories:
        b, b_wr = share_and_win_rate(bad, column, category)
        g, g_wr = share_and_win_rate(good, column, category)
        marker = ' ←' if abs(g - b) > threshold else ''
        lines.append(
            f' {category:<{width}} | 差:{b:.1%}(胜率{b_wr:.1%})'
            f' → 好:{g:.1%}(胜率{g_wr:.1%}){marker}'
        )
    return lines


def report_distributions(gz):
    """Compare indicator distributions of good vs bad years outside the dead zone."""
    show('非死亡区内:好年份(2025-2026) vs 差年份(2023-2024) 指标对比')

    good = gz[gz['好年份']]
    bad = gz[~gz['好年份']]

    print(f'\n 差年份(非死亡区): {count_and_win_rate(bad)}')
    print(f' 好年份(非死亡区): {count_and_win_rate(good)}')

    sections = [
        (' 各市场状态分布:', '市场状态', gz['市场状态'].value_counts().index, 15, 0.08),
        (' 各波动率水平分布:', '波动率水平', VOL_LEVELS, 8, 0.05),
        (' 各RSI区域分布:', 'RSI区域', gz['RSI区域'].value_counts().index, 10, 0.05),
    ]
    for heading, column, categories, width, threshold in sections:
        print()
        print(heading)
        for line in distribution_lines(bad, good, column, categories, width, threshold):
            print(line)


def report_losing_combos(gz):
    """List the market-state x volatility-level combos that lose most in bad years."""
    show('非死亡区内:差年份的主要亏损来自哪些组合')

    bad = gz[~gz['好年份']]
    combos = []
    # Nested loops (rather than groupby) keep the first-seen order, which is
    # the tie-break order of the stable sort below.
    for ms in gz['市场状态'].unique():
        for vl in gz['波动率水平'].unique():
            sub = bad[(bad['市场状态'] == ms) & (bad['波动率水平'] == vl)]
            if len(sub) < MIN_COMBO_TRADES:
                continue
            pnl = sub['盈亏金额']
            combos.append((f'{ms} × {vl}', len(sub), sub['盈利'].mean(), pnl.mean(), pnl.sum()))

    combos.sort(key=lambda row: row[4])  # ascending total PnL: worst first

    print(f'\n 差年份非死亡区内各组合(按总盈亏升序):')
    print(f' {"组合":<28} 笔数 胜率 均盈亏 总盈亏')
    for cond, n, wr, avg, total in combos[:TOP_COMBOS]:
        print(f' {cond:<28} {n:>4} {wr:.1%} {avg:>+8,.0f}元 {total:>+10,.0f}元')


def report_stepwise_filters(gz):
    """Apply the candidate filters cumulatively and print per-year results."""
    show('逐步叠加过滤:能否让差年份胜率提升至可接受水平')

    print(f'\n [基础] 非死亡区: {count_and_win_rate(gz)}')

    # Each step keeps the rows for which its predicate is True, applied on
    # top of the previous step's result.
    steps = [
        (' [+过滤震荡低波]', lambda t: t['市场状态'] != RANGE_LOW_VOL),
        ('\n [+过滤 下跌趋势高波×极高]', lambda t: ~high_vol_downtrend_mask(t)),
        ('\n [+过滤T+1调整]', lambda t: ~t0_mask(t)),
        # Missing percentiles are treated as 0, i.e. kept.
        ('\n [+过滤 波动率分位>0.70]', lambda t: t['波动率分位'].fillna(0) <= VOL_PCTL_CAP),
    ]
    current = gz
    for label, keep in steps:
        current = current[keep(current)]
        print(f'{label} {count_and_win_rate(current)}')
        for line in year_lines(current):
            print(line)


def report_2026_impact(df):
    """Show how many 2026 trades each filter condition would remove."""
    y26 = df[df['年份'] == 2026]
    # The count in the title is computed so it cannot go stale.
    show(f'关键检查:各过滤条件对2026年{len(y26)}笔的影响')
    print()

    conditions = [
        ('下跌趋势低波', y26['市场状态'] == DEAD_ZONE),
        ('震荡低波', y26['市场状态'] == RANGE_LOW_VOL),
        ('下跌趋势高波×极高', high_vol_downtrend_mask(y26)),
        ('T+1调整', t0_mask(y26)),
        ('波动率分位>0.70', y26['波动率分位'].fillna(0) > VOL_PCTL_CAP),
    ]

    print(f' 过滤条件 2026年命中 胜率 盈亏')
    for name, cond in conditions:
        hits = y26[cond]
        if len(hits) == 0:
            print(f' {name:<30} 0笔 (不影响)')
        else:
            wr = hits['盈利'].mean()
            total = hits['盈亏金额'].sum()
            print(f' {name:<30} {len(hits)}笔 | 胜率{wr:.1%} | 盈亏{total:+,.0f}元 ← 误杀!')

    # Combined effect of every condition on the 2026 trades.
    all_filter = conditions[0][1]
    for _, cond in conditions[1:]:
        all_filter = all_filter | cond
    kept = y26[~all_filter]
    lost = y26[all_filter]
    kept_wr, kept_pnl = kept['盈利'].mean(), kept['盈亏金额'].sum()
    lost_wr, lost_pnl = lost['盈利'].mean(), lost['盈亏金额'].sum()
    print(
        f'\n 全部条件叠加: 保留{len(kept)}笔(胜率{kept_wr:.1%}, {kept_pnl:+,.0f}元)'
        f' | 过滤{len(lost)}笔(胜率{lost_wr:.1%}, {lost_pnl:+,.0f}元)'
    )


def report_strategies(df):
    """Print totals and per-year results for the candidate strategies A-E."""
    show('目标:找出 不误杀2026,且差年份胜率最高 的过滤策略')

    state = df['市场状态']
    not_dead = state != DEAD_ZONE
    not_dead_or_range = ~state.isin([DEAD_ZONE, RANGE_LOW_VOL])
    not_t0 = ~t0_mask(df)

    strategies = [
        ('A:仅排死亡区', not_dead),
        ('B:+排震荡低波', not_dead_or_range),
        ('C:+排下跌高波×极高', not_dead & ~high_vol_downtrend_mask(df)),
        ('D:+排T+1', not_dead & not_t0),
        ('E:+排震荡低波&T+1', not_dead_or_range & not_t0),
    ]

    print()
    for name, keep in strategies:
        subset = df[keep]
        total_pnl = subset['盈亏金额'].sum()
        print(f'\n 策略{name}: {len(subset)}笔, 总盈亏{total_pnl:+,.0f}元')
        for line in year_lines(subset):
            print(line)


def main(argv=None):
    """Run the full report.

    argv: optional argument list (defaults to sys.argv[1:]); its first item,
    if present, is used as the CSV path instead of DEFAULT_CSV.
    """
    if sys.platform == 'win32':
        # Re-wrap the standard streams as UTF-8 on Windows so the Chinese
        # report text can be written.
        sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
        sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding='utf-8')
    warnings.filterwarnings('ignore')

    args = sys.argv[1:] if argv is None else list(argv)
    df = load_trades(args[0] if args else DEFAULT_CSV)

    # Exclude the dead zone.
    gz = df[df['市场状态'] != DEAD_ZONE].copy()

    report_distributions(gz)
    report_losing_combos(gz)
    report_stepwise_filters(gz)
    report_2026_impact(df)
    report_strategies(df)


if __name__ == '__main__':
    main()