# dragon_residual_attribution.py (12 KB)
  1. from __future__ import annotations
  2. from pathlib import Path
  3. from typing import Optional
  4. import pandas as pd
  5. def _load_csv(path: Path) -> pd.DataFrame:
  6. return pd.read_csv(path, encoding="utf-8-sig")
  7. def _pct(value: Optional[float]) -> str:
  8. if value is None or pd.isna(value):
  9. return "n/a"
  10. return f"{float(value):.2%}"
  11. def _regime_label(c1: float) -> str:
  12. if c1 < 12:
  13. return "deep_oversold"
  14. if c1 < 20:
  15. return "oversold"
  16. if c1 < 35:
  17. return "rebound_low"
  18. if c1 < 60:
  19. return "rebound_mid"
  20. if c1 < 80:
  21. return "high_mid"
  22. return "hot"
  23. def _trade_chain_type(entry_aligned: bool, exit_aligned: bool) -> str:
  24. if not entry_aligned and not exit_aligned:
  25. return "isolated_extra_trade"
  26. if not entry_aligned and exit_aligned:
  27. return "bridge_to_aligned_sell"
  28. if entry_aligned and not exit_aligned:
  29. return "premature_exit_of_aligned_trade"
  30. return "aligned_trade"
  31. def _event_context(events: pd.DataFrame, center_date: str, window_days: int = 10) -> str:
  32. center = pd.Timestamp(center_date)
  33. start = (center - pd.Timedelta(days=window_days)).date().isoformat()
  34. end = (center + pd.Timedelta(days=window_days)).date().isoformat()
  35. subset = events[(events["date"] >= start) & (events["date"] <= end)].copy()
  36. if subset.empty:
  37. return ""
  38. labels = []
  39. for _, row in subset.sort_values("date").iterrows():
  40. layer = row.get("layer", "")
  41. side = row.get("side", "")
  42. note = row.get("note", "") if "note" in row else ""
  43. note_text = f":{note}" if isinstance(note, str) and note else ""
  44. labels.append(f"{row['date']} {side}({layer}){note_text}")
  45. return " | ".join(labels)
  46. def _calc_mfe_mae(indicators: pd.DataFrame, buy_date: str, sell_date: str, entry_price: float) -> tuple[Optional[float], Optional[float]]:
  47. window = indicators[(indicators["date"] >= buy_date) & (indicators["date"] <= sell_date)]
  48. if window.empty or entry_price is None or pd.isna(entry_price):
  49. return None, None
  50. mfe = window["high"].max() / entry_price - 1
  51. mae = window["low"].min() / entry_price - 1
  52. return float(mfe), float(mae)
  53. def _calc_forward_backward_returns(indicators: pd.DataFrame, event_date: str, days: int = 5) -> tuple[Optional[float], Optional[float]]:
  54. row = indicators[indicators["date"] == event_date]
  55. if row.empty:
  56. return None, None
  57. idx = row.index[0]
  58. close_now = float(indicators.loc[idx, "close"])
  59. pre = None
  60. post = None
  61. if idx - days >= indicators.index.min():
  62. close_prev = float(indicators.loc[idx - days, "close"])
  63. pre = close_now / close_prev - 1
  64. if idx + days <= indicators.index.max():
  65. close_next = float(indicators.loc[idx + days, "close"])
  66. post = close_next / close_now - 1
  67. return pre, post
  68. def _recommendation(
  69. chain_type: str,
  70. return_pct: float,
  71. mfe_pct: Optional[float],
  72. holding_days: int,
  73. ) -> tuple[str, str]:
  74. if chain_type == "premature_exit_of_aligned_trade":
  75. if return_pct <= 0.02:
  76. return "DELETE_CANDIDATE", "额外卖点提前截断了已对齐持仓,且收益补偿不足。"
  77. return "OBSERVE", "额外卖点发生在已对齐持仓内,但收益贡献不差,需要和风险控制一起评估。"
  78. if chain_type == "bridge_to_aligned_sell":
  79. if return_pct > 0.01:
  80. return "KEEP_BRIDGE", "额外买点承接到了后续对齐卖点,且桥接段本身有正收益。"
  81. return "OBSERVE_BRIDGE", "额外买点承接了后续对齐卖点,但桥接段收益一般,需要替代方案后再删。"
  82. if chain_type == "isolated_extra_trade":
  83. if return_pct > 0.05 and (mfe_pct is not None and mfe_pct > 0.08):
  84. return "KEEP_ALPHA", "虽然是额外交易,但收益明显,可能代表工作簿未显式记录的顺势 alpha。"
  85. if return_pct <= 0.02 and holding_days <= 15:
  86. return "DELETE_CANDIDATE", "额外交易自成闭环,且收益/持有质量偏弱,优先删除。"
  87. return "OBSERVE", "额外交易自成闭环,但收益质量不算差,先保留观察。"
  88. return "OBSERVE", "需要结合上下文进一步人工判断。"
  89. def _impact_text(chain_type: str, side: str, buy_date: str, sell_date: str, paired_aligned: bool) -> str:
  90. if side == "BUY" and chain_type == "bridge_to_aligned_sell":
  91. return f"若删除,最直接风险是丢失后续对齐卖点 {sell_date} 的持仓承接。"
  92. if side == "SELL" and chain_type == "premature_exit_of_aligned_trade":
  93. return f"若删除,理论上更接近工作簿原始持仓路径,可继续观察后续卖点是否自然保留。"
  94. if chain_type == "isolated_extra_trade":
  95. return f"该点与配对交易构成局部闭环,删除通常应连同 {buy_date}->{sell_date} 一并评估。"
  96. if paired_aligned:
  97. return "该点与对齐事件相连,删除需检查下游状态转移。"
  98. return "该点对下游对齐影响有限,更偏向局部收益质量问题。"
  99. def main() -> None:
  100. base_dir = Path(__file__).resolve().parent
  101. gaps = _load_csv(base_dir / "dragon_event_gaps.csv")
  102. trades = _load_csv(base_dir / "dragon_strategy_trades.csv")
  103. strategy_events = _load_csv(base_dir / "dragon_strategy_events.csv")
  104. workbook_layers = _load_csv(base_dir / "dragon_workbook_layers.csv")
  105. indicators = _load_csv(base_dir / "dragon_indicator_snapshot.csv")
  106. indicators = indicators.sort_values("date").reset_index(drop=True)
  107. workbook_real_buy = set(
  108. workbook_layers[(workbook_layers["layer"] == "real_trade") & (workbook_layers["side"] == "BUY")]["date"]
  109. )
  110. workbook_real_sell = set(
  111. workbook_layers[(workbook_layers["layer"] == "real_trade") & (workbook_layers["side"] == "SELL")]["date"]
  112. )
  113. residuals = gaps[
  114. (gaps["diagnostic_type"] == "extra_in_strategy")
  115. & (gaps["target_layer"] == "real_trade")
  116. ].copy()
  117. residuals["side"] = residuals["target_side"]
  118. residuals["rule"] = residuals["source_reason"]
  119. residuals = residuals[["date", "side", "rule", "a1", "b1", "c1"]].sort_values(["date", "side"])
  120. rows: list[dict[str, object]] = []
  121. for _, residual in residuals.iterrows():
  122. side = residual["side"]
  123. event_date = residual["date"]
  124. trade = None
  125. if side == "BUY":
  126. match = trades[trades["buy_date"] == event_date]
  127. else:
  128. match = trades[trades["sell_date"] == event_date]
  129. if not match.empty:
  130. trade = match.iloc[0]
  131. if trade is None:
  132. continue
  133. buy_date = str(trade["buy_date"])
  134. sell_date = str(trade["sell_date"])
  135. buy_reason = str(trade["buy_reason"])
  136. sell_reason = str(trade["sell_reason"])
  137. holding_days = int(trade["holding_days"])
  138. return_pct = float(trade["return_pct"])
  139. buy_price = float(trade["buy_price"])
  140. entry_aligned = buy_date in workbook_real_buy
  141. exit_aligned = sell_date in workbook_real_sell
  142. chain_type = _trade_chain_type(entry_aligned, exit_aligned)
  143. paired_aligned = exit_aligned if side == "BUY" else entry_aligned
  144. regime = _regime_label(float(residual["c1"]))
  145. mfe_pct, mae_pct = _calc_mfe_mae(indicators, buy_date, sell_date, buy_price)
  146. pre_5d_return, post_5d_return = _calc_forward_backward_returns(indicators, event_date, days=5)
  147. recommendation, recommendation_reason = _recommendation(
  148. chain_type=chain_type,
  149. return_pct=return_pct,
  150. mfe_pct=mfe_pct,
  151. holding_days=holding_days,
  152. )
  153. impact_text = _impact_text(
  154. chain_type=chain_type,
  155. side=side,
  156. buy_date=buy_date,
  157. sell_date=sell_date,
  158. paired_aligned=paired_aligned,
  159. )
  160. workbook_context = _event_context(workbook_layers, event_date, window_days=10)
  161. strategy_context = _event_context(strategy_events, event_date, window_days=10)
  162. rows.append(
  163. {
  164. "date": event_date,
  165. "side": side,
  166. "rule": residual["rule"],
  167. "regime": regime,
  168. "buy_date": buy_date,
  169. "buy_reason": buy_reason,
  170. "sell_date": sell_date,
  171. "sell_reason": sell_reason,
  172. "holding_days": holding_days,
  173. "return_pct": return_pct,
  174. "mfe_pct": mfe_pct,
  175. "mae_pct": mae_pct,
  176. "event_a1": float(residual["a1"]),
  177. "event_b1": float(residual["b1"]),
  178. "event_c1": float(residual["c1"]),
  179. "pre_5d_return": pre_5d_return,
  180. "post_5d_return": post_5d_return,
  181. "entry_is_workbook_real": entry_aligned,
  182. "exit_is_workbook_real": exit_aligned,
  183. "chain_type": chain_type,
  184. "paired_aligned_event_date": sell_date if side == "BUY" and exit_aligned else buy_date if side == "SELL" and entry_aligned else "",
  185. "delete_impact": impact_text,
  186. "recommendation": recommendation,
  187. "recommendation_reason": recommendation_reason,
  188. "workbook_context": workbook_context,
  189. "strategy_context": strategy_context,
  190. }
  191. )
  192. attribution = pd.DataFrame(rows).sort_values(["recommendation", "date"]).reset_index(drop=True)
  193. attribution.to_csv(base_dir / "dragon_residual_trade_attribution.csv", index=False, encoding="utf-8-sig")
  194. lines = [
  195. "# Dragon Residual Trade Review",
  196. "",
  197. "## Snapshot",
  198. f"- Residual real-trade rows reviewed: `{len(attribution)}`",
  199. f"- DELETE_CANDIDATE: `{int((attribution['recommendation'] == 'DELETE_CANDIDATE').sum())}`",
  200. f"- KEEP_BRIDGE / KEEP_ALPHA: `{int(attribution['recommendation'].isin(['KEEP_BRIDGE', 'KEEP_ALPHA']).sum())}`",
  201. f"- OBSERVE / OBSERVE_BRIDGE: `{int(attribution['recommendation'].isin(['OBSERVE', 'OBSERVE_BRIDGE']).sum())}`",
  202. "",
  203. "## Recommendation Summary",
  204. ]
  205. for label in ["DELETE_CANDIDATE", "KEEP_BRIDGE", "KEEP_ALPHA", "OBSERVE_BRIDGE", "OBSERVE"]:
  206. subset = attribution[attribution["recommendation"] == label]
  207. if subset.empty:
  208. continue
  209. lines.append(f"### {label}")
  210. for _, row in subset.iterrows():
  211. lines.append(
  212. f"- `{row['date']}` `{row['side']}` `{row['rule']}` | trade `{row['buy_date']} -> {row['sell_date']}` | "
  213. f"ret `{_pct(row['return_pct'])}` mfe `{_pct(row['mfe_pct'])}` mae `{_pct(row['mae_pct'])}` | "
  214. f"{row['recommendation_reason']}"
  215. )
  216. lines.append("")
  217. lines.extend(["## Detailed Cards", ""])
  218. for _, row in attribution.sort_values("date").iterrows():
  219. lines.extend(
  220. [
  221. f"### {row['date']} {row['side']} {row['rule']}",
  222. f"- Regime: `{row['regime']}`",
  223. f"- Trade: `{row['buy_date']} -> {row['sell_date']}` | buy `{row['buy_reason']}` | sell `{row['sell_reason']}`",
  224. f"- Holding / Return: `{int(row['holding_days'])}` days / `{_pct(row['return_pct'])}`",
  225. f"- MFE / MAE: `{_pct(row['mfe_pct'])}` / `{_pct(row['mae_pct'])}`",
  226. f"- Event indicators: `a1={row['event_a1']:.4f}` `b1={row['event_b1']:.4f}` `c1={row['event_c1']:.2f}`",
  227. f"- Pre/Post 5d return: `{_pct(row['pre_5d_return'])}` / `{_pct(row['post_5d_return'])}`",
  228. f"- Chain type: `{row['chain_type']}` | entry aligned `{bool(row['entry_is_workbook_real'])}` | exit aligned `{bool(row['exit_is_workbook_real'])}`",
  229. f"- Delete impact: {row['delete_impact']}",
  230. f"- Recommendation: `{row['recommendation']}` | {row['recommendation_reason']}",
  231. f"- Workbook context: {row['workbook_context'] or 'n/a'}",
  232. f"- Strategy context: {row['strategy_context'] or 'n/a'}",
  233. "",
  234. ]
  235. )
  236. (base_dir / "dragon_residual_trade_review.md").write_text("\n".join(lines), encoding="utf-8")
# Run the attribution pipeline only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()