dragon_short_holding_experiments.py 9.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222
  1. from __future__ import annotations
  2. from pathlib import Path
  3. import pandas as pd
  4. from dragon_branch_configs import alpha_first_selective_veto_config
  5. from dragon_strategy import DragonRuleEngine
  6. from dragon_strategy_config import StrategyConfig
  7. def _load_indicator_snapshot(base_dir: Path) -> pd.DataFrame:
  8. df = pd.read_csv(base_dir / "dragon_indicator_snapshot.csv", encoding="utf-8-sig")
  9. df["date"] = pd.to_datetime(df["date"])
  10. return df.set_index("date", drop=False)
  11. def _load_true_trade_events(base_dir: Path) -> pd.DataFrame:
  12. return pd.read_csv(base_dir / "true_trade_events.csv", encoding="utf-8-sig")
  13. def _profit_factor(series: pd.Series) -> float:
  14. gross_profit = series[series > 0].sum()
  15. gross_loss = -series[series < 0].sum()
  16. if gross_loss == 0:
  17. return float("inf") if gross_profit > 0 else 0.0
  18. return float(gross_profit / gross_loss)
  19. def _holding_bucket(days: int) -> str:
  20. if days <= 5:
  21. return "00-05d"
  22. if days <= 10:
  23. return "06-10d"
  24. if days <= 20:
  25. return "11-20d"
  26. if days <= 40:
  27. return "21-40d"
  28. return "41d+"
  29. def _event_match(strategy_events: pd.DataFrame, workbook_events: pd.DataFrame, side: str) -> tuple[int, int]:
  30. wb = set(workbook_events[(workbook_events["side"] == side) & (workbook_events["layer"] == "real_trade")]["date"])
  31. st = set(strategy_events[(strategy_events["side"] == side) & (strategy_events["layer"] == "real_trade")]["date"])
  32. return len(wb & st), len(st - wb)
  33. def _run_branch(
  34. label: str,
  35. config: StrategyConfig,
  36. indicator_df: pd.DataFrame,
  37. workbook_events: pd.DataFrame,
  38. first_date: str,
  39. last_date: str,
  40. ) -> tuple[dict[str, object], pd.DataFrame]:
  41. engine = DragonRuleEngine(config)
  42. events, trades = engine.run(indicator_df)
  43. events = events[(events["date"] >= first_date) & (events["date"] <= last_date)].copy()
  44. trades = trades[
  45. (trades["buy_date"] >= first_date)
  46. & (trades["buy_date"] <= last_date)
  47. & (trades["sell_date"] >= first_date)
  48. & (trades["sell_date"] <= last_date)
  49. ].copy()
  50. trades["holding_bucket"] = trades["holding_days"].astype(int).map(_holding_bucket)
  51. short = trades[trades["holding_bucket"].isin({"00-05d", "06-10d"})].copy()
  52. glued_short = short[short["buy_reason"] == "glued_buy"].copy()
  53. buy_overlap, buy_extra = _event_match(events, workbook_events, "BUY")
  54. sell_overlap, sell_extra = _event_match(events, workbook_events, "SELL")
  55. row = {
  56. "experiment": label,
  57. "trades": int(len(trades)),
  58. "win_rate": float((trades["return_pct"] > 0).mean()) if not trades.empty else float("nan"),
  59. "avg_return": float(trades["return_pct"].mean()) if not trades.empty else float("nan"),
  60. "median_return": float(trades["return_pct"].median()) if not trades.empty else float("nan"),
  61. "profit_factor": _profit_factor(trades["return_pct"]) if not trades.empty else float("nan"),
  62. "real_buy_overlap": int(buy_overlap),
  63. "real_buy_extra": int(buy_extra),
  64. "real_sell_overlap": int(sell_overlap),
  65. "real_sell_extra": int(sell_extra),
  66. "short_trade_count": int(len(short)),
  67. "short_avg_return": float(short["return_pct"].mean()) if not short.empty else float("nan"),
  68. "short_00_05d_avg_return": float(short[short["holding_bucket"] == "00-05d"]["return_pct"].mean()),
  69. "short_06_10d_avg_return": float(short[short["holding_bucket"] == "06-10d"]["return_pct"].mean()),
  70. "glued_short_count": int(len(glued_short)),
  71. "glued_short_avg_return": float(glued_short["return_pct"].mean()) if not glued_short.empty else float("nan"),
  72. "post_sell_rebound_short_count": int(short[short["buy_reason"] == "post_sell_rebound_buy"].shape[0]),
  73. }
  74. diff = trades[trades["holding_bucket"].isin({"00-05d", "06-10d"})][
  75. ["buy_date", "sell_date", "buy_reason", "sell_reason", "holding_days", "return_pct"]
  76. ].copy()
  77. diff["experiment"] = label
  78. return row, diff
  79. def main() -> None:
  80. base_dir = Path(__file__).resolve().parent
  81. indicator_df = _load_indicator_snapshot(base_dir)
  82. workbook_events = _load_true_trade_events(base_dir)
  83. first_date = workbook_events["date"].min()
  84. last_date = workbook_events["date"].max()
  85. baseline = alpha_first_selective_veto_config()
  86. experiments = [
  87. ("baseline_alpha_first", baseline),
  88. ("disable_post_sell_rebound_buy", baseline.with_updates(disabled_rules={"post_sell_rebound_buy"})),
  89. (
  90. "glued_veto_hot_positive_b1",
  91. baseline.with_updates(
  92. glued_selective_hot_c1_min=40.0,
  93. glued_selective_hot_b1_min=0.10,
  94. ),
  95. ),
  96. (
  97. "glued_veto_low_weak_range",
  98. baseline.with_updates(
  99. glued_selective_low_c1_min=23.0,
  100. glued_selective_low_c1_max=28.0,
  101. glued_selective_low_b1_max=0.02,
  102. ),
  103. ),
  104. (
  105. "glued_veto_hot_and_low",
  106. baseline.with_updates(
  107. glued_selective_hot_c1_min=40.0,
  108. glued_selective_hot_b1_min=0.10,
  109. glued_selective_low_c1_min=23.0,
  110. glued_selective_low_c1_max=28.0,
  111. glued_selective_low_b1_max=0.02,
  112. ),
  113. ),
  114. (
  115. "glued_veto_hot_low_and_disable_post_sell",
  116. baseline.with_updates(
  117. disabled_rules={"post_sell_rebound_buy"},
  118. glued_selective_hot_c1_min=40.0,
  119. glued_selective_hot_b1_min=0.10,
  120. glued_selective_low_c1_min=23.0,
  121. glued_selective_low_c1_max=28.0,
  122. glued_selective_low_b1_max=0.02,
  123. ),
  124. ),
  125. ]
  126. rows: list[dict[str, object]] = []
  127. diffs: list[pd.DataFrame] = []
  128. for label, config in experiments:
  129. row, diff = _run_branch(label, config, indicator_df, workbook_events, first_date, last_date)
  130. rows.append(row)
  131. diffs.append(diff)
  132. result_df = pd.DataFrame(rows)
  133. baseline_row = result_df[result_df["experiment"] == "baseline_alpha_first"].iloc[0]
  134. for col in [
  135. "trades",
  136. "win_rate",
  137. "avg_return",
  138. "median_return",
  139. "profit_factor",
  140. "real_buy_overlap",
  141. "real_sell_overlap",
  142. "short_trade_count",
  143. "short_avg_return",
  144. "short_00_05d_avg_return",
  145. "short_06_10d_avg_return",
  146. "glued_short_count",
  147. "glued_short_avg_return",
  148. "post_sell_rebound_short_count",
  149. ]:
  150. result_df[f"delta_{col}"] = result_df[col] - baseline_row[col]
  151. diff_df = pd.concat(diffs, ignore_index=True)
  152. result_df.to_csv(base_dir / "dragon_short_holding_experiments.csv", index=False, encoding="utf-8-sig")
  153. diff_df.to_csv(base_dir / "dragon_short_holding_experiment_trades.csv", index=False, encoding="utf-8-sig")
  154. lines = [
  155. "# Dragon Short Holding Experiments",
  156. "",
  157. "- Baseline branch: `alpha_first_selective_veto`.",
  158. "- Goal: reduce the main short-holding drag with the smallest possible extra complexity.",
  159. "",
  160. "## Summary",
  161. ]
  162. for _, row in result_df.iterrows():
  163. lines.append(
  164. f"- `{row['experiment']}`: trades `{int(row['trades'])}`, avg_return `{row['avg_return']:.2%}`, "
  165. f"profit_factor `{row['profit_factor']:.2f}`, short_avg_return `{row['short_avg_return']:.2%}`, "
  166. f"`00-05d` `{row['short_00_05d_avg_return']:.2%}`, `06-10d` `{row['short_06_10d_avg_return']:.2%}`, "
  167. f"real BUY / SELL `{int(row['real_buy_overlap'])}/{int(row['real_sell_overlap'])}`"
  168. )
  169. lines.extend(["", "## Delta Vs Alpha-First Baseline"])
  170. for _, row in result_df[result_df["experiment"] != "baseline_alpha_first"].iterrows():
  171. lines.append(
  172. f"- `{row['experiment']}`: delta_avg_return `{row['delta_avg_return']:.2%}`, "
  173. f"delta_profit_factor `{row['delta_profit_factor']:.2f}`, delta_short_avg_return `{row['delta_short_avg_return']:.2%}`, "
  174. f"delta_glued_short_avg_return `{row['delta_glued_short_avg_return']:.2%}`, "
  175. f"real BUY / SELL `{int(row['real_buy_overlap'])}/{int(row['real_sell_overlap'])}`"
  176. )
  177. best = result_df[result_df["experiment"] != "baseline_alpha_first"].sort_values(
  178. ["avg_return", "profit_factor"], ascending=[False, False]
  179. ).head(1)
  180. if not best.empty:
  181. row = best.iloc[0]
  182. lines.extend(
  183. [
  184. "",
  185. "## Quant Judgment",
  186. f"- Best branch in this pack: `{row['experiment']}` with avg_return `{row['avg_return']:.2%}` and profit_factor `{row['profit_factor']:.2f}`.",
  187. "- Compare the winning branch to the audit: if glued short trades fall materially while overlap loss stays controlled, the next optimization should stay on the glued-entry side.",
  188. "- If disabling `post_sell_rebound_buy` contributes little, that family is secondary for this stage.",
  189. ]
  190. )
  191. (base_dir / "dragon_short_holding_experiments.md").write_text("\n".join(lines) + "\n", encoding="utf-8")
# Run the experiment pack only when this file is executed as a script,
# so importing the module has no side effects.
if __name__ == "__main__":
    main()