# dragon_research_baseline.py (11 KB)
  1. from __future__ import annotations
  2. import json
  3. from dataclasses import asdict
  4. from pathlib import Path
  5. import pandas as pd
  6. from dragon_strategy_config import StrategyConfig
  7. def _load_csv(base_dir: Path, name: str) -> pd.DataFrame:
  8. return pd.read_csv(base_dir / name, encoding="utf-8-sig")
  9. def _profit_factor(series: pd.Series) -> float:
  10. gross_profit = series[series > 0].sum()
  11. gross_loss = -series[series < 0].sum()
  12. if gross_loss == 0:
  13. return float("inf") if gross_profit > 0 else 0.0
  14. return float(gross_profit / gross_loss)
  15. def _format_pct(value: float) -> str:
  16. if pd.isna(value):
  17. return "NA"
  18. if value == float("inf"):
  19. return "inf"
  20. return f"{value:.2%}"
  21. def _format_num(value: float) -> str:
  22. if pd.isna(value):
  23. return "NA"
  24. if value == float("inf"):
  25. return "inf"
  26. return f"{value:.2f}"
  27. def _baseline_snapshot(config: StrategyConfig) -> dict[str, object]:
  28. snapshot = asdict(config)
  29. snapshot["disabled_rules"] = sorted(config.disabled_rules)
  30. return snapshot
  31. def main() -> None:
  32. base_dir = Path(__file__).resolve().parent
  33. trades = _load_csv(base_dir, "dragon_strategy_trades.csv")
  34. fit = (base_dir / "dragon_strategy_fit.md").read_text(encoding="utf-8")
  35. entry_contrib = _load_csv(base_dir, "dragon_rule_contribution_entry.csv")
  36. ablation = _load_csv(base_dir, "dragon_rule_ablation.csv")
  37. sensitivity = _load_csv(base_dir, "dragon_threshold_sensitivity_summary.csv")
  38. walk_forward = _load_csv(base_dir, "dragon_walk_forward_summary.csv")
  39. family_stability = _load_csv(base_dir, "dragon_walk_forward_family_stability.csv")
  40. config = StrategyConfig()
  41. snapshot = _baseline_snapshot(config)
  42. (base_dir / "dragon_baseline_config_snapshot.json").write_text(
  43. json.dumps(snapshot, indent=2, ensure_ascii=False) + "\n",
  44. encoding="utf-8",
  45. )
  46. baseline_ablation = ablation[ablation["experiment"] == "baseline"].iloc[0]
  47. returns = trades["return_pct"].astype(float)
  48. overall_profit_factor = _profit_factor(returns)
  49. core_alpha_names = ["glued_buy", "early_crash_probe_buy", "oversold_recovery_buy"]
  50. structural_support_names = ["dual_gold_resonance_buy", "deep_oversold_rebound_buy:classic_oversold"]
  51. active_research_names = [
  52. "deep_oversold_rebound_buy:positive_b1_rebound",
  53. "deep_oversold_rebound_buy:shallow_false_start",
  54. "deep_oversold_rebound_buy:mixed_oversold",
  55. "deep_oversold_rebound_buy:deep_capitulation",
  56. "post_washout_kdj_reentry_buy",
  57. "oversold_reversal_after_ql_buy",
  58. "post_sell_rebound_buy",
  59. ]
  60. core_alpha = entry_contrib[entry_contrib["buy_reason"].isin(core_alpha_names)].copy()
  61. structural_support = entry_contrib[entry_contrib["buy_reason"].isin(structural_support_names)].copy()
  62. weak_research = entry_contrib[entry_contrib["buy_reason"].isin(active_research_names)].copy()
  63. weak_research = weak_research.sort_values(["avg_return", "trades"], ascending=[True, False])
  64. fragile = sensitivity[sensitivity["stable_real_alignment"] == False].sort_values("avg_return_range", ascending=False)
  65. robust = sensitivity[sensitivity["stable_real_alignment"] == True].sort_values("avg_return_range")
  66. anchored = walk_forward[walk_forward["scheme"] == "anchored_expanding"].copy()
  67. rolling = walk_forward[walk_forward["scheme"] == "rolling_3y"].copy()
  68. anchored_positive = int((anchored["test_avg_return"] > 0).sum()) if not anchored.empty else 0
  69. anchored_total = int(len(anchored))
  70. rolling_positive = int((rolling["test_avg_return"] > 0).sum()) if not rolling.empty else 0
  71. rolling_total = int(len(rolling))
  72. stable_families = family_stability[
  73. (family_stability["avg_yearly_avg_return"] > 0)
  74. & (family_stability["positive_years"] >= family_stability["negative_years"])
  75. ].sort_values(["avg_yearly_avg_return", "total_trades"], ascending=[False, False])
  76. unstable_families = family_stability[
  77. (family_stability["avg_yearly_avg_return"] < 0)
  78. | (family_stability["negative_years"] > family_stability["positive_years"])
  79. ].sort_values(["avg_yearly_avg_return", "min_yearly_avg_return"])
  80. lines = [
  81. "# Dragon Formal Research Baseline",
  82. "",
  83. "## Scope",
  84. "- Universe: `399673` only.",
  85. "- Objective: preserve workbook real-trade alignment while upgrading the strategy into a researchable, testable, parameter-aware baseline.",
  86. "- Current baseline type: `workbook-preserving baseline`.",
  87. "",
  88. "## Locked Baseline Metrics",
  89. f"- real BUY overlap: `{int(baseline_ablation['real_buy_overlap'])}/106`",
  90. f"- real SELL overlap: `{int(baseline_ablation['real_sell_overlap'])}/105`",
  91. f"- aux BUY overlap: `{int(baseline_ablation['aux_buy_overlap'])}/1`",
  92. f"- aux SELL overlap: `{int(baseline_ablation['aux_sell_overlap'])}/21`",
  93. f"- strategy trades: `{int(baseline_ablation['trades'])}`",
  94. f"- win_rate: `{_format_pct(float(baseline_ablation['win_rate']))}`",
  95. f"- avg_return: `{_format_pct(float(baseline_ablation['avg_return']))}`",
  96. f"- median_return: `{_format_pct(float(baseline_ablation['median_return']))}`",
  97. f"- profit_factor: `{_format_num(overall_profit_factor)}`",
  98. "",
  99. "## Baseline Config Snapshot",
  100. "- Snapshot file: `dragon_baseline_config_snapshot.json`.",
  101. "- Rule switches default to the current aligned strategy baseline; any future research branch should fork from this snapshot rather than editing against memory.",
  102. "",
  103. "## Core Alpha Families",
  104. ]
  105. for _, row in core_alpha.iterrows():
  106. lines.append(
  107. f"- `{row['buy_reason']}`: trades `{int(row['trades'])}`, avg_return `{_format_pct(float(row['avg_return']))}`, "
  108. f"win_rate `{_format_pct(float(row['win_rate']))}`"
  109. )
  110. lines.extend(["", "## Structural Support Families"])
  111. for _, row in structural_support.iterrows():
  112. lines.append(
  113. f"- `{row['buy_reason']}`: trades `{int(row['trades'])}`, avg_return `{_format_pct(float(row['avg_return']))}`, "
  114. f"win_rate `{_format_pct(float(row['win_rate']))}`"
  115. )
  116. lines.extend(
  117. [
  118. "",
  119. "## Frozen Bridge Rules",
  120. "- `predictive_b1_break_exit`: bridge-style split-chain exit; loosening worsens results, tightening breaks workbook alignment.",
  121. "- `predictive_error_reentry_buy`: part of the same bridge chain; should be evaluated together with the predictive-break exit, not as an isolated entry.",
  122. "- Any internal hold gates added only to preserve workbook-aligned split paths should remain frozen unless the objective explicitly changes away from workbook preservation.",
  123. "",
  124. "## Redundant Or Label-Only Families",
  125. "- `non_glued_positive_expansion_buy`: now absorbed by `dual_gold_resonance_buy` on the same in-sample dates; treat as redundant label, not independent alpha.",
  126. "- Auxiliary same-side post-exit sell compression: keep as hygiene logic, not as a primary optimization frontier.",
  127. "",
  128. "## Active Research Families",
  129. ]
  130. )
  131. for _, row in weak_research.iterrows():
  132. lines.append(
  133. f"- `{row['buy_reason']}`: trades `{int(row['trades'])}`, avg_return `{_format_pct(float(row['avg_return']))}`, "
  134. f"win_rate `{_format_pct(float(row['win_rate']))}`"
  135. )
  136. lines.extend(
  137. [
  138. "",
  139. "## Threshold Classification",
  140. "- Fragile parameters: change them only inside explicit experiment branches and always rerun full alignment diagnostics.",
  141. ]
  142. )
  143. for _, row in fragile.iterrows():
  144. lines.append(
  145. f"- `{row['parameter']}`: avg_return_range `{_format_pct(float(row['avg_return_range']))}`, "
  146. f"min real BUY `{int(row['real_buy_overlap_min'])}`, min real SELL `{int(row['real_sell_overlap_min'])}`"
  147. )
  148. lines.append("- Relatively robust parameters: acceptable first candidates for future controlled sweeps.")
  149. for _, row in robust.head(4).iterrows():
  150. lines.append(
  151. f"- `{row['parameter']}`: avg_return_range `{_format_pct(float(row['avg_return_range']))}`, "
  152. f"profit_factor_range `{_format_num(float(row['profit_factor_range']))}`"
  153. )
  154. lines.extend(
  155. [
  156. "",
  157. "## Temporal Stability",
  158. f"- Anchored expanding windows: positive out-of-sample years `{anchored_positive}/{anchored_total}`.",
  159. f"- Rolling 3Y windows: positive out-of-sample years `{rolling_positive}/{rolling_total}`.",
  160. "- This validation holds the strategy fixed; it is a time-stability audit, not a refit-based optimizer.",
  161. "- Strong family persistence candidates:",
  162. ]
  163. )
  164. for _, row in stable_families.head(5).iterrows():
  165. lines.append(
  166. f"- `{row['entry_family']}`: years_active `{int(row['years_active'])}`, positive_years `{int(row['positive_years'])}`, "
  167. f"negative_years `{int(row['negative_years'])}`, avg_yearly_avg_return `{_format_pct(float(row['avg_yearly_avg_return']))}`"
  168. )
  169. lines.append("- Weak family persistence candidates:")
  170. for _, row in unstable_families.head(5).iterrows():
  171. lines.append(
  172. f"- `{row['entry_family']}`: years_active `{int(row['years_active'])}`, positive_years `{int(row['positive_years'])}`, "
  173. f"negative_years `{int(row['negative_years'])}`, avg_yearly_avg_return `{_format_pct(float(row['avg_yearly_avg_return']))}`"
  174. )
  175. lines.extend(
  176. [
  177. "",
  178. "## Operating Rules For Future Research",
  179. "- Do not trade off `106/106` and `105/105` alignment silently. Any alignment loss must be treated as a branch with an explicit objective change.",
  180. "- Do not blind-tune predictive-break thresholds. That family is frozen under the current baseline objective.",
  181. "- Do not optimize the auxiliary layer first. The main leverage is now in weak entry-family redesign and short-holding loss control.",
  182. "- New ideas should first be tested as local attribution experiments, then full-sample reruns, then temporal-stability checks.",
  183. "",
  184. "## Next Research Track",
  185. "- Track A: redesign remaining `deep_oversold_rebound_buy` weak subtypes with delayed confirmation or fallback routing, not blunt deletion.",
  186. "- Track B: explicitly target short holding buckets `00-05d` and `06-10d`, which remain the main quality drag.",
  187. "- Track C: separate a future `alpha-first` research branch from this workbook-preserving baseline if the goal later changes from reconstruction to pure performance.",
  188. ]
  189. )
  190. (base_dir / "dragon_formal_research_baseline.md").write_text("\n".join(lines) + "\n", encoding="utf-8")
  191. # Keep the baseline fit markdown referenced by this script as an explicit dependency.
  192. if "real_trade BUY: workbook `106`" not in fit:
  193. raise RuntimeError("Unexpected baseline fit file contents; baseline report expects the aligned workbook-preserving version.")
# Generate the outputs only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()