| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230 |
- from __future__ import annotations
- import json
- from dataclasses import asdict
- from pathlib import Path
- import pandas as pd
- from dragon_strategy_config import StrategyConfig
- def _load_csv(base_dir: Path, name: str) -> pd.DataFrame:
- return pd.read_csv(base_dir / name, encoding="utf-8-sig")
- def _profit_factor(series: pd.Series) -> float:
- gross_profit = series[series > 0].sum()
- gross_loss = -series[series < 0].sum()
- if gross_loss == 0:
- return float("inf") if gross_profit > 0 else 0.0
- return float(gross_profit / gross_loss)
- def _format_pct(value: float) -> str:
- if pd.isna(value):
- return "NA"
- if value == float("inf"):
- return "inf"
- return f"{value:.2%}"
- def _format_num(value: float) -> str:
- if pd.isna(value):
- return "NA"
- if value == float("inf"):
- return "inf"
- return f"{value:.2f}"
- def _baseline_snapshot(config: StrategyConfig) -> dict[str, object]:
- snapshot = asdict(config)
- snapshot["disabled_rules"] = sorted(config.disabled_rules)
- return snapshot
def _entry_family_lines(frame: pd.DataFrame) -> list[str]:
    """Render one markdown bullet per row of an entry-contribution frame."""
    return [
        f"- `{row['buy_reason']}`: trades `{int(row['trades'])}`, "
        f"avg_return `{_format_pct(float(row['avg_return']))}`, "
        f"win_rate `{_format_pct(float(row['win_rate']))}`"
        for _, row in frame.iterrows()
    ]


def _family_persistence_lines(frame: pd.DataFrame) -> list[str]:
    """Render one markdown bullet per row of a family-stability frame."""
    return [
        f"- `{row['entry_family']}`: years_active `{int(row['years_active'])}`, "
        f"positive_years `{int(row['positive_years'])}`, "
        f"negative_years `{int(row['negative_years'])}`, "
        f"avg_yearly_avg_return `{_format_pct(float(row['avg_yearly_avg_return']))}`"
        for _, row in frame.iterrows()
    ]


def main() -> None:
    """Build the formal research baseline report next to this script.

    Reads the strategy trades, fit markdown, rule contribution, ablation,
    threshold sensitivity and walk-forward CSV/markdown files from the
    script's own directory, then writes:

    * ``dragon_baseline_config_snapshot.json`` -- the default
      ``StrategyConfig`` serialised as JSON;
    * ``dragon_formal_research_baseline.md`` -- the markdown report.

    Raises:
        RuntimeError: if the fit markdown does not contain the expected
            workbook-alignment marker, or the ablation table has no
            ``baseline`` experiment row. Both checks run before any output
            file is written, so a bad input never leaves fresh-looking
            outputs behind.
    """
    base_dir = Path(__file__).resolve().parent

    # ---- Load every input up front --------------------------------------
    trades = _load_csv(base_dir, "dragon_strategy_trades.csv")
    fit = (base_dir / "dragon_strategy_fit.md").read_text(encoding="utf-8")
    entry_contrib = _load_csv(base_dir, "dragon_rule_contribution_entry.csv")
    ablation = _load_csv(base_dir, "dragon_rule_ablation.csv")
    sensitivity = _load_csv(base_dir, "dragon_threshold_sensitivity_summary.csv")
    walk_forward = _load_csv(base_dir, "dragon_walk_forward_summary.csv")
    family_stability = _load_csv(base_dir, "dragon_walk_forward_family_stability.csv")

    # ---- Validate inputs before producing any output --------------------
    # The fit markdown is an explicit dependency of this report: refuse to
    # run against anything but the aligned workbook-preserving version.
    if "real_trade BUY: workbook `106`" not in fit:
        raise RuntimeError("Unexpected baseline fit file contents; baseline report expects the aligned workbook-preserving version.")

    baseline_rows = ablation[ablation["experiment"] == "baseline"]
    if baseline_rows.empty:
        raise RuntimeError(
            "dragon_rule_ablation.csv has no `baseline` experiment row; cannot report locked baseline metrics."
        )
    baseline_ablation = baseline_rows.iloc[0]

    # ---- Config snapshot -------------------------------------------------
    config = StrategyConfig()
    snapshot = _baseline_snapshot(config)
    (base_dir / "dragon_baseline_config_snapshot.json").write_text(
        json.dumps(snapshot, indent=2, ensure_ascii=False) + "\n",
        encoding="utf-8",
    )

    # ---- Derived tables --------------------------------------------------
    returns = trades["return_pct"].astype(float)
    overall_profit_factor = _profit_factor(returns)

    core_alpha_names = ["glued_buy", "early_crash_probe_buy", "oversold_recovery_buy"]
    structural_support_names = ["dual_gold_resonance_buy", "deep_oversold_rebound_buy:classic_oversold"]
    active_research_names = [
        "deep_oversold_rebound_buy:positive_b1_rebound",
        "deep_oversold_rebound_buy:shallow_false_start",
        "deep_oversold_rebound_buy:mixed_oversold",
        "deep_oversold_rebound_buy:deep_capitulation",
        "post_washout_kdj_reentry_buy",
        "oversold_reversal_after_ql_buy",
        "post_sell_rebound_buy",
    ]
    core_alpha = entry_contrib[entry_contrib["buy_reason"].isin(core_alpha_names)].copy()
    structural_support = entry_contrib[entry_contrib["buy_reason"].isin(structural_support_names)].copy()
    weak_research = entry_contrib[entry_contrib["buy_reason"].isin(active_research_names)].copy()
    # Worst average return first; ties broken by the larger trade count.
    weak_research = weak_research.sort_values(["avg_return", "trades"], ascending=[True, False])

    # Compare element-wise against the literals (rather than negating the
    # column) so a row with a missing flag lands in neither group.
    fragile = sensitivity[sensitivity["stable_real_alignment"].eq(False)].sort_values(
        "avg_return_range", ascending=False
    )
    robust = sensitivity[sensitivity["stable_real_alignment"].eq(True)].sort_values("avg_return_range")

    anchored = walk_forward[walk_forward["scheme"] == "anchored_expanding"].copy()
    rolling = walk_forward[walk_forward["scheme"] == "rolling_3y"].copy()
    # Summing an empty boolean mask gives 0, so no empty-frame guard is needed.
    anchored_positive = int((anchored["test_avg_return"] > 0).sum())
    anchored_total = int(len(anchored))
    rolling_positive = int((rolling["test_avg_return"] > 0).sum())
    rolling_total = int(len(rolling))

    stable_families = family_stability[
        (family_stability["avg_yearly_avg_return"] > 0)
        & (family_stability["positive_years"] >= family_stability["negative_years"])
    ].sort_values(["avg_yearly_avg_return", "total_trades"], ascending=[False, False])
    unstable_families = family_stability[
        (family_stability["avg_yearly_avg_return"] < 0)
        | (family_stability["negative_years"] > family_stability["positive_years"])
    ].sort_values(["avg_yearly_avg_return", "min_yearly_avg_return"])

    # ---- Report body -----------------------------------------------------
    lines = [
        "# Dragon Formal Research Baseline",
        "",
        "## Scope",
        "- Universe: `399673` only.",
        "- Objective: preserve workbook real-trade alignment while upgrading the strategy into a researchable, testable, parameter-aware baseline.",
        "- Current baseline type: `workbook-preserving baseline`.",
        "",
        "## Locked Baseline Metrics",
        f"- real BUY overlap: `{int(baseline_ablation['real_buy_overlap'])}/106`",
        f"- real SELL overlap: `{int(baseline_ablation['real_sell_overlap'])}/105`",
        f"- aux BUY overlap: `{int(baseline_ablation['aux_buy_overlap'])}/1`",
        f"- aux SELL overlap: `{int(baseline_ablation['aux_sell_overlap'])}/21`",
        f"- strategy trades: `{int(baseline_ablation['trades'])}`",
        f"- win_rate: `{_format_pct(float(baseline_ablation['win_rate']))}`",
        f"- avg_return: `{_format_pct(float(baseline_ablation['avg_return']))}`",
        f"- median_return: `{_format_pct(float(baseline_ablation['median_return']))}`",
        f"- profit_factor: `{_format_num(overall_profit_factor)}`",
        "",
        "## Baseline Config Snapshot",
        "- Snapshot file: `dragon_baseline_config_snapshot.json`.",
        "- Rule switches default to the current aligned strategy baseline; any future research branch should fork from this snapshot rather than editing against memory.",
        "",
        "## Core Alpha Families",
    ]
    lines.extend(_entry_family_lines(core_alpha))

    lines.extend(["", "## Structural Support Families"])
    lines.extend(_entry_family_lines(structural_support))

    lines.extend(
        [
            "",
            "## Frozen Bridge Rules",
            "- `predictive_b1_break_exit`: bridge-style split-chain exit; loosening worsens results, tightening breaks workbook alignment.",
            "- `predictive_error_reentry_buy`: part of the same bridge chain; should be evaluated together with the predictive-break exit, not as an isolated entry.",
            "- Any internal hold gates added only to preserve workbook-aligned split paths should remain frozen unless the objective explicitly changes away from workbook preservation.",
            "",
            "## Redundant Or Label-Only Families",
            "- `non_glued_positive_expansion_buy`: now absorbed by `dual_gold_resonance_buy` on the same in-sample dates; treat as redundant label, not independent alpha.",
            "- Auxiliary same-side post-exit sell compression: keep as hygiene logic, not as a primary optimization frontier.",
            "",
            "## Active Research Families",
        ]
    )
    lines.extend(_entry_family_lines(weak_research))

    lines.extend(
        [
            "",
            "## Threshold Classification",
            "- Fragile parameters: change them only inside explicit experiment branches and always rerun full alignment diagnostics.",
        ]
    )
    lines.extend(
        f"- `{row['parameter']}`: avg_return_range `{_format_pct(float(row['avg_return_range']))}`, "
        f"min real BUY `{int(row['real_buy_overlap_min'])}`, min real SELL `{int(row['real_sell_overlap_min'])}`"
        for _, row in fragile.iterrows()
    )
    lines.append("- Relatively robust parameters: acceptable first candidates for future controlled sweeps.")
    # Only the four tightest-range robust parameters are listed.
    lines.extend(
        f"- `{row['parameter']}`: avg_return_range `{_format_pct(float(row['avg_return_range']))}`, "
        f"profit_factor_range `{_format_num(float(row['profit_factor_range']))}`"
        for _, row in robust.head(4).iterrows()
    )

    lines.extend(
        [
            "",
            "## Temporal Stability",
            f"- Anchored expanding windows: positive out-of-sample years `{anchored_positive}/{anchored_total}`.",
            f"- Rolling 3Y windows: positive out-of-sample years `{rolling_positive}/{rolling_total}`.",
            "- This validation holds the strategy fixed; it is a time-stability audit, not a refit-based optimizer.",
            "- Strong family persistence candidates:",
        ]
    )
    lines.extend(_family_persistence_lines(stable_families.head(5)))
    lines.append("- Weak family persistence candidates:")
    lines.extend(_family_persistence_lines(unstable_families.head(5)))

    lines.extend(
        [
            "",
            "## Operating Rules For Future Research",
            "- Do not trade off `106/106` and `105/105` alignment silently. Any alignment loss must be treated as a branch with an explicit objective change.",
            "- Do not blind-tune predictive-break thresholds. That family is frozen under the current baseline objective.",
            "- Do not optimize the auxiliary layer first. The main leverage is now in weak entry-family redesign and short-holding loss control.",
            "- New ideas should first be tested as local attribution experiments, then full-sample reruns, then temporal-stability checks.",
            "",
            "## Next Research Track",
            "- Track A: redesign remaining `deep_oversold_rebound_buy` weak subtypes with delayed confirmation or fallback routing, not blunt deletion.",
            "- Track B: explicitly target short holding buckets `00-05d` and `06-10d`, which remain the main quality drag.",
            "- Track C: separate a future `alpha-first` research branch from this workbook-preserving baseline if the goal later changes from reconstruction to pure performance.",
        ]
    )

    (base_dir / "dragon_formal_research_baseline.md").write_text("\n".join(lines) + "\n", encoding="utf-8")
# Script entry point: generate the baseline outputs only when this file is
# executed directly, not when it is imported.
if __name__ == "__main__":
    main()
|