| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163 |
- from __future__ import annotations
- from pathlib import Path
- import pandas as pd
- from dragon_branch_configs import alpha_first_glued_refined_hot_cap_config, alpha_first_selective_veto_config
- from dragon_execution_common import apply_execution_model as _apply_execution_model, risk_cluster as _risk_cluster, summary as _summary
- from dragon_shared import END_DATE, START_DATE, format_num as _format_num, format_pct as _format_pct
- from dragon_strategy import DragonRuleEngine
def _load_indicator_snapshot(base_dir: Path) -> pd.DataFrame:
    """Read the indicator snapshot CSV from *base_dir*, oldest row first.

    The file ``dragon_indicator_snapshot.csv`` is read as UTF-8 with an
    optional BOM, its ``date`` column is parsed to datetimes, and the rows
    are returned sorted by ``date`` with a fresh 0..n-1 index.
    """
    snapshot_path = base_dir / "dragon_indicator_snapshot.csv"
    return (
        pd.read_csv(snapshot_path, encoding="utf-8-sig")
        .assign(date=lambda frame: pd.to_datetime(frame["date"]))
        .sort_values("date")
        .reset_index(drop=True)
    )
def _entry_family(reason: str) -> str:
    """Return the text of *reason* before its first ``:``.

    *reason* is converted with ``str`` first; when it contains no colon the
    whole string is returned.
    """
    family, _separator, _detail = str(reason).partition(":")
    return family
def _run_branch(indicators: pd.DataFrame, config) -> pd.DataFrame:
    """Run the rule engine for one branch config and return its in-window trades.

    The engine receives *indicators* indexed by ``date`` (the column is kept
    as well). Only the second element of the engine's result is used, as the
    trades frame. Trades are kept when both ``buy_date`` and ``sell_date``
    lie within ``START_DATE``..``END_DATE`` inclusive. The returned copy
    gains ``buy_dt``, ``sell_dt``, ``sell_year`` and ``entry_family``.
    """
    by_date = indicators.set_index("date", drop=False)
    _, all_trades = DragonRuleEngine(config=config).run(by_date)

    # Both legs of the trade must fall inside the review window.
    bought_in_window = (all_trades["buy_date"] >= START_DATE) & (all_trades["buy_date"] <= END_DATE)
    sold_in_window = (all_trades["sell_date"] >= START_DATE) & (all_trades["sell_date"] <= END_DATE)
    kept = all_trades[bought_in_window & sold_in_window].copy()

    kept["buy_dt"] = pd.to_datetime(kept["buy_date"])
    kept["sell_dt"] = pd.to_datetime(kept["sell_date"])
    kept["sell_year"] = kept["sell_dt"].dt.year.astype(int)
    kept["entry_family"] = kept["buy_reason"].map(_entry_family)
    return kept
def _add_execution_prices(trades: pd.DataFrame, indicators: pd.DataFrame) -> pd.DataFrame:
    """Attach same-bar and next-bar execution prices to a copy of *trades*.

    For each trade, the bar whose calendar day matches ``buy_date`` /
    ``sell_date`` is located in *indicators*, and six columns are added:

    * ``exec_same_close_entry`` / ``exec_same_close_exit``: that bar's close.
    * ``exec_next_open_entry`` / ``exec_next_open_exit``: the following
      row's open.
    * ``exec_next_close_entry`` / ``exec_next_close_exit``: the following
      row's close.

    "Following row" is positional, so *indicators* is assumed to be sorted
    by ``date`` (as ``_load_indicator_snapshot`` returns it). When the
    matched bar is the last row, the next-bar prices are NaN.

    Trade dates are normalised with ``pd.Timestamp(...).date()`` for both
    the same-bar and the next-bar lookup. Previously the next-bar lookup
    used the raw value against ISO-string keys, so a ``Timestamp`` or a
    string with a time part silently produced NaN next-bar prices.

    Args:
        trades: Frame with ``buy_date`` and ``sell_date`` columns holding
            anything ``pd.Timestamp`` accepts.
        indicators: Frame with a datetime ``date`` column plus ``open`` and
            ``close`` columns.

    Returns:
        A copy of *trades* with the six ``exec_*`` columns appended; the
        input frame is not modified.

    Raises:
        KeyError: If a trade date has no bar in *indicators*.
    """
    trades = trades.copy()

    # Map each calendar day to its row position; the next bar is position + 1.
    bar_days = indicators["date"].dt.date
    position_by_day = {day: pos for pos, day in enumerate(bar_days)}
    last_position = len(indicators) - 1

    def _position(raw_date: object) -> int:
        # One normalisation shared by every lookup keeps them consistent.
        return position_by_day[pd.Timestamp(raw_date).date()]

    def _price(position: int, column: str) -> float:
        return float(indicators[column].iat[position])

    def _next_price(position: int, column: str) -> float:
        # No following bar exists after the final row.
        if position >= last_position:
            return float("nan")
        return _price(position + 1, column)

    buy_positions = [_position(raw) for raw in trades["buy_date"]]
    sell_positions = [_position(raw) for raw in trades["sell_date"]]

    trades["exec_same_close_entry"] = [_price(pos, "close") for pos in buy_positions]
    trades["exec_same_close_exit"] = [_price(pos, "close") for pos in sell_positions]
    trades["exec_next_open_entry"] = [_next_price(pos, "open") for pos in buy_positions]
    trades["exec_next_open_exit"] = [_next_price(pos, "open") for pos in sell_positions]
    trades["exec_next_close_entry"] = [_next_price(pos, "close") for pos in buy_positions]
    trades["exec_next_close_exit"] = [_next_price(pos, "close") for pos in sell_positions]
    return trades
def main() -> None:
    """Compare the control and refined branches under latency and cost stress.

    Loads the indicator snapshot from this file's directory, runs both
    branch configs, and summarises every execution model at zero cost
    (latency table), every execution model at each cost level (stress
    table), and loss clustering for the same-close and next-open models
    (risk table). The three tables are written as CSV files and a Markdown
    review as ``dragon_refined_stability_review.md``, all in that directory.
    """
    out_dir = Path(__file__).resolve().parent
    snapshot = _load_indicator_snapshot(out_dir)

    # Control branch first, refined branch second; order fixes the run order.
    config_factories = (
        ("alpha_first_selective_veto", alpha_first_selective_veto_config),
        ("alpha_first_glued_refined_hot_cap", alpha_first_glued_refined_hot_cap_config),
    )
    priced_trades = {
        name: _add_execution_prices(_run_branch(snapshot, make_config()), snapshot)
        for name, make_config in config_factories
    }

    models = ("same_close", "next_open", "next_close")
    risk_models = ("same_close", "next_open")
    costs_bps = (0.0, 5.0, 10.0, 20.0)

    stress_records: list[dict[str, object]] = []
    latency_records: list[dict[str, object]] = []
    risk_records: list[dict[str, object]] = []
    for branch_name, branch_trades in priced_trades.items():
        for model in models:
            zero_cost = _apply_execution_model(branch_trades, model, 0.0)
            latency_records.append(_summary(branch_name, zero_cost))
            if model in risk_models:
                risk_records.append(_risk_cluster(branch_name, zero_cost))
            stress_records.extend(
                _summary(branch_name, _apply_execution_model(branch_trades, model, cost))
                for cost in costs_bps
            )

    def _table(records: list[dict[str, object]], sort_keys: list[str]) -> pd.DataFrame:
        return pd.DataFrame(records).sort_values(sort_keys).reset_index(drop=True)

    stress_df = _table(stress_records, ["execution_model", "cost_bps_side", "branch"])
    latency_df = _table(latency_records, ["execution_model", "branch"])
    risk_df = _table(risk_records, ["execution_model", "branch"])

    outputs = (
        (stress_df, "dragon_refined_execution_stress.csv"),
        (latency_df, "dragon_refined_latency_review.csv"),
        (risk_df, "dragon_refined_risk_cluster_review.csv"),
    )
    for frame, filename in outputs:
        frame.to_csv(out_dir / filename, index=False, encoding="utf-8-sig")

    def _by_branch(frame: pd.DataFrame, model: str) -> pd.DataFrame:
        # Rows for one execution model, addressable by branch name.
        return frame[frame["execution_model"] == model].set_index("branch")

    same_close = _by_branch(latency_df, "same_close")
    next_open = _by_branch(latency_df, "next_open")
    next_close = _by_branch(latency_df, "next_close")
    is_next_open = stress_df["execution_model"] == "next_open"
    is_20_bps = stress_df["cost_bps_side"] == 20.0
    stress_20 = stress_df[is_next_open & is_20_bps].set_index("branch")
    risk_next_open = _by_branch(risk_df, "next_open")

    refined_key = "alpha_first_glued_refined_hot_cap"
    control_key = "alpha_first_selective_veto"
    lines = [
        "# Dragon Refined Stability Review",
        "",
        "## Scope",
        "- branches: `alpha_first_selective_veto` vs `alpha_first_glued_refined_hot_cap`",
        "- execution models: `same_close`, `next_open`, `next_close`",
        "- costs: `0`, `5`, `10`, `20 bps/side`",
        "",
        "## Latency Review",
        f"- same_close control vs refined: avg_return `{_format_pct(float(same_close.loc[control_key, 'avg_return']))}` -> `{_format_pct(float(same_close.loc[refined_key, 'avg_return']))}`, PF `{_format_num(float(same_close.loc[control_key, 'profit_factor']))}` -> `{_format_num(float(same_close.loc[refined_key, 'profit_factor']))}`",
        f"- next_open control vs refined: avg_return `{_format_pct(float(next_open.loc[control_key, 'avg_return']))}` -> `{_format_pct(float(next_open.loc[refined_key, 'avg_return']))}`, PF `{_format_num(float(next_open.loc[control_key, 'profit_factor']))}` -> `{_format_num(float(next_open.loc[refined_key, 'profit_factor']))}`",
        f"- next_close control vs refined: avg_return `{_format_pct(float(next_close.loc[control_key, 'avg_return']))}` -> `{_format_pct(float(next_close.loc[refined_key, 'avg_return']))}`, PF `{_format_num(float(next_close.loc[control_key, 'profit_factor']))}` -> `{_format_num(float(next_close.loc[refined_key, 'profit_factor']))}`",
        "",
        "## Cost + Next-Open Stress",
        f"- next_open + 20 bps/side control CAGR `{_format_pct(float(stress_20.loc[control_key, 'cagr']))}` vs refined `{_format_pct(float(stress_20.loc[refined_key, 'cagr']))}`",
        f"- next_open + 20 bps/side control PF `{_format_num(float(stress_20.loc[control_key, 'profit_factor']))}` vs refined `{_format_num(float(stress_20.loc[refined_key, 'profit_factor']))}`",
        f"- next_open + 20 bps/side control max DD `{_format_pct(float(stress_20.loc[control_key, 'max_drawdown']))}` vs refined `{_format_pct(float(stress_20.loc[refined_key, 'max_drawdown']))}`",
        "",
        "## Risk Cluster Review",
        f"- next_open control max loss streak `{int(risk_next_open.loc[control_key, 'max_loss_streak'])}` vs refined `{int(risk_next_open.loc[refined_key, 'max_loss_streak'])}`",
        f"- next_open control worst 5-trade sum `{_format_pct(float(risk_next_open.loc[control_key, 'worst_5trade_sum']))}` vs refined `{_format_pct(float(risk_next_open.loc[refined_key, 'worst_5trade_sum']))}`",
        f"- next_open control short-loss share `{_format_pct(float(risk_next_open.loc[control_key, 'short_loss_share']))}` vs refined `{_format_pct(float(risk_next_open.loc[refined_key, 'short_loss_share']))}`",
        f"- next_open control worst loss family `{risk_next_open.loc[control_key, 'worst_loss_family']}` vs refined `{risk_next_open.loc[refined_key, 'worst_loss_family']}`",
        "",
        "## Judgment",
        "- If refined still leads after next-bar execution and cost drag, its edge is less likely to be a same-bar backtest artifact.",
        "- If refined also keeps loss clustering and drawdown no worse than control, the branch is moving closer to a deployable research baseline.",
    ]
    report_text = "\n".join(lines) + "\n"
    (out_dir / "dragon_refined_stability_review.md").write_text(report_text, encoding="utf-8")
# Run the stability review only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|