# openclaw / cyb50-quant
							from __future__ import annotations

import json
from dataclasses import asdict
from pathlib import Path

import pandas as pd

from dragon_strategy_config import StrategyConfig


def _load_csv(base_dir: Path, name: str) -> pd.DataFrame:
    """Read the CSV file ``name`` located in ``base_dir`` into a DataFrame.

    The file is decoded as ``utf-8-sig`` so a leading byte-order mark, if
    present, does not end up in the first column name.
    """
    csv_path = base_dir / name
    frame = pd.read_csv(csv_path, encoding="utf-8-sig")
    return frame


def _profit_factor(series: pd.Series) -> float:
    """Return gross profit divided by gross loss for a series of returns.

    Gross profit is the sum of the strictly positive values; gross loss is
    the negated sum of the strictly negative values. When there are no
    losses the result is ``inf`` if there is any profit and ``0.0`` otherwise.
    """
    winners = series[series > 0]
    losers = series[series < 0]
    gross_profit = winners.sum()
    gross_loss = -losers.sum()

    if gross_loss != 0:
        return float(gross_profit / gross_loss)

    # No losing values at all: the ratio is undefined, so report the limits.
    if gross_profit > 0:
        return float("inf")
    return 0.0


def _format_pct(value: float) -> str:
    """Format ``value`` as a percentage with two decimals.

    Missing values (as judged by ``pd.isna``) render as ``"NA"`` and positive
    infinity renders as ``"inf"``.
    """
    if pd.isna(value):
        return "NA"
    return "inf" if value == float("inf") else format(value, ".2%")


def _format_num(value: float) -> str:
    """Format ``value`` as a fixed-point number with two decimals.

    Missing values (as judged by ``pd.isna``) render as ``"NA"`` and positive
    infinity renders as ``"inf"``.
    """
    if pd.isna(value):
        return "NA"
    return "inf" if value == float("inf") else format(value, ".2f")


def _baseline_snapshot(config: StrategyConfig) -> dict[str, object]:
    """Return the config as a plain dict with ``disabled_rules`` as a sorted list.

    All fields come from ``dataclasses.asdict``; the ``disabled_rules`` entry
    is then replaced by ``sorted(config.disabled_rules)``.
    """
    return {
        **asdict(config),
        "disabled_rules": sorted(config.disabled_rules),
    }


def _entry_family_lines(frame: pd.DataFrame) -> list[str]:
    """Render one markdown bullet per entry-contribution row (trades, avg_return, win_rate)."""
    return [
        f"- `{row['buy_reason']}`: trades `{int(row['trades'])}`, avg_return `{_format_pct(float(row['avg_return']))}`, "
        f"win_rate `{_format_pct(float(row['win_rate']))}`"
        for _, row in frame.iterrows()
    ]


def _family_persistence_lines(frame: pd.DataFrame) -> list[str]:
    """Render one markdown bullet per walk-forward family-stability row."""
    return [
        f"- `{row['entry_family']}`: years_active `{int(row['years_active'])}`, positive_years `{int(row['positive_years'])}`, "
        f"negative_years `{int(row['negative_years'])}`, avg_yearly_avg_return `{_format_pct(float(row['avg_yearly_avg_return']))}`"
        for _, row in frame.iterrows()
    ]


def main() -> None:
    """Build the formal research baseline report and the config snapshot.

    Reads the strategy trades, the fit markdown, and the research CSV
    summaries that sit next to this script, then writes
    ``dragon_baseline_config_snapshot.json`` and
    ``dragon_formal_research_baseline.md`` into the same directory.

    All inputs are validated and both output texts are fully built before
    anything is written, so a failed run does not leave a fresh report or
    snapshot behind.

    Raises:
        RuntimeError: If the fit markdown lacks the expected workbook
            alignment marker, or if the ablation table has no ``baseline``
            experiment row.
    """
    base_dir = Path(__file__).resolve().parent

    trades = _load_csv(base_dir, "dragon_strategy_trades.csv")
    fit = (base_dir / "dragon_strategy_fit.md").read_text(encoding="utf-8")
    entry_contrib = _load_csv(base_dir, "dragon_rule_contribution_entry.csv")
    ablation = _load_csv(base_dir, "dragon_rule_ablation.csv")
    sensitivity = _load_csv(base_dir, "dragon_threshold_sensitivity_summary.csv")
    walk_forward = _load_csv(base_dir, "dragon_walk_forward_summary.csv")
    family_stability = _load_csv(base_dir, "dragon_walk_forward_family_stability.csv")

    # Keep the baseline fit markdown referenced by this script as an explicit dependency.
    # Checked up front so an unexpected fit file aborts before any output is produced.
    if "real_trade BUY: workbook `106`" not in fit:
        raise RuntimeError("Unexpected baseline fit file contents; baseline report expects the aligned workbook-preserving version.")

    baseline_rows = ablation[ablation["experiment"] == "baseline"]
    if baseline_rows.empty:
        raise RuntimeError("dragon_rule_ablation.csv has no `baseline` experiment row; cannot build the baseline report.")
    baseline_ablation = baseline_rows.iloc[0]

    config = StrategyConfig()
    snapshot = _baseline_snapshot(config)
    snapshot_text = json.dumps(snapshot, indent=2, ensure_ascii=False) + "\n"

    returns = trades["return_pct"].astype(float)
    overall_profit_factor = _profit_factor(returns)

    core_alpha_names = ["glued_buy", "early_crash_probe_buy", "oversold_recovery_buy"]
    structural_support_names = ["dual_gold_resonance_buy", "deep_oversold_rebound_buy:classic_oversold"]
    active_research_names = [
        "deep_oversold_rebound_buy:positive_b1_rebound",
        "deep_oversold_rebound_buy:shallow_false_start",
        "deep_oversold_rebound_buy:mixed_oversold",
        "deep_oversold_rebound_buy:deep_capitulation",
        "post_washout_kdj_reentry_buy",
        "oversold_reversal_after_ql_buy",
        "post_sell_rebound_buy",
    ]

    core_alpha = entry_contrib[entry_contrib["buy_reason"].isin(core_alpha_names)].copy()
    structural_support = entry_contrib[entry_contrib["buy_reason"].isin(structural_support_names)].copy()
    weak_research = entry_contrib[entry_contrib["buy_reason"].isin(active_research_names)].copy()
    # Worst average return first; ties broken by the larger trade count.
    weak_research = weak_research.sort_values(["avg_return", "trades"], ascending=[True, False])

    # Element-wise comparisons against the literal booleans are deliberate:
    # rows whose flag is neither True nor False land in neither group.
    fragile = sensitivity[sensitivity["stable_real_alignment"] == False].sort_values("avg_return_range", ascending=False)
    robust = sensitivity[sensitivity["stable_real_alignment"] == True].sort_values("avg_return_range")

    anchored = walk_forward[walk_forward["scheme"] == "anchored_expanding"].copy()
    rolling = walk_forward[walk_forward["scheme"] == "rolling_3y"].copy()
    anchored_positive = int((anchored["test_avg_return"] > 0).sum()) if not anchored.empty else 0
    anchored_total = int(len(anchored))
    rolling_positive = int((rolling["test_avg_return"] > 0).sum()) if not rolling.empty else 0
    rolling_total = int(len(rolling))

    stable_families = family_stability[
        (family_stability["avg_yearly_avg_return"] > 0)
        & (family_stability["positive_years"] >= family_stability["negative_years"])
    ].sort_values(["avg_yearly_avg_return", "total_trades"], ascending=[False, False])
    unstable_families = family_stability[
        (family_stability["avg_yearly_avg_return"] < 0)
        | (family_stability["negative_years"] > family_stability["positive_years"])
    ].sort_values(["avg_yearly_avg_return", "min_yearly_avg_return"])

    lines = [
        "# Dragon Formal Research Baseline",
        "",
        "## Scope",
        "- Universe: `399673` only.",
        "- Objective: preserve workbook real-trade alignment while upgrading the strategy into a researchable, testable, parameter-aware baseline.",
        "- Current baseline type: `workbook-preserving baseline`.",
        "",
        "## Locked Baseline Metrics",
        f"- real BUY overlap: `{int(baseline_ablation['real_buy_overlap'])}/106`",
        f"- real SELL overlap: `{int(baseline_ablation['real_sell_overlap'])}/105`",
        f"- aux BUY overlap: `{int(baseline_ablation['aux_buy_overlap'])}/1`",
        f"- aux SELL overlap: `{int(baseline_ablation['aux_sell_overlap'])}/21`",
        f"- strategy trades: `{int(baseline_ablation['trades'])}`",
        f"- win_rate: `{_format_pct(float(baseline_ablation['win_rate']))}`",
        f"- avg_return: `{_format_pct(float(baseline_ablation['avg_return']))}`",
        f"- median_return: `{_format_pct(float(baseline_ablation['median_return']))}`",
        f"- profit_factor: `{_format_num(overall_profit_factor)}`",
        "",
        "## Baseline Config Snapshot",
        "- Snapshot file: `dragon_baseline_config_snapshot.json`.",
        "- Rule switches default to the current aligned strategy baseline; any future research branch should fork from this snapshot rather than editing against memory.",
        "",
        "## Core Alpha Families",
    ]
    lines.extend(_entry_family_lines(core_alpha))

    lines.extend(["", "## Structural Support Families"])
    lines.extend(_entry_family_lines(structural_support))

    lines.extend(
        [
            "",
            "## Frozen Bridge Rules",
            "- `predictive_b1_break_exit`: bridge-style split-chain exit; loosening worsens results, tightening breaks workbook alignment.",
            "- `predictive_error_reentry_buy`: part of the same bridge chain; should be evaluated together with the predictive-break exit, not as an isolated entry.",
            "- Any internal hold gates added only to preserve workbook-aligned split paths should remain frozen unless the objective explicitly changes away from workbook preservation.",
            "",
            "## Redundant Or Label-Only Families",
            "- `non_glued_positive_expansion_buy`: now absorbed by `dual_gold_resonance_buy` on the same in-sample dates; treat as redundant label, not independent alpha.",
            "- Auxiliary same-side post-exit sell compression: keep as hygiene logic, not as a primary optimization frontier.",
            "",
            "## Active Research Families",
        ]
    )
    lines.extend(_entry_family_lines(weak_research))

    lines.extend(
        [
            "",
            "## Threshold Classification",
            "- Fragile parameters: change them only inside explicit experiment branches and always rerun full alignment diagnostics.",
        ]
    )
    for _, row in fragile.iterrows():
        lines.append(
            f"- `{row['parameter']}`: avg_return_range `{_format_pct(float(row['avg_return_range']))}`, "
            f"min real BUY `{int(row['real_buy_overlap_min'])}`, min real SELL `{int(row['real_sell_overlap_min'])}`"
        )

    lines.append("- Relatively robust parameters: acceptable first candidates for future controlled sweeps.")
    # Only the four parameters with the narrowest avg_return_range are listed.
    for _, row in robust.head(4).iterrows():
        lines.append(
            f"- `{row['parameter']}`: avg_return_range `{_format_pct(float(row['avg_return_range']))}`, "
            f"profit_factor_range `{_format_num(float(row['profit_factor_range']))}`"
        )

    lines.extend(
        [
            "",
            "## Temporal Stability",
            f"- Anchored expanding windows: positive out-of-sample years `{anchored_positive}/{anchored_total}`.",
            f"- Rolling 3Y windows: positive out-of-sample years `{rolling_positive}/{rolling_total}`.",
            "- This validation holds the strategy fixed; it is a time-stability audit, not a refit-based optimizer.",
            "- Strong family persistence candidates:",
        ]
    )
    lines.extend(_family_persistence_lines(stable_families.head(5)))

    lines.append("- Weak family persistence candidates:")
    lines.extend(_family_persistence_lines(unstable_families.head(5)))

    lines.extend(
        [
            "",
            "## Operating Rules For Future Research",
            "- Do not trade off `106/106` and `105/105` alignment silently. Any alignment loss must be treated as a branch with an explicit objective change.",
            "- Do not blind-tune predictive-break thresholds. That family is frozen under the current baseline objective.",
            "- Do not optimize the auxiliary layer first. The main leverage is now in weak entry-family redesign and short-holding loss control.",
            "- New ideas should first be tested as local attribution experiments, then full-sample reruns, then temporal-stability checks.",
            "",
            "## Next Research Track",
            "- Track A: redesign remaining `deep_oversold_rebound_buy` weak subtypes with delayed confirmation or fallback routing, not blunt deletion.",
            "- Track B: explicitly target short holding buckets `00-05d` and `06-10d`, which remain the main quality drag.",
            "- Track C: separate a future `alpha-first` research branch from this workbook-preserving baseline if the goal later changes from reconstruction to pure performance.",
        ]
    )
    report_text = "\n".join(lines) + "\n"

    # Write both artifacts only after every input was validated and both texts were built.
    (base_dir / "dragon_baseline_config_snapshot.json").write_text(snapshot_text, encoding="utf-8")
    (base_dir / "dragon_formal_research_baseline.md").write_text(report_text, encoding="utf-8")


# Build the report only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()