# dragon_rule_layer_attribution.py (5.5 KB)
  1. from __future__ import annotations
  2. from pathlib import Path
  3. import pandas as pd
  4. from dragon_rule_catalog import classify_entry_reason, classify_exit_reason
  5. from dragon_shared import END_DATE, START_DATE, format_num, format_pct, profit_factor
# Strategy branch to analyse; main() drops rows whose `branch` column differs.
DEFAULT_BRANCH = "alpha_first_glued_refined_hot_cap"
# File name of the per-trade CSV (with classified layer/family/code columns)
# that main() writes next to this script.
DETAIL_OUTPUT = "dragon_rule_layer_attribution.csv"
# File name of the aggregated entry/exit statistics CSV written by main().
SUMMARY_OUTPUT = "dragon_rule_layer_attribution_summary.csv"
# File name of the markdown report written by main().
REPORT_OUTPUT = "dragon_rule_layer_attribution.md"
  10. def _aggregate(df: pd.DataFrame, layer_col: str, family_col: str, label: str) -> pd.DataFrame:
  11. if df.empty:
  12. return pd.DataFrame(columns=["view", "layer", "family", "trades", "win_rate", "avg_return", "median_return", "profit_factor"])
  13. records: list[dict[str, object]] = []
  14. grouped = df.groupby([layer_col, family_col], dropna=False)
  15. for (layer_value, family_value), group in grouped:
  16. returns = group["return_pct"].astype(float)
  17. row: dict[str, object] = {
  18. "view": label,
  19. "layer": str(layer_value),
  20. "family": str(family_value),
  21. }
  22. row.update(
  23. {
  24. "trades": int(len(group)),
  25. "win_rate": float((returns > 0).mean()),
  26. "avg_return": float(returns.mean()),
  27. "median_return": float(returns.median()),
  28. "profit_factor": profit_factor(returns),
  29. }
  30. )
  31. records.append(row)
  32. return pd.DataFrame(records).sort_values(["trades", "avg_return"], ascending=[False, False])
  33. def main() -> None:
  34. base_dir = Path(__file__).resolve().parent
  35. details_path = base_dir / "dragon_historical_trade_details.csv"
  36. details = pd.read_csv(details_path, encoding="utf-8-sig")
  37. details = details[details["branch"] == DEFAULT_BRANCH].copy()
  38. details = details[
  39. (details["buy_date"] >= START_DATE)
  40. & (details["buy_date"] <= END_DATE)
  41. & (details["sell_date"] >= START_DATE)
  42. & (details["sell_date"] <= END_DATE)
  43. ].copy()
  44. buy_meta = details["buy_reason"].map(classify_entry_reason)
  45. sell_meta = details["sell_reason"].map(classify_exit_reason)
  46. details["buy_layer"] = [meta.layer.value for meta in buy_meta]
  47. details["buy_family"] = [meta.family.value for meta in buy_meta]
  48. details["buy_code"] = [meta.code for meta in buy_meta]
  49. details["sell_layer"] = [meta.layer.value for meta in sell_meta]
  50. details["sell_family"] = [meta.family.value for meta in sell_meta]
  51. details["sell_code"] = [meta.code for meta in sell_meta]
  52. detail_cols = [
  53. "branch",
  54. "buy_date",
  55. "buy_reason",
  56. "buy_layer",
  57. "buy_family",
  58. "buy_code",
  59. "sell_date",
  60. "sell_reason",
  61. "sell_layer",
  62. "sell_family",
  63. "sell_code",
  64. "holding_days",
  65. "return_pct",
  66. ]
  67. detail_df = details[detail_cols].copy()
  68. detail_df.to_csv(base_dir / DETAIL_OUTPUT, index=False, encoding="utf-8-sig")
  69. buy_summary = _aggregate(detail_df, "buy_layer", "buy_family", label="entry_layer_family")
  70. sell_summary = _aggregate(detail_df, "sell_layer", "sell_family", label="exit_layer_family")
  71. summary_df = pd.concat([buy_summary, sell_summary], ignore_index=True)
  72. summary_df.to_csv(base_dir / SUMMARY_OUTPUT, index=False, encoding="utf-8-sig")
  73. unknown_entry = detail_df[detail_df["buy_layer"] == "unknown"]["buy_reason"].value_counts()
  74. unknown_exit = detail_df[detail_df["sell_layer"] == "unknown"]["sell_reason"].value_counts()
  75. lines: list[str] = [
  76. "# Dragon Rule Layer Attribution",
  77. "",
  78. f"- branch: `{DEFAULT_BRANCH}`",
  79. f"- evaluation_window: `{START_DATE}` to `{END_DATE}`",
  80. f"- trades: `{len(detail_df)}`",
  81. "",
  82. "## Entry Layer Summary",
  83. ]
  84. if buy_summary.empty:
  85. lines.append("- no data")
  86. else:
  87. for _, row in buy_summary.iterrows():
  88. lines.append(
  89. "- "
  90. f"`{row['layer']}/{row['family']}` "
  91. f"trades `{int(row['trades'])}` "
  92. f"win_rate `{format_pct(float(row['win_rate']))}` "
  93. f"avg_return `{format_pct(float(row['avg_return']))}` "
  94. f"profit_factor `{format_num(float(row['profit_factor']))}`"
  95. )
  96. lines.extend(["", "## Exit Layer Summary"])
  97. if sell_summary.empty:
  98. lines.append("- no data")
  99. else:
  100. for _, row in sell_summary.iterrows():
  101. lines.append(
  102. "- "
  103. f"`{row['layer']}/{row['family']}` "
  104. f"trades `{int(row['trades'])}` "
  105. f"win_rate `{format_pct(float(row['win_rate']))}` "
  106. f"avg_return `{format_pct(float(row['avg_return']))}` "
  107. f"profit_factor `{format_num(float(row['profit_factor']))}`"
  108. )
  109. lines.extend(["", "## Unknown Mapping Audit"])
  110. if unknown_entry.empty and unknown_exit.empty:
  111. lines.append("- no unknown reason mapping")
  112. else:
  113. if not unknown_entry.empty:
  114. for reason, count in unknown_entry.items():
  115. lines.append(f"- unknown entry reason `{reason}`: `{int(count)}`")
  116. if not unknown_exit.empty:
  117. for reason, count in unknown_exit.items():
  118. lines.append(f"- unknown exit reason `{reason}`: `{int(count)}`")
  119. lines.extend(
  120. [
  121. "",
  122. "## Artifacts",
  123. f"- `{DETAIL_OUTPUT}`",
  124. f"- `{SUMMARY_OUTPUT}`",
  125. ]
  126. )
  127. (base_dir / REPORT_OUTPUT).write_text("\n".join(lines) + "\n", encoding="utf-8")
# Generate the attribution artifacts only when run as a script, not on import.
if __name__ == "__main__":
    main()