# events.py (5.2 KB)
  1. from __future__ import annotations
  2. import numpy as np
  3. import pandas as pd
  4. EVENT_LOG_COLUMNS = [
  5. 'event_date',
  6. 'from_state',
  7. 'to_state',
  8. 'event_state',
  9. 'event_type',
  10. 'horizon',
  11. 'confirm_horizon',
  12. 'asset_forward_close_return',
  13. 'strategy_forward_return',
  14. 'target_exposure',
  15. 'has_risk_off_within_confirm',
  16. ]
  17. EVENT_SUMMARY_COLUMNS = [
  18. 'event_type',
  19. 'event_state',
  20. 'count',
  21. 'avg_asset_forward_return',
  22. 'avg_strategy_forward_return',
  23. 'avg_target_exposure',
  24. ]
  25. def _forward_strategy_return(series: pd.Series, horizon: int) -> pd.Series:
  26. values = series.fillna(0.0).to_numpy(dtype=float)
  27. out = np.full(len(values), np.nan, dtype=float)
  28. for i in range(len(values)):
  29. end = i + horizon
  30. if end < len(values):
  31. out[i] = np.prod(1.0 + values[i + 1 : end + 1]) - 1.0
  32. return pd.Series(out, index=series.index, dtype=float)
  33. def _classify_event_type(
  34. *,
  35. from_state: str,
  36. to_state: str,
  37. asset_forward_close_return: float,
  38. has_risk_off_within_confirm: bool,
  39. ) -> str:
  40. if to_state == 'risk_off':
  41. return 'crash_onset'
  42. if from_state == 'euphoric_late' and pd.notna(asset_forward_close_return) and asset_forward_close_return < 0.0:
  43. return 'crowded_unwind'
  44. if from_state in {'risk_off', 'chop'} and to_state in {'repair', 'trend'}:
  45. if pd.notna(asset_forward_close_return) and asset_forward_close_return > 0.0 and not has_risk_off_within_confirm:
  46. return 'true_repair'
  47. return 'false_rebound'
  48. return 'state_transition'
  49. def build_transition_event_log(
  50. df: pd.DataFrame,
  51. *,
  52. horizon: int = 10,
  53. confirm_horizon: int = 10,
  54. ) -> pd.DataFrame:
  55. if 'state' not in df.columns:
  56. raise ValueError('state column required for event diagnostics.')
  57. if 'close' not in df.columns:
  58. raise ValueError('close column required for event diagnostics.')
  59. if 'strategy_return_net' not in df.columns:
  60. raise ValueError('strategy_return_net column required for event diagnostics.')
  61. if horizon <= 0 or confirm_horizon <= 0:
  62. raise ValueError('horizon and confirm_horizon must be positive integers.')
  63. out = df.copy().sort_index()
  64. out['state_prev'] = out['state'].shift(1)
  65. out['state_change'] = out['state'] != out['state_prev']
  66. if not out.empty:
  67. out.iloc[0, out.columns.get_loc('state_change')] = False
  68. out['asset_forward_close_return'] = out['close'].shift(-horizon) / out['close'] - 1.0
  69. out['strategy_forward_return'] = _forward_strategy_return(out['strategy_return_net'], horizon=horizon)
  70. events = out[out['state_change']].copy()
  71. if events.empty:
  72. return pd.DataFrame(columns=EVENT_LOG_COLUMNS)
  73. rows: list[dict[str, object]] = []
  74. states = out['state'].astype(str)
  75. for ts, row in events.iterrows():
  76. from_state = str(row['state_prev'])
  77. to_state = str(row['state'])
  78. try:
  79. pos = int(out.index.get_loc(ts))
  80. except Exception:
  81. continue
  82. future_states = states.iloc[pos + 1 : pos + 1 + confirm_horizon]
  83. has_risk_off = bool((future_states == 'risk_off').any())
  84. asset_fwd = float(row['asset_forward_close_return']) if pd.notna(row['asset_forward_close_return']) else np.nan
  85. event_type = _classify_event_type(
  86. from_state=from_state,
  87. to_state=to_state,
  88. asset_forward_close_return=asset_fwd,
  89. has_risk_off_within_confirm=has_risk_off,
  90. )
  91. rows.append(
  92. {
  93. 'event_date': ts,
  94. 'from_state': from_state,
  95. 'to_state': to_state,
  96. 'event_state': to_state,
  97. 'event_type': event_type,
  98. 'horizon': int(horizon),
  99. 'confirm_horizon': int(confirm_horizon),
  100. 'asset_forward_close_return': asset_fwd,
  101. 'strategy_forward_return': (
  102. float(row['strategy_forward_return']) if pd.notna(row['strategy_forward_return']) else np.nan
  103. ),
  104. 'target_exposure': float(row['target_exposure']) if 'target_exposure' in row and pd.notna(row['target_exposure']) else np.nan,
  105. 'has_risk_off_within_confirm': has_risk_off,
  106. }
  107. )
  108. event_log = pd.DataFrame(rows)
  109. if event_log.empty:
  110. return pd.DataFrame(columns=EVENT_LOG_COLUMNS)
  111. return event_log[EVENT_LOG_COLUMNS]
  112. def summarize_transition_events(
  113. df: pd.DataFrame,
  114. horizon: int = 10,
  115. confirm_horizon: int = 10,
  116. ) -> pd.DataFrame:
  117. event_log = build_transition_event_log(df, horizon=horizon, confirm_horizon=confirm_horizon)
  118. if event_log.empty:
  119. return pd.DataFrame(columns=EVENT_SUMMARY_COLUMNS)
  120. summary = (
  121. event_log.groupby(['event_type', 'event_state'])
  122. .agg(
  123. count=('event_type', 'size'),
  124. avg_asset_forward_return=('asset_forward_close_return', 'mean'),
  125. avg_strategy_forward_return=('strategy_forward_return', 'mean'),
  126. avg_target_exposure=('target_exposure', 'mean'),
  127. )
  128. .reset_index()
  129. )
  130. return summary.sort_values(['count', 'event_type'], ascending=[False, True]).reset_index(drop=True)