Source code for utils.rank_fragility.fragility
"""Summary helpers for composition-driven leaderboard fragility."""
from __future__ import annotations
import numpy as np
import pandas as pd
def _numeric_target(value) -> float | None:
    """Convert a ``target_rate`` cell to a float, or ``None`` if it is not a rate.

    Args:
        value: Raw cell value; may be a number, a numeric string, the
            sentinel string ``"observed"`` (any letter case), or a missing
            value.

    Returns:
        The value as a ``float``, or ``None`` when it is the ``"observed"``
        sentinel, cannot be converted by ``float()``, or converts to NaN.
    """
    if isinstance(value, str) and value.lower() == "observed":
        return None
    try:
        rate = float(value)
    except (TypeError, ValueError, OverflowError):
        # Only the failures float() itself raises for unconvertible input.
        return None
    # NaN means "missing", exactly like None / pd.NA (which already fail the
    # conversion above). Letting it through would make any later min() over
    # the collected rates depend on row order.
    if np.isnan(rate):
        return None
    return rate
def _smallest_numeric_rate(df: pd.DataFrame, predicate) -> float | None:
    """Return the lowest numeric ``target_rate`` among rows accepted by ``predicate``.

    Args:
        df: Frame whose rows are visited in order; each row's ``target_rate``
            is converted with ``_numeric_target``.
        predicate: Callable taking one row and returning a truthy value for
            rows that should be considered.

    Returns:
        The smallest converted rate among accepted rows, or ``None`` when no
        row has a numeric rate and satisfies the predicate.
    """

    def _accepted(record) -> bool:
        # A predicate that raises (or whose result cannot be truth-tested)
        # counts as "no match" rather than aborting the scan.
        try:
            return bool(predicate(record))
        except Exception:
            return False

    smallest = None
    for _, record in df.iterrows():
        numeric_rate = _numeric_target(record.get("target_rate"))
        # Rows without a numeric rate are skipped before the predicate runs.
        if numeric_rate is None or not _accepted(record):
            continue
        if smallest is None or numeric_rate < smallest:
            smallest = numeric_rate
    return smallest
[docs]
def compute_fragility_summary(
    rank_probabilities: pd.DataFrame,
    sota_margin_by_composition: pd.DataFrame,
    sota_model: str,
) -> pd.DataFrame:
    """Summarize composition rates where the SOTA conclusion becomes fragile.

    One output row is produced per distinct non-null ``mode`` value found in
    either input frame.

    Args:
        rank_probabilities: Frame read through its ``mode``, ``model``,
            ``target_rate`` and ``probability_rank_1`` columns. Only rows
            whose ``model`` equals ``sota_model`` are used.
        sota_margin_by_composition: Frame read through its ``mode``,
            ``target_rate``, ``ci_lower``, ``ci_upper``,
            ``fraction_delta_positive`` and ``mean_delta`` columns.
        sota_model: Value matched against the ``model`` column of
            ``rank_probabilities``.

    Returns:
        A frame with the columns ``mode``, ``original_observed_rate``, the
        three ``smallest_target_rate_*`` columns, ``interpretation`` and
        ``interpretation_text``. When no mode is present the frame is empty
        but still carries these columns. A frame missing one of its key
        columns contributes no rows instead of raising ``KeyError``.
    """
    summary_columns = [
        "mode",
        "original_observed_rate",
        "smallest_target_rate_sota_rank_probability_below_0_5",
        "smallest_target_rate_sota_baseline_ci_includes_zero",
        "smallest_target_rate_fraction_delta_positive_below_0_95",
        "interpretation",
        "interpretation_text",
    ]
    modes = sorted(
        set(rank_probabilities.get("mode", pd.Series(dtype=object)).dropna().tolist())
        | set(sota_margin_by_composition.get("mode", pd.Series(dtype=object)).dropna().tolist())
    )
    # The mode list above tolerates a frame without a "mode" column (e.g. an
    # empty pd.DataFrame()), so the per-mode filtering must tolerate it too.
    rank_has_keys = "mode" in rank_probabilities.columns and "model" in rank_probabilities.columns
    margins_have_mode = "mode" in sota_margin_by_composition.columns

    rows: list[dict] = []
    for mode in modes:
        if rank_has_keys:
            rp = rank_probabilities[
                (rank_probabilities["mode"] == mode) & (rank_probabilities["model"] == sota_model)
            ]
        else:
            rp = rank_probabilities.iloc[0:0]
        if margins_have_mode:
            margins = sota_margin_by_composition[sota_margin_by_composition["mode"] == mode]
        else:
            margins = sota_margin_by_composition.iloc[0:0]

        has_observed = (
            (rp.get("target_rate", pd.Series(dtype=object)).astype(str) == "observed").any()
            or (margins.get("target_rate", pd.Series(dtype=object)).astype(str) == "observed").any()
        )
        observed_rate = "observed" if has_observed else np.nan

        rank_fragile_rate = _smallest_numeric_rate(
            rp,
            lambda row: pd.notna(row.get("probability_rank_1")) and row.get("probability_rank_1") < 0.5,
        )
        ci_zero_rate = _smallest_numeric_rate(
            margins,
            lambda row: pd.notna(row.get("ci_lower"))
            and pd.notna(row.get("ci_upper"))
            and row.get("ci_lower") <= 0 <= row.get("ci_upper"),
        )
        fraction_rate = _smallest_numeric_rate(
            margins,
            lambda row: pd.notna(row.get("fraction_delta_positive"))
            and row.get("fraction_delta_positive") < 0.95,
        )

        # A missing "mean_delta" column is treated like a column of missing
        # values, matching how the row predicates treat absent columns.
        mean_delta = margins.get("mean_delta")
        no_mean_delta = mean_delta is None or bool(mean_delta.isna().all())

        if rp.empty and margins.empty:
            interpretation = "inconclusive due to infeasible or small panels"
        elif rank_fragile_rate is not None:
            interpretation = "rank-fragile"
        elif ci_zero_rate is not None or fraction_rate is not None:
            interpretation = "statistically indistinguishable"
        elif not margins.empty and no_mean_delta:
            interpretation = "inconclusive due to infeasible or small panels"
        else:
            interpretation = "stable"

        rows.append(
            {
                "mode": mode,
                "original_observed_rate": observed_rate,
                "smallest_target_rate_sota_rank_probability_below_0_5": rank_fragile_rate,
                "smallest_target_rate_sota_baseline_ci_includes_zero": ci_zero_rate,
                "smallest_target_rate_fraction_delta_positive_below_0_95": fraction_rate,
                "interpretation": interpretation,
                "interpretation_text": f"SOTA conclusion is {interpretation}.",
            }
        )
    # Passing the columns explicitly keeps the schema stable when rows is empty.
    return pd.DataFrame(rows, columns=summary_columns)