Source code for utils.rank_fragility.fragility

"""Summary helpers for composition-driven leaderboard fragility."""

from __future__ import annotations

import numpy as np
import pandas as pd


def _numeric_target(value) -> float | None:
    """Convert a ``target_rate`` cell to a float, or ``None`` if it is not numeric.

    Args:
        value: Raw cell value; may be a number, a numeric string, the label
            ``"observed"`` (any letter case), or a missing value.

    Returns:
        The value as a ``float``, or ``None`` when the value is the
        ``"observed"`` label, cannot be converted by ``float()``, or converts
        to NaN.
    """
    if isinstance(value, str) and value.lower() == "observed":
        return None
    try:
        rate = float(value)
    except (TypeError, ValueError, OverflowError):
        # None / pd.NA / arbitrary objects, unparsable text, oversized ints.
        return None
    # NaN is unordered: letting it through would make a later min() over the
    # collected rates depend on row order, so treat it as "no numeric rate".
    return None if np.isnan(rate) else rate


def _smallest_numeric_rate(df: pd.DataFrame, predicate) -> float | None:
    """Return the lowest numeric ``target_rate`` among rows accepted by *predicate*.

    Args:
        df: Frame whose rows are visited in order; each row's ``target_rate``
            is converted with ``_numeric_target``.
        predicate: Callable receiving one row and returning a truthy value
            when that row qualifies.

    Returns:
        The minimum converted rate over the qualifying rows, or ``None`` when
        no row qualifies.
    """

    def _accepted_rates():
        for _, record in df.iterrows():
            numeric_rate = _numeric_target(record.get("target_rate"))
            if numeric_rate is None:
                continue
            try:
                # The truth test stays inside the try: an ambiguous result
                # (for example pd.NA) counts as a failed predicate too.
                accepted = bool(predicate(record))
            except Exception:
                # Best effort: a row the predicate cannot evaluate is skipped.
                continue
            if accepted:
                yield numeric_rate

    return min(_accepted_rates(), default=None)



[docs]
def compute_fragility_summary(
    rank_probabilities: pd.DataFrame,
    sota_margin_by_composition: pd.DataFrame,
    sota_model: str,
) -> pd.DataFrame:
    """Summarize composition rates where the SOTA conclusion becomes fragile.

    One output row is produced for every distinct non-null ``mode`` value
    found in either input frame. For each mode the function reports the
    smallest numeric ``target_rate`` at which each fragility criterion is met
    and derives a categorical interpretation from those results.

    A frame that lacks the ``mode`` column (or, for *rank_probabilities*, the
    ``model`` column) simply contributes no rows; this keeps a column-less
    empty frame from raising ``KeyError`` when the other frame has modes.

    Args:
        rank_probabilities: Frame read through the columns ``mode``,
            ``model``, ``target_rate`` and ``probability_rank_1``. Only rows
            whose ``model`` equals *sota_model* are considered.
        sota_margin_by_composition: Frame read through the columns ``mode``,
            ``target_rate``, ``ci_lower``, ``ci_upper``,
            ``fraction_delta_positive`` and ``mean_delta``.
        sota_model: Value matched against the ``model`` column.

    Returns:
        A frame with one row per mode and the columns ``mode``,
        ``original_observed_rate``,
        ``smallest_target_rate_sota_rank_probability_below_0_5``,
        ``smallest_target_rate_sota_baseline_ci_includes_zero``,
        ``smallest_target_rate_fraction_delta_positive_below_0_95``,
        ``interpretation`` and ``interpretation_text``. The frame has no
        columns when neither input contains a non-null mode.

    Raises:
        KeyError: If the ``mean_delta`` column has to be consulted for a mode
            (margin rows exist and no earlier criterion fired) but is absent.
    """
    no_modes = pd.Series(dtype=object)
    modes = sorted(
        set(rank_probabilities.get("mode", no_modes).dropna().tolist())
        | set(sota_margin_by_composition.get("mode", no_modes).dropna().tolist())
    )
    rows: list[dict] = []
    for mode in modes:
        rp = _rows_matching(rank_probabilities, mode=mode, model=sota_model)
        margins = _rows_matching(sota_margin_by_composition, mode=mode)
        observed_rate = (
            "observed"
            if _mentions_observed_rate(rp) or _mentions_observed_rate(margins)
            else np.nan
        )

        rank_fragile_rate = _smallest_numeric_rate(
            rp, lambda row: pd.notna(row.get("probability_rank_1")) and row.get("probability_rank_1") < 0.5
        )
        ci_zero_rate = _smallest_numeric_rate(
            margins,
            lambda row: pd.notna(row.get("ci_lower"))
            and pd.notna(row.get("ci_upper"))
            and row.get("ci_lower") <= 0 <= row.get("ci_upper"),
        )
        fraction_rate = _smallest_numeric_rate(
            margins,
            lambda row: pd.notna(row.get("fraction_delta_positive")) and row.get("fraction_delta_positive") < 0.95,
        )

        # Precedence: no data at all, then rank fragility, then statistical
        # ambiguity, then "margins exist but every mean_delta is missing".
        if rp.empty and margins.empty:
            interpretation = "inconclusive due to infeasible or small panels"
        elif rank_fragile_rate is not None:
            interpretation = "rank-fragile"
        elif ci_zero_rate is not None or fraction_rate is not None:
            interpretation = "statistically indistinguishable"
        elif not margins.empty and margins["mean_delta"].isna().all():
            interpretation = "inconclusive due to infeasible or small panels"
        else:
            interpretation = "stable"

        rows.append(
            {
                "mode": mode,
                "original_observed_rate": observed_rate,
                "smallest_target_rate_sota_rank_probability_below_0_5": rank_fragile_rate,
                "smallest_target_rate_sota_baseline_ci_includes_zero": ci_zero_rate,
                "smallest_target_rate_fraction_delta_positive_below_0_95": fraction_rate,
                "interpretation": interpretation,
                "interpretation_text": f"SOTA conclusion is {interpretation}.",
            }
        )
    return pd.DataFrame(rows)


def _rows_matching(frame: pd.DataFrame, **equals) -> pd.DataFrame:
    """Return the rows of *frame* whose named columns equal the given values.

    If *frame* lacks any of the named columns, an empty selection with the
    frame's existing columns is returned instead of raising ``KeyError``.
    """
    if not set(equals).issubset(frame.columns):
        return frame.iloc[0:0]
    mask = None
    for column, wanted in equals.items():
        matches = frame[column] == wanted
        mask = matches if mask is None else mask & matches
    return frame if mask is None else frame[mask]


def _mentions_observed_rate(frame: pd.DataFrame) -> bool:
    """Tell whether any ``target_rate`` cell of *frame* reads "observed".

    The comparison ignores letter case so it agrees with how
    ``_numeric_target`` recognises the same label.
    """
    rates = frame.get("target_rate", pd.Series(dtype=object))
    return bool(rates.astype(str).str.lower().eq("observed").any())