Source code for utils.rank_fragility.leaderboard

"""Leaderboard scoring and ranking helpers."""

from __future__ import annotations

import pandas as pd

from .metrics import compute_metric, higher_is_better


def evaluate_models(
    pred_audit_df: pd.DataFrame,
    subset_ids,
    task: str,
    metric: str,
) -> pd.DataFrame:
    """Score each model on the rows selected by a set of molecule ids.

    Args:
        pred_audit_df: Prediction table. The code reads the columns
            ``molecule_id``, ``model``, ``y_true`` and ``y_pred``.
        subset_ids: Iterable of molecule ids to keep, or ``None`` to keep
            every row. Ids are compared after converting both sides to
            ``str``.
        task: Passed through to ``compute_metric`` unchanged.
        metric: Passed through to ``compute_metric`` unchanged.

    Returns:
        A frame with columns ``model``, ``n`` (row count for that model in
        the selection) and ``score``, one row per model in sorted model
        order. The frame is empty (columns only) when no rows are selected.
    """
    if subset_ids is not None:
        # Compare ids as strings so int-vs-str id mismatches still match.
        wanted = {str(item) for item in subset_ids}
        keep = pred_audit_df["molecule_id"].astype(str).isin(wanted)
        selected = pred_audit_df[keep].copy()
    else:
        selected = pred_audit_df.copy()

    records = [
        {
            "model": name,
            "n": int(len(frame)),
            "score": compute_metric(frame["y_true"], frame["y_pred"], task=task, metric=metric),
        }
        for name, frame in selected.groupby("model", sort=True)
    ]
    # Explicit columns keep the schema stable even when there are no records.
    return pd.DataFrame(records, columns=["model", "n", "score"])
def rank_models(scores_df: pd.DataFrame, metric: str) -> pd.DataFrame:
    """Rank models by score, with rank 1 as the best.

    Args:
        scores_df: Frame containing at least ``model`` and ``score`` columns.
        metric: Metric name; ``higher_is_better(metric)`` decides whether
            larger or smaller scores rank first.

    Returns:
        A frame with columns ``model``, ``score`` and ``rank``, ordered by
        rank and then by model name, with a fresh 0..n-1 index. Tied scores
        share the average of their ranks and missing scores are ranked last.
        An empty input yields an empty frame with those three columns.
    """
    if scores_df.empty:
        return pd.DataFrame(columns=["model", "score", "rank"])

    ranked = scores_df[["model", "score"]].copy()
    # When higher is better, rank in descending order so the top score gets 1.
    lower_wins = not higher_is_better(metric)
    ranked["rank"] = ranked["score"].rank(
        method="average",
        ascending=lower_wins,
        na_option="bottom",
    )
    # Stable sort keeps the ordering deterministic.
    ranked = ranked.sort_values(["rank", "model"], kind="mergesort")
    return ranked.reset_index(drop=True)
def original_leaderboard(pred_audit_df: pd.DataFrame, task: str, metric: str) -> pd.DataFrame:
    """Build the ranked leaderboard over every molecule in the table.

    Args:
        pred_audit_df: Prediction table; its ``molecule_id`` column supplies
            the id set handed to ``evaluate_models``.
        task: Passed through to ``evaluate_models``.
        metric: Passed through to ``evaluate_models`` and ``rank_models``.

    Returns:
        A frame with columns ``model``, ``n``, ``score`` and ``rank``, in the
        row order produced by ``rank_models``.
    """
    all_ids = pred_audit_df["molecule_id"].drop_duplicates().tolist()
    per_model = evaluate_models(pred_audit_df, all_ids, task=task, metric=metric)
    leaderboard = rank_models(per_model, metric=metric)

    # rank_models drops the per-model row count, so join it back on.
    counts = per_model[["model", "n"]]
    combined = leaderboard.merge(counts, on="model", how="left")
    return combined[["model", "n", "score", "rank"]]