Source code for maldideepkit.uncertainty.conformal

"""Split conformal prediction estimator.

Implements the LAC (Least Ambiguous set-valued Classifier) variant of
split conformal classification.
The non-conformity score for a calibration sample ``(x_i, y_i)`` is
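``1 - p_hat[y_i | x_i]``; a single empirical quantile of those scores
gives a marginal coverage guarantee over test samples with no further model
training. The conformal step itself is pure NumPy (PyTorch is used only to
run the wrapped classifier's forward pass): no dependency outside the package.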
"""

from __future__ import annotations

import warnings
from typing import Any

import numpy as np
import torch

from ..base.classifier import BaseSpectralClassifier
from ._base import BaseUncertaintyEstimator, _apply_classifier_preprocessing
from ._result import UncertaintyResult, _softmax


class ConformalPredictor(BaseUncertaintyEstimator):
    """Split conformal predictor with the LAC non-conformity score.

    Parameters
    ----------
    classifier : BaseSpectralClassifier
        A fitted classifier whose :meth:`predict_proba`-style softmax
        scores act as the underlying probability estimate.
    alpha : float, default=0.1
        Miscoverage level in ``(0, 1)``. The target marginal coverage is
        ``1 - alpha``.
    score : {"lac"}, default="lac"
        Non-conformity score. Currently only ``"lac"`` is supported:
        ``s(x, y) = 1 - p_hat(y | x)``.

    Attributes
    ----------
    quantile_ : float or None
        Calibrated non-conformity quantile; ``None`` until
        :meth:`calibrate` has been called.
    calibration_coverage_ : float or None
        Fraction of calibration samples whose true class falls inside
        its own prediction set; ``None`` until calibrated.
    n_calibration_ : int or None
        Number of calibration samples used; ``None`` until calibrated.

    Notes
    -----
    The :attr:`UncertaintyResult.uncertainty` field is the prediction
    set size normalised by ``n_classes``: ``1 / n_classes`` for a
    singleton set, ``1.0`` when every class is included. Empty sets (no
    class has a non-conformity score within the calibrated quantile)
    yield ``0`` and are counted in ``metadata["n_empty_sets"]``.
    """

    def __init__(
        self,
        classifier: BaseSpectralClassifier,
        alpha: float = 0.1,
        score: str = "lac",
    ) -> None:
        """Validate the hyper-parameters and reset the calibration state.

        Raises
        ------
        ValueError
            If ``alpha`` is not strictly between 0 and 1, or ``score`` is
            anything other than ``"lac"``.
        """
        super().__init__(classifier)
        if not 0.0 < float(alpha) < 1.0:
            raise ValueError(f"alpha must be in (0, 1); got {alpha!r}.")
        if score != "lac":
            raise ValueError(
                f"score={score!r} is not supported; only 'lac' is implemented."
            )
        self.alpha = float(alpha)
        self.score = score
        # Populated by ``calibrate``; ``None`` marks "not calibrated yet".
        self.quantile_: float | None = None
        self.calibration_coverage_: float | None = None
        self.n_calibration_: int | None = None

    def _classifier_proba(self, X_proc: np.ndarray) -> np.ndarray:
        """Run the classifier's ``model_`` in eval mode and return softmax.

        Parameters
        ----------
        X_proc : np.ndarray
            Already-preprocessed inputs; cast to ``float32`` before being
            handed to the model.

        Returns
        -------
        np.ndarray
            ``_softmax`` of the logits, divided first by the classifier's
            ``temperature_`` when that attribute exists and is not ``None``.
        """
        device = self.classifier._device_
        chunk = max(1, int(getattr(self.classifier, "batch_size", 32)))
        model = self.classifier.model_
        model.eval()

        # Keep the full array on the host and move one chunk at a time, so
        # device memory use is bounded by the batch size rather than by the
        # size of the whole input.
        X_host = torch.from_numpy(X_proc.astype(np.float32))
        logits_chunks: list[np.ndarray] = []
        with torch.no_grad():
            for start in range(0, X_host.shape[0], chunk):
                x_chunk = X_host[start : start + chunk].to(device)
                logits_chunks.append(model(x_chunk).detach().cpu().numpy())

        if logits_chunks:
            logits = np.concatenate(logits_chunks, axis=0)
        else:
            # No rows at all: still return a correctly shaped (0, n_classes) array.
            logits = np.empty((0, int(self.classifier.n_classes_)), dtype=np.float32)

        temperature = getattr(self.classifier, "temperature_", None)
        if temperature is not None:
            logits = logits / float(temperature)
        return _softmax(logits)

    @staticmethod
    def _prediction_sets(proba: np.ndarray, quantile: float) -> np.ndarray:
        """Return the boolean LAC prediction sets for ``proba``.

        A class belongs to the set when its non-conformity score
        ``1 - proba`` does not exceed ``quantile``. The score is computed
        with exactly the expression used to build the calibration scores,
        so a calibration sample whose score equals the quantile is always
        inside its own set. Comparing ``proba >= 1 - quantile`` instead is
        algebraically the same but can differ by one rounding step.
        """
        return (1.0 - proba) <= quantile

    def calibrate(self, X_cal: Any, y_cal: Any) -> "ConformalPredictor":
        """Compute the conformal quantile from calibration data.

        Parameters
        ----------
        X_cal : array-like or MaldiSet of shape (n_samples, n_bins)
            Calibration spectra.
        y_cal : array-like of shape (n_samples,)
            Calibration labels using the original label space stored in
            ``classifier.classes_``.

        Returns
        -------
        ConformalPredictor
            ``self``, with :attr:`quantile_`,
            :attr:`calibration_coverage_`, and :attr:`n_calibration_`
            populated.

        Raises
        ------
        ValueError
            If ``y_cal`` contains labels absent from
            ``classifier.classes_``, if ``X_cal`` and ``y_cal`` differ in
            length, or if the calibration set is empty.

        Warns
        -----
        UserWarning
            If the empirical coverage on the calibration set is below
            ``1 - alpha``.
        """
        if hasattr(y_cal, "to_numpy"):
            y_cal = y_cal.to_numpy()
        y_np = np.asarray(y_cal).ravel()

        classes = np.asarray(self.classifier.classes_)
        if not np.all(np.isin(y_np, classes)):
            unknown = np.setdiff1d(np.unique(y_np), classes).tolist()
            raise ValueError(f"y_cal contains labels not seen at fit time: {unknown}.")
        # Map original labels to column indices.
        # NOTE(review): searchsorted assumes ``classes_`` is sorted ascending - confirm.
        y_encoded = np.searchsorted(classes, y_np).astype(np.int64)

        X_proc = _apply_classifier_preprocessing(self.classifier, X_cal)
        if X_proc.shape[0] != y_encoded.shape[0]:
            raise ValueError(
                f"X_cal has {X_proc.shape[0]} rows but y_cal has {y_encoded.shape[0]}."
            )
        n_cal = int(X_proc.shape[0])
        if n_cal < 1:
            raise ValueError("Calibration set must contain at least one sample.")

        proba = self._classifier_proba(X_proc)
        rows = np.arange(n_cal)
        # LAC score: one minus the probability assigned to the true class.
        scores = 1.0 - proba[rows, y_encoded]
        sorted_scores = np.sort(scores)

        # Finite-sample corrected rank ceil((n + 1) * (1 - alpha)), clamped
        # into [1, n_cal] so it is always a valid 1-based index. When the
        # unclamped rank exceeds n_cal, the largest calibration score is used.
        k = int(np.ceil((n_cal + 1) * (1.0 - self.alpha)))
        k = min(max(k, 1), n_cal)
        quantile = float(sorted_scores[k - 1])

        # Sanity check: how often does the true class land in its own set?
        prediction_sets = self._prediction_sets(proba, quantile)
        coverage = float(np.mean(prediction_sets[rows, y_encoded]))
        target = 1.0 - self.alpha
        if coverage < target:
            warnings.warn(
                f"Empirical calibration coverage {coverage:.3f} is below the "
                f"target {target:.3f}. Consider a larger calibration set or a "
                "less aggressive alpha.",
                stacklevel=2,
            )

        self.quantile_ = quantile
        self.calibration_coverage_ = coverage
        self.n_calibration_ = n_cal
        return self

    def predict_with_uncertainty(self, X: Any) -> UncertaintyResult:
        """Return conformal predictions and prediction sets for ``X``.

        Parameters
        ----------
        X : Any
            Inputs to predict on, passed through the same classifier
            preprocessing as the calibration data.

        Returns
        -------
        UncertaintyResult
            ``method="conformal"`` with :attr:`epistemic` and
            :attr:`aleatoric` set to ``None``. Boolean prediction sets are
            stored in ``metadata["prediction_sets"]`` with shape
            ``(n_samples, n_classes)``; the empirical calibration coverage
            is stored in ``metadata["calibration_coverage"]``. The point
            prediction is the arg-max class, independent of the set.

        Raises
        ------
        RuntimeError
            If :meth:`calibrate` has not been called.
        """
        if self.quantile_ is None:
            raise RuntimeError(
                "ConformalPredictor has not been calibrated. "
                "Call calibrate(X_cal, y_cal) before predict_with_uncertainty."
            )

        X_proc = _apply_classifier_preprocessing(self.classifier, X)
        proba = self._classifier_proba(X_proc)
        quantile = float(self.quantile_)
        prediction_sets = self._prediction_sets(proba, quantile)

        n_classes = int(self.classifier.n_classes_)
        set_sizes = prediction_sets.sum(axis=1).astype(np.float64)
        # Set size as a fraction of all classes: 0 for an empty set, 1 for the full set.
        uncertainty = np.clip(set_sizes / float(n_classes), 0.0, 1.0)

        idx = np.argmax(proba, axis=1)
        predictions = np.asarray(self.classifier.classes_)[idx]

        metadata: dict[str, Any] = {
            "prediction_sets": prediction_sets,
            "calibration_coverage": self.calibration_coverage_,
            "quantile": quantile,
            "alpha": float(self.alpha),
            "n_calibration": self.n_calibration_,
            "set_sizes": set_sizes.astype(np.int64, copy=False),
            "n_empty_sets": int(np.sum(set_sizes == 0)),
        }
        return UncertaintyResult(
            predictions=predictions,
            proba_mean=proba.astype(np.float64, copy=False),
            uncertainty=uncertainty.astype(np.float64, copy=False),
            epistemic=None,
            aleatoric=None,
            method="conformal",
            metadata=metadata,
        )
# Public API: the only name exported by a star-import of this module.
__all__ = ["ConformalPredictor"]