# Source code for maldideepkit.utils.ensemble

"""Simple mean-of-``predict_proba`` ensemble for spectral classifiers.

A thin wrapper that fits each member independently and averages
``predict_proba`` across them.
"""

from __future__ import annotations

from pathlib import Path
from typing import Any

import numpy as np


class SpectralEnsemble:
    """Average ``predict_proba`` over several spectral classifiers.

    Parameters
    ----------
    classifiers : sequence of BaseSpectralClassifier
        Member classifiers, fitted or unfitted. :meth:`fit` calls each
        member's own ``fit`` in order. Members that are already fitted can
        be used for prediction without calling :meth:`fit`, provided they
        all expose the same ``classes_``.

    Attributes
    ----------
    classifiers : list
        The member classifiers, in the order they were given.
    classes_ : np.ndarray
        Class labels shared by all members. Set by :meth:`fit` and
        :meth:`load`; members must agree on the label set.

    Raises
    ------
    ValueError
        If ``classifiers`` is empty.
    """

    def __init__(self, classifiers: list[Any]) -> None:
        if not classifiers:
            raise ValueError("SpectralEnsemble needs at least one classifier.")
        # Copy so later mutation of the caller's sequence cannot change
        # the ensemble's membership.
        self.classifiers = list(classifiers)

    def fit(self, X: Any, y: Any) -> "SpectralEnsemble":
        """Fit every member on the same ``(X, y)``.

        Returns
        -------
        SpectralEnsemble
            ``self``, to allow call chaining.

        Raises
        ------
        ValueError
            If a member reports a ``classes_`` different from the first
            member's. Members after the offending one are left unfitted.
        """
        first_classes: np.ndarray | None = None
        for i, clf in enumerate(self.classifiers):
            clf.fit(X, y)
            if first_classes is None:
                first_classes = np.asarray(clf.classes_)
            elif not np.array_equal(clf.classes_, first_classes):
                raise ValueError(
                    f"Ensemble member {i} produced classes_={clf.classes_!r}; "
                    f"expected {first_classes!r}. All members must see the same labels."
                )
        self.classes_ = first_classes
        return self

    def _resolve_classes(self) -> np.ndarray | None:
        """Return the ensemble's label set, or ``None`` if it is unknown.

        Prefers ``self.classes_`` (set by ``fit``/``load``). Otherwise the
        labels are derived from the members, which supports ensembles built
        from already-fitted classifiers. Nothing is cached on ``self``.
        """
        stored = getattr(self, "classes_", None)
        if stored is not None:
            return np.asarray(stored)

        reference: np.ndarray | None = None
        for i, clf in enumerate(self.classifiers):
            member = getattr(clf, "classes_", None)
            if member is None:
                # At least one member is unfitted: no usable label set.
                return None
            member = np.asarray(member)
            if reference is None:
                reference = member
            elif not np.array_equal(member, reference):
                raise ValueError(
                    f"Ensemble member {i} produced classes_={member!r}; "
                    f"expected {reference!r}. All members must see the same labels."
                )
        return reference

    @staticmethod
    def _index_path(base: Path) -> Path:
        """Return the index-file path for an extension-stripped ``base``.

        The marker is appended to the full name rather than applied with
        ``with_suffix``, which would replace a remaining dotted component
        (``run.v2`` -> ``run.ensemble.json``) and make distinct ensembles
        share one index file.
        """
        return base.with_name(base.name + ".ensemble.json")

    def predict_proba(self, X: Any) -> np.ndarray:
        """Return the mean of member ``predict_proba`` outputs."""
        probas = [clf.predict_proba(X) for clf in self.classifiers]
        # Stack along a new leading "member" axis and average it away.
        stacked = np.stack(probas, axis=0)
        return stacked.mean(axis=0)

    def predict(self, X: Any) -> np.ndarray:
        """Argmax of the averaged probabilities.

        Per-member post-hoc calibration / thresholds are intentionally
        not averaged.

        Raises
        ------
        AttributeError
            If no label set is available (the ensemble was never fitted
            and at least one member has no ``classes_``).
        ValueError
            If the ensemble was never fitted and its pre-fitted members
            disagree on ``classes_``.
        """
        # Query the members first so an unfitted member surfaces its own
        # error before the label lookup.
        proba = self.predict_proba(X)
        classes = self._resolve_classes()
        if classes is None:
            raise AttributeError(
                "SpectralEnsemble has no classes_; call fit() or supply fitted members."
            )
        idx = np.argmax(proba, axis=1)
        return classes[idx]

    def score(self, X: Any, y: Any) -> float:
        """Mean accuracy against ``y``.

        Raises
        ------
        ValueError
            If ``y`` does not contain exactly one label per prediction.
        """
        preds = self.predict(X)
        if hasattr(y, "to_numpy"):
            y = y.to_numpy()
        truth = np.asarray(y).ravel()
        # Guard against silent broadcasting (e.g. a length-1 ``y``), which
        # would otherwise produce a meaningless accuracy.
        if truth.shape != preds.shape:
            raise ValueError(
                f"y has {truth.shape[0]} labels but X produced "
                f"{preds.shape[0]} predictions."
            )
        return float(np.mean(preds == truth))

    def save(self, path: str | Path) -> None:
        """Save each member under ``<path>_<i>``.

        Example: ``SpectralEnsemble.save("my_ens")`` calls each member's
        ``save`` with ``my_ens_0``, ``my_ens_1``, ... and writes an index
        file ``my_ens.ensemble.json`` recording the member file names and
        the ensemble's classes. One trailing extension on ``path`` is
        stripped first.
        """
        import json

        base = Path(path)
        if base.suffix:
            base = base.with_suffix("")
        base.parent.mkdir(parents=True, exist_ok=True)

        member_paths = []
        for i, clf in enumerate(self.classifiers):
            member_path = base.parent / f"{base.name}_{i}"
            clf.save(member_path)
            # Store bare names so the saved directory can be moved.
            member_paths.append(str(member_path.name))

        classes = getattr(self, "classes_", None)
        payload = {
            "version": 1,
            "n_members": len(self.classifiers),
            "member_files": member_paths,
            "classes_": None if classes is None else np.asarray(classes).tolist(),
            "member_class_names": [type(c).__name__ for c in self.classifiers],
        }
        with open(self._index_path(base), "w", encoding="utf-8") as fh:
            json.dump(payload, fh, indent=2)

    @classmethod
    def load(cls, path: str | Path) -> "SpectralEnsemble":
        """Inverse of :meth:`save`.

        Raises
        ------
        FileNotFoundError
            If no index file exists for ``path``.
        """
        import json

        from ..base.classifier import BaseSpectralClassifier

        base = Path(path)
        if base.suffix:
            base = base.with_suffix("")

        index_path = cls._index_path(base)
        if not index_path.exists():
            # Fall back to the location used by earlier saves, which built
            # the index name with ``with_suffix``.
            legacy_path = base.with_suffix(".ensemble.json")
            if not legacy_path.exists():
                raise FileNotFoundError(index_path)
            index_path = legacy_path

        with open(index_path, encoding="utf-8") as fh:
            meta = json.load(fh)

        members: list[Any] = [
            BaseSpectralClassifier.load(base.parent / name)
            for name in meta["member_files"]
        ]
        ens = cls(members)
        if meta.get("classes_") is not None:
            ens.classes_ = np.asarray(meta["classes_"])
        return ens