Skip to content

NeuRepTrace

Decoding

IPS-Stuttgart/NeuRepTrace

Decoding

`neureptrace.decoding`

`ECOCLinearSVC`

Bases: ClassifierMixin, BaseEstimator

Output-code linear SVM with class-level decision scores.

sklearn's OutputCodeClassifier exposes predict but not decision_function. NeuRepTrace needs a score matrix so uncalibrated emissions and CalibratedClassifierCV can produce probabilities. This wrapper converts binary code margins into negative distances to each class code word.

Source code in src/neureptrace/decoding/__init__.py

class ECOCLinearSVC(ClassifierMixin, BaseEstimator):
    """Linear SVM trained through an output code, with a per-class score matrix.

    ``OutputCodeClassifier`` from sklearn offers ``predict`` only; it has no
    ``decision_function``.  NeuRepTrace needs class-level scores so that
    uncalibrated emissions and ``CalibratedClassifierCV`` have something to
    work with.  The scores produced here are the negated Euclidean distances
    between the vector of binary code margins and every class code word, so a
    larger (less negative) score means a closer code word.
    """

    def __init__(
        self,
        C: float = 1.0,
        code_size: float = 2.0,
        max_iter: int = 1000,
        class_weight: str | dict | None = "balanced",
        random_state: int | None = 13,
    ):
        # Hyperparameters are stored exactly as given; conversion to float/int
        # happens in ``fit``.
        self.random_state = random_state
        self.class_weight = class_weight
        self.max_iter = max_iter
        self.code_size = code_size
        self.C = C

    def fit(self, features: Sequence[Sequence[float]] | np.ndarray, labels: Sequence | np.ndarray):
        """Fit the output-code ensemble of ``LinearSVC`` models and return ``self``."""
        binary_learner = LinearSVC(
            C=float(self.C),
            class_weight=self.class_weight,
            max_iter=int(self.max_iter),
            random_state=self.random_state,
        )
        self.model_ = OutputCodeClassifier(
            binary_learner,
            code_size=float(self.code_size),
            random_state=self.random_state,
        )
        self.model_.fit(features, labels)
        self.classes_ = np.asarray(self.model_.classes_)
        return self

    def _class_score_matrix(self, features: Sequence[Sequence[float]] | np.ndarray) -> np.ndarray:
        """Return negated distances between code margins and each class code word.

        Raises ``RuntimeError`` when called before ``fit``.
        """
        if not hasattr(self, "model_"):
            raise RuntimeError("ECOCLinearSVC must be fitted before prediction.")

        def code_bit_margin(binary_estimator) -> np.ndarray:
            # Estimators without a margin fall back to hard +/-1 votes.
            if not hasattr(binary_estimator, "decision_function"):
                votes = np.asarray(binary_estimator.predict(features), dtype=float)
                return np.where(votes > 0, 1.0, -1.0)
            margin = np.asarray(binary_estimator.decision_function(features), dtype=float)
            # A multi-column margin is reduced to its last column.
            return margin[:, -1] if margin.ndim > 1 else margin

        margins = np.column_stack(
            [code_bit_margin(binary_estimator) for binary_estimator in self.model_.estimators_]
        )
        code_words = np.asarray(self.model_.code_book_, dtype=float)
        # Broadcast to (rows, code words, code bits) and reduce over the bits.
        offsets = margins[:, None, :] - code_words[None, :, :]
        return -np.linalg.norm(offsets, axis=2)

    def decision_function(self, features: Sequence[Sequence[float]] | np.ndarray) -> np.ndarray:
        """Return class scores; with two classes, a single score difference per row."""
        scores = self._class_score_matrix(features)
        if self.classes_.size != 2:
            return scores
        return scores[:, 1] - scores[:, 0]

    def predict(self, features: Sequence[Sequence[float]] | np.ndarray) -> np.ndarray:
        """Return, per row, the class whose code word has the highest score."""
        winners = np.argmax(self._class_score_matrix(features), axis=1)
        return self.classes_[winners]

`HierarchicalThreeClassLogistic`

Bases: ClassifierMixin, BaseEstimator

Three-class hierarchy: primary class versus rest, then the two rest classes.

The default primary class index is 1, matching the ds006629 sorted label order for Inter dev / Large dev / Stand, where Large dev is the first-stage branch. The estimator remains generic for any three encoded classes.

Source code in src/neureptrace/decoding/__init__.py

class HierarchicalThreeClassLogistic(ClassifierMixin, BaseEstimator):
    """Two-stage logistic model for exactly three classes.

    Stage one separates a designated primary class from the other two; stage
    two separates those remaining two classes from each other.  The primary
    class is chosen by its position in the sorted unique labels.  The default
    position 1 matches the ds006629 sorted label order
    Inter dev / Large dev / Stand, where Large dev is the first-stage branch,
    but any three encoded classes can be used.
    """

    def __init__(
        self,
        primary_class_index: int = 1,
        C: float = 1.0,
        max_iter: int = 1000,
        random_state: int | None = 13,
    ):
        # Stored verbatim; numeric conversion happens when the stages are built.
        self.random_state = random_state
        self.max_iter = max_iter
        self.C = C
        self.primary_class_index = primary_class_index

    def _make_stage(self):
        """Build one unfitted, class-balanced binary logistic stage."""
        return LogisticRegression(
            class_weight="balanced",
            C=float(self.C),
            max_iter=int(self.max_iter),
            random_state=self.random_state,
            solver="lbfgs",
        )

    def fit(self, features: Sequence[Sequence[float]] | np.ndarray, labels: Sequence | np.ndarray):
        """Fit both stages and return ``self``.

        Raises ``ValueError`` when the labels do not contain exactly three
        classes, when ``primary_class_index`` is out of range, or when either
        stage would not see two distinct targets.
        """
        features = np.asarray(features, dtype=float)
        labels = np.asarray(labels).ravel()

        self.classes_ = np.unique(labels)
        n_classes = self.classes_.shape[0]
        if n_classes != 3:
            raise ValueError("HierarchicalThreeClassLogistic requires exactly three classes.")

        position = int(self.primary_class_index)
        if not 0 <= position < n_classes:
            raise ValueError("primary_class_index must refer to one of the three fitted classes.")
        self.primary_class_ = self.classes_[position]

        # Stage 1: boolean target, True for the primary class.
        is_primary = labels == self.primary_class_
        if np.unique(is_primary).size != 2:
            raise ValueError("Both primary and non-primary samples are required.")
        self.first_stage_ = self._make_stage()
        self.first_stage_.fit(features, is_primary)

        # Stage 2: only the non-primary rows, with their original labels.
        is_rest = ~is_primary
        remaining_labels = labels[is_rest]
        if np.unique(remaining_labels).size != 2:
            raise ValueError("The non-primary branch requires exactly two classes.")
        self.second_stage_ = self._make_stage()
        self.second_stage_.fit(features[is_rest], remaining_labels)
        return self

    def predict_proba(self, features: Sequence[Sequence[float]] | np.ndarray) -> np.ndarray:
        """Return class probabilities with columns ordered like ``classes_``.

        The primary class gets the stage-one probability; each remaining class
        gets ``(1 - primary) * stage-two probability``.  Rows are renormalised
        by their sum, with non-positive sums left undivided.
        """
        fitted = hasattr(self, "first_stage_") and hasattr(self, "second_stage_")
        if not fitted:
            raise RuntimeError("HierarchicalThreeClassLogistic must be fitted before prediction.")

        features = np.asarray(features, dtype=float)
        stage_one = np.asarray(self.first_stage_.predict_proba(features), dtype=float)
        # Locate the column that belongs to the boolean target ``True``.
        true_column = int(np.flatnonzero(self.first_stage_.classes_ == True)[0])  # noqa: E712
        p_primary = stage_one[:, true_column]
        p_rest = 1.0 - p_primary

        stage_two = np.asarray(self.second_stage_.predict_proba(features), dtype=float)
        column_of = {label: column for column, label in enumerate(self.classes_.tolist())}
        joint = np.zeros((features.shape[0], self.classes_.shape[0]), dtype=float)
        joint[:, column_of[self.primary_class_]] = p_primary
        for label, conditional in zip(self.second_stage_.classes_.tolist(), stage_two.T):
            joint[:, column_of[label]] = p_rest * conditional

        totals = joint.sum(axis=1, keepdims=True)
        totals[totals <= 0.0] = 1.0
        return joint / totals

    def decision_function(self, features: Sequence[Sequence[float]] | np.ndarray) -> np.ndarray:
        """Return log-probabilities, with probabilities clipped to ``[1e-12, 1]``."""
        clipped = np.clip(self.predict_proba(features), 1e-12, 1.0)
        return np.log(clipped)

    def predict(self, features: Sequence[Sequence[float]] | np.ndarray) -> np.ndarray:
        """Return the most probable class for each row."""
        best_columns = np.argmax(self.predict_proba(features), axis=1)
        return self.classes_[best_columns]

`PCA`

Bases: PCA

PCA that caps explicit component counts to the current training fold.

Cross-subject MEG folds can become smaller than a requested PCA dimension, especially inside nested calibration or small OpenNeuro smoke runs. Sklearn's PCA raises in that case; this subclass keeps the public n_components parameter unchanged for provenance and grid-search names, but uses the largest feasible integer component count during each fit.

Source code in src/neureptrace/decoding/__init__.py

class PCA(SklearnPCA):
    """PCA that caps explicit component counts to the current training fold.

    Cross-subject MEG folds can become smaller than a requested PCA dimension,
    especially inside nested calibration or small OpenNeuro smoke runs. Sklearn's
    PCA raises in that case; this subclass keeps the public ``n_components``
    parameter unchanged for provenance and grid-search names, but uses the
    largest feasible integer component count during each fit.

    After a successful fit two extra attributes are available:
    ``requested_n_components_`` (the untouched parameter value) and
    ``effective_n_components_`` (the fitted ``n_components_`` when the parent
    set it, otherwise the capped value).
    """

    def _fit(self, X):
        """Run the parent ``_fit`` with a temporarily capped ``n_components``.

        Only genuine integer counts are capped (booleans, floats, strings and
        ``None`` are passed through unchanged).  The cap is
        ``min(requested, max(1, min(n_samples, n_features)))``.
        """
        requested_n_components = self.n_components
        effective_n_components = requested_n_components
        is_explicit_count = isinstance(requested_n_components, (int, np.integer)) and not isinstance(
            requested_n_components, bool
        )
        if is_explicit_count:
            # X is inspected here before this method has validated it, so it
            # may be a plain nested list without a ``.shape`` attribute;
            # ``np.shape`` handles both arrays and array-likes.
            try:
                shape = np.shape(X)
            except (TypeError, ValueError):
                # Malformed input: leave the error reporting to the parent.
                shape = ()
            if len(shape) == 2:
                n_samples, n_features = shape
                effective_n_components = min(
                    int(requested_n_components),
                    max(1, min(int(n_samples), int(n_features))),
                )

        self.n_components = effective_n_components
        try:
            result = super()._fit(X)
        finally:
            # Always restore the public parameter, on success and on failure.
            self.n_components = requested_n_components
        self.requested_n_components_ = requested_n_components
        self.effective_n_components_ = getattr(self, "n_components_", effective_n_components)
        return result

`PLSDiscriminantTransformer`

Bases: TransformerMixin, BaseEstimator

Supervised PLS-DA feature projection for high-dimensional M/EEG windows.

The transformer maps class labels to one-hot targets and fits a PLSRegression model on the training fold only. Its output is the PLS X-score matrix, which can then be consumed by the existing sklearn classifiers. This gives the BUSH-MEG pipelines a supervised dimensionality reduction option without changing outer LOSO semantics.

Source code in src/neureptrace/decoding/__init__.py

class PLSDiscriminantTransformer(TransformerMixin, BaseEstimator):
    """Supervised PLS-DA feature projection for high-dimensional M/EEG windows.

    The transformer maps class labels to one-hot targets and fits a
    ``PLSRegression`` model on the training fold only.  Its output is the PLS
    X-score matrix, which can then be consumed by the existing sklearn
    classifiers.  This gives the BUSH-MEG pipelines a supervised dimensionality
    reduction option without changing outer LOSO semantics.
    """

    def __init__(self, n_components: int | str | None = DEFAULT_PLS_COMPONENTS):
        self.n_components = n_components

    def fit(self, features: Sequence[Sequence[float]] | np.ndarray, labels: Sequence | np.ndarray):
        """Fit the PLS model on one-hot encoded labels and return ``self``.

        The requested component count is capped at
        ``max(1, min(n_features, n_samples - 1))``.

        Raises ``ValueError`` when the feature matrix is not two-dimensional,
        has fewer than two rows or no columns, when the number of labels does
        not match the number of rows, or when fewer than two classes occur.
        """
        x = np.asarray(features, dtype=float)
        if x.ndim != 2:
            raise ValueError("PLSDiscriminantTransformer expects a two-dimensional feature matrix.")
        if x.shape[0] < 2 or x.shape[1] < 1:
            raise ValueError("PLSDiscriminantTransformer needs at least two samples and one feature.")
        # Flatten so that column-vector labels of shape (n, 1) behave like (n,).
        # Without this, the inverse index returned by ``np.unique`` can keep
        # the 2-D input shape and broadcast in the one-hot assignment below.
        y_raw = np.asarray(labels).ravel()
        if y_raw.shape[0] != x.shape[0]:
            raise ValueError("features and labels must contain the same number of rows.")
        self.classes_, encoded = np.unique(y_raw, return_inverse=True)
        if self.classes_.shape[0] < 2:
            raise ValueError("PLSDiscriminantTransformer needs at least two classes.")

        requested = normalize_pls_components(self.n_components)
        max_components = max(1, min(int(x.shape[1]), int(x.shape[0]) - 1))
        n_components = min(int(requested), max_components)

        # One-hot target matrix: one row per sample, one column per class.
        y = np.zeros((x.shape[0], self.classes_.shape[0]), dtype=float)
        y[np.arange(x.shape[0]), encoded] = 1.0
        self.model_ = PLSRegression(n_components=n_components, scale=False)
        self.model_.fit(x, y)
        self.n_components_ = n_components
        self.n_features_in_ = x.shape[1]
        return self

    def transform(self, features: Sequence[Sequence[float]] | np.ndarray) -> np.ndarray:
        """Project features with the fitted PLS model and return a float array.

        Raises ``RuntimeError`` before ``fit`` and ``ValueError`` for input
        that is not two-dimensional.
        """
        if not hasattr(self, "model_"):
            raise RuntimeError("PLSDiscriminantTransformer must be fitted before transform.")
        x = np.asarray(features, dtype=float)
        if x.ndim != 2:
            raise ValueError("PLSDiscriminantTransformer expects a two-dimensional feature matrix.")
        transformed = self.model_.transform(x)
        # Keep only the first element if the model hands back a tuple.
        if isinstance(transformed, tuple):
            transformed = transformed[0]
        return np.asarray(transformed, dtype=float)

`RegistryDecoder`

Bases: ClassifierMixin, BaseEstimator

Scikit-learn estimator adapter for decoding.classifiers entries.

The time-resolved MNE decoder path expects estimators that can be placed in a sklearn pipeline and, optionally, wrapped in CalibratedClassifierCV. Most legacy registry classifiers are factory functions rather than sklearn estimators themselves; this adapter exposes them through the standard fit/predict/decision_function/predict_proba API.

Source code in src/neureptrace/decoding/__init__.py

class RegistryDecoder(ClassifierMixin, BaseEstimator):
    """Adapter that presents ``decoding.classifiers`` entries as sklearn estimators.

    The time-resolved MNE decoder path needs estimators that fit into a
    sklearn pipeline and can optionally be wrapped in
    ``CalibratedClassifierCV``.  Most legacy registry classifiers are factory
    functions rather than estimators, so this class wraps them behind the
    usual ``fit``/``predict``/``decision_function``/``predict_proba`` methods.
    """

    def __init__(self, classifier: str, classifier_param: Any = None, random_state: int | None = 13):
        self.random_state = random_state
        self.classifier_param = classifier_param
        self.classifier = classifier

    def fit(
        self,
        features: Sequence[Sequence[float]] | np.ndarray,
        labels: Sequence | np.ndarray,
        sample_weight: Sequence[float] | np.ndarray | None = None,
    ):
        """Train the registry classifier and record what was actually used.

        A ``classifier_param`` of ``None`` is replaced by the registry default
        for the normalized classifier name.
        """
        resolved_name = normalize_registry_decoder_name(self.classifier)
        if self.classifier_param is None:
            resolved_param = get_default_classifier_param(resolved_name)
        else:
            resolved_param = self.classifier_param
        self.model_ = train_multiclass_classifier(
            features,
            labels,
            resolved_name,
            resolved_param,
            random_state=self.random_state,
            sample_weight=sample_weight,
        )
        # Prefer the classes reported by the trained model; otherwise use the
        # sorted unique labels.
        label_fallback = np.unique(labels)
        self.classes_ = np.asarray(getattr(self.model_, "classes_", label_fallback))
        self.classifier_ = resolved_name
        self.classifier_param_ = resolved_param
        return self

    def _raw_model(self):
        """Return the wrapped ``.model`` attribute if present, else the trained object."""
        if not hasattr(self, "model_"):
            raise RuntimeError("RegistryDecoder must be fitted before prediction.")
        trained = self.model_
        return getattr(trained, "model", trained)

    def predict(self, features: Sequence[Sequence[float]] | np.ndarray) -> np.ndarray:
        """Return predicted labels as a NumPy array."""
        if not hasattr(self, "model_"):
            raise RuntimeError("RegistryDecoder must be fitted before prediction.")
        predictions = self.model_.predict(features)
        return np.asarray(predictions)

    def decision_function(self, features: Sequence[Sequence[float]] | np.ndarray) -> np.ndarray:
        """Return decision scores, derived from whatever the underlying model offers.

        Order of preference: the raw model's ``decision_function``, then
        clipped log-probabilities from its ``predict_proba``, then the trained
        object's own ``decision_function``.  Two-column outputs in the binary
        case are collapsed to ``column 1 - column 0``.
        """
        estimator = self._raw_model()

        if hasattr(estimator, "decision_function"):
            margins = np.asarray(estimator.decision_function(features), dtype=float)
            if margins.ndim == 2 and getattr(self, "classes_", np.array([])).shape[0] == 2:
                return margins[:, 1] - margins[:, 0]
            return margins

        if hasattr(estimator, "predict_proba"):
            proba = np.asarray(estimator.predict_proba(features), dtype=float)
            # Clip before the log so zero probabilities stay finite.
            log_proba = np.log(np.clip(proba, 1e-12, 1.0))
            if proba.ndim == 2 and proba.shape[1] == 2:
                return log_proba[:, 1] - log_proba[:, 0]
            return log_proba

        return np.asarray(self.model_.decision_function(features), dtype=float)

    def predict_proba(self, features: Sequence[Sequence[float]] | np.ndarray) -> np.ndarray:
        """Return class probabilities from the trained object.

        Raises ``RuntimeError`` before ``fit`` and ``AttributeError`` when the
        trained object has no ``predict_proba``.
        """
        if not hasattr(self, "model_"):
            raise RuntimeError("RegistryDecoder must be fitted before prediction.")
        trained = self.model_
        if not hasattr(trained, "predict_proba"):
            raise AttributeError(f"{self.classifier!r} does not provide predict_proba")
        return np.asarray(trained.predict_proba(features), dtype=float)

`TorchMLPClassifier`

Bases: ClassifierMixin, BaseEstimator

Small CPU-friendly PyTorch MLP exposed as a sklearn classifier.

The estimator intentionally imports torch only inside fit and predict so the optional torch extra is not required for normal sklearn decoder use or for constructing config grids that do not select this model. It is designed for held-out-subject MEG smoke runs: a single hidden layer, class-balanced cross entropy, modest early stopping, and no background GPU assumptions.

Source code in src/neureptrace/decoding/__init__.py

class TorchMLPClassifier(ClassifierMixin, BaseEstimator):
    """Small CPU-friendly PyTorch MLP exposed as a sklearn classifier.

    The estimator intentionally imports torch only inside ``fit`` and
    ``predict`` so the optional torch extra is not required for normal sklearn
    decoder use or for constructing config grids that do not select this model.
    It is designed for held-out-subject MEG smoke runs: a single hidden layer,
    class-balanced cross entropy, modest early stopping, and no background GPU
    assumptions.
    """

    def __init__(
        self,
        hidden_units: int = 64,
        max_iter: int = 100,
        batch_size: int = 128,
        learning_rate: float = 1e-3,
        weight_decay: float = 1e-4,
        validation_fraction: float = 0.1,
        patience: int = 8,
        dropout: float = 0.1,
        random_state: int | None = 13,
        class_weight: str | None = "balanced",
    ):
        self.hidden_units = hidden_units
        self.max_iter = max_iter
        self.batch_size = batch_size
        self.learning_rate = learning_rate
        self.weight_decay = weight_decay
        self.validation_fraction = validation_fraction
        self.patience = patience
        self.dropout = dropout
        self.random_state = random_state
        self.class_weight = class_weight

    def _torch(self):
        """Import and return the ``torch`` module, with a helpful error if missing."""
        try:
            import torch
        except ImportError as exc:  # pragma: no cover - exercised only without the optional extra
            raise ImportError("The 'torch_mlp' decoder requires the optional torch extra, e.g. `pip install neureptrace[torch]`.") from exc
        return torch

    def fit(self, features: Sequence[Sequence[float]] | np.ndarray, labels: Sequence | np.ndarray):
        """Train the MLP with AdamW and early stopping; return ``self``.

        A stratified hold-out of ``validation_fraction`` drives early stopping
        when the data allow it; otherwise the training rows double as the
        validation rows.  The weights with the lowest validation loss are
        restored at the end.

        Raises ``ValueError`` for non-2-D features, mismatched feature/label
        row counts, fewer than two classes, or invalid hyperparameters.
        """
        torch = self._torch()
        if self.random_state is not None:
            # Seeds torch's global RNG (layer initialisation and dropout).
            torch.manual_seed(int(self.random_state))

        x = np.asarray(features, dtype=np.float32)
        if x.ndim != 2:
            raise ValueError("TorchMLPClassifier expects a two-dimensional feature matrix.")
        # Flatten so that (n, 1) column-vector labels behave like (n,); the
        # inverse index from ``np.unique`` must be one-dimensional below.
        y_raw = np.asarray(labels).ravel()
        if y_raw.shape[0] != x.shape[0]:
            # Without this check extra feature rows would be ignored silently.
            raise ValueError("features and labels must contain the same number of rows.")
        self.classes_, y = np.unique(y_raw, return_inverse=True)
        y = y.astype(np.int64, copy=False)
        n_classes = int(self.classes_.shape[0])
        if n_classes < 2:
            raise ValueError("TorchMLPClassifier needs at least two classes.")

        hidden_units = int(self.hidden_units)
        max_iter = int(self.max_iter)
        batch_size = int(self.batch_size)
        if hidden_units < 1 or max_iter < 1 or batch_size < 1:
            raise ValueError("hidden_units, max_iter, and batch_size must be positive integers.")
        if not np.isfinite(self.learning_rate) or self.learning_rate <= 0:
            raise ValueError("learning_rate must be positive and finite.")
        if not np.isfinite(self.weight_decay) or self.weight_decay < 0:
            raise ValueError("weight_decay must be non-negative and finite.")

        n_samples = int(y.shape[0])
        indices = np.arange(n_samples)
        class_counts = np.bincount(y, minlength=n_classes)
        validation_fraction = float(self.validation_fraction)
        can_validate = (
            0.0 < validation_fraction < 1.0
            and n_samples >= 2 * n_classes
            and np.min(class_counts) >= 2
        )
        if can_validate:
            # A stratified split needs at least one row per class on each
            # side; small folds with a small fraction would otherwise make
            # train_test_split raise instead of training.
            n_validation = int(np.ceil(validation_fraction * n_samples))
            can_validate = n_validation >= n_classes and n_samples - n_validation >= n_classes
        if can_validate:
            train_idx, validation_idx = train_test_split(
                indices,
                test_size=validation_fraction,
                random_state=self.random_state,
                stratify=y,
            )
        else:
            # No feasible hold-out: monitor the loss on the training rows.
            train_idx = indices
            validation_idx = indices

        model = torch.nn.Sequential(
            torch.nn.Linear(x.shape[1], hidden_units),
            torch.nn.ReLU(),
            torch.nn.Dropout(float(self.dropout)),
            torch.nn.Linear(hidden_units, n_classes),
        )
        if self.class_weight == "balanced":
            # n_train / (n_classes * count_c), with empty classes counted as 1.
            train_counts = np.bincount(y[train_idx], minlength=n_classes).astype(np.float32)
            weights = train_idx.shape[0] / np.maximum(train_counts, 1.0) / float(n_classes)
            class_weights = torch.as_tensor(weights, dtype=torch.float32)
        else:
            class_weights = None
        loss_fn = torch.nn.CrossEntropyLoss(weight=class_weights)
        optimizer = torch.optim.AdamW(
            model.parameters(),
            lr=float(self.learning_rate),
            weight_decay=float(self.weight_decay),
        )

        x_tensor = torch.from_numpy(x)
        y_tensor = torch.from_numpy(y)
        rng = np.random.default_rng(self.random_state)
        best_loss = np.inf
        best_state = None
        patience_left = int(self.patience)
        for _epoch in range(max_iter):
            model.train()
            epoch_indices = rng.permutation(train_idx)
            for start in range(0, train_idx.shape[0], batch_size):
                batch_idx = epoch_indices[start : start + batch_size]
                optimizer.zero_grad(set_to_none=True)
                loss = loss_fn(model(x_tensor[batch_idx]), y_tensor[batch_idx])
                loss.backward()
                optimizer.step()
            model.eval()
            with torch.no_grad():
                validation_loss = float(loss_fn(model(x_tensor[validation_idx]), y_tensor[validation_idx]).detach().cpu())
            # Require a small margin so numerical noise does not reset patience.
            if validation_loss + 1e-6 < best_loss:
                best_loss = validation_loss
                best_state = {key: value.detach().cpu().clone() for key, value in model.state_dict().items()}
                patience_left = int(self.patience)
            else:
                patience_left -= 1
                if patience_left <= 0:
                    break

        if best_state is not None:
            model.load_state_dict(best_state)
        self.model_ = model.eval()
        self.n_features_in_ = x.shape[1]
        return self

    def decision_function(self, features: Sequence[Sequence[float]] | np.ndarray) -> np.ndarray:
        """Return raw logits; with two classes, ``logit[1] - logit[0]`` per row."""
        if not hasattr(self, "model_"):
            raise RuntimeError("TorchMLPClassifier must be fitted before prediction.")
        torch = self._torch()
        x = torch.as_tensor(np.asarray(features, dtype=np.float32))
        self.model_.eval()
        with torch.no_grad():
            logits = self.model_(x).detach().cpu().numpy()
        if logits.shape[1] == 2:
            return logits[:, 1] - logits[:, 0]
        return logits

    def predict_proba(self, features: Sequence[Sequence[float]] | np.ndarray) -> np.ndarray:
        """Return softmax class probabilities with columns ordered like ``classes_``."""
        if not hasattr(self, "model_"):
            raise RuntimeError("TorchMLPClassifier must be fitted before prediction.")
        torch = self._torch()
        x = torch.as_tensor(np.asarray(features, dtype=np.float32))
        self.model_.eval()
        with torch.no_grad():
            probabilities = torch.softmax(self.model_(x), dim=1).detach().cpu().numpy()
        return probabilities.astype(float, copy=False)

    def predict(self, features: Sequence[Sequence[float]] | np.ndarray) -> np.ndarray:
        """Return the most probable class for each row."""
        return self.classes_[np.argmax(self.predict_proba(features), axis=1)]

`make_cross_validator(labels, groups, n_splits)`

Create stratified CV splits, optionally preserving group boundaries.

Source code in src/neureptrace/decoding/__init__.py

def make_cross_validator(labels: np.ndarray, groups: np.ndarray | None, n_splits: int):
    """Create stratified CV splits, optionally preserving group boundaries."""
    _, class_counts = np.unique(labels, return_counts=True)
    if len(class_counts) < 2:
        raise ValueError("Need at least two classes for decoding.")
    if np.min(class_counts) < n_splits:
        raise ValueError(
            f"Need at least {n_splits} examples per class; smallest class has {np.min(class_counts)}."
        )
    if groups is not None:
        unique_groups = np.unique(groups)
        if len(unique_groups) < n_splits:
            raise ValueError(
                f"Need at least {n_splits} groups for grouped CV, found {len(unique_groups)}."
            )
        return StratifiedGroupKFold(n_splits=n_splits).split(
            np.zeros_like(labels),
            labels,
            groups,
        )
    return StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=13).split(
        np.zeros_like(labels),
        labels,
    )

`make_decoder(name='logistic', *, max_iter=1000, emission_mode='calibrated', feature_preprocessor='none', pca_components=None, tune_hyperparameters=False, tuning_cv=3, tuning_scoring='accuracy', tuning_c_grid=None, classifier_param=None, random_state=13)`

Create a standard probability-producing decoder by name.

Optional feature preprocessing is inserted after fold-local standardization and before the classifier. This keeps low-rank transforms such as PCA inside each cross-validation fold and prevents train/test leakage.

When tune_hyperparameters is enabled, the returned estimator is a GridSearchCV wrapper around the same decoder family. The caller can pass an integer CV count or precomputed inner-CV splits via tuning_cv.

Source code in src/neureptrace/decoding/__init__.py

def make_decoder(
    name: str = "logistic",
    *,
    max_iter: int = 1000,
    emission_mode: str = "calibrated",
    feature_preprocessor: str = "none",
    pca_components: int | float | str | None = None,
    tune_hyperparameters: bool = False,
    tuning_cv: int | Sequence[tuple[np.ndarray, np.ndarray]] = 3,
    tuning_scoring: str = "accuracy",
    tuning_c_grid: Sequence[float] | str | None = None,
    classifier_param: Any = None,
    random_state: int | None = 13,
):
    """Create a standard probability-producing decoder by name.

    Optional feature preprocessing is inserted after fold-local standardization
    and before the classifier. This keeps low-rank transforms such as PCA inside
    each cross-validation fold and prevents train/test leakage.

    When ``tune_hyperparameters`` is enabled, the returned estimator is a
    ``GridSearchCV`` wrapper around the same decoder family. The caller can pass
    an integer CV count or precomputed inner-CV splits via ``tuning_cv``.

    Margin-only decoders (ridge, the linear SVM variants, one-vs-one logistic
    and registry decoders) are returned bare when ``emission_mode`` is
    ``"uncalibrated"`` and otherwise wrapped in a sigmoid calibrator with
    3-fold CV.  ``random_state`` is forwarded to every estimator built here
    that is given a seed, including the saga-based logistic variants and the
    plain linear SVM.

    Raises ``ValueError`` for the ``"dann"`` decoder, which cannot be built
    through this factory.
    """
    normalized = normalize_decoder_name(name)
    emission_mode = normalize_emission_mode(emission_mode)
    # Resolved before the tuning branch so the preprocessing arguments are
    # processed on every path.
    feature_steps = _feature_preprocessor_steps(feature_preprocessor, pca_components)

    if tune_hyperparameters:
        return make_tuned_decoder(
            normalized,
            max_iter=max_iter,
            emission_mode=emission_mode,
            feature_preprocessor=feature_preprocessor,
            pca_components=pca_components,
            cv=tuning_cv,
            scoring=tuning_scoring,
            c_grid=tuning_c_grid,
            classifier_param=classifier_param,
            random_state=random_state,
        )

    def _pipeline(estimator):
        # Standardize, apply the optional feature steps, then classify.
        return make_pipeline(StandardScaler(), *feature_steps, estimator)

    def _with_emission_mode(model):
        # Margin-only models are calibrated unless raw emissions were requested.
        if emission_mode == "uncalibrated":
            return model
        return _make_calibrated_classifier(
            model,
            method="sigmoid",
            cv=3,
        )

    if normalized == "logistic":
        c_value = _positive_float_classifier_param(classifier_param, default=1.0, name="LogisticRegression C")
        return _pipeline(
            LogisticRegression(
                class_weight="balanced",
                C=c_value,
                max_iter=max_iter,
                solver="lbfgs",
            )
        )
    if normalized == "sparse_logistic":
        c_value = _positive_float_classifier_param(classifier_param, default=1.0, name="LogisticRegression C")
        return _pipeline(
            LogisticRegression(
                class_weight="balanced",
                penalty="l1",
                C=c_value,
                max_iter=max_iter,
                # Honour the caller's seed instead of a hard-coded 13.
                random_state=random_state,
                solver="saga",
            )
        )
    if normalized == "elastic_net_logistic":
        c_value = _positive_float_classifier_param(classifier_param, default=1.0, name="LogisticRegression C")
        return _pipeline(
            LogisticRegression(
                class_weight="balanced",
                penalty="elasticnet",
                C=c_value,
                l1_ratio=DEFAULT_ELASTIC_NET_L1_RATIO,
                max_iter=max_iter,
                # Honour the caller's seed instead of a hard-coded 13.
                random_state=random_state,
                solver="saga",
            )
        )
    if normalized == "gaussian_nb":
        return _pipeline(GaussianNB())
    if normalized == "lda":
        return _pipeline(LinearDiscriminantAnalysis(solver="svd"))
    if normalized == "shrinkage_lda":
        return _pipeline(LinearDiscriminantAnalysis(solver="lsqr", shrinkage="auto"))
    if normalized == "ridge":
        ridge = _pipeline(
            RidgeClassifier(
                class_weight="balanced",
                max_iter=max_iter,
            )
        )
        return _with_emission_mode(ridge)

    if normalized == "linear_svm":
        c_value = _positive_float_classifier_param(classifier_param, default=1.0, name="LinearSVC C")
        linear_svm = _pipeline(
            LinearSVC(
                class_weight="balanced",
                C=c_value,
                max_iter=max_iter,
                # Seeded like the OvO/ECOC variants for reproducible fits.
                random_state=random_state,
            )
        )
        return _with_emission_mode(linear_svm)

    if normalized == "ovo_logistic":
        c_value = _positive_float_classifier_param(classifier_param, default=1.0, name="LogisticRegression C")
        ovo_logistic = _pipeline(
            OneVsOneClassifier(
                LogisticRegression(
                    class_weight="balanced",
                    C=c_value,
                    max_iter=max_iter,
                    random_state=random_state,
                    solver="lbfgs",
                )
            )
        )
        return _with_emission_mode(ovo_logistic)

    if normalized in {"ovo_linear_svm", "ecoc_linear_svm"}:
        c_value = _positive_float_classifier_param(classifier_param, default=1.0, name="LinearSVC C")
        if normalized == "ovo_linear_svm":
            multiclass_svm = OneVsOneClassifier(
                LinearSVC(
                    class_weight="balanced",
                    C=c_value,
                    max_iter=max_iter,
                    random_state=random_state,
                )
            )
        else:
            multiclass_svm = ECOCLinearSVC(
                C=c_value,
                max_iter=max_iter,
                random_state=random_state,
            )
        return _with_emission_mode(_pipeline(multiclass_svm))

    if normalized == "hierarchical_logistic":
        c_value = _positive_float_classifier_param(classifier_param, default=1.0, name="Hierarchical logistic C")
        return _pipeline(
            HierarchicalThreeClassLogistic(
                primary_class_index=1,
                C=c_value,
                max_iter=max_iter,
                random_state=random_state,
            )
        )

    if normalized == "torch_mlp":
        # For this decoder ``classifier_param`` is interpreted as weight decay.
        weight_decay = _positive_float_classifier_param(
            classifier_param,
            default=1e-4,
            name="TorchMLP weight_decay",
        )
        return _pipeline(
            TorchMLPClassifier(
                max_iter=max_iter,
                weight_decay=weight_decay,
                random_state=random_state,
            )
        )
    if normalized == "dann":
        raise ValueError(
            "The 'dann' decoder requires unlabeled target fold features and is only supported "
            "through run_time_resolved_decode / decode-from-config."
        )

    # Anything else is looked up in the legacy classifier registry.
    registry_decoder = _make_registry_decoder_pipeline(
        normalized,
        feature_preprocessor=feature_preprocessor,
        pca_components=pca_components,
        classifier_param=classifier_param,
        random_state=random_state,
    )
    return _with_emission_mode(registry_decoder)

`make_logistic_decoder(max_iter=1000, *, feature_preprocessor='none', pca_components=None)`

Create the default calibrated-probability baseline decoder.

Source code in src/neureptrace/decoding/__init__.py

def make_logistic_decoder(
    max_iter: int = 1000,
    *,
    feature_preprocessor: str = "none",
    pca_components: int | float | str | None = None,
):
    """Create the default calibrated-probability baseline decoder.

    Convenience wrapper that asks ``make_decoder`` for the ``"logistic"``
    decoder and forwards the three options unchanged.
    """
    decoder_options = {
        "max_iter": max_iter,
        "feature_preprocessor": feature_preprocessor,
        "pca_components": pca_components,
    }
    return make_decoder("logistic", **decoder_options)

`make_tuned_decoder(name='logistic', *, max_iter=1000, emission_mode='calibrated', feature_preprocessor='none', pca_components=None, cv=3, scoring='accuracy', c_grid=None, classifier_param=None, random_state=13)`

Create a decoder with inner-CV hyperparameter selection.

Logistic regression, sparse logistic regression, and linear SVM tune the regularization strength C. Elastic-net logistic regression tunes both C and the L1/L2 mixing ratio. Ridge tunes the L2 penalty strength alpha. Gaussian NB tunes variance smoothing. LDA compares the default SVD solver with shrinkage LDA (solver='lsqr', shrinkage='auto'), which is often better conditioned for high-dimensional M/EEG windows.

Source code in src/neureptrace/decoding/__init__.py

def make_tuned_decoder(
    name: str = "logistic",
    *,
    max_iter: int = 1000,
    emission_mode: str = "calibrated",
    feature_preprocessor: str = "none",
    pca_components: int | float | str | None = None,
    cv: int | Sequence[tuple[np.ndarray, np.ndarray]] = 3,
    scoring: str = "accuracy",
    c_grid: Sequence[float] | str | None = None,
    classifier_param: Any = None,
    random_state: int | None = 13,
):
    """Create a decoder with inner-CV hyperparameter selection.

    Logistic regression, sparse logistic regression, and linear SVM tune the
    regularization strength ``C``. Elastic-net logistic regression tunes both
    ``C`` and the L1/L2 mixing ratio. Ridge tunes the L2 penalty strength
    ``alpha``. Gaussian NB tunes variance smoothing. LDA compares the default
    SVD solver with shrinkage LDA
    (``solver='lsqr', shrinkage='auto'``), which is often better conditioned for
    high-dimensional M/EEG windows.

    Shrinkage LDA tunes the shrinkage value. One-vs-one logistic, one-vs-one
    linear SVM, ECOC linear SVM, and hierarchical logistic decoders tune ``C``.
    The Torch MLP tunes ``weight_decay`` over the reciprocals of the C grid.
    Any other name is resolved through the classifier registry.

    Args:
        name: Decoder name or alias, resolved with ``normalize_decoder_name``.
        max_iter: Iteration cap forwarded to the built-in estimators that
            accept one (not GaussianNB, LDA, or registry decoders).
        emission_mode: ``"calibrated"`` or ``"uncalibrated"``. In calibrated
            mode the ridge, linear SVM, one-vs-one, ECOC, and registry
            decoders are wrapped in a sigmoid calibrator (``cv=3``) before the
            grid search, and the grid keys are rewritten to address the
            wrapped estimator.
        feature_preprocessor: Feature-preprocessor name used to build the
            extra pipeline steps and to extend the parameter grid.
        pca_components: Component specification forwarded to the feature
            preprocessing steps.
        cv: Inner cross-validation specification passed to ``GridSearchCV``.
        scoring: Tuning objective, normalized with
            ``normalize_tuning_scoring`` and converted by
            ``make_tuning_scorer``.
        c_grid: Regularization grid, parsed with ``parse_c_grid``.
        classifier_param: Extra parameter forwarded to registry decoders only.
        random_state: Seed forwarded to the sparse logistic, elastic-net
            logistic, one-vs-one, ECOC, hierarchical, Torch MLP, and registry
            decoders.

    Returns:
        An unfitted ``GridSearchCV`` configured with ``refit=True``.

    Raises:
        ValueError: If ridge is requested with uncalibrated emissions and
            ``neg_log_loss`` scoring, or if a normalization helper rejects one
            of the arguments.
    """
    normalized = normalize_decoder_name(name)
    emission_mode = normalize_emission_mode(emission_mode)
    scoring = normalize_tuning_scoring(scoring)
    c_grid = parse_c_grid(c_grid)
    feature_steps = _feature_preprocessor_steps(feature_preprocessor, pca_components)

    if normalized == "logistic":
        estimator = make_pipeline(
            StandardScaler(),
            *feature_steps,
            LogisticRegression(
                class_weight="balanced",
                max_iter=max_iter,
                solver="lbfgs",
            ),
        )
        param_grid = {"logisticregression__C": c_grid}
        param_grid = _with_feature_preprocessor_tuning(estimator, param_grid, feature_preprocessor)
    elif normalized == "sparse_logistic":
        estimator = make_pipeline(
            StandardScaler(),
            *feature_steps,
            LogisticRegression(
                class_weight="balanced",
                penalty="l1",
                max_iter=max_iter,
                # Honor the caller's seed instead of a hard-coded 13; the
                # default value of ``random_state`` keeps the old behavior.
                random_state=random_state,
                solver="saga",
            ),
        )
        param_grid = {"logisticregression__C": c_grid}
        param_grid = _with_feature_preprocessor_tuning(estimator, param_grid, feature_preprocessor)
    elif normalized == "elastic_net_logistic":
        estimator = make_pipeline(
            StandardScaler(),
            *feature_steps,
            LogisticRegression(
                class_weight="balanced",
                penalty="elasticnet",
                l1_ratio=DEFAULT_ELASTIC_NET_L1_RATIO,
                max_iter=max_iter,
                # Same seed handling as the sparse logistic branch.
                random_state=random_state,
                solver="saga",
            ),
        )
        param_grid = {
            "logisticregression__C": c_grid,
            "logisticregression__l1_ratio": ELASTIC_NET_L1_RATIO_GRID,
        }
        param_grid = _with_feature_preprocessor_tuning(estimator, param_grid, feature_preprocessor)
    elif normalized == "gaussian_nb":
        estimator = make_pipeline(
            StandardScaler(),
            *feature_steps,
            GaussianNB(),
        )
        param_grid = {"gaussiannb__var_smoothing": DEFAULT_TUNING_VAR_SMOOTHING_GRID}
        param_grid = _with_feature_preprocessor_tuning(estimator, param_grid, feature_preprocessor)
    elif normalized == "lda":
        estimator = make_pipeline(
            StandardScaler(),
            *feature_steps,
            LinearDiscriminantAnalysis(),
        )
        # Two separate grids: only these solver/shrinkage pairs are searched.
        param_grid = [
            {
                "lineardiscriminantanalysis__solver": ["svd"],
                "lineardiscriminantanalysis__shrinkage": [None],
            },
            {
                "lineardiscriminantanalysis__solver": ["lsqr"],
                "lineardiscriminantanalysis__shrinkage": ["auto"],
            },
        ]
        param_grid = _with_feature_preprocessor_tuning(estimator, param_grid, feature_preprocessor)
    elif normalized == "shrinkage_lda":
        estimator = make_pipeline(
            StandardScaler(),
            *feature_steps,
            LinearDiscriminantAnalysis(solver="lsqr"),
        )
        param_grid = {"lineardiscriminantanalysis__shrinkage": ["auto", 0.1, 0.3, 0.5, 0.7, 0.9]}
        param_grid = _with_feature_preprocessor_tuning(estimator, param_grid, feature_preprocessor)
    elif normalized == "ridge":
        if emission_mode == "uncalibrated" and scoring == "neg_log_loss":
            raise ValueError("neg_log_loss tuning requires probability estimates; use calibrated emissions for ridge.")
        ridge = make_pipeline(
            StandardScaler(),
            *feature_steps,
            RidgeClassifier(
                class_weight="balanced",
                max_iter=max_iter,
            ),
        )
        if emission_mode == "uncalibrated":
            estimator = ridge
            param_grid = {"ridgeclassifier__alpha": DEFAULT_TUNING_ALPHA_GRID}
        else:
            estimator = _make_calibrated_classifier(ridge, method="sigmoid", cv=3)
            param_grid = {_calibrated_estimator_param(estimator, "ridgeclassifier__alpha"): DEFAULT_TUNING_ALPHA_GRID}
        param_grid = _with_feature_preprocessor_tuning(estimator, param_grid, feature_preprocessor)
    elif normalized == "linear_svm":
        linear_svm = make_pipeline(
            StandardScaler(),
            *feature_steps,
            LinearSVC(
                class_weight="balanced",
                max_iter=max_iter,
            ),
        )
        if emission_mode == "uncalibrated":
            estimator = linear_svm
            param_grid = {"linearsvc__C": c_grid}
        else:
            estimator = _make_calibrated_classifier(linear_svm, method="sigmoid", cv=3)
            param_grid = {_calibrated_estimator_param(estimator, "linearsvc__C"): c_grid}
        param_grid = _with_feature_preprocessor_tuning(estimator, param_grid, feature_preprocessor)
    elif normalized == "ovo_logistic":
        estimator = make_pipeline(
            StandardScaler(),
            *feature_steps,
            OneVsOneClassifier(
                LogisticRegression(
                    class_weight="balanced",
                    max_iter=max_iter,
                    random_state=random_state,
                    solver="lbfgs",
                )
            ),
        )
        if emission_mode == "uncalibrated":
            param_grid = {"onevsoneclassifier__estimator__C": c_grid}
        else:
            estimator = _make_calibrated_classifier(estimator, method="sigmoid", cv=3)
            param_grid = {_calibrated_estimator_param(estimator, "onevsoneclassifier__estimator__C"): c_grid}
        param_grid = _with_feature_preprocessor_tuning(estimator, param_grid, feature_preprocessor)
    elif normalized in {"ovo_linear_svm", "ecoc_linear_svm"}:
        multiclass_svm = (
            OneVsOneClassifier(
                LinearSVC(
                    class_weight="balanced",
                    max_iter=max_iter,
                    random_state=random_state,
                )
            )
            if normalized == "ovo_linear_svm"
            else ECOCLinearSVC(
                max_iter=max_iter,
                random_state=random_state,
            )
        )
        estimator = make_pipeline(
            StandardScaler(),
            *feature_steps,
            multiclass_svm,
        )
        # The tuned parameter path depends on which multiclass wrapper is used.
        svm_c_param = "onevsoneclassifier__estimator__C" if normalized == "ovo_linear_svm" else "ecoclinearsvc__C"
        if emission_mode == "uncalibrated":
            param_grid = {svm_c_param: c_grid}
        else:
            estimator = _make_calibrated_classifier(estimator, method="sigmoid", cv=3)
            param_grid = {_calibrated_estimator_param(estimator, svm_c_param): c_grid}
        param_grid = _with_feature_preprocessor_tuning(estimator, param_grid, feature_preprocessor)
    elif normalized == "hierarchical_logistic":
        estimator = make_pipeline(
            StandardScaler(),
            *feature_steps,
            HierarchicalThreeClassLogistic(
                primary_class_index=1,
                max_iter=max_iter,
                random_state=random_state,
            ),
        )
        param_grid = {"hierarchicalthreeclasslogistic__C": c_grid}
        param_grid = _with_feature_preprocessor_tuning(estimator, param_grid, feature_preprocessor)
    elif normalized == "torch_mlp":
        estimator = make_pipeline(
            StandardScaler(),
            *feature_steps,
            TorchMLPClassifier(
                max_iter=max_iter,
                random_state=random_state,
            ),
        )
        # Interpret the shared C grid as inverse regularization strength for this
        # decoder so CLI tuning semantics remain consistent with linear models.
        param_grid = {"torchmlpclassifier__weight_decay": tuple(1.0 / value for value in c_grid)}
        param_grid = _with_feature_preprocessor_tuning(estimator, param_grid, feature_preprocessor)
    else:
        # Anything that is not a built-in decoder is looked up in the registry;
        # the registry helper receives the preprocessing options directly.
        registry_name = normalize_registry_decoder_name(normalized)
        registry_decoder = _make_registry_decoder_pipeline(
            registry_name,
            feature_preprocessor=feature_preprocessor,
            pca_components=pca_components,
            classifier_param=classifier_param,
            random_state=random_state,
        )
        param_grid = _registry_tuning_param_grid(registry_name, c_grid)
        if emission_mode == "uncalibrated":
            estimator = registry_decoder
        else:
            estimator = _make_calibrated_classifier(registry_decoder, method="sigmoid", cv=3)
            param_grid = _calibrated_param_grid(estimator, param_grid)

    return GridSearchCV(
        estimator=estimator,
        param_grid=param_grid,
        scoring=make_tuning_scorer(scoring, emission_mode=emission_mode),
        cv=cv,
        refit=True,
    )

`make_tuning_cross_validator(labels, groups, n_splits)`

Create feasible inner-CV splits for nested decoder hyperparameter tuning.

Source code in src/neureptrace/decoding/__init__.py

def make_tuning_cross_validator(labels: np.ndarray, groups: np.ndarray | None, n_splits: int):
    """Create feasible inner-CV splits for nested decoder hyperparameter tuning."""
    _, class_counts = np.unique(labels, return_counts=True)
    if len(class_counts) < 2:
        raise ValueError("Need at least two classes for decoder hyperparameter tuning.")
    feasible_splits = min(int(n_splits), int(np.min(class_counts)))
    if groups is not None:
        feasible_splits = min(feasible_splits, len(np.unique(groups)))
    if feasible_splits < 2:
        raise ValueError("Need at least two examples per class and two groups when grouped to tune decoder hyperparameters.")
    return list(make_cross_validator(labels, groups, feasible_splits))

`make_tuning_scorer(scoring, *, emission_mode='calibrated')`

Return a GridSearchCV scorer for decoder hyperparameter tuning.

Accuracy-oriented objectives are forwarded to scikit-learn by name. Probability objectives are implemented here so they use the same calibrated or score-derived emissions that NeuRepTrace writes to the held-out observation tables. This keeps model selection aligned with downstream temporal-state inference, where probability quality matters more than the hard class label.

Source code in src/neureptrace/decoding/__init__.py

def make_tuning_scorer(scoring: str, *, emission_mode: str = "calibrated") -> str | Callable:
    """Return a GridSearchCV scorer for decoder hyperparameter tuning.

    ``accuracy`` and ``balanced_accuracy`` are handed back as plain names so
    scikit-learn resolves them itself. Every other objective is a probability
    objective and is built locally, so it is scored on the same calibrated or
    score-derived emissions that NeuRepTrace writes to the held-out
    observation tables. Model selection therefore stays aligned with
    downstream temporal-state inference, where probability quality matters
    more than the hard class label.
    """
    objective = normalize_tuning_scoring(scoring)
    emission_mode = normalize_emission_mode(emission_mode)
    if objective not in {"accuracy", "balanced_accuracy"}:
        return _make_probability_tuning_scorer(objective, emission_mode=emission_mode)
    return objective

`normalize_anova_select_percentile(percentile)`

Normalize ANOVA feature-selection percentile specifications.

Source code in src/neureptrace/decoding/__init__.py

def normalize_anova_select_percentile(percentile: int | float | str | None) -> int:
    """Normalize ANOVA feature-selection percentile specifications."""
    if percentile is None:
        return DEFAULT_ANOVA_SELECT_PERCENTILE
    if isinstance(percentile, str):
        stripped = percentile.strip()
        if stripped == "" or stripped.lower() in {"auto", "default"}:
            return DEFAULT_ANOVA_SELECT_PERCENTILE
        try:
            parsed: int | float = float(stripped) if any(marker in stripped for marker in (".", "e", "E")) else int(stripped)
        except ValueError as exc:
            raise ValueError("anova_select percentile must be a number in (0, 100].") from exc
        return normalize_anova_select_percentile(parsed)
    if isinstance(percentile, (np.integer,)):
        percentile = int(percentile)
    if isinstance(percentile, (np.floating,)):
        percentile = float(percentile)
    if isinstance(percentile, bool):
        raise ValueError("anova_select percentile must be numeric, not boolean.")
    if not isinstance(percentile, (int, float)) or not np.isfinite(percentile) or percentile <= 0 or percentile > 100:
        raise ValueError("anova_select percentile must be finite and in (0, 100].")
    if not float(percentile).is_integer():
        raise ValueError("anova_select percentile must be an integer percentage.")
    return int(percentile)

`normalize_decoder_name(name)`

Normalize decoder aliases to the names used in result tables.

Source code in src/neureptrace/decoding/__init__.py

def normalize_decoder_name(name: str) -> str:
    """Normalize decoder aliases to the names used in result tables.

    The name is stripped, lower-cased, and has hyphens replaced by
    underscores before it is matched. Known aliases map to their canonical
    decoder name; fold-aware and built-in decoder names pass through; anything
    else is offered to the classifier registry.

    Raises:
        ValueError: If the name matches no alias, built-in decoder, or
            registry decoder.
    """
    key = name.strip().lower().replace("-", "_")

    # (canonical name, accepted spellings) pairs; the alias sets are disjoint.
    alias_groups = (
        ("gaussian_nb", ("nb", "naive_bayes", "gaussian_naive_bayes")),
        ("linear_svm", ("svm",)),
        ("sparse_logistic", ("l1_logistic", "logistic_l1", "sparse_logreg")),
        ("elastic_net_logistic", ("elasticnet_logistic", "logistic_elastic_net", "elastic_net_logreg")),
        ("ridge", ("ridge_classifier", "ridge_classification")),
        ("shrinkage_lda", ("lda_shrinkage", "shrinkage_lda", "shrinkagelda")),
        ("ovo_linear_svm", ("one_vs_one_linear_svm", "onevsone_linear_svm", "ovo_svm", "ovo_linear_svm")),
        ("ovo_logistic", ("one_vs_one_logistic", "onevsone_logistic", "ovo_logistic")),
        ("ecoc_linear_svm", ("ecoc_svm", "output_code_linear_svm", "outputcode_linear_svm", "ecoc_linear_svm")),
        (
            "hierarchical_logistic",
            (
                "hierarchical",
                "hierarchical_3class_logistic",
                "hierarchical_three_class_logistic",
                "ds006629_hierarchical",
                "large_dev_hierarchical",
            ),
        ),
        ("torch_mlp", ("deep_mlp", "mlp", "torch_deep_mlp", "shallow_torch_mlp")),
        ("dann", ("domain_adversarial", "domain_adversarial_nn", "domain_adversarial_neural_network")),
    )
    for canonical, aliases in alias_groups:
        if key in aliases:
            return canonical

    if key in FOLD_AWARE_DECODER_CHOICES or key in BUILTIN_DECODER_CHOICES:
        return key

    # The registry helper receives the caller's original spelling.
    registry_name = _normalize_registry_decoder_name_or_none(name)
    if registry_name is None:
        raise ValueError(f"Unknown decoder '{name}'. Available decoders: {', '.join(DECODER_CHOICES)}.")
    return registry_name

`normalize_emission_mode(mode)`

Normalize calibrated/uncalibrated emission mode names.

Source code in src/neureptrace/decoding/__init__.py

def normalize_emission_mode(mode: str) -> str:
    """Normalize calibrated/uncalibrated emission mode names.

    The mode is lower-cased and hyphens become underscores before it is
    checked against ``EMISSION_MODE_CHOICES``.

    Raises:
        ValueError: If the normalized mode is not a known emission mode.
    """
    canonical = mode.lower().replace("-", "_")
    if canonical in EMISSION_MODE_CHOICES:
        return canonical
    raise ValueError(f"Unknown emission mode '{mode}'. Available modes: {', '.join(EMISSION_MODE_CHOICES)}.")

`normalize_feature_preprocessor(name)`

Normalize feature-preprocessor aliases to canonical result-table names.

Source code in src/neureptrace/decoding/__init__.py

def normalize_feature_preprocessor(name: str | None) -> str:
    """Normalize feature-preprocessor aliases to canonical result-table names."""
    normalized = "none" if name is None else name.lower().replace("-", "_")
    if normalized in {"identity", "standard", "standardize", "scaler", "standard_scaler"}:
        return "none"
    if normalized in {"pca_whitened", "whitened_pca", "whiten_pca"}:
        return "pca_whiten"
    if normalized in {"anova", "anova_percentile", "select_percentile", "select_k_best", "kbest"}:
        return "anova_select"
    if normalized in {"pls", "plsd", "pls_da", "pls_discriminant", "pls_regression", "pls_discriminant_analysis", "supervised_pca"}:
        return "pls_da"
    if normalized not in FEATURE_PREPROCESSOR_CHOICES:
        raise ValueError(
            f"Unknown feature preprocessor '{name}'. Available preprocessors: {', '.join(FEATURE_PREPROCESSOR_CHOICES)}."
        )
    return normalized

`normalize_pca_components(n_components)`

Normalize PCA component specifications for sklearn.

Integers select an explicit component count. Floats in (0, 1) select an explained-variance fraction. None, auto, or an empty string keep sklearn's default PCA(n_components=None) behavior.

Source code in src/neureptrace/decoding/__init__.py

def normalize_pca_components(n_components: int | float | str | None) -> int | float | None:
    """Normalize PCA component specifications for sklearn.

    Integers select an explicit component count. Floats in ``(0, 1)`` select an
    explained-variance fraction. ``None``, ``auto``, or an empty string keep
    sklearn's default ``PCA(n_components=None)`` behavior.
    """
    if n_components is None:
        return None
    if isinstance(n_components, str):
        stripped = n_components.strip()
        if stripped == "" or stripped.lower() in {"none", "auto", "default"}:
            return None
        try:
            parsed: int | float = float(stripped) if any(marker in stripped for marker in (".", "e", "E")) else int(stripped)
        except ValueError as exc:
            raise ValueError("pca_components must be an integer count, a variance fraction in (0, 1), or None.") from exc
        return normalize_pca_components(parsed)
    if isinstance(n_components, (np.integer,)):
        n_components = int(n_components)
    if isinstance(n_components, (np.floating,)):
        n_components = float(n_components)
    if isinstance(n_components, bool):
        raise ValueError("pca_components must be numeric, not boolean.")
    if isinstance(n_components, int):
        if n_components < 1:
            raise ValueError("Integer pca_components must be at least 1.")
        return n_components
    if isinstance(n_components, float):
        if not np.isfinite(n_components) or n_components <= 0.0:
            raise ValueError("Float pca_components must be finite and positive.")
        if n_components < 1.0:
            return float(n_components)
        if n_components.is_integer():
            return int(n_components)
    raise ValueError("pca_components must be an integer count, a variance fraction in (0, 1), or None.")

`normalize_pls_components(n_components)`

Normalize supervised PLS-DA component counts.

PLS component counts are integer-only. Fractional explained-variance values are intentionally rejected because PLS-DA is supervised and does not have the same variance-retention semantics as PCA.

Source code in src/neureptrace/decoding/__init__.py

def normalize_pls_components(n_components: int | str | None) -> int:
    """Normalize supervised PLS-DA component counts.

    PLS component counts are integer-only.  Fractional explained-variance values
    are intentionally rejected because PLS-DA is supervised and does not have the
    same variance-retention semantics as PCA.  ``None`` and the strings
    ``""``, ``none``, ``auto``, and ``default`` select
    ``DEFAULT_PLS_COMPONENTS``.

    Raises:
        ValueError: If the value normalizes to a variance fraction, or if
            ``normalize_pca_components`` rejects it.
    """

    default_tokens = {"", "none", "auto", "default"}
    wants_default = n_components is None or (
        isinstance(n_components, str) and n_components.strip().lower() in default_tokens
    )
    if wants_default:
        return DEFAULT_PLS_COMPONENTS

    # Reuse the PCA parser for numeric handling, then refuse fractions.
    resolved = normalize_pca_components(n_components)
    if isinstance(resolved, float):
        raise ValueError("PLS-DA components must be an integer count or auto/default, not a variance fraction.")
    return DEFAULT_PLS_COMPONENTS if resolved is None else int(resolved)

`normalize_registry_decoder_name(name)`

Normalize aliases for classifier-registry decoders.

Source code in src/neureptrace/decoding/__init__.py

def normalize_registry_decoder_name(name: str) -> str:
    """Normalize aliases for classifier-registry decoders.

    Raises:
        ValueError: If the registry lookup helper returns ``None`` for the
            name; the message lists the sorted ``CLASSIFIER_REGISTRY`` keys.
    """

    resolved = _normalize_registry_decoder_name_or_none(name)
    if resolved is not None:
        return resolved
    known = ", ".join(sorted(CLASSIFIER_REGISTRY))
    raise ValueError(f"Unknown registry decoder '{name}'. Available registry decoders: {known}.")

`normalize_tuning_scoring(scoring)`

Normalize inner-CV scoring names.

Source code in src/neureptrace/decoding/__init__.py

def normalize_tuning_scoring(scoring: str) -> str:
    """Normalize inner-CV scoring names.

    The name is lower-cased and hyphens become underscores before it is
    checked against ``TUNING_SCORING_CHOICES``.

    Raises:
        ValueError: If the normalized name is not a known scoring choice.
    """
    canonical = scoring.lower().replace("-", "_")
    if canonical in TUNING_SCORING_CHOICES:
        return canonical
    raise ValueError(f"Unknown tuning scoring '{scoring}'. Available values: {', '.join(TUNING_SCORING_CHOICES)}.")

`parse_c_grid(values)`

Normalize a regularization-strength grid for CLI and API callers.

Source code in src/neureptrace/decoding/__init__.py

def parse_c_grid(values: Sequence[float] | str | None) -> tuple[float, ...]:
    """Normalize a regularization-strength grid for CLI and API callers."""
    if values is None:
        return DEFAULT_TUNING_C_GRID
    if isinstance(values, str):
        values = [value.strip() for value in values.split(",") if value.strip()]
    grid_values = []
    for value in values:
        if isinstance(value, (bool, np.bool_)):
            raise ValueError("All C values must be positive finite numbers.")
        grid_values.append(float(value))
    grid = tuple(grid_values)
    if not grid:
        raise ValueError("At least one C value is required for hyperparameter tuning.")
    if any(not np.isfinite(value) or value <= 0 for value in grid):
        raise ValueError("All C values must be positive finite numbers.")
    return grid

`predict_emission_probabilities(model, features, *, emission_mode='calibrated')`

Predict calibrated probabilities or uncalibrated score-derived emissions.

Source code in src/neureptrace/decoding/__init__.py

def predict_emission_probabilities(model, features: np.ndarray, *, emission_mode: str = "calibrated") -> np.ndarray:
    """Predict calibrated probabilities or uncalibrated score-derived emissions.

    In uncalibrated mode a model with ``decision_function`` has its scores
    converted by ``score_to_probabilities``. Otherwise ``predict_proba`` is
    preferred, with ``decision_function`` as the last resort. Rows of
    ``predict_proba`` output that contain non-finite values are replaced by a
    one-hot row for the model's hard prediction when the model exposes both
    ``predict`` and ``classes_``.

    Raises:
        ValueError: If the model offers neither ``predict_proba`` nor
            ``decision_function``.
    """
    emission_mode = normalize_emission_mode(emission_mode)
    has_scores = hasattr(model, "decision_function")

    if has_scores and emission_mode == "uncalibrated":
        return score_to_probabilities(model.decision_function(features))

    if not hasattr(model, "predict_proba"):
        if has_scores:
            return score_to_probabilities(model.decision_function(features))
        raise ValueError("Decoder does not provide predict_proba or decision_function.")

    probabilities = np.asarray(model.predict_proba(features), dtype=float)
    if np.isfinite(probabilities).all():
        return probabilities

    if hasattr(model, "predict") and hasattr(model, "classes_"):
        predicted = np.asarray(model.predict(features))
        classes = np.asarray(model.classes_)
        column_of = {label: column for column, label in enumerate(classes)}
        # One-hot encode the hard predictions, then patch only the broken rows.
        one_hot = np.zeros((len(predicted), len(classes)), dtype=float)
        for row, label in enumerate(predicted):
            one_hot[row, column_of[label]] = 1.0
        bad_rows = ~np.isfinite(probabilities).all(axis=1)
        probabilities[bad_rows] = one_hot[bad_rows]
    return probabilities

`score_to_probabilities(scores)`

Convert uncalibrated decision scores into pseudo-probability emissions.

Source code in src/neureptrace/decoding/__init__.py

def score_to_probabilities(scores: np.ndarray) -> np.ndarray:
    """Convert uncalibrated decision scores into pseudo-probability emissions.

    One-dimensional scores go through a logistic sigmoid and come back as two
    columns, ``[1 - p, p]``. Two-dimensional scores go through a row-wise
    softmax. Scores are clipped to ``[-50, 50]`` before exponentiation.

    Raises:
        ValueError: If ``scores`` is neither one- nor two-dimensional.
    """
    scores = np.asarray(scores, dtype=float)
    if scores.ndim not in (1, 2):
        raise ValueError("Decision scores must be one- or two-dimensional.")

    if scores.ndim == 2:
        # Subtract the row maximum so the largest exponent is exp(0).
        shifted = scores - scores.max(axis=1, keepdims=True)
        weights = np.exp(np.clip(shifted, -50.0, 50.0))
        return weights / weights.sum(axis=1, keepdims=True)

    bounded = np.clip(scores, -50.0, 50.0)
    positive = 1.0 / (1.0 + np.exp(-bounded))
    return np.column_stack([1.0 - positive, positive])

`time_windows(times, window_ms, step_ms)`

Return sample index windows and their center times for time-resolved decoding.

Source code in src/neureptrace/decoding/__init__.py

def time_windows(times: np.ndarray, window_ms: float, step_ms: float) -> list[tuple[int, int, float]]:
    """Return sample index windows and their center times for time-resolved decoding.

    The sampling rate is estimated from the median spacing of ``times`` after
    scaling by 1000. Window length and step are rounded to whole samples, with
    a minimum of one sample each. Each entry is ``(start, stop, center)``,
    where ``center`` is the mean of ``times[start:stop]``.

    Raises:
        ValueError: If ``times`` is not one-dimensional, has fewer than two
            samples, or if ``window_ms`` or ``step_ms`` is not positive.
    """
    if times.ndim != 1:
        raise ValueError("times must be one-dimensional")
    if len(times) < 2:
        raise ValueError("times must contain at least two samples")
    if window_ms <= 0 or step_ms <= 0:
        raise ValueError("window_ms and step_ms must be positive")

    sample_period_ms = np.median(np.diff(times * 1000.0))
    sfreq = 1000.0 / sample_period_ms

    def _ms_to_samples(duration_ms: float) -> int:
        # Never return fewer than one sample, however short the duration.
        return max(1, int(round((duration_ms / 1000.0) * sfreq)))

    width = _ms_to_samples(window_ms)
    stride = _ms_to_samples(step_ms)
    last_start = len(times) - width
    return [
        (start, start + width, float(np.mean(times[start : start + width])))
        for start in range(0, last_start + 1, stride)
    ]

`neureptrace.decoding.foundation`

Frozen foundation-model feature extraction for M/EEG decoders.

The classes in this module provide integration points for BENDR-, LaBraM-, EEGPT-, CBraMod-, or project-local encoders without making any of those model packages mandatory NeuRepTrace dependencies. Encoders are used as frozen feature extractors; downstream probes are trained with ordinary NeuRepTrace source-label workflows.

`FoundationModelSpec` `dataclass`

Metadata and conservative defaults for an external foundation encoder family.

Source code in src/neureptrace/decoding/foundation.py

@dataclass(frozen=True)
class FoundationModelSpec:
    """Metadata and conservative defaults for an external foundation encoder family."""

    name: str
    aliases: tuple[str, ...]
    default_pooling: str = "flatten"
    default_input_layout: str = "channels_first"
    default_load_mode: str = "torchscript"
    default_preprocessing: str = "none"
    default_output_key: str | None = None
    default_output_attribute: str | None = None
    package_hint: str = ""
    notes: str = ""

    def defaults(self) -> dict[str, Any]:
        """Return classifier-param defaults for this family."""

        return {
            "model_family": self.name,
            "pooling": self.default_pooling,
            "input_layout": self.default_input_layout,
            "load_mode": self.default_load_mode,
            "preprocessing": self.default_preprocessing,
            "output_key": self.default_output_key,
            "output_attribute": self.default_output_attribute,
        }

`defaults()`

Return classifier-param defaults for this family.

Source code in src/neureptrace/decoding/foundation.py

def defaults(self) -> dict[str, Any]:
    """Return classifier-param defaults for this family."""

    exported = (
        "pooling",
        "input_layout",
        "load_mode",
        "preprocessing",
        "output_key",
        "output_attribute",
    )
    # ``name`` is reported as ``model_family``; each ``default_*`` attribute
    # is reported under its name without the prefix.
    params: dict[str, Any] = {"model_family": self.name}
    params.update((key, getattr(self, f"default_{key}")) for key in exported)
    return params

`FrozenTorchEncoderTransformer`

Bases: TransformerMixin, BaseEstimator

Transform feature rows with a frozen PyTorch foundation encoder.

Source code in src/neureptrace/decoding/foundation.py

class FrozenTorchEncoderTransformer(TransformerMixin, BaseEstimator):
    """Transform feature rows with a frozen PyTorch foundation encoder.

    ``fit`` ignores labels: it validates the configuration, resolves
    family-level defaults for any option left as ``None``, loads the encoder
    and freezes it.  ``transform`` pushes feature rows through the encoder in
    batches under ``torch.no_grad()`` and returns a ``float32`` matrix.

    Constructor arguments are stored unmodified; all resolved values live in
    trailing-underscore attributes that are set by ``fit``.
    """

    def __init__(
        self,
        *,
        model_family: str | None = None,
        encoder: Any | None = None,
        model_path: str | Path | None = None,
        model_factory: str | Callable[..., Any] | None = None,
        model_kwargs: Mapping[str, Any] | None = None,
        input_shape: Sequence[int] | str | None = None,
        input_layout: str | None = None,
        preprocessing: str | None = None,
        batch_size: int = 128,
        device: str = "cpu",
        output_key: str | None = None,
        output_attribute: str | None = None,
        output_index: int | None = None,
        pooling: str | None = None,
        load_mode: str | None = None,
        dtype: str = "float32",
        checkpoint_key: str | Sequence[str] | None = None,
        state_dict_key: str | Sequence[str] | None = None,
        strict_load: bool = True,
        strip_state_dict_prefix: str | Sequence[str] | bool | None = None,
        encoder_attr: str | None = None,
        forward_kwargs: Mapping[str, Any] | None = None,
    ):
        """Store the configuration verbatim; nothing is validated or loaded here."""
        self.model_family = model_family
        self.encoder = encoder
        self.model_path = model_path
        self.model_factory = model_factory
        self.model_kwargs = model_kwargs
        self.input_shape = input_shape
        self.input_layout = input_layout
        self.preprocessing = preprocessing
        self.batch_size = batch_size
        self.device = device
        self.output_key = output_key
        self.output_attribute = output_attribute
        self.output_index = output_index
        self.pooling = pooling
        self.load_mode = load_mode
        self.dtype = dtype
        self.checkpoint_key = checkpoint_key
        self.state_dict_key = state_dict_key
        self.strict_load = strict_load
        self.strip_state_dict_prefix = strip_state_dict_prefix
        self.encoder_attr = encoder_attr
        self.forward_kwargs = forward_kwargs

    def fit(self, features: Sequence[Sequence[float]] | np.ndarray, labels: Sequence | np.ndarray | None = None):
        """Resolve the configuration, then load and freeze the encoder.

        Args:
            features: Feature matrix; only its column count is recorded.
            labels: Ignored.

        Returns:
            ``self``.

        Raises:
            ValueError: If the product of ``input_shape`` differs from the
                number of feature columns.
        """
        del labels
        x = _validate_feature_matrix(features, name="features")
        self.n_features_in_ = int(x.shape[1])
        self.model_spec_ = get_foundation_model_spec(self.model_family)
        self.input_shape_ = parse_input_shape(self.input_shape)
        # A reshape in _prepare_batch is only possible when the shape accounts for every column.
        if self.input_shape_ is not None and int(np.prod(self.input_shape_)) != self.n_features_in_:
            raise ValueError(
                "input_shape product must equal the number of feature columns; "
                f"got product {int(np.prod(self.input_shape_))} for {self.n_features_in_} columns."
            )
        # Explicit constructor values win; ``None`` falls back to the family spec default.
        self.pooling_ = normalize_pooling(self.pooling if self.pooling is not None else self.model_spec_.default_pooling)
        self.input_layout_ = normalize_input_layout(self.input_layout if self.input_layout is not None else self.model_spec_.default_input_layout)
        self.preprocessing_ = normalize_preprocessing(self.preprocessing if self.preprocessing is not None else self.model_spec_.default_preprocessing)
        self.load_mode_ = normalize_load_mode(self.load_mode if self.load_mode is not None else self.model_spec_.default_load_mode)
        self.batch_size_ = _positive_int(self.batch_size, name="batch_size")
        # Copy so later mutation of the caller's mapping cannot affect forward calls.
        self.forward_kwargs_ = dict(self.forward_kwargs or {})
        self.encoder_ = self._load_encoder()
        self._freeze_encoder(self.encoder_)
        return self

    def transform(self, features: Sequence[Sequence[float]] | np.ndarray) -> np.ndarray:
        """Encode feature rows batch by batch and return a ``float32`` matrix.

        Also records the output width in ``n_features_out_``.

        Raises:
            RuntimeError: If called before ``fit``.
            ValueError: If the column count differs from the one seen in
                ``fit``, or the concatenated output is not two-dimensional.
        """
        if not hasattr(self, "encoder_"):
            raise RuntimeError("FrozenTorchEncoderTransformer must be fitted before transform.")
        x = _validate_feature_matrix(features, name="features")
        if x.shape[1] != self.n_features_in_:
            raise ValueError(f"Expected {self.n_features_in_} feature columns, got {x.shape[1]}.")

        torch = _import_torch()
        tensor_dtype = _torch_dtype(torch, self.dtype)
        encoded_batches: list[np.ndarray] = []
        for start in range(0, x.shape[0], self.batch_size_):
            batch = torch.as_tensor(x[start : start + self.batch_size_], dtype=tensor_dtype, device=self.device)
            batch = self._prepare_batch(torch, batch)
            # Inference only: no autograd graph is built for the frozen encoder.
            with torch.no_grad():
                output = self.encoder_(batch, **self.forward_kwargs_)
            output = self._select_output(output)
            output = self._pool_output(torch, output)
            batch_features = output.detach().cpu().numpy()
            # presumably reshapes higher-rank outputs to (rows, features) — confirm in _ensure_2d_output
            batch_features = _ensure_2d_output(batch_features)
            encoded_batches.append(batch_features.astype(np.float32, copy=False))

        # With zero input rows the loop never runs, so an empty (0, 0) matrix is returned.
        transformed = np.concatenate(encoded_batches, axis=0) if encoded_batches else np.empty((0, 0), dtype=np.float32)
        if transformed.ndim != 2:
            raise ValueError("Foundation encoder output must be convertible to a two-dimensional feature matrix.")
        self.n_features_out_ = int(transformed.shape[1])
        return transformed

    def get_feature_names_out(self, input_features: Sequence[str] | None = None) -> np.ndarray:
        """Return names of the form ``<family>_foundation_<index>``.

        ``input_features`` is ignored.

        Raises:
            RuntimeError: If ``transform`` has not yet recorded the output width.
        """
        del input_features
        n_features = getattr(self, "n_features_out_", None)
        if n_features is None:
            raise RuntimeError("Feature names are available after transform has determined the encoder output width.")
        # Falls back to the literal "foundation" prefix when no model spec has been resolved.
        family = getattr(getattr(self, "model_spec_", None), "name", "foundation")
        return np.asarray([f"{family}_foundation_{index}" for index in range(n_features)], dtype=object)

    def _load_encoder(self):
        """Obtain the encoder object according to the resolved load mode.

        Precedence: an explicit ``encoder`` object, then ``load_mode_ ==
        "factory"``, then the ``model_path``-based modes.  When
        ``encoder_attr`` is set, the result is narrowed to that attribute path.

        Raises:
            ValueError: If no encoder, factory mode, or ``model_path`` is available.
        """
        if self.encoder is not None:
            encoder = self.encoder
        elif self.load_mode_ == "factory":
            encoder = self._instantiate_model()
        elif self.model_path is None:
            hint = f" {self.model_spec_.package_hint}" if self.model_spec_.package_hint else ""
            raise ValueError("FrozenTorchEncoderTransformer requires an encoder object, model_path, or model_factory." + hint)
        # load_mode_ was produced by normalize_load_mode in fit; the extra alias
        # names in the sets below are also accepted if they ever appear here.
        elif self.load_mode_ in {"torchscript", "jit", "torch_jit"}:
            torch = _import_torch()
            encoder = torch.jit.load(str(self.model_path), map_location=self.device)
        elif self.load_mode_ in {"module", "torch", "pickle", "stateful_module"}:
            torch = _import_torch()
            # NOTE(review): weights_only=False presumably allows full unpickling, so the
            # checkpoint should come from a trusted source — confirm _torch_load semantics.
            checkpoint = _torch_load(torch, str(self.model_path), self.device, weights_only=False)
            encoder = _extract_path_or_auto(checkpoint, self.checkpoint_key, _CHECKPOINT_MODULE_AUTO_KEYS)
        elif self.load_mode_ in {"state_dict", "weights", "checkpoint"}:
            encoder = self._load_state_dict_encoder()
        else:  # pragma: no cover - normalize_load_mode should prevent this branch
            raise ValueError(f"Unsupported load_mode: {self.load_mode!r}")

        if self.encoder_attr:
            encoder = _resolve_object_path(encoder, self.encoder_attr)
        return encoder

    def _instantiate_model(self):
        """Build a model by calling ``model_factory`` with ``model_kwargs``.

        A string ``model_factory`` is resolved through ``_resolve_import_path`` first.

        Raises:
            ValueError: If ``model_factory`` is ``None``.
            TypeError: If the resolved factory is not callable.
        """
        if self.model_factory is None:
            raise ValueError("load_mode='state_dict' or 'factory' requires model_factory or an encoder object.")
        factory = _resolve_import_path(self.model_factory) if isinstance(self.model_factory, str) else self.model_factory
        if not callable(factory):
            raise TypeError("model_factory must be callable or an import path to a callable.")
        return factory(**dict(self.model_kwargs or {}))

    def _load_state_dict_encoder(self):
        """Instantiate the model from the factory and load weights from ``model_path``.

        The value returned by ``load_state_dict`` is kept in
        ``load_state_dict_result_`` for later inspection.
        """
        torch = _import_torch()
        encoder = self._instantiate_model()
        checkpoint = _torch_load(torch, str(self.model_path), self.device, weights_only=True)
        state_dict = _extract_state_dict(checkpoint, self.state_dict_key)
        state_dict = _strip_state_dict_prefixes(state_dict, self.strip_state_dict_prefix)
        missing_or_unexpected = encoder.load_state_dict(state_dict, strict=bool(self.strict_load))
        self.load_state_dict_result_ = missing_or_unexpected
        return encoder

    def _freeze_encoder(self, encoder: Any) -> None:
        """Move the encoder to ``device``, switch it to eval mode, and disable gradients.

        Each step is skipped when the object lacks the corresponding method,
        so plain callables are accepted as encoders.
        """
        if hasattr(encoder, "to"):
            encoder.to(self.device)
        if hasattr(encoder, "eval"):
            encoder.eval()
        if hasattr(encoder, "parameters"):
            for parameter in encoder.parameters():
                parameter.requires_grad_(False)

    def _prepare_batch(self, torch: Any, batch: Any) -> Any:
        """Reshape (when ``input_shape_`` is set), apply the layout, then preprocess."""
        if self.input_shape_ is not None:
            batch = batch.reshape((batch.shape[0], *self.input_shape_))
        batch = self._apply_input_layout(batch)
        batch = self._apply_preprocessing(torch, batch)
        return batch

    def _apply_input_layout(self, batch: Any) -> Any:
        """Rearrange batch axes according to ``input_layout_``.

        Raises:
            ValueError: If the batch rank does not suit the layout, or the
                layout name is unknown.
        """
        layout = self.input_layout_
        # These layouts pass the batch through unchanged.
        if layout in {"auto", "none", "channels_first"}:
            return batch
        if layout == "time_first":
            if batch.ndim != 3:
                raise ValueError("input_layout='time_first' expects a 3-D batch after input_shape reshaping.")
            # Swap the last two axes of the 3-D batch.
            return batch.permute(0, 2, 1)
        if layout == "add_channel_dim":
            if batch.ndim < 2:
                raise ValueError("input_layout='add_channel_dim' expects at least a 2-D batch.")
            # Insert a singleton axis right after the batch axis.
            return batch.unsqueeze(1)
        if layout == "add_feature_dim":
            if batch.ndim != 3:
                raise ValueError("input_layout='add_feature_dim' expects a 3-D batch after input_shape reshaping.")
            # Insert a singleton axis at position 2.
            return batch.unsqueeze(2)
        raise ValueError(f"Unknown input_layout: {self.input_layout!r}")

    def _apply_preprocessing(self, torch: Any, batch: Any) -> Any:
        """Apply the stateless normalization selected by ``preprocessing_``.

        ``trial_zscore`` standardizes each row over all non-batch axes;
        ``channel_zscore`` standardizes along the last axis.

        Raises:
            ValueError: If the batch rank is too low for ``channel_zscore`` or
                the mode is unknown.
        """
        preprocessing = self.preprocessing_
        if preprocessing == "none":
            return batch
        # Lower bound for the standard deviation so constant inputs do not divide by zero.
        eps = torch.finfo(batch.dtype).eps if getattr(batch, "is_floating_point", lambda: False)() else 1e-6
        if preprocessing == "trial_zscore":
            dims = tuple(range(1, batch.ndim))
            mean = batch.mean(dim=dims, keepdim=True)
            std = batch.std(dim=dims, keepdim=True, unbiased=False).clamp_min(eps)
            return (batch - mean) / std
        if preprocessing == "channel_zscore":
            if batch.ndim < 2:
                raise ValueError("channel_zscore preprocessing expects at least a 2-D batch.")
            axis = -1 if batch.ndim >= 3 else 1
            mean = batch.mean(dim=axis, keepdim=True)
            std = batch.std(dim=axis, keepdim=True, unbiased=False).clamp_min(eps)
            return (batch - mean) / std
        raise ValueError(f"Unknown preprocessing mode: {self.preprocessing!r}")

    def _select_output(self, output: Any) -> Any:
        """Narrow a structured encoder result down to a single value.

        Selection runs in three stages — attribute, mapping key, sequence
        index — and in each stage an explicit constructor setting takes
        precedence over the family-spec default or automatic choice.

        Raises:
            ValueError: If ``output_key`` is set but the value is not a mapping.
        """
        if self.output_attribute is not None:
            output = _resolve_object_path(output, self.output_attribute)
        elif self.model_spec_.default_output_attribute is not None and hasattr(output, self.model_spec_.default_output_attribute):
            output = getattr(output, self.model_spec_.default_output_attribute)

        if self.output_key is not None:
            if not isinstance(output, Mapping):
                raise ValueError("output_key can only be used when the encoder returns a mapping.")
            output = _resolve_mapping_path(output, self.output_key)
        elif self.model_spec_.default_output_key is not None and isinstance(output, Mapping) and self.model_spec_.default_output_key in output:
            output = output[self.model_spec_.default_output_key]
        elif isinstance(output, Mapping):
            output = _select_mapping_output(output)

        if self.output_index is not None:
            output = output[int(self.output_index)]
        elif isinstance(output, (tuple, list)):
            # Without an explicit index, take the first element of a tuple/list result.
            output = output[0]
        return output

    def _pool_output(self, torch: Any, output: Any) -> Any:
        """Reduce the selected output according to ``pooling_``.

        Objects without an ``ndim`` attribute are first converted with
        ``torch.as_tensor``.  The mean-based modes return outputs with two or
        fewer dimensions unchanged.

        Raises:
            ValueError: If ``cls``/``last`` pooling receives fewer than three
                dimensions, or the pooling mode is unknown.
        """
        if not hasattr(output, "ndim"):
            output = torch.as_tensor(output, device=self.device)
        pooling = self.pooling_
        # No reduction here; transform passes the result through _ensure_2d_output afterwards.
        if pooling in {"flatten", "identity"}:
            return output
        if pooling in {"mean", "mean_time"}:
            if output.ndim <= 2:
                return output
            return output.mean(dim=-1)
        # "mean_tokens" and "mean_channels" both average over axis 1.
        if pooling == "mean_tokens":
            if output.ndim <= 2:
                return output
            return output.mean(dim=1)
        if pooling == "mean_channels":
            if output.ndim <= 2:
                return output
            return output.mean(dim=1)
        if pooling == "cls":
            if output.ndim < 3:
                raise ValueError("pooling='cls' expects a sequence-shaped encoder output with at least three dimensions.")
            # First position along axis 1.
            return output[:, 0, ...]
        if pooling == "last":
            if output.ndim < 3:
                raise ValueError("pooling='last' expects a sequence-shaped encoder output with at least three dimensions.")
            # Final position along axis 1.
            return output[:, -1, ...]
        raise ValueError(f"Unknown pooling mode: {self.pooling!r}")

`fit_foundation_linear_probe(features, labels, classifier_param, random_state=None)`

Fit a frozen-foundation-encoder probe on labeled source features.

Source code in src/neureptrace/decoding/foundation.py

def fit_foundation_linear_probe(
    features: Sequence[Sequence[float]] | np.ndarray,
    labels: Sequence | np.ndarray,
    classifier_param: Mapping[str, Any] | str | Path | None,
    random_state: int | None = None,
):
    """Build a frozen-foundation-encoder probe and fit it on labeled source features.

    Args:
        features: Source feature matrix handed to the pipeline's ``fit``.
        labels: Source labels handed to the pipeline's ``fit``.
        classifier_param: Probe configuration accepted by
            ``normalize_foundation_linear_probe_params``.
        random_state: Seed forwarded to ``make_foundation_linear_probe``.

    Returns:
        The fitted probe pipeline.
    """

    normalized = normalize_foundation_linear_probe_params(classifier_param)
    # The normalized ``max_iter`` is forwarded explicitly as the probe's iteration budget.
    iteration_budget = int(normalized["max_iter"])
    probe = make_foundation_linear_probe(
        normalized,
        max_iter=iteration_budget,
        random_state=random_state,
    )
    probe.fit(features, labels)
    return probe

`foundation_model_defaults(model_family)`

Return default classifier parameters for a model family.

Source code in src/neureptrace/decoding/foundation.py

def foundation_model_defaults(model_family: str | None) -> dict[str, Any]:
    """Look up a model family (or alias) and return its default classifier parameters."""

    family_spec = get_foundation_model_spec(model_family)
    return family_spec.defaults()

`get_foundation_model_spec(model_family)`

Return the spec for a foundation-model family or alias.

Source code in src/neureptrace/decoding/foundation.py

def get_foundation_model_spec(model_family: str | None) -> FoundationModelSpec:
    """Resolve a foundation-model family name or alias to its registered spec."""

    canonical_family = normalize_foundation_model_family(model_family)
    return FOUNDATION_MODEL_SPECS[canonical_family]

`list_foundation_model_families()`

Return canonical foundation-model family names supported by NeuRepTrace wrappers.

Source code in src/neureptrace/decoding/foundation.py

def list_foundation_model_families() -> tuple[str, ...]:
    """List the canonical foundation-model family names that NeuRepTrace wrappers support."""

    # Iterating the registry yields its keys, i.e. the canonical family names.
    return (*FOUNDATION_MODEL_SPECS,)

`make_bendr_linear_probe(classifier_param=None, **overrides)`

Create a frozen BENDR-family linear-probe pipeline.

Source code in src/neureptrace/decoding/foundation.py

def make_bendr_linear_probe(classifier_param: Mapping[str, Any] | str | Path | None = None, **overrides: Any):
    """Build a frozen linear-probe pipeline preset to the BENDR family.

    ``classifier_param`` and keyword ``overrides`` are combined with the
    ``"bendr"`` family by ``_with_model_family`` before the pipeline is built.
    """

    bendr_params = _with_model_family("bendr", classifier_param, overrides)
    return make_foundation_linear_probe(bendr_params)

`make_cbramod_linear_probe(classifier_param=None, **overrides)`

Create a frozen CBraMod-family linear-probe pipeline.

Source code in src/neureptrace/decoding/foundation.py

def make_cbramod_linear_probe(classifier_param: Mapping[str, Any] | str | Path | None = None, **overrides: Any):
    """Build a frozen linear-probe pipeline preset to the CBraMod family.

    ``classifier_param`` and keyword ``overrides`` are combined with the
    ``"cbramod"`` family by ``_with_model_family`` before the pipeline is built.
    """

    cbramod_params = _with_model_family("cbramod", classifier_param, overrides)
    return make_foundation_linear_probe(cbramod_params)

`make_eegpt_linear_probe(classifier_param=None, **overrides)`

Create a frozen EEGPT-family linear-probe pipeline.

Source code in src/neureptrace/decoding/foundation.py

def make_eegpt_linear_probe(classifier_param: Mapping[str, Any] | str | Path | None = None, **overrides: Any):
    """Build a frozen linear-probe pipeline preset to the EEGPT family.

    ``classifier_param`` and keyword ``overrides`` are combined with the
    ``"eegpt"`` family by ``_with_model_family`` before the pipeline is built.
    """

    eegpt_params = _with_model_family("eegpt", classifier_param, overrides)
    return make_foundation_linear_probe(eegpt_params)

`make_foundation_linear_probe(classifier_param, *, max_iter=1000, random_state=13)`

Create a frozen-foundation-encoder probe pipeline.

Source code in src/neureptrace/decoding/foundation.py

def make_foundation_linear_probe(
    classifier_param: Mapping[str, Any] | str | Path | None,
    *,
    max_iter: int = 1000,
    random_state: int | None = 13,
):
    """Assemble the frozen-foundation-encoder probe pipeline.

    The pipeline is: optional pre-encoder ``StandardScaler``, the frozen
    encoder transformer, a ``StandardScaler`` on the encoder output, and the
    probe estimator.

    Args:
        classifier_param: Probe configuration accepted by
            ``normalize_foundation_linear_probe_params``.
        max_iter: Iteration budget used only when the normalized
            configuration has no truthy ``max_iter``.
        random_state: Seed forwarded to the probe estimator.

    Returns:
        The unfitted pipeline produced by ``make_pipeline``.
    """

    params = normalize_foundation_linear_probe_params(classifier_param)
    probe_max_iter = int(params.get("max_iter") or max_iter)

    # Options forwarded as-is, falling back to ``None`` when absent.
    passthrough_options = (
        "model_family",
        "encoder",
        "model_path",
        "model_factory",
        "model_kwargs",
        "input_shape",
        "input_layout",
        "preprocessing",
        "batch_size",
        "output_key",
        "output_attribute",
        "output_index",
        "checkpoint_key",
        "state_dict_key",
        "strip_state_dict_prefix",
        "encoder_attr",
        "forward_kwargs",
    )
    encoder_options = {option: params.get(option) for option in passthrough_options}
    # Options that carry their own fallback when missing from the configuration.
    encoder_options["device"] = params.get("device", "cpu")
    encoder_options["pooling"] = params.get("pooling", "flatten")
    encoder_options["load_mode"] = params.get("load_mode", "torchscript")
    encoder_options["dtype"] = params.get("dtype", "float32")
    encoder_options["strict_load"] = params.get("strict_load", True)

    stages: list[tuple[str, Any]] = []
    if params["pre_encoder_standardize"]:
        stages.append(("pre_encoder_standardscaler", StandardScaler()))
    stages.append(("frozen_torch_encoder", FrozenTorchEncoderTransformer(**encoder_options)))
    stages.append(("foundation_standardscaler", StandardScaler()))
    stages.append(("foundation_probe", _make_probe(params, max_iter=probe_max_iter, random_state=random_state)))

    # Only the estimators are forwarded to make_pipeline; the labels above
    # document each stage's role.
    estimators = [estimator for _, estimator in stages]
    return make_pipeline(*estimators)

`make_labram_linear_probe(classifier_param=None, **overrides)`

Create a frozen LaBraM-family linear-probe pipeline.

Source code in src/neureptrace/decoding/foundation.py

def make_labram_linear_probe(classifier_param: Mapping[str, Any] | str | Path | None = None, **overrides: Any):
    """Build a frozen linear-probe pipeline preset to the LaBraM family.

    ``classifier_param`` and keyword ``overrides`` are combined with the
    ``"labram"`` family by ``_with_model_family`` before the pipeline is built.
    """

    labram_params = _with_model_family("labram", classifier_param, overrides)
    return make_foundation_linear_probe(labram_params)

`normalize_foundation_linear_probe_params(classifier_param)`

Normalize a frozen-foundation-encoder linear-probe configuration.

Source code in src/neureptrace/decoding/foundation.py

def normalize_foundation_linear_probe_params(classifier_param: Any) -> dict[str, Any]:
    """Turn a loose probe configuration into a fully populated, validated dict.

    Args:
        classifier_param: ``None`` (all defaults), a mapping of options, a
            ``str``/``Path`` (taken as ``model_path``), or any other value
            (taken as the regularization strength ``C``).

    Returns:
        A new dict layering, from lowest to highest precedence, the global
        defaults, the model-family defaults, and the supplied options, with
        the known options normalized.

    Raises:
        ValueError: If ``model_kwargs`` or ``forward_kwargs`` is not a
            mapping, or the probe name is unknown.  The individual
            normalizers may raise as well.
    """

    if isinstance(classifier_param, Mapping):
        params: dict[str, Any] = dict(classifier_param)
    elif isinstance(classifier_param, (str, Path)):
        params = {"model_path": str(classifier_param)}
    elif classifier_param is None:
        params = {}
    else:
        params = {"C": classifier_param}

    # Canonical option -> accepted alternative spellings, tried in order.
    alias_groups = {
        "model_path": ("path", "checkpoint", "checkpoint_path", "weights_path", "encoder_path"),
        "input_shape": ("shape",),
        "input_layout": ("layout",),
        "pooling": ("pool",),
        "pre_encoder_standardize": ("standardize_before_encoder", "pre_standardize"),
        "preprocessing": ("preprocess",),
        "probe": ("head",),
        "model_family": ("family", "foundation_family", "foundation_model", "model_name"),
        "model_factory": ("factory", "model_class"),
        "model_kwargs": ("factory_kwargs", "kwargs"),
        "encoder_attr": ("module_attr", "feature_extractor_attr"),
        "output_attribute": ("output_attr",),
        "state_dict_key": ("state_key", "state_dict"),
        "load_mode": ("trusted_load",),
    }
    for canonical, spellings in alias_groups.items():
        for spelling in spellings:
            # An explicit canonical key always wins; the alias is then left untouched.
            if spelling in params and canonical not in params:
                params[canonical] = params.pop(spelling)

    model_family = normalize_foundation_model_family(params.get("model_family"))
    family_defaults = foundation_model_defaults(model_family)
    normalized = {**DEFAULT_FOUNDATION_LINEAR_PROBE_PARAMS, **family_defaults, **params}
    normalized["model_family"] = model_family

    string_normalizers = (
        ("pooling", normalize_pooling),
        ("input_layout", normalize_input_layout),
        ("preprocessing", normalize_preprocessing),
        ("load_mode", normalize_load_mode),
        ("input_shape", parse_input_shape),
    )
    for option, normalizer in string_normalizers:
        normalized[option] = normalizer(normalized.get(option))
    for option in ("batch_size", "max_iter"):
        normalized[option] = _positive_int(normalized.get(option), name=option)
    normalized["C"] = _positive_float(normalized.get("C"), name="C")
    for option in ("pre_encoder_standardize", "strict_load"):
        normalized[option] = _bool_param(normalized.get(option), name=option)

    for option in ("model_kwargs", "forward_kwargs"):
        supplied = normalized.get(option)
        if supplied is None:
            normalized[option] = None
        elif isinstance(supplied, Mapping):
            # Copy so the normalized configuration does not alias the caller's mapping.
            normalized[option] = dict(supplied)
        else:
            raise ValueError(f"{option} must be a mapping when provided.")

    probe = str(normalized.get("probe", "logistic")).strip().lower().replace("-", "_")
    if probe in {"svm", "linear_svc", "linear_support_vector_machine"}:
        probe = "linear_svm"
    if probe not in PROBE_CHOICES:
        raise ValueError(f"Unknown foundation probe '{normalized.get('probe')}'. Available probes: {', '.join(PROBE_CHOICES)}.")
    normalized["probe"] = probe
    return normalized

`normalize_foundation_model_family(model_family)`

Normalize aliases for foundation-model families.

Source code in src/neureptrace/decoding/foundation.py

def normalize_foundation_model_family(model_family: str | None) -> str:
    """Map a family name or alias to its canonical foundation-model family.

    ``None`` and the (case-insensitive, whitespace-trimmed) strings ``""``,
    ``"none"`` and ``"default"`` resolve to ``"generic"``.  Underscores are
    treated as hyphens before the alias lookup.

    Raises:
        ValueError: If the name is not a known family or alias.
    """

    lookup_key = "" if model_family is None else str(model_family).strip().lower().replace("_", "-")
    if lookup_key in ("", "none", "default"):
        return "generic"
    try:
        canonical = _FOUNDATION_MODEL_ALIASES[lookup_key]
    except KeyError as exc:
        families = ", ".join(list_foundation_model_families())
        raise ValueError(f"Unknown foundation model family '{model_family}'. Available families: {families}.") from exc
    return canonical

`normalize_input_layout(input_layout)`

Normalize input-layout names for foundation encoders.

Source code in src/neureptrace/decoding/foundation.py

def normalize_input_layout(input_layout: str | None) -> str:
    """Map an input-layout name or alias to its canonical layout.

    ``None`` resolves to ``"auto"``.  Matching is case-insensitive, ignores
    surrounding whitespace, and treats hyphens as underscores.

    Raises:
        ValueError: If the resolved name is not in ``INPUT_LAYOUT_CHOICES``.
    """

    # Canonical layout -> alternative spellings that resolve to it.
    spellings_by_layout = {
        "auto": ("flat", "identity"),
        "channels_first": (
            "batch_channel_time",
            "batch_channels_time",
            "bct",
            "channels_first",
            "channel_first",
            "bendr",
            "labram",
            "eegpt",
            "cbramod",
        ),
        "time_first": ("batch_time_channel", "batch_time_channels", "btc", "time_first", "time_major"),
        "add_channel_dim": ("4d", "add_channel", "add_channel_axis", "unsqueeze_channel", "b1ct"),
        "add_feature_dim": ("add_feature", "add_feature_axis", "unsqueeze_feature", "bc1t"),
    }
    canonical_by_spelling = {
        spelling: layout for layout, spellings in spellings_by_layout.items() for spelling in spellings
    }

    if input_layout is None:
        requested = "auto"
    else:
        requested = str(input_layout).strip().lower().replace("-", "_")
    resolved = canonical_by_spelling.get(requested, requested)
    if resolved in INPUT_LAYOUT_CHOICES:
        return resolved
    raise ValueError(f"Unknown input_layout '{input_layout}'. Available layouts: {', '.join(INPUT_LAYOUT_CHOICES)}.")

`normalize_load_mode(load_mode)`

Normalize foundation-model loading mode names.

Source code in src/neureptrace/decoding/foundation.py

def normalize_load_mode(load_mode: str | None) -> str:
    """Normalize foundation-model loading mode names."""

    normalized = "torchscript" if load_mode is None else str(load_mode).strip().lower().replace("-", "_")
    aliases = {
        "jit": "torchscript",
        "torch_jit": "torchscript",
        "torchscript_module": "torchscript",
        "torch_module": "module",
        "full_module": "module",
        "pickle": "module",
        "stateful_module": "module",
        "weights": "state_dict",
        "checkpoint": "state_dict",
        "ckpt": "state_dict",
        "factory_only": "factory",
    }
    normalized = aliases.get(normalized, normalized)
    if normalized not in {"torchscript", "module", "state_dict", "factory"}:
        raise ValueError("load_mode must be one of 'torchscript', 'module', 'state_dict', or 'factory'.")
    return normalized

`normalize_pooling(pooling)`

Normalize foundation-encoder pooling names.

Source code in src/neureptrace/decoding/foundation.py

def normalize_pooling(pooling: str | None) -> str:
    """Map a pooling name or alias to its canonical pooling mode.

    ``None`` resolves to ``"flatten"``.  Matching is case-insensitive,
    ignores surrounding whitespace, and treats hyphens as underscores.

    Raises:
        ValueError: If the resolved name is not in ``POOLING_CHOICES``.
    """

    # Canonical pooling mode -> alternative spellings that resolve to it.
    spellings_by_mode = {
        "flatten": ("flat", "ravel"),
        "identity": ("none",),
        "mean_tokens": ("token_mean", "tokens_mean", "sequence_mean", "mean_sequence", "mean_tokens"),
        "mean_time": ("time_mean",),
        "cls": ("class_token", "cls_token"),
        "last": ("final", "last_token"),
        "mean_channels": ("channel_mean", "channels_mean"),
    }
    canonical_by_spelling = {
        spelling: mode for mode, spellings in spellings_by_mode.items() for spelling in spellings
    }

    if pooling is None:
        requested = "flatten"
    else:
        requested = str(pooling).strip().lower().replace("-", "_")
    resolved = canonical_by_spelling.get(requested, requested)
    if resolved in POOLING_CHOICES:
        return resolved
    raise ValueError(f"Unknown pooling mode '{pooling}'. Available modes: {', '.join(POOLING_CHOICES)}.")

`normalize_preprocessing(preprocessing)`

Normalize stateless foundation-input preprocessing modes.

Source code in src/neureptrace/decoding/foundation.py

def normalize_preprocessing(preprocessing: str | None) -> str:
    """Map a preprocessing name or alias to its canonical stateless mode.

    ``None`` resolves to ``"none"``.  Matching is case-insensitive, ignores
    surrounding whitespace, and treats hyphens as underscores.

    Raises:
        ValueError: If the resolved name is not in ``PREPROCESSING_CHOICES``.
    """

    # Canonical preprocessing mode -> alternative spellings that resolve to it.
    spellings_by_mode = {
        "none": ("off", "false", "no"),
        "trial_zscore": ("zscore", "z_score", "per_trial_zscore", "trial_z_score"),
        "channel_zscore": ("channel_z_score", "per_channel_zscore", "per_channel_z_score"),
    }
    canonical_by_spelling = {
        spelling: mode for mode, spellings in spellings_by_mode.items() for spelling in spellings
    }

    if preprocessing is None:
        requested = "none"
    else:
        requested = str(preprocessing).strip().lower().replace("-", "_")
    resolved = canonical_by_spelling.get(requested, requested)
    if resolved in PREPROCESSING_CHOICES:
        return resolved
    raise ValueError(f"Unknown preprocessing mode '{preprocessing}'. Available modes: {', '.join(PREPROCESSING_CHOICES)}.")

`parse_input_shape(input_shape)`

Parse an encoder input-shape specification.

Source code in src/neureptrace/decoding/foundation.py

def parse_input_shape(input_shape: Sequence[int] | str | None) -> tuple[int, ...] | None:
    """Parse an encoder input-shape specification into a tuple of dimensions.

    Args:
        input_shape: ``None``, a sequence of dimensions, or a string whose
            dimensions are separated by ``,``, ``x``, ``X``, ``;`` or a space.
            The strings ``""``, ``"none"``, ``"flat"``, ``"flatten"`` and
            ``"auto"`` (case-insensitive) mean "no reshaping".

    Returns:
        The dimensions as a tuple, or ``None`` when no shape was specified.
        Each dimension is validated by ``_positive_int``.
    """

    if input_shape is None:
        return None
    if isinstance(input_shape, str):
        text = input_shape.strip()
        if text.lower() in {"", "none", "flat", "flatten", "auto"}:
            return None
        # Fold every accepted separator into a comma, then drop empty tokens.
        unified = text.translate(str.maketrans("xX; ", ",,,,"))
        tokens = [token for token in unified.split(",") if token]
    else:
        tokens = list(input_shape)
    dimensions = tuple(_positive_int(token, name="input_shape") for token in tokens)
    return dimensions or None

`register_foundation_linear_probe()`

Register foundation-model linear probes as optional decoder names.

Source code in src/neureptrace/decoding/foundation.py

def register_foundation_linear_probe() -> None:
    """Register the generic and per-family foundation linear probes as decoder names.

    Default parameters are added with ``setdefault``, so existing entries are
    kept; registry entries are always (re)assigned.  The decoding choices are
    refreshed once at the end.
    """

    # Imported locally rather than at module import time.
    from neureptrace.decoding.classifiers import CLASSIFIER_REGISTRY, DEFAULT_CLASSIFIER_PARAMS, ClassifierSpec

    generic_name = "foundation-linear-probe"
    generic_params = normalize_foundation_linear_probe_params(None)
    DEFAULT_CLASSIFIER_PARAMS.setdefault(generic_name, generic_params)
    CLASSIFIER_REGISTRY[generic_name] = ClassifierSpec(_build_foundation_linear_probe_classifier, fits_in_builder=True)

    for family, decoder_name in FAMILY_LINEAR_PROBE_DECODER_NAMES.items():
        family_params = normalize_foundation_linear_probe_params({"model_family": family})
        DEFAULT_CLASSIFIER_PARAMS.setdefault(decoder_name, family_params)
        CLASSIFIER_REGISTRY[decoder_name] = ClassifierSpec(_family_builder(family), fits_in_builder=True)

    _refresh_decoding_choices()

`neureptrace.decoding.alignment_window`

Feature-window adaptation helpers for cross-window alignment projections.

These utilities support decoding workflows that fit an alignment projection on one feature window, then apply that projection to features extracted from a possibly different decoding window. When the feature widths differ, the projection and centering vector can be collapsed to channel space and reused across the decoding window samples.

`AlignmentWindow` `dataclass`

Resolved alignment-window parameters.

Source code in src/neureptrace/decoding/alignment_window.py

@dataclass(frozen=True)
class AlignmentWindow:
    """Alignment window described by its center and total size.

    Instances are immutable; ``start`` and ``stop`` are derived on access as
    the center minus/plus half the size.
    """

    center: float
    size: float

    @property
    def start(self) -> float:
        """Lower edge of the window: the center minus half the size."""

        half_size = self.size / 2.0
        return self.center - half_size

    @property
    def stop(self) -> float:
        """Upper edge of the window: the center plus half the size."""

        half_size = self.size / 2.0
        return self.center + half_size

`start` `property`

Window start time, using center-size convention.

`stop` `property`

Window stop time, using center-size convention.

`WindowedFeatureSet`

Bases: Protocol

Minimal feature-set interface needed for alignment-window adaptation.

Flattened MNE epoch arrays use the default channel_time layout because data[:, :, start:stop].reshape(n_trials, -1) stores all time samples of channel 0 first, then all time samples of channel 1, and so on. Legacy or synthetic feature sets that are flattened as [t0c0, t0c1, t1c0, ...] can set feature_order = "time_channel".

Source code in src/neureptrace/decoding/alignment_window.py

class WindowedFeatureSet(Protocol):
    """Minimal feature-set interface needed for alignment-window adaptation.

    Flattened MNE epoch arrays use the default ``channel_time`` layout because
    ``data[:, :, start:stop].reshape(n_trials, -1)`` stores all time samples of
    channel 0 first, then all time samples of channel 1, and so on. Legacy or
    synthetic feature sets that are flattened as ``[t0c0, t0c1, t1c0, ...]`` can
    set ``feature_order = "time_channel"``.

    Note that ``feature_order`` is not declared as a member of this protocol,
    so objects without that attribute still satisfy the interface.
    """

    # Feature matrix; axis 0 indexes trial rows (``validate_paired_feature_sets``
    # compares ``features.shape[0]`` between paired sets).
    features: np.ndarray
    # Labels for the trial rows; paired decoding/alignment sets must hold equal labels.
    labels: np.ndarray
    # Channel count; paired decoding/alignment sets must agree on it.
    n_channels: int
    # NOTE(review): presumably the number of time samples per channel in the
    # window, so ``features`` would have ``n_channels * n_window_samples``
    # columns -- confirm against the feature extraction code.
    n_window_samples: int

`resolved_alignment_window(config)`

Return explicit alignment-window values, defaulting to the decoding window.

The `config` object is expected to expose `window_center` and `window_size` attributes. Optional `alignment_window_center` and `alignment_window_size` attributes override the decoding window when they are not `None`.

Source code in src/neureptrace/decoding/alignment_window.py

def resolved_alignment_window(config) -> AlignmentWindow:
    """Resolve the alignment window, falling back to the decoding window.

    ``config`` must provide ``window_center`` and ``window_size``. When it also
    carries a non-``None`` ``alignment_window_center`` or
    ``alignment_window_size``, that value is used instead of the matching
    decoding-window value. The two fields fall back independently, and both
    results are coerced to ``float``.
    """

    # A missing attribute is treated the same as an explicit ``None`` override.
    center_override = getattr(config, "alignment_window_center", None)
    if center_override is None:
        center = config.window_center
    else:
        center = center_override

    size_override = getattr(config, "alignment_window_size", None)
    if size_override is None:
        size = config.window_size
    else:
        size = size_override

    return AlignmentWindow(center=float(center), size=float(size))

`transform_with_alignment_projection(features, *, decode_feature_set, projection, projection_feature_mean, projection_feature_set, feature_mean=None, projection_template_mean=None, feature_mean_set=None)`

Apply an alignment projection to features from a possibly different window.

When feature widths and window metadata match, this is the standard centered linear projection. When widths differ, or when timing metadata show that the projection was fitted on a different same-width window, the projection and centering vector are collapsed to channel space by averaging across the alignment-window samples, then applied independently to each decoding-window sample.

`projection_template_mean` shifts projected rows into a learned template coordinate system after direct or cross-window projection. When the decoding output contains multiple time samples, the template mean is expanded using the decoding feature order.

Source code in src/neureptrace/decoding/alignment_window.py

def transform_with_alignment_projection(
    features: np.ndarray,
    *,
    decode_feature_set: WindowedFeatureSet,
    projection: np.ndarray,
    projection_feature_mean: np.ndarray,
    projection_feature_set: WindowedFeatureSet,
    feature_mean: np.ndarray | None = None,
    projection_template_mean: Sequence[float] | np.ndarray | None = None,
    feature_mean_set: WindowedFeatureSet | None = None,
) -> np.ndarray:
    """Project decoding-window features with a possibly foreign-window projection.

    Two paths exist:

    * **Direct** -- taken when the feature column count equals the projection
      row count *and* the decoding and projection feature sets describe the
      same window. The features are centered and multiplied by the projection.
    * **Channel space** -- taken otherwise (different widths, or a same-width
      projection fitted on a different window). The projection and the
      centering vector are collapsed to channel space and applied to the
      decoding-window features.

    In both paths ``projection_template_mean`` is added afterwards, using
    ``decode_feature_set`` as the output layout.

    Centering uses ``feature_mean`` when given, together with
    ``feature_mean_set``; otherwise it uses ``projection_feature_mean`` together
    with ``projection_feature_set``.

    Raises:
        ValueError: on the direct path, when the centering vector length does
            not equal the number of feature columns.
    """

    feature_matrix = _feature_matrix(features, name="features")
    projection_matrix = _feature_matrix(projection, name="projection")
    fitted_mean = _feature_vector(projection_feature_mean, name="projection_feature_mean")

    # Pick the centering vector and the feature set that describes its layout.
    has_explicit_mean = feature_mean is not None
    if has_explicit_mean:
        centering = _feature_vector(feature_mean, name="feature_mean")
        centering_source_set = feature_mean_set
    else:
        centering = fitted_mean
        centering_source_set = projection_feature_set

    # Resolved before branching, exactly as in the direct-path case, so any
    # validation it performs applies to both paths.
    centering_set = _resolve_feature_mean_set(
        centering,
        decode_feature_set=decode_feature_set,
        projection_feature_set=projection_feature_set,
        explicit_feature_mean_set=centering_source_set,
        feature_mean_was_explicit=has_explicit_mean,
    )

    n_columns = feature_matrix.shape[1]
    widths_agree = n_columns == projection_matrix.shape[0]
    # The window comparison only runs when the widths already agree.
    if widths_agree and _feature_windows_match(decode_feature_set, projection_feature_set):
        if centering.shape[0] != n_columns:
            raise ValueError(f"feature_mean length must match features columns: {centering.shape[0]} != {n_columns}.")
        projected = (feature_matrix - centering) @ projection_matrix
    else:
        per_channel_projection = _projection_to_channel_space(projection_matrix, projection_feature_set)
        per_channel_mean = _feature_mean_to_channel_space(centering, centering_set)
        projected = _apply_channel_projection(
            feature_matrix,
            decode_feature_set,
            per_channel_projection,
            per_channel_mean,
        )

    return _add_template_mean(
        projected,
        projection_template_mean,
        output_feature_set=decode_feature_set,
    )

`uses_separate_alignment_window(config)`

Return whether alignment and decoding windows differ.

Source code in src/neureptrace/decoding/alignment_window.py

def uses_separate_alignment_window(config) -> bool:
    """Report whether the resolved alignment window differs from the decoding window.

    The windows count as identical only when both the center and the size are
    equal within ``np.isclose`` default tolerances.
    """

    resolved = resolved_alignment_window(config)
    # A differing center already decides the answer; the size is only
    # inspected when the centers agree.
    if not np.isclose(resolved.center, float(config.window_center)):
        return True
    return not np.isclose(resolved.size, float(config.window_size))

`validate_paired_feature_sets(decode_set, alignment_set, *, participant=None)`

Validate that two feature sets refer to the same trial rows.

The decoding and alignment feature matrices may have different column counts because they can represent different windows. They must, however, have the same row count, labels, and number of channels.

Source code in src/neureptrace/decoding/alignment_window.py

def validate_paired_feature_sets(decode_set: WindowedFeatureSet, alignment_set: WindowedFeatureSet, *, participant: int | None = None) -> None:
    """Check that a decoding set and an alignment set describe the same trials.

    Column counts are allowed to differ, since the two sets may cover different
    windows. Row counts, labels, and channel counts must agree; the checks run
    in that order and the first mismatch is reported.

    Args:
        decode_set: Feature set used for decoding.
        alignment_set: Feature set used for fitting the alignment.
        participant: Optional participant identifier appended to error
            messages for context.

    Raises:
        ValueError: if row counts, labels, or channel counts differ.
    """

    # Shared message suffix identifying the participant, when one was given.
    suffix = "" if participant is None else f" for participant {participant}"

    n_decode = int(decode_set.features.shape[0])
    n_alignment = int(alignment_set.features.shape[0])
    if n_decode != n_alignment:
        raise ValueError(f"Decoding and alignment feature rows differ{suffix}.")

    labels_decode = _label_vector(
        decode_set.labels,
        expected_length=n_decode,
        name="Decoding labels",
        participant=participant,
    )
    labels_alignment = _label_vector(
        alignment_set.labels,
        expected_length=n_alignment,
        name="Alignment labels",
        participant=participant,
    )
    labels_agree = np.array_equal(labels_decode, labels_alignment)
    if not labels_agree:
        raise ValueError(f"Decoding and alignment labels differ{suffix}.")

    channels_agree = int(decode_set.n_channels) == int(alignment_set.n_channels)
    if not channels_agree:
        raise ValueError(f"Decoding and alignment channel counts differ{suffix}.")