Skip to content

Inference

neureptrace.inference

sign_flip_time_inference(csv_paths, *, metric='accuracy', chance=0.5, n_permutations=10000, random_state=13, cluster_alpha=0.05, decoder=None, emission_mode=None, observation_csv_paths=None, observation_subject_column=None, ece_bins=DEFAULT_ECE_BINS, metric_direction='auto')

Run one-sided subject-level sign-flip inference over time.

The test uses fold-size-weighted subject time courses as independent samples. Pointwise p-values test whether the metric is better than the reference value `chance`. Higher-is-better metrics use `metric - chance`; lower-is-better metrics use `chance - metric`. Cluster p-values use a max-cluster-mass correction over contiguous positive-effect time points.

Source code in src/neureptrace/inference.py
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
def sign_flip_time_inference(
    csv_paths: list[Path],
    *,
    metric: str = "accuracy",
    chance: float = 0.5,
    n_permutations: int = 10_000,
    random_state: int = 13,
    cluster_alpha: float = 0.05,
    decoder: str | None = None,
    emission_mode: str | None = None,
    observation_csv_paths: list[Path] | None = None,
    observation_subject_column: str | None = None,
    ece_bins: int = DEFAULT_ECE_BINS,
    metric_direction: str = "auto",
) -> tuple[pd.DataFrame, pd.DataFrame]:
    """Run one-sided subject-level sign-flip inference over time.

    The test uses fold-size-weighted subject time courses as independent
    samples. Pointwise p-values test whether the metric is better than the
    reference value ``chance``. Higher-is-better metrics use ``metric - chance``;
    lower-is-better metrics use ``chance - metric``. Cluster p-values use a
    max-cluster-mass correction over contiguous positive-effect time points.

    Args:
        csv_paths: Input CSV paths, forwarded to
            ``_subject_time_effects_and_conditions``.
        metric: Metric name. Forwarded to the effect loader and used to name
            the ``<metric>_mean`` column of the time table.
        chance: Reference value the metric is compared against.
        n_permutations: Number of sign-flip permutations; must be at least 1.
        random_state: Passed to ``_sign_flip_t_statistics``.
        cluster_alpha: Tail level strictly between 0 and 1. The cluster-forming
            threshold at each time point is the ``1 - cluster_alpha`` quantile
            of the null statistics at that time point.
        decoder: Forwarded unchanged to the effect loader.
        emission_mode: Forwarded unchanged to the effect loader.
        observation_csv_paths: Forwarded unchanged to the effect loader.
        observation_subject_column: Forwarded unchanged to the effect loader.
        ece_bins: Forwarded unchanged to the effect loader.
        metric_direction: Forwarded unchanged to the effect loader, which
            returns the resolved direction reported in both output tables.

    Returns:
        A ``(time_table, cluster_table)`` pair, each passed through
        ``_prepend_condition_columns``. The time table has one row per time
        point; ``cluster_id`` is ``-1`` and ``cluster_p`` is NaN for time
        points outside every observed cluster. The cluster table has one row
        per observed cluster and keeps its columns even when no cluster exists.

    Raises:
        ValueError: If ``cluster_alpha`` is not strictly between 0 and 1, or
            if ``n_permutations`` is smaller than 1.
    """
    if not 0 < cluster_alpha < 1:
        raise ValueError("cluster_alpha must be between 0 and 1.")
    # Fail fast, before any data is loaded: a non-positive permutation count
    # cannot produce a null distribution to compare against.
    if n_permutations < 1:
        raise ValueError("n_permutations must be at least 1.")

    effects, condition_values, resolved_direction = _subject_time_effects_and_conditions(
        csv_paths,
        metric=metric,
        chance=chance,
        decoder=decoder,
        emission_mode=emission_mode,
        observation_csv_paths=observation_csv_paths,
        observation_subject_column=observation_subject_column,
        ece_bins=ece_bins,
        metric_direction=metric_direction,
    )
    effect_values = effects.to_numpy(dtype=float)
    times = effects.columns.to_numpy(dtype=float)
    n_subjects = effect_values.shape[0]

    observed_statistic = _t_statistic(effect_values)
    observed_effect = effect_values.mean(axis=0)
    null_statistics = _sign_flip_t_statistics(effect_values, n_permutations=n_permutations, random_state=random_state)

    # One-sided p-value; the +1 terms count the observed statistic itself so
    # the estimate can never be exactly zero.
    pointwise_p = (1.0 + (null_statistics >= observed_statistic[None, :]).sum(axis=0)) / (n_permutations + 1.0)
    # Per-time-point cluster-forming threshold taken from the null distribution.
    cluster_threshold = np.quantile(null_statistics, 1.0 - cluster_alpha, axis=0)

    observed_clusters = _contiguous_clusters(observed_statistic >= cluster_threshold)
    # Largest cluster mass of each permutation; 0.0 when it forms no cluster.
    max_null_masses = np.zeros(n_permutations)
    for idx, null_statistic in enumerate(null_statistics):
        masses = _cluster_masses(null_statistic, cluster_threshold)
        max_null_masses[idx] = max(masses) if masses else 0.0

    # Sentinels for time points that belong to no observed cluster.
    cluster_ids = np.full(len(times), -1, dtype=int)
    cluster_p_values = np.full(len(times), np.nan, dtype=float)
    cluster_rows = []
    # Clusters are used as half-open [start, stop) index ranges.
    for cluster_id, (start, stop) in enumerate(observed_clusters, start=1):
        cluster_statistic = observed_statistic[start:stop]
        cluster_mass = float(cluster_statistic.sum())
        cluster_p = float((1.0 + np.sum(max_null_masses >= cluster_mass)) / (n_permutations + 1.0))
        peak_offset = int(np.argmax(cluster_statistic))
        cluster_ids[start:stop] = cluster_id
        cluster_p_values[start:stop] = cluster_p
        cluster_rows.append(
            {
                "cluster_id": cluster_id,
                "start_time": float(times[start]),
                "stop_time": float(times[stop - 1]),
                "peak_time": float(times[start + peak_offset]),
                "n_timepoints": stop - start,
                "cluster_mass": cluster_mass,
                "peak_statistic": float(cluster_statistic[peak_offset]),
                "cluster_p": cluster_p,
            }
        )

    # Undo the sign convention to report the mean on the metric's own scale.
    if resolved_direction == "higher":
        metric_mean = observed_effect + chance
    else:
        metric_mean = chance - observed_effect

    time_table = pd.DataFrame(
        {
            "time": times,
            "n_subjects": n_subjects,
            "metric_direction": resolved_direction,
            "reference_value": chance,
            f"{metric}_mean": metric_mean,
            "effect_mean": observed_effect,
            "statistic": observed_statistic,
            "pointwise_p": pointwise_p,
            "cluster_threshold": cluster_threshold,
            "cluster_id": cluster_ids,
            "cluster_p": cluster_p_values,
        }
    )
    # Explicit columns keep the schema stable when there are no clusters.
    cluster_table = pd.DataFrame(
        cluster_rows,
        columns=[
            "cluster_id",
            "start_time",
            "stop_time",
            "peak_time",
            "n_timepoints",
            "cluster_mass",
            "peak_statistic",
            "cluster_p",
        ],
    )
    cluster_table["metric_direction"] = resolved_direction
    cluster_table["reference_value"] = chance
    return _prepend_condition_columns(time_table, condition_values), _prepend_condition_columns(cluster_table, condition_values)

subject_time_effects(csv_paths, *, metric='accuracy', chance=0.5, decoder=None, emission_mode=None, observation_csv_paths=None, observation_subject_column=None, ece_bins=DEFAULT_ECE_BINS, metric_direction='auto')

Return a subject-by-time matrix of signed effects against a reference value.

The sign is chosen so positive values mean better than the reference: higher-is-better metrics use metric minus reference, while lower-is-better metrics use reference minus metric. ECE inference uses pooled held-out probability observations. Pass observation_csv_paths when metric='ece'; fold-averaged ECE is not used for inferential tests.

Source code in src/neureptrace/inference.py
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
def subject_time_effects(
    csv_paths: list[Path],
    *,
    metric: str = "accuracy",
    chance: float = 0.5,
    decoder: str | None = None,
    emission_mode: str | None = None,
    observation_csv_paths: list[Path] | None = None,
    observation_subject_column: str | None = None,
    ece_bins: int = DEFAULT_ECE_BINS,
    metric_direction: str = "auto",
) -> pd.DataFrame:
    """Build the subject-by-time matrix of signed effects versus a reference.

    Positive entries always mean "better than the reference": for
    higher-is-better metrics the effect is metric minus reference, and for
    lower-is-better metrics it is reference minus metric. ECE inference uses
    pooled held-out probability observations, so pass observation_csv_paths
    when metric='ece'; fold-averaged ECE is not used for inferential tests.

    This is a thin wrapper around ``_subject_time_effects_and_conditions``
    that forwards every argument and returns only the effects matrix.
    """
    forwarded_options = {
        "metric": metric,
        "chance": chance,
        "decoder": decoder,
        "emission_mode": emission_mode,
        "observation_csv_paths": observation_csv_paths,
        "observation_subject_column": observation_subject_column,
        "ece_bins": ece_bins,
        "metric_direction": metric_direction,
    }
    # The helper also returns condition values and the resolved direction;
    # neither is needed here.
    signed_effects, _conditions, _direction = _subject_time_effects_and_conditions(
        csv_paths,
        **forwarded_options,
    )
    return signed_effects