Skip to content

Continuous Stimulus Scan

neureptrace.continuous_stimulus_scan turns the long-stream event-detection idea into a single reproducible workflow:

  1. train an event-locked decoder on labeled events from one raw run;
  2. scan a held-out raw run with the same window and preprocessing;
  3. export P(class | time) as NeuRepTrace stream observations;
  4. run neureptrace.stimulus_detection; and
  5. write event-level precision, recall, F1, latency, and false-alarm summaries.

Use this when the question is:

I have an event-locked decoder for a stimulus class. Does a held-out continuous recording contain intervals that look like that class?

CLI Example

python -m neureptrace.continuous_stimulus_scan \
  --train-raw data/ds000117/sub-01/ses-meg/meg/sub-01_ses-meg_task-facerecognition_run-01_meg.fif \
  --train-events data/ds000117/sub-01/ses-meg/meg/sub-01_ses-meg_task-facerecognition_run-01_events.tsv \
  --scan-raw data/ds000117/sub-01/ses-meg/meg/sub-01_ses-meg_task-facerecognition_run-02_meg.fif \
  --scan-events data/ds000117/sub-01/ses-meg/meg/sub-01_ses-meg_task-facerecognition_run-02_events.tsv \
  --source-column stim_type \
  --positive-pattern "Famous|Unfamiliar" \
  --negative-pattern "Scrambled" \
  --positive-label face \
  --negative-label scrambled \
  --target-class face \
  --train-window 0.15 0.25 \
  --picks meg \
  --demean-window \
  --slice-duration 6.0 \
  --slice-count 10 \
  --require-target-event \
  --exclude-events-from-threshold-window \
  --threshold-window 0.0 0.8 \
  --detection-window 0.8 6.0 \
  --threshold-method max_run \
  --threshold-quantile 0.975 \
  --min-consecutive 2 \
  --min-duration 0.05 \
  --merge-gap 0.05 \
  --refractory 0.30 \
  --match-tolerance 0.35 \
  --out-dir results/ds000117_continuous_scan

The installed command neureptrace-continuous-stimulus-scan exposes the same arguments.

Outputs

The output directory contains:

| File | Meaning |
| --- | --- |
| `stream_observations.csv` | Long-stream probability observations with `prob_class_*` columns. |
| `stimulus_annotations.csv` | Held-out event annotations converted to stream-relative times. |
| `stimulus_thresholds.csv` | Class-specific detector thresholds. |
| `stimulus_events.csv` | One row per detected event. |
| `stimulus_summary.csv` | Precision, recall, F1, false alarms, and latency summaries. |
| `heldout_event_metrics.csv` | Event-locked held-out accuracy/log-loss before continuous scanning. |
| `training_class_counts.csv` | Training event counts per class (written only when the counts table is non-empty). |

API Reference

neureptrace.continuous_stimulus_scan

ContinuousStimulusScanResult dataclass

Tables emitted by the continuous stimulus-scan workflow.

Source code in src/neureptrace/continuous_stimulus_scan.py
38
39
40
41
42
43
44
45
46
47
@dataclass(frozen=True)
class ContinuousStimulusScanResult:
    """Tables emitted by the continuous stimulus-scan workflow.

    Immutable bundle of the six data frames returned by
    ``run_continuous_stimulus_scan``. That function also writes each table to
    a CSV file in its output directory; the file names are noted per field.
    """

    # Scanned probability observations (``stream_observations.csv``).
    observations: pd.DataFrame
    # Annotations built from the scan events (``stimulus_annotations.csv``).
    annotations: pd.DataFrame
    # Fitted detection thresholds (``stimulus_thresholds.csv``).
    thresholds: pd.DataFrame
    # Detected events, matched to annotations when any exist
    # (``stimulus_events.csv``).
    events: pd.DataFrame
    # Summary of the detected events (``stimulus_summary.csv``).
    summary: pd.DataFrame
    # Event-locked held-out metrics (``heldout_event_metrics.csv``).
    event_metrics: pd.DataFrame

ScanSegment dataclass

One continuous interval to scan as an independent probability stream.

Source code in src/neureptrace/continuous_stimulus_scan.py
28
29
30
31
32
33
34
35
@dataclass(frozen=True)
class ScanSegment:
    """One continuous interval to scan as an independent probability stream.

    Instances are produced by ``build_scan_segments``: either a single segment
    spanning the whole scan interval or one segment per slice.
    """

    # Stream identifier; sliced segments carry a ``_sliceNNN`` suffix.
    stream_id: str
    # Segment bounds on the same time axis as ``scan_start``/``scan_stop``
    # (presumably seconds in the raw recording -- confirm against the scanner).
    start: float
    stop: float
    # ``0.0`` for a full-run segment, the slice start for sliced segments.
    # NOTE(review): presumably subtracted to get stream-relative times -- confirm.
    output_origin: float

build_scan_segments(*, scan_raw, scan_start, scan_stop, slice_duration=None, slice_starts=None, slice_count=None, slice_seed=13, scan_events=None, onset_column='onset', label_column='stimulus_class', target_classes=None, threshold_window=None, detection_window=None, require_target_event=False, exclude_events_from_threshold_window=False, stream_id=None)

Build full-run, explicit-slice, or random-slice scan segments.

Source code in src/neureptrace/continuous_stimulus_scan.py
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
def build_scan_segments(
    *,
    scan_raw: Path,
    scan_start: float | None,
    scan_stop: float | None,
    slice_duration: float | None = None,
    slice_starts: Sequence[float] | None = None,
    slice_count: int | None = None,
    slice_seed: int = 13,
    scan_events: pd.DataFrame | None = None,
    onset_column: str = "onset",
    label_column: str = "stimulus_class",
    target_classes: Sequence[str] | None = None,
    threshold_window: tuple[float, float] | None = None,
    detection_window: tuple[float, float] | None = None,
    require_target_event: bool = False,
    exclude_events_from_threshold_window: bool = False,
    stream_id: str | None = None,
) -> list[ScanSegment]:
    """Build full-run, explicit-slice, or random-slice scan segments.

    The scan interval is ``[scan_start, scan_stop]``. A missing start defaults
    to ``0.0`` and a missing stop to the last sample time of ``scan_raw``.

    Modes, in priority order:

    * ``slice_duration`` is ``None``: a single segment covering the whole
      interval, with output origin ``0.0``.
    * ``slice_starts`` is non-empty: one slice per explicit start.
    * ``slice_count`` is non-zero: starts are drawn uniformly with a generator
      seeded by ``slice_seed``. When ``scan_events`` is given, a candidate is
      skipped if ``exclude_events_from_threshold_window`` is set and
      ``_event_mask_in_window`` flags any event inside its threshold window,
      or if ``require_target_event`` is set and no ``target_classes`` event is
      flagged inside its detection window. Both windows are offsets relative
      to the candidate start.
    * otherwise: back-to-back slices tiling the interval from its start.

    Sliced segments are named ``<stream_id>_sliceNNN`` and use their own start
    as output origin. ``stream_id`` defaults to ``_safe_stream_id(scan_raw)``.

    Raises:
        ValueError: if the interval is empty, ``slice_duration`` is not
            positive, a random slice cannot fit in the interval, too few
            random slices pass the filters, or a slice falls outside the
            interval.
    """

    # Slack for float round-off when a slice is meant to end exactly on the
    # interval end (e.g. 0.2 + 0.1 evaluates to 0.30000000000000004).
    boundary_tolerance = 1e-9

    raw = mne.io.read_raw_fif(scan_raw, preload=False, verbose="error")
    raw_start = 0.0 if scan_start is None else scan_start
    raw_stop = float(raw.times[-1]) if scan_stop is None else scan_stop
    if raw_stop <= raw_start:
        raise ValueError("scan_stop must be greater than scan_start.")
    base_stream_id = stream_id or _safe_stream_id(scan_raw)
    if slice_duration is None:
        return [ScanSegment(base_stream_id, raw_start, raw_stop, 0.0)]
    if slice_duration <= 0:
        raise ValueError("slice_duration must be positive.")

    starts: list[float]
    if slice_starts:
        starts = [float(start) for start in slice_starts]
    elif slice_count:
        latest_start = raw_stop - slice_duration
        if latest_start < raw_start - boundary_tolerance:
            # Fail early with a clear message instead of sampling from an
            # inverted range.
            raise ValueError(
                f"slice_duration {slice_duration} does not fit in scan interval [{raw_start}, {raw_stop}]."
            )
        latest_start = max(raw_start, latest_start)

        # The filter switches do not depend on the candidate; decide them once.
        avoid_threshold_events = (
            scan_events is not None and exclude_events_from_threshold_window and threshold_window is not None
        )
        need_target_event = scan_events is not None and require_target_event and detection_window is not None
        target_set = set(map(str, target_classes or [])) or None

        rng = np.random.default_rng(slice_seed)
        starts = []
        max_tries = max(1000, slice_count * 500)
        tries = 0
        while len(starts) < slice_count and tries < max_tries:
            tries += 1
            start = float(rng.uniform(raw_start, latest_start))
            if avoid_threshold_events and _event_mask_in_window(
                scan_events,
                onset_column=onset_column,
                start=start + threshold_window[0],
                stop=start + threshold_window[1],
                label_column=label_column,
            ).any():
                continue
            if need_target_event and not _event_mask_in_window(
                scan_events,
                onset_column=onset_column,
                start=start + detection_window[0],
                stop=start + detection_window[1],
                labels=target_set,
                label_column=label_column,
            ).any():
                continue
            starts.append(start)
        if len(starts) < slice_count:
            raise ValueError(f"Only selected {len(starts)} random slice(s); requested {slice_count}.")
    else:
        # Convert to plain floats so every mode yields the same start type.
        grid = np.arange(raw_start, raw_stop - slice_duration + boundary_tolerance, slice_duration)
        starts = [float(start) for start in grid]

    segments = []
    for index, start in enumerate(starts):
        stop = start + slice_duration
        if start < raw_start or stop > raw_stop + boundary_tolerance:
            raise ValueError(f"Slice [{start}, {stop}] is outside scan interval [{raw_start}, {raw_stop}].")
        # Absorb round-off so the segment never extends past the interval end.
        stop = min(stop, raw_stop)
        segments.append(ScanSegment(f"{base_stream_id}_slice{index:03d}", start, stop, start))
    return segments

label_event_table(events, *, onset_column='onset', label_column='stimulus_class', source_column=None, positive_pattern=None, negative_pattern=None, positive_label='positive', negative_label='negative', case_sensitive=False)

Return events with numeric onsets and string class labels.

If source_column and positive_pattern are supplied, labels are built from regex matches. Otherwise label_column is used directly.

Source code in src/neureptrace/continuous_stimulus_scan.py
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
def label_event_table(
    events: pd.DataFrame,
    *,
    onset_column: str = "onset",
    label_column: str = "stimulus_class",
    source_column: str | None = None,
    positive_pattern: str | None = None,
    negative_pattern: str | None = None,
    positive_label: str = "positive",
    negative_label: str = "negative",
    case_sensitive: bool = False,
) -> pd.DataFrame:
    """Return a copy of ``events`` with numeric onsets and string labels.

    When ``source_column`` and ``positive_pattern`` are both given,
    ``label_column`` is rebuilt from regex matches on ``source_column``:
    positive matches get ``positive_label`` and negative rows get
    ``negative_label``. Negative rows are those matching ``negative_pattern``,
    or, without a negative pattern, every non-missing row that is not a
    positive match. The negative label is assigned last, so it wins where both
    masks are set. Without the pattern arguments the existing
    ``label_column`` is used as is.

    Rows without a label are dropped, and the result is sorted by onset with
    a fresh index. The input frame is left untouched.

    Raises:
        ValueError: if a required column is missing or only one of
            ``source_column`` / ``positive_pattern`` is supplied.
    """

    if onset_column not in events.columns:
        raise ValueError(f"Event table is missing onset column '{onset_column}'.")

    table = events.copy()
    pattern_mode = not (source_column is None and positive_pattern is None)
    if pattern_mode:
        if source_column is None or positive_pattern is None:
            raise ValueError("source_column and positive_pattern must be provided together.")
        if source_column not in table.columns:
            raise ValueError(f"Event table is missing source column '{source_column}'.")
        source_values = table[source_column]
        is_positive = _pattern_mask(source_values, positive_pattern, case_sensitive=case_sensitive)
        if negative_pattern is None:
            is_negative = source_values.notna() & ~is_positive
        else:
            is_negative = _pattern_mask(source_values, negative_pattern, case_sensitive=case_sensitive)
        # Start from an all-missing column so unmatched rows are dropped below.
        table[label_column] = pd.NA
        table.loc[is_positive, label_column] = positive_label
        table.loc[is_negative, label_column] = negative_label
    elif label_column not in table.columns:
        raise ValueError(f"Event table is missing label column '{label_column}'.")

    table[onset_column] = pd.to_numeric(table[onset_column], errors="raise")
    has_label = table[label_column].notna()
    table = table.loc[has_label].copy()
    table[label_column] = table[label_column].astype(str)
    ordered = table.sort_values(onset_column)
    return ordered.reset_index(drop=True)

run_continuous_stimulus_scan(*, train_raw, train_events, scan_raw, scan_events=None, out_dir, onset_column='onset', label_column='stimulus_class', train_window=(0.1, 0.2), picks='data', baseline=None, decoder='logistic', emission_mode='calibrated', max_iter=1000, demean_window=False, scan_step=0.025, scan_start=None, scan_stop=None, slice_duration=None, slice_starts=None, slice_count=None, slice_seed=13, stream_id=None, subject=None, target_classes=None, threshold_window=(0.0, 0.8), threshold_quantile=0.95, threshold_method='max_run', score_mode='class_probability', detection_window=None, min_consecutive=1, min_duration=None, merge_gap=None, refractory=None, conflict_resolution='none', match_tolerance=0.1, annotation_latency=None, require_target_event=False, exclude_events_from_threshold_window=False)

Train an event-locked decoder, scan raw data, and detect stimulus events.

Source code in src/neureptrace/continuous_stimulus_scan.py
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
def run_continuous_stimulus_scan(
    *,
    train_raw: Path,
    train_events: pd.DataFrame,
    scan_raw: Path,
    scan_events: pd.DataFrame | None = None,
    out_dir: Path,
    onset_column: str = "onset",
    label_column: str = "stimulus_class",
    train_window: tuple[float, float] = (0.1, 0.2),
    picks: str = "data",
    baseline: tuple[float | None, float | None] | None = None,
    decoder: str = "logistic",
    emission_mode: str = "calibrated",
    max_iter: int = 1000,
    demean_window: bool = False,
    scan_step: float = 0.025,
    scan_start: float | None = None,
    scan_stop: float | None = None,
    slice_duration: float | None = None,
    slice_starts: Sequence[float] | None = None,
    slice_count: int | None = None,
    slice_seed: int = 13,
    stream_id: str | None = None,
    subject: str | None = None,
    target_classes: Sequence[str] | None = None,
    threshold_window: tuple[float, float] = (0.0, 0.8),
    threshold_quantile: float = 0.95,
    threshold_method: str = "max_run",
    score_mode: str = "class_probability",
    detection_window: tuple[float, float] | None = None,
    min_consecutive: int = 1,
    min_duration: float | None = None,
    merge_gap: float | None = None,
    refractory: float | None = None,
    conflict_resolution: str = "none",
    match_tolerance: float = 0.1,
    annotation_latency: float | None = None,
    require_target_event: bool = False,
    exclude_events_from_threshold_window: bool = False,
) -> ContinuousStimulusScanResult:
    """Fit an event-locked decoder, scan a raw run, and detect stimulus events.

    Steps, in order: fit the decoder on ``train_raw``/``train_events``; build
    the scan segments; score ``scan_raw`` into stream observations; build
    annotations and held-out event metrics from ``scan_events``; fit detection
    thresholds; detect events; match them to annotations when any exist; and
    summarize.

    ``target_classes`` defaults to the first class of the fitted encoder, and
    ``annotation_latency`` defaults to the centre of ``train_window``.

    Side effects: ``out_dir`` is created if needed and receives
    ``stream_observations.csv``, ``stimulus_annotations.csv``,
    ``stimulus_thresholds.csv``, ``stimulus_events.csv``,
    ``stimulus_summary.csv`` and ``heldout_event_metrics.csv``.
    ``training_class_counts.csv`` is written only when the training counts
    table is non-empty.

    Returns:
        The six tables bundled in a ``ContinuousStimulusScanResult``.
    """

    out_dir.mkdir(parents=True, exist_ok=True)

    # Keyword groups shared by several helper calls, assembled once.
    decoder_settings = {
        "decoder": normalize_decoder_name(decoder),
        "emission_mode": normalize_emission_mode(emission_mode),
    }
    event_columns = {"onset_column": onset_column, "label_column": label_column}

    fitted = _fit_decoder(
        train_raw=train_raw,
        train_events=train_events,
        train_window=train_window,
        picks=picks,
        baseline=baseline,
        max_iter=max_iter,
        demean_window=demean_window,
        **event_columns,
        **decoder_settings,
    )
    model, encoder, channel_names, n_window_samples, train_counts = fitted

    targets = list(target_classes or [str(encoder.classes_[0])])
    if annotation_latency is None:
        latency = float(np.mean(train_window))
    else:
        latency = annotation_latency

    # Provenance identifiers attached to the stream observations.
    split_id = _continuous_split_id(train_raw=train_raw, scan_raw=scan_raw, slice_seed=slice_seed)
    preprocessing_hash = _continuous_preprocessing_hash(
        train_window=train_window,
        picks=picks,
        baseline=baseline,
        demean_window=demean_window,
        scan_step=scan_step,
        n_window_samples=n_window_samples,
        channel_names=channel_names,
    )
    model_hash = _continuous_model_hash(
        max_iter=max_iter,
        train_window=train_window,
        **decoder_settings,
    )

    segments = build_scan_segments(
        scan_raw=scan_raw,
        scan_start=scan_start,
        scan_stop=scan_stop,
        slice_duration=slice_duration,
        slice_starts=slice_starts,
        slice_count=slice_count,
        slice_seed=slice_seed,
        scan_events=scan_events,
        target_classes=targets,
        threshold_window=threshold_window,
        detection_window=detection_window,
        require_target_event=require_target_event,
        exclude_events_from_threshold_window=exclude_events_from_threshold_window,
        stream_id=stream_id,
        **event_columns,
    )

    raw_observations = _scan_raw_probabilities(
        scan_raw=scan_raw,
        model=model,
        encoder=encoder,
        channel_names=channel_names,
        n_window_samples=n_window_samples,
        segments=segments,
        scan_step=scan_step,
        subject=subject,
        demean_window=demean_window,
        **decoder_settings,
    )
    observations = _standardize_stream_observations(
        raw_observations,
        subject=subject,
        split_id=split_id,
        slice_seed=slice_seed,
        train_time=float(np.mean(train_window)),
        preprocessing_hash=preprocessing_hash,
        model_hash=model_hash,
        **decoder_settings,
    )

    annotations = _annotation_table(
        scan_events=scan_events,
        segments=segments,
        target_classes=targets,
        annotation_latency=latency,
        detection_window=detection_window,
        **event_columns,
    )
    event_metrics = _held_out_event_metrics(
        model=model,
        encoder=encoder,
        scan_raw=scan_raw,
        scan_events=scan_events,
        train_window=train_window,
        # Reuse the channels selected at fit time rather than the picks spec.
        picks=channel_names,
        baseline=baseline,
        demean_window=demean_window,
        **event_columns,
        **decoder_settings,
    )
    if subject is not None and not event_metrics.empty:
        event_metrics.insert(0, "subject", subject)
    if not train_counts.empty:
        train_counts.to_csv(out_dir / "training_class_counts.csv", index=False)

    stream_columns = ("stream_id",)
    # Settings shared by threshold fitting and event detection.
    detector_settings = {
        "threshold_window": threshold_window,
        "threshold_quantile": threshold_quantile,
        "threshold_method": threshold_method,
        "score_mode": score_mode,
        "target_classes": targets,
        "stream_columns": stream_columns,
        "min_consecutive": min_consecutive,
        "min_duration": min_duration,
    }
    thresholds = fit_stimulus_detection_thresholds(observations, **detector_settings)
    events = detect_stimulus_events(
        observations,
        thresholds=thresholds,
        detection_window=detection_window,
        merge_gap=merge_gap,
        refractory=refractory,
        conflict_resolution=conflict_resolution,
        **detector_settings,
    )
    if not annotations.empty:
        events = match_stimulus_annotations(
            events,
            annotations,
            stream_columns=stream_columns,
            match_tolerance=match_tolerance,
        )
    summary_annotations = None if annotations.empty else annotations
    summary = summarize_stimulus_events(
        events,
        annotations=summary_annotations,
        observations=observations,
        stream_columns=stream_columns,
    )

    csv_outputs = (
        ("stream_observations.csv", observations),
        ("stimulus_annotations.csv", annotations),
        ("stimulus_thresholds.csv", thresholds),
        ("stimulus_events.csv", events),
        ("stimulus_summary.csv", summary),
        ("heldout_event_metrics.csv", event_metrics),
    )
    for file_name, table in csv_outputs:
        table.to_csv(out_dir / file_name, index=False)

    return ContinuousStimulusScanResult(
        observations=observations,
        annotations=annotations,
        thresholds=thresholds,
        events=events,
        summary=summary,
        event_metrics=event_metrics,
    )