Skip to content

NeuRepTrace

Continuous Stimulus Scan

IPS-Stuttgart/NeuRepTrace

Continuous Stimulus Scan

neureptrace.continuous_stimulus_scan turns the long-stream event-detection idea into a single reproducible workflow:

1. train an event-locked decoder on labeled events from one raw run;
2. scan a held-out raw run with the same window and preprocessing;
3. export P(class | time) as NeuRepTrace stream observations;
4. run neureptrace.stimulus_detection; and
5. write event-level precision, recall, F1, latency, and false-alarm summaries.

Use this when the question is:

I have an event-locked decoder for a stimulus class. Does a held-out continuous recording contain intervals that look like that class?

CLI Example

python -m neureptrace.continuous_stimulus_scan \
  --train-raw data/ds000117/sub-01/ses-meg/meg/sub-01_ses-meg_task-facerecognition_run-01_meg.fif \
  --train-events data/ds000117/sub-01/ses-meg/meg/sub-01_ses-meg_task-facerecognition_run-01_events.tsv \
  --scan-raw data/ds000117/sub-01/ses-meg/meg/sub-01_ses-meg_task-facerecognition_run-02_meg.fif \
  --scan-events data/ds000117/sub-01/ses-meg/meg/sub-01_ses-meg_task-facerecognition_run-02_events.tsv \
  --source-column stim_type \
  --positive-pattern "Famous|Unfamiliar" \
  --negative-pattern "Scrambled" \
  --positive-label face \
  --negative-label scrambled \
  --target-class face \
  --train-window 0.15 0.25 \
  --picks meg \
  --demean-window \
  --slice-duration 6.0 \
  --slice-count 10 \
  --require-target-event \
  --exclude-events-from-threshold-window \
  --threshold-window 0.0 0.8 \
  --detection-window 0.8 6.0 \
  --threshold-method max_run \
  --threshold-quantile 0.975 \
  --min-consecutive 2 \
  --min-duration 0.05 \
  --merge-gap 0.05 \
  --refractory 0.30 \
  --match-tolerance 0.35 \
  --out-dir results/ds000117_continuous_scan

The installed command neureptrace-continuous-stimulus-scan exposes the same arguments.

Outputs

The output directory contains:

File	Meaning
`stream_observations.csv`	Long-stream probability observations with `prob_class_*` columns.
`stimulus_annotations.csv`	Held-out event annotations converted to stream-relative times.
`stimulus_thresholds.csv`	Class-specific detector thresholds.
`stimulus_events.csv`	One row per detected event.
`stimulus_summary.csv`	Precision, recall, F1, false alarms, and latency summaries.
`heldout_event_metrics.csv`	Event-locked held-out accuracy/log-loss before continuous scanning.
`training_class_counts.csv`	Training event counts per class (written only when the training count table is non-empty).

API Reference

`neureptrace.continuous_stimulus_scan`

`ContinuousStimulusScanResult` `dataclass`

Tables emitted by the continuous stimulus-scan workflow.

Source code in src/neureptrace/continuous_stimulus_scan.py

@dataclass(frozen=True)
class ContinuousStimulusScanResult:
    """Tables emitted by the continuous stimulus-scan workflow.

    Instances are immutable (``frozen=True``). ``run_continuous_stimulus_scan``
    constructs the result positionally, so the field order below is part of
    the contract. That function also writes every table to a CSV file in its
    output directory.
    """

    # Standardized stream observations from the scan; saved as
    # ``stream_observations.csv``.
    observations: pd.DataFrame
    # Annotation table derived from the held-out scan events; saved as
    # ``stimulus_annotations.csv``.
    annotations: pd.DataFrame
    # Output of ``fit_stimulus_detection_thresholds``; saved as
    # ``stimulus_thresholds.csv``.
    thresholds: pd.DataFrame
    # Output of ``detect_stimulus_events``, passed through
    # ``match_stimulus_annotations`` when annotations are non-empty; saved as
    # ``stimulus_events.csv``.
    events: pd.DataFrame
    # Output of ``summarize_stimulus_events``; saved as
    # ``stimulus_summary.csv``.
    summary: pd.DataFrame
    # Held-out event-locked metrics; a leading ``subject`` column is inserted
    # when the table is non-empty and a subject was given. Saved as
    # ``heldout_event_metrics.csv``.
    event_metrics: pd.DataFrame

`ScanSegment` `dataclass`

One continuous interval to scan as an independent probability stream.

Source code in src/neureptrace/continuous_stimulus_scan.py

@dataclass(frozen=True)
class ScanSegment:
    """One continuous interval to scan as an independent probability stream.

    ``build_scan_segments`` creates a single instance for a full-run scan and
    one instance per slice otherwise. Instances are immutable
    (``frozen=True``).
    """

    # Stream identifier; slice segments carry a ``_slice{index:03d}`` suffix
    # appended to the base stream id.
    stream_id: str
    # Interval start on the scan recording's time axis (the same axis as
    # ``raw.times``).
    start: float
    # Interval end on the same axis; ``build_scan_segments`` only produces
    # segments with ``stop > start``.
    stop: float
    # 0.0 for a full-run segment, the slice start for slice segments.
    # NOTE(review): presumably the offset subtracted to make output times
    # stream-relative -- confirm in the scanning helper that consumes it.
    output_origin: float

`build_scan_segments(*, scan_raw, scan_start, scan_stop, slice_duration=None, slice_starts=None, slice_count=None, slice_seed=13, scan_events=None, onset_column='onset', label_column='stimulus_class', target_classes=None, threshold_window=None, detection_window=None, require_target_event=False, exclude_events_from_threshold_window=False, stream_id=None)`

Build full-run, explicit-slice, or random-slice scan segments.

Source code in src/neureptrace/continuous_stimulus_scan.py

def build_scan_segments(
    *,
    scan_raw: Path,
    scan_start: float | None,
    scan_stop: float | None,
    slice_duration: float | None = None,
    slice_starts: Sequence[float] | None = None,
    slice_count: int | None = None,
    slice_seed: int = 13,
    scan_events: pd.DataFrame | None = None,
    onset_column: str = "onset",
    label_column: str = "stimulus_class",
    target_classes: Sequence[str] | None = None,
    threshold_window: tuple[float, float] | None = None,
    detection_window: tuple[float, float] | None = None,
    require_target_event: bool = False,
    exclude_events_from_threshold_window: bool = False,
    stream_id: str | None = None,
) -> list[ScanSegment]:
    """Build full-run, explicit-slice, or random-slice scan segments.

    Without ``slice_duration`` the whole scan interval becomes one segment.
    Otherwise slices of length ``slice_duration`` are placed, in priority
    order, at the explicit ``slice_starts``, at ``slice_count`` randomly drawn
    positions, or back-to-back across the scan interval.

    Args:
        scan_raw: FIF recording to scan. It is opened (without preloading)
            only when ``scan_stop`` is ``None``, to read the last sample time.
        scan_start: Start of the scan interval; ``None`` means 0.0.
        scan_stop: End of the scan interval; ``None`` means the last sample
            time of ``scan_raw``.
        slice_duration: Length of every slice. ``None`` requests a single
            full-run segment.
        slice_starts: Explicit slice start times. A non-empty value takes
            precedence over ``slice_count``.
        slice_count: Number of random slices to draw. ``None`` or 0 falls
            back to the contiguous grid.
        slice_seed: Seed for the random slice generator.
        scan_events: Event table consulted by the two random-slice filters.
        onset_column: Onset column name forwarded to the event-window helper.
        label_column: Label column name forwarded to the event-window helper.
        target_classes: Labels forwarded to the event-window helper for the
            ``require_target_event`` filter (stringified; empty means no
            label restriction is passed).
        threshold_window: ``(start, stop)`` offsets from the slice start used
            by ``exclude_events_from_threshold_window``.
        detection_window: ``(start, stop)`` offsets from the slice start used
            by ``require_target_event``.
        require_target_event: Reject random slices whose detection window
            has no event flagged by the helper.
        exclude_events_from_threshold_window: Reject random slices whose
            threshold window has any event flagged by the helper.
        stream_id: Base stream identifier; when falsy it is derived from
            ``scan_raw``.

    Returns:
        One ``ScanSegment`` for a full-run scan (``output_origin`` 0.0), or one
        per slice named ``{stream_id}_slice{index:03d}`` whose
        ``output_origin`` equals the slice start.

    Raises:
        ValueError: If the scan interval is empty, ``slice_duration`` is not
            positive or longer than the scan interval, ``slice_count`` is
            negative, too few random slices pass the filters, or a slice
            falls outside the scan interval.
    """

    raw_start = 0.0 if scan_start is None else scan_start
    if scan_stop is None:
        # The recording is needed only for its end time; skip the file
        # entirely when the caller already supplied the stop.
        raw = mne.io.read_raw_fif(scan_raw, preload=False, verbose="error")
        raw_stop = float(raw.times[-1])
    else:
        raw_stop = scan_stop
    if raw_stop <= raw_start:
        raise ValueError("scan_stop must be greater than scan_start.")
    base_stream_id = stream_id or _safe_stream_id(scan_raw)
    if slice_duration is None:
        return [ScanSegment(base_stream_id, raw_start, raw_stop, 0.0)]
    if slice_duration <= 0:
        raise ValueError("slice_duration must be positive.")

    # Slack for the bounds checks. It is deliberately looser than the 1e-12
    # used to build the grid, so floating-point rounding in ``start +
    # slice_duration`` cannot reject a slice the grid itself just produced.
    edge_tolerance = 1e-9
    if slice_duration > (raw_stop - raw_start) + edge_tolerance:
        # Previously this silently produced no segments (grid mode) or drew
        # from an inverted range (random mode).
        raise ValueError(
            f"slice_duration {slice_duration} is longer than scan interval [{raw_start}, {raw_stop}]."
        )

    starts: list[float]
    # Explicit length test: plain truthiness is ambiguous for NumPy arrays.
    if slice_starts is not None and len(slice_starts) > 0:
        starts = [float(start) for start in slice_starts]
    elif slice_count:
        if slice_count < 0:
            raise ValueError("slice_count must not be negative.")
        rng = np.random.default_rng(slice_seed)
        target_set = set(map(str, target_classes or [])) or None
        # Both filters are loop-invariant; decide once whether they apply.
        filter_threshold_window = (
            scan_events is not None and exclude_events_from_threshold_window and threshold_window is not None
        )
        filter_detection_window = scan_events is not None and require_target_event and detection_window is not None
        # Never hand ``uniform`` an inverted range.
        latest_start = max(raw_start, raw_stop - slice_duration)
        max_tries = max(1000, slice_count * 500)
        starts = []
        tries = 0
        while len(starts) < slice_count and tries < max_tries:
            tries += 1
            start = float(rng.uniform(raw_start, latest_start))
            if filter_threshold_window and _event_mask_in_window(
                scan_events,
                onset_column=onset_column,
                start=start + threshold_window[0],
                stop=start + threshold_window[1],
                label_column=label_column,
            ).any():
                continue
            if filter_detection_window and not _event_mask_in_window(
                scan_events,
                onset_column=onset_column,
                start=start + detection_window[0],
                stop=start + detection_window[1],
                labels=target_set,
                label_column=label_column,
            ).any():
                continue
            starts.append(start)
        if len(starts) < slice_count:
            raise ValueError(f"Only selected {len(starts)} random slice(s); requested {slice_count}.")
    else:
        # Contiguous grid; the 1e-12 keeps the final slice when the interval
        # is an exact multiple of the duration. ``tolist`` yields plain floats.
        starts = np.arange(raw_start, raw_stop - slice_duration + 1e-12, slice_duration).tolist()

    segments = []
    for index, start in enumerate(starts):
        stop = start + slice_duration
        if start < raw_start - edge_tolerance or stop > raw_stop + edge_tolerance:
            raise ValueError(f"Slice [{start}, {stop}] is outside scan interval [{raw_start}, {raw_stop}].")
        # Snap rounding overshoot back onto the interval so downstream code
        # never receives bounds outside [raw_start, raw_stop].
        start = max(start, raw_start)
        stop = min(stop, raw_stop)
        segments.append(ScanSegment(f"{base_stream_id}_slice{index:03d}", start, stop, start))
    return segments

`label_event_table(events, *, onset_column='onset', label_column='stimulus_class', source_column=None, positive_pattern=None, negative_pattern=None, positive_label='positive', negative_label='negative', case_sensitive=False)`

Return events with numeric onsets and string class labels.

If source_column and positive_pattern are supplied, labels are built from regex matches. Otherwise label_column is used directly.

Source code in src/neureptrace/continuous_stimulus_scan.py

def label_event_table(
    events: pd.DataFrame,
    *,
    onset_column: str = "onset",
    label_column: str = "stimulus_class",
    source_column: str | None = None,
    positive_pattern: str | None = None,
    negative_pattern: str | None = None,
    positive_label: str = "positive",
    negative_label: str = "negative",
    case_sensitive: bool = False,
) -> pd.DataFrame:
    """Return a copy of ``events`` with numeric onsets and string labels.

    Two labelling modes exist. When ``source_column`` and
    ``positive_pattern`` are given (they must come together), ``label_column``
    is rebuilt from pattern matches on ``source_column``: positive matches get
    ``positive_label`` and negative matches get ``negative_label``. Without a
    ``negative_pattern`` every non-null, non-positive row counts as negative.
    The negative label is written last, so a row matching both patterns ends
    up negative. In the other mode the existing ``label_column`` is used as is.

    Rows left without a label are dropped, labels are cast to ``str``, and the
    result is sorted by onset with a fresh index. The input frame is not
    modified.

    Raises:
        ValueError: If the onset, source, or label column is missing, or if
            only one of ``source_column`` / ``positive_pattern`` is supplied.
            ``pd.to_numeric`` also raises on non-numeric onsets.
    """

    if onset_column not in events.columns:
        raise ValueError(f"Event table is missing onset column '{onset_column}'.")

    table = events.copy()
    regex_mode = not (source_column is None and positive_pattern is None)
    if regex_mode:
        if source_column is None or positive_pattern is None:
            raise ValueError("source_column and positive_pattern must be provided together.")
        if source_column not in table.columns:
            raise ValueError(f"Event table is missing source column '{source_column}'.")

        # Both masks are computed before the label column is (re)written.
        source_values = table[source_column]
        is_positive = _pattern_mask(source_values, positive_pattern, case_sensitive=case_sensitive)
        if negative_pattern is None:
            is_negative = source_values.notna() & ~is_positive
        else:
            is_negative = _pattern_mask(source_values, negative_pattern, case_sensitive=case_sensitive)

        table[label_column] = pd.NA
        table.loc[is_positive, label_column] = positive_label
        table.loc[is_negative, label_column] = negative_label
    elif label_column not in table.columns:
        raise ValueError(f"Event table is missing label column '{label_column}'.")

    table[onset_column] = pd.to_numeric(table[onset_column], errors="raise")
    has_label = table[label_column].notna()
    table = table.loc[has_label].copy()
    table[label_column] = table[label_column].astype(str)
    ordered = table.sort_values(onset_column)
    return ordered.reset_index(drop=True)

`run_continuous_stimulus_scan(*, train_raw, train_events, scan_raw, scan_events=None, out_dir, onset_column='onset', label_column='stimulus_class', train_window=(0.1, 0.2), picks='data', baseline=None, decoder='logistic', emission_mode='calibrated', max_iter=1000, demean_window=False, scan_step=0.025, scan_start=None, scan_stop=None, slice_duration=None, slice_starts=None, slice_count=None, slice_seed=13, stream_id=None, subject=None, target_classes=None, threshold_window=(0.0, 0.8), threshold_quantile=0.95, threshold_method='max_run', score_mode='class_probability', detection_window=None, min_consecutive=1, min_duration=None, merge_gap=None, refractory=None, conflict_resolution='none', match_tolerance=0.1, annotation_latency=None, require_target_event=False, exclude_events_from_threshold_window=False)`

Train an event-locked decoder, scan raw data, and detect stimulus events.

Source code in src/neureptrace/continuous_stimulus_scan.py

def run_continuous_stimulus_scan(
    *,
    train_raw: Path,
    train_events: pd.DataFrame,
    scan_raw: Path,
    scan_events: pd.DataFrame | None = None,
    out_dir: Path,
    onset_column: str = "onset",
    label_column: str = "stimulus_class",
    train_window: tuple[float, float] = (0.1, 0.2),
    picks: str = "data",
    baseline: tuple[float | None, float | None] | None = None,
    decoder: str = "logistic",
    emission_mode: str = "calibrated",
    max_iter: int = 1000,
    demean_window: bool = False,
    scan_step: float = 0.025,
    scan_start: float | None = None,
    scan_stop: float | None = None,
    slice_duration: float | None = None,
    slice_starts: Sequence[float] | None = None,
    slice_count: int | None = None,
    slice_seed: int = 13,
    stream_id: str | None = None,
    subject: str | None = None,
    target_classes: Sequence[str] | None = None,
    threshold_window: tuple[float, float] = (0.0, 0.8),
    threshold_quantile: float = 0.95,
    threshold_method: str = "max_run",
    score_mode: str = "class_probability",
    detection_window: tuple[float, float] | None = None,
    min_consecutive: int = 1,
    min_duration: float | None = None,
    merge_gap: float | None = None,
    refractory: float | None = None,
    conflict_resolution: str = "none",
    match_tolerance: float = 0.1,
    annotation_latency: float | None = None,
    require_target_event: bool = False,
    exclude_events_from_threshold_window: bool = False,
) -> ContinuousStimulusScanResult:
    """Train an event-locked decoder, scan raw data, and detect stimulus events.

    Steps, in order: create ``out_dir``; fit the decoder on ``train_raw``;
    build the scan segments; score the segments into stream observations and
    standardize them; derive annotations and held-out event metrics from
    ``scan_events``; write ``training_class_counts.csv`` when the counts table
    is non-empty; fit thresholds; detect events; match them to annotations
    when any exist; summarize; and write the six result tables as CSV.

    When ``target_classes`` is empty or ``None`` the first encoder class is
    the only target. When ``annotation_latency`` is ``None`` the mean of
    ``train_window`` is used.

    Returns:
        The six tables that were written to ``out_dir``.
    """

    out_dir.mkdir(parents=True, exist_ok=True)

    # Keyword groups shared by several helper calls below.
    decoder_settings = {
        "decoder": normalize_decoder_name(decoder),
        "emission_mode": normalize_emission_mode(emission_mode),
    }
    event_columns = {"onset_column": onset_column, "label_column": label_column}
    stream_key = ("stream_id",)

    model, encoder, channel_names, n_window_samples, train_counts = _fit_decoder(
        train_raw=train_raw,
        train_events=train_events,
        train_window=train_window,
        picks=picks,
        baseline=baseline,
        max_iter=max_iter,
        demean_window=demean_window,
        **event_columns,
        **decoder_settings,
    )

    if target_classes:
        targets = list(target_classes)
    else:
        targets = [str(encoder.classes_[0])]
    if annotation_latency is None:
        latency = float(np.mean(train_window))
    else:
        latency = annotation_latency

    # Provenance identifiers attached to the standardized observations.
    split_id = _continuous_split_id(train_raw=train_raw, scan_raw=scan_raw, slice_seed=slice_seed)
    preprocessing_hash = _continuous_preprocessing_hash(
        train_window=train_window,
        picks=picks,
        baseline=baseline,
        demean_window=demean_window,
        scan_step=scan_step,
        n_window_samples=n_window_samples,
        channel_names=channel_names,
    )
    model_hash = _continuous_model_hash(
        max_iter=max_iter,
        train_window=train_window,
        **decoder_settings,
    )

    segments = build_scan_segments(
        scan_raw=scan_raw,
        scan_start=scan_start,
        scan_stop=scan_stop,
        slice_duration=slice_duration,
        slice_starts=slice_starts,
        slice_count=slice_count,
        slice_seed=slice_seed,
        scan_events=scan_events,
        target_classes=targets,
        threshold_window=threshold_window,
        detection_window=detection_window,
        require_target_event=require_target_event,
        exclude_events_from_threshold_window=exclude_events_from_threshold_window,
        stream_id=stream_id,
        **event_columns,
    )

    scanned = _scan_raw_probabilities(
        scan_raw=scan_raw,
        model=model,
        encoder=encoder,
        channel_names=channel_names,
        n_window_samples=n_window_samples,
        segments=segments,
        scan_step=scan_step,
        subject=subject,
        demean_window=demean_window,
        **decoder_settings,
    )
    observations = _standardize_stream_observations(
        scanned,
        subject=subject,
        split_id=split_id,
        slice_seed=slice_seed,
        train_time=float(np.mean(train_window)),
        preprocessing_hash=preprocessing_hash,
        model_hash=model_hash,
        **decoder_settings,
    )

    annotations = _annotation_table(
        scan_events=scan_events,
        segments=segments,
        target_classes=targets,
        annotation_latency=latency,
        detection_window=detection_window,
        **event_columns,
    )
    event_metrics = _held_out_event_metrics(
        model=model,
        encoder=encoder,
        scan_raw=scan_raw,
        scan_events=scan_events,
        train_window=train_window,
        # Reuse the fitted channel list so held-out epochs match training.
        picks=channel_names,
        baseline=baseline,
        demean_window=demean_window,
        **event_columns,
        **decoder_settings,
    )
    if not event_metrics.empty:
        if subject is not None:
            event_metrics.insert(0, "subject", subject)
    if not train_counts.empty:
        train_counts.to_csv(out_dir / "training_class_counts.csv", index=False)

    # Settings common to threshold fitting and event detection.
    detector_settings = {
        "threshold_window": threshold_window,
        "threshold_quantile": threshold_quantile,
        "threshold_method": threshold_method,
        "score_mode": score_mode,
        "target_classes": targets,
        "stream_columns": stream_key,
        "min_consecutive": min_consecutive,
        "min_duration": min_duration,
    }
    thresholds = fit_stimulus_detection_thresholds(observations, **detector_settings)
    events = detect_stimulus_events(
        observations,
        thresholds=thresholds,
        detection_window=detection_window,
        merge_gap=merge_gap,
        refractory=refractory,
        conflict_resolution=conflict_resolution,
        **detector_settings,
    )

    has_annotations = not annotations.empty
    if has_annotations:
        events = match_stimulus_annotations(
            events,
            annotations,
            stream_columns=stream_key,
            match_tolerance=match_tolerance,
        )
    summary = summarize_stimulus_events(
        events,
        annotations=annotations if has_annotations else None,
        observations=observations,
        stream_columns=stream_key,
    )

    outputs = (
        ("stream_observations.csv", observations),
        ("stimulus_annotations.csv", annotations),
        ("stimulus_thresholds.csv", thresholds),
        ("stimulus_events.csv", events),
        ("stimulus_summary.csv", summary),
        ("heldout_event_metrics.csv", event_metrics),
    )
    for filename, table in outputs:
        table.to_csv(out_dir / filename, index=False)

    return ContinuousStimulusScanResult(
        observations=observations,
        annotations=annotations,
        thresholds=thresholds,
        events=events,
        summary=summary,
        event_metrics=event_metrics,
    )