Skip to content

NeuRepTrace

Onset Workflow

IPS-Stuttgart/NeuRepTrace

Onset Workflow

neureptrace.onset_workflow runs onset detection across multiple task result directories and writes task-level plus combined summaries.

Example:

python -m neureptrace.onset_workflow \
  --task-dir results/nod_animate_all \
  --task-dir results/nod_superclass_canine_device_all \
  --task-dir results/nod_superclass_container_covering_all \
  --threshold-window -0.100 0.000 \
  --threshold-quantile 0.95 \
  --threshold-method max_run \
  --detection-start 0.000 \
  --detection-window 0.000 0.800 \
  --min-consecutive 3 \
  --require-stable-prediction \
  --out-dir results/onset_detection_all \
  --plot-out results/onset_detection_all/onset_summary.png

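The workflow looks for observations/*_observations.csv in each task directory by default. For every task it writes onset_events.csv and onset_summary.csv into a per-task sub-directory of the output directory, and it always writes a combined onset_summary_all.csv. Two further outputs are optional: a compact summary plot, written when a plot path is given (--plot-out), and a combined event table (onset_events_all.csv), written when combined events are requested.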

Use --detection-window 0.000 0.800 for a post-stimulus latency benchmark. Use an earlier start, for example --detection-window -0.200 0.800, when the goal is to allow and count pre-stimulus false alarms.

`neureptrace.onset_workflow`

`OnsetWorkflowRun` `dataclass`

Top-level outputs from a multi-task onset workflow.

Source code in src/neureptrace/onset_workflow.py

@dataclass(frozen=True)
class OnsetWorkflowRun:
    """Top-level outputs from a multi-task onset workflow.

    Immutable record (``frozen=True``) returned by ``run_onset_workflow``.
    It only carries paths and per-task records; the tables themselves are
    written to disk by the workflow.
    """

    # Root output directory passed to the workflow (created if missing).
    out_dir: Path
    # One record per task directory that was processed successfully; task
    # directories skipped because of ``allow_missing`` have no entry here.
    task_outputs: list[TaskOnsetOutput]
    # Combined summary table, written to ``out_dir / "onset_summary_all.csv"``.
    summary_all_csv: Path
    # Combined event table (``out_dir / "onset_events_all.csv"``), or ``None``
    # when ``write_combined_events`` was not requested.
    events_all_csv: Path | None
    # Path returned by ``plot_onset_summary``, or ``None`` when no ``plot_out``
    # was given.
    plot_path: Path | None

`TaskOnsetOutput` `dataclass`

Output paths and counts for one task-directory onset run.

Source code in src/neureptrace/onset_workflow.py

@dataclass(frozen=True)
class TaskOnsetOutput:
    """Output paths and counts for one task-directory onset run.

    Immutable record (``frozen=True``) built by ``run_task_onset_detection``
    after it has written the per-task CSV files.
    """

    # Task label derived from the task directory; also used as the name of the
    # per-task output sub-directory.
    task: str
    # Resolved (absolute) task directory that was scanned for observations.
    task_dir: Path
    # Observation CSV files that matched the observations glob for this task.
    observation_csvs: list[Path]
    # Per-task event table, written as ``onset_events.csv``.
    events_csv: Path
    # Per-task summary table, written as ``onset_summary.csv``.
    summary_csv: Path
    # Number of rows in the written event table.
    n_events: int
    # Number of rows in the written summary table.
    n_summary_rows: int

`plot_onset_summary(summary, out_path)`

Plot compact onset latency and false-alarm summaries.

Source code in src/neureptrace/onset_workflow.py

def plot_onset_summary(summary: pd.DataFrame, out_path: Path) -> Path:
    """Plot compact onset latency and false-alarm summaries.

    Draws a two-panel bar chart with one bar group per summary row: the left
    panel shows ``post_detection_latency_median``, the right panel shows
    ``false_alarm_rate`` next to ``post_zero_detected_rate``.

    Args:
        summary: Onset summary table. Must contain the columns ``task``,
            ``post_detection_latency_median``, ``false_alarm_rate`` and
            ``post_zero_detected_rate``. The optional columns ``decoder`` and
            ``emission_mode`` are each used, independently of one another,
            for row ordering and tick labels when present.
        out_path: Image file to write. Missing parent directories are created.

    Returns:
        ``out_path``, after the figure has been saved.

    Raises:
        ValueError: If ``summary`` is empty or lacks a required column.
    """

    if summary.empty:
        raise ValueError("Cannot plot an empty onset summary.")
    required = {"task", "post_detection_latency_median", "false_alarm_rate", "post_zero_detected_rate"}
    missing = sorted(required.difference(summary.columns))
    if missing:
        raise ValueError(f"Onset summary is missing required columns for plotting: {missing}")

    # Sort only by the optional columns that actually exist. Keying the whole
    # list on "decoder" alone would raise KeyError for a summary that has
    # "decoder" but no "emission_mode".
    optional_columns = ("decoder", "emission_mode")
    sort_columns = ["task", *(name for name in optional_columns if name in summary.columns)]
    # sort_values returns a new frame, so the caller's table is left untouched.
    frame = summary.sort_values(sort_columns)

    labels = frame["task"].astype(str)
    if "decoder" in frame.columns:
        labels = labels + "\n" + frame["decoder"].astype(str)
    if "emission_mode" in frame.columns:
        labels = labels + " / " + frame["emission_mode"].astype(str)

    positions = list(range(len(frame)))
    # Widen the figure with the number of rows so tick labels stay readable.
    fig, axes = plt.subplots(1, 2, figsize=(max(7.0, 0.8 * len(frame)), 4.2))
    try:
        latency_ax, quality_ax = axes[0], axes[1]

        latency_ax.bar(positions, frame["post_detection_latency_median"])
        latency_ax.set_xticks(positions)
        latency_ax.set_xticklabels(labels, rotation=35, ha="right")
        latency_ax.set_ylabel("Median post-zero onset latency (s)")
        latency_ax.set_title("Onset latency")
        latency_ax.axhline(0.0, color="0.4", linewidth=1.0)
        latency_ax.grid(axis="y", color="0.9", linewidth=0.8)

        # Two side-by-side bars per row, centred on the row's tick position.
        width = 0.38
        quality_ax.bar(
            [position - width / 2 for position in positions],
            frame["false_alarm_rate"],
            width=width,
            label="false alarm",
        )
        quality_ax.bar(
            [position + width / 2 for position in positions],
            frame["post_zero_detected_rate"],
            width=width,
            label="post-zero detected",
        )
        quality_ax.set_xticks(positions)
        quality_ax.set_xticklabels(labels, rotation=35, ha="right")
        quality_ax.set_ylabel("Rate")
        quality_ax.set_ylim(0.0, 1.0)
        quality_ax.set_title("Detection quality")
        quality_ax.legend(loc="best")
        quality_ax.grid(axis="y", color="0.9", linewidth=0.8)

        fig.tight_layout()
        out_path.parent.mkdir(parents=True, exist_ok=True)
        fig.savefig(out_path, dpi=160)
    finally:
        # Release the figure even when drawing or saving fails.
        plt.close(fig)
    return out_path

`run_onset_workflow(task_dirs, *, out_dir, observations_glob=DEFAULT_OBSERVATIONS_GLOB, threshold_window=DEFAULT_THRESHOLD_WINDOW, threshold_quantile=DEFAULT_THRESHOLD_QUANTILE, threshold_method='point', score_column='confidence', detection_start=None, detection_window=DEFAULT_DETECTION_WINDOW, event_window=None, min_consecutive=1, min_duration=None, require_stable_prediction=False, allow_missing=False, write_combined_events=False, plot_out=None)`

Run onset detection across multiple task result directories.

Source code in src/neureptrace/onset_workflow.py

def run_onset_workflow(
    task_dirs: list[Path],
    *,
    out_dir: Path,
    observations_glob: str = DEFAULT_OBSERVATIONS_GLOB,
    threshold_window: tuple[float, float] = DEFAULT_THRESHOLD_WINDOW,
    threshold_quantile: float = DEFAULT_THRESHOLD_QUANTILE,
    threshold_method: str = "point",
    score_column: str = "confidence",
    detection_start: float | None = None,
    detection_window: tuple[float, float] = DEFAULT_DETECTION_WINDOW,
    event_window: tuple[float, float] | None = None,
    min_consecutive: int = 1,
    min_duration: float | None = None,
    require_stable_prediction: bool = False,
    allow_missing: bool = False,
    write_combined_events: bool = False,
    plot_out: Path | None = None,
) -> OnsetWorkflowRun:
    """Detect onsets for each task directory and merge the per-task results.

    Every directory in ``task_dirs`` is handed to ``run_task_onset_detection``
    with the same detection settings. The per-task summaries are concatenated
    into ``out_dir / "onset_summary_all.csv"``. When ``write_combined_events``
    is set the per-task events are concatenated into
    ``out_dir / "onset_events_all.csv"``, and when ``plot_out`` is given the
    combined summary is plotted there.

    Raises:
        ValueError: If ``task_dirs`` is empty.
        FileNotFoundError: If a task has no observation CSVs and
            ``allow_missing`` is false, or if no task produced a summary.
    """

    if not task_dirs:
        raise ValueError("At least one task directory is required.")

    out_dir.mkdir(parents=True, exist_ok=True)

    # Settings shared by every per-task call.
    detection_options = dict(
        out_dir=out_dir,
        observations_glob=observations_glob,
        threshold_window=threshold_window,
        threshold_quantile=threshold_quantile,
        threshold_method=threshold_method,
        score_column=score_column,
        detection_start=detection_start,
        detection_window=detection_window,
        event_window=event_window,
        min_consecutive=min_consecutive,
        min_duration=min_duration,
        require_stable_prediction=require_stable_prediction,
    )

    completed: list[tuple[TaskOnsetOutput, pd.DataFrame, pd.DataFrame]] = []
    skipped: list[str] = []
    for directory in task_dirs:
        try:
            output, events, summary = run_task_onset_detection(directory, **detection_options)
        except FileNotFoundError:
            # A task without observations is only tolerated on request.
            if allow_missing:
                skipped.append(str(directory))
                continue
            raise
        completed.append((output, events, summary))

    if not completed:
        raise FileNotFoundError(
            "No task observation CSVs were found. Missing task directories: "
            + ", ".join(skipped)
        )

    summary_all_csv = out_dir / "onset_summary_all.csv"
    summary_all = pd.concat([entry[2] for entry in completed], ignore_index=True)
    summary_all.to_csv(summary_all_csv, index=False)

    if write_combined_events:
        events_all_csv = out_dir / "onset_events_all.csv"
        pd.concat([entry[1] for entry in completed], ignore_index=True).to_csv(
            events_all_csv, index=False
        )
    else:
        events_all_csv = None

    plot_path = plot_onset_summary(summary_all, plot_out) if plot_out is not None else None

    return OnsetWorkflowRun(
        out_dir=out_dir,
        task_outputs=[entry[0] for entry in completed],
        summary_all_csv=summary_all_csv,
        events_all_csv=events_all_csv,
        plot_path=plot_path,
    )

`run_task_onset_detection(task_dir, *, out_dir, observations_glob=DEFAULT_OBSERVATIONS_GLOB, threshold_window=DEFAULT_THRESHOLD_WINDOW, threshold_quantile=DEFAULT_THRESHOLD_QUANTILE, threshold_method='point', score_column='confidence', detection_start=None, detection_window=DEFAULT_DETECTION_WINDOW, event_window=None, min_consecutive=1, min_duration=None, require_stable_prediction=False)`

Run onset detection for one benchmark task directory.

The task directory is expected to contain observation files under observations/*_observations.csv unless observations_glob is changed. Outputs are written under out_dir / task_dir.name.

Source code in src/neureptrace/onset_workflow.py

def run_task_onset_detection(
    task_dir: Path,
    *,
    out_dir: Path,
    observations_glob: str = DEFAULT_OBSERVATIONS_GLOB,
    threshold_window: tuple[float, float] = DEFAULT_THRESHOLD_WINDOW,
    threshold_quantile: float = DEFAULT_THRESHOLD_QUANTILE,
    threshold_method: str = "point",
    score_column: str = "confidence",
    detection_start: float | None = None,
    detection_window: tuple[float, float] = DEFAULT_DETECTION_WINDOW,
    event_window: tuple[float, float] | None = None,
    min_consecutive: int = 1,
    min_duration: float | None = None,
    require_stable_prediction: bool = False,
) -> tuple[TaskOnsetOutput, pd.DataFrame, pd.DataFrame]:
    """Detect onsets for a single benchmark task directory.

    Observation files are located inside the resolved ``task_dir`` using
    ``observations_glob``. The event and summary tables produced from them
    are tagged with task columns and written as ``onset_events.csv`` and
    ``onset_summary.csv`` in a per-task sub-directory of ``out_dir``.

    Returns:
        A ``(output, events, summary)`` tuple: the record of written paths and
        row counts, followed by the tagged event and summary tables.

    Raises:
        FileNotFoundError: If no observation CSV matches the glob.
    """

    resolved_dir = task_dir.resolve()
    task_name = _task_name(resolved_dir)

    csv_paths = _observation_paths(resolved_dir, observations_glob)
    if not csv_paths:
        raise FileNotFoundError(f"No observation CSVs found in {resolved_dir / observations_glob}.")

    raw_events, raw_summary = detect_onsets_from_csvs(
        csv_paths,
        threshold_window=threshold_window,
        threshold_quantile=threshold_quantile,
        threshold_method=threshold_method,
        score_column=score_column,
        detection_start=detection_start,
        detection_window=detection_window,
        event_window=event_window,
        min_consecutive=min_consecutive,
        min_duration=min_duration,
        require_stable_prediction=require_stable_prediction,
    )
    # Tag both tables with the task before anything is written to disk.
    events_frame = _insert_task_columns(raw_events, task=task_name, task_dir=resolved_dir)
    summary_frame = _insert_task_columns(raw_summary, task=task_name, task_dir=resolved_dir)

    destination = out_dir / task_name
    destination.mkdir(parents=True, exist_ok=True)
    events_csv = destination / "onset_events.csv"
    summary_csv = destination / "onset_summary.csv"
    for table, target in ((events_frame, events_csv), (summary_frame, summary_csv)):
        table.to_csv(target, index=False)

    record = TaskOnsetOutput(
        task=task_name,
        task_dir=resolved_dir,
        observation_csvs=csv_paths,
        events_csv=events_csv,
        summary_csv=summary_csv,
        n_events=len(events_frame),
        n_summary_rows=len(summary_frame),
    )
    return record, events_frame, summary_frame