---
title: "Simulation and Benchmark Workflows"
output: rmarkdown::html_vignette
vignette: >
  %\VignetteIndexEntry{Simulation and Benchmark Workflows}
  %\VignetteEngine{knitr::rmarkdown}
  %\VignetteEncoding{UTF-8}
---

```{r, include = FALSE}
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)
has_glmnet <- requireNamespace("glmnet", quietly = TRUE)
```

`SelectBoost.FDA` now includes a validation layer for repeated simulations, method benchmarks, plain-SelectBoost baselines, and direct advantage summaries for FDA-aware `SelectBoost`.

## Simulate a benchmark scenario

```{r}
library(SelectBoost.FDA)

sim_grid <- simulate_fda_scenario(
  n = 60,
  grid_length = 30,
  scenario = "localized_dense",
  representation = "grid",
  seed = 1
)
sim_grid
head(selection_map(sim_grid$design))
sim_grid$truth$active_predictors
```

The returned object keeps both the fitted `fda_design` and the mapped truth for the transformed feature space.

## Benchmark multiple methods on shared truth

```{r, eval = has_glmnet}
bench <- benchmark_selection_methods(
  sim_grid,
  methods = c("stability", "interval", "selectboost", "plain_selectboost"),
  levels = c("feature", "group"),
  stability_args = list(selector = "lasso", B = 8, cutoff = 0.5, seed = 2),
  interval_args = list(selector = "lasso", width = 5, B = 8, cutoff = 0.5, seed = 3),
  selectboost_args = list(selector = "lasso", B = 4, steps.seq = c(0.7, 0.4), c0lim = FALSE),
  plain_selectboost_args = list(selector = "lasso", B = 4, steps.seq = c(0.7, 0.4), c0lim = FALSE)
)
bench
bench$metrics
head(selection_map(bench, level = "group"))
summarise_benchmark_performance(bench, level = "feature", metric = "f1")
summarise_benchmark_advantage(
  bench,
  target = "selectboost",
  reference = c("plain_selectboost", "stability"),
  level = "feature",
  metric = "f1"
)
```

This keeps the comparison object available, so the same `selection_map()` and `selected()` methods work on top of the benchmark output.
The summary helpers make it easier to answer the benchmark question directly: whether FDA-aware `SelectBoost` improves feature recovery over the plain baseline and grouped stability selection once each method is evaluated at its best `c0`.

## Run a repeated study

```{r, eval = has_glmnet}
study_dense <- run_simulation_study(
  n_rep = 2,
  simulate_args = list(
    n = 50,
    grid_length = 28,
    scenario = "localized_dense",
    representation = "basis"
  ),
  benchmark_args = list(
    methods = c("stability", "selectboost", "plain_selectboost"),
    levels = c("feature", "group", "basis"),
    stability_args = list(selector = "lasso", B = 6, cutoff = 0.5, seed = 4),
    selectboost_args = list(selector = "lasso", B = 4, steps.seq = c(0.7, 0.4), c0lim = FALSE),
    plain_selectboost_args = list(selector = "lasso", B = 4, steps.seq = c(0.7, 0.4), c0lim = FALSE)
  ),
  seed = 10
)

study_smooth <- run_simulation_study(
  n_rep = 2,
  simulate_args = list(
    n = 50,
    grid_length = 28,
    scenario = "distributed_smooth",
    representation = "basis"
  ),
  benchmark_args = list(
    methods = c("stability", "selectboost", "plain_selectboost"),
    levels = c("feature", "group", "basis"),
    stability_args = list(selector = "lasso", B = 6, cutoff = 0.5, seed = 14),
    selectboost_args = list(selector = "lasso", B = 4, steps.seq = c(0.7, 0.4), c0lim = FALSE),
    plain_selectboost_args = list(selector = "lasso", B = 4, steps.seq = c(0.7, 0.4), c0lim = FALSE)
  ),
  seed = 20
)

summarise_benchmark_advantage(
  study_dense,
  target = "selectboost",
  reference = c("plain_selectboost", "stability"),
  level = "feature",
  metric = "f1"
)
summarise_benchmark_advantage(
  study_smooth,
  target = "selectboost",
  reference = c("plain_selectboost", "stability"),
  level = "feature",
  metric = "f1"
)
```

The repeated-study summary reports the mean and standard deviation of recovery metrics by method, evaluation level, scenario, and `c0` when applicable.
In practice, the `localized_dense` setting is the most direct stress test for the FDA-aware grouping built into `selectboost_fda()`.

## Run a targeted sensitivity study for FDA-aware SelectBoost

```{r, eval = has_glmnet}
sensitivity <- run_selectboost_sensitivity_study(
  n_rep = 1,
  simulate_grid = data.frame(
    scenario = c("localized_dense", "confounded_blocks"),
    confounding_strength = c(0.4, 0.9),
    active_region_scale = c(0.8, 0.7),
    local_correlation = c(1, 2),
    stringsAsFactors = FALSE
  ),
  selectboost_grid = data.frame(
    association_method = c("correlation", "hybrid", "interval"),
    bandwidth = c(NA, 4, 4),
    stringsAsFactors = FALSE
  ),
  simulate_args = list(n = 50, grid_length = 28, representation = "grid"),
  benchmark_args = list(
    methods = c("stability", "selectboost", "plain_selectboost"),
    levels = c("feature", "group"),
    stability_args = list(selector = "lasso", B = 6, cutoff = 0.5, seed = 40),
    selectboost_args = list(selector = "lasso", B = 4, steps.seq = c(0.7, 0.4), c0lim = FALSE),
    plain_selectboost_args = list(selector = "lasso", B = 4, steps.seq = c(0.7, 0.4), c0lim = FALSE)
  ),
  seed = 50
)

summarise_benchmark_advantage(
  sensitivity,
  target = "selectboost",
  reference = "plain_selectboost",
  level = "feature",
  metric = "f1"
)
```

This is the intended benchmark workflow when the goal is to demonstrate where FDA-aware grouping matters. The summary table keeps `association_method`, `bandwidth`, `confounding_strength`, `active_region_scale`, and `local_correlation` as explicit columns, so it is straightforward to isolate the settings where `selectboost_fda()` gains over the plain baseline.

## Inspect the saved larger study

The repository also ships a larger saved sensitivity study generated by `tools/run_selectboost_sensitivity_study.R`. That script runs a broader sweep and writes reusable benchmark summaries to `inst/extdata/benchmarks/`.
```{r}
benchmark_dir <- system.file("extdata", "benchmarks", package = "SelectBoost.FDA")
top_feature_settings <- utils::read.csv(
  file.path(benchmark_dir, "selectboost_sensitivity_top_settings.csv"),
  stringsAsFactors = FALSE
)
utils::head(
  top_feature_settings[
    , c(
      "scenario", "confounding_strength", "active_region_scale",
      "local_correlation", "association_method", "bandwidth",
      "selectboost_f1_mean", "plain_selectboost_f1_mean",
      "delta_mean", "win_rate"
    )
  ],
  10
)
```

The key comparison columns are `selectboost_f1_mean`, `plain_selectboost_f1_mean`, and `delta_mean`. This makes the algorithm comparison explicit at the feature-selection level while keeping the FDA-specific settings attached to each row.