Custom CovariateData Builder — Eunomia Demo

This vignette demonstrates how to use OdysseusCharacterizationModule (OCM) as a custom covariate builder for the FeatureExtraction package. When you pass an OCM covariate settings object to FeatureExtraction::getDbCovariateData(), FeatureExtraction returns a standard CovariateData (Andromeda) object that plugs directly into CohortMethod, PatientLevelPrediction, or any other HADES package that consumes covariates.

Prerequisites

# Install each prerequisite package only when it is not already available.
for (pkg in c("DatabaseConnector", "Eunomia", "Andromeda")) {
  if (requireNamespace(pkg, quietly = TRUE)) {
    next
  }
  install.packages(pkg)
}

library(OdysseusCharacterizationModule)
library(DatabaseConnector)
library(Eunomia)

1. Connect to Eunomia

# Connection details for the Eunomia demo database.
connectionDetails <- Eunomia::getEunomiaConnectionDetails()
# Create the example cohorts before opening the connection used below.
Eunomia::createCohorts(connectionDetails)
connection <- DatabaseConnector::connect(connectionDetails)

# Identifiers reused by every extraction call in this vignette.
CDM_SCHEMA <- "main"
COHORT_ID  <- 1L  # Celecoxib new users

2. Create covariate settings

createOcmCovariateSettings() accepts the same parameters as planAnalysis() — analysis windows, base features, cohort features, and concept-set features. The object it returns carries a "fun" attribute that tells FeatureExtraction which builder function to call.

# Covariate settings with a single analysis window (day -365 through day -1)
# and only two base feature domains switched on.
ocmSettings <- createOcmCovariateSettings(
  analysisWindows = defineAnalysisWindows(startDays = -365, endDays = -1),
  useBaseFeatures = list(
    # Enabled domains
    condition_occurrence = list(include = TRUE, type = "start"),
    drug_exposure = list(include = TRUE, atc = FALSE),
    # Disabled domains
    condition_era = list(include = FALSE),
    drug_era = list(include = FALSE),
    procedure_occurrence = list(include = FALSE),
    observation = list(include = FALSE),
    device_exposure = list(include = FALSE),
    visit_occurrence = list(include = FALSE),
    measurement = list(include = FALSE)
  )
)

# The settings object is tagged with the class FeatureExtraction expects ...
class(ocmSettings)
#> [1] "covariateSettings"

# ... and with the name of the builder function to dispatch to.
attr(ocmSettings, "fun")
#> [1] "getDbOcmCovariateData"

3. Standalone usage — call the builder directly

You do not need FeatureExtraction installed to use the builder. getDbOcmCovariateData() runs the full OCM pipeline and assembles a CovariateData Andromeda object.

# Call the builder directly; the cohort table is passed schema-qualified.
covData <- getDbOcmCovariateData(
  connection = connection,
  cdmDatabaseSchema = CDM_SCHEMA,
  cohortTable = "main.cohort",
  cohortIds = COHORT_ID,
  rowIdField = "subject_id",
  covariateSettings = ocmSettings,
  aggregated = FALSE
)

Inspect the covariates table

Each row is a sparse (rowId, covariateId, covariateValue) triple. covariateValue is 1 for binary features.

# Pull the covariates table into a data frame and report its size.
covDf <- as.data.frame(covData$covariates)
cat("Total covariate entries:", nrow(covDf), "\n")
cat("Unique patients:", length(unique(covDf$rowId)), "\n")
cat("Unique covariates:", length(unique(covDf$covariateId)), "\n")
head(covDf)

Inspect the covariate reference

The covariateRef table maps each covariateId to a human-readable name, concept ID, and analysis ID.

# Show the first ten rows of the covariate reference table.
refDf <- as.data.frame(covData$covariateRef)
head(refDf, n = 10)

Inspect the analysis reference

The analysisRef table has one row per analysis, describing the domain, time window, and whether the covariate is binary.

# Print the analysis reference table in full.
analysisDf <- as.data.frame(covData$analysisRef)
analysisDf
# Close the Andromeda object now that all three tables have been inspected.
Andromeda::close(covData)

4. FeatureExtraction integration

When FeatureExtraction is available, pass ocmSettings as covariateSettings. FeatureExtraction reads the fun attribute, calls getDbOcmCovariateData() internally, and returns the result as a standard CovariateData object.

# FeatureExtraction is optional: skip this demo when it is not installed.
if (!requireNamespace("FeatureExtraction", quietly = TRUE)) {
  message("FeatureExtraction not installed — skipping integration demo.")
} else {
  # Here the cohort schema and table name are passed as separate arguments.
  covDataFE <- FeatureExtraction::getDbCovariateData(
    connection = connection,
    cdmDatabaseSchema = CDM_SCHEMA,
    cohortDatabaseSchema = CDM_SCHEMA,
    cohortTable = "cohort",
    cohortIds = COHORT_ID,
    covariateSettings = ocmSettings,
    aggregated = FALSE
  )

  cat(
    "Covariates (via FE):",
    nrow(as.data.frame(covDataFE$covariates)),
    "\n"
  )
  Andromeda::close(covDataFE)
}

5. Combining with standard FeatureExtraction covariates

A key benefit of the custom builder pattern is that you can combine OCM covariates with FeatureExtraction’s built-in covariates. Pass a list of settings objects:

# FeatureExtraction is optional: skip this demo when it is not installed.
if (!requireNamespace("FeatureExtraction", quietly = TRUE)) {
  message("FeatureExtraction not installed — skipping combined demo.")
} else {
  # Built-in FeatureExtraction demographics covariates.
  feSettings <- FeatureExtraction::createCovariateSettings(
    useDemographicsGender = TRUE,
    useDemographicsAge = TRUE,
    useDemographicsIndexYear = TRUE
  )

  # Both settings objects are supplied together as a list.
  combinedCovData <- FeatureExtraction::getDbCovariateData(
    connection = connection,
    cdmDatabaseSchema = CDM_SCHEMA,
    cohortDatabaseSchema = CDM_SCHEMA,
    cohortTable = "cohort",
    cohortIds = COHORT_ID,
    covariateSettings = list(feSettings, ocmSettings),
    aggregated = FALSE
  )

  covDf <- as.data.frame(combinedCovData$covariates)
  cat("Total covariate entries (combined):", nrow(covDf), "\n")
  cat("Unique covariates (combined):", length(unique(covDf$covariateId)), "\n")

  Andromeda::close(combinedCovData)
}

6. Multiple domains and time windows

# Three analysis windows, given as paired start/end day offsets:
# (-365, -1), (-30, -1) and (1, 30).
ocmSettingsWide <- createOcmCovariateSettings(
  analysisWindows = defineAnalysisWindows(
    startDays = c(-365, -30,  1),
    endDays   = c(  -1,  -1, 30)
  ),
  # Five domains are enabled; the remaining four are explicitly excluded.
  useBaseFeatures = list(
    condition_occurrence = list(include = TRUE, type = "start"),
    drug_exposure = list(include = TRUE, atc = FALSE),
    procedure_occurrence = list(include = TRUE),
    condition_era = list(include = FALSE),
    drug_era = list(include = FALSE),
    observation = list(include = FALSE),
    device_exposure = list(include = FALSE),
    visit_occurrence = list(include = TRUE, type = "start"),
    measurement = list(include = TRUE)
  )
)

# Run the builder with the multi-domain, multi-window settings.
covDataWide <- getDbOcmCovariateData(
  connection = connection,
  cdmDatabaseSchema = CDM_SCHEMA,
  cohortTable = "main.cohort",
  cohortIds = COHORT_ID,
  covariateSettings = ocmSettingsWide,
  aggregated = FALSE
)

# Report how many analyses and covariate rows were produced.
cat("Analyses:", nrow(as.data.frame(covDataWide$analysisRef)), "\n")
cat("Covariates:", nrow(as.data.frame(covDataWide$covariates)), "\n")

Andromeda::close(covDataWide)

7. Using concept-set features

Define a custom concept set (e.g. hypertension-related conditions) and extract it as a binary covariate:

# Settings that switch every base feature domain off and request a single
# binary concept-set feature instead.
ocmConceptSet <- createOcmCovariateSettings(
  analysisWindows = defineAnalysisWindows(startDays = -365, endDays = -1),
  useBaseFeatures = list(
    condition_occurrence = list(include = FALSE),
    condition_era = list(include = FALSE),
    drug_exposure = list(include = FALSE),
    drug_era = list(include = FALSE),
    procedure_occurrence = list(include = FALSE),
    observation = list(include = FALSE),
    device_exposure = list(include = FALSE),
    visit_occurrence = list(include = FALSE),
    measurement = list(include = FALSE)
  ),
  useConceptSetFeatures = list(
    include = TRUE,
    type = "binary",
    conceptSets = list(
      # One concept set named "hypertension", with a single item (concept
      # 316866 plus its descendants), looked up in condition_occurrence.
      hypertension = list(
        items = list(
          list(
            concept = list(CONCEPT_ID = 316866L),
            includeDescendants = TRUE,
            includeMapped = FALSE,
            isExcluded = FALSE
          )
        ),
        tables = "condition_occurrence"
      )
    )
  )
)

# Run the builder with the concept-set-only settings.
covDataCS <- getDbOcmCovariateData(
  connection = connection,
  cdmDatabaseSchema = CDM_SCHEMA,
  cohortTable = "main.cohort",
  cohortIds = COHORT_ID,
  covariateSettings = ocmConceptSet,
  aggregated = FALSE
)

# Report the number of covariate rows, then print the covariate reference.
cat(
  "Concept-set covariates:",
  nrow(as.data.frame(covDataCS$covariates)),
  "\n"
)
as.data.frame(covDataCS$covariateRef)

Andromeda::close(covDataCS)

Cleanup

# Close the database connection opened at the start of the vignette.
DatabaseConnector::disconnect(connection)

Summary

| Function | Purpose |
|---|---|
| createOcmCovariateSettings() | Configure OCM features as a covariateSettings object |
| getDbOcmCovariateData() | Execute the pipeline and return a CovariateData Andromeda object |
| FeatureExtraction::getDbCovariateData(..., covariateSettings = ocmSettings) | Use OCM as a plug-in builder inside FeatureExtraction |

The returned CovariateData object contains: