---
title: "OdysseusCharacterizationModule — Eunomia Walkthrough"
output: rmarkdown::html_vignette
vignette: >
  %\VignetteIndexEntry{OdysseusCharacterizationModule — Eunomia Walkthrough}
  %\VignetteEngine{knitr::rmarkdown}
  %\VignetteEncoding{UTF-8}
---

```{r setup, include = FALSE}
# Global chunk defaults: collapse source and output, prefix output with "#>",
# and do not evaluate chunks when the vignette is knitted.
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  eval = FALSE
)
```

This vignette demonstrates every major feature of **OdysseusCharacterizationModule** using the [Eunomia](https://ohdsi.github.io/Eunomia/) synthetic OMOP CDM database.

## Prerequisites

```{r prerequisites}
# Install the supporting packages only if they are not already available.
for (pkg in c("DatabaseConnector", "Eunomia")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
}

library(OdysseusCharacterizationModule)
library(DatabaseConnector)
library(Eunomia)
```

## 1. Connect to Eunomia and create cohorts

Eunomia ships four built-in cohorts — *Celecoxib* (id = 1), *Diclofenac* (id = 2), *GiBleed* (id = 3), and *NSAIDs* (id = 4).

```{r connect}
# Build the connection details, populate the cohort table, then open the
# connection that every later chunk reuses (closed in the Cleanup section).
connectionDetails <- getEunomiaConnectionDetails()
Eunomia::createCohorts(connectionDetails)
connection <- connect(connectionDetails)
```

Verify the cohort table:

```{r verify-cohorts}
# Count the rows stored for each cohort definition in main.cohort.
cohortCounts <- querySql(
  connection,
  " SELECT cohort_definition_id, COUNT(*) AS cnt FROM main.cohort GROUP BY cohort_definition_id ORDER BY cohort_definition_id "
)
cohortCounts
```

We will characterise the **Celecoxib** new-user cohort (id = 1) throughout this vignette.

```{r common-params}
# Shared settings passed to singleNodeSetting() / executeSpecs() below.
COHORT_ID <- 1L
CDM_SCHEMA <- "main"
COHORT_TBL <- "cohort"
TEMP_SCHEMA <- "main" # SQLite temp-table emulation
```

## 2. Define analysis windows

```{r windows}
# Two windows relative to index: days -365..-1 and days 1..365.
windows <- defineAnalysisWindows(
  startDays = c(-365, 1),
  endDays = c(-1, 365)
)
windows
```

## 3. Base feature — Condition Occurrence (start type)

The simplest case: one domain, start-date logic, aggregated.

```{r condition-start}
# Plan with only condition_occurrence enabled, using start-date logic.
plan_cond <- planAnalysis(
  analysisWindows = windows,
  useBaseFeatures = list(
    condition_occurrence = list(include = TRUE, type = "start"),
    condition_era = list(include = FALSE),
    drug_exposure = list(include = FALSE),
    drug_era = list(include = FALSE),
    procedure_occurrence = list(include = FALSE),
    observation = list(include = FALSE),
    device_exposure = list(include = FALSE),
    visit_occurrence = list(include = FALSE),
    measurement = list(include = FALSE)
  ),
  useCohortFeatures = list(include = FALSE),
  useConceptSetFeatures = list(include = FALSE)
)

# Turn the plan into executable specs for the target cohort.
specs_cond <- singleNodeSetting(
  plan = plan_cond,
  cohortId = COHORT_ID,
  cohortDatabaseSchema = CDM_SCHEMA,
  cohortTable = COHORT_TBL,
  cdmDatabaseSchema = CDM_SCHEMA,
  vocabularyDatabaseSchema = CDM_SCHEMA,
  aggregated = TRUE
)

cat("Specs generated:", length(specs_cond), "\n")

# Run the specs against the open connection.
results_cond <- executeSpecs(
  connection,
  specs_cond,
  tempEmulationSchema = TEMP_SCHEMA,
  cleanTempTables = TRUE
)

# Results are a list keyed by analysis id; preview the entry named "1001".
head(results_cond[["1001"]], n = 10)
```

## 4. Base feature — Drug Exposure

```{r drug-exposure}
# Plan with only drug_exposure enabled (no explicit type given).
plan_drug <- planAnalysis(
  analysisWindows = windows,
  useBaseFeatures = list(
    condition_occurrence = list(include = FALSE),
    condition_era = list(include = FALSE),
    drug_exposure = list(include = TRUE),
    drug_era = list(include = FALSE),
    procedure_occurrence = list(include = FALSE),
    observation = list(include = FALSE),
    device_exposure = list(include = FALSE),
    visit_occurrence = list(include = FALSE),
    measurement = list(include = FALSE)
  ),
  useCohortFeatures = list(include = FALSE),
  useConceptSetFeatures = list(include = FALSE)
)

specs_drug <- singleNodeSetting(
  plan = plan_drug,
  cohortId = COHORT_ID,
  cohortDatabaseSchema = CDM_SCHEMA,
  cohortTable = COHORT_TBL,
  cdmDatabaseSchema = CDM_SCHEMA,
  vocabularyDatabaseSchema = CDM_SCHEMA,
  aggregated = TRUE
)

results_drug <- executeSpecs(
  connection,
  specs_drug,
  tempEmulationSchema = TEMP_SCHEMA,
  cleanTempTables = TRUE
)

head(results_drug[["1001"]], n = 10)
```

## 5. Base feature — Condition Era (overlap type)

Overlap logic checks whether the era period overlaps the analysis window, rather than simply checking the start date.

```{r condition-era-overlap}
# Plan with only condition_era enabled, using overlap logic.
plan_era <- planAnalysis(
  analysisWindows = windows,
  useBaseFeatures = list(
    condition_occurrence = list(include = FALSE),
    condition_era = list(include = TRUE, type = "overlap"),
    drug_exposure = list(include = FALSE),
    drug_era = list(include = FALSE),
    procedure_occurrence = list(include = FALSE),
    observation = list(include = FALSE),
    device_exposure = list(include = FALSE),
    visit_occurrence = list(include = FALSE),
    measurement = list(include = FALSE)
  ),
  useCohortFeatures = list(include = FALSE),
  useConceptSetFeatures = list(include = FALSE)
)

specs_era <- singleNodeSetting(
  plan = plan_era,
  cohortId = COHORT_ID,
  cohortDatabaseSchema = CDM_SCHEMA,
  cohortTable = COHORT_TBL,
  cdmDatabaseSchema = CDM_SCHEMA,
  vocabularyDatabaseSchema = CDM_SCHEMA,
  aggregated = TRUE
)

results_era <- executeSpecs(
  connection,
  specs_era,
  tempEmulationSchema = TEMP_SCHEMA,
  cleanTempTables = TRUE
)

head(results_era[["1001"]], n = 10)
```

## 6. Base feature — Drug Era (overlap type)

```{r drug-era-overlap}
# Plan with only drug_era enabled, using overlap logic.
plan_dera <- planAnalysis(
  analysisWindows = windows,
  useBaseFeatures = list(
    condition_occurrence = list(include = FALSE),
    condition_era = list(include = FALSE),
    drug_exposure = list(include = FALSE),
    drug_era = list(include = TRUE, type = "overlap"),
    procedure_occurrence = list(include = FALSE),
    observation = list(include = FALSE),
    device_exposure = list(include = FALSE),
    visit_occurrence = list(include = FALSE),
    measurement = list(include = FALSE)
  ),
  useCohortFeatures = list(include = FALSE),
  useConceptSetFeatures = list(include = FALSE)
)

specs_dera <- singleNodeSetting(
  plan = plan_dera,
  cohortId = COHORT_ID,
  cohortDatabaseSchema = CDM_SCHEMA,
  cohortTable = COHORT_TBL,
  cdmDatabaseSchema = CDM_SCHEMA,
  vocabularyDatabaseSchema = CDM_SCHEMA,
  aggregated = TRUE
)

results_dera <- executeSpecs(
  connection,
  specs_dera,
  tempEmulationSchema = TEMP_SCHEMA,
  cleanTempTables = TRUE
)

head(results_dera[["1001"]], n = 10)
```

## 7. Base feature — Procedure Occurrence

```{r procedure}
# Plan with only procedure_occurrence enabled, using start-date logic.
plan_proc <- planAnalysis(
  analysisWindows = windows,
  useBaseFeatures = list(
    condition_occurrence = list(include = FALSE),
    condition_era = list(include = FALSE),
    drug_exposure = list(include = FALSE),
    drug_era = list(include = FALSE),
    procedure_occurrence = list(include = TRUE, type = "start"),
    observation = list(include = FALSE),
    device_exposure = list(include = FALSE),
    visit_occurrence = list(include = FALSE),
    measurement = list(include = FALSE)
  ),
  useCohortFeatures = list(include = FALSE),
  useConceptSetFeatures = list(include = FALSE)
)

specs_proc <- singleNodeSetting(
  plan = plan_proc,
  cohortId = COHORT_ID,
  cohortDatabaseSchema = CDM_SCHEMA,
  cohortTable = COHORT_TBL,
  cdmDatabaseSchema = CDM_SCHEMA,
  vocabularyDatabaseSchema = CDM_SCHEMA,
  aggregated = TRUE
)

results_proc <- executeSpecs(
  connection,
  specs_proc,
  tempEmulationSchema = TEMP_SCHEMA,
  cleanTempTables = TRUE
)

head(results_proc[["1001"]], n = 10)
```

## 8. Base feature — Measurement

```{r measurement}
# Plan with only measurement enabled (no explicit type given).
plan_meas <- planAnalysis(
  analysisWindows = windows,
  useBaseFeatures = list(
    condition_occurrence = list(include = FALSE),
    condition_era = list(include = FALSE),
    drug_exposure = list(include = FALSE),
    drug_era = list(include = FALSE),
    procedure_occurrence = list(include = FALSE),
    observation = list(include = FALSE),
    device_exposure = list(include = FALSE),
    visit_occurrence = list(include = FALSE),
    measurement = list(include = TRUE)
  ),
  useCohortFeatures = list(include = FALSE),
  useConceptSetFeatures = list(include = FALSE)
)

specs_meas <- singleNodeSetting(
  plan = plan_meas,
  cohortId = COHORT_ID,
  cohortDatabaseSchema = CDM_SCHEMA,
  cohortTable = COHORT_TBL,
  cdmDatabaseSchema = CDM_SCHEMA,
  vocabularyDatabaseSchema = CDM_SCHEMA,
  aggregated = TRUE
)

results_meas <- executeSpecs(
  connection,
  specs_meas,
  tempEmulationSchema = TEMP_SCHEMA,
  cleanTempTables = TRUE
)

head(results_meas[["1001"]], n = 10)
```

## 9. Base feature — Observation

```{r observation}
# Plan with only observation enabled (no explicit type given).
plan_obs <- planAnalysis(
  analysisWindows = windows,
  useBaseFeatures = list(
    condition_occurrence = list(include = FALSE),
    condition_era = list(include = FALSE),
    drug_exposure = list(include = FALSE),
    drug_era = list(include = FALSE),
    procedure_occurrence = list(include = FALSE),
    observation = list(include = TRUE),
    device_exposure = list(include = FALSE),
    visit_occurrence = list(include = FALSE),
    measurement = list(include = FALSE)
  ),
  useCohortFeatures = list(include = FALSE),
  useConceptSetFeatures = list(include = FALSE)
)

specs_obs <- singleNodeSetting(
  plan = plan_obs,
  cohortId = COHORT_ID,
  cohortDatabaseSchema = CDM_SCHEMA,
  cohortTable = COHORT_TBL,
  cdmDatabaseSchema = CDM_SCHEMA,
  vocabularyDatabaseSchema = CDM_SCHEMA,
  aggregated = TRUE
)

results_obs <- executeSpecs(
  connection,
  specs_obs,
  tempEmulationSchema = TEMP_SCHEMA,
  cleanTempTables = TRUE
)

head(results_obs[["1001"]])
```

## 10. Base feature — Visit Occurrence (overlap type)

```{r visit-overlap}
# Plan with only visit_occurrence enabled, using overlap logic.
plan_visit <- planAnalysis(
  analysisWindows = windows,
  useBaseFeatures = list(
    condition_occurrence = list(include = FALSE),
    condition_era = list(include = FALSE),
    drug_exposure = list(include = FALSE),
    drug_era = list(include = FALSE),
    procedure_occurrence = list(include = FALSE),
    observation = list(include = FALSE),
    device_exposure = list(include = FALSE),
    visit_occurrence = list(include = TRUE, type = "overlap"),
    measurement = list(include = FALSE)
  ),
  useCohortFeatures = list(include = FALSE),
  useConceptSetFeatures = list(include = FALSE)
)

specs_visit <- singleNodeSetting(
  plan = plan_visit,
  cohortId = COHORT_ID,
  cohortDatabaseSchema = CDM_SCHEMA,
  cohortTable = COHORT_TBL,
  cdmDatabaseSchema = CDM_SCHEMA,
  vocabularyDatabaseSchema = CDM_SCHEMA,
  aggregated = TRUE
)

results_visit <- executeSpecs(
  connection,
  specs_visit,
  tempEmulationSchema = TEMP_SCHEMA,
  cleanTempTables = TRUE
)

head(results_visit[["1001"]])
```

## 11. Non-aggregated (patient-level) output

Setting `aggregated = FALSE` returns one row per patient-concept pair instead of summing across patients.

```{r non-aggregated}
# Reuse the condition-occurrence plan; only the aggregated flag differs.
specs_raw <- singleNodeSetting(
  plan = plan_cond,
  cohortId = COHORT_ID,
  cohortDatabaseSchema = CDM_SCHEMA,
  cohortTable = COHORT_TBL,
  cdmDatabaseSchema = CDM_SCHEMA,
  vocabularyDatabaseSchema = CDM_SCHEMA,
  aggregated = FALSE
)

results_raw <- executeSpecs(
  connection,
  specs_raw,
  tempEmulationSchema = TEMP_SCHEMA,
  cleanTempTables = TRUE
)

cat("Patient-level rows (window 1):", nrow(results_raw[["1001"]]), "\n")
head(results_raw[["1001"]], n = 10)
```

## 12. Multiple domains at once

Enable several domains in a single plan for an integrated analysis.

```{r multi-domain}
# Plan enabling seven base domains at once; observation and device_exposure
# stay switched off.
plan_multi <- planAnalysis(
  analysisWindows = windows,
  useBaseFeatures = list(
    condition_occurrence = list(include = TRUE, type = "start"),
    condition_era = list(include = TRUE, type = "overlap"),
    drug_exposure = list(include = TRUE),
    drug_era = list(include = TRUE, type = "overlap"),
    procedure_occurrence = list(include = TRUE),
    observation = list(include = FALSE),
    device_exposure = list(include = FALSE),
    visit_occurrence = list(include = TRUE, type = "overlap"),
    measurement = list(include = TRUE)
  ),
  useCohortFeatures = list(include = FALSE),
  useConceptSetFeatures = list(include = FALSE)
)

specs_multi <- singleNodeSetting(
  plan = plan_multi,
  cohortId = COHORT_ID,
  cohortDatabaseSchema = CDM_SCHEMA,
  cohortTable = COHORT_TBL,
  cdmDatabaseSchema = CDM_SCHEMA,
  vocabularyDatabaseSchema = CDM_SCHEMA,
  aggregated = TRUE
)

cat("Total specs:", length(specs_multi), "\n")

results_multi <- executeSpecs(
  connection,
  specs_multi,
  tempEmulationSchema = TEMP_SCHEMA,
  cleanTempTables = TRUE
)

# Summary across all specs: one row per analysis id with its result row count.
summary_df <- do.call(rbind, lapply(names(results_multi), function(analysis_id) {
  result <- results_multi[[analysis_id]]
  data.frame(
    analysis_id = analysis_id,
    rows = nrow(result),
    stringsAsFactors = FALSE
  )
}))
summary_df
```

## 13. Cohort features — Using GiBleed and NSAIDs cohorts as covariates

Use pre-defined cohorts as binary covariates. Here we use the *GiBleed* (id = 3) and *NSAIDs* (id = 4) cohorts as covariates for the Celecoxib cohort, with start-date logic (`type = "start"`).

```{r cohort-features}
# All base domains are off; only cohort features are requested, using cohorts
# 3 and 4 from main.cohort with start-date logic.
plan_cohort <- planAnalysis(
  analysisWindows = windows,
  useBaseFeatures = list(
    condition_occurrence = list(include = FALSE),
    condition_era = list(include = FALSE),
    drug_exposure = list(include = FALSE),
    drug_era = list(include = FALSE),
    procedure_occurrence = list(include = FALSE),
    observation = list(include = FALSE),
    device_exposure = list(include = FALSE),
    visit_occurrence = list(include = FALSE),
    measurement = list(include = FALSE)
  ),
  useCohortFeatures = list(
    include = TRUE,
    type = "start",
    cohortIds = c(3L, 4L),
    cohortNames = c("GiBleed", "NSAIDs"),
    cohortTable = "cohort",
    covariateSchema = "main"
  ),
  useConceptSetFeatures = list(include = FALSE)
)

specs_cohort <- singleNodeSetting(
  plan = plan_cohort,
  cohortId = COHORT_ID,
  cohortDatabaseSchema = CDM_SCHEMA,
  cohortTable = COHORT_TBL,
  cdmDatabaseSchema = CDM_SCHEMA,
  vocabularyDatabaseSchema = CDM_SCHEMA,
  aggregated = TRUE
)

cat("Cohort feature specs:", length(specs_cohort), "\n")

results_cohort <- executeSpecs(
  connection,
  specs_cohort,
  tempEmulationSchema = TEMP_SCHEMA,
  cleanTempTables = TRUE
)

# Show results for every cohort feature spec
for (analysis_id in names(results_cohort)) {
  cat("\n--- Analysis", analysis_id, "---\n")
  print(results_cohort[[analysis_id]])
}
```

## 14. Cohort features — Overlap type

```{r cohort-overlap}
# Same idea as the previous section, but with overlap logic and GiBleed only.
plan_coh_ov <- planAnalysis(
  analysisWindows = windows,
  useBaseFeatures = list(
    condition_occurrence = list(include = FALSE),
    condition_era = list(include = FALSE),
    drug_exposure = list(include = FALSE),
    drug_era = list(include = FALSE),
    procedure_occurrence = list(include = FALSE),
    observation = list(include = FALSE),
    device_exposure = list(include = FALSE),
    visit_occurrence = list(include = FALSE),
    measurement = list(include = FALSE)
  ),
  useCohortFeatures = list(
    include = TRUE,
    type = "overlap",
    cohortIds = 3L,
    cohortNames = "GiBleed",
    cohortTable = "cohort",
    covariateSchema = "main"
  ),
  useConceptSetFeatures = list(include = FALSE)
)

specs_coh_ov <- singleNodeSetting(
  plan = plan_coh_ov,
  cohortId = COHORT_ID,
  cohortDatabaseSchema = CDM_SCHEMA,
  cohortTable = COHORT_TBL,
  cdmDatabaseSchema = CDM_SCHEMA,
  vocabularyDatabaseSchema = CDM_SCHEMA,
  aggregated = TRUE
)

# Inspect the overlap field carried by the first generated spec.
cat("Overlap flag:", specs_coh_ov[[1]]$overlap, "\n")

results_coh_ov <- executeSpecs(
  connection,
  specs_coh_ov,
  tempEmulationSchema = TEMP_SCHEMA,
  cleanTempTables = TRUE
)

# Print the result of every overlap-type cohort feature spec.
for (analysis_id in names(results_coh_ov)) {
  cat("\n--- Analysis", analysis_id, "---\n")
  print(results_coh_ov[[analysis_id]])
}
```

## 15. SQL rendering without execution

You can inspect the generated SQL without a database connection using `renderSpecSql()` and `renderAllSpecSql()`.

```{r render-only}
# Render the first condition spec without a target dialect and print the
# first 500 characters.
default_sql <- renderSpecSql(specs_cond[[1]])
cat("--- SQL Server (default) ---\n")
cat(substr(default_sql, 1, 500), "\n...\n")
```

Translate to other dialects:

```{r render-dialects}
# Render the same spec once per dialect and print the first 400 characters.
for (target in c("postgresql", "redshift", "oracle", "spark")) {
  cat("\n--- Dialect:", target, "---\n")
  translated_sql <- renderSpecSql(specs_cond[[1]], targetDialect = target)
  cat(substr(translated_sql, 1, 400), "\n...\n")
}
```

Batch rendering:

```{r render-all}
# Render every spec in the list; the result is named by analysis id.
rendered <- renderAllSpecSql(specs_cond)
cat("Number of rendered SQL statements:", length(rendered), "\n")
cat("Analysis IDs:", paste(names(rendered), collapse = ", "), "\n")
```

## 16. Multiple time windows

The number of specs scales linearly with the number of windows.

```{r many-windows}
# Eight windows: four ending the day before index, four after index.
windows_8 <- defineAnalysisWindows(
  startDays = c(-365, -180, -90, -30, 1, 31, 91, 181),
  endDays = c(-1, -1, -1, -1, 30, 90, 180, 365)
)

plan_8w <- planAnalysis(
  analysisWindows = windows_8,
  useBaseFeatures = list(
    condition_occurrence = list(include = TRUE, type = "start"),
    condition_era = list(include = FALSE),
    drug_exposure = list(include = FALSE),
    drug_era = list(include = FALSE),
    procedure_occurrence = list(include = FALSE),
    observation = list(include = FALSE),
    device_exposure = list(include = FALSE),
    visit_occurrence = list(include = FALSE),
    measurement = list(include = FALSE)
  ),
  useCohortFeatures = list(include = FALSE),
  useConceptSetFeatures = list(include = FALSE)
)

specs_8w <- singleNodeSetting(
  plan = plan_8w,
  cohortId = COHORT_ID,
  cohortDatabaseSchema = CDM_SCHEMA,
  cohortTable = COHORT_TBL,
  cdmDatabaseSchema = CDM_SCHEMA,
  vocabularyDatabaseSchema = CDM_SCHEMA,
  aggregated = TRUE
)

cat("Specs with 8 windows:", length(specs_8w), "\n")

results_8w <- executeSpecs(
  connection,
  specs_8w,
  tempEmulationSchema = TEMP_SCHEMA,
  cleanTempTables = TRUE
)

# Row count of each result, keyed by analysis id.
data.frame(
  analysis_id = names(results_8w),
  rows = vapply(results_8w, nrow, integer(1))
)
```

## 17. Combined — Base + Cohort features in one run

```{r combined}
# Four base domains plus one cohort feature (GiBleed) in a single plan.
plan_combined <- planAnalysis(
  analysisWindows = windows,
  useBaseFeatures = list(
    condition_occurrence = list(include = TRUE, type = "start"),
    condition_era = list(include = FALSE),
    drug_exposure = list(include = TRUE),
    drug_era = list(include = FALSE),
    procedure_occurrence = list(include = TRUE),
    observation = list(include = FALSE),
    device_exposure = list(include = FALSE),
    visit_occurrence = list(include = FALSE),
    measurement = list(include = TRUE)
  ),
  useCohortFeatures = list(
    include = TRUE,
    type = "start",
    cohortIds = 3L,
    cohortNames = "GiBleed",
    cohortTable = "cohort",
    covariateSchema = "main"
  ),
  useConceptSetFeatures = list(include = FALSE)
)

specs_combined <- singleNodeSetting(
  plan = plan_combined,
  cohortId = COHORT_ID,
  cohortDatabaseSchema = CDM_SCHEMA,
  cohortTable = COHORT_TBL,
  cdmDatabaseSchema = CDM_SCHEMA,
  vocabularyDatabaseSchema = CDM_SCHEMA,
  aggregated = TRUE
)

cat(
  "Total specs (4 base domains x 2 windows + 1 cohort x 2 windows):",
  length(specs_combined),
  "\n"
)

results_combined <- executeSpecs(
  connection,
  specs_combined,
  tempEmulationSchema = TEMP_SCHEMA,
  cleanTempTables = TRUE
)

# One summary row per analysis id, flagging results that came back empty.
summary_combined <- do.call(rbind, lapply(names(results_combined), function(analysis_id) {
  result <- results_combined[[analysis_id]]
  n_rows <- nrow(result)
  data.frame(
    analysis_id = analysis_id,
    source = if (n_rows > 0) "data" else "empty",
    rows = n_rows,
    stringsAsFactors = FALSE
  )
}))
summary_combined
```

## 18. Characterising a different cohort — Diclofenac

All examples so far used Celecoxib (id = 1). Switching to a different cohort is as simple as changing `cohortId`.

```{r diclofenac}
# Same condition-occurrence plan as before; only the target cohort id changes.
specs_diclo <- singleNodeSetting(
  plan = plan_cond,
  cohortId = 2L,
  cohortDatabaseSchema = CDM_SCHEMA,
  cohortTable = COHORT_TBL,
  cdmDatabaseSchema = CDM_SCHEMA,
  vocabularyDatabaseSchema = CDM_SCHEMA,
  aggregated = TRUE
)

results_diclo <- executeSpecs(
  connection,
  specs_diclo,
  tempEmulationSchema = TEMP_SCHEMA,
  cleanTempTables = TRUE
)

cat("Diclofenac condition covariates (pre-index):\n")
head(results_diclo[["1001"]], n = 10)
```

## 19. Error handling with `stopOnError = FALSE`

When executing many specs, you can continue past failures.

```{r error-handling}
# Re-run the multi-domain specs without aborting on the first failure.
results_safe <- executeSpecs(
  connection,
  specs_multi,
  tempEmulationSchema = TEMP_SCHEMA,
  cleanTempTables = TRUE,
  stopOnError = FALSE
)

# A result counts as failed when it carries an "error" attribute.
has_error <- vapply(
  results_safe,
  function(result) !is.null(attr(result, "error")),
  logical(1)
)
cat("Failed specs:", sum(has_error), "/", length(results_safe), "\n")
```

## 20. Cleanup

```{r cleanup}
# Close the connection opened in section 1.
disconnect(connection)
```

## Session info

```{r session-info}
sessionInfo()
```