## ----include = FALSE---------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>", fig.width = 7, fig.height = 5 ) ## ----setup, message = FALSE--------------------------------------------------- library(pepdiff) library(dplyr) ## ----generate-data------------------------------------------------------------ set.seed(789) n_peptides <- 40 n_reps <- 5 # 5 reps per condition = 20 observations per peptide peptides <- paste0("PEP_", sprintf("%03d", 1:n_peptides)) genes <- paste0("GENE_", LETTERS[((1:n_peptides - 1) %% 26) + 1]) # Step 1: Create base abundance for each peptide peptide_info <- tibble( peptide = peptides, gene_id = genes, pep_num = 1:n_peptides, base = rgamma(n_peptides, shape = 6, rate = 0.6) ) # Step 2: Create the experimental design design <- expand.grid( peptide = peptides, treatment = c("ctrl", "trt"), timepoint = c("early", "late"), bio_rep = 1:n_reps, stringsAsFactors = FALSE ) # Step 3: Join design with peptide info and apply effects # Using heavier-tailed noise to justify ART sim_data <- design %>% left_join(peptide_info, by = "peptide") %>% mutate( # Group A (peptides 1-10): Treatment effect only # trt is 3-fold higher than ctrl, same at both timepoints trt_effect = ifelse(pep_num <= 10 & treatment == "trt", 3, 1), # Group B (peptides 11-20): Timepoint effect only # late is 3-fold higher than early, same for both treatments time_effect = ifelse(pep_num > 10 & pep_num <= 20 & timepoint == "late", 3, 1), # Group C (peptides 21-30): Interaction # Treatment works at late (4-fold) but NOT at early int_effect = case_when( pep_num > 20 & pep_num <= 30 & treatment == "trt" & timepoint == "late" ~ 4, TRUE ~ 1 ), # Group D (peptides 31-40): No effect # All effect multipliers are 1 # Add heavy-tailed noise (occasional extreme values) # This is why we might prefer ART over GLM extreme = ifelse(runif(n()) < 0.05, runif(n(), 2, 4), 1), # Final abundance with Gamma noise and occasional extremes value = rgamma(n(), shape = 10, rate = 10 / (base * trt_effect * time_effect * int_effect)) * extreme ) %>% select(peptide, gene_id, treatment, timepoint, bio_rep, value) # Import temp_file <- tempfile(fileext = ".csv") write.csv(sim_data, temp_file, row.names = FALSE) dat <- read_pepdiff( temp_file, id = "peptide", gene = "gene_id", value = "value", factors = c("treatment", "timepoint"), replicate = "bio_rep" ) dat ## ----run-art, message = FALSE------------------------------------------------- results_art <- compare( dat, compare = "treatment", ref = "ctrl", method = "art" ) results_art ## ----view-results------------------------------------------------------------- results_art$results %>% mutate( pep_num = as.numeric(gsub("PEP_", "", peptide)), group = case_when( pep_num <= 10 ~ "A: Treatment only", pep_num <= 20 ~ "B: Timepoint only", pep_num <= 30 ~ "C: Interaction", TRUE ~ "D: No effect" ) ) %>% group_by(group) %>% summarise( n_significant = sum(significant), n_peptides = n(), median_fc = round(median(fold_change, na.rm = TRUE), 2) ) ## ----plot-art-results, fig.height = 6----------------------------------------- plot(results_art) ## ----art-stratified, message = FALSE------------------------------------------ results_strat <- compare( dat, compare = "treatment", ref = "ctrl", within = "timepoint", method = "art" ) results_strat ## ----view-stratified---------------------------------------------------------- results_strat$results %>% mutate( pep_num = as.numeric(gsub("PEP_", "", peptide)), group = case_when( pep_num <= 10 ~ "A: Treatment only", pep_num <= 20 ~ "B: Timepoint only", pep_num <= 30 ~ "C: Interaction", TRUE ~ "D: No effect" ) ) %>% group_by(group, timepoint) %>% summarise( n_significant = sum(significant), median_fc = round(median(fold_change, na.rm = TRUE), 2), .groups = "drop" ) %>% tidyr::pivot_wider( names_from = timepoint, values_from = c(n_significant, median_fc) ) ## ----compare-methods, message = FALSE----------------------------------------- results_glm <- compare(dat, compare = "treatment", ref = "ctrl", method = "glm") ## ----compare-significant------------------------------------------------------ # Compare significant calls comparison <- tibble( peptide = results_glm$results$peptide, glm_sig = results_glm$results$significant, art_sig = results_art$results$significant, glm_pval = round(results_glm$results$p_value, 4), art_pval = round(results_art$results$p_value, 4) ) # Agreement cat("Both significant:", sum(comparison$glm_sig & comparison$art_sig), "\n") cat("GLM only:", sum(comparison$glm_sig & !comparison$art_sig), "\n") cat("ART only:", sum(!comparison$glm_sig & comparison$art_sig), "\n") cat("Neither:", sum(!comparison$glm_sig & !comparison$art_sig), "\n") ## ----disagreements------------------------------------------------------------ # Peptides where methods disagree disagreements <- comparison %>% filter(glm_sig != art_sig) %>% arrange(pmin(glm_pval, art_pval)) if (nrow(disagreements) > 0) { print(disagreements) } else { cat("No disagreements between methods\n") } ## ----art-diagnostics---------------------------------------------------------- results_art$diagnostics %>% head() ## ----check-convergence-------------------------------------------------------- n_converged <- sum(results_art$diagnostics$converged) n_failed <- sum(!results_art$diagnostics$converged) cat("Converged:", n_converged, "\n") cat("Failed:", n_failed, "\n") ## ----results-structure-------------------------------------------------------- glimpse(results_art$results) ## ----stratified-structure----------------------------------------------------- glimpse(results_strat$results) ## ----export-results, eval = FALSE--------------------------------------------- # # All results # write.csv(results_art$results, "art_results.csv", row.names = FALSE) # # # Significant hits only # results_art$results %>% # filter(significant) %>% # write.csv("art_significant.csv", row.names = FALSE) # # # Stratified results # write.csv(results_strat$results, "art_stratified.csv", row.names = FALSE) ## ----cleanup, include = FALSE------------------------------------------------- unlink(temp_file)