## ----include = FALSE---------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>", fig.width = 7, fig.height = 5 ) ## ----setup, message = FALSE--------------------------------------------------- library(pepdiff) library(dplyr) ## ----generate-data------------------------------------------------------------ set.seed(456) n_peptides <- 40 n_reps <- 5 # 5 reps per condition = 20 observations per peptide peptides <- paste0("PEP_", sprintf("%03d", 1:n_peptides)) genes <- paste0("GENE_", LETTERS[((1:n_peptides - 1) %% 26) + 1]) # Step 1: Create base abundance for each peptide # Each peptide gets ONE base value shared across all its observations peptide_info <- tibble( peptide = peptides, gene_id = genes, pep_num = 1:n_peptides, base = rgamma(n_peptides, shape = 6, rate = 0.6) # Random baseline abundance ) # Step 2: Create the experimental design (all combinations) # 40 peptides × 2 treatments × 2 timepoints × 5 reps = 800 observations design <- expand.grid( peptide = peptides, treatment = c("ctrl", "trt"), timepoint = c("0h", "24h"), bio_rep = 1:n_reps, stringsAsFactors = FALSE ) # Step 3: Join design with peptide info and apply effects sim_data <- design %>% left_join(peptide_info, by = "peptide") %>% mutate( # Group A (peptides 1-10): Treatment effect only # trt is 3-fold higher than ctrl, same at both timepoints trt_effect = ifelse(pep_num <= 10 & treatment == "trt", 3, 1), # Group B (peptides 11-20): Timepoint effect only # 24h is 3-fold higher than 0h, same for both treatments # NO treatment effect - ctrl and trt should be identical time_effect = ifelse(pep_num > 10 & pep_num <= 20 & timepoint == "24h", 3, 1), # Group C (peptides 21-30): Interaction # Treatment works at 24h (4-fold) but NOT at 0h int_effect = case_when( pep_num > 20 & pep_num <= 30 & treatment == "trt" & timepoint == "24h" ~ 4, TRUE ~ 1 ), # Group D (peptides 31-40): No effect # All effect multipliers are 1 (no change) # Final abundance = base × all effects, with Gamma noise value = rgamma(n(), shape = 15, rate = 15 / (base * trt_effect * time_effect * int_effect)) ) %>% select(peptide, gene_id, treatment, timepoint, bio_rep, value) # Import temp_file <- tempfile(fileext = ".csv") write.csv(sim_data, temp_file, row.names = FALSE) dat <- read_pepdiff( temp_file, id = "peptide", gene = "gene_id", value = "value", factors = c("treatment", "timepoint"), replicate = "bio_rep" ) dat ## ----basic-glm---------------------------------------------------------------- results <- compare( dat, compare = "treatment", ref = "ctrl", method = "glm" ) results ## ----view-results------------------------------------------------------------- results$results %>% mutate( pep_num = as.numeric(gsub("PEP_", "", peptide)), group = case_when( pep_num <= 10 ~ "A: Treatment only", pep_num <= 20 ~ "B: Timepoint only", pep_num <= 30 ~ "C: Interaction", TRUE ~ "D: No effect" ) ) %>% group_by(group) %>% summarise( n_significant = sum(significant), n_peptides = n(), median_fc = round(median(fold_change, na.rm = TRUE), 2) ) ## ----plot-glm-results, fig.height = 6----------------------------------------- plot(results) ## ----stratified--------------------------------------------------------------- results_stratified <- compare( dat, compare = "treatment", ref = "ctrl", within = "timepoint", method = "glm" ) results_stratified ## ----view-stratified---------------------------------------------------------- results_stratified$results %>% mutate( pep_num = as.numeric(gsub("PEP_", "", peptide)), group = case_when( pep_num <= 10 ~ "A: Treatment only", pep_num <= 20 ~ "B: Timepoint only", pep_num <= 30 ~ "C: Interaction", TRUE ~ "D: No effect" ) ) %>% group_by(group, timepoint) %>% summarise( n_significant = sum(significant), median_fc = round(median(fold_change, na.rm = TRUE), 2), .groups = "drop" ) %>% tidyr::pivot_wider( names_from = timepoint, values_from = c(n_significant, median_fc) ) ## ----check-interaction-------------------------------------------------------- # Compare main vs stratified for all groups main_summary <- results$results %>% mutate( pep_num = as.numeric(gsub("PEP_", "", peptide)), group = case_when( pep_num <= 10 ~ "A", pep_num <= 20 ~ "B", pep_num <= 30 ~ "C", TRUE ~ "D" ) ) %>% group_by(group) %>% summarise(main_fc = round(median(fold_change, na.rm = TRUE), 2)) strat_summary <- results_stratified$results %>% mutate( pep_num = as.numeric(gsub("PEP_", "", peptide)), group = case_when( pep_num <= 10 ~ "A", pep_num <= 20 ~ "B", pep_num <= 30 ~ "C", TRUE ~ "D" ) ) %>% group_by(group, timepoint) %>% summarise(fc = round(median(fold_change, na.rm = TRUE), 2), .groups = "drop") %>% tidyr::pivot_wider(names_from = timepoint, values_from = fc) left_join(main_summary, strat_summary, by = "group") ## ----check-diagnostics-------------------------------------------------------- # Check diagnostics n_converged <- sum(results$diagnostics$converged) n_failed <- sum(!results$diagnostics$converged) cat("Converged:", n_converged, "\n") cat("Failed:", n_failed, "\n") ## ----view-failed, eval = FALSE------------------------------------------------ # # View failed peptides (if any) # results$diagnostics %>% # filter(!converged) %>% # select(peptide, converged) ## ----model-diag--------------------------------------------------------------- # Deviance: lower is better fit (relative to other peptides) results$diagnostics %>% arrange(desc(deviance)) %>% head() ## ----results-structure-------------------------------------------------------- # Main effect results glimpse(results$results) ## ----stratified-structure----------------------------------------------------- # Stratified results - note the timepoint column glimpse(results_stratified$results) ## ----export-results, eval = FALSE--------------------------------------------- # # All results # write.csv(results$results, "glm_results.csv", row.names = FALSE) # # # Significant hits only # results$results %>% # filter(significant) %>% # write.csv("significant_hits.csv", row.names = FALSE) # # # Stratified results (one row per peptide per timepoint) # write.csv(results_stratified$results, "stratified_results.csv", row.names = FALSE) ## ----cleanup, include = FALSE------------------------------------------------- unlink(temp_file)