## ----include = FALSE---------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ## ----setup, message=FALSE, warning=FALSE-------------------------------------- library(pathdb) library(clusterProfiler) library(dplyr) # Preview the first few rows head(hypoxia_deseq) ## ----define_deg--------------------------------------------------------------- # Define up- and down- regulation based on LFC and padj df <- hypoxia_deseq |> as.data.frame() |> mutate(diffexp = case_when( log2FoldChange > 0 & padj < 0.05 ~ "UP", log2FoldChange < 0 & padj < 0.05 ~ "DOWN", TRUE ~ "NO" # using TRUE as catch-all for padj >= 0.1 or NA )) # Filter out non-significant or missing adjusted p-value genes df <- df[df$diffexp != "NO" & !is.na(df$diffexp), ] ## ----convert_ids, eval=FALSE-------------------------------------------------- # # Convert ids to Ensembl format to ensure compatibility # df <- convert_id( # genes = rownames(df), # data = df, # species_id = 96 # ) ## ----split_lists-------------------------------------------------------------- # Split into up or down groups in a list deg_results_list <- split(df, df$diffexp) ## ----t2g_prep, eval = FALSE--------------------------------------------------- # bg_genes <- T2G_prep( # species_id = 96, # category = c("KEGG"), # genes = rownames(hypoxia_deseq) # ) ## ----t2g_load, echo=FALSE----------------------------------------------------- bg_genes <- hypoxia_T2G ## ----run_ora------------------------------------------------------------------ padj_cutoff <- 0.1 genecount_cutoff <- 5 # Apply enricher across Up/Down lists res <- lapply(names(deg_results_list), function(direction) { enricher( gene = rownames(deg_results_list[[direction]]), pvalueCutoff = 0.05, TERM2GENE = bg_genes ) }) # Name the lists according to regulation names(res) <- names(deg_results_list) ## ----format_results----------------------------------------------------------- # Bind results by row res_df <- lapply(names(res), function(x) { res_chunk <- res[[x]]@result res_chunk$diffexp <- x # Keep track of the regulation direction return(res_chunk) }) res_df <- do.call(rbind, res_df) # Filter rows that meet our significance and count standards target_pws <- res_df$p.adjust < padj_cutoff & res_df$Count > genecount_cutoff # Filter and select top terms res_sig <- res_df[target_pws, ] |> arrange(p.adjust) |> group_by(diffexp) |> slice_head(n = 20) |> # get up to 20 top pathways per direction ungroup() # View enriched pathways for UP-regulated genes res_up <- res_sig |> filter(diffexp == "UP") head(res_up[, c("GeneRatio", "FoldEnrichment", "p.adjust")]) # View enriched pathways for DOWN-regulated genes res_down <- res_sig |> filter(diffexp == "DOWN") head(res_up[, c("GeneRatio", "FoldEnrichment", "p.adjust")]) ## ----gsea_prep---------------------------------------------------------------- # Create a ranked geneList geneList <- setNames( object = hypoxia_deseq$log2FoldChange, nm = rownames(hypoxia_deseq) ) # Order the list from highest to lowest geneList <- sort(geneList, decreasing = TRUE) ## ----run_gsea, warning=FALSE-------------------------------------------------- # Execute GSEA gsea <- GSEA( geneList = geneList, TERM2GENE = bg_genes, pvalueCutoff = 0.05, maxGSSize = 2000 ) # Extract and filter top results gsea_res <- gsea@result |> arrange(p.adjust) |> slice(1:20) head(gsea_res[, c("setSize", "enrichmentScore", "NES", "p.adjust")])