This tutorial demonstrates how to perform competitive gene
set testing using CAMERA (Correlation Adjusted
MEan RAnk gene set test) from the limma package.
CAMERA is a competitive gene set test that:
In this tutorial, we’ll use the ALL (Acute Lymphoblastic Leukemia) dataset to compare B-cell vs T-cell samples and identify enriched KEGG pathways.
We need several Bioconductor packages for this analysis:
library(limma)
library(ALL)
library(hgu95av2.db)
library(org.Hs.eg.db)
library(clusterProfiler)
Package purposes:
limma: Differential expression and CAMERA analysisALL: The acute lymphoblastic leukemia datasethgu95av2.db: Microarray platform annotationsorg.Hs.eg.db: Human gene annotations and KEGG
pathwaysclusterProfiler: For downloading KEGG pathway
descriptionsThe ALL dataset contains gene expression profiles from patients with acute lymphoblastic leukemia. We’ll compare B-cell and T-cell subtypes.
# Load the dataset
data(ALL)
# Examine the dataset structure
print(ALL)
## ExpressionSet (storageMode: lockedEnvironment)
## assayData: 12625 features, 128 samples
## element names: exprs
## protocolData: none
## phenoData
## sampleNames: 01005 01010 ... LAL4 (128 total)
## varLabels: cod diagnosis ... date last seen (21 total)
## varMetadata: labelDescription
## featureData: none
## experimentData: use 'experimentData(object)'
## pubMedIds: 14684422 16243790
## Annotation: hgu95av2
# View sample information
print("Sample phenotype data (first 5 samples, first 5 columns):")
## [1] "Sample phenotype data (first 5 samples, first 5 columns):"
print(pData(ALL)[1:5, 1:5])
## cod diagnosis sex age BT
## 01005 1005 5/21/1997 M 53 B2
## 01010 1010 3/29/2000 M 19 B2
## 03002 3002 6/24/1998 F 52 B4
## 04006 4006 7/17/1997 M 38 B1
## 04007 4007 7/22/1997 M 57 B2
We’ll focus on comparing B-cell and T-cell ALL samples.
# Identify B-cell samples (includes B, B1, B2, B3, B4 subtypes)
bcell_samples <- which(ALL$BT %in% c("B", "B1", "B2", "B3", "B4"))
# Identify T-cell samples
tcell_samples <- which(ALL$BT == "T")
# Create a subset with B-cell and T-cell samples
# Using 10 B-cell and all T-cell samples for a balanced comparison
samples_to_keep <- c(bcell_samples[1:10], tcell_samples)
ALL_subset <- ALL[, samples_to_keep]
cat("Number of samples selected:\n")
## Number of samples selected:
cat(" B-cell samples:", length(bcell_samples[1:10]), "\n")
## B-cell samples: 10
cat(" T-cell samples:", length(tcell_samples), "\n")
## T-cell samples: 5
cat(" Total:", length(samples_to_keep), "\n")
## Total: 15
We extract the expression data and create a design matrix for the linear model.
# Extract expression data
exprs_data <- exprs(ALL_subset)
# Create cell type factor
cell_type <- factor(ifelse(ALL_subset$BT == "T", "T", "B"))
# Create design matrix (group-means parameterization)
design <- model.matrix(~0 + cell_type)
colnames(design) <- c("B_cell", "T_cell")
cat("Design matrix structure:\n")
## Design matrix structure:
print(head(design))
## B_cell T_cell
## 1 1 0
## 2 1 0
## 3 1 0
## 4 1 0
## 5 1 0
## 6 1 0
cat("\nSample distribution:\n")
##
## Sample distribution:
print(table(cell_type))
## cell_type
## B T
## 10 5
Design matrix notes:
~0 + cell_type for a group-means
parameterizationWe use limma to identify differentially expressed genes
between T-cell and B-cell samples.
# Fit linear model
fit <- lmFit(exprs_data, design)
# Define contrast: T-cell vs B-cell
contrast_matrix <- makeContrasts(
TvsB = T_cell - B_cell,
levels = design
)
cat("Contrast matrix:\n")
## Contrast matrix:
print(contrast_matrix)
## Contrasts
## Levels TvsB
## B_cell -1
## T_cell 1
# Fit contrasts and apply empirical Bayes moderation
fit2 <- contrasts.fit(fit, contrast_matrix)
fit2 <- eBayes(fit2)
# View top differentially expressed genes
top_genes <- topTable(fit2, coef = "TvsB", number = 20)
cat("\nTop 20 differentially expressed genes:\n")
##
## Top 20 differentially expressed genes:
print(top_genes[, c("logFC", "AveExpr", "t", "P.Value", "adj.P.Val")])
## logFC AveExpr t P.Value adj.P.Val
## 38319_at 4.483282 6.385744 13.829293 3.094569e-10 3.906894e-06
## 38147_at 2.726426 4.519145 12.825759 9.269009e-10 5.851062e-06
## 2031_s_at -3.292335 8.801557 -11.095824 7.347931e-09 3.092254e-05
## 37039_at -2.678274 11.234888 -9.753845 4.397852e-08 1.388072e-04
## 41723_s_at -3.354824 8.195279 -8.804350 1.746216e-07 4.409197e-04
## 41409_at -1.954163 7.165671 -8.589302 2.420616e-07 4.421542e-04
## 2059_s_at 2.228525 7.176821 8.581013 2.451548e-07 4.421542e-04
## 38095_i_at -3.272734 10.384079 -8.465467 2.928754e-07 4.621940e-04
## 266_s_at -3.522492 7.100962 -8.141603 4.863535e-07 5.729679e-04
## 32506_at -1.204799 6.644397 -8.103231 5.169197e-07 5.729679e-04
## 32842_at -1.786812 7.956271 -8.087330 5.301713e-07 5.729679e-04
## 38096_f_at -3.505889 9.618419 -8.070480 5.446031e-07 5.729679e-04
## 41468_at 3.449934 7.576728 7.679060 1.026687e-06 9.931685e-04
## 32793_at 2.463675 8.432056 7.636465 1.101335e-06 9.931685e-04
## 1110_at 3.625333 6.993577 7.592573 1.184230e-06 9.967271e-04
## 33238_at 2.652044 7.271808 7.516535 1.343675e-06 1.060243e-03
## 38833_at -2.579441 10.288284 -7.334979 1.822289e-06 1.324982e-03
## 40473_at -1.218719 9.134394 -7.288674 1.970921e-06 1.324982e-03
## 35016_at -2.684294 10.388821 -7.281804 1.994032e-06 1.324982e-03
## 36773_f_at -2.572337 8.137303 -7.104531 2.699720e-06 1.704199e-03
Key statistics:
Gene set testing requires mapping microarray probes to genes and then to pathways.
# Get probe to Entrez Gene ID mapping from the array annotation
probe_to_entrez <- as.list(hgu95av2ENTREZID)
probe_to_entrez <- probe_to_entrez[!is.na(probe_to_entrez)]
# Map our probes to Entrez IDs
probe_ids <- rownames(exprs_data)
entrez_mapping <- unlist(probe_to_entrez[probe_ids])
entrez_mapping <- entrez_mapping[!is.na(entrez_mapping)]
cat("Probe to gene mapping:\n")
## Probe to gene mapping:
cat(" Total probes in dataset:", length(probe_ids), "\n")
## Total probes in dataset: 12625
cat(" Probes mapped to Entrez IDs:", length(entrez_mapping), "\n")
## Probes mapped to Entrez IDs: 11683
cat(" Mapping rate:", round(100 * length(entrez_mapping) / length(probe_ids), 1), "%\n")
## Mapping rate: 92.5 %
We use org.Hs.egPATH to map Entrez Gene IDs to KEGG
pathway IDs.
# Get gene-to-pathway mapping
pathway_map <- org.Hs.egPATH
mapped_genes <- mappedkeys(pathway_map)
gene_to_pathway <- as.list(pathway_map[mapped_genes])
cat("Gene-to-pathway mapping:\n")
## Gene-to-pathway mapping:
cat(" Total genes mapped to pathways:", length(gene_to_pathway), "\n")
## Total genes mapped to pathways: 5868
cat(" Example - first gene:", names(gene_to_pathway)[1], "\n")
## Example - first gene: 2
cat(" Pathways for this gene:", paste(gene_to_pathway[[1]], collapse = ", "), "\n")
## Pathways for this gene: 04610
CAMERA requires a list where each pathway contains its member genes.
# Create pathway-to-genes mapping
pathway_to_genes <- list()
for (gene_id in names(gene_to_pathway)) {
pathways <- gene_to_pathway[[gene_id]]
for (pathway_id in pathways) {
if (is.null(pathway_to_genes[[pathway_id]])) {
pathway_to_genes[[pathway_id]] <- character(0)
}
pathway_to_genes[[pathway_id]] <- c(pathway_to_genes[[pathway_id]], gene_id)
}
}
cat("Pathway-to-genes mapping:\n")
## Pathway-to-genes mapping:
cat(" Total KEGG pathways:", length(pathway_to_genes), "\n")
## Total KEGG pathways: 229
cat(" First few pathway IDs:", paste(names(pathway_to_genes)[1:5], collapse = ", "), "\n")
## First few pathway IDs: 04610, 00232, 00983, 01100, 00380
CAMERA requires pathways as lists of row indices in the expression matrix.
create_gene_set_indices <- function(pathway_list, gene_mapping, min_genes = 10) {
pathway_indices <- list()
for (pathway_id in names(pathway_list)) {
# Get genes in this pathway
pathway_genes <- pathway_list[[pathway_id]]
# Find probes that map to these genes
probes_in_pathway <- names(gene_mapping)[gene_mapping %in% pathway_genes]
# Get row indices of these probes in expression matrix
indices <- which(rownames(exprs_data) %in% probes_in_pathway)
# Only keep pathways with sufficient genes for statistical power
if (length(indices) >= min_genes) {
pathway_indices[[pathway_id]] <- indices
}
}
return(pathway_indices)
}
gene_set_indices <- create_gene_set_indices(pathway_to_genes, entrez_mapping)
cat("Gene set indices for CAMERA:\n")
## Gene set indices for CAMERA:
cat(" Pathways with ≥10 genes:", length(gene_set_indices), "\n")
## Pathways with ≥10 genes: 210
cat(" Example - Pathway", names(gene_set_indices)[1], "\n")
## Example - Pathway 04610
cat(" has", length(gene_set_indices[[1]]), "genes in the dataset\n")
## has 73 genes in the dataset
Why filter by pathway size?
CAMERA performs competitive gene set testing while accounting for inter-gene correlation.
# Run CAMERA
camera_results <- camera(
y = exprs_data,
index = gene_set_indices,
design = design,
contrast = contrast_matrix[, "TvsB"],
inter.gene.cor = 0.01
)
cat("CAMERA analysis complete!\n")
## CAMERA analysis complete!
cat(" Pathways tested:", nrow(camera_results), "\n\n")
## Pathways tested: 210
cat("Top 15 enriched KEGG pathways:\n")
## Top 15 enriched KEGG pathways:
print(head(camera_results, 15))
## NGenes Direction PValue FDR
## 03010 81 Down 1.188895e-09 2.496680e-07
## 05310 35 Down 2.958238e-08 2.310987e-06
## 04612 90 Down 3.301411e-08 2.310987e-06
## 03040 136 Down 9.657140e-08 5.069998e-06
## 05131 100 Down 1.296692e-06 4.994151e-05
## 05322 98 Down 1.490195e-06 4.994151e-05
## 05332 48 Down 1.664717e-06 4.994151e-05
## 05140 111 Down 2.834588e-06 7.440794e-05
## 05330 51 Down 2.748009e-05 6.412020e-04
## 03050 50 Down 3.210918e-05 6.742929e-04
## 00982 71 Up 5.769479e-05 1.101446e-03
## 05130 83 Down 9.191080e-05 1.608439e-03
## 03013 134 Down 1.088055e-04 1.757628e-03
## 05416 99 Down 1.282019e-04 1.923028e-03
## 05100 108 Down 2.195612e-04 2.921721e-03
CAMERA parameters:
0.01: Small positive correlation (default
assumption)NA: Estimate from data (slower but more accurate)Output columns:
Raw pathway IDs are not interpretable, so we’ll add pathway names and descriptions.
cat("Downloading KEGG pathway information...\n")
## Downloading KEGG pathway information...
# Download KEGG data using clusterProfiler
kegg_info <- download_KEGG("hsa")
cat("\nStructure of downloaded data:\n")
##
## Structure of downloaded data:
str(kegg_info)
## List of 2
## $ KEGGPATHID2EXTID:'data.frame': 39690 obs. of 2 variables:
## ..$ from: chr [1:39690] "hsa00010" "hsa00010" "hsa00010" "hsa00010" ...
## ..$ to : chr [1:39690] "10327" "124" "125" "126" ...
## $ KEGGPATHID2NAME :'data.frame': 368 obs. of 2 variables:
## ..$ from: chr [1:368] "hsa01100" "hsa01200" "hsa01210" "hsa01212" ...
## ..$ to : chr [1:368] "Metabolic pathways" "Carbon metabolism" "2-Oxocarboxylic acid metabolism" "Fatty acid metabolism" ...
The kegg_info object contains:
# Extract pathway names
pathway_names <- setNames(
kegg_info$KEGGPATHID2NAME$to,
kegg_info$KEGGPATHID2NAME$from
)
cat("Pathway name mapping:\n")
## Pathway name mapping:
cat(" Total pathways with names:", length(pathway_names), "\n\n")
## Total pathways with names: 368
cat("Example pathway names:\n")
## Example pathway names:
print(head(pathway_names, 3))
## hsa01100 hsa01200
## "Metabolic pathways" "Carbon metabolism"
## hsa01210
## "2-Oxocarboxylic acid metabolism"
# Get pathway IDs from CAMERA results
pathway_ids <- rownames(camera_results)
# Pad pathway IDs with leading zeros to match KEGG format
# Example: "810" becomes "00810", then "hsa00810"
pathway_ids_padded <- sprintf("%05s", pathway_ids)
pathway_ids_formatted <- paste0("hsa", pathway_ids_padded)
cat("Example of ID formatting:\n")
## Example of ID formatting:
formatting_example <- data.frame(
Original = head(pathway_ids),
Padded = head(pathway_ids_padded),
Formatted = head(pathway_ids_formatted)
)
print(formatting_example)
## Original Padded Formatted
## 1 03010 03010 hsa03010
## 2 05310 05310 hsa05310
## 3 04612 04612 hsa04612
## 4 03040 03040 hsa03040
## 5 05131 05131 hsa05131
## 6 05322 05322 hsa05322
# Add descriptions to results
camera_results$Description <- pathway_names[pathway_ids_formatted]
# Mark missing descriptions as NA
camera_results$Description[is.na(camera_results$Description)] <- NA
cat("\nDescriptions successfully added!\n\n")
##
## Descriptions successfully added!
cat("Top 10 pathways with descriptions:\n")
## Top 10 pathways with descriptions:
print(camera_results[1:10, c("NGenes", "Direction", "PValue", "FDR", "Description")])
## NGenes Direction PValue FDR
## 03010 81 Down 1.188895e-09 2.496680e-07
## 05310 35 Down 2.958238e-08 2.310987e-06
## 04612 90 Down 3.301411e-08 2.310987e-06
## 03040 136 Down 9.657140e-08 5.069998e-06
## 05131 100 Down 1.296692e-06 4.994151e-05
## 05322 98 Down 1.490195e-06 4.994151e-05
## 05332 48 Down 1.664717e-06 4.994151e-05
## 05140 111 Down 2.834588e-06 7.440794e-05
## 05330 51 Down 2.748009e-05 6.412020e-04
## 03050 50 Down 3.210918e-05 6.742929e-04
## Description
## 03010 Ribosome
## 05310 Asthma
## 04612 Antigen processing and presentation
## 03040 Spliceosome
## 05131 Shigellosis
## 05322 Systemic lupus erythematosus
## 05332 Graft-versus-host disease
## 05140 Leishmaniasis
## 05330 Allograft rejection
## 03050 Proteasome
Why pad with zeros?
Visualization helps identify the most significant pathways and their direction of enrichment.
# Select top pathways for visualization
top_n <- 15
top_pathways <- head(camera_results, top_n)
# Create barplot
par(mar = c(5, 15, 4, 2))
barplot(
-log10(top_pathways$FDR),
horiz = TRUE,
names.arg = top_pathways$Description,
las = 1,
main = "Top KEGG Pathways (CAMERA Analysis)\nT-cell vs B-cell ALL",
xlab = "-log10(FDR)",
col = ifelse(top_pathways$Direction == "Up", "steelblue", "coral")
)
abline(v = -log10(0.05), lty = 2, col = "red")
legend("bottomright",
legend = c("Up in T-cell", "Down in T-cell", "FDR = 0.05"),
fill = c("steelblue", "coral", NA),
border = c("black", "black", NA),
lty = c(NA, NA, 2),
col = c(NA, NA, "red"))
Interpretation:
Let’s quantify how many pathways are significantly enriched at different thresholds.
# Extract significant pathways (FDR < 0.05)
significant_pathways <- camera_results[camera_results$FDR < 0.05, ]
cat("=======================================================\n")
## =======================================================
cat("SUMMARY OF CAMERA RESULTS\n")
## SUMMARY OF CAMERA RESULTS
cat("=======================================================\n\n")
## =======================================================
cat("Total pathways tested:", nrow(camera_results), "\n")
## Total pathways tested: 210
cat("Significant pathways (FDR < 0.05):", nrow(significant_pathways), "\n")
## Significant pathways (FDR < 0.05): 37
cat(" Up-regulated in T-cells:", sum(significant_pathways$Direction == "Up"), "\n")
## Up-regulated in T-cells: 8
cat(" Down-regulated in T-cells:", sum(significant_pathways$Direction == "Down"), "\n\n")
## Down-regulated in T-cells: 29
# Show additional significance thresholds
cat("Pathways at different FDR thresholds:\n")
## Pathways at different FDR thresholds:
cat(" FDR < 0.01:", sum(camera_results$FDR < 0.01), "\n")
## FDR < 0.01: 26
cat(" FDR < 0.001:", sum(camera_results$FDR < 0.001), "\n\n")
## FDR < 0.001: 10
# Show top significant pathways
cat("Top 10 most significant pathways:\n")
## Top 10 most significant pathways:
print(significant_pathways[1:min(10, nrow(significant_pathways)),
c("NGenes", "Direction", "PValue", "FDR", "Description")])
## NGenes Direction PValue FDR
## 03010 81 Down 1.188895e-09 2.496680e-07
## 05310 35 Down 2.958238e-08 2.310987e-06
## 04612 90 Down 3.301411e-08 2.310987e-06
## 03040 136 Down 9.657140e-08 5.069998e-06
## 05131 100 Down 1.296692e-06 4.994151e-05
## 05322 98 Down 1.490195e-06 4.994151e-05
## 05332 48 Down 1.664717e-06 4.994151e-05
## 05140 111 Down 2.834588e-06 7.440794e-05
## 05330 51 Down 2.748009e-05 6.412020e-04
## 03050 50 Down 3.210918e-05 6.742929e-04
## Description
## 03010 Ribosome
## 05310 Asthma
## 04612 Antigen processing and presentation
## 03040 Spliceosome
## 05131 Shigellosis
## 05322 Systemic lupus erythematosus
## 05332 Graft-versus-host disease
## 05140 Leishmaniasis
## 05330 Allograft rejection
## 03050 Proteasome
To understand the difference between competitive and self-contained tests, let’s compare CAMERA with MROAST.
cat("Running MROAST for comparison...\n")
## Running MROAST for comparison...
cat("(Testing first 50 pathways for speed)\n\n")
## (Testing first 50 pathways for speed)
# Run MROAST (self-contained test)
mroast_results <- mroast(
y = exprs_data,
index = gene_set_indices[1:min(50, length(gene_set_indices))],
design = design,
contrast = contrast_matrix[, "TvsB"],
nrot = 999
)
cat("Top 10 pathways from MROAST:\n")
## Top 10 pathways from MROAST:
print(head(mroast_results, 10))
## NGenes PropDown PropUp Direction PValue FDR PValue.Mixed
## 00410 23 0.00000000 0.4782609 Up 0.001 0.00625000 0.012
## 00592 11 0.00000000 0.4545455 Up 0.001 0.00625000 0.002
## 04146 61 0.04918033 0.3278689 Up 0.001 0.00625000 0.012
## 05131 100 0.27000000 0.0500000 Down 0.001 0.00625000 0.072
## 00071 43 0.02325581 0.4418605 Up 0.002 0.01071429 0.004
## 00650 31 0.00000000 0.4193548 Up 0.002 0.01071429 0.023
## 00250 33 0.03030303 0.3636364 Up 0.002 0.01071429 0.022
## 00640 29 0.00000000 0.4482759 Up 0.003 0.01562500 0.023
## 00340 22 0.04545455 0.4545455 Up 0.004 0.01944444 0.005
## 00620 39 0.07692308 0.3076923 Up 0.005 0.02045455 0.009
## FDR.Mixed
## 00410 0.03382353
## 00592 0.01875000
## 04146 0.03382353
## 05131 0.08055556
## 00071 0.02500000
## 00650 0.03750000
## 00250 0.03750000
## 00640 0.03750000
## 00340 0.02500000
## 00620 0.03035714
# Compare pathways tested by both methods
common_pathways <- intersect(rownames(camera_results), rownames(mroast_results))
comparison <- data.frame(
PathwayID = common_pathways,
CAMERA_PValue = camera_results[common_pathways, "PValue"],
MROAST_PValue = mroast_results[common_pathways, "PValue"],
CAMERA_FDR = camera_results[common_pathways, "FDR"],
MROAST_FDR = mroast_results[common_pathways, "FDR"],
CAMERA_Direction = camera_results[common_pathways, "Direction"],
MROAST_Direction = mroast_results[common_pathways, "Direction"]
)
cat("\nComparison of top 10 pathways:\n")
##
## Comparison of top 10 pathways:
print(head(comparison[order(comparison$CAMERA_PValue), ], 10))
## PathwayID CAMERA_PValue MROAST_PValue CAMERA_FDR MROAST_FDR
## 1 05131 1.296692e-06 0.001 4.994151e-05 0.0062500
## 2 05130 9.191080e-05 0.043 1.608439e-03 0.1011905
## 3 05416 1.282019e-04 0.061 1.923028e-03 0.1315217
## 4 05220 2.504333e-04 0.267 2.921721e-03 0.4053030
## 5 04110 5.153455e-04 0.242 4.919207e-03 0.4053030
## 6 04145 7.599988e-04 0.232 6.939119e-03 0.4053030
## 7 05323 1.075522e-03 0.193 9.034388e-03 0.3620370
## 8 04520 4.997876e-03 0.365 3.086923e-02 0.4925676
## 9 04722 6.447600e-03 0.699 3.840846e-02 0.8315476
## 10 04380 2.987963e-02 0.958 1.228479e-01 0.9580000
## CAMERA_Direction MROAST_Direction
## 1 Down Down
## 2 Down Down
## 3 Down Down
## 4 Down Down
## 5 Down Down
## 6 Down Down
## 7 Down Down
## 8 Down Down
## 9 Down Down
## 10 Down Up
# Calculate correlation
cor_result <- cor(comparison$CAMERA_PValue, comparison$MROAST_PValue,
method = "spearman")
cat("\nSpearman correlation between CAMERA and MROAST p-values:",
round(cor_result, 3), "\n")
##
## Spearman correlation between CAMERA and MROAST p-values: 0.134
Key Differences:
The inter.gene.cor parameter is crucial for CAMERA.
Let’s see how it affects results.
cat("Testing different inter.gene.cor values...\n\n")
## Testing different inter.gene.cor values...
# Test with different correlation assumptions
cor_values <- c(0.01, 0.05, 0.1, NA)
results_list <- list()
for (cor_val in cor_values) {
label <- ifelse(is.na(cor_val), "Estimated", as.character(cor_val))
cat("Running CAMERA with inter.gene.cor =", label, "...\n")
results_list[[label]] <- camera(
y = exprs_data,
index = gene_set_indices[1:20], # Test subset for speed
design = design,
contrast = contrast_matrix[, "TvsB"],
inter.gene.cor = cor_val
)
}
## Running CAMERA with inter.gene.cor = 0.01 ...
## Running CAMERA with inter.gene.cor = 0.05 ...
## Running CAMERA with inter.gene.cor = 0.1 ...
## Running CAMERA with inter.gene.cor = Estimated ...
cat("\nComparison of top pathway with different correlation values:\n")
##
## Comparison of top pathway with different correlation values:
pathway_example <- names(gene_set_indices)[1]
comparison_cor <- data.frame(
Correlation = names(results_list),
PValue = sapply(results_list, function(x) x[pathway_example, "PValue"]),
FDR = sapply(results_list, function(x) x[pathway_example, "FDR"]),
Direction = sapply(results_list, function(x) x[pathway_example, "Direction"])
)
print(comparison_cor)
## Correlation PValue FDR Direction
## 0.01 0.01 0.03911166 0.09777915 Up
## 0.05 0.05 0.20662592 0.41325184 Up
## 0.1 0.1 0.34406472 0.65298613 Up
## Estimated Estimated 0.31564089 0.48560137 Up
Guidelines for inter.gene.cor:
cat("\nGuidelines for choosing inter.gene.cor:\n\n")
##
## Guidelines for choosing inter.gene.cor:
guidelines <- data.frame(
Setting = c("0.01 (default)", "0.05-0.1", "NA (estimate)", "0.2+"),
`Use When` = c(
"General analysis, quick results",
"Known moderate pathway correlation",
"Most accurate, worth the computation time",
"Strong correlation expected (e.g., protein complexes)"
),
Speed = c("Fast", "Fast", "Slow", "Fast"),
Conservativeness = c("Moderate", "More conservative", "Data-driven", "Very conservative")
)
print(guidelines)
## Setting Use.When Speed
## 1 0.01 (default) General analysis, quick results Fast
## 2 0.05-0.1 Known moderate pathway correlation Fast
## 3 NA (estimate) Most accurate, worth the computation time Slow
## 4 0.2+ Strong correlation expected (e.g., protein complexes) Fast
## Conservativeness
## 1 Moderate
## 2 More conservative
## 3 Data-driven
## 4 Very conservative
Let’s examine some interesting pathways in detail.
# Get top up and down-regulated pathways
top_up <- camera_results[camera_results$Direction == "Up", ][1:5, ]
top_down <- camera_results[camera_results$Direction == "Down", ][1:5, ]
cat("Top 5 up-regulated pathways in T-cells:\n")
## Top 5 up-regulated pathways in T-cells:
print(top_up[, c("NGenes", "PValue", "FDR", "Description")])
## NGenes PValue FDR
## 00982 71 5.769479e-05 0.001101446
## 00980 63 2.371138e-04 0.002921721
## 00350 44 3.372527e-04 0.003727530
## 00830 54 1.227877e-03 0.009917466
## 00591 30 1.988225e-03 0.014261384
## Description
## 00982 Drug metabolism - cytochrome P450
## 00980 Metabolism of xenobiotics by cytochrome P450
## 00350 Tyrosine metabolism
## 00830 Retinol metabolism
## 00591 Linoleic acid metabolism
cat("\nTop 5 down-regulated pathways in T-cells:\n")
##
## Top 5 down-regulated pathways in T-cells:
print(top_down[, c("NGenes", "PValue", "FDR", "Description")])
## NGenes PValue FDR Description
## 03010 81 1.188895e-09 2.496680e-07 Ribosome
## 05310 35 2.958238e-08 2.310987e-06 Asthma
## 04612 90 3.301411e-08 2.310987e-06 Antigen processing and presentation
## 03040 136 9.657140e-08 5.069998e-06 Spliceosome
## 05131 100 1.296692e-06 4.994151e-05 Shigellosis
# Select an interesting pathway
selected_pathway_id <- rownames(camera_results)[1]
selected_pathway_indices <- gene_set_indices[[selected_pathway_id]]
# Get gene-level statistics for this pathway
pathway_genes_stats <- topTable(
fit2,
coef = "TvsB",
number = Inf,
sort.by = "none"
)[selected_pathway_indices, ]
cat("Gene-level statistics for pathway:",
camera_results[selected_pathway_id, "Description"], "\n\n")
## Gene-level statistics for pathway: Ribosome
cat("Summary of log fold changes:\n")
## Summary of log fold changes:
print(summary(pathway_genes_stats$logFC))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -1.11335 -0.32350 -0.17912 -0.18650 -0.06581 0.41944
cat("\nTop 10 genes in this pathway:\n")
##
## Top 10 genes in this pathway:
print(head(pathway_genes_stats[order(pathway_genes_stats$P.Value), ], 10)[,
c("logFC", "AveExpr", "t", "P.Value", "adj.P.Val")])
## logFC AveExpr t P.Value adj.P.Val
## 41135_at 0.4194414 3.615144 3.314355 0.004453154 0.08060564
## 32394_s_at -1.1133485 9.439332 -3.147266 0.006315851 0.09358876
## 32413_at 0.3747050 3.723998 3.140657 0.006403561 0.09433484
## 31907_at -0.3597215 11.035304 -2.742663 0.014586511 0.13186934
## 41178_at -0.3304490 12.196345 -2.714956 0.015436133 0.13542820
## 33117_r_at -0.3938975 11.921959 -2.656481 0.017388509 0.14405738
## 32440_at -0.3417469 11.362664 -2.643143 0.017866005 0.14541817
## 32433_at -0.5176802 8.268898 -2.519150 0.022948404 0.16346849
## 34646_at -0.3828399 10.530080 -2.452929 0.026199819 0.17254706
## 32337_at -0.4140099 11.008792 -2.434224 0.027194528 0.17483596
Save the results for further analysis or reporting.
# Create output directory
if (!dir.exists("results")) {
dir.create("results")
}
# Export significant pathways
write.csv(
camera_results,
file = "results/camera_all_pathways.csv",
row.names = TRUE
)
write.csv(
significant_pathways,
file = "results/camera_significant_pathways.csv",
row.names = TRUE
)
# Export top genes for significant pathways
for (i in 1:min(5, nrow(significant_pathways))) {
pathway_id <- rownames(significant_pathways)[i]
pathway_indices <- gene_set_indices[[pathway_id]]
pathway_stats <- topTable(
fit2,
coef = "TvsB",
number = Inf,
sort.by = "P"
)[pathway_indices, ]
filename <- paste0("results/pathway_", pathway_id, "_genes.csv")
write.csv(pathway_stats, file = filename, row.names = TRUE)
}
cat("Results exported to 'results/' directory\n")
Advantages:
Limitations:
Use CAMERA when:
Use ROAST/MROAST/FRY when:
| Parameter | Purpose | Recommendations |
|---|---|---|
inter.gene.cor |
Controls correlation adjustment | 0.01 (default), NA (estimate), 0.05-0.1 (moderate) |
sort |
How to sort results | “directional” (default), “mixed”, “none” |
min pathway size |
Filter small pathways | 10-15 genes minimum |
FDR threshold |
Significance cutoff | 0.05 (standard), 0.01 (stringent) |
The pathways identified by CAMERA represent coordinated changes in gene expression between T-cell and B-cell ALL samples.
B-cell and T-cell leukemias should show differences in:
To validate CAMERA results:
In this tutorial, we demonstrated:
sessionInfo()
## R version 4.5.1 (2025-06-13)
## Platform: aarch64-apple-darwin20
## Running under: macOS Tahoe 26.0.1
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.12.1
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## time zone: America/New_York
## tzcode source: internal
##
## attached base packages:
## [1] stats4 stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] clusterProfiler_4.16.0 hgu95av2.db_3.13.0 org.Hs.eg.db_3.21.0
## [4] AnnotationDbi_1.70.0 IRanges_2.42.0 S4Vectors_0.46.0
## [7] ALL_1.50.0 Biobase_2.68.0 BiocGenerics_0.55.1
## [10] generics_0.1.4 limma_3.64.3
##
## loaded via a namespace (and not attached):
## [1] DBI_1.2.3 gson_0.1.0 rlang_1.1.6
## [4] magrittr_2.0.3 DOSE_4.2.0 compiler_4.5.1
## [7] RSQLite_2.4.3 png_0.1-8 vctrs_0.6.5
## [10] reshape2_1.4.4 stringr_1.5.2 pkgconfig_2.0.3
## [13] crayon_1.5.3 fastmap_1.2.0 XVector_0.48.0
## [16] rmarkdown_2.29 enrichplot_1.28.4 UCSC.utils_1.4.0
## [19] purrr_1.1.0 bit_4.6.0 xfun_0.53
## [22] cachem_1.1.0 aplot_0.2.8 GenomeInfoDb_1.44.2
## [25] jsonlite_2.0.0 blob_1.2.4 BiocParallel_1.42.1
## [28] parallel_4.5.1 R6_2.6.1 bslib_0.9.0
## [31] stringi_1.8.7 RColorBrewer_1.1-3 jquerylib_0.1.4
## [34] GOSemSim_2.34.0 Rcpp_1.1.0 knitr_1.50
## [37] ggtangle_0.0.7 R.utils_2.13.0 Matrix_1.7-3
## [40] splines_4.5.1 igraph_2.1.4 tidyselect_1.2.1
## [43] qvalue_2.40.0 rstudioapi_0.17.1 dichromat_2.0-0.1
## [46] yaml_2.3.10 codetools_0.2-20 lattice_0.22-7
## [49] tibble_3.3.0 plyr_1.8.9 treeio_1.32.0
## [52] KEGGREST_1.48.1 evaluate_1.0.5 gridGraphics_0.5-1
## [55] Biostrings_2.76.0 pillar_1.11.0 ggtree_3.16.3
## [58] ggfun_0.2.0 ggplot2_3.5.2 scales_1.4.0
## [61] tidytree_0.4.6 glue_1.8.0 lazyeval_0.2.2
## [64] tools_4.5.1 data.table_1.17.8 fgsea_1.34.2
## [67] fs_1.6.6 fastmatch_1.1-6 cowplot_1.2.0
## [70] grid_4.5.1 tidyr_1.3.1 ape_5.8-1
## [73] colorspace_2.1-1 nlme_3.1-168 GenomeInfoDbData_1.2.14
## [76] patchwork_1.3.2 cli_3.6.5 rappdirs_0.3.3
## [79] dplyr_1.1.4 gtable_0.3.6 R.methodsS3_1.8.2
## [82] yulab.utils_0.2.1 sass_0.4.10 digest_0.6.37
## [85] ggrepel_0.9.6 ggplotify_0.1.2 farver_2.1.2
## [88] memoise_2.0.1 htmltools_0.5.8.1 R.oo_1.27.1
## [91] lifecycle_1.0.4 httr_1.4.7 GO.db_3.21.0
## [94] statmod_1.5.0 bit64_4.6.0-1