Load data:
library(curatedOvarianData)
library(gdata)
library(affy)
library(metafor)
library(xtable)
library(survival)
x <- read.xls("../input/Supplementary Table 1 - Tumor and stromal genes.xlsx",
as.is = TRUE)
head(x)
Create a list of ExpressionSets from curatedOvarianData:
dir.create("../output/data", recursive = TRUE)
source("../input/patientselection.config")
source("createEsetList.R")
## 2013-11-27 12:06:45 INFO::Inside script createEsetList.R - inputArgs =
## 2013-11-27 12:06:45 INFO::
## 2013-11-27 12:06:45 INFO::Loading curatedOvarianData 1.0.5
## 2013-11-27 12:07:16 INFO::Removing PMID17290060_eset (remove.datasets)
## 2013-11-27 12:07:16 INFO::Clean up the esets.
## 2013-11-27 12:07:16 INFO::including E.MTAB.386_eset
## 2013-11-27 12:07:16 INFO::excluding GSE12418_eset (min.number.of.events or min.sample.size)
## 2013-11-27 12:07:16 INFO::excluding GSE12470_eset (min.number.of.events or min.sample.size)
## 2013-11-27 12:07:17 INFO::including GSE13876_eset
## 2013-11-27 12:07:17 INFO::excluding GSE14764_eset (min.number.of.events or min.sample.size)
## 2013-11-27 12:07:17 INFO::including GSE17260_eset
## 2013-11-27 12:07:17 INFO::including GSE18520_eset
## 2013-11-27 12:07:17 INFO::excluding GSE19829.GPL570_eset (min.number.of.events or min.sample.size)
## 2013-11-27 12:07:17 INFO::including GSE19829.GPL8300_eset
## 2013-11-27 12:07:18 INFO::excluding GSE20565_eset (min.number.of.events or min.sample.size)
## 2013-11-27 12:07:18 INFO::excluding GSE2109_eset (min.number.of.events or min.sample.size)
## 2013-11-27 12:07:18 INFO::including GSE26712_eset
## 2013-11-27 12:07:18 INFO::excluding GSE30009_eset (min.number.of.genes)
## 2013-11-27 12:07:18 INFO::excluding GSE30161_eset (min.number.of.events or min.sample.size)
## 2013-11-27 12:07:19 INFO::including GSE32062.GPL6480_eset
## 2013-11-27 12:07:19 INFO::excluding GSE32063_eset (min.number.of.events or min.sample.size)
## 2013-11-27 12:07:19 INFO::excluding GSE6008_eset (min.number.of.events or min.sample.size)
## 2013-11-27 12:07:19 INFO::excluding GSE6822_eset (min.number.of.events or min.sample.size)
## 2013-11-27 12:07:19 INFO::including GSE9891_eset
## 2013-11-27 12:07:19 INFO::excluding PMID15897565_eset (min.number.of.events or min.sample.size)
## 2013-11-27 12:07:19 INFO::excluding PMID19318476_eset (min.number.of.events or min.sample.size)
## 2013-11-27 12:07:20 INFO::including TCGA_eset
## 2013-11-27 12:07:20 INFO::excluding TCGA.mirna.8x15kv2_eset (min.number.of.genes)
## 2013-11-27 12:07:21 INFO::Ids with missing data:
Invisibly convert dataset names to more human readable ones here…
Define the forestplot function:
forestplot <- function(esets, y = "y", probeset, formula = y ~ probeset, mlab = "Overall",
rma.method = "FE", at = NULL, xlab = "Hazard Ratio", ...) {
require(metafor)
esets <- esets[sapply(esets, function(x) probeset %in% featureNames(x))]
coefs <- sapply(1:length(esets), function(i) {
tmp <- as(phenoData(esets[[i]]), "data.frame")
tmp$y <- esets[[i]][[y]]
tmp$probeset <- exprs(esets[[i]])[probeset, ]
summary(coxph(formula, data = tmp))$coefficients[1, c(1, 3)]
})
res.rma <- metafor::rma(yi = coefs[1, ], sei = coefs[2, ], method = rma.method)
if (is.null(at))
at <- log(c(0.25, 1, 4, 20))
forest.rma(res.rma, xlab = xlab, slab = gsub("_eset$", "", names(esets)),
atransf = exp, at = at, mlab = mlab, ...)
return(res.rma)
}
Create a simple average stromal content score for each patient. After scaling each gene to z score, this is the simple mean expression of stromal genes minus the mean expression of tumor genes.
## Create random assignments of stromal and tumor genes:
set.seed(10)
tmp <- as.character(c(x$Stromal.gene.symbols, x$Tumor.gene.symbols))
tmp <- tmp[tmp != ""]
random.stromal.genes <- sample(tmp, size = round(length(tmp)/2))
random.tumor.genes <- tmp[!tmp %in% random.stromal.genes]
esets.small <- esets
for (i in 1:length(esets)) {
expr.stromal <- exprs(esets[[i]][featureNames(esets[[i]]) %in% x$Stromal.gene.symbols,
])
expr.tumor <- exprs(esets[[i]][featureNames(esets[[i]]) %in% x$Tumor.gene.symbols,
])
random.expr.stromal <- exprs(esets[[i]][featureNames(esets[[i]]) %in% random.stromal.genes,
])
random.expr.tumor <- exprs(esets[[i]][featureNames(esets[[i]]) %in% random.tumor.genes,
])
loginfo(paste(c(names(esets)[i], ": ", nrow(expr.stromal), " stromal, ",
nrow(expr.tumor), " tumor genes."), collapse = ""))
esets.small[[i]] <- esets[[i]][1:3, ]
featureNames(esets.small[[i]]) <- c("stromal.scaled", "stromal.unscaled",
"stromal.random")
exprs(esets.small[[i]])[1, ] <- scale(colSums(t(scale(t(expr.stromal)))) -
colSums(t(scale(t(expr.tumor)))))
exprs(esets.small[[i]])[2, ] <- scale(colSums(expr.stromal) - colSums(expr.tumor))
exprs(esets.small[[i]])[3, ] <- scale(colSums(t(scale(t(random.expr.stromal)))) -
colSums(t(scale(t(random.expr.tumor)))))
}
Stromal score is defined as the sum of expressions of each stromal gene, minus the sum of expressions of each tumor gene. This sum is then scaled to Z score to provide a consistent scale on the forest plots.
res <- forestplot(esets = esets.small, probeset = "stromal.scaled", at = log(c(0.5,
1, 2, 4)))
Very consistent with above.
res <- forestplot(esets = esets.small, probeset = "stromal.unscaled", at = log(c(0.5,
1, 2, 4)))
Half of tumor / stroma genes are assigned as tumor, the other half as stroma. Likely there is substantial correlation between these genes, and I think that the confidence interval is likely anti-conservative: ie, there can be a lot of fluctuation of this interval for different random seeds. Probably the most cautious analysis would be to simulate thousands of random assignments of tumor / stromal genes, calculated the synthesized HR each time, and use this as a null distribution to obtain a p-value for the prognostic value of the true tumor / stromal assignments.
res <- forestplot(esets = esets.small, probeset = "stromal.random", at = log(c(0.5,
1, 2, 4)))
PMID | N samples | stage | histology | Platform | |
---|---|---|---|---|---|
Bentink | 22348002 | 127 | 0/127/0 | 127/0/0/0/0/0/0 | Illumina humanRef-8 v2.0 |
Crijns | 19192944 | 98 | 0/98/0 | 98/0/0/0/0/0/0 | Operon v3 two-color |
Yoshihara 2010 | 20300634 | 43 | 0/43/0 | 43/0/0/0/0/0/0 | Agilent G4112A |
Mok | 19962670 | 53 | 0/53/0 | 53/0/0/0/0/0/0 | Affymetrix HG-U133Plus2 |
Konstantinopoulos | 20547991 | 42 | 0/0/42 | 0/0/0/0/0/0/42 | Affymetrix HG_U95Av2 |
Bonome | 18593951 | 185 | 0/185/0 | 185/0/0/0/0/0/0 | Affymetrix HG-U133A |
Yoshihara 2012A | 22241791 | 91 | 0/91/0 | 91/0/0/0/0/0/0 | Agilent G4112F |
Tothill | 18698038 | 140 | 0/140/0 | 140/0/0/0/0/0/0 | Affymetrix HG-U133Plus2 |
TCGA | 21720365 | 413 | 0/410/3 | 413/0/0/0/0/0/0 | Affymetrix HT_HG-U133A |