Stromal prognosis analysis for Aedin

Load data:

library(curatedOvarianData)
library(gdata)
library(affy)
library(metafor)
library(xtable)
library(survival)
x <- read.xls("../input/Supplementary Table 1 - Tumor and stromal genes.xlsx", 
    as.is = TRUE)
head(x)

Create a list of ExpressionSets from curatedOvarianData:

dir.create("../output/data", recursive = TRUE)
source("../input/patientselection.config")
source("createEsetList.R")

## 2013-11-26 21:15:23 INFO::Inside script createEsetList.R - inputArgs =
## 2013-11-26 21:15:23 INFO::
## 2013-11-26 21:15:23 INFO::Loading curatedOvarianData 1.0.5
## 2013-11-26 21:15:54 INFO::Clean up the esets.
## 2013-11-26 21:15:54 INFO::including E.MTAB.386_eset
## 2013-11-26 21:15:54 INFO::excluding GSE12418_eset (min.number.of.events or min.sample.size)
## 2013-11-26 21:15:54 INFO::excluding GSE12470_eset (min.number.of.events or min.sample.size)
## 2013-11-26 21:15:55 INFO::including GSE13876_eset
## 2013-11-26 21:15:55 INFO::excluding GSE14764_eset (min.number.of.events or min.sample.size)
## 2013-11-26 21:15:55 INFO::including GSE17260_eset
## 2013-11-26 21:15:55 INFO::including GSE18520_eset
## 2013-11-26 21:15:55 INFO::excluding GSE19829.GPL570_eset (min.number.of.events or min.sample.size)
## 2013-11-26 21:15:55 INFO::including GSE19829.GPL8300_eset
## 2013-11-26 21:15:56 INFO::excluding GSE20565_eset (min.number.of.events or min.sample.size)
## 2013-11-26 21:15:56 INFO::excluding GSE2109_eset (min.number.of.events or min.sample.size)
## 2013-11-26 21:15:56 INFO::including GSE26712_eset
## 2013-11-26 21:15:56 INFO::excluding GSE30009_eset (min.number.of.genes)
## 2013-11-26 21:15:56 INFO::excluding GSE30161_eset (min.number.of.events or min.sample.size)
## 2013-11-26 21:15:57 INFO::including GSE32062.GPL6480_eset
## 2013-11-26 21:15:57 INFO::excluding GSE32063_eset (min.number.of.events or min.sample.size)
## 2013-11-26 21:15:57 INFO::excluding GSE6008_eset (min.number.of.events or min.sample.size)
## 2013-11-26 21:15:57 INFO::excluding GSE6822_eset (min.number.of.events or min.sample.size)
## 2013-11-26 21:15:57 INFO::including GSE9891_eset
## 2013-11-26 21:15:57 INFO::excluding PMID15897565_eset (min.number.of.events or min.sample.size)
## 2013-11-26 21:15:58 INFO::including PMID17290060_eset
## 2013-11-26 21:15:58 INFO::excluding PMID19318476_eset (min.number.of.events or min.sample.size)
## 2013-11-26 21:15:59 INFO::including TCGA_eset
## 2013-11-26 21:15:59 INFO::excluding TCGA.mirna.8x15kv2_eset (min.number.of.genes)
## 2013-11-26 21:15:59 INFO::Ids with missing data:

Create a simple average stromal content score for each patient. After scaling each gene to z score, this is the simple mean expression of stromal genes minus the mean expression of tumor genes.

esets.small <- esets
for (i in 1:length(esets)) {
    stromal <- t(scale(t(exprs(esets[[i]][featureNames(esets[[i]]) %in% x$Stromal.gene.symbols, 
        ]))))
    tumor <- t(scale(t(exprs(esets[[i]][featureNames(esets[[i]]) %in% x$Tumor.gene.symbols, 
        ]))))
    esets.small[[i]] <- esets[[i]][1, ]
    featureNames(esets.small[[i]]) <- "stromal.score"
    exprs(esets.small[[i]])[1, ] <- colMeans(stromal) - colMeans(tumor)
}
esets.small <- esets.small[!grepl("PMID", names(esets.small))]

Invisibly convert dataset names to more human readable ones here…

Define the forestplot function:

forestplot <- function(esets, y = "y", probeset, formula = y ~ probeset, mlab = "Overall", 
    rma.method = "FE", at = NULL, xlab = "Hazard Ratio", ...) {
    require(metafor)
    esets <- esets[sapply(esets, function(x) probeset %in% featureNames(x))]
    coefs <- sapply(1:length(esets), function(i) {
        tmp <- as(phenoData(esets[[i]]), "data.frame")
        tmp$y <- esets[[i]][[y]]
        tmp$probeset <- exprs(esets[[i]])[probeset, ]

        summary(coxph(formula, data = tmp))$coefficients[1, c(1, 3)]
    })

    res.rma <- metafor::rma(yi = coefs[1, ], sei = coefs[2, ], method = rma.method)

    if (is.null(at)) 
        at <- log(c(0.25, 1, 4, 20))
    forest.rma(res.rma, xlab = xlab, slab = gsub("_eset$", "", names(esets)), 
        atransf = exp, at = at, mlab = mlab, ...)
    return(res.rma)
}

Forest plot of Cox coefficient for stromal score

res <- forestplot(esets = esets.small, probeset = "stromal.score", at = log(c(0.5, 
    1, 2, 4)))

plot of chunk unnamed-chunk-3

Dataset properties

Table 1: Datasets used in the figure. Stage column provides early/late/unknown, histology column provides ser/clearcell/endo/mucinous/other/unknown.
	PMID	N samples	stage	histology	Platform
Bentink	22348002	127	0/127/0	127/0/0/0/0/0/0	Illumina humanRef-8 v2.0
Crijns	19192944	98	0/98/0	98/0/0/0/0/0/0	Operon v3 two-color
Yoshihara 2010	20300634	43	0/43/0	43/0/0/0/0/0/0	Agilent G4112A
Mok	19962670	53	0/53/0	53/0/0/0/0/0/0	Affymetrix HG-U133Plus2
Konstantinopoulos	20547991	42	0/0/42	0/0/0/0/0/0/42	Affymetrix HG_U95Av2
Bonome	18593951	185	0/185/0	185/0/0/0/0/0/0	Affymetrix HG-U133A
Yoshihara 2012A	22241791	91	0/91/0	91/0/0/0/0/0/0	Agilent G4112F
Tothill	18698038	140	0/140/0	140/0/0/0/0/0/0	Affymetrix HG-U133Plus2
TCGA	21720365	413	0/410/3	413/0/0/0/0/0/0	Affymetrix HT_HG-U133A