Skip to content

Instantly share code, notes, and snippets.

@jnhutchinson
Last active December 15, 2015 12:49
Show Gist options
  • Save jnhutchinson/5263379 to your computer and use it in GitHub Desktop.
Save jnhutchinson/5263379 to your computer and use it in GitHub Desktop.
Convert deseq counts to STEM object
## Rory's kickass annotated dataframe function
annotate_df = function(d) {
require(biomaRt)
ensembl = useMart('ensembl', dataset = ensembl_gene)
a = getBM(attributes=c(filter_type, gene_symbol, "description"),
filters=c(filter_type), values=d[, 'id'],
mart=ensembl)
m = merge(d, a, by.x='id', by.y=filter_type)
return(m)
}
# the deseq counts
norm.counts = counts(cds, normalized=TRUE)
filt.norm.counts=norm.counts[use,] ## "use" are the genes with rowcounts above cutoff, i.e. expressed genes
counts.for.stem <- as.data.frame(filt.norm.counts)
counts.for.stem <- counts.for.stem[,c(3:1,18:16,12:4,21:19,15:13)] # rearrange time series columns to be by time instead of alphabetical
counts.for.stem.t <- t(counts.for.stem) # transform for the aggregation step
agg.vec <- sub("rep.", "", row.names(counts.for.stem.t)) # generate vector with time series replicates grouped by time
med.counts.for.stem.t <- aggregate(filt.norm.counts.t, by=list(agg.vec), function(n) median(n)) # median aggregate replicates
row.names(med.counts.for.stem.t) <- med.counts.for.stem.t$Group.1 # put row.names back as times
med.counts.for.stem.t <- med.counts.for.stem.t[,-(grep("Group.1", names(med.counts.for.stem.t)))] # dump the grouping column
med.counts.for.stem <- as.data.frame(t(med.counts.for.stem.t)) # transform back to normal form now that you're done aggregating
names(med.counts.for.stem) <- sub("Normal", 0, sub("FA", "", sub("day", "", names(med.counts.for.stem)))) # reduce the column names to just the numbers
med.counts.for.stem <- med.counts.for.stem[,order(as.numeric(names(med.counts.for.stem)))] # reorder columns by number (not sure I needed to do this)
med.counts.for.stem$id <- row.names(med.counts.for.stem) # make Ensembl id column before annotating dataframe
med.counts.for.stem <- annotate_df(med.counts.for.stem) # annotate dataframe with Rory's function
med.counts.for.stem <- med.counts.for.stem[,c(grep("mgi", names(med.counts.for.stem)), grep("[0-9]", names(med.counts.for.stem)))] # pull out the column you need, mgi_symbol and numerical time point columns
write.table(med.counts.for.stem, file.path(resultsDir, "normalized.median.counts.genes.forstem.tab"), quote=F, sep="\t", col.names=T, row.names=F) # write out table
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment