Last active
December 15, 2015 12:49
-
-
Save jnhutchinson/5263379 to your computer and use it in GitHub Desktop.
Convert deseq counts to STEM object
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## Rory's kickass annotated dataframe function | |
annotate_df = function(d) { | |
require(biomaRt) | |
ensembl = useMart('ensembl', dataset = ensembl_gene) | |
a = getBM(attributes=c(filter_type, gene_symbol, "description"), | |
filters=c(filter_type), values=d[, 'id'], | |
mart=ensembl) | |
m = merge(d, a, by.x='id', by.y=filter_type) | |
return(m) | |
} | |
# the deseq counts | |
norm.counts = counts(cds, normalized=TRUE) | |
filt.norm.counts=norm.counts[use,] ## "use" are the genes with rowcounts above cutoff, i.e. expressed genes | |
counts.for.stem <- as.data.frame(filt.norm.counts) | |
counts.for.stem <- counts.for.stem[,c(3:1,18:16,12:4,21:19,15:13)] # rearrange time series columns to be by time instead of alphabetical | |
counts.for.stem.t <- t(counts.for.stem) # transform for the aggregation step | |
agg.vec <- sub("rep.", "", row.names(counts.for.stem.t)) # generate vector with time series replicates grouped by time | |
med.counts.for.stem.t <- aggregate(filt.norm.counts.t, by=list(agg.vec), function(n) median(n)) # median aggregate replicates | |
row.names(med.counts.for.stem.t) <- med.counts.for.stem.t$Group.1 # put row.names back as times | |
med.counts.for.stem.t <- med.counts.for.stem.t[,-(grep("Group.1", names(med.counts.for.stem.t)))] # dump the grouping column | |
med.counts.for.stem <- as.data.frame(t(med.counts.for.stem.t)) # transform back to normal form now that you're done aggregating | |
names(med.counts.for.stem) <- sub("Normal", 0, sub("FA", "", sub("day", "", names(med.counts.for.stem)))) # reduce the column names to just the numbers | |
med.counts.for.stem <- med.counts.for.stem[,order(as.numeric(names(med.counts.for.stem)))] # reorder columns by number (not sure I needed to do this) | |
med.counts.for.stem$id <- row.names(med.counts.for.stem) # make Ensembl id column before annotating dataframe | |
med.counts.for.stem <- annotate_df(med.counts.for.stem) # annotate dataframe with Rory's function | |
med.counts.for.stem <- med.counts.for.stem[,c(grep("mgi", names(med.counts.for.stem)), grep("[0-9]", names(med.counts.for.stem)))] # pull out the column you need, mgi_symbol and numerical time point columns | |
write.table(med.counts.for.stem, file.path(resultsDir, "normalized.median.counts.genes.forstem.tab"), quote=F, sep="\t", col.names=T, row.names=F) # write out table |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment