Last active
February 3, 2016 22:03
-
-
Save cdiener/fb4ea6580d6cd46e2be4 to your computer and use it in GitHub Desktop.
ExpressionSet reducer - allows you to reduce the features of an expression set from an nxn grouping, for instance probesets to genes/transcripts etc.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Rcpp::sourceCpp("matrix_reduce.cpp") | |
#' Reduces an ExpressionSet by an n-to-n map of features to groups. All entries | |
#' in \code{features} must exist in \code{eset}. \code{features} and | |
#' \code{groups} must have the same length. | |
#' | |
#' @param eset An ExpressionSet object. | |
#' @param features A character vector of features to be grouped. | |
#' @param groups A factor or character vector mapping the entries in | |
#' \code{features} to groups. | |
#' @param fun The reduction function. Must operate on a n x m matrix and return | |
#' a vector of length m. | |
#' @param progress Should progress information be shown. | |
#' @return A new ExpressionSet with features given by \code{unique(groups)}. | |
eset_reduce <- function(eset, features, groups, fun, progress=TRUE) { | |
groups <- factor(groups) | |
ex <- exprs(eset) | |
idx <- 1:nrow(ex) | |
names(idx) <- rownames(ex) | |
idx <- idx[features] | |
facs <- as.integer(groups) | |
res <- matrix_reduce(ex, idx, facs, "mean") | |
rownames(res) <- levels(groups) | |
return(ExpressionSet(res)) | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
* matrix_reduce.cpp | |
* | |
* Copyright 2016 Christian Diener <mail[at]cdiener.com> | |
* | |
* MIT license. See LICENSE for more information. | |
*/ | |
#include <Rcpp.h> | |
#include <vector> | |
// [[Rcpp::depends(RcppProgress)]] | |
#include <progress.hpp> | |
using namespace Rcpp; | |
template <typename C> | |
class index_sorter { | |
public: | |
index_sorter(C const& c) : c(c) {} | |
bool operator()(std::size_t const& lhs, std::size_t const& rhs) const { | |
return c[lhs] < c[rhs]; | |
} | |
private: | |
C const& c; | |
}; | |
NumericVector max_reduce(NumericMatrix m, std::vector<int> idx) { | |
NumericVector res(m.ncol(), 0.0); | |
double val; | |
int maxidx = 0; | |
double maxval = mean(m(idx[0], _)); | |
for(int i=0; i<idx.size(); ++i) { | |
val = mean(m(idx[i], _)); | |
if(val > maxval) { | |
maxval = val; | |
maxidx = idx[i]; | |
} | |
} | |
res = m(maxidx, _); | |
return res; | |
} | |
NumericVector mean_reduce(NumericMatrix m, std::vector<int> idx) { | |
NumericVector res(m.ncol(), 0.0); | |
for(int i=0; i<idx.size(); ++i) res += m(idx[i], _); | |
return res/idx.size(); | |
} | |
NumericVector median_reduce(NumericMatrix m, std::vector<int> idx) { | |
NumericVector res(m.ncol(), 0.0); | |
std::vector<double> means(idx.size()); | |
for(int i=0; i<idx.size(); ++i) means[i] = mean(m(idx[i], _)); | |
int i = idx.size()/2; | |
if(idx.size() % 2 == 0) { | |
std::nth_element(idx.begin(), idx.begin()+i-1, idx.end(), | |
index_sorter<std::vector<double> >(means)); | |
res += m(idx[i-1], _); | |
std::nth_element(idx.begin(), idx.begin()+i, idx.end(), | |
index_sorter<std::vector<double> >(means)); | |
res += m(idx[i-1], _); | |
res = 0.5*res; | |
} else { | |
std::nth_element(idx.begin(), idx.begin()+i-1, idx.end(), | |
index_sorter<std::vector<double> >(means)); | |
res += m(idx[i], _); | |
} | |
return res; | |
} | |
// [[Rcpp::export]] | |
NumericVector matrix_reduce(NumericMatrix m, IntegerVector idx, | |
IntegerVector factors, std::string method) { | |
int minf = min(factors); | |
int nf = max(factors) - min(factors) + 1; | |
std::vector< std::vector<int> > map(nf); | |
//Rcout<<"Building factor map..."<<std::endl; | |
int fi; | |
for(int i=0; i<factors.size(); ++i) { | |
fi = factors[i] - minf; | |
map[fi].push_back(idx[i]-1); | |
} | |
//Rcout<<"Reducing matrix..."<<std::endl; | |
Progress p(nf, true); | |
NumericMatrix res(nf, m.ncol()); | |
for(int fi=0; fi<nf; ++fi) { | |
p.increment(); | |
if(method == "max") res(fi, _) = max_reduce(m, map[fi]); | |
else if(method == "median") res(fi, _) = median_reduce(m, map[fi]); | |
else res(fi, _) = mean_reduce(m, map[fi]); | |
} | |
return res; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment