Skip to content

Instantly share code, notes, and snippets.

@mwacc
Created June 18, 2014 11:16
Show Gist options
  • Save mwacc/581a73ffd4fa39c037b8 to your computer and use it in GitHub Desktop.
Save mwacc/581a73ffd4fa39c037b8 to your computer and use it in GitHub Desktop.
rhadoop demo for Lviv HUG
# rmr2 supplies keyval(), mapreduce() and make.input.format() used below.
library(rmr2)
# Select the "local" rmr2 backend for this job.
rmr.options(backend = "local")
# NOTE(review): hdfs.init() is left disabled; presumably it is only needed
# when running against a real HDFS cluster -- confirm before enabling.
#hdfs.init()
# Mapper: emits the first field of the input as the key and packs the second
# and third fields into a single "|"-delimited string as the value.
#
# .     Input key; unused.
# line  Parsed input record(s); fields are accessed by position.
lm.map <- function(., line) {
  packed_fields <- paste(line[[2]], line[[3]], sep = "|")
  keyval(line[[1]], packed_fields)
}
# Reducer: fits the linear model `purchased ~ visited` over all values that
# share one key and emits the fit summary as a single tab-separated record.
#
# item      The key shared by every value in `val.list`.
# val.list  Values of the form "<visited>|<purchased>"; only the first two
#           "|"-separated fields of each value are used.
#
# Emits keyval(item, record), where record is
#   intercept <TAB> slope <TAB> r.squared <TAB> adj.r.squared
# followed by " \n". When the slope cannot be estimated (a single observation,
# or a constant `visited`), it is reported as NA rather than raising an error.
lm.reduce <- function(item, val.list) {
  # Split every "visited|purchased" value in one vectorised call.
  fields <- strsplit(as.character(val.list), split = "|", fixed = TRUE)

  # Extract the i-th field of every value as a number (NA when missing).
  field_as_numeric <- function(i) {
    vapply(fields, function(f) as.numeric(f[i]), numeric(1))
  }

  # Build the frame column-wise so a one-row group still yields two named
  # columns (sapply() on a one-row frame collapses it to a plain vector).
  m <- data.frame(
    visited = field_as_numeric(1L),
    purchased = field_as_numeric(2L)
  )

  fit <- lm(purchased ~ visited, data = m)
  sm <- summary(fit)

  # coef() keeps aliased terms as NA, whereas the summary coefficient table
  # drops them and would fail on lookup by row name.
  estimates <- coef(fit)
  res <- paste(
    estimates[["(Intercept)"]],
    estimates[["visited"]],
    sm$r.squared,
    sm$adj.r.squared,
    sep = "\t"
  )

  # The " \n" suffix (default paste separator plus newline) is kept so the
  # emitted record format stays unchanged.
  keyval(item, paste(res, "\n"))
}
# Run the job: read the comma-separated input, apply lm.map and lm.reduce,
# and write the result as text.
mapreduce(
  input = "~/Phone.csv",
  input.format = make.input.format("csv", sep = ","),
  map = lm.map,
  reduce = lm.reduce,
  output = "~/sample-phone-res.csv",
  output.format = "text"
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment