rawkintrevo · April 6, 2017 15:45 · rawkintrevo · Apr 5, 2017 · pferrel · Apr 5, 2017
diff --git a/Mahout Simple CCO b/Mahout Simple CCO
 /**
  * Created by rawkintrevo on 4/5/17.
  */

 // Only need these so intelliJ doesn't complain
 import org.apache.mahout.math._
 import org.apache.mahout.math.scalabindings._
 import org.apache.mahout.math.drm._
 import org.apache.mahout.math.scalabindings.RLikeOps._
 import org.apache.mahout.math.drm.RLikeDrmOps._
 import org.apache.mahout.sparkbindings._

 import org.apache.spark.SparkContext
 import org.apache.spark.SparkContext._
 import org.apache.spark.SparkConf
 // Spark configuration for this walkthrough; only the application name is set here.
 val conf = new SparkConf().setAppName("Simple Application")
 // getOrCreate hands back the SparkContext that is already running in this JVM when
 // there is one (the note below says `mahout spark-shell` pre-creates it) and only
 // builds a new one otherwise. A plain `new SparkContext(conf)` would fail in the
 // shell because a second context cannot be started in the same JVM.
 val sc = SparkContext.getOrCreate(conf)

 // Wrap the SparkContext as the implicit Mahout distributed context; the distributed
 // calls further down (e.g. drmParallelize) are expected to pick it up implicitly.
 implicit val sdc: org.apache.mahout.sparkbindings.SparkDistributedContext = sc2sdc(sc)

 // all this ^^ has been created for you by ./mahout spark-shell but it makes intellij happy

 // don't forget these!
 // export SPARK_HOME=$HOME/gits/spark-1.6.2-bin-hadoop2.6
 // ../mahout/bin/mahout spark-shell



 import org.apache.mahout.math.indexeddataset.{IndexedDataset, BiDictionary}
 import org.apache.mahout.sparkbindings.indexeddataset.IndexedDatasetSpark

 // Labels for the matrix rows (users) and columns (items).
 // NOTE(review): presumably each label's index is its position in the list - confirm
 // against BiDictionary. "Pat" is listed here but has no row in the matrices below.
 val rowIDs = new BiDictionary(
   List("Andrew", "Sebastian", "Ted", "Sarah", "Alexy", "Isabelle", "Pat"))
 val colIDs = new BiDictionary(
   List("iPhone5", "iPhone6", "Galaxy", "Nexus", "iPad", "Surface"))

 // Purchase indicators: one sparse row per user, written as (columnIndex, 1) entries.
 // Rows are listed in the same order as the names in rowIDs.
 val buyIndicatorMatrix = sparse(
   svec((0, 1) :: Nil), // Andrew
   svec((2, 1) :: Nil), // Sebastian
   svec((4, 1) :: Nil), // Ted
   svec((0, 1) :: Nil), // Sarah
   svec((2, 1) :: Nil), // Alexy
   svec((2, 1) :: Nil)  // Isabelle
 )

 // Distribute the in-core matrix, then attach the row/column labels to it.
 val buyIndicatorDRM = drmParallelize(buyIndicatorMatrix)
 val buyIndicatorIDS = new IndexedDatasetSpark(buyIndicatorDRM, rowIDs, colIDs)

 // View indicators: one sparse row per user, written as (columnIndex, 1) entries.
 // Rows are listed in the same order as the purchase matrix (Andrew .. Isabelle).
 val viewIndicatorMatrix = sparse(
   svec((0, 1) :: (2, 1) :: (3, 1) :: Nil),           // Andrew
   svec((0, 1) :: (2, 1) :: (4, 1) :: (5, 1) :: Nil), // Sebastian
   svec((1, 1) :: (4, 1) :: (5, 1) :: Nil),           // Ted
   svec((0, 1) :: (2, 1) :: (5, 1) :: Nil),           // Sarah
   svec((1, 1) :: (2, 1) :: Nil),                     // Alexy
   svec((2, 1) :: (5, 1) :: Nil)                      // Isabelle
 )

 // Distribute the in-core matrix, then attach the same row/column labels used for purchases.
 val viewIndicatorDRM = drmParallelize(viewIndicatorMatrix)
 val viewIndicatorIDS = new IndexedDatasetSpark(viewIndicatorDRM, rowIDs, colIDs)

 import org.apache.mahout.math.cf.SimilarityAnalysis

 // Run the co-occurrence analysis over both interaction datasets, purchases first.
 // The seed is fixed and only one interesting item is kept per thing.
 // NOTE(review): presumably the first dataset is treated as the primary action - confirm
 // against the SimilarityAnalysis documentation.
 val llrDRMs = SimilarityAnalysis.cooccurrencesIDSs(
   Array(buyIndicatorIDS, viewIndicatorIDS),
   maxInterestingItemsPerThing = 1,
   randomSeed = 1234)
 // A further argument was left disabled by the author: maxNumInteractions = 2

 // Pull both result matrices back in-core: element 0 and element 1 of the returned list.
 val llrAtA = llrDRMs.head.matrix.collect
 val llrAtB = llrDRMs(1).matrix.collect

 /**
 invertedScores: org.apache.mahout.math.Matrix =
 {
 0 =>   {3:2.6341457841558764}
 1 =>   {}
 2 =>   {2:1.5876494966267813}
 3 =>   {}
 4 =>   {1:5.406734506395658}
 }
 **/

 // Pat's view history as a sparse vector with entries at column indices 0, 2, 4 and 5.
 val patViewHistoryVector = svec(List(0 -> 1, 2 -> 1, 4 -> 1, 5 -> 1)) // Pat

 // Multiply the second result matrix by Pat's history to get one score per row.
 val patsReccos = llrAtB %*% patViewHistoryVector

 // Output recorded by the author:
 // patsReccos: org.apache.mahout.math.Vector = {0:3.8190850097688784,2:1.5876494966267813}
	/**
	* Created by rawkintrevo on 4/5/17.
	*/

	// Only need these so intelliJ doesn't complain
	import org.apache.mahout.math._
	import org.apache.mahout.math.scalabindings._
	import org.apache.mahout.math.drm._
	import org.apache.mahout.math.scalabindings.RLikeOps._
	import org.apache.mahout.math.drm.RLikeDrmOps._
	import org.apache.mahout.sparkbindings._

	import org.apache.spark.SparkContext
	import org.apache.spark.SparkContext._
	import org.apache.spark.SparkConf
	// Spark configuration for this walkthrough; only the application name is set here.
	val conf = new SparkConf().setAppName("Simple Application")
	// getOrCreate hands back the SparkContext that is already running in this JVM when
	// there is one (the note below says `mahout spark-shell` pre-creates it) and only
	// builds a new one otherwise. A plain `new SparkContext(conf)` would fail in the
	// shell because a second context cannot be started in the same JVM.
	val sc = SparkContext.getOrCreate(conf)

	// Wrap the SparkContext as the implicit Mahout distributed context; the distributed
	// calls further down (e.g. drmParallelize) are expected to pick it up implicitly.
	implicit val sdc: org.apache.mahout.sparkbindings.SparkDistributedContext = sc2sdc(sc)

	// all this ^^ has been created for you by ./mahout spark-shell but it makes intellij happy

	// don't forget these!
	// export SPARK_HOME=$HOME/gits/spark-1.6.2-bin-hadoop2.6
	// ../mahout/bin/mahout spark-shell



	import org.apache.mahout.math.indexeddataset.{IndexedDataset, BiDictionary}
	import org.apache.mahout.sparkbindings.indexeddataset.IndexedDatasetSpark

	// Labels for the matrix rows (users) and columns (items).
	// NOTE(review): presumably each label's index is its position in the list - confirm
	// against BiDictionary. "Pat" is listed here but has no row in the matrices below.
	val rowIDs = new BiDictionary(
		List("Andrew", "Sebastian", "Ted", "Sarah", "Alexy", "Isabelle", "Pat"))
	val colIDs = new BiDictionary(
		List("iPhone5", "iPhone6", "Galaxy", "Nexus", "iPad", "Surface"))

	// Purchase indicators: one sparse row per user, written as (columnIndex, 1) entries.
	// Rows are listed in the same order as the names in rowIDs.
	val buyIndicatorMatrix = sparse(
		svec((0, 1) :: Nil), // Andrew
		svec((2, 1) :: Nil), // Sebastian
		svec((4, 1) :: Nil), // Ted
		svec((0, 1) :: Nil), // Sarah
		svec((2, 1) :: Nil), // Alexy
		svec((2, 1) :: Nil)  // Isabelle
	)

	// Distribute the in-core matrix, then attach the row/column labels to it.
	val buyIndicatorDRM = drmParallelize(buyIndicatorMatrix)
	val buyIndicatorIDS = new IndexedDatasetSpark(buyIndicatorDRM, rowIDs, colIDs)

	// View indicators: one sparse row per user, written as (columnIndex, 1) entries.
	// Rows are listed in the same order as the purchase matrix (Andrew .. Isabelle).
	val viewIndicatorMatrix = sparse(
		svec((0, 1) :: (2, 1) :: (3, 1) :: Nil),           // Andrew
		svec((0, 1) :: (2, 1) :: (4, 1) :: (5, 1) :: Nil), // Sebastian
		svec((1, 1) :: (4, 1) :: (5, 1) :: Nil),           // Ted
		svec((0, 1) :: (2, 1) :: (5, 1) :: Nil),           // Sarah
		svec((1, 1) :: (2, 1) :: Nil),                     // Alexy
		svec((2, 1) :: (5, 1) :: Nil)                      // Isabelle
	)

	// Distribute the in-core matrix, then attach the same row/column labels used for purchases.
	val viewIndicatorDRM = drmParallelize(viewIndicatorMatrix)
	val viewIndicatorIDS = new IndexedDatasetSpark(viewIndicatorDRM, rowIDs, colIDs)

	import org.apache.mahout.math.cf.SimilarityAnalysis

	// Run the co-occurrence analysis over both interaction datasets, purchases first.
	// The seed is fixed and only one interesting item is kept per thing.
	// NOTE(review): presumably the first dataset is treated as the primary action - confirm
	// against the SimilarityAnalysis documentation.
	val llrDRMs = SimilarityAnalysis.cooccurrencesIDSs(
		Array(buyIndicatorIDS, viewIndicatorIDS),
		maxInterestingItemsPerThing = 1,
		randomSeed = 1234)
	// A further argument was left disabled by the author: maxNumInteractions = 2

	// Pull both result matrices back in-core: element 0 and element 1 of the returned list.
	val llrAtA = llrDRMs.head.matrix.collect
	val llrAtB = llrDRMs(1).matrix.collect

	/**
	invertedScores: org.apache.mahout.math.Matrix =
	{
	0 => {3:2.6341457841558764}
	1 => {}
	2 => {2:1.5876494966267813}
	3 => {}
	4 => {1:5.406734506395658}
	}
	**/

	// Pat's view history as a sparse vector with entries at column indices 0, 2, 4 and 5.
	val patViewHistoryVector = svec(List(0 -> 1, 2 -> 1, 4 -> 1, 5 -> 1)) // Pat

	// Multiply the second result matrix by Pat's history to get one score per row.
	val patsReccos = llrAtB %*% patViewHistoryVector

	// Output recorded by the author:
	// patsReccos: org.apache.mahout.math.Vector = {0:3.8190850097688784,2:1.5876494966267813}