Pacific-Design.com

    
Home Index

1. Spark

2. Reduce

Spark / Reduce /

Spark Reduce


scala> val a = (1 to 10).toArray.reduce { ( i: Int, j: Int ) => i + j }
a: Int = 55

scala> val a = (1 to 10).toArray.reduce { ( i, j ) => i + j }
a: Int = 55

scala> val a = (1 to 10).toArray.reduce { _ + _ }
a: Int = 55

Word Count


scala> val rdd1 = sc.parallelize(Array("kevin","thomas","kevin","one,","two","thomas"));
rdd1: org.apache.spark.rdd.RDD[String] = ParallelCollectionRDD[3] at parallelize at <console>:52

scala> val rdd2 = sc.textFile("/Users/kevin1/dse/README.md");
rdd2: org.apache.spark.rdd.RDD[String] = MapPartitionsRDD[2] at textFile at <console>:52

scala> val words = rdd1.flatMap { _.trim.split("""[\s\W]+""") }.filter(_.length > 0).map(_.toLowerCase);
words: org.apache.spark.rdd.RDD[String] = MapPartitionsRDD[10] at map at <console>:54

scala> val pair = words.map { word => (word, 1) }
pair: org.apache.spark.rdd.RDD[(String, Int)] = MapPartitionsRDD[11] at map at <console>:56