From c389f56da5b7c74584771b80692e73775c7e6c86 Mon Sep 17 00:00:00 2001 From: Nikhil Khatwani Date: Thu, 26 Oct 2017 23:16:35 +0530 Subject: [PATCH] Bael 1166 intro apache spark (#2875) * Changes for BAEL-1166 * Changes for BAEL_1166 * Changes for BAEL 1166 * Changes for BAEL 1166 * Changes for BAEL_1166 --- .../src/main/java/com/baeldung/WordCount.java | 49 +++++++------------ 1 file changed, 18 insertions(+), 31 deletions(-) diff --git a/apache-spark/src/main/java/com/baeldung/WordCount.java b/apache-spark/src/main/java/com/baeldung/WordCount.java index ec1dedcb69..bc73b05536 100644 --- a/apache-spark/src/main/java/com/baeldung/WordCount.java +++ b/apache-spark/src/main/java/com/baeldung/WordCount.java @@ -17,37 +17,24 @@ import scala.Tuple2; public class WordCount { private static final Pattern SPACE = Pattern.compile(" "); - + public static void main(String[] args) throws Exception { - if (args.length < 1) { - System.err.println("Usage: JavaWordCount "); - System.exit(1); + if (args.length < 1) { + System.err.println("Usage: JavaWordCount "); + System.exit(1); + } + SparkConf sparkConf = new SparkConf().setAppName("JavaWordCount") + .setMaster("local"); + JavaSparkContext ctx = new JavaSparkContext(sparkConf); + JavaRDD lines = ctx.textFile(args[0], 1); + + JavaRDD words = lines.flatMap(s -> Arrays.asList(SPACE.split(s)).iterator()); + JavaPairRDD wordAsTuple = words.mapToPair(word -> new Tuple2<>(word, 1)); + JavaPairRDD wordWithCount = wordAsTuple.reduceByKey((Integer i1, Integer i2)->i1 + i2); + List> output = wordWithCount.collect(); + for (Tuple2 tuple : output) { + System.out.println(tuple._1() + ": " + tuple._2()); + } + ctx.stop(); } - SparkConf sparkConf = new SparkConf().setAppName("JavaWordCount").setMaster("local"); - JavaSparkContext ctx = new JavaSparkContext(sparkConf); - JavaRDD lines = ctx.textFile(args[0], 1); - - JavaRDD words = lines.flatMap(s -> Arrays.asList(SPACE.split(s)).iterator()); - JavaPairRDD ones = words.mapToPair( - new PairFunction() { - @Override - public Tuple2 call(String s) { - return new Tuple2<>(s, 1); - } - }); - - JavaPairRDD counts = ones.reduceByKey( - new Function2() { - @Override - public Integer call(Integer i1, Integer i2) { - return i1 + i2; - } - }); - - List> output = counts.collect(); - for (Tuple2 tuple : output) { - System.out.println(tuple._1() + ": " + tuple._2()); - } - ctx.stop(); -} }