diff --git a/apache-spark/src/test/java/com/baeldung/differences/rdd/TransformationsUnitTest.java b/apache-spark/src/test/java/com/baeldung/differences/rdd/TransformationsUnitTest.java index 4b2d9e1127..01e7d3adfc 100644 --- a/apache-spark/src/test/java/com/baeldung/differences/rdd/TransformationsUnitTest.java +++ b/apache-spark/src/test/java/com/baeldung/differences/rdd/TransformationsUnitTest.java @@ -1,9 +1,9 @@ package com.baeldung.differences.rdd; - import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; +import org.apache.commons.io.FileUtils; import org.apache.commons.lang3.StringUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; @@ -12,12 +12,15 @@ import org.junit.AfterClass; import org.junit.BeforeClass; import org.junit.Test; +import java.io.File; + public class TransformationsUnitTest { - + public static final String COMMA_DELIMITER = ",(?=([^\"]*\"[^\"]*\")*[^\"]*$)"; + private static JavaSparkContext sc; private static JavaRDD tourists; - + @BeforeClass public static void init() { SparkConf conf = new SparkConf().setAppName("uppercaseCountries") @@ -25,8 +28,11 @@ public class TransformationsUnitTest { sc = new JavaSparkContext(conf); tourists = sc.textFile("data/Tourist.csv") .filter(line -> !line.startsWith("Region")); //filter header row + + // delete previous output dir and files + FileUtils.deleteQuietly(new File("data/output")); } - + @AfterClass public static void cleanup() { sc.close(); @@ -39,9 +45,9 @@ public class TransformationsUnitTest { return columns[1].toUpperCase(); }) .distinct(); - + upperCaseCountries.saveAsTextFile("data/output/uppercase.txt"); - + upperCaseCountries.foreach(country -> { //replace non alphanumerical characters country = country.replaceAll("[^a-zA-Z]", ""); @@ -52,9 +58,9 @@ public class TransformationsUnitTest { @Test public void whenFilterByCountry_thenShowRequestedCountryRecords() { JavaRDD touristsInMexico = tourists.filter(line -> line.split(COMMA_DELIMITER)[1].equals("Mexico")); - + touristsInMexico.saveAsTextFile("data/output/touristInMexico.txt"); - + touristsInMexico.foreach(record -> { assertEquals("Mexico", record.split(COMMA_DELIMITER)[1]); });