[BAEL-3634] Code for Spark DataFrame article (#12039)

* [BAEL-3634] Code for Spark DataFrame article

* [BAEL-3634] Improve example data sample and sort aggregations

* [BAEL-3634] change column name for clarity

* [BAEL-3634] Update method name with U.S. English standard

Co-authored-by: uzma khan <uzma.khan@nominet.uk>
This commit is contained in:
ukhan1980
2022-05-03 20:47:17 +01:00
committed by GitHub
parent 54d6c22977
commit 19a276d870
17 changed files with 686 additions and 6 deletions

View File

@@ -0,0 +1,52 @@
package com.baeldung.dataframes;
import static org.junit.jupiter.api.Assertions.assertEquals;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.Properties;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
/**
 * Live integration test for {@link CustomerDataAggregationPipeline}.
 *
 * <p>Requires a running PostgreSQL instance at localhost:5432 with a
 * {@code customerdb} database (credentials: postgres/postgres). Runs the
 * pipeline end-to-end and verifies the aggregated row count persisted
 * into the {@code customer} table.
 */
class CustomerDataAggregationPipelineLiveTest {

    // Shared across test methods; opened once in beforeAll.
    private static Connection conn;

    @BeforeAll
    static void beforeAll() throws SQLException {
        DriverManager.registerDriver(new org.postgresql.Driver());
        String dbURL = "jdbc:postgresql://localhost:5432/customerdb";
        conn = DriverManager.getConnection(dbURL, "postgres", "postgres");

        // Drop any leftover table so the pipeline's row count is deterministic.
        String sql = "drop table if exists customer";
        // try-with-resources: the original leaked this PreparedStatement.
        try (PreparedStatement statement = conn.prepareStatement(sql)) {
            statement.executeUpdate();
        }
    }

    @Test
    void givenCSVAndJSON_whenRun_thenStoresAggregatedDataFrameInDB() throws Exception {
        Properties dbProps = new Properties();
        dbProps.setProperty("connectionURL", "jdbc:postgresql://localhost:5432/customerdb");
        dbProps.setProperty("driver", "org.postgresql.Driver");
        dbProps.setProperty("user", "postgres");
        dbProps.setProperty("password", "postgres");

        CustomerDataAggregationPipeline pipeline = new CustomerDataAggregationPipeline(dbProps);
        pipeline.run();

        // 7 expected rows — presumably fixed by the sample CSV/JSON inputs
        // the pipeline reads; confirm against the pipeline's test data.
        String allCustomersSql = "Select count(*) from customer";
        // try-with-resources: the original leaked both Statement and ResultSet.
        try (Statement statement = conn.createStatement();
             ResultSet resultSet = statement.executeQuery(allCustomersSql)) {
            resultSet.next();
            int count = resultSet.getInt(1);
            assertEquals(7, count);
        }
    }
}

View File

@@ -0,0 +1,62 @@
package com.baeldung.dataframes;
import static com.baeldung.dataframes.CustomerToDataFrameConverterApp.convertAfterMappingRows;
import static org.junit.jupiter.api.Assertions.assertEquals;
import java.util.Arrays;
import java.util.List;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.junit.jupiter.api.Test;
/**
 * Unit tests for {@link CustomerToDataFrameConverterApp}: verifies both the
 * row-mapping conversion (uppercased name, single-letter gender code) and
 * the pass-through conversion that preserves the raw field values.
 */
class CustomerToDataFrameConverterAppUnitTest {

    @Test
    void givenCustomers_whenConvertAfterMappingRows_thenConvertsToDataSet() {
        List<Customer> customers = Arrays.asList(
            new Customer("01", "jo", "Female", 2000),
            new Customer("02", "jack", "Male", 1200)
        );

        Dataset<Row> customerDF = convertAfterMappingRows(customers);
        List<Row> rows = customerDF.collectAsList();

        // Mapping uppercases the name and abbreviates gender to F/M.
        Row row1 = rows.get(0);
        assertEquals("01", row1.get(0));
        assertEquals("JO", row1.get(1));
        assertEquals("F", row1.get(2));
        assertEquals(2000, row1.get(3));

        Row row2 = rows.get(1);
        assertEquals("02", row2.get(0));
        assertEquals("JACK", row2.get(1));
        assertEquals("M", row2.get(2));
        assertEquals(1200, row2.get(3));
    }

    @Test
    void givenCustomers_whenConvertWithNoChange_thenConvertsToDataSet() {
        // NOTE: convertToDataFrameWithNoChange() takes no arguments and reads
        // its own data source; the original test built an unused local
        // customer list here, which has been removed as dead code.
        Dataset<Row> customerDF = CustomerToDataFrameConverterApp.convertToDataFrameWithNoChange();
        List<Row> rows = customerDF.collectAsList();

        // Pass-through conversion keeps the raw field values unchanged.
        Row row1 = rows.get(0);
        assertEquals("01", row1.getAs("id"));
        assertEquals("jo", row1.getAs("name"));
        assertEquals("Female", row1.getAs("gender"));
        assertEquals(2000, (int) row1.getAs("transaction_amount"));

        Row row2 = rows.get(1);
        assertEquals("02", row2.getAs("id"));
        assertEquals("jack", row2.getAs("name"));
        assertEquals("Male", row2.getAs("gender"));
        assertEquals(1200, (int) row2.getAs("transaction_amount"));
    }
}