BAEL-5196 - Split a comma-separated string while ignoring commas in quotes (#11432)

* Creating the module 'core-java-string-operations-4' for new string related code samples. Implemented code samples for the article BAEL-5196

* including the new module 'core-java-string-operations-4' in the parent project

* fixing spacing in the pom file

* fixing the maven configuration for our new project core-java-string-operations-4
This commit is contained in:
Willian Nalepa Oizumi
2021-11-15 10:15:35 -03:00
committed by GitHub
parent d621f5d42c
commit c05d21519c
5 changed files with 172 additions and 0 deletions

View File

@@ -0,0 +1,66 @@
package com.baeldung.commaseparatedstring;
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.regex.Pattern;
import com.google.common.base.Splitter;
import com.opencsv.CSVParser;
import com.opencsv.CSVParserBuilder;
import com.opencsv.CSVReader;
import com.opencsv.CSVReaderBuilder;
public class SplitCommaSeparatedString {
/**
 * Splits a comma-separated line into tokens with a single left-to-right scan,
 * treating commas that appear between double quotes as ordinary characters.
 *
 * <p>Every double quote toggles the "inside quotes" state and is kept in the
 * returned token. Empty fields are preserved, including a trailing one, and an
 * empty input yields a single empty token. If the quotes are unbalanced, all
 * text after the unmatched quote ends up in the last token.
 *
 * @param input the line to split; must not be {@code null}
 * @return a new mutable list holding the tokens in order of appearance
 * @throws NullPointerException if {@code input} is {@code null}
 */
public static List<String> splitWithParser(String input) {
    List<String> tokens = new ArrayList<>();
    int tokenStart = 0;
    boolean insideQuotes = false;
    for (int i = 0; i < input.length(); i++) {
        char current = input.charAt(i);
        if (current == '"') {
            insideQuotes = !insideQuotes;
        } else if (current == ',' && !insideQuotes) {
            tokens.add(input.substring(tokenStart, i));
            tokenStart = i + 1;
        }
    }
    // Whatever follows the last separator is the final token. It can never be a bare ",":
    // a token always starts outside quotes, so such a comma would have been consumed as a
    // separator by the loop above. No special case is needed.
    tokens.add(input.substring(tokenStart));
    return tokens;
}
/**
 * Matches a comma that is followed, up to the end of the input, by an even number of
 * double quotes - that is, a comma that is not enclosed in a quoted section.
 * Compiled once so that repeated calls do not recompile the expression.
 */
private static final Pattern COMMA_OUTSIDE_QUOTES = Pattern.compile(",(?=(?:[^\"]*\"[^\"]*\")*[^\"]*$)");

/**
 * Splits a comma-separated line with a lookahead regular expression, ignoring commas
 * that sit inside balanced double quotes.
 *
 * <p>Quote characters are kept in the tokens and empty fields (including trailing ones)
 * are preserved. With unbalanced quotes the lookahead counts the quotes remaining to the
 * right of each comma, so the result may differ from a left-to-right scan.
 *
 * @param input the line to split; must not be {@code null}
 * @return a fixed-size list view over the resulting token array
 * @throws NullPointerException if {@code input} is {@code null}
 */
public static List<String> splitWithRegex(String input) {
    // A negative limit keeps trailing empty strings instead of discarding them.
    return Arrays.asList(COMMA_OUTSIDE_QUOTES.split(input, -1));
}
/**
 * Guava splitter that separates on commas followed by an even number of double quotes up
 * to the end of the input, i.e. commas outside quoted sections. Built once and reused:
 * the pattern never changes, so there is no need to recompile it on every call.
 */
private static final Splitter QUOTE_AWARE_SPLITTER =
    Splitter.on(Pattern.compile(",(?=(?:[^\"]*\"[^\"]*\")*[^\"]*$)"));

/**
 * Splits a comma-separated line with Guava's {@link Splitter}, ignoring commas that sit
 * inside balanced double quotes.
 *
 * <p>Quote characters are kept in the tokens. The splitter is used with its default
 * settings (no trimming, no omission of empty strings).
 *
 * @param input the line to split; must not be {@code null}
 * @return the tokens in order of appearance, as produced by {@code Splitter.splitToList}
 */
public static List<String> splitWithGuava(String input) {
    return QUOTE_AWARE_SPLITTER.splitToList(input);
}
/**
 * Parses multi-line comma-separated text with OpenCSV and returns one string array per
 * record read.
 *
 * <p>The reader is opened in a try-with-resources block so that it is closed even when
 * reading fails part-way through.
 *
 * @param input the complete CSV text, possibly spanning several lines
 * @return every record returned by {@code CSVReader.readAll()}, in input order
 * @throws IOException if reading the input or closing the reader fails
 */
public static List<String[]> splitMultiLineWithOpenCSV(String input) throws IOException {
    CSVParser parser = new CSVParserBuilder().withSeparator(',')
        .build();
    try (CSVReader reader = new CSVReaderBuilder(new StringReader(input)).withCSVParser(parser)
        .build()) {
        return reader.readAll();
    }
}
}

View File

@@ -0,0 +1,44 @@
package com.baeldung.commaseparatedstring;
import static com.baeldung.commaseparatedstring.SplitCommaSeparatedString.splitMultiLineWithOpenCSV;
import static com.baeldung.commaseparatedstring.SplitCommaSeparatedString.splitWithGuava;
import static com.baeldung.commaseparatedstring.SplitCommaSeparatedString.splitWithParser;
import static com.baeldung.commaseparatedstring.SplitCommaSeparatedString.splitWithRegex;
import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.Matchers.contains;
import static org.hamcrest.Matchers.hasSize;
import static org.junit.Assert.assertArrayEquals;
import java.io.IOException;
import java.util.List;
import org.junit.Test;
public class SplitCommaSeparatedStringUnitTest {
/**
 * Checks that the parser-, regex- and Guava-based splitters all produce the same five
 * tokens for a line whose last field is quoted and contains a comma.
 */
@Test
public void givenSingleLineInput_whenParsing_shouldIgnoreCommasInsideDoubleQuotes() {
    // The final field is wrapped in double quotes and ends with a comma that must not split it.
    final String line = "baeldung,tutorial,splitting,text,\"ignoring this comma,\"";

    // These splitters keep the surrounding quotes, so the expected last token includes them.
    var expectedTokens = contains("baeldung", "tutorial", "splitting", "text", "\"ignoring this comma,\"");

    assertThat(splitWithParser(line), expectedTokens);
    assertThat(splitWithRegex(line), expectedTokens);
    assertThat(splitWithGuava(line), expectedTokens);
}
/**
 * Checks that the OpenCSV-based splitter returns one array per input line and that the
 * quoted field of the first line comes back as a single value without its quotes.
 */
@Test
public void givenMultiLineInput_whenParsing_shouldIgnoreCommasInsideDoubleQuotes() throws IOException {
    String[] expectedFirstRow = { "baeldung", "tutorial", "splitting", "text", "ignoring this comma," };
    String[] expectedSecondRow = { "splitting", "a", "regular", "line", "no double quotes" };

    // Two records joined by the platform line separator; only the first has a quoted field.
    String quotedRow = "baeldung,tutorial,splitting,text,\"ignoring this comma,\"";
    String plainRow = "splitting,a,regular,line,no double quotes";
    String input = quotedRow + System.lineSeparator() + plainRow;

    List<String[]> rows = splitMultiLineWithOpenCSV(input);

    assertThat(rows, hasSize(2));
    assertArrayEquals(expectedFirstRow, rows.get(0));
    assertArrayEquals(expectedSecondRow, rows.get(1));
}
}