From eead5281acf2adde897078ae3b2eb347be9d6dc9 Mon Sep 17 00:00:00 2001 From: Kai Yuan Date: Thu, 6 Jul 2023 01:20:28 +0200 Subject: [PATCH] [split-digits] Split a String Into Digits- and Non-Digits-Elements (#14349) * [split-digits] Split a String Into Digits- and Non-Digits-Elements * [split-digits] JMH benchmark * [split-digits] using enum --- .../core-java-string-operations-6/pom.xml | 12 +++- .../digitsandnondigits/BenchmarkLiveTest.java | 47 +++++++++++++ .../SplitDigitsAndNondigitsUnitTest.java | 69 +++++++++++++++++++ 3 files changed, 127 insertions(+), 1 deletion(-) create mode 100644 core-java-modules/core-java-string-operations-6/src/test/java/com/baeldung/digitsandnondigits/BenchmarkLiveTest.java create mode 100644 core-java-modules/core-java-string-operations-6/src/test/java/com/baeldung/digitsandnondigits/SplitDigitsAndNondigitsUnitTest.java diff --git a/core-java-modules/core-java-string-operations-6/pom.xml b/core-java-modules/core-java-string-operations-6/pom.xml index ddbb5d0e40..0ec32d91b1 100644 --- a/core-java-modules/core-java-string-operations-6/pom.xml +++ b/core-java-modules/core-java-string-operations-6/pom.xml @@ -18,7 +18,16 @@ commons-lang3 ${apache.commons-lang.version} - + + org.openjdk.jmh + jmh-core + ${jmh.version} + + + org.openjdk.jmh + jmh-generator-annprocess + ${jmh.version} + @@ -38,6 +47,7 @@ 11 11 3.12.0 + 1.36 \ No newline at end of file diff --git a/core-java-modules/core-java-string-operations-6/src/test/java/com/baeldung/digitsandnondigits/BenchmarkLiveTest.java b/core-java-modules/core-java-string-operations-6/src/test/java/com/baeldung/digitsandnondigits/BenchmarkLiveTest.java new file mode 100644 index 0000000000..7c5c8101f8 --- /dev/null +++ b/core-java-modules/core-java-string-operations-6/src/test/java/com/baeldung/digitsandnondigits/BenchmarkLiveTest.java @@ -0,0 +1,47 @@ +package com.baeldung.digitsandnondigits; + +import static com.baeldung.digitsandnondigits.SplitDigitsAndNondigitsUnitTest.parseString; + 
+import java.util.concurrent.TimeUnit;
+
+import org.junit.jupiter.api.Test;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Fork;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.annotations.Param;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.Threads;
+import org.openjdk.jmh.annotations.Warmup;
+import org.openjdk.jmh.infra.Blackhole;
+/**
+ * JMH benchmark that compares two ways of splitting {@code INPUT} into
+ * alternating digit / non-digit pieces: {@code String.split} with a
+ * lookaround regex, and the statically imported
+ * {@code SplitDigitsAndNondigitsUnitTest.parseString}.
+ *
+ * Configuration as declared by the annotations below: benchmark-scoped
+ * state, one thread, throughput mode reported per millisecond, one warmup
+ * fork plus one measured fork, and two warmup iterations of 10 ms each.
+ */
+@State(Scope.Benchmark)
+@Threads(1)
+@BenchmarkMode(Mode.Throughput)
+@Fork(warmups = 1, value = 1)
+@Warmup(iterations = 2, time = 10, timeUnit = TimeUnit.MILLISECONDS)
+@OutputTimeUnit(TimeUnit.MILLISECONDS)
+public class BenchmarkLiveTest {
+    private static final String INPUT = "01Michael Jackson23Michael Jordan42Michael Bolton999Michael Johnson000"; // begins and ends with a digit run
+    /**
+     * JMH parameter with the single value "10000".
+     * NOTE(review): neither benchmark method in this class reads this
+     * field - confirm whether it is still needed.
+     */
+    @Param({ "10000" })
+    public int iterations;
+    /**
+     * Measures {@code String.split} with a regex that matches the zero-width
+     * position between a non-digit and a digit (in either order). The
+     * resulting array is handed to the {@code Blackhole}.
+     *
+     * @param blackhole JMH sink that consumes the split result
+     */
+    @Benchmark
+    public void regexBased(Blackhole blackhole) {
+        blackhole.consume(INPUT.split("(?<=\\D)(?=\\d)|(?<=\\d)(?=\\D)"));
+    }
+    /**
+     * Measures the hand-written, character-by-character splitter
+     * {@code parseString} on the same input and hands its result to the
+     * {@code Blackhole}.
+     *
+     * @param blackhole JMH sink that consumes the parsed list
+     */
+    @Benchmark
+    public void nonRegexBased(Blackhole blackhole) {
+        blackhole.consume(parseString(INPUT));
+    }
+    /**
+     * JUnit entry point that starts JMH by calling
+     * {@code org.openjdk.jmh.Main.main} with an empty argument array.
+     * NOTE(review): with no arguments JMH presumably runs every benchmark it
+     * discovers - confirm against the JMH documentation.
+     *
+     * @throws Exception whatever {@code Main.main} propagates
+     */
+    @Test
+    public void benchmark() throws Exception {
+        String[] argv = {};
+        org.openjdk.jmh.Main.main(argv);
+    }
+}
\ No newline at end of file
diff --git a/core-java-modules/core-java-string-operations-6/src/test/java/com/baeldung/digitsandnondigits/SplitDigitsAndNondigitsUnitTest.java b/core-java-modules/core-java-string-operations-6/src/test/java/com/baeldung/digitsandnondigits/SplitDigitsAndNondigitsUnitTest.java
new file mode 100644
index 0000000000..55e0e48070
--- /dev/null
+++ b/core-java-modules/core-java-string-operations-6/src/test/java/com/baeldung/digitsandnondigits/SplitDigitsAndNondigitsUnitTest.java
@@ -0,0 +1,69 @@
+package com.baeldung.digitsandnondigits;
+
+import static
com.baeldung.digitsandnondigits.SplitDigitsAndNondigitsUnitTest.State.INIT;
+import static com.baeldung.digitsandnondigits.SplitDigitsAndNondigitsUnitTest.State.PARSING_DIGIT;
+import static com.baeldung.digitsandnondigits.SplitDigitsAndNondigitsUnitTest.State.PARSING_NON_DIGIT;
+import static org.junit.jupiter.api.Assertions.assertArrayEquals;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import org.junit.jupiter.api.Test;
+/**
+ * Tests two ways of splitting a string into alternating runs of digits and
+ * non-digits: {@code String.split} with a lookaround regex, and the
+ * hand-written scanner {@link #parseString(String)}.
+ *
+ * Two fixtures are used: one input that begins and ends with digits, and one
+ * that begins and ends with non-digits.
+ */
+public class SplitDigitsAndNondigitsUnitTest {
+    private static final String INPUT1 = "01Michael Jackson23Michael Jordan42Michael Bolton999Michael Johnson000"; // digits first and last
+    private static final String[] EXPECTED1 = new String[] { "01", "Michael Jackson", "23", "Michael Jordan", "42", "Michael Bolton", "999", "Michael Johnson", "000" };
+    private static final List EXPECTED_LIST1 = Arrays.asList(EXPECTED1);
+    // Second fixture: non-digits first and last.
+    private static final String INPUT2 = "Michael Jackson01Michael Jordan23Michael Bolton42Michael Johnson999Great Michaels";
+    private static final String[] EXPECTED2 = new String[] { "Michael Jackson", "01", "Michael Jordan", "23", "Michael Bolton", "42", "Michael Johnson", "999", "Great Michaels" };
+    private static final List EXPECTED_LIST2 = Arrays.asList(EXPECTED2);
+    /**
+     * Splits both fixtures with a regex that matches the empty position
+     * between a non-digit and a digit, or between a digit and a non-digit,
+     * and checks the resulting arrays against the expected pieces.
+     */
+    @Test
+    void whenUsingLookaroundRegex_thenGetExpectedResult() {
+        String splitRE = "(?<=\\D)(?=\\d)|(?<=\\d)(?=\\D)";
+        String[] result1 = INPUT1.split(splitRE);
+        assertArrayEquals(EXPECTED1, result1);
+        // Same regex on the input that starts and ends with non-digits.
+        String[] result2 = INPUT2.split(splitRE);
+        assertArrayEquals(EXPECTED2, result2);
+    }
+    /**
+     * Scanner state used by {@link #parseString(String)}: {@code INIT} before
+     * any character has been read, then the kind of run currently being
+     * scanned.
+     */
+    enum State {
+        INIT, PARSING_DIGIT, PARSING_NON_DIGIT
+    }
+    /**
+     * Splits {@code input} into consecutive runs of digits and non-digits
+     * without using a regex.
+     *
+     * Walks the string once; each time the character kind flips, the run
+     * collected so far is added and a new run starts at the current index.
+     * The final run is always added after the loop, so an empty input yields
+     * a list containing one empty string.
+     *
+     * NOTE(review): the return type and the local list are raw {@code List}
+     * in this text; the elements added are the {@code String} results of
+     * {@code substring}.
+     *
+     * @param input the string to split; a null value fails at
+     *              {@code input.length()}
+     * @return the runs in their original order
+     */
+    static List parseString(String input) {
+        List result = new ArrayList<>();
+        int start = 0; // index where the current run begins
+        State state = INIT;
+        for (int i = 0; i < input.length(); i++) {
+            if (input.charAt(i) >= '0' && input.charAt(i) <= '9') { // only '0'..'9' count as digits
+                if (state == PARSING_NON_DIGIT) { // non-digit run just ended
+                    result.add(input.substring(start, i));
+                    start = i;
+                }
+                state = PARSING_DIGIT;
+            } else {
+                if (state == PARSING_DIGIT) { // digit run just ended
+                    result.add(input.substring(start, i));
+                    start = i;
+                }
+                state = PARSING_NON_DIGIT;
+            }
+        }
+        result.add(input.substring(start)); // trailing run, added unconditionally
+        return result;
+    }
+    /**
+     * Runs {@link #parseString(String)} on both fixtures and checks the
+     * returned lists against the expected pieces.
+     */
+    @Test
+    void whenCheckEachChar_thenGetExpectedResult() {
+        List result1 = parseString(INPUT1);
+        assertEquals(EXPECTED_LIST1, result1);
+        // Same check on the input that starts and ends with non-digits.
+        List result2 = parseString(INPUT2);
+        assertEquals(EXPECTED_LIST2, result2);
+    }
+}
\ No newline at end of file