From f55e3796d98e6e239bf637be7a3561885e3bc1cb Mon Sep 17 00:00:00 2001 From: Felipe Santiago Corro Date: Tue, 14 Aug 2018 07:24:28 -0300 Subject: [PATCH] optimizing regex expressions (#4961) --- .../optmization/OptimizedMatcher.java | 6 + .../optmization/OptimizedMatcherUnitTest.java | 109 ++++++++++++++++++ 2 files changed, 115 insertions(+) create mode 100644 core-java/src/main/java/com/baeldung/regexp/datepattern/optmization/OptimizedMatcher.java create mode 100644 core-java/src/test/java/com/baeldung/regexp/optmization/OptimizedMatcherUnitTest.java diff --git a/core-java/src/main/java/com/baeldung/regexp/datepattern/optmization/OptimizedMatcher.java b/core-java/src/main/java/com/baeldung/regexp/datepattern/optmization/OptimizedMatcher.java new file mode 100644 index 0000000000..ccae942dcc --- /dev/null +++ b/core-java/src/main/java/com/baeldung/regexp/datepattern/optmization/OptimizedMatcher.java @@ -0,0 +1,6 @@ +package com.baeldung.regexp.datepattern.optmization; + +public class OptimizedMatcher { + + +} diff --git a/core-java/src/test/java/com/baeldung/regexp/optmization/OptimizedMatcherUnitTest.java b/core-java/src/test/java/com/baeldung/regexp/optmization/OptimizedMatcherUnitTest.java new file mode 100644 index 0000000000..f21a755b01 --- /dev/null +++ b/core-java/src/test/java/com/baeldung/regexp/optmization/OptimizedMatcherUnitTest.java @@ -0,0 +1,109 @@ +package com.baeldung.regexp.optmization; + +import org.junit.After; +import org.junit.Before; +import org.junit.Test; + +import java.util.ArrayList; +import java.util.List; +import java.util.Random; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import static org.junit.Assert.assertTrue; + +public class OptimizedMatcherUnitTest { + + private long time; + private long mstimePreCompiled; + private long mstimeNotPreCompiled; + + private String action; + + private List items; + + @Before + public void setup() { + Random random = new Random(); + items = new ArrayList(); + long average = 0; + + for (int i = 0; i < 100000; ++i) { + StringBuilder s = new StringBuilder(); + int characters = random.nextInt(7) + 1; + for (int k = 0; k < characters; ++ k) { + char c = (char)(random.nextInt('Z' - 'A') + 'A'); + int rep = random.nextInt(95) + 5; + for (int j = 0; j < rep; ++ j) + s.append(c); + average += rep; + } + items.add(s.toString()); + } + + average /= 100000; + System.out.println("generated data, average length: " + average); + } + + + @Test + public void givenANotPreCompiledAndAPreCompiledPatternA_whenMatcheItems_thenPreCompiledFasterThanNotPreCompiled() { + + testPatterns("A*"); + assertTrue(mstimePreCompiled < mstimeNotPreCompiled); + } + + @Test + public void givenANotPreCompiledAndAPreCompiledPatternABC_whenMatcheItems_thenPreCompiledFasterThanNotPreCompiled() { + + testPatterns("A*B*C*"); + assertTrue(mstimePreCompiled < mstimeNotPreCompiled); + } + + @Test + public void givenANotPreCompiledAndAPreCompiledPatternECWF_whenMatcheItems_thenPreCompiledFasterThanNotPreCompiled() { + + testPatterns("E*C*W*F*"); + assertTrue(mstimePreCompiled < mstimeNotPreCompiled); + } + + private void testPatterns(String regex) { + time = System.nanoTime(); + int matched = 0; + int unmatched = 0; + + for (String item : this.items) { + if (item.matches(regex)) { + ++matched; + } + else { + ++unmatched; + } + } + + this.action = "uncompiled: regex=" + regex + " matched=" + matched + " unmatched=" + unmatched; + + this.mstimeNotPreCompiled = (System.nanoTime() - time) / 1000000; + System.out.println(this.action + ": " + mstimeNotPreCompiled + "ms"); + + time = System.nanoTime(); + + Matcher matcher = Pattern.compile(regex).matcher(""); + matched = 0; + unmatched = 0; + + for (String item : this.items) { + if (matcher.reset(item).matches()) { + ++matched; + } + else { + ++unmatched; + } + } + + this.action = "compiled: regex=" + regex + " matched=" + matched + " unmatched=" + unmatched; + + this.mstimePreCompiled = (System.nanoTime() - time) / 1000000; + System.out.println(this.action + ": " + mstimePreCompiled + "ms"); + } +}