remove stop words from string

This commit is contained in:
DOHA
2019-05-16 17:57:05 +02:00
parent 8754806e7e
commit a4762fcc90
5 changed files with 5182 additions and 55 deletions

View File

@@ -0,0 +1,73 @@
package com.baeldung.string.performance;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.TimeUnit;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Fork;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.annotations.OutputTimeUnit;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.annotations.State;
@Fork(value = 3, warmups = 1)
@State(Scope.Benchmark)
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.MILLISECONDS)
public class RemovingStopwordsPerformanceComparison {
private String data;
private List<String> stopwords;
private String stopwordsRegex;
public static void main(String[] args) throws Exception {
org.openjdk.jmh.Main.main(args);
}
@Setup
public void setup() throws IOException {
data = new String(Files.readAllBytes(Paths.get("src/main/resources/shakespeare-hamlet.txt")));
data = data.toLowerCase();
stopwords = Files.readAllLines(Paths.get("src/main/resources/english_stopwords.txt"));
stopwordsRegex = stopwords.stream().collect(Collectors.joining("|", "\\b(", ")\\b\\s?"));
}
@Benchmark
public String removeManually() {
String[] allWords = data.split(" ");
StringBuilder builder = new StringBuilder();
for(String word:allWords) {
if(! stopwords.contains(word)) {
builder.append(word);
builder.append(' ');
}
}
return builder.toString().trim();
}
@Benchmark
public String removeAll() {
ArrayList<String> allWords = Stream.of(data.split(" "))
.collect(Collectors.toCollection(ArrayList<String>::new));
allWords.removeAll(stopwords);
return allWords.stream().collect(Collectors.joining(" "));
}
@Benchmark
public String replaceRegex() {
return data.replaceAll(stopwordsRegex, "");
}
}