Files
spring-boot-rest/apache-opennlp/src/test/java/com/baeldung/apache/opennlp/TokenizerUnitTest.java
Amit Pandey 7206e64bef Bael 6556 3 (#4382)
* Added parent module on poms that have no parent defined

* Removed dependency reduced pom from undertow module

* [BAEL-6556] - Next set of renames of testcases

* [BAEL-6556] - Next set of renames of testcases

* [BAEL-6556] - Next set of renames of testcases

* [BAEL-6556] - Next set of renames of testcases
2018-06-01 13:12:51 +02:00

37 lines
1.5 KiB
Java

package com.baeldung.apache.opennlp;
import java.io.InputStream;
import opennlp.tools.tokenize.SimpleTokenizer;
import opennlp.tools.tokenize.TokenizerME;
import opennlp.tools.tokenize.TokenizerModel;
import opennlp.tools.tokenize.WhitespaceTokenizer;
import static org.assertj.core.api.Assertions.assertThat;
import org.junit.Test;
public class TokenizerUnitTest {
@Test
public void givenEnglishModel_whenTokenize_thenTokensAreDetected() throws Exception {
InputStream inputStream = getClass().getResourceAsStream("/models/en-token.bin");
TokenizerModel model = new TokenizerModel(inputStream);
TokenizerME tokenizer = new TokenizerME(model);
String[] tokens = tokenizer.tokenize("Baeldung is a Spring Resource.");
assertThat(tokens).contains("Baeldung", "is", "a", "Spring", "Resource", ".");
}
@Test
public void givenWhitespaceTokenizer_whenTokenize_thenTokensAreDetected() throws Exception {
WhitespaceTokenizer tokenizer = WhitespaceTokenizer.INSTANCE;
String[] tokens = tokenizer.tokenize("Baeldung is a Spring Resource.");
assertThat(tokens).contains("Baeldung", "is", "a", "Spring", "Resource.");
}
@Test
public void givenSimpleTokenizer_whenTokenize_thenTokensAreDetected() throws Exception {
SimpleTokenizer tokenizer = SimpleTokenizer.INSTANCE;
String[] tokens = tokenizer.tokenize("Baeldung is a Spring Resource.");
assertThat(tokens).contains("Baeldung", "is", "a", "Spring", "Resource", ".");
}
}