Files
spring-boot-rest/apache-opennlp/src/test/java/com/baeldung/apache/opennlp/LemmetizerUnitTest.java
Amit Pandey dfcc0cab05 Bael 6556 2 (#4365)
* Added parent module on poms that have no parent defined

* Removed dependency reduced pom from undertow module

* [BAEL-6556] - Next set of testcase renamed
2018-05-29 22:14:19 +02:00

30 lines
1.2 KiB
Java

package com.baeldung.apache.opennlp;
import java.io.InputStream;
import opennlp.tools.lemmatizer.DictionaryLemmatizer;
import opennlp.tools.postag.POSModel;
import opennlp.tools.postag.POSTaggerME;
import opennlp.tools.tokenize.SimpleTokenizer;
import static org.assertj.core.api.Assertions.assertThat;
import org.junit.Test;
public class LemmetizerUnitTest {
@Test
public void givenEnglishDictionary_whenLemmatize_thenLemmasAreDetected() throws Exception {
SimpleTokenizer tokenizer = SimpleTokenizer.INSTANCE;
String[] tokens = tokenizer.tokenize("John has a sister named Penny.");
InputStream inputStreamPOSTagger = getClass().getResourceAsStream("/models/en-pos-maxent.bin");
POSModel posModel = new POSModel(inputStreamPOSTagger);
POSTaggerME posTagger = new POSTaggerME(posModel);
String tags[] = posTagger.tag(tokens);
InputStream dictLemmatizer = getClass().getResourceAsStream("/models/en-lemmatizer.dict");
DictionaryLemmatizer lemmatizer = new DictionaryLemmatizer(dictLemmatizer);
String[] lemmas = lemmatizer.lemmatize(tokens, tags);
assertThat(lemmas).contains("O", "have", "a", "sister", "name", "O", "O");
}
}