* Added parent module on poms that have no parent defined * Removed dependency reduced pom from undertow module * [BAEL-6556] - Next set of testcase renamed
30 lines
1.2 KiB
Java
30 lines
1.2 KiB
Java
package com.baeldung.apache.opennlp;
|
|
|
|
import java.io.InputStream;
|
|
import opennlp.tools.lemmatizer.DictionaryLemmatizer;
|
|
import opennlp.tools.postag.POSModel;
|
|
import opennlp.tools.postag.POSTaggerME;
|
|
import opennlp.tools.tokenize.SimpleTokenizer;
|
|
import static org.assertj.core.api.Assertions.assertThat;
|
|
import org.junit.Test;
|
|
|
|
public class LemmetizerUnitTest {
|
|
|
|
@Test
|
|
public void givenEnglishDictionary_whenLemmatize_thenLemmasAreDetected() throws Exception {
|
|
|
|
SimpleTokenizer tokenizer = SimpleTokenizer.INSTANCE;
|
|
String[] tokens = tokenizer.tokenize("John has a sister named Penny.");
|
|
|
|
InputStream inputStreamPOSTagger = getClass().getResourceAsStream("/models/en-pos-maxent.bin");
|
|
POSModel posModel = new POSModel(inputStreamPOSTagger);
|
|
POSTaggerME posTagger = new POSTaggerME(posModel);
|
|
String tags[] = posTagger.tag(tokens);
|
|
InputStream dictLemmatizer = getClass().getResourceAsStream("/models/en-lemmatizer.dict");
|
|
DictionaryLemmatizer lemmatizer = new DictionaryLemmatizer(dictLemmatizer);
|
|
String[] lemmas = lemmatizer.lemmatize(tokens, tags);
|
|
|
|
assertThat(lemmas).contains("O", "have", "a", "sister", "name", "O", "O");
|
|
}
|
|
}
|