Files
spring-boot-rest/apache-opennlp/src/test/java/com/baeldung/apache/opennlp/ChunkerUnitTest.java
Amit Pandey ce645b67d2 Bael 6556 - PR - 1st ~ 20 TC (#4359)
* Added parent module on poms that have no parent defined

* Removed dependency reduced pom from undertow module

* [BAEL-6556] - Enable our custom PMD rule and fix the build - Fixed Unit test case names
2018-05-28 22:21:49 +02:00

33 lines
1.4 KiB
Java

package com.baeldung.apache.opennlp;
import java.io.FileInputStream;
import java.io.InputStream;
import opennlp.tools.chunker.ChunkerME;
import opennlp.tools.chunker.ChunkerModel;
import opennlp.tools.postag.POSModel;
import opennlp.tools.postag.POSTaggerME;
import opennlp.tools.tokenize.SimpleTokenizer;
import static org.assertj.core.api.Assertions.assertThat;
import org.junit.Test;
public class ChunkerUnitTest {
@Test
public void givenChunkerModel_whenChunk_thenChunksAreDetected() throws Exception {
SimpleTokenizer tokenizer = SimpleTokenizer.INSTANCE;
String[] tokens = tokenizer.tokenize("He reckons the current account deficit will narrow to only 8 billion.");
InputStream inputStreamPOSTagger = getClass().getResourceAsStream("/models/en-pos-maxent.bin");
POSModel posModel = new POSModel(inputStreamPOSTagger);
POSTaggerME posTagger = new POSTaggerME(posModel);
String tags[] = posTagger.tag(tokens);
InputStream inputStreamChunker = new FileInputStream("src/main/resources/models/en-chunker.bin");
ChunkerModel chunkerModel = new ChunkerModel(inputStreamChunker);
ChunkerME chunker = new ChunkerME(chunkerModel);
String[] chunks = chunker.chunk(tokens, tags);
assertThat(chunks).contains("B-NP", "B-VP", "B-NP", "I-NP", "I-NP", "I-NP", "B-VP", "I-VP", "B-PP", "B-NP", "I-NP", "I-NP", "O");
}
}