BAEL-1557 Intro to Apache OpenNlp
This commit is contained in:
@@ -0,0 +1,36 @@
|
||||
package com.baeldung.apache.opennlp;
|
||||
|
||||
import java.io.FileInputStream;
|
||||
import java.io.InputStream;
|
||||
import opennlp.tools.chunker.ChunkerME;
|
||||
import opennlp.tools.chunker.ChunkerModel;
|
||||
import opennlp.tools.postag.POSModel;
|
||||
import opennlp.tools.postag.POSTaggerME;
|
||||
import opennlp.tools.tokenize.SimpleTokenizer;
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
import org.junit.Test;
|
||||
|
||||
/**
|
||||
*
|
||||
* @author Parth
|
||||
*/
|
||||
public class ChunkerTest {
|
||||
|
||||
@Test
|
||||
public void givenSentence_whenChunk_thenGetChunks() throws Exception {
|
||||
|
||||
SimpleTokenizer tokenizer = SimpleTokenizer.INSTANCE;
|
||||
String[] tokens = tokenizer.tokenize("He reckons the current account deficit will narrow to only 8 billion.");
|
||||
|
||||
InputStream inputStreamPOSTagger = new FileInputStream("src/main/resources/models/en-pos-maxent.bin");
|
||||
POSModel posModel = new POSModel(inputStreamPOSTagger);
|
||||
POSTaggerME posTagger = new POSTaggerME(posModel);
|
||||
String tags[] = posTagger.tag(tokens);
|
||||
|
||||
InputStream inputStreamChunker = new FileInputStream("src/main/resources/models/en-chunker.bin");
|
||||
ChunkerModel chunkerModel = new ChunkerModel(inputStreamChunker);
|
||||
ChunkerME chunker = new ChunkerME(chunkerModel);
|
||||
String[] chunks = chunker.chunk(tokens, tags);
|
||||
assertThat(chunks).contains("B-NP", "B-VP", "B-NP", "I-NP", "I-NP", "I-NP", "B-VP", "I-VP", "B-PP", "B-NP", "I-NP", "I-NP", "O");
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user