BAEL-5766 Overview of NLP Libraries in Java (#13798)
* BAEL-5766 Overview of NLP Libraries in Java
* BAEL-5766 Overview of NLP Libraries in Java
* Overview of NLP Libraries in Java
* Overview of NLP Libraries in Java
This commit is contained in:
@@ -0,0 +1,41 @@
|
||||
package com.baeldung.nlp;
|
||||
|
||||
import edu.stanford.nlp.ling.CoreAnnotations;
|
||||
import edu.stanford.nlp.ling.CoreLabel;
|
||||
import edu.stanford.nlp.pipeline.Annotation;
|
||||
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
|
||||
import edu.stanford.nlp.util.CoreMap;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Properties;
|
||||
|
||||
import static org.junit.Assert.assertEquals;
|
||||
|
||||
public class CoreNLPTokenizerUnitTest {
|
||||
@Test
|
||||
public void givenSampleText_whenTokenize_thenExpectedTokensReturned() {
|
||||
|
||||
Properties props = new Properties();
|
||||
props.setProperty("annotators", "tokenize");
|
||||
|
||||
StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
|
||||
String text = "The german shepard display an act of kindness";
|
||||
|
||||
Annotation document = new Annotation(text);
|
||||
pipeline.annotate(document);
|
||||
|
||||
List<CoreMap> sentences = document.get(CoreAnnotations.SentencesAnnotation.class);
|
||||
StringBuilder tokens = new StringBuilder();
|
||||
|
||||
for (CoreMap sentence : sentences) {
|
||||
for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
|
||||
String word = token.get(CoreAnnotations.TextAnnotation.class);
|
||||
tokens.append(word)
|
||||
.append(" ");
|
||||
}
|
||||
}
|
||||
assertEquals("The german shepard display an act of kindness", tokens.toString()
|
||||
.trim());
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,33 @@
|
||||
package com.baeldung.nlp;
|
||||
|
||||
import opennlp.tools.langdetect.Language;
|
||||
import opennlp.tools.langdetect.LanguageDetectorME;
|
||||
import opennlp.tools.langdetect.LanguageDetectorModel;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import java.io.FileInputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
|
||||
class OpenNLPLanguageDetector {
|
||||
|
||||
@Test
|
||||
public void givenTextInEnglish_whenDetectLanguage_thenReturnsEnglishLanguageCode() {
|
||||
|
||||
String text = "the dream my father told me";
|
||||
LanguageDetectorModel model;
|
||||
|
||||
try (InputStream modelIn = new FileInputStream("langdetect-183.bin")) {
|
||||
model = new LanguageDetectorModel(modelIn);
|
||||
} catch (IOException e) {
|
||||
return;
|
||||
}
|
||||
|
||||
LanguageDetectorME detector = new LanguageDetectorME(model);
|
||||
Language language = detector.predictLanguage(text);
|
||||
|
||||
assertEquals("eng", language.getLang());
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user