BAEL-5766 Overview of NLP Libraries in Java (#13798)

* BAEL-5766 Overview of NLP Libraries in Java

* BAEL-5766 Overview of NLP Libraries in Java

* Overview of NLP Libraries in Java

* Overview of NLP Libraries in Java
This commit is contained in:
Michael Olayemi
2023-04-22 06:32:53 +01:00
committed by GitHub
parent f0d3aefcb7
commit 5d02c60bc8
6 changed files with 109 additions and 0 deletions

View File

@@ -0,0 +1,41 @@
package com.baeldung.nlp;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import edu.stanford.nlp.util.CoreMap;
import org.junit.Test;
import java.util.List;
import java.util.Properties;
import static org.junit.Assert.assertEquals;
/**
 * Runs a Stanford CoreNLP pipeline configured with only the {@code tokenize}
 * annotator and checks that re-joining the produced tokens with single spaces
 * gives back the input sentence.
 */
public class CoreNLPTokenizerUnitTest {

    /** Input sentence; it is also the expected result of re-joining the tokens. */
    private static final String SAMPLE_TEXT = "The german shepard display an act of kindness";

    @Test
    public void givenSampleText_whenTokenize_thenExpectedTokensReturned() {
        Properties pipelineConfig = new Properties();
        pipelineConfig.setProperty("annotators", "tokenize");
        StanfordCoreNLP nlp = new StanfordCoreNLP(pipelineConfig);

        Annotation annotated = new Annotation(SAMPLE_TEXT);
        nlp.annotate(annotated);

        assertEquals(SAMPLE_TEXT, joinTokens(annotated));
    }

    /**
     * Concatenates the text of every token of every sentence in the annotated
     * document, separated by a single space, with surrounding whitespace trimmed.
     *
     * @param annotated a document that has already been passed through the pipeline
     * @return the space-separated token texts
     */
    private static String joinTokens(Annotation annotated) {
        // NOTE(review): sentences are read although only "tokenize" is configured;
        // confirm the CoreNLP version in use populates SentencesAnnotation in that setup.
        List<CoreMap> sentenceMaps = annotated.get(CoreAnnotations.SentencesAnnotation.class);

        StringBuilder joined = new StringBuilder();
        for (CoreMap sentenceMap : sentenceMaps) {
            List<CoreLabel> labels = sentenceMap.get(CoreAnnotations.TokensAnnotation.class);
            for (CoreLabel label : labels) {
                joined.append(label.get(CoreAnnotations.TextAnnotation.class));
                joined.append(' ');
            }
        }
        // Trimming drops the separator appended after the last token.
        return joined.toString().trim();
    }
}

View File

@@ -0,0 +1,33 @@
package com.baeldung.nlp;
import opennlp.tools.langdetect.Language;
import opennlp.tools.langdetect.LanguageDetectorME;
import opennlp.tools.langdetect.LanguageDetectorModel;
import org.junit.jupiter.api.Test;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import static org.junit.jupiter.api.Assertions.assertEquals;
/**
 * Checks that the OpenNLP language detector reports the ISO 639-3 style code
 * {@code "eng"} for a short English sentence.
 */
class OpenNLPLanguageDetector {

    /**
     * Loads the language-detection model from {@code langdetect-183.bin}, predicts
     * the language of an English sentence and asserts the predicted code.
     *
     * @throws IOException if the model file cannot be opened or read; the failure
     *                     is propagated so the test is reported as failed instead
     *                     of silently passing without executing any assertion
     */
    @Test
    public void givenTextInEnglish_whenDetectLanguage_thenReturnsEnglishLanguageCode() throws IOException {
        String text = "the dream my father told me";

        LanguageDetectorModel model;
        // The path is relative, so it is resolved against the JVM working directory.
        // NOTE(review): confirm the model file is actually present there in the build.
        try (InputStream modelIn = new FileInputStream("langdetect-183.bin")) {
            model = new LanguageDetectorModel(modelIn);
        }

        LanguageDetectorME detector = new LanguageDetectorME(model);
        Language language = detector.predictLanguage(text);

        assertEquals("eng", language.getLang());
    }
}