From 33198c73e179c4d7b34ec88f1ead04c0489d1fc9 Mon Sep 17 00:00:00 2001 From: "dhrubajyoti.bhattacharjee" Date: Wed, 13 Dec 2017 00:49:49 +0530 Subject: [PATCH] BAEL-1207 File search using Lucene --- lucene/.gitignore | 1 + .../com/baeldung/lucene/LuceneFileSearch.java | 80 +++++++++++++++++++ .../baeldung/lucene/LuceneFileSearchTest.java | 32 ++++++++ lucene/src/test/resources/data/file1.txt | 3 + 4 files changed, 116 insertions(+) create mode 100644 lucene/.gitignore create mode 100644 lucene/src/main/java/com/baeldung/lucene/LuceneFileSearch.java create mode 100644 lucene/src/test/java/com/baeldung/lucene/LuceneFileSearchTest.java create mode 100644 lucene/src/test/resources/data/file1.txt diff --git a/lucene/.gitignore b/lucene/.gitignore new file mode 100644 index 0000000000..3ed87faec7 --- /dev/null +++ b/lucene/.gitignore @@ -0,0 +1 @@ +/index/ diff --git a/lucene/src/main/java/com/baeldung/lucene/LuceneFileSearch.java b/lucene/src/main/java/com/baeldung/lucene/LuceneFileSearch.java new file mode 100644 index 0000000000..1d090d55fc --- /dev/null +++ b/lucene/src/main/java/com/baeldung/lucene/LuceneFileSearch.java @@ -0,0 +1,80 @@ +package com.baeldung.lucene; + +import java.io.File; +import java.io.FileReader; +import java.io.IOException; +import java.net.URISyntaxException; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.List; + +import org.apache.lucene.analysis.standard.StandardAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.StringField; +import org.apache.lucene.document.TextField; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.queryparser.classic.ParseException; +import org.apache.lucene.queryparser.classic.QueryParser; +import 
org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.ScoreDoc;
+import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.store.Directory;
+
+/**
+ * Indexes files found on the classpath with Lucene and runs queries against that index.
+ *
+ * <p>The same {@link Directory} and analyzer are used for both indexing and searching.
+ */
+public class LuceneFileSearch {
+
+    private final Directory indexDirectory;
+    private final StandardAnalyzer analyzer;
+
+    /**
+     * Creates a search helper bound to one index location.
+     *
+     * @param fsDirectory directory the index is written to and read from
+     * @param analyzer analyzer used when indexing documents and when parsing queries
+     */
+    public LuceneFileSearch(Directory fsDirectory, StandardAnalyzer analyzer) {
+        this.indexDirectory = fsDirectory;
+        this.analyzer = analyzer;
+    }
+
+    /**
+     * Adds a single classpath resource to the index as one document.
+     *
+     * <p>The document gets three fields: {@code contents} (the file text, supplied through a
+     * reader), {@code path} and {@code filename} (both stored so they can be read back from
+     * search hits). The file is read with {@link FileReader}, i.e. with the default charset.
+     *
+     * @param filepath resource name, resolved through this class's class loader
+     * @throws IOException if the resource does not exist, cannot be read, or the index cannot
+     *         be written
+     * @throws URISyntaxException if the resource URL cannot be converted to a URI
+     */
+    public void addFileToIndex(String filepath) throws IOException, URISyntaxException {
+        java.net.URL resource = getClass().getClassLoader().getResource(filepath);
+        if (resource == null) {
+            // getResource signals "not found" with null; fail with a clear message instead of an NPE.
+            throw new java.io.FileNotFoundException("Classpath resource not found: " + filepath);
+        }
+        Path path = Paths.get(resource.toURI());
+        File file = path.toFile();
+        IndexWriterConfig indexWriterConfig = new IndexWriterConfig(analyzer);
+
+        // try-with-resources guarantees the writer (and the index write lock it holds) and the
+        // file reader are released even when opening the file or adding the document fails.
+        try (IndexWriter indexWriter = new IndexWriter(indexDirectory, indexWriterConfig);
+                FileReader fileReader = new FileReader(file)) {
+            Document document = new Document();
+            document.add(new TextField("contents", fileReader));
+            document.add(new StringField("path", file.getPath(), Field.Store.YES));
+            document.add(new StringField("filename", file.getName(), Field.Store.YES));
+
+            indexWriter.addDocument(document);
+        }
+    }
+
+    /**
+     * Searches the index and returns the matching documents.
+     *
+     * @param inField default field the query is parsed against
+     * @param queryString query in Lucene classic query-parser syntax
+     * @return up to 10 top-scoring documents, or {@code null} if the query cannot be parsed or
+     *         the index cannot be read (the exception's stack trace is printed in that case)
+     */
+    public List<Document> searchFiles(String inField, String queryString) {
+        try {
+            Query query = new QueryParser(inField, analyzer).parse(queryString);
+
+            // The reader is closed once the hits have been loaded into Document objects.
+            try (IndexReader indexReader = DirectoryReader.open(indexDirectory)) {
+                IndexSearcher searcher = new IndexSearcher(indexReader);
+                TopDocs topDocs = searcher.search(query, 10);
+                List<Document> documents = new ArrayList<>();
+                for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
+                    documents.add(searcher.doc(scoreDoc.doc));
+                }
+
+                return documents;
+            }
+        } catch (IOException | ParseException e) {
+            e.printStackTrace();
+        }
+        // Kept for backward compatibility: callers receive null when the search failed.
+        return null;
+
+    }
+
+}
+
+
diff --git a/lucene/src/test/java/com/baeldung/lucene/LuceneFileSearchTest.java b/lucene/src/test/java/com/baeldung/lucene/LuceneFileSearchTest.java
new file mode 100644
index 0000000000..4345057ff7
--- /dev/null
+++ 
b/lucene/src/test/java/com/baeldung/lucene/LuceneFileSearchTest.java
@@ -0,0 +1,41 @@
+package com.baeldung.lucene;
+
+import java.io.IOException;
+import java.net.URISyntaxException;
+import java.nio.file.Paths;
+import java.util.List;
+
+import org.apache.lucene.analysis.standard.StandardAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.FSDirectory;
+import org.junit.Assert;
+import org.junit.Test;
+
+/**
+ * Verifies that a file indexed by {@link LuceneFileSearch} can be found by a term it contains.
+ */
+public class LuceneFileSearchTest {
+
+    /**
+     * Indexes {@code data/file1.txt} from the test classpath, searches the {@code contents}
+     * field for "consectetur" and expects the first hit to carry the stored file name.
+     */
+    @Test
+    public void givenSearchQueryWhenFetchedFileNamehenCorrect() throws IOException, URISyntaxException {
+        String indexPath = "index";
+        String dataPath = "data/file1.txt";
+
+        // The directory is opened in try-with-resources so it is closed when the test ends.
+        try (Directory directory = FSDirectory.open(Paths.get(indexPath))) {
+            LuceneFileSearch luceneFileSearch = new LuceneFileSearch(directory, new StandardAnalyzer());
+
+            luceneFileSearch.addFileToIndex(dataPath);
+
+            List<Document> docs = luceneFileSearch.searchFiles("contents", "consectetur");
+
+            Assert.assertEquals("file1.txt", docs.get(0).get("filename"));
+        }
+    }
+
+}
\ No newline at end of file
diff --git a/lucene/src/test/resources/data/file1.txt b/lucene/src/test/resources/data/file1.txt
new file mode 100644
index 0000000000..6f915d3927
--- /dev/null
+++ b/lucene/src/test/resources/data/file1.txt
@@ -0,0 +1,3 @@
+Cras auctor viverra arcu, id consequat diam posuere id. Pellentesque hendrerit felis tortor, et ornare nibh ullamcorper sed. Aenean sed mauris vitae purus auctor gravida. Nam aliquam egestas orci, sit amet imperdiet leo porttitor quis. Integer commodo sodales orci, ultrices vulputate arcu vestibulum non. Nunc at tellus id urna tristique ultrices in in massa. Vestibulum laoreet ullamcorper nulla vel porttitor. Duis blandit commodo elit at consequat. Vestibulum faucibus lectus eget mi tincidunt, quis molestie lacus mollis. Duis elementum urna eros, non iaculis est facilisis in. Praesent et neque vel ipsum viverra euismod ac ac metus. Ut vitae libero ex. 
+ +Proin consectetur, neque nec feugiat facilisis, metus libero mollis arcu, id pharetra nibh sapien in elit. Orci varius natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. Nam pulvinar fringilla orci in posuere. Duis ut turpis dignissim nisl eleifend posuere nec a massa. Cras fringilla iaculis ipsum a aliquet. Nunc ultrices nisl ipsum, vitae consectetur tellus vehicula in. Aliquam lacinia elit nec scelerisque dapibus. Duis pharetra mauris vitae quam tincidunt, viverra iaculis orci iaculis. Nunc gravida sem arcu, et mollis leo porttitor nec. Ut dictum tempor est, at fringilla ex feugiat sed. Nullam purus mi, aliquet eu libero ut, finibus efficitur metus. \ No newline at end of file