-
-
Save mutekinootoko/2c9f1247bebf91d65d071fc6819795dd to your computer and use it in GitHub Desktop.
複習 Lucene 的用法 (6.x)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import static org.junit.Assert.assertEquals; | |
import java.io.IOException; | |
import org.apache.log4j.Logger; | |
import org.apache.lucene.analysis.Analyzer; | |
import org.apache.lucene.analysis.cjk.CJKAnalyzer; | |
import org.apache.lucene.document.Document; | |
import org.apache.lucene.document.Field.Store; | |
import org.apache.lucene.document.TextField; | |
import org.apache.lucene.index.CorruptIndexException; | |
import org.apache.lucene.index.DirectoryReader; | |
import org.apache.lucene.index.IndexWriter; | |
import org.apache.lucene.index.IndexWriterConfig; | |
import org.apache.lucene.index.IndexWriterConfig.OpenMode; | |
import org.apache.lucene.queryparser.classic.ParseException; | |
import org.apache.lucene.queryparser.classic.QueryParser; | |
import org.apache.lucene.search.IndexSearcher; | |
import org.apache.lucene.search.Query; | |
import org.apache.lucene.search.TopDocs; | |
import org.apache.lucene.store.Directory; | |
import org.apache.lucene.store.RAMDirectory; | |
import org.junit.After; | |
import org.junit.Before; | |
import org.junit.Test; | |
public class LuceneLab { | |
Logger logger = Logger.getLogger(LuceneLab.class); | |
Directory directory; | |
@Before | |
public void setUp() throws Exception { | |
directory = new RAMDirectory(); | |
try (IndexWriter writer = createIndexWriter(directory)) { | |
Document document = new Document(); | |
document.add(new TextField("NAME", "誠實豆沙包", Store.YES)); | |
writer.addDocument(document); | |
} | |
} | |
@After | |
public void tearDown() throws IOException { | |
directory.close(); | |
} | |
@Test | |
public void testSearch() throws Exception { | |
assertEquals(1, search("誠實豆沙包")); | |
assertEquals(0, search("慚愧棒棒糖")); | |
} | |
protected int search(String key) throws CorruptIndexException, IOException, ParseException { | |
try (DirectoryReader reader = DirectoryReader.open(directory)) { | |
IndexSearcher searcher = new IndexSearcher(reader); | |
QueryParser parser = new QueryParser("NAME", new CJKAnalyzer()); | |
Query query = parser.parse(key); | |
TopDocs docs = searcher.search(query, 100); | |
logger.info(String.format("尋找 %s", key)); | |
if (docs.totalHits == 0) { | |
logger.info(String.format("找不到 %s", key)); | |
} else { | |
logger.info(String.format("發現 %s", key)); | |
} | |
return docs.totalHits; | |
} | |
} | |
protected IndexWriter createIndexWriter(Directory directory) throws IOException { | |
Analyzer analyzer = new CJKAnalyzer(); | |
IndexWriterConfig config = new IndexWriterConfig(analyzer); | |
config.setOpenMode(OpenMode.CREATE_OR_APPEND); | |
return new IndexWriter(directory, config); | |
} | |
} | |
// Gradle dependencies
// compile group: 'org.apache.lucene', name: 'lucene-core', version: '6.2.0'
// compile group: 'org.apache.lucene', name: 'lucene-analyzers-common', version: '6.2.0'
// compile group: 'org.apache.lucene', name: 'lucene-queryparser', version: '6.2.0'
如果之後要計算 term vector,就不能直接用 TextField(它不會儲存 term vector),必須自訂 FieldType 並開啟 storeTermVectors:
// Custom field type: stored, indexed with documents and term frequencies, term vectors enabled.
FieldType termVectorType = new FieldType();
termVectorType.setStoreTermVectors(true);
termVectorType.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
termVectorType.setStored(true);
Field contentField = new Field("content", content, termVectorType);
doc.add(contentField);
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
13
down vote
In order to write the changes to the Index you have to close the index writer and then open the IndexReader.
writer.close();
I was facing similar issue, and using writer.commit() fixed it. If I had used writer.close(), I would have to re-open the writer again.
http://stackoverflow.com/questions/10460737/org-apache-lucene-index-indexnotfoundexception-no-segments-file-found-in-org-a#10460874