Last active
October 31, 2016 13:43
-
-
Save qrtt1/6560456 to your computer and use it in GitHub Desktop.
複習 Lucene 的用法 (6.x)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import static org.junit.Assert.assertEquals; | |
import java.io.IOException; | |
import org.apache.log4j.Logger; | |
import org.apache.lucene.analysis.Analyzer; | |
import org.apache.lucene.analysis.cjk.CJKAnalyzer; | |
import org.apache.lucene.document.Document; | |
import org.apache.lucene.document.Field.Store; | |
import org.apache.lucene.document.TextField; | |
import org.apache.lucene.index.CorruptIndexException; | |
import org.apache.lucene.index.DirectoryReader; | |
import org.apache.lucene.index.IndexWriter; | |
import org.apache.lucene.index.IndexWriterConfig; | |
import org.apache.lucene.index.IndexWriterConfig.OpenMode; | |
import org.apache.lucene.queryparser.classic.ParseException; | |
import org.apache.lucene.queryparser.classic.QueryParser; | |
import org.apache.lucene.search.IndexSearcher; | |
import org.apache.lucene.search.Query; | |
import org.apache.lucene.search.TopDocs; | |
import org.apache.lucene.store.Directory; | |
import org.apache.lucene.store.RAMDirectory; | |
import org.junit.After; | |
import org.junit.Before; | |
import org.junit.Test; | |
public class LuceneLab { | |
Logger logger = Logger.getLogger(LuceneLab.class); | |
Directory directory; | |
@Before | |
public void setUp() throws Exception { | |
directory = new RAMDirectory(); | |
try (IndexWriter writer = createIndexWriter(directory)) { | |
Document document = new Document(); | |
document.add(new TextField("NAME", "誠實豆沙包", Store.YES)); | |
writer.addDocument(document); | |
} | |
} | |
@After | |
public void tearDown() throws IOException { | |
directory.close(); | |
} | |
@Test | |
public void testSearch() throws Exception { | |
assertEquals(1, search("誠實豆沙包")); | |
assertEquals(0, search("慚愧棒棒糖")); | |
} | |
protected int search(String key) throws CorruptIndexException, IOException, ParseException { | |
try (DirectoryReader reader = DirectoryReader.open(directory)) { | |
IndexSearcher searcher = new IndexSearcher(reader); | |
QueryParser parser = new QueryParser("NAME", new CJKAnalyzer()); | |
Query query = parser.parse(key); | |
TopDocs docs = searcher.search(query, 100); | |
logger.info(String.format("尋找 %s", key)); | |
if (docs.totalHits == 0) { | |
logger.info(String.format("找不到 %s", key)); | |
} else { | |
logger.info(String.format("發現 %s", key)); | |
} | |
return docs.totalHits; | |
} | |
} | |
protected IndexWriter createIndexWriter(Directory directory) throws IOException { | |
Analyzer analyzer = new CJKAnalyzer(); | |
IndexWriterConfig config = new IndexWriterConfig(analyzer); | |
config.setOpenMode(OpenMode.CREATE_OR_APPEND); | |
return new IndexWriter(directory, config); | |
} | |
} | |
// Gradle dependencies:
// compile group: 'org.apache.lucene', name: 'lucene-core', version: '6.2.0'
// compile group: 'org.apache.lucene', name: 'lucene-analyzers-common', version: '6.2.0'
// compile group: 'org.apache.lucene', name: 'lucene-queryparser', version: '6.2.0'
幫加註:如果之後要算term vector,不能用 text field。
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Output: