Lucene (2): Building the Index, and Index Add/Delete/Update/Query
I. The overall knowledge map of indexing
II. Example 1: building an index over multiple files and searching it
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.util.Version;
public class HelloLucene {

    public static void main(String[] args) {
        HelloLucene helloLucene = new HelloLucene();
        helloLucene.index();
        helloLucene.search();
    }

    /** Build an index over the files under d:/TestLucene. */
    public void index() {
        IndexWriter writer = null;
        // 1. Create the Directory
        // Directory directory = new RAMDirectory(); // index held in memory
        try {
            Directory directory = FSDirectory.open(new File("d:/index")); // index stored on disk
            // 2. Create the IndexWriter
            writer = new IndexWriter(directory,
                    new IndexWriterConfig(Version.LUCENE_35, new StandardAnalyzer(Version.LUCENE_35)));
            // 3. Create the Document objects
            Document doc = null;
            // 4. Add Fields to each Document
            File f = new File("d:/TestLucene");
            for (File file : f.listFiles()) {
                doc = new Document();
                doc.add(new Field("content", new FileReader(file)));
                doc.add(new Field("filename", file.getName(), Field.Store.YES, Field.Index.NOT_ANALYZED));
                doc.add(new Field("path", file.getAbsolutePath(), Field.Store.YES, Field.Index.NOT_ANALYZED));
                writer.addDocument(doc);
            }
        } catch (CorruptIndexException e) {
            e.printStackTrace();
        } catch (LockObtainFailedException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            if (writer != null) {
                try {
                    writer.close();
                } catch (CorruptIndexException e) {
                    e.printStackTrace();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /** Search the index. */
    public void search() {
        try {
            // 1. Open the Directory
            Directory directory = FSDirectory.open(new File("d:/index"));
            // 2. Create the IndexReader
            IndexReader reader = IndexReader.open(directory);
            // 3. Create an IndexSearcher from the IndexReader
            IndexSearcher searcher = new IndexSearcher(reader);
            // 4. Build the Query
            QueryParser parser = new QueryParser(Version.LUCENE_35, "content", new StandardAnalyzer(Version.LUCENE_35));
            Query query = parser.parse("奧運"); // find files whose content field contains "奧運"
            // 5. Search via the searcher and get the TopDocs
            TopDocs tds = searcher.search(query, 10);
            // 6. Get the ScoreDoc objects from the TopDocs
            ScoreDoc[] sds = tds.scoreDocs;
            for (ScoreDoc sd : sds) {
                // 7. Fetch the concrete Document via the searcher and the ScoreDoc
                Document d = searcher.doc(sd.doc);
                // 8. Read the values we need from the Document
                System.out.println(d.get("filename") + "[" + d.get("path") + "]");
            }
            // 9. Close the reader
            reader.close();
        } catch (CorruptIndexException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } catch (ParseException e) {
            e.printStackTrace();
        }
    }
}
III. Field options, and opening the generated index binaries with Luke
1. What the Field options mean
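As a quick reference, the sketch below summarizes the Field.Store / Field.Index combinations that the two examples in this post rely on. It is only a minimal illustration for Lucene 3.5: the class name FieldOptionsDemo, the field values, and the file path d:/TestLucene/a.txt are made up for the example.

import java.io.FileReader;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;

public class FieldOptionsDemo {
    public static Document buildDoc() throws Exception {
        Document doc = new Document();
        // Stored and indexed as a single token (no tokenization): good for ids, file names, paths.
        doc.add(new Field("filename", "a.txt", Field.Store.YES, Field.Index.NOT_ANALYZED));
        // Same, but norms are skipped as well, so no length/boost information is kept for scoring.
        doc.add(new Field("id", "1", Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
        // Tokenized by the analyzer but not stored: searchable, yet doc.get("content") returns null.
        doc.add(new Field("content", "welcome to visited the space,I like book",
                Field.Store.NO, Field.Index.ANALYZED));
        // Reader-based field: tokenized and indexed, never stored (used for file contents in example 1).
        doc.add(new Field("body", new FileReader("d:/TestLucene/a.txt")));
        return doc;
    }
}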
2. Analyzing the index files with Luke
After the index has been built, the generated files can be found in the index directory on disk.
Luke gives a quick view of what each of these files is for.
Luke has other features as well, which I won't go through one by one here.
IV. Example 2: adding, deleting, updating, and querying index entries
import java.io.File;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.StaleReaderException;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.util.Version;
public class IndexUtil {

    private String[] ids = {"1","2","3","4","5","6"};
    private String[] emails = {"aa@itat.org","bb@itat.org","cc@cc.org","dd@sina.org","ee@zttc.edu","ff@itat.org"};
    private String[] contents = {
            "welcome to visited the space,I like book",
            "hello boy, I like pingpeng ball",
            "my name is cc I like game",
            "I like football",
            "I like football and I like basketball too",
            "I like movie and swim"
    };
    private int[] attachs = {2,3,1,4,5,5}; // attachments
    private String[] names = {"zhangsan","lisi","john","jetty","mike","jake"};
    private Map<String,Float> scores = new HashMap<String,Float>(); // boost weights
    private Directory directory = null;

    public static void main(String[] args) {
        IndexUtil iUtil = new IndexUtil();
        iUtil.index();
        iUtil.search();
    }
    /** Constructor: set up the boost weights and the index directory. */
    public IndexUtil() {
        scores.put("itat.org", 2.0f);
        scores.put("zttc.edu", 1.5f);
        try {
            // 1. Create the place where the index is stored
            directory = FSDirectory.open(new File("d:/index"));
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
    /** Build the index. */
    public void index() {
        // 2. Build the index
        IndexWriter writer = null;
        try {
            writer = new IndexWriter(directory,
                    new IndexWriterConfig(Version.LUCENE_35, new StandardAnalyzer(Version.LUCENE_35)));
            // 3. Create documents and index them (a document is like a row in a two-dimensional table,
            //    a field is like a column, so the whole index can be thought of as such a table)
            Document doc = null;
            for (int i = 0; i < ids.length; i++) {
                doc = new Document();
                doc.add(new Field("id", ids[i], Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
                doc.add(new Field("email", emails[i], Field.Store.YES, Field.Index.NOT_ANALYZED));
                doc.add(new Field("content", contents[i], Field.Store.NO, Field.Index.ANALYZED));
                doc.add(new Field("name", names[i], Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
                String et = emails[i].substring(emails[i].lastIndexOf("@") + 1);
                System.out.println(et);
                // boost the document according to its email domain
                // if (scores.containsKey(et)) {
                //     doc.setBoost(scores.get(et));
                // } else {
                //     doc.setBoost(0.5f);
                // }
                writer.addDocument(doc);
            }
        } catch (CorruptIndexException e) {
            e.printStackTrace();
        } catch (LockObtainFailedException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            if (writer != null) {
                try {
                    writer.close();
                } catch (CorruptIndexException e) {
                    e.printStackTrace();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }
    /** Print basic statistics about the index. */
    public void query() {
        try {
            IndexReader reader = IndexReader.open(directory);
            // the reader gives cheap access to the document counts
            System.out.println("numDocs:" + reader.numDocs());
            System.out.println("maxDocs:" + reader.maxDoc());
            System.out.println("deleteDocs:" + reader.numDeletedDocs());
            reader.close();
        } catch (CorruptIndexException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
    /** Run a concrete search with a TermQuery. */
    public void search() {
        try {
            IndexReader reader = IndexReader.open(directory);
            IndexSearcher searcher = new IndexSearcher(reader);
            TermQuery query = new TermQuery(new Term("content", "like"));
            TopDocs tds = searcher.search(query, 10);
            for (ScoreDoc sd : tds.scoreDocs) {
                Document doc = searcher.doc(sd.doc);
                System.out.println("(" + sd.doc + "-" + doc.getBoost() + "-" + sd.score + ")" +
                        doc.get("name") + "[" + doc.get("email") + "]-->" + doc.get("id"));
            }
            reader.close();
        } catch (CorruptIndexException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
    /** Delete documents from the index. */
    public void delete() {
        IndexWriter writer = null;
        try {
            writer = new IndexWriter(directory,
                    new IndexWriterConfig(Version.LUCENE_35, new StandardAnalyzer(Version.LUCENE_35)));
            // deleteDocuments accepts either a Query or a Term; a Term is an exact-match value
            // at this point the documents are not physically removed: they sit in a "recycle bin"
            // and can still be recovered
            writer.deleteDocuments(new Term("id", "1"));
            writer.commit();
        } catch (CorruptIndexException e) {
            e.printStackTrace();
        } catch (LockObtainFailedException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            try {
                if (writer != null) writer.close();
            } catch (CorruptIndexException e) {
                e.printStackTrace();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
    /** Recover the deleted documents. */
    public void undelete() {
        // recovery is done through an IndexReader
        try {
            // for recovery the reader must be opened with readOnly set to false
            IndexReader reader = IndexReader.open(directory, false);
            reader.undeleteAll();
            reader.close();
        } catch (CorruptIndexException e) {
            e.printStackTrace();
        } catch (StaleReaderException e) {
            e.printStackTrace();
        } catch (LockObtainFailedException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
    /** Purge the documents in the "recycle bin" for good. */
    public void forceDelete() {
        IndexWriter writer = null;
        try {
            writer = new IndexWriter(directory,
                    new IndexWriterConfig(Version.LUCENE_35, new StandardAnalyzer(Version.LUCENE_35)));
            writer.forceMergeDeletes();
        } catch (CorruptIndexException e) {
            e.printStackTrace();
        } catch (LockObtainFailedException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            try {
                if (writer != null) writer.close();
            } catch (CorruptIndexException e) {
                e.printStackTrace();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
    /** Update the index. */
    public void update() {
        IndexWriter writer = null;
        try {
            writer = new IndexWriter(directory,
                    new IndexWriterConfig(Version.LUCENE_35, new StandardAnalyzer(Version.LUCENE_35)));
            /*
             * Lucene has no real in-place update: the "update" here is the combination of
             * two operations, a delete followed by an add.
             */
            Document doc = new Document();
            doc.add(new Field("id", "11", Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
            doc.add(new Field("email", emails[0], Field.Store.YES, Field.Index.NOT_ANALYZED));
            doc.add(new Field("content", contents[0], Field.Store.NO, Field.Index.ANALYZED));
            doc.add(new Field("name", names[0], Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
            writer.updateDocument(new Term("id", "1"), doc);
        } catch (CorruptIndexException e) {
            e.printStackTrace();
        } catch (LockObtainFailedException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            try {
                if (writer != null) writer.close();
            } catch (CorruptIndexException e) {
                e.printStackTrace();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}
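The main method above only exercises index() and search(). To see the delete, recycle-bin recovery, and forced purge behaviour described in the comments, a small driver along the following lines could be used. This is just a sketch: it assumes it runs against a freshly built d:/index with the IndexUtil class above on the classpath, and the class name IndexUtilDemo is made up.

public class IndexUtilDemo {
    public static void main(String[] args) {
        IndexUtil util = new IndexUtil();
        util.index();       // index the six sample documents
        util.query();       // numDocs == maxDocs, deleteDocs is 0

        util.delete();      // delete the document whose id is "1" (it goes to the "recycle bin")
        util.query();       // numDocs drops by one, deleteDocs becomes 1

        util.undelete();    // recover it via IndexReader.undeleteAll()
        util.query();       // back to the original counts

        util.delete();      // delete it again ...
        util.forceDelete(); // ... and merge the deletes away for good
        util.query();       // maxDocs shrinks too; the document can no longer be recovered

        util.update();      // updateDocument: delete id "1" (already purged here) and add a doc with id "11"
        util.search();      // TermQuery on content:"like" matches the remaining documents
    }
}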