1、下载安装,只下载elasticSearch、Kibana即可
这里我使用 7.6.2 版本的 Elasticsearch:项目使用的是 Spring Boot 2.3.x,其内置的客户端版本较低,为避免出现“低版本客户端连接高版本索引库”的兼容问题,这里先退回使用低版本的索引库。
插件安装(需要 ingest-attachment 文本抽取插件和 IK 中文分词插件,插件版本须与 Elasticsearch 版本一致)
插件下载完成之后,将压缩包解压到 Elasticsearch 的 plugins 目录,之后重启 Elasticsearch
定义文本抽取管道
PUT /_ingest/pipeline/attachment
{
  "description": "Extract attachment information",
  "processors": [
    {
      "attachment": {
        "field": "data",
        "indexed_chars": -1,
        "ignore_missing": true
      }
    },
    {
      "remove": {
        "field": "data",
        "ignore_missing": true
      }
    }
  ]
}
2、SpringBoot整合ElasticSearch
<dependencies>
    <!-- Spring MVC: provides the REST controller support used by this demo. -->
    <dependency>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-web</artifactId>
    </dependency>
    <!-- Test support only: scoped to "test" so it stays off the runtime classpath. -->
    <dependency>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-test</artifactId>
        <scope>test</scope>
    </dependency>
    <!-- Spring Data Elasticsearch: ElasticsearchRestTemplate and RestHighLevelClient. -->
    <dependency>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-data-elasticsearch</artifactId>
    </dependency>
    <!-- Used to serialize the entity to JSON before indexing.
         1.2.83 contains the fixes for the published autoType deserialization vulnerabilities. -->
    <dependency>
        <groupId>com.alibaba</groupId>
        <artifactId>fastjson</artifactId>
        <version>1.2.83</version>
    </dependency>
    <!-- Generates getters/setters for the entity class at compile time. -->
    <dependency>
        <groupId>org.projectlombok</groupId>
        <artifactId>lombok</artifactId>
        <version>1.18.20</version>
    </dependency>
</dependencies>
application.yml
# HTTP port of the demo application.
server:
  port: 9090
spring:
  application:
    name: elasticsearch-service
  # Address of the Elasticsearch node used by the REST client.
  elasticsearch:
    rest:
      uris: http://127.0.0.1:9200
实体类
package top.fate.entity;
import lombok.Data;
import org.springframework.data.elasticsearch.annotations.Document;
import org.springframework.data.elasticsearch.annotations.Field;
import org.springframework.data.elasticsearch.annotations.FieldType;
/**
 * Elasticsearch document describing one uploaded file.
 *
 * <p>The index name matches the index the controller creates, writes to and searches
 * ({@code testindex}), so that {@code indexOps(FileData.class)} addresses the same index.
 *
 * @author Wangxl (18335844494@163.com)
 *         Created: 2020/11/2 14:15
 */
@Data
@Document(indexName = "testindex")
public class FileData {

    /** Primary key of the file; the controller uses it as the document id. */
    @Field(type = FieldType.Keyword)
    private String filePk;

    /** Name of the file. */
    @Field(type = FieldType.Keyword)
    private String fileName;

    /** Numeric value, therefore mapped as an integer rather than a keyword. */
    @Field(type = FieldType.Integer)
    private Integer page;

    @Field(type = FieldType.Keyword)
    private String departmentId;

    @Field(type = FieldType.Keyword)
    private String ljdm;

    /**
     * On upload this carries the base64-encoded file content, which the ingest pipeline
     * removes after extraction; in search results the controller stores the highlighted
     * fragments here.
     */
    @Field(type = FieldType.Text, analyzer = "ik_max_word")
    private String data;

    @Field(type = FieldType.Keyword)
    private String realName;

    @Field(type = FieldType.Keyword)
    private String url;

    @Field(type = FieldType.Keyword)
    private String type;
}
接口类
package top.fate.controller;
import com.alibaba.fastjson.JSON;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.action.index.IndexResponse;
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightField;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.data.elasticsearch.core.ElasticsearchRestTemplate;
import org.springframework.data.elasticsearch.core.IndexOperations;
import org.springframework.data.elasticsearch.core.document.Document;
import org.springframework.data.elasticsearch.core.mapping.IndexCoordinates;
import org.springframework.util.Base64Utils;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.bind.annotation.RestController;
import top.fate.entity.FileData;
import java.io.File;
import java.io.FileInputStream;
import java.lang.reflect.Method;
import java.nio.file.Files;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
/**
 * REST endpoints demonstrating full-text search over file contents with Elasticsearch.
 *
 * <p>Files are indexed base64-encoded through the {@code attachment} ingest pipeline and are
 * searched on the {@code attachment.content} field. Every endpoint operates on the same
 * index, {@code testindex}.
 *
 * @author Wangxl (18335844494@163.com)
 *         Created: 2022/6/1 16:33
 */
@RestController
@RequestMapping(value = "fullTextSearch")
public class FullTextSearchController {

    /** Index that every endpoint of this controller creates, deletes, writes to and searches. */
    private static final String INDEX_NAME = "testindex";

    /** Ingest pipeline applied when a file is indexed. */
    private static final String PIPELINE_NAME = "attachment";

    /** Field that is queried and highlighted. */
    private static final String CONTENT_FIELD = "attachment.content";

    /** Sample document indexed by {@link #uploadFileToEs()}; a PDF can be used the same way. */
    private static final String SAMPLE_FILE_PATH = "D:\\desktop\\Java开发工程师-4年-王晓龙-2022-05.docx";

    @Autowired
    private ElasticsearchRestTemplate elasticsearchRestTemplate;

    @Autowired
    private RestHighLevelClient restHighLevelClient;

    /**
     * Creates the index if it does not exist yet and applies the mapping derived from
     * {@link FileData}.
     */
    @GetMapping("createIndex")
    public void add() {
        IndexOperations indexOperations = elasticsearchRestTemplate.indexOps(IndexCoordinates.of(INDEX_NAME));
        // Creating an index that already exists is an error, so only create it when absent.
        if (!indexOperations.exists()) {
            indexOperations.create();
        }
        Document mapping = indexOperations.createMapping(FileData.class);
        indexOperations.putMapping(mapping);
    }

    /**
     * Deletes the index. The index is addressed by name so that this endpoint removes the
     * same index that {@link #add()} creates.
     */
    @GetMapping("deleteIndex")
    public void deleteIndex() {
        IndexOperations indexOperations = elasticsearchRestTemplate.indexOps(IndexCoordinates.of(INDEX_NAME));
        indexOperations.delete();
    }

    /**
     * Reads the sample file, base64-encodes it and indexes it through the ingest pipeline.
     * The file name is used both as the file name and as the document id.
     *
     * <p>Failures are printed and otherwise ignored, so the endpoint responds normally even
     * when indexing did not succeed.
     */
    @GetMapping("uploadFileToEs")
    public void uploadFileToEs() {
        try {
            File file = new File(SAMPLE_FILE_PATH);
            // Reads the complete file and closes the underlying stream even on failure.
            byte[] content = Files.readAllBytes(file.toPath());
            // The content is sent to the pipeline base64-encoded.
            String fileString = Base64Utils.encodeToString(content);

            FileData fileData = new FileData();
            fileData.setFileName(file.getName());
            fileData.setFilePk(file.getName());
            fileData.setData(fileString);

            IndexRequest indexRequest = new IndexRequest(INDEX_NAME).id(fileData.getFilePk());
            indexRequest.source(JSON.toJSONString(fileData), XContentType.JSON);
            indexRequest.setPipeline(PIPELINE_NAME);
            restHighLevelClient.index(indexRequest, RequestOptions.DEFAULT);
        } catch (Exception e) {
            // Best-effort demo endpoint: report the problem but do not fail the request.
            e.printStackTrace();
        }
    }

    /**
     * Searches the extracted file content for the given text.
     *
     * @param txt text to search for
     * @return a list of {@link FileData}; for hits with highlighting, {@code data} holds the
     *         concatenated highlighted fragments. The list is empty when nothing matched or
     *         the search failed.
     */
    @GetMapping("search")
    public Object search(@RequestParam("txt") String txt) {
        List<FileData> results = new ArrayList<>();
        try {
            SearchSourceBuilder builder = new SearchSourceBuilder();
            builder.query(QueryBuilders.matchQuery(CONTENT_FIELD, txt).analyzer("ik_max_word"));
            // Report the real number of hits.
            builder.trackTotalHits(true);

            HighlightBuilder highlightBuilder = new HighlightBuilder();
            highlightBuilder.field(CONTENT_FIELD);
            highlightBuilder.requireFieldMatch(false);
            highlightBuilder.preTags("<span style='color:red'>");
            highlightBuilder.postTags("</span>");
            builder.highlighter(highlightBuilder);

            SearchRequest searchRequest = new SearchRequest(INDEX_NAME);
            searchRequest.source(builder);

            SearchResponse response = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);
            if (response.getHits() != null) {
                for (SearchHit hit : response.getHits().getHits()) {
                    Map<String, Object> source = hit.getSourceAsMap();
                    HighlightField highlight = hit.getHighlightFields().get(CONTENT_FIELD);
                    if (highlight != null) {
                        StringBuilder highlighted = new StringBuilder();
                        for (Text fragment : highlight.fragments()) {
                            highlighted.append(fragment);
                        }
                        source.put("data", highlighted.toString());
                    }
                    FileData fileData = dealObject(source, FileData.class);
                    // A hit that could not be converted is left out instead of adding null.
                    if (fileData != null) {
                        results.add(fileData);
                    }
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
        return results;
    }

    /**
     * Copies the entries of a source map into a new instance of {@code clazz} by calling the
     * public setter {@code setXxx} whose single parameter type equals the runtime class of
     * the value.
     *
     * <p>Entries with a {@code null} or empty key, a {@code null} value, or no matching
     * setter are skipped.
     *
     * @param sourceAsMap property names mapped to values
     * @param clazz       type to instantiate; needs an accessible no-argument constructor
     * @param <T>         type of the created object
     * @return the populated instance, or {@code null} when the instance cannot be created or
     *         a setter call fails
     */
    public static <T> T dealObject(Map<String, Object> sourceAsMap, Class<T> clazz) {
        try {
            T target = clazz.getDeclaredConstructor().newInstance();
            for (Map.Entry<String, Object> entry : sourceAsMap.entrySet()) {
                String key = entry.getKey();
                Object value = entry.getValue();
                // A null value has no runtime class to select a setter with.
                if (key == null || key.isEmpty() || value == null) {
                    continue;
                }
                // Plain character capitalization: no regex, no locale dependence.
                String setterName = "set" + Character.toUpperCase(key.charAt(0)) + key.substring(1);
                Method setter;
                try {
                    setter = clazz.getMethod(setterName, value.getClass());
                } catch (NoSuchMethodException ignored) {
                    // The target type has no property for this entry.
                    continue;
                }
                setter.invoke(target, value);
            }
            return target;
        } catch (Exception e) {
            e.printStackTrace();
        }
        return null;
    }
}
测试
创建索引
localhost:9090/fullTextSearch/createIndex
上传文档
localhost:9090/fullTextSearch/uploadFileToEs
搜索
localhost:9090/fullTextSearch/search?txt=索引库