Elasticsearch搜索功能的实现(五)-- 实战-世界资讯

来源:博客园    时间:2023-04-19 01:22:30


(相关资料图)

实战环境

Elasticsearch 8.5.0 + Kibana 8.5.0 + Spring Boot 3.0.2 + Spring Data Elasticsearch 5.0.2 + JDK 17

一、集成 Spring Data Elasticsearch

1 添加依赖
<dependency>
    <groupId>org.springframework.boot</groupId>
    <artifactId>spring-boot-starter-data-elasticsearch</artifactId>
</dependency>
2 配置es连接
/**
 * Spring configuration that supplies the connection settings for the
 * Elasticsearch client used by Spring Data Elasticsearch.
 */
@Configuration
public class ElasticsearchConfig extends ElasticsearchConfiguration {

    /**
     * Describes how to reach the Elasticsearch node: a single host/port
     * endpoint secured with HTTP basic authentication.
     *
     * @return the client configuration built from the settings below
     */
    @Override
    public ClientConfiguration clientConfiguration() {
        final String hostAndPort = "127.0.0.1:9200";
        final String username = "elastic";
        final String password = "********";

        final ClientConfiguration configuration = ClientConfiguration.builder()
                .connectedTo(hostAndPort)
                .withBasicAuth(username, password)
                .build();
        return configuration;
    }
}
3 配置打印DSL语句
# Logging configuration
logging:
  level:
    # Trace-level logging on the Elasticsearch client WIRE logger, used here to print the DSL sent to Elasticsearch
    org.springframework.data.elasticsearch.client.WIRE: trace
二、index及mapping 文件编写
@Data@Document(indexName = "news") //索引名@Setting(shards = 1,replicas = 0,refreshInterval = "1s") //shards 分片数 replicas 副本数@Schema(name = "News",description = "新闻对象")public class News implements Serializable {    @Id  //索引主键    @NotBlank(message = "新闻ID不能为空")    @Schema(type = "integer",description = "新闻ID",example = "1")    private Integer id;    @NotBlank(message = "新闻标题不能为空")    @Schema(type = "String",description = "新闻标题")    @MultiField(mainField = @Field(type = FieldType.Text, analyzer = "ik_max_word", searchAnalyzer = "ik_smart"),            otherFields = {@InnerField(type = FieldType.Keyword, suffix = "keyword") }) //混合类型字段 指定 建立索引时分词器与搜索时入参分词器    private String title;    @Schema(type = "LocalDate",description = "发布时间")    @Field(type = FieldType.Date,format = DateFormat.date)    private LocalDate pubDate;    @Schema(type = "String",description = "来源")    @Field(type = FieldType.Keyword)    private String source;    @Schema(type = "String",description = "行业类型代码",example = "1,2,3")    @Field(type = FieldType.Text,analyzer = "ik_max_word",searchAnalyzer = "ik_smart")    private String industry;    @Schema(type = "String",description = "预警类型")    @Field(type = FieldType.Keyword)    private String type;    @Schema(type = "String",description = "涉及公司")    @Field(type = FieldType.Text, analyzer = "ik_max_word", searchAnalyzer = "ik_smart")    private String companies;    @Schema(type = "String",description = "新闻内容")    @Field(type = FieldType.Text, analyzer = "ik_max_word", searchAnalyzer = "ik_smart")    private String content;}
三、DAO层编写
/**
 * Spring Data repository for {@link News} documents, keyed by their
 * {@code Integer} id.
 *
 * <p>The type arguments are required: with a raw
 * {@code ElasticsearchRepository} the domain and id types cannot be resolved.
 */
@Repository
public interface NewsRepository extends ElasticsearchRepository<News, Integer> {

    /**
     * Derived query returning one page of news whose {@code type} field
     * equals the given value.
     *
     * @param type     alert type to match
     * @param pageable paging and sorting information
     * @return the requested page of matching news
     */
    Page<News> findByType(String type, Pageable pageable);
}
四、简单功能实现

4.1 简单功能写法
/**     * 新增新闻     * @param news     * @return     */    @Override    public void saveNews(News news) {        newsRepository.save(news);    }    /**     * 删除新闻     * @param newsId     */    @Override    public void delete(Integer newsId) {        newsRepository.deleteById(newsId);    }    /**     * 删除新闻索引     */    @Override    public void deleteIndex() {        operations.indexOps(News.class).delete();    }    /**     * 创建索引     */    @Override    public void createIndex() {        operations.indexOps(News.class).createWithMapping();    }    @Override    public PageResult findByType(String type) {        // 先发布日期排序        Sort sort = Sort.by(new Order(Sort.Direction.DESC, "pubDate"));        Pageable pageable = PageRequest.of(0,10,sort);        final Page newsPage = newsRepository.findByType(type, pageable);        return new PageResult(newsPage.getTotalElements(),newsPage.getContent());    }

实现效果图片:

实际执行的DSL语句:

注意:当指定了不包含 _score 的排序条件时,Elasticsearch 默认不再计算得分,返回结果中的 _score 为 null;如需保留得分,可将 track_scores 设置为 true,或把 _score 也加入排序条件。

4.2 搜索功能的实现
@Override    public PageResult searchNews(NewsPageSearch search) {        //创建原生查询DSL对象        final NativeQueryBuilder nativeQueryBuilder = new NativeQueryBuilder();        // 先发布日期再得分排序        Sort sort = Sort.by(new Order(Sort.Direction.DESC, "pubDate"),new Order(Sort.Direction.DESC, "_score"));        Pageable pageable = PageRequest.of(search.getCurPage(), search.getPageSize(),sort);        final BoolQuery.Builder boolBuilder = new BoolQuery.Builder();        //过滤条件        setFilter(search, boolBuilder);        //关键字搜索        if (StringUtils.isNotBlank(search.getKeyword())){            setKeyWordAndHighlightField(search, nativeQueryBuilder, boolBuilder);        }else {            nativeQueryBuilder.withQuery(q -> q.bool(boolBuilder.build()));        }        nativeQueryBuilder.withPageable(pageable);        SearchHits searchHits = operations.search(nativeQueryBuilder.build(), News.class);        //高亮回填封装        final List newsList = searchHits.getSearchHits().stream()                .map(s -> {                    final News content = s.getContent();                    final List title = s.getHighlightFields().get("title");                    final List contentList = s.getHighlightFields().get("content");                    if (!CollectionUtils.isEmpty(title)){                        s.getContent().setTitle(title.get(0));                    }                    if (!CollectionUtils.isEmpty(contentList)){                        s.getContent().setContent(contentList.get(0));                    }                    return content;                }).collect(Collectors.toList());        return new PageResult(searchHits.getTotalHits(),newsList);    }    /**     * 设置过滤条件 行业类型 来源 预警类型     * @param search     * @param boolBuilder     */    private void setFilter(NewsPageSearch search, BoolQuery.Builder boolBuilder) {        //行业类型        if(StringUtils.isNotBlank(search.getIndustry())){            // 按逗号拆分            List industryQueries = 
Arrays.asList(search.getIndustry().split(",")).stream().map(p -> {                Query.Builder queryBuilder = new Query.Builder();                queryBuilder.term(t -> t.field("industry").value(p));                return queryBuilder.build();            }).collect(Collectors.toList());            boolBuilder.filter(f -> f.bool(t -> t.should(industryQueries)));        }        // 来源        if(StringUtils.isNotBlank(search.getSource())){            // 按逗号拆分            List sourceQueries = Arrays.asList(search.getSource().split(",")).stream().map(p -> {                Query.Builder queryBuilder = new Query.Builder();                queryBuilder.term(t -> t.field("source").value(p));                return queryBuilder.build();            }).collect(Collectors.toList());            boolBuilder.filter(f -> f.bool(t -> t.should(sourceQueries)));        }        // 预警类型        if(StringUtils.isNotBlank(search.getType())){            // 按逗号拆分            List typeQueries = Arrays.asList(search.getType().split(",")).stream().map(p -> {                Query.Builder queryBuilder = new Query.Builder();                queryBuilder.term(t -> t.field("type").value(p));                return queryBuilder.build();            }).collect(Collectors.toList());            boolBuilder.filter(f -> f.bool(t -> t.should(typeQueries)));        }        //范围区间        if (StringUtils.isNotBlank(search.getStartDate())){            boolBuilder.filter(f -> f.range(r -> r.field("pubDate")                    .gte(JsonData.of(search.getStartDate()))                    .lte(JsonData.of(search.getEndDate()))));        }    }    /**     * 关键字搜索 title 权重更高     * 高亮字段  title 、content     * @param search     * @param nativeQueryBuilder     * @param boolBuilder     */    private void setKeyWordAndHighlightField(NewsPageSearch search, NativeQueryBuilder nativeQueryBuilder, BoolQuery.Builder boolBuilder) {        final String keyword = search.getKeyword();        //查询条件        boolBuilder.must(b -> 
b.multiMatch(m -> m.fields("title","content","companies").query(keyword)));        //高亮        final HighlightFieldParameters.HighlightFieldParametersBuilder builder = HighlightFieldParameters.builder();        builder.withPreTags("")                .withPostTags("")                .withRequireFieldMatch(true) //匹配才加标签                .withNumberOfFragments(0); //显示全文        final HighlightField titleHighlightField = new HighlightField("title", builder.build());        final HighlightField contentHighlightField = new HighlightField("content", builder.build());        final Highlight titleHighlight = new Highlight(List.of(titleHighlightField,contentHighlightField));        nativeQueryBuilder.withQuery(                        f -> f.functionScore(                                fs -> fs.query(q -> q.bool(boolBuilder.build()))                                        .functions( FunctionScore.of(func -> func.filter(                                                        fq -> fq.match(ft -> ft.field("title").query(keyword))).weight(100.0)),                                                FunctionScore.of(func -> func.filter(                                                        fq -> fq.match(ft -> ft.field("content").query(keyword))).weight(20.0)),                                                FunctionScore.of(func -> func.filter(                                                        fq -> fq.match(ft -> ft.field("companies").query(keyword))).weight(10.0)))                                        .scoreMode(FunctionScoreMode.Sum)                                        .boostMode(FunctionBoostMode.Sum)                                        .minScore(1.0)))                .withHighlightQuery(new HighlightQuery(titleHighlight,News.class));    }
实现效果

加权前效果:

加权后效果:

DSL 语句:

{"from": 0,"size": 6,"sort": [{"pubDate": {"mode": "min","order": "desc"}}, {"_score": {"order": "desc"}}],"highlight": {"fields": {"title": {"number_of_fragments": 0,"post_tags": [""],"pre_tags": [""]},"content": {"number_of_fragments": 0,"post_tags": [""],"pre_tags": [""]}}},"query": {"function_score": {"boost_mode": "sum","functions": [{"filter": {"match": {"title": {"query": "立足优势稳住外贸基本盘"}}},"weight": 100.0}, {"filter": {"match": {"content": {"query": "立足优势稳住外贸基本盘"}}},"weight": 20.0}, {"filter": {"match": {"companies": {"query": "立足优势稳住外贸基本盘"}}},"weight": 10.0}],"min_score": 1.0,"query": {"bool": {"filter": [{"bool": {"should": [{"term": {"industry": {"value": "1"}}}, {"term": {"industry": {"value": "2"}}}, {"term": {"industry": {"value": "3"}}}]}}, {"bool": {"should": [{"term": {"source": {"value": "新华社"}}}, {"term": {"source": {"value": "中国经济网"}}}]}}, {"bool": {"should": [{"term": {"type": {"value": "经济简报"}}}, {"term": {"type": {"value": "外贸简报"}}}]}}, {"range": {"pubDate": {"gte": "2023-03-29","lte": "2023-03-30"}}}],"must": [{"multi_match": {"fields": ["title", "content", "companies"],"query": "立足优势稳住外贸基本盘"}}]}},"score_mode": "sum"}},"track_scores": false,"version": true}
4.3 接口测试
摔跤零距离| 摔跤名将邓志伟来温参赛:目标是冠军
<< 上一篇
最后一页
下一篇 >>

X 关闭

  • 太阳能