0
点赞
收藏
分享

微信扫一扫

Elasticsearch自定义分词和分词器

上一篇 <<<Elasticsearch默认分词器对中文分词不友好
下一篇 >>>正向索引和倒排索引区别


1.自定义分词（在 IK 分词器的配置文件 IKAnalyzer.cfg.xml 中通过 ext_dict 指定扩展词典）

<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE properties SYSTEM "http://java.sun.com/dtd/properties.dtd">
<properties>
    <comment>IK Analyzer 扩展配置</comment>
    <!-- Users can configure their own extension dictionary here.
         This entry points at custom/new_word.dic.
         NOTE(review): the path is presumably resolved relative to the directory
         holding this config file; confirm for your installation. -->
    <entry key="ext_dict">custom/new_word.dic</entry>
     <!-- Users can configure their own extension stop-word dictionary here.
          The value is empty, so no extra stop-word file is configured. -->
    <entry key="ext_stopwords"></entry>
    <!-- Users can configure a remote extension dictionary here (entry below is commented out). -->
    <!-- <entry key="remote_ext_dict">words_location</entry> -->
    <!-- Users can configure a remote extension stop-word dictionary here (entry below is commented out). -->
    <!-- <entry key="remote_ext_stopwords">words_location</entry> -->
</properties>

2.自定义分词器（先在索引 settings 中用 IK 分词 + 拼音过滤器定义分词器，再在字段映射中引用）

PUT /goods
{
  "settings": {
    "analysis": {
      "filter": {
        "my_pinyin": {
          "type": "pinyin",
          "keep_separate_first_letter": true,
          "keep_full_pinyin": true,
          "keep_original": true,
          "limit_first_letter_length": 16,
          "lowercase": true,
          "remove_duplicated_term": true
        }
      },
      "analyzer": {
        "ik_smart_pinyin": {
          "type": "custom",
          "tokenizer": "ik_smart",
          "filter": [
            "my_pinyin",
            "word_delimiter"
          ]
        },
        "ik_max_word_pinyin": {
          "type": "custom",
          "tokenizer": "ik_max_word",
          "filter": [
            "my_pinyin",
            "word_delimiter"
          ]
        }
      }
    }
  }
}
POST /goods/_mapping/goods
{
  "goods": {
    "properties": {
      "@timestamp": { "type": "date" },
      "@version": {
        "type": "text",
        "fields": {
          "keyword": { "type": "keyword", "ignore_above": 256 }
        }
      },
      "attribute_list": {
        "type": "text",
        "fields": {
          "keyword": { "type": "keyword", "ignore_above": 256 }
        }
      },
      "category_id": { "type": "long" },
      "created_time": { "type": "date" },
      "detail": {
        "type": "text",
        "analyzer": "ik_smart_pinyin",
        "search_analyzer": "ik_smart_pinyin"
      },
      "id": { "type": "long" },
      "main_image": {
        "type": "text",
        "fields": {
          "keyword": { "type": "keyword", "ignore_above": 256 }
        }
      },
      "name": {
        "type": "text",
        "analyzer": "ik_smart_pinyin",
        "search_analyzer": "ik_smart_pinyin"
      },
      "revision": { "type": "long" },
      "status": { "type": "long" },
      "sub_images": {
        "type": "text",
        "fields": {
          "keyword": { "type": "keyword", "ignore_above": 256 }
        }
      },
      "subtitle": {
        "type": "text",
        "analyzer": "ik_smart",
        "search_analyzer": "ik_smart"
      },
      "updated_time": { "type": "date" }
    }
  }
}

推荐阅读:
<<<Elasticsearch入门知识
<<<Elasticsearch快速原因分析及应用场景
<<<Elasticsearch的存储结构端口及版本控制
<<<Elasticsearch文档映射方式
<<<Elasticsearch的基本及复杂数据类型
<<<Elasticsearch的简易版及结构化查询语句
<<<Elasticsearch默认分词器对中文分词不友好
<<<正向索引和倒排索引区别
<<<Elasticsearch中的类型区别汇总
<<<Elasticsearch是如何解决高并发问题
<<<Elasticsearch集群相关名词
<<<Elasticsearch集群及分片实现原理
<<<SpringBoot整合Elasticsearch
<<<Linux环境安装Elasticsearch
<<<Linux环境安装Elasticsearch集群
<<<Elasticsearch和数据库保持同步的方式及原理
<<<Logstash-input-jdbc实现ES和数据同步操作步骤

举报

相关推荐

0 条评论