POST /forum/_bulk { "index": { "_id": 1 }} { "articleID" : "XHDK-A-1293-#fJ3", "userID" : 1, "hidden": false, "postDate": "2017-01-01" } { "index": { "_id": 2 }} { "articleID" : "KDKE-B-9947-#kL5", "userID" : 1, "hidden": false, "postDate": "2017-01-02" } { "index": { "_id": 3 }} { "articleID" : "JODL-X-1937-#pV7", "userID" : 2, "hidden": false, "postDate": "2017-01-01" } { "index": { "_id": 4 }} { "articleID" : "QQPX-R-3956-#aD8", "userID" : 2, "hidden": true, "postDate": "2017-01-02" }
POST /forum/_bulk { "update": { "_id": "1"} } { "doc" : {"title" : "this is java and elasticsearch blog"} } { "update": { "_id": "2"} } { "doc" : {"title" : "this is java blog"} } { "update": { "_id": "3"} } { "doc" : {"title" : "this is elasticsearch blog"} } { "update": { "_id": "4"} } { "doc" : {"title" : "this is java, elasticsearch, hadoop blog"} } { "update": { "_id": "5"} } { "doc" : {"title" : "this is spark blog"} }
这个就跟之前的那个term filter/query不一样了。不是搜索exact value,而是进行full text全文搜索。
match query是负责进行全文检索的。当然,如果要检索的field是not_analyzed类型的(即不分词的字段,在新版本中对应keyword类型),那么match query也相当于term query。
GET /forum/_search { "query": { "match": { "title": "java elasticsearch" } } }
{ "took" : 1139, "timed_out" : false, "_shards" : { "total" : 1, "successful" : 1, "skipped" : 0, "failed" : 0 }, "hits" : { "total" : { "value" : 4, "relation" : "eq" }, "max_score" : 0.97797304, "hits" : [ { "_index" : "forum", "_type" : "_doc", "_id" : "1", "_score" : 0.97797304, "_source" : { "articleID" : "XHDK-A-1293-#fJ3", "userID" : 1, "hidden" : false, "postDate" : "2017-01-01", "tag" : [ "java", "hadoop" ], "tag_cnt" : 2, "view_cnt" : 30, "title" : "this is java and elasticsearch blog" } }, { "_index" : "forum", "_type" : "_doc", "_id" : "4", "_score" : 0.97797304, "_source" : { "articleID" : "QQPX-R-3956-#aD8", "userID" : 2, "hidden" : true, "postDate" : "2017-01-02", "tag" : [ "java", "elasticsearch" ], "tag_cnt" : 2, "view_cnt" : 80, "title" : "this is java, elasticsearch, hadoop blog" } }, { "_index" : "forum", "_type" : "_doc", "_id" : "2", "_score" : 0.57843524, "_source" : { "articleID" : "KDKE-B-9947-#kL5", "userID" : 1, "hidden" : false, "postDate" : "2017-01-02", "tag" : [ "java" ], "tag_cnt" : 1, "view_cnt" : 50, "title" : "this is java blog" } }, { "_index" : "forum", "_type" : "_doc", "_id" : "3", "_score" : 0.57843524, "_source" : { "articleID" : "JODL-X-1937-#pV7", "userID" : 2, "hidden" : false, "postDate" : "2017-01-01", "tag" : [ "hadoop" ], "tag_cnt" : 1, "view_cnt" : 100, "title" : "this is elasticsearch blog" } } ] } }
搜索结果精确控制的第一步就是灵活使用match query的operator参数:operator默认是or,只要匹配任意一个搜索关键字就会返回;如果希望所有的搜索关键字都必须匹配,就把operator指定为and,这样可以实现默认match query无法实现的效果。
GET /forum/_search { "query": { "match": { "title": { "query": "java elasticsearch", "operator": "and" } } } }
{ "took" : 6, "timed_out" : false, "_shards" : { "total" : 1, "successful" : 1, "skipped" : 0, "failed" : 0 }, "hits" : { "total" : { "value" : 2, "relation" : "eq" }, "max_score" : 0.97797304, "hits" : [ { "_index" : "forum", "_type" : "_doc", "_id" : "1", "_score" : 0.97797304, "_source" : { "articleID" : "XHDK-A-1293-#fJ3", "userID" : 1, "hidden" : false, "postDate" : "2017-01-01", "tag" : [ "java", "hadoop" ], "tag_cnt" : 2, "view_cnt" : 30, "title" : "this is java and elasticsearch blog" } }, { "_index" : "forum", "_type" : "_doc", "_id" : "4", "_score" : 0.97797304, "_source" : { "articleID" : "QQPX-R-3956-#aD8", "userID" : 2, "hidden" : true, "postDate" : "2017-01-02", "tag" : [ "java", "elasticsearch" ], "tag_cnt" : 2, "view_cnt" : 80, "title" : "this is java, elasticsearch, hadoop blog" } } ] } }
控制搜索结果精确度的第二步,就是通过minimum_should_match指定在给出的这些关键字中,至少必须匹配其中多少个关键字,document才能作为结果返回。例如下面的搜索给了4个关键字,minimum_should_match指定为3,表示至少要匹配其中3个。
GET /forum/_search { "query": { "match": { "title": { "query": "java elasticsearch spark hadoop", "minimum_should_match": 3 } } } }
{ "took" : 4, "timed_out" : false, "_shards" : { "total" : 1, "successful" : 1, "skipped" : 0, "failed" : 0 }, "hits" : { "total" : { "value" : 1, "relation" : "eq" }, "max_score" : 2.2356422, "hits" : [ { "_index" : "forum", "_type" : "_doc", "_id" : "4", "_score" : 2.2356422, "_source" : { "articleID" : "QQPX-R-3956-#aD8", "userID" : 2, "hidden" : true, "postDate" : "2017-01-02", "tag" : [ "java", "elasticsearch" ], "tag_cnt" : 2, "view_cnt" : 80, "title" : "this is java, elasticsearch, hadoop blog" } } ] } }
GET /forum/_search { "query": { "bool": { "must": [ { "match": { "title": "java" } } ], "must_not": [ { "match": { "title": "spark" } } ], "should": [ { "match": { "title": "hadoop" } }, { "match": { "title": "elasticsearch" } } ] } } }
{ "took" : 12, "timed_out" : false, "_shards" : { "total" : 1, "successful" : 1, "skipped" : 0, "failed" : 0 }, "hits" : { "total" : { "value" : 3, "relation" : "eq" }, "max_score" : 2.2356422, "hits" : [ { "_index" : "forum", "_type" : "_doc", "_id" : "4", "_score" : 2.2356422, "_source" : { "articleID" : "QQPX-R-3956-#aD8", "userID" : 2, "hidden" : true, "postDate" : "2017-01-02", "tag" : [ "java", "elasticsearch" ], "tag_cnt" : 2, "view_cnt" : 80, "title" : "this is java, elasticsearch, hadoop blog" } }, { "_index" : "forum", "_type" : "_doc", "_id" : "1", "_score" : 0.97797304, "_source" : { "articleID" : "XHDK-A-1293-#fJ3", "userID" : 1, "hidden" : false, "postDate" : "2017-01-01", "tag" : [ "java", "hadoop" ], "tag_cnt" : 2, "view_cnt" : 30, "title" : "this is java and elasticsearch blog" } }, { "_index" : "forum", "_type" : "_doc", "_id" : "2", "_score" : 0.57843524, "_source" : { "articleID" : "KDKE-B-9947-#kL5", "userID" : 1, "hidden" : false, "postDate" : "2017-01-02", "tag" : [ "java" ], "tag_cnt" : 1, "view_cnt" : 50, "title" : "this is java blog" } } ] } }
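bool查询计算relevance score的方式:把document匹配到的must和should子句各自的分数加起来。(在使用TF/IDF的旧版本中,还会再乘以“匹配到的子句数除以must和should子句的总数”这个比例,即coord因子;Elasticsearch 6.0之后的新版本默认使用BM25,已经去掉了该因子,上面的结果就是各子句分数直接相加。)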
所以排在第一位的是:包含java、hadoop、elasticsearch
排在第二位的是:包含java、elasticsearch
排在第三位的是:包含java
should是可以影响相关度分数的
must确保document必须包含这个关键字,同时会根据这个must的条件去计算出document对这个搜索条件的relevance score。在满足must的基础上,should中的条件不匹配也是可以的,但是如果匹配得更多,那么document的relevance score就会更高。
默认情况下,should是可以不匹配任何一个的,但是有一个例外的情况:如果bool查询中没有must(也没有filter)子句,那么should中必须至少匹配一个才可以。这个默认行为可以通过minimum_should_match显式调整,例如下面要求4个should条件中至少匹配3个。
GET /forum/_search { "query": { "bool": { "should": [ { "match": { "title": "java" } }, { "match": { "title": "elasticsearch" } }, { "match": { "title": "hadoop" } }, { "match": { "title": "spark" } } ], "minimum_should_match": 3 } } }
{ "took" : 2, "timed_out" : false, "_shards" : { "total" : 1, "successful" : 1, "skipped" : 0, "failed" : 0 }, "hits" : { "total" : { "value" : 1, "relation" : "eq" }, "max_score" : 2.2356422, "hits" : [ { "_index" : "forum", "_type" : "_doc", "_id" : "4", "_score" : 2.2356422, "_source" : { "articleID" : "QQPX-R-3956-#aD8", "userID" : 2, "hidden" : true, "postDate" : "2017-01-02", "tag" : [ "java", "elasticsearch" ], "tag_cnt" : 2, "view_cnt" : 80, "title" : "this is java, elasticsearch, hadoop blog" } } ] } }