forked from medcl/book-elastic-search-in-action
-
Notifications
You must be signed in to change notification settings - Fork 0
/
3.1_task_intro.html
540 lines (266 loc) · 19.2 KB
/
3.1_task_intro.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
<!doctype html>
<html class="no-js" lang="zh-Hans">
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>
任务介绍 - Elastic 搜索开发实战
</title>
<meta name="description" content="Elastic 搜索开发实战,图书">
<link href="atom.xml" rel="alternate" title="Elastic 搜索开发实战" type="application/atom+xml">
<link rel="stylesheet" href="asset/css/foundation.min.css" />
<link rel="stylesheet" href="asset/css/docs.css" />
<!-- Loaded synchronously: inline scripts further down the page call $ and hljs directly. -->
<script src="asset/js/vendor/modernizr.js"></script>
<script src="asset/js/vendor/jquery.js"></script>
<script src="asset/highlightjs/highlight.pack.js"></script>
<!-- Explicit https: a protocol-relative URL resolves against file: when the page is opened from disk. -->
<script async src="https://pagead2.googlesyndication.com/pagead/js/adsbygoogle.js"></script>
<script>
// Queue the page-level ads request; the queue is created here if the async loader has not run yet.
(adsbygoogle = window.adsbygoogle || []).push({
google_ad_client: "ca-pub-9516277730901843",
enable_page_level_ads: true
});
</script>
<link href="asset/highlightjs/styles/github.css" media="screen, projection" rel="stylesheet" type="text/css">
<script>hljs.initHighlightingOnLoad();</script>
</head>
<body class="antialiased hide-extras">
<div class="marketing off-canvas-wrap" data-offcanvas>
<div class="inner-wrap">
<nav class="top-bar docs-bar hide-for-small" data-topbar>
<div id="header">
<span style="font-size: 30px;"><a href="index.html">Elastic 搜索开发实战</a></span> <div style="float: right;"><a class="github-button" href="https://github.com/medcl/book-elastic-search-in-action" data-size="large" data-show-count="true" aria-label="Star medcl/book-elastic-search-in-action on GitHub">Star</a></div>
</div>
</nav>
<nav class="tab-bar show-for-small">
<a href="javascript:void(0)" class="left-off-canvas-toggle menu-icon">
<span> Elastic 搜索开发实战</span>
</a>
</nav>
<aside class="left-off-canvas-menu">
<ul class="off-canvas-list">
<li><a href="index.html">Home</a></li>
<li class="divider"></li>
<li><a title="摘要" href="0.1_abstract.html">摘要</a></li>
<li class="divider"></li>
<li><a title="前言" href="1.0_preface.html">前言</a></li>
<li class="sub_nav"><a title="谁应该读这本书" href="1.1_who_should_read_this_book.html">谁应该读这本书</a></li>
<li class="sub_nav"><a title="本书结构" href="1.2_book_strcture.html">本书结构</a></li>
<li class="sub_nav"><a title="思维导图" href="1.3_mindmap.html">思维导图</a></li>
<li class="sub_nav"><a title="软件版本" href="1.4_soft_version.html">软件版本</a></li>
<li class="divider"></li>
<li><a title="基础准备" href="2.0_basic_concept.html">基础准备</a></li>
<li class="sub_nav"><a title="安装使用 Elasticsearch" href="2.1_install_elasticsearch.html">安装使用 Elasticsearch</a></li>
<li class="sub_nav"><a title="开发调试工具介绍" href="2.2_dev_tools.html">开发调试工具介绍</a></li>
<li class="sub_nav"><a title="常用 API 介绍" href="2.3_elasticsearch_basic_api.html">常用 API 介绍</a></li>
<li class="divider"></li>
<li><a title="添加站内搜索功能" href="3.0_site_search.html">添加站内搜索功能</a></li>
<li class="sub_nav"><a title="任务介绍" href="3.1_task_intro.html">任务介绍</a></li>
<li class="sub_nav"><a title="问题列表" href="3.2_prob_list.html">问题列表</a></li>
<li class="sub_nav"><a title="功能分解" href="3.3_task_decomposition.html">功能分解</a></li>
<li class="sub_nav"><a title="总体架构" href="3.4_architecture.html">总体架构</a></li>
<li class="sub_nav"><a title="数据结构" href="3.5_data_structure.html">数据结构</a></li>
<li class="sub_nav"><a title="数据迁移" href="3.6_data_migration.html">数据迁移</a></li>
<li class="sub_nav"><a title="搜索提示" href="3.7_autocomplete.html">搜索提示</a></li>
<li class="sub_nav"><a title="搜索模板" href="15486660133014.html">搜索模板</a></li>
<li class="sub_nav"><a title="优化查询" href="15490320699135.html">优化查询</a></li>
<li class="sub_nav"><a title="模糊查询" href="15490984646086.html">模糊查询</a></li>
</ul>
<div>
<br/>
<a class="github-button" href="https://github.com/medcl/book-elastic-search-in-action" data-size="large" data-show-count="true" aria-label="Star medcl/book-elastic-search-in-action on GitHub">Star</a></div>
</aside>
<a class="exit-off-canvas" href="#"></a>
<section id="main-content" role="main" class="scroll-container">
<div class="row">
<div class="large-3 medium-3 columns">
<div class="hide-for-small">
<div class="sidebar">
<nav>
<ul id="side-nav" class="side-nav">
<li class="side-title"><span><a title="摘要" href="0.1_abstract.html">摘要</a></span></li>
<li class="side-title"><span><a title="前言" href="1.0_preface.html">前言</a></span></li>
<li><a title="谁应该读这本书" href="1.1_who_should_read_this_book.html">谁应该读这本书</a></li>
<li><a title="本书结构" href="1.2_book_strcture.html">本书结构</a></li>
<li><a title="思维导图" href="1.3_mindmap.html">思维导图</a></li>
<li><a title="软件版本" href="1.4_soft_version.html">软件版本</a></li>
<li class="side-title"><span><a title="基础准备" href="2.0_basic_concept.html">基础准备</a></span></li>
<li><a title="安装使用 Elasticsearch" href="2.1_install_elasticsearch.html">安装使用 Elasticsearch</a></li>
<li><a title="开发调试工具介绍" href="2.2_dev_tools.html">开发调试工具介绍</a></li>
<li><a title="常用 API 介绍" href="2.3_elasticsearch_basic_api.html">常用 API 介绍</a></li>
<li class="side-title"><span><a title="添加站内搜索功能" href="3.0_site_search.html">添加站内搜索功能</a></span></li>
<li><a title="任务介绍" href="3.1_task_intro.html">任务介绍</a></li>
<li><a title="问题列表" href="3.2_prob_list.html">问题列表</a></li>
<li><a title="功能分解" href="3.3_task_decomposition.html">功能分解</a></li>
<li><a title="总体架构" href="3.4_architecture.html">总体架构</a></li>
<li><a title="数据结构" href="3.5_data_structure.html">数据结构</a></li>
<li><a title="数据迁移" href="3.6_data_migration.html">数据迁移</a></li>
<li><a title="搜索提示" href="3.7_autocomplete.html">搜索提示</a></li>
<li><a title="搜索模板" href="15486660133014.html">搜索模板</a></li>
<li><a title="优化查询" href="15490320699135.html">优化查询</a></li>
<li><a title="模糊查询" href="15490984646086.html">模糊查询</a></li>
</ul>
</nav>
</div>
</div>
</div>
<div class="large-9 medium-9 columns">
<!-- start toc -->
<div style="background: #fbf7f7; margin-top: 35px;padding: 10px;">
<link rel="stylesheet" href="asset/css/zTreeStyle/zTreeStyle.css" type="text/css">
<ul id="tree" class="ztree"></ul>
</div>
<!-- end toc -->
<div class="markdown-body">
<h1>任务介绍</h1>
<p>我们的目标网站是我们的 Elastic 中文社区:<a href="http://elasticsearch.cn">http://elasticsearch.cn</a>,为什么我们要改造其站内搜索功能呢?我们先来看下面的一张截图:</p>
<p><img src="media/15285964747097/15285983964703.jpg" alt="搜索关键字“Elastic 搜开发实战”时智能提示框的截图"/></p>
<p>我们搜索的关键字是 <code>Elastic 搜开发实战</code>,可以看到智能提示框,里面的推荐结果貌似有点糟糕,有些结果看上去完全没有体现出和关键字有什么关系,感觉搜了之后还是找不到有用的东西。我们再看一个。</p>
<p><img src="media/15285964747097/15285987753533.jpg" alt="以一篇日报的完整标题作为关键字的搜索结果截图"/></p>
<p>将一篇日报的完整标题作为关键字搜索,在搜索结果里面,居然不是排在第一位的。</p>
<p>我们再随便打开一篇文章:<a href="https://elasticsearch.cn/question/1023">https://elasticsearch.cn/question/1023</a>,内容如下图:</p>
<p><img src="media/15285964747097/15285998430298.jpg" alt="文章 https://elasticsearch.cn/question/1023 的内容截图"/></p>
<p>我们随便复制一段文字:</p>
<pre><code>Understand the industrial trend of big data technology (e.g. elasticsearch, web crawler, Kafka).
</code></pre>
<p>然后放到搜索框里面检索一下,结果如下:</p>
<p><img src="media/15285964747097/15285997280401.jpg" alt="检索从文章中复制的一段文字后的搜索结果截图"/></p>
<p>可以看到,完全找不到我们刚刚的那篇文章。</p>
<p>我们再来搜索一下 <code>学习</code>,看看结果:</p>
<p><img src="media/15285964747097/15286008013887.jpg" alt="搜索“学习”的结果截图"/></p>
<p>有不少结果返回,然后再搜索一下 拼音 <code>xuexi</code>,看看结果:</p>
<p><img src="media/15285964747097/15286007456235.jpg" alt="搜索拼音“xuexi”的结果截图"/></p>
<p>可以看到只找到了一个信息。</p>
<p>我们再试试繁体 <code>學習</code>,看看搜索结果:<br/>
<img src="media/15285964747097/15286006755574.jpg" alt="搜索繁体“學習”的结果截图"/></p>
<p>居然查询结果各不相同啊,但是对于搜索的用户来说,我本来就不知道有什么东西,我只是随便丢一个关键字就是想找找看,<code>学习</code>、<code>xuexi</code> 和 <code>學習</code> 应该无差别才对。</p>
<p>还有一个就是社区分享的 PPT,比如我想快速找到我感兴趣的 PPT,如下图:</p>
<p><img src="media/15285964747097/15285991022218.jpg" alt="查找社区分享的 PPT 的截图"/></p>
<p>因为这个分享的功能是新开发的,相关内容也没有加入到数据库的索引里面,所以按 PPT 的标题、描述和 PPT 里面的文本都查不到。</p>
<p>另外结果多了之后,一页一页地找也很麻烦,而且如果我想找最近一个月的文章,或者按分类、标签或者作者进行查看,也不行。没有各个维度的统计。没有对搜索结果的再过滤。</p>
<p>这些都是用户的痛点。</p>
<p>也就是本章我们要解决的任务。</p>
<!-- In-article AdSense unit. Explicit https so the loader URL does not resolve against file: when the page is opened from disk. -->
<script async src="https://pagead2.googlesyndication.com/pagead/js/adsbygoogle.js"></script>
<ins class="adsbygoogle"
style="display:block; text-align:center;"
data-ad-layout="in-article"
data-ad-format="fluid"
data-ad-client="ca-pub-9516277730901843"
data-ad-slot="5015120160"></ins>
<script>
// Queue a request for the <ins class="adsbygoogle"> slot above.
(adsbygoogle = window.adsbygoogle || []).push({});
</script>
</div>
<br /><br />
<hr />
<div class="row clearfix">
<div style="width:50%;float:left;padding-left: 15px;">
<div class="text-left" style="padding:15px 0px;">
<a href="3.0_site_search.html" title="Previous Post: 添加站内搜索功能">« 添加站内搜索功能</a>
</div>
</div>
<div style="width:50%;float:right;padding-right: 15px;">
<div class="text-right" style="padding:15px 0px;">
<a href="3.2_prob_list.html"
title="Next Post: 问题列表">问题列表 »</a>
</div>
</div>
</div>
<div class="row">
<div style="padding:0px 0.93em;" class="share-comments">
<div id="disqus_thread"></div>
<script>
/*
 * Disqus universal embed code.
 *
 * Optional configuration, left disabled as in the vendor template: define
 * disqus_config to set the page's canonical URL and unique identifier.
 * See https://disqus.com/admin/universalcode/#configuration-variables
 *
 * var disqus_config = function () {
 *   this.page.url = PAGE_URL;               // your page's canonical URL variable
 *   this.page.identifier = PAGE_IDENTIFIER; // your page's unique identifier variable
 * };
 */
(function () {
  // Create the embed <script>, stamp it with the current time, and append it
  // to <head> (or to <body> when no <head> is available).
  var doc = document;
  var embedTag = doc.createElement('script');
  var mountPoint = doc.head || doc.body;
  embedTag.src = 'https://elastic-sou-suo-kai-fa-shi-zhan.disqus.com/embed.js';
  embedTag.setAttribute('data-timestamp', +new Date());
  mountPoint.appendChild(embedTag);
})();
</script>
<noscript>Please enable JavaScript to view the <a href="https://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript>
</div>
</div>
<script type="text/javascript">
// On DOM ready, mark the sidebar link that points at this page as the active one.
$(function () {
  var currentURL = '3.1_task_intro.html';
  $('#side-nav a').each(function () {
    var $link = $(this);
    if ($link.attr('href') === currentURL) {
      // The 'active' class on the parent <li> carries no semantics by itself;
      // aria-current="page" exposes the same state to assistive technology.
      $link.attr('aria-current', 'page');
      $link.parent().addClass('active');
    }
  });
});
</script>
<!-- toc -->
<script src="asset/js/jquery.ztree.all-3.5.min.js?v=4?v=20151125" type="text/javascript"></script>
<script src="asset/js/ztree_toc.js?v=4?v=20151125" type="text/javascript"></script>
<script type="text/javascript">
// Once the DOM is ready, run the ztree_toc plugin on the #tree list.
// NOTE(review): is_auto_number presumably enables automatic numbering of TOC entries - confirm in ztree_toc.js.
$(function () {
  var tocOptions = {
    is_auto_number: true
  };
  $('#tree').ztree_toc(tocOptions);
});
</script>
<!-- toc -->
</div></div>
<div class="page-bottom">
<div class="row">
<hr />
<div class="small-9 columns">
<!-- rel="noopener noreferrer" stops the page opened in the new tab from reaching back through window.opener. -->
<p class="copyright">Copyright © 2018 <a target="_blank" rel="noopener noreferrer" href="http://medcl.com">Medcl</a>. 未经许可,谢绝转载.</p>
</div>
<div class="small-3 columns">
<p class="copyright text-right"><a href="#header">TOP</a></p>
</div>
</div>
</div>
</section>
</div>
</div>
<script src="asset/js/foundation.min.js"></script>
<script src="asset/js/foundation/foundation.offcanvas.js"></script>
<!-- UserVoice JavaScript SDK (only needed once on a page) -->
<script>
// Inject the UserVoice widget script asynchronously, ahead of the first <script> in the document.
// The URL is https: browsers block scripts fetched over plain http from a page
// served over https (mixed content), which would silently disable the widget.
(function () {
  var uv = document.createElement('script');
  uv.type = 'text/javascript';
  uv.async = true;
  uv.src = 'https://widget.uservoice.com/c1PjjxGIoKy3HUGvYKodbA.js';
  var s = document.getElementsByTagName('script')[0];
  s.parentNode.insertBefore(uv, s);
})();
</script>
<!-- A tab to launch the Classic Widget -->
<script>
UserVoice = window.UserVoice || [];
UserVoice.push(['showTab', 'classic_widget', {
mode: 'feedback',
primary_color: '#fa8c28',
link_color: '#0a8cc6',
forum_id: 53824,
support_tab_name: '联系我',
feedback_tab_name: '给予反馈',
tab_label: '意见反馈',
tab_color: '#2369d9',
tab_position: 'middle-right',
tab_inverted: false
}]);
</script>
<script>
$(document).foundation();
</script>
<script src="asset/chart/all-min.js"></script>
<script type="text/javascript">
// On DOM ready, replace every <pre><code class="language-sequence"> or
// <pre><code class="language-flow"> block with a rendered SVG diagram.
$(function () {
  var hostIdPrefix = 'mweb-chart-ele-';

  // Tag the code element so it is skipped by highlighting, insert an empty
  // host <div> right after its parent, and hide the parent.
  function swapInHost($code, hostId) {
    var $pre = $code.addClass('nohighlight').parent();
    $('<div id="' + hostId + '"></div>').insertAfter($pre);
    $pre.hide();
  }

  $('pre>code').each(function (position) {
    var $code = $(this);
    // IDs are numbered from 1 across ALL pre>code elements, not only diagram ones.
    var hostId = hostIdPrefix + (position + 1);

    if ($code.hasClass('language-sequence')) {
      swapInHost($code, hostId);
      Diagram.parse($code.text()).drawSVG(hostId, {theme: 'simple'});
    } else if ($code.hasClass('language-flow')) {
      swapInHost($code, hostId);
      flowchart.parse($code.text()).drawSVG(hostId);
    }
  });
});
</script>
<!-- The text/x-mathjax-config block must come before the script that loads MathJax.js; placed after it, the configuration may not be applied. -->
<script type="text/x-mathjax-config">MathJax.Hub.Config({TeX: { equationNumbers: { autoNumber: "AMS" } }});</script>
<script type="text/javascript" src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.1/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
<!-- Place this tag in your head or just before your close body tag. -->
<script async defer src="https://buttons.github.io/buttons.js"></script>
<script id="dsq-count-scr" src="https://elastic-sou-suo-kai-fa-shi-zhan.disqus.com/count.js" async></script>
<!-- Global site tag (gtag.js) - Google Analytics -->
<script async src="https://www.googletagmanager.com/gtag/js?id=UA-120597990-1"></script>
<script>
window.dataLayer = window.dataLayer || [];
function gtag(){dataLayer.push(arguments);}
gtag('js', new Date());
gtag('config', 'UA-120597990-1');
</script>
</body>
</html>