Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Elasticsearch migration from 2.6 to 5.6 #1805

Open
wants to merge 8 commits into
base: elasticsearch-search-indice-update
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -79,9 +79,9 @@ jobs:
- name: Run (old) elasticsearch
run: |
mkdir /tmp/elasticsearch
wget -O - https://download.elasticsearch.org/elasticsearch/release/org/elasticsearch/distribution/tar/elasticsearch/2.4.6/elasticsearch-2.4.6.tar.gz | tar xz --directory=/tmp/elasticsearch --strip-components=1
/tmp/elasticsearch/bin/plugin install analysis-icu
/tmp/elasticsearch/bin/elasticsearch --daemonize --path.data /tmp
wget -O - https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-5.6.10.tar.gz | tar xz --directory=/tmp/elasticsearch --strip-components=1
/tmp/elasticsearch/bin/elasticsearch-plugin install analysis-icu
/tmp/elasticsearch/bin/elasticsearch --daemonize
sleep 30 # Elasticsearch takes a few seconds to start; make sure it is available when the build script runs
- run: curl -v http://localhost:9200
- name: Run tests
Expand Down
2 changes: 1 addition & 1 deletion c2corg_api/search/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,10 +118,10 @@ def get_text_query_on_title(search_term, search_lang=None):
query=search_term,
fields=fields,
type='phrase',
fuzziness=2,
max_expansions=3,
zero_terms_query="none",
slop=4,
operator="and"
)


Expand Down
54 changes: 26 additions & 28 deletions c2corg_api/search/mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from c2corg_api.models.common.attributes import default_langs
from c2corg_api.models.common.sortable_search_attributes import \
sortable_quality_types
from elasticsearch_dsl import DocType, String, MetaField, Long, GeoPoint
from elasticsearch_dsl import DocType, MetaField, Long, GeoPoint, Text


class BaseMeta:
Expand All @@ -21,25 +21,23 @@ class BaseMeta:
# https://github.com/komoot/photon/blob/master/es/index_settings.json
def default_title_field(lang: None):
if lang is None:
return String(
return Text(
index='not_analyzed',
similarity='c2corgsimilarity',
fields={
'ngram': String(
'ngram': Text(
analyzer='index_ngram', search_analyzer='search_ngram'),
'raw': String(
'raw': Text(
analyzer='index_raw', search_analyzer='search_raw')
})
else:
return String(
return Text(
index='not_analyzed',
similarity='c2corgsimilarity',
fields={
'ngram': String(
'ngram': Text(
analyzer='index_ngram', search_analyzer='search_ngram'),
'raw': String(
'raw': Text(
analyzer='index_raw', search_analyzer='search_raw'),
'contentheavy': String(
'contentheavy': Text(
analyzer='{0}_heavy'.format(lang))
})

Expand Down Expand Up @@ -74,65 +72,65 @@ class Meta(BaseMeta):

# fr
title_fr = default_title_field("french")
summary_fr = String(
summary_fr = Text(
analyzer='index_french', search_analyzer='search_french')
description_fr = String(
description_fr = Text(
analyzer='index_french', search_analyzer='search_french')

# it
title_it = default_title_field("italian")
summary_it = String(
summary_it = Text(
analyzer='index_italian', search_analyzer='search_italian')
description_it = String(
description_it = Text(
analyzer='index_italian', search_analyzer='search_italian')

# de
title_de = default_title_field("german")
summary_de = String(
summary_de = Text(
analyzer='index_german', search_analyzer='search_german')
description_de = String(
description_de = Text(
analyzer='index_german', search_analyzer='search_german')

# en
title_en = default_title_field("english")
summary_en = String(
summary_en = Text(
analyzer='index_english', search_analyzer='search_english')
description_en = String(
description_en = Text(
analyzer='index_english', search_analyzer='search_english')

# es
title_es = default_title_field("spanish")
summary_es = String(
summary_es = Text(
analyzer='index_spanish', search_analyzer='search_spanish')
description_es = String(
description_es = Text(
analyzer='index_spanish', search_analyzer='search_spanish')

# ca
title_ca = default_title_field("catalan")
summary_ca = String(
summary_ca = Text(
analyzer='index_catalan', search_analyzer='search_catalan')
description_ca = String(
description_ca = Text(
analyzer='index_catalan', search_analyzer='search_catalan')

# eu
title_eu = default_title_field("basque")
summary_eu = String(
summary_eu = Text(
analyzer='index_basque', search_analyzer='search_basque')
description_eu = String(
description_eu = Text(
analyzer='index_basque', search_analyzer='search_basque')

# sl
title_sl = default_title_field("slovene")
summary_sl = String(
summary_sl = Text(
analyzer='index_slovene', search_analyzer='search_slovene')
description_sl = String(
description_sl = Text(
analyzer='index_slovene', search_analyzer='search_slovene')

# zh
title_zh = default_title_field("chinois")
summary_zh = String(
summary_zh = Text(
analyzer='index_chinois', search_analyzer='search_chinois')
description_zh = String(
description_zh = Text(
analyzer='index_chinois', search_analyzer='search_chinois')

@staticmethod
Expand Down
1 change: 0 additions & 1 deletion c2corg_api/search/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,6 @@ def do_multi_search_for_types(search_types, search_term, limit, lang):
(_, get_documents_config) = search_type
search = create_search(get_documents_config.document_type).\
query(get_text_query_on_title(search_term, lang)).\
fields([]).\
extra(from_=0, size=limit)
multi_search = multi_search.add(search)

Expand Down
19 changes: 9 additions & 10 deletions c2corg_api/search/search_filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@
from c2corg_api.search import (create_search, get_text_query_on_title,
search_documents)
from c2corg_api.search.mapping_types import reserved_query_fields
from elasticsearch_dsl.query import (Bool, GeoBoundingBox, Missing, Range,
Script, Term, Terms)
from elasticsearch_dsl.query import (Bool, GeoBoundingBox, Range,
Script, Term, Terms, Q)
from pyproj import Transformer

log = logging.getLogger(__name__)
Expand All @@ -35,8 +35,7 @@ def build_query(url_params, meta_params, doc_type):
if filter:
search = search.filter(filter)

search = search.\
fields([]).\
search = search. \
extra(from_=offset, size=limit)

if url_params.get('bbox'):
Expand Down Expand Up @@ -171,9 +170,9 @@ def create_enum_range_min_max_filter(field, query_term):
return Bool(must_not=Bool(should=[
Range(**kwargs_start),
Range(**kwargs_end),
Bool(must=[
Missing(field=field.field_min),
Missing(field=field.field_max)
Bool(must_not=[
Q("exists", field=field.field_min),
Q("exists", field=field.field_max)
])
]))

Expand Down Expand Up @@ -351,9 +350,9 @@ def create_number_range_filter(field, query_term):
return Bool(must_not=Bool(should=[
Range(**kwargs_start),
Range(**kwargs_end),
Bool(must=[
Missing(field=field.field_min),
Missing(field=field.field_max)
Bool(must_not=[
Q('exists', field=field.field_min),
Q('exists', field=field.field_max)
])
]))

Expand Down
26 changes: 11 additions & 15 deletions c2corg_api/tests/search/test_search_filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@
from c2corg_api.search.mappings.outing_mapping import SearchOuting
from c2corg_api.search.mappings.waypoint_mapping import SearchWaypoint
from c2corg_api.tests import BaseTestCase
from elasticsearch_dsl.query import (Bool, GeoBoundingBox, Missing, Range,
Script, Term, Terms)
from elasticsearch_dsl.query import (Bool, GeoBoundingBox, Range,
Script, Term, Terms, Q)


class AdvancedSearchTest(BaseTestCase):
Expand All @@ -29,7 +29,6 @@ def test_build_query(self):
filter(Term(available_locales='fr')).\
filter(Terms(areas=[1234, 4567])). \
filter(Range(elevation={'gte': 1500})). \
fields([]).\
extra(from_=0, size=10)
self.assertQueryEqual(query, expected_query)

Expand All @@ -52,7 +51,6 @@ def test_build_query_bbox(self):
'left': 6.28279913, 'bottom': 46.03129072,
'right': 6.28369744, 'top': 46.03191439},
type='indexed')). \
fields([]). \
extra(from_=0, size=10)
self.assertQueryEqual(query, expected_query)

Expand All @@ -67,7 +65,6 @@ def test_build_query_limit_offset(self):
query = build_query(params, meta_params, 'w')
expected_query = create_search('w'). \
query(get_text_query_on_title('search word')). \
fields([]).\
extra(from_=40, size=20)
self.assertQueryEqual(query, expected_query)

Expand All @@ -82,7 +79,6 @@ def test_build_query_sort_outing(self):
query = build_query(params, meta_params, 'o')
expected_query = create_search('o'). \
filter(Term(activities='skitouring')). \
fields([]). \
sort({'date_end': {'order': 'desc'}}, {'id': {'order': 'desc'}}). \
extra(from_=40, size=20)
self.assertQueryEqual(query, expected_query)
Expand Down Expand Up @@ -212,9 +208,9 @@ def test_create_filter_enum_range_min_max(self):
Bool(must_not=Bool(should=[
Range(climbing_rating_min={'gt': 17}),
Range(climbing_rating_max={'lt': 5}),
Bool(must=[
Missing(field='climbing_rating_min'),
Missing(field='climbing_rating_max')
Bool(must_not=[
Q("exists", field='climbing_rating_min'),
Q("exists", field='climbing_rating_max')
])
])))

Expand Down Expand Up @@ -243,19 +239,19 @@ def test_create_filter_integer_range(self):
Bool(must_not=Bool(should=[
Range(elevation_min={'gt': 2400}),
Range(elevation_max={'lt': 1200}),
Bool(must=[
Missing(field='elevation_min'),
Missing(field='elevation_max')
Bool(must_not=[
Q("exists", field='elevation_min'),
Q("exists", field='elevation_max')
])
])))
self.assertEqual(
create_filter('height', '1200,2400', SearchWaypoint),
Bool(must_not=Bool(should=[
Range(height_min={'gt': 2400}),
Range(height_max={'lt': 1200}),
Bool(must=[
Missing(field='height_min'),
Missing(field='height_max')
Bool(must_not=[
Q("exists", field='height_min'),
Q("exists", field='height_max')
])
])))

Expand Down
6 changes: 6 additions & 0 deletions dataMigration/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
## The dataMigration folder contains one logstash pipeline script per target ES release number.

- v5: moves ES data from the original v2.6 to v5.6
- v6: moves ES data from v5.6 to v6.x
- v7: moves ES data from v6.x to v7.x

51 changes: 51 additions & 0 deletions dataMigration/v5/logstash.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
# Logstash pipeline: copy the "c2corg" index from the legacy ES 2.x cluster
# ("elasticsearch26") into the new ES 5.6 cluster ("elasticsearch").
# Prerequisite (only needed if the prune filter below is enabled):
# docker-compose run -it logstash5 /usr/share/logstash/bin/logstash-plugin install logstash-filter-prune
input
{
elasticsearch
{
hosts => ["elasticsearch26:9200"]
index => "c2corg"
docinfo => true  # keep _index/_type/_id in event metadata for reuse on output
size => 1000     # scroll batch size
scroll => "5m"   # keep the scroll context alive between batches
docinfo_target => "[@metadata]"
}
}

filter {
# Give documents without a geometry a placeholder point.
# NOTE(review): assumes [0,0] is an acceptable sentinel location for the
# target mapping — confirm.
if (![geom]) {
mutate {
add_field => { "geom" => [0,0] }
}
}

# Expose the original document id as a regular field so the output
# section can reference it via %{doc_id}.
mutate {
add_field => { "doc_id" => "%{[@metadata][_id]}" }
}

# Optional: uncomment to strip events down to doc_id only (id audit dry run).
#prune {
# whitelist_names => [ "doc_id" ]
#}

}

# Reindex into the new cluster, preserving document type and id.
output
{
elasticsearch
{
hosts => ["elasticsearch:9200"]
index => "c2corg"
document_type => "%{doc_type}"
document_id => "%{doc_id}"
}

# Progress indicator: one dot per event.
stdout {
codec => "dots"
}

}

# Alternative stdout codecs for debugging:
#stdout { codec => "dots"}
#stdout { codec => rubydebug }
#stdout { codec => "json" }
# Run with:
#docker-compose run logstash5 /usr/share/logstash/bin/logstash -f /root/logstash.conf
45 changes: 45 additions & 0 deletions dataMigration/v6/logstash/logstash.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# Logstash pipeline: copy the "c2corg" index from the ES 5.6 cluster
# ("elasticsearch56") into the ES 6.x cluster ("elasticsearch6"), writing
# each source mapping type to its own index ("c2corg_<type>") — presumably
# to satisfy ES 6's single-mapping-type-per-index restriction; confirm.
input
{
elasticsearch
{
hosts => ["elasticsearch56:9200"]
index => "c2corg"
docinfo => true  # keep _index/_type/_id in event metadata for reuse on output
size => 1000     # scroll batch size
scroll => "5m"   # keep the scroll context alive between batches
docinfo_target => "[@metadata]"
}
}

filter {
# Give documents without a geometry a placeholder point.
# NOTE(review): assumes [0,0] is an acceptable sentinel location for the
# target mapping — confirm.
if (![geom]) {
mutate {
add_field => { "geom" => [0,0] }
}
}

# Expose the original document id as a regular field so the output
# section can reference it via %{doc_id}.
mutate {
add_field => { "doc_id" => "%{[@metadata][_id]}" }
}
}

# Reindex into the new cluster: one target index per source type.
output
{
elasticsearch
{
hosts => ["elasticsearch6:9200"]
index => "c2corg_%{[@metadata][_type]}"
document_type => "%{doc_type}"
document_id => "%{doc_id}"
}

# Progress indicator: one dot per event.
stdout {
codec => "dots"
}

}

# Alternative stdout codecs for debugging:
#stdout { codec => "dots"}
#stdout { codec => rubydebug }
#stdout { codec => "json" }
# NOTE(review): "logstash5" below looks copy-pasted from the v5 pipeline —
# confirm the docker-compose service name for the v6 migration.
#docker-compose run logstash5 /usr/share/logstash/bin/logstash -f /root/logstash.conf
Loading