Upgrade ElasticSearch to v8.7.1 (#1359)
* [ElasticsearchApi] updating scripts from elasticsearch v5 to v8

* [Elasticsearch] updated elasticsearch client for graphql

* [ES] fixed python tests

* [CircleCI] updated elasticsearch image

* [CircleCI] disabled es ssl auth

* [ApiTest] removed es dsl import

* [ApiTest] creating es test index before sm daemon test

* [ApiEsTest] fixed remove_mold_db obj assign

* Added the ability to specify a file with a list of ds_ids

---------

Co-authored-by: Sergii Mamedov <finve.ua@gmail.com>
lmacielvieira and sergii-mamedov committed May 31, 2023
1 parent 2e4c7bb commit fbe7354
Showing 24 changed files with 514 additions and 339 deletions.
9 changes: 6 additions & 3 deletions .circleci/config.yml
@@ -173,9 +173,10 @@ jobs:
POSTGRES_USER: sm
POSTGRES_PASSWORD: password

- image: elasticsearch:5.4.0-alpine
- image: elasticsearch:8.7.1
environment:
ES_JAVA_OPTS: "-Xms512m -Xmx512m"
xpack.security.enabled: "false"
command: [elasticsearch, -Etransport.host=127.0.0.1]

- image: redis:3.2-alpine
@@ -265,9 +266,10 @@ jobs:
POSTGRES_USER: sm
POSTGRES_PASSWORD: password

- image: elasticsearch:5.4.0-alpine
- image: elasticsearch:8.7.1
environment:
ES_JAVA_OPTS: "-Xms512m -Xmx512m"
xpack.security.enabled: "false"
command: [elasticsearch, -Etransport.host=127.0.0.1]

- image: rabbitmq:3.6-management
@@ -331,9 +333,10 @@ jobs:
POSTGRES_USER: sm
POSTGRES_PASSWORD: password

- image: elasticsearch:5.4.0-alpine
- image: elasticsearch:8.7.1
environment:
ES_JAVA_OPTS: "-Xms512m -Xmx512m"
xpack.security.enabled: "false"
command: [elasticsearch, -Etransport.host=127.0.0.1]

- image: rabbitmq:3.6-management
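All three CircleCI jobs now run the stock elasticsearch:8.7.1 image with X-Pack security switched off, so the test suites can reach the node over plain HTTP without credentials. Purely as an illustration (not part of this commit, with an assumed host, port, and timeout), a CI step could verify the node is up using the Python client this PR migrates to:

```python
import time

from elasticsearch import Elasticsearch  # the 8.x client this PR switches to

# xpack.security.enabled is "false" in the CI container, so no auth or TLS is needed.
es = Elasticsearch("http://127.0.0.1:9200")

deadline = time.time() + 120  # illustrative timeout
while time.time() < deadline:
    if es.ping():  # returns True once the node answers HTTP requests
        break
    time.sleep(2)
else:
    raise RuntimeError("Elasticsearch 8.7.1 did not become available in time")
```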
2 changes: 1 addition & 1 deletion docker/elasticsearch/Dockerfile
@@ -1,4 +1,4 @@
FROM elasticsearch:5.5.2
FROM elasticsearch:8.7.1


RUN rm -r /usr/share/elasticsearch/config/elasticsearch.yml && \
4 changes: 3 additions & 1 deletion docker/elasticsearch/config/elasticsearch.yml
@@ -5,10 +5,12 @@
cluster.name: "docker-cluster"
network.host: 0.0.0.0

xpack.security.enabled: false

# minimum_master_nodes need to be explicitly set when bound on a public IP
# set to 1 to allow single node clusters
# Details: https://github.com/elastic/elasticsearch/pull/17288
discovery.zen.minimum_master_nodes: 1
#discovery.zen.minimum_master_nodes: 1

## Use single node discovery in order to disable production mode and avoid bootstrap checks
## see https://www.elastic.co/guide/en/elasticsearch/reference/current/bootstrap-checks.html
2 changes: 2 additions & 0 deletions metaspace/engine/conf/config.docker.json
@@ -14,6 +14,8 @@
},
"elasticsearch": {
"index": "sm",
"dataset_index": "dataset",
"annotation_index": "annotation",
"host": "elasticsearch",
"port": "9200",
"user": "elastic",
2 changes: 2 additions & 0 deletions metaspace/engine/conf/config.json.template
@@ -14,6 +14,8 @@
},
"elasticsearch": {
"index": "sm",
"dataset_index": "dataset",
"annotation_index": "annotation",
"host": "{{ sm_es_host }}",
"port": "{{ sm_es_port }}",
"user": "{{ sm_es_user }}",
2 changes: 2 additions & 0 deletions metaspace/engine/conf/scitest_config.json
@@ -13,6 +13,8 @@
},
"elasticsearch": {
"index": "sm_test",
"dataset_index": "dataset_test",
"annotation_index": "annotation_test",
"host": "localhost",
"port": "9200",
"user": "elastic",
2 changes: 2 additions & 0 deletions metaspace/engine/conf/test_config.json
@@ -7,6 +7,8 @@
},
"elasticsearch": {
"index": "sm_test",
"dataset_index": "dataset_test",
"annotation_index": "annotation_test",
"host": "localhost",
"port": 9200
},
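The config files above gain two new aliases, dataset_index and annotation_index, splitting the old single sm index into per-entity indices. Purely as a hedged illustration of that split (not the engine's actual ESExporter code, with invented ids and fields), documents would be written to the two aliases separately via the 8.x client:

```python
from elasticsearch import Elasticsearch

es = Elasticsearch("http://localhost:9200")  # host/port as in test_config.json

# One summary document per dataset goes to the dataset alias...
es.index(
    index="dataset_test",
    id="example-ds-id",                       # hypothetical dataset id
    document={"ds_name": "example dataset"},  # hypothetical fields
)

# ...while each annotation is indexed into the annotation alias.
es.index(
    index="annotation_test",
    id="example-ds-id_CHO_+H",  # hypothetical annotation id
    document={"ds_id": "example-ds-id", "formula": "CHO", "adduct": "+H"},
)
```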
2 changes: 1 addition & 1 deletion metaspace/engine/requirements.txt
@@ -10,7 +10,7 @@ boto3==1.17.13
fabric3
pypng==0.0.19 # 0.0.20 introduced incompatible API changes
pyyaml>=5.1
elasticsearch-dsl>=5.0.0,<6.0.0
elasticsearch==8.7.0
pika==0.13.1
bottle
Pillow==9.3.0
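requirements.txt drops the v5-era elasticsearch-dsl package in favour of the plain elasticsearch 8.7.0 client. For orientation, a minimal sketch of a connection and query with the new client; the URL, credentials, index, and field names below are illustrative and not taken from this repository:

```python
from elasticsearch import Elasticsearch  # elasticsearch==8.7.0

# The 8.x client takes a URL (or list of URLs) plus optional basic auth.
es = Elasticsearch("http://localhost:9200", basic_auth=("elastic", "password"))

# Queries are plain dicts passed as keyword arguments, instead of the
# Search()/Q() builder objects from elasticsearch-dsl.
resp = es.search(
    index="dataset",
    query={"term": {"status": "FINISHED"}},  # illustrative field/value
    size=10,
)
for hit in resp["hits"]["hits"]:
    print(hit["_id"], hit["_source"])
```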
31 changes: 21 additions & 10 deletions metaspace/engine/scripts/manage_es_index.py
@@ -53,27 +53,38 @@ def print_status(es_man, alias):

es_config = SMConfig.get_conf()['elasticsearch']
es_man = ESIndexManager(es_config)
alias = es_config['index']
active_index = es_man.internal_index_name(alias)
inactive_index = es_man.another_index_name(active_index)
index = inactive_index if args.inactive else active_index
dataset_alias = es_config['dataset_index']
annotation_alias = es_config['annotation_index']
dataset_active_index = es_man.internal_index_name(dataset_alias)
annotation_active_index = es_man.internal_index_name(annotation_alias)
dataset_inactive_index = es_man.another_index_name(dataset_active_index)
annotation_inactive_index = es_man.another_index_name(annotation_active_index)

dataset_index = dataset_inactive_index if args.inactive else dataset_active_index
annotation_index = annotation_inactive_index if args.inactive else annotation_active_index

if args.action == 'create':
if args.drop:
es_man.delete_index(index)
es_man.create_index(index)
es_man.delete_index(dataset_index)
es_man.delete_index(annotation_index)
es_man.create_dataset_index(dataset_index)
es_man.create_annotation_index(annotation_index)
if not args.inactive:
es_man.remap_alias(index, alias)
es_man.remap_alias(dataset_index, dataset_alias)
es_man.remap_alias(annotation_index, annotation_alias)
elif args.action == 'swap':
es_man.remap_alias(inactive_index, alias)
es_man.remap_alias(dataset_inactive_index, dataset_alias)
es_man.remap_alias(annotation_inactive_index, annotation_alias)
elif args.action == 'drop':
assert args.inactive, 'drop must be used with --inactive '
es_man.delete_index(index)
es_man.delete_index(dataset_index)
es_man.delete_index(annotation_index)
elif args.action == 'status':
pass
else:
parser.error('Invalid action')

# Print status regardless of the action. The 'status' action exists only as an
# explicit way to print it without doing anything else.
print_status(es_man, alias)
print_status(es_man, dataset_alias)
print_status(es_man, annotation_alias)
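manage_es_index.py now creates, swaps, and drops two alias/index pairs (dataset and annotation) instead of one. The remap_alias helper itself is not shown in this diff; the sketch below is only a guess at the usual atomic alias-swap pattern it presumably implements, written against the raw 8.x client with illustrative names:

```python
from elasticsearch import Elasticsearch

es = Elasticsearch("http://localhost:9200")


def remap_alias(es: Elasticsearch, new_index: str, alias: str) -> None:
    """Point `alias` at `new_index` and detach it from any other index
    in a single atomic update_aliases call."""
    actions = [{"add": {"index": new_index, "alias": alias}}]
    if es.indices.exists_alias(name=alias):
        current = es.indices.get_alias(name=alias).body  # {index_name: {"aliases": ...}}
        for old_index in current:
            if old_index != new_index:
                actions.append({"remove": {"index": old_index, "alias": alias}})
    es.indices.update_aliases(actions=actions)


# e.g. after reindexing into the inactive index:
# remap_alias(es, "dataset-v2", "dataset")
```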
59 changes: 43 additions & 16 deletions metaspace/engine/scripts/update_es_index.py
@@ -10,9 +10,19 @@
logger = logging.getLogger('engine')


def get_inactive_index_es_config(es_config):
def get_inactive_dataset_index_es_config(es_config):
es_man = ESIndexManager(es_config)
old_index = es_man.internal_index_name(es_config['index'])
old_index = es_man.internal_index_name(es_config['dataset_index'])
new_index = es_man.another_index_name(old_index)
tmp_es_config = deepcopy(es_config)
tmp_es_config['index'] = new_index

return tmp_es_config


def get_inactive_annotation_index_es_config(es_config):
es_man = ESIndexManager(es_config)
old_index = es_man.internal_index_name(es_config['annotation_index'])
new_index = es_man.another_index_name(old_index)
tmp_es_config = deepcopy(es_config)
tmp_es_config['index'] = new_index
@@ -22,25 +32,35 @@ def get_inactive_index_es_config(es_config):

def _reindex_all(sm_config):
es_config = sm_config['elasticsearch']
alias = es_config['index']
es_man = ESIndexManager(es_config)
old_index = es_man.internal_index_name(alias)
new_index = es_man.another_index_name(old_index)
es_man.create_index(new_index)

dataset_alias = es_config['dataset_index']
old_dataset_index = es_man.internal_index_name(dataset_alias)
new_dataset_index = es_man.another_index_name(old_dataset_index)
es_man.create_dataset_index(new_dataset_index)

annotation_alias = es_config['annotation_index']
old_annotation_index = es_man.internal_index_name(annotation_alias)
new_annotation_index = es_man.another_index_name(old_annotation_index)
es_man.create_annotation_index(new_annotation_index)

try:
inactive_es_config = get_inactive_index_es_config(es_config)
inactive_dataset_es_config = get_inactive_dataset_index_es_config(es_config)
inactive_annotation_es_config = get_inactive_annotation_index_es_config(es_config)
db = DB()
es_exp = ESExporter(db, {**sm_config, 'elasticsearch': inactive_es_config})
ds_ids = [r[0] for r in db.select('select id from dataset')]
es_exp = ESExporter(db, {**sm_config, 'elasticsearch': inactive_dataset_es_config})
ds_ids = [r[0] for r in db.select('SELECT id FROM dataset ORDER BY id')]
_reindex_datasets(ds_ids, es_exp)

es_man.remap_alias(inactive_es_config['index'], alias=alias)
es_man.remap_alias(inactive_dataset_es_config['index'], alias=dataset_alias)
es_man.remap_alias(inactive_annotation_es_config['index'], alias=annotation_alias)
except Exception as e:
es_man.delete_index(new_index)
es_man.delete_index(new_dataset_index)
es_man.delete_index(new_annotation_index)
raise e
else:
es_man.delete_index(old_index)
es_man.delete_index(old_dataset_index)
es_man.delete_index(old_annotation_index)


def _reindex_datasets(ds_ids, es_exp):
@@ -57,8 +77,10 @@ def _partial_update_datasets(ds_ids, es_exp, fields):
es_exp.update_ds(ds_id, fields)


def reindex_results(sm_config, ds_id, ds_mask, use_inactive_index, offline_reindex, update_fields):
assert ds_id or ds_mask or offline_reindex
def reindex_results(
sm_config, ds_id, ds_mask, ds_file, use_inactive_index, offline_reindex, update_fields
):
assert ds_id or ds_mask or ds_file or offline_reindex

IsocalcWrapper.set_centroids_cache_enabled(True)

@@ -67,7 +89,7 @@ def reindex_results(sm_config, ds_id, ds_mask, use_inactive_index, offline_reind
else:
es_config = sm_config['elasticsearch']
if use_inactive_index:
es_config = get_inactive_index_es_config(es_config)
es_config = get_inactive_dataset_index_es_config(es_config)

db = DB()
es_exp = ESExporter(db, sm_config={**sm_config, 'elasticsearch': es_config})
@@ -78,9 +100,12 @@ def reindex_results(sm_config, ds_id, ds_mask, use_inactive_index, offline_reind
ds_ids = [
id
for (id,) in db.select(
"select id from dataset where name like '{}%'".format(ds_mask)
"SELECT id FROM dataset WHERE name like '{}%' ORDER BY id".format(ds_mask)
)
]
elif ds_file:
with ds_file as file:
ds_ids = [line.strip() for line in file.readlines()]
else:
ds_ids = []

@@ -96,6 +121,7 @@ def reindex_results(sm_config, ds_id, ds_mask, use_inactive_index, offline_reind
parser.add_argument('--inactive', action='store_true', help='Run against the inactive index')
parser.add_argument('--ds-id', help='DS id (or comma-separated list of ids)')
parser.add_argument('--ds-name', help='DS name prefix mask')
parser.add_argument('--ds-file', type=argparse.FileType('r'), help='DS ids from the file')
parser.add_argument('--offline-reindex', help='Create and populate inactive index then swap')
parser.add_argument(
'--update-fields',
@@ -109,6 +135,7 @@ def reindex_results(sm_config, ds_id, ds_mask, use_inactive_index, offline_reind
sm_config=sm_config,
ds_id=args.ds_id,
ds_mask=args.ds_name,
ds_file=args.ds_file,
use_inactive_index=args.inactive,
offline_reindex=args.offline_reindex,
update_fields=args.update_fields,
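The new --ds-file option reads dataset ids from a plain-text file, one id per line. A hedged usage sketch; the script invocation, file name, and ids are illustrative only:

```python
# Prepare a file with one dataset id per line, then pass it to the script, e.g.:
#
#   python scripts/update_es_index.py --ds-file ds_ids.txt
#
# The new ds_file branch in reindex_results() does roughly the following with it:
with open("ds_ids.txt") as ds_file:
    ds_ids = [line.strip() for line in ds_file.readlines()]

print(f"{len(ds_ids)} dataset ids will be reindexed: {ds_ids}")
```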