Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Elasticsearch upgrade #210

Merged
merged 7 commits into from
Mar 23, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 33 additions & 32 deletions lib/config/es_api_schemas/2.0.yml
Original file line number Diff line number Diff line change
@@ -1,37 +1,38 @@
# compatible with Apium v2.0
settings:
analysis:
char_filter:
escapes:
type: mapping
mappings:
- "<em> => "
- "</em> => "
- "<u> => "
- "</u> => "
- "<strong> => "
- "</strong> => "
- "- => "
- "& => "
- ": => "
- "; => "
- ", => "
- ". => "
- "$ => "
- "@ => "
- "~ => "
- "\" => "
- "' => "
- "[ => "
- "] => "
normalizer:
keyword_normalized:
type: custom
char_filter:
- escapes
filter:
- asciifolding
- lowercase
settings:
analysis:
char_filter:
escapes:
type: mapping
mappings:
- "<em> => "
- "</em> => "
- "<u> => "
- "</u> => "
- "<strong> => "
- "</strong> => "
- "- => "
- "& => "
- ": => "
- "; => "
- ", => "
- ". => "
- "$ => "
- "@ => "
- "~ => "
- "\" => "
- "' => "
- "[ => "
- "] => "
normalizer:
keyword_normalized:
type: custom
char_filter:
- escapes
filter:
- asciifolding
- lowercase
mappings:
properties:
identifier:
Expand Down
6 changes: 3 additions & 3 deletions lib/datura/elasticsearch/alias.rb
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ def self.add
{ add: { alias: ali, index: idx } }
]
}
RestClient.post(base_url, data.to_json, { content_type: :json }) { |res, req, result|
RestClient.post(base_url, data.to_json, @auth_header.merge({ content_type: :json })) { |res, req, result|
if result.code == "200"
puts res
puts "Successfully added alias #{ali}. Current alias list:"
Expand All @@ -40,15 +40,15 @@ def self.delete

url = File.join(options["es_path"], idx, "_alias", ali)

res = JSON.parse(RestClient.delete(url))
res = JSON.parse(RestClient.delete(url, @auth_header))
puts JSON.pretty_generate(res)
list
end

def self.list
options = Datura::Options.new({}).all

res = RestClient.get(File.join(options["es_path"], "_aliases"))
res = RestClient.get(File.join(options["es_path"], "_aliases"), )
JSON.pretty_generate(JSON.parse(res))
end

Expand Down
4 changes: 2 additions & 2 deletions lib/datura/elasticsearch/data.rb
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def self.clear_all(options)
if confirm == "Yes I'm sure"
url = File.join(options["es_path"], options["es_index"], "_doc", "_delete_by_query?pretty=true")
json = { "query" => { "match_all" => {} } }
RestClient.post(url, json.to_json, { content_type: :json }) { |res, req, result|
RestClient.post(url, json.to_json, @auth_header.merge({ content_type: :json })) { |res, req, result|
if result.code == "200"
puts res
else
Expand All @@ -66,7 +66,7 @@ def self.clear_index(options)

if confirmation
data = self.build_clear_data(options)
RestClient.post(url, data.to_json, { content_type: :json }) { |res, req, result|
RestClient.post(url, data.to_json, @auth_header.merge({ content_type: :json })) { |res, req, result|
if result.code == "200"
puts res
else
Expand Down
45 changes: 25 additions & 20 deletions lib/datura/elasticsearch/index.rb
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
require "json"
require "rest-client"
require "yaml"
require "base64"

require_relative "./../elasticsearch.rb"

Expand All @@ -21,10 +22,11 @@ def initialize(options = nil, schema_mapping: false)

@index_url = File.join(@options["es_path"], @options["es_index"])
@pretty_url = "#{@index_url}?pretty=true"
@mapping_url = File.join(@index_url, "_mapping", "_doc?pretty=true")
@mapping_url = File.join(@index_url, "_mapping?pretty=true")

# yaml settings (if exist) and mappings
@requested_schema = YAML.load_file(@options["es_schema"])
@auth_header = Datura::Helpers.construct_auth_header(@options)
# if requested, grab the mapping currently associated with this index
# otherwise wait until after the requested schema is loaded
get_schema_mapping if schema_mapping
Expand All @@ -33,17 +35,16 @@ def initialize(options = nil, schema_mapping: false)
def create
json = @requested_schema["settings"].to_json
puts "Creating ES index for API version #{@options["api_version"]}: #{@pretty_url}"

if json && json != "null"
RestClient.put(@pretty_url, json, { content_type: :json }) { |res, req, result|
RestClient.put(@pretty_url, json, @auth_header.merge({ content_type: :json })) { |res, req, result|
if result.code == "200"
puts res
else
raise "#{result.code} error creating Elasticsearch index: #{res}"
end
}
else
RestClient.put(@pretty_url, nil) { |res, req, result|
RestClient.put(@pretty_url, nil, @auth_header) { |res, req, result|
if result.code == "200"
puts res
else
Expand All @@ -56,15 +57,15 @@ def create
def delete
puts "Deleting #{@options["es_index"]} via url #{@pretty_url}"

RestClient.delete(@pretty_url) { |res, req, result|
RestClient.delete(@pretty_url, @auth_header) { |res, req, result|
if result.code != "200"
raise "#{result.code} error deleting Elasticsearch index: #{res}"
end
}
end

def get_schema
RestClient.get(@mapping_url) { |res, req, result|
RestClient.get(@mapping_url, @auth_header) { |res, req, result|
if result.code == "200"
JSON.parse(res)
else
Expand All @@ -77,13 +78,13 @@ def get_schema_mapping
# if mapping has not already been set, get the schema and manipulate
if !defined?(@schema_mapping)
@schema_mapping = {
"dyanmic" => nil, # /regex|regex/
"dynamic" => nil, # /regex|regex/
"fields" => [], # [ fields ]
"nested" => {} # { field: [ nested_fields ] }
}

schema = get_schema[@options["es_index"]]
doc = schema["mappings"]["_doc"]
doc = schema["mappings"]
doc["properties"].each do |field, value|
@schema_mapping["fields"] << field
if value["type"] == "nested"
Expand All @@ -92,12 +93,14 @@ def get_schema_mapping
end

regex_pieces = []
doc["dynamic_templates"].each do |template|
mapping = template.map { |k,v| v["match"] }.first
# dynamic fields are listed like *_k and will need
# to be converted to ^.*_k$, then combined into a mega-regex
es_match = mapping.sub("*", ".*")
regex_pieces << es_match
if doc["dynamic_templates"]
doc["dynamic_templates"].each do |template|
mapping = template.map { |k,v| v["match"] }.first
# dynamic fields are listed like *_k and will need
# to be converted to ^.*_k$, then combined into a mega-regex
es_match = mapping.sub("*", ".*")
regex_pieces << es_match
end
end
if !regex_pieces.empty?
regex_joined = regex_pieces.join("|")
Expand All @@ -111,7 +114,7 @@ def set_schema
json = @requested_schema["mappings"].to_json

puts "Setting schema: #{@mapping_url}"
RestClient.put(@mapping_url, json, { content_type: :json }) { |res, req, result|
RestClient.put(@mapping_url, json, @auth_header.merge({ content_type: :json })) { |res, req, result|
if result.code == "200"
puts res
else
Expand Down Expand Up @@ -206,9 +209,10 @@ def self.clear_all(options)
puts "Type: 'Yes I'm sure'"
confirm = STDIN.gets.chomp
if confirm == "Yes I'm sure"
url = File.join(options["es_path"], options["es_index"], "_doc", "_delete_by_query?pretty=true")
url = File.join(options["es_path"], options["es_index"], "_delete_by_query?pretty=true")
auth_header = Datura::Helpers.construct_auth_header(options)
json = { "query" => { "match_all" => {} } }
RestClient.post(url, json.to_json, { content_type: :json }) { |res, req, result|
RestClient.post(url, json.to_json, auth_header.merge({ content_type: :json })) { |res, req, result|
if result.code == "200"
puts res
else
Expand All @@ -222,13 +226,14 @@ def self.clear_all(options)
end

def self.clear_index(options)
url = File.join(options["es_path"], options["es_index"], "_doc", "_delete_by_query?pretty=true")
url = File.join(options["es_path"], options["es_index"], "_delete_by_query?pretty=true")
confirmation = self.confirm_clear(options, url)

if confirmation
data = self.build_clear_data(options)
RestClient.post(url, data.to_json, { content_type: :json }) { |res, req, result|
if result.code == "200"
auth_header = Datura::Helpers.construct_auth_header(options)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note for future reference on committing: this helper method also isn't included in this commit, so the code is broken at this commit. It's added in the next commit, but in the future these kinds of changes need to be committed together.

RestClient.post(url, data.to_json, auth_header.merge({content_type: :json })) { |res, req, result|
if result.code == "200" || result.code == "201"
puts res
else
raise "#{result.code} error when clearing index: #{res}"
Expand Down
3 changes: 2 additions & 1 deletion lib/datura/file_type.rb
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ def initialize(location, options)
@out_html = File.join(output, "html")
@out_iiif = File.join(output, "iiif")
@out_solr = File.join(output, "solr")
@auth_header = Datura::Helpers.construct_auth_header(options)
Datura::Helpers.make_dirs(@out_es, @out_html, @out_iiif, @out_solr)
# script locations set in child classes
end
Expand Down Expand Up @@ -68,7 +69,7 @@ def post_es(es)
# NOTE: If you need to do partial updates rather than replacement of doc
# you will need to add _update at the end of this URL
begin
RestClient.put("#{es.index_url}/_doc/#{id}", doc.to_json, {:content_type => :json } )
RestClient.put("#{es.index_url}/_doc/#{id}", doc.to_json, @auth_header.merge({:content_type => :json }) )
rescue => e
error = "Error transforming or posting to ES for #{self.filename(false)}: #{e}"
end
Expand Down
6 changes: 6 additions & 0 deletions lib/datura/helpers.rb
Original file line number Diff line number Diff line change
Expand Up @@ -171,4 +171,10 @@ def self.should_update?(file, since_date=nil)
end
end

# Builds the HTTP Basic "Authorization" header used for Elasticsearch requests.
#
# Reads "es_user" and "es_password" from the given options and encodes
# "user:password" as a single-line Base64 token.
#
# @param options [Hash] configuration hash; only "es_user" and "es_password" are read
# @return [Hash] { "Authorization" => "Basic <token>" }, or an empty hash when
#   neither credential is configured (safe to pass directly or to #merge)
def self.construct_auth_header(options)
  username = options["es_user"]
  password = options["es_password"]
  # No credentials configured: send no Authorization header at all.
  return {} if username.nil? && password.nil?

  # strict_encode64 emits no line feeds. encode64 wraps every 60 characters
  # and appends a trailing "\n", which is not valid inside an HTTP header value.
  token = Base64.strict_encode64("#{username}:#{password}")
  { "Authorization" => "Basic #{token}" }
end

end