Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimizer: Sysbench support #4173

Open
wants to merge 16 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Fixing the rule_parser output and its unit tests
  • Loading branch information
poojam23 committed Jul 24, 2018
commit 9a909adb79c41bf659240987bd63534fd1366c49
51 changes: 28 additions & 23 deletions tools/advisor/advisor/db_log_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,14 @@
from enum import Enum


# Label used where a column family name is expected but none applies:
# Log falls back to it when a message names none of the known column
# families, and DatabaseOptions files unnamed option sections under it.
NO_FAM = 'DB_WIDE'


class DataSource(ABC):
class Type(Enum):
LOG = 1
DB_OPTIONS = 2
STATS = 3
PERF_CONTEXT = 4
ODS = 5
TIME_SERIES = 3

def __init__(self, type):
self.type = type
Expand All @@ -33,11 +34,19 @@ def is_new_log(log_line):
date_regex = '\d{4}/\d{2}/\d{2}-\d{2}:\d{2}:\d{2}\.\d{6}'
return re.match(date_regex, log_line)

def __init__(self, log_line, column_families):
    '''
    Split one log line into time, context and message, and tag the entry
    with the column family its message mentions.

    Args:
        log_line: text whose first whitespace-separated token is the
            time, whose second token is the context, and whose remaining
            tokens form the message.
        column_families: names to look for in the message; None is
            treated as "no names".

    The message is searched, in the given order, for the literal text
    '[<name>]'. The first name found becomes self.column_family; if none
    is found the entry is tagged with NO_FAM.
    '''
    token_list = log_line.strip().split()
    self.time = token_list[0]
    self.context = token_list[1]
    self.message = " ".join(token_list[2:])
    self.column_family = None
    for col_fam in column_families or ():
        # Plain substring test on purpose: handing '[name]' to re.search
        # makes it a regex character class, which matches any message
        # containing a single letter of the name.
        if '[' + col_fam + ']' in self.message:
            self.column_family = col_fam
            break
    if not self.column_family:
        self.column_family = NO_FAM

def get_time(self):
    '''Return the time token stored on this log entry (self.time).'''
    return self.time
Expand All @@ -57,40 +66,36 @@ def __repr__(self):


class DatabaseLogs(DataSource):
    '''
    Log-file data source: groups the lines of every matching file into Log
    entries and tests each entry's message against the conditions' regexes.
    '''

    def __init__(self, logs_path_prefix, column_families):
        '''
        Args:
            logs_path_prefix: path prefix of the log files; '*' is appended
                and the result is globbed when the logs are scanned.
            column_families: column family names passed to every Log that
                is created while scanning.
        '''
        super().__init__(DataSource.Type.LOG)
        self.logs_path_prefix = logs_path_prefix
        self.column_families = column_families

    def trigger_conditions_for_log(self, conditions, log):
        '''
        Record `log` on every condition whose regex matches its message
        (case-insensitive search).

        The trigger kept on a condition is a dictionary mapping a column
        family name to the list of Log objects that matched for it; it is
        created on the first match and extended on later ones.
        '''
        for cond in conditions:
            if not re.search(cond.regex, log.get_message(), re.IGNORECASE):
                continue
            trigger = cond.get_trigger()
            if not trigger:
                trigger = {}
            trigger.setdefault(log.column_family, []).append(log)
            cond.set_trigger(trigger)

    def check_and_trigger_conditions(self, conditions):
        '''
        Read every file matching the path prefix and run
        trigger_conditions_for_log on each complete log entry found.
        '''
        for file_name in glob.glob(self.logs_path_prefix + '*'):
            with open(file_name, 'r') as db_logs:
                new_log = None
                for line in db_logs:
                    if Log.is_new_log(line):
                        # The previous entry is complete once the next
                        # timestamped line shows up.
                        if new_log:
                            self.trigger_conditions_for_log(
                                conditions, new_log
                            )
                        new_log = Log(line, self.column_families)
                    elif new_log:
                        # To account for logs split into multiple lines.
                        # Lines seen before the first timestamped entry
                        # have no entry to attach to and are skipped
                        # instead of failing on new_log being None.
                        new_log.append_message(line)
            # Check for the last log in the file.
            if new_log:
                self.trigger_conditions_for_log(conditions, new_log)
187 changes: 149 additions & 38 deletions tools/advisor/advisor/db_options_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,9 @@
# COPYING file in the root directory) and Apache 2.0 License
# (found in the LICENSE.Apache file in the root directory).

from db_log_parser import DataSource
from ini_parser import IniParser
import copy
from advisor.db_log_parser import DataSource, NO_FAM
from advisor.ini_parser import IniParser


class OptionsSpecParser(IniParser):
Expand All @@ -16,11 +17,12 @@ def is_new_option(line):
def get_section_type(line):
    '''
    Return the '.'-delimited section type named in a section header line.

    Example section header: [TableOptions/BlockBasedTable "default"]
    Here section_type returned would be 'TableOptions.BlockBasedTable'
    '''
    # Drop the enclosing brackets, keep the first whitespace-separated
    # token (the section path) and turn its '/' separators into '.'.
    section_path = line.strip()[1:-1].split()[0]
    section_type = section_path.replace('/', '.')
    return section_type

@staticmethod
def get_section_name(line):
Expand All @@ -29,16 +31,44 @@ def get_section_name(line):
return None
return token_list[1]

@staticmethod
def get_section_str(section_type, section_name):
    '''
    Build a section header line from a '.'-delimited section type and a
    section name.

    The '.' separators of the type become '/'. The name is appended in
    double quotes unless it equals NO_FAM, in which case the header
    carries the type alone.
    '''
    header = '[' + section_type.strip().replace('.', '/')
    if section_name != NO_FAM:
        header += ' "' + section_name + '"'
    return header + ']'

@staticmethod
def get_option_str(key, values):
    '''
    Return the 'key=value' line for one option.

    A list is written as its items joined by ':'; any other value is
    written with str(). None, an empty string and an empty container all
    give just 'key='.
    '''
    # Numeric zero and False are real option values; only None and empty
    # strings/containers mean "nothing to write".
    if not values and not isinstance(values, (int, float)):
        return key + '='
    if isinstance(values, list):
        return key + '=' + ':'.join(str(value) for value in values)
    return key + '=' + str(values)


class DatabaseOptions(DataSource):
def __init__(self, rocksdb_options):
    '''
    Remember the path of the options file and parse it right away.

    Args:
        rocksdb_options: path of the options file to load.
    '''
    super().__init__(DataSource.Type.DB_OPTIONS)
    self.options_path = rocksdb_options
    # Parsed options, nested three levels deep:
    #   Dict[section_type, Dict[column_family, Dict[option, value(s)]]]
    # All keys are strings.
    self.options_dict = None
    # Names of the 'CFOptions' sections recorded while loading; stays None
    # until the first such section is seen.
    self.column_families = None
    # Fill the attributes above from the file.
    self.load_from_source()

def get_original_file(self):
    '''Return the options file path given at construction.'''
    return self.options_path

def load_from_source(self):
self.options_dict = {}
with open(self.options_path, 'r') as db_options:
Expand All @@ -47,59 +77,140 @@ def load_from_source(self):
if not line:
continue
if OptionsSpecParser.is_section_header(line):
curr_sec_type = OptionsSpecParser.get_section_type(line)
curr_sec_type = (
OptionsSpecParser.get_section_type(line)
)
curr_sec_name = OptionsSpecParser.get_section_name(line)
if curr_sec_name:
option_prefix = curr_sec_name + '.' + curr_sec_type
if curr_sec_type == 'CFOptions':
if not self.column_families:
self.column_families = []
self.column_families.append(curr_sec_name)
else:
option_prefix = curr_sec_type
if curr_sec_type not in self.options_dict:
self.options_dict[curr_sec_type] = {}
if not curr_sec_name:
curr_sec_name = NO_FAM
self.options_dict[curr_sec_type][curr_sec_name] = {}
if curr_sec_type == 'CFOptions':
if not self.column_families:
self.column_families = []
self.column_families.append(curr_sec_name)
elif OptionsSpecParser.is_new_option(line):
key, value = OptionsSpecParser.get_key_value_pair(line)
if not self.options_dict:
self.options_dict = {}
self.options_dict[option_prefix + '.' + key] = value
self.options_dict[curr_sec_type][curr_sec_name][key] = (
value
)
else:
error = 'Not able to parse line in Options file.'
OptionsSpecParser.exit_with_parse_error(line, error)

def get_column_families(self):
    '''
    Return self.column_families: the names of the 'CFOptions' sections
    recorded while loading, or None when none was recorded.
    '''
    return self.column_families

def get_options(self, reqd_options):
    # type: List[str] -> Dict[str, Dict[str, Any]]
    # List[option] -> Dict[option, Dict[col_fam, value]]
    '''
    Collect the stored values of the requested options.

    Each requested name is '<section type>.<option>', split at its last
    '.'. The result maps every requested name that was found to a
    dictionary of column family (or NO_FAM) -> value. Names whose section
    or option is not stored are left out of the result.
    '''
    found = {}
    for option in reqd_options:
        sec_name, _, opt_name = option.rpartition('.')
        if sec_name not in self.options_dict:
            continue
        section = self.options_dict[sec_name]
        # The NO_FAM entry, when present, is recorded first so that it
        # leads the per-option dictionary.
        if NO_FAM in section and opt_name in section[NO_FAM]:
            found.setdefault(option, {})[NO_FAM] = section[NO_FAM][opt_name]
        for col_fam, col_fam_options in section.items():
            if opt_name in col_fam_options:
                found.setdefault(option, {})[col_fam] = (
                    col_fam_options[opt_name]
                )
    return found

def update_options(self, options):
    # type: Dict[str, Dict[str, Any]] -> None
    # Dict[option, Dict[col_fam, value]] -> None where option is in the
    # form: ('.' delimited section type) + '.' + option
    '''
    Merge the given option values into the stored options dictionary.

    Sections and column families that are not stored yet are created; an
    option that already exists is overwritten. A deep copy of every value
    is stored, so the caller's objects are not shared.
    '''
    for option, per_col_fam in options.items():
        sec_name, _, opt_name = option.rpartition('.')
        section = self.options_dict.setdefault(sec_name, {})
        for col_fam, value in per_col_fam.items():
            section.setdefault(col_fam, {})[opt_name] = copy.deepcopy(value)

def generate_options_config(self, file_name):
    # type: str -> str
    '''
    Write the stored options to file_name and return file_name.

    For every (section, column family) pair a header line is written,
    followed by one line per option stored under that pair.
    '''
    with open(file_name, 'w') as fp:
        for section, col_fams in self.options_dict.items():
            for col_fam, col_fam_options in col_fams.items():
                header = OptionsSpecParser.get_section_str(section, col_fam)
                fp.write(header + '\n')
                for option, values in col_fam_options.items():
                    option_line = OptionsSpecParser.get_option_str(
                        option, values
                    )
                    fp.write(option_line + '\n')
    return file_name

def check_and_trigger_conditions(self, conditions):
    '''
    Evaluate every condition's expression against the stored option values.

    For every condition, if any of its options is not present, set_trigger
    is not called for it. If all of its options are database-wide, the
    trigger is set to {NO_FAM: values} when the expression evaluates to a
    true value. Otherwise the expression is evaluated once per column
    family that defines all the remaining options, and the trigger is set
    to a dictionary of column family -> values for the families where it
    evaluated to a true value.
    '''
    # column_families is still None when no 'CFOptions' section was
    # recorded; treat that as "no column families" instead of failing in
    # the per-column-family loop below.
    column_families = self.column_families or []
    for cond in conditions:
        reqd_options_dict = self.get_options(cond.options)
        # Indices (into cond.options) of the options that have no
        # database-wide value and must be filled in per column family.
        incomplete_option_ix = []
        # NOTE(review): cond.eval_expr is evaluated in this function's
        # scope and presumably indexes this list (e.g. options[0]), so
        # the local must keep the name 'options' -- confirm against the
        # rule definitions.
        options = []
        missing_reqd_option = False
        for ix, option in enumerate(cond.options):
            if option not in reqd_options_dict:
                missing_reqd_option = True
                break  # required option absent
            if NO_FAM in reqd_options_dict[option]:
                options.append(reqd_options_dict[option][NO_FAM])
            else:
                # placeholder, replaced per column family further down
                options.append(None)
                incomplete_option_ix.append(ix)

        if missing_reqd_option:
            continue

        # if all the options are database-wide options
        if not incomplete_option_ix:
            try:
                # SECURITY: eval() executes the expression as Python
                # code; it is only safe for expressions from a trusted
                # source.
                if eval(cond.eval_expr):
                    cond.set_trigger({NO_FAM: options})
            except Exception as e:
                print('DatabaseOptions check_and_trigger: ' + str(e))
            continue

        # for all the options that are not database-wide, we look for their
        # values specific to column families
        col_fam_options_dict = {}
        for col_fam in column_families:
            present = True
            for ix in incomplete_option_ix:
                option = cond.options[ix]
                if col_fam not in reqd_options_dict[option]:
                    present = False
                    break
                options[ix] = reqd_options_dict[option][col_fam]
            if present:
                try:
                    if eval(cond.eval_expr):
                        # 'options' is overwritten for the next column
                        # family, so keep a snapshot of this one.
                        col_fam_options_dict[col_fam] = (
                            copy.deepcopy(options)
                        )
                except Exception as e:
                    print('DatabaseOptions check_and_trigger: ' + str(e))
        if col_fam_options_dict:
            cond.set_trigger(col_fam_options_dict)
Loading