Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CLI to Compare and Apply Configs, Base Datasource Feature Additions #92

Draft
wants to merge 27 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
aaf7a63
add where functions go
jaybythebay May 31, 2024
6478902
bump version to include in 2.2.12
jaybythebay Jun 2, 2024
1645ec2
removing empty folder works
jaybythebay Jun 2, 2024
d1f664d
mock test works
jaybythebay Jun 2, 2024
8de989c
3 tests passed
jaybythebay Jun 2, 2024
a747c19
rename
jaybythebay Jun 2, 2024
de1ade3
rename
jaybythebay Jun 2, 2024
803aeed
add new testing to the ci
jaybythebay Jun 2, 2024
acc1d27
move generate config dicts to their own function to make it reusable
jaybythebay Jun 2, 2024
20daaf7
add comments and delete duplicate code
jaybythebay Jun 2, 2024
292546c
getting args validation error as expected
jaybythebay Jun 2, 2024
57e63a8
wip
jaybythebay Jun 2, 2024
835402b
Move column_init to function
jaybythebay Jun 3, 2024
71559be
clean comments and docs
jaybythebay Jun 3, 2024
0924b79
define color and symbols as globals so it will all work in functions
jaybythebay Jun 3, 2024
1b774f9
define color and symbols as globals so it will all work in functions …
jaybythebay Jun 3, 2024
ccb8c59
more code
jaybythebay Jun 4, 2024
779781d
Clean naming
jaybythebay Jun 4, 2024
10921fe
Add lots of stuff
jaybythebay Jun 4, 2024
9c2c5cc
passing tests and printing the number of tests to console
jaybythebay Jun 4, 2024
04ffd52
works with typing
jaybythebay Jun 4, 2024
0582861
invert configs working with typing
jaybythebay Jun 4, 2024
5f56687
invert configs working with typing
jaybythebay Jun 4, 2024
30cb25e
invert configs working with typing
jaybythebay Jun 4, 2024
f49bc3e
still getting attribute errors:
jaybythebay Jun 4, 2024
856aba0
ignoring getting that test to work for now
jaybythebay Jun 4, 2024
ebd6e96
WIP
jaybythebay Jun 4, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Clean naming
  • Loading branch information
jaybythebay committed Jun 4, 2024
commit 779781db0bf0c139f7059f96107259cb2758deec
Original file line number Diff line number Diff line change
Expand Up @@ -314,54 +314,54 @@ def __compare_folders(self, datasource_id, tds_folders, cfg_folders):
def execute(self, context):
""" Update Tableau datasource according to config. """

github_conn = BaseHook.get_connection(self.github_conn_id)
config = cfg.Config(
githup_token=github_conn.password,
repo_name=github_conn.extra_dejson.get('repo_name'),
repo_branch=github_conn.extra_dejson.get('repo_branch'),
subfolder=github_conn.extra_dejson.get('subfolder')
)

ts = get_tableau_server(self.tableau_conn_id)
expected_conn_attrs = self.__set_connection_attributes()

# Get the ID for each datasource in the config
for ds in ts.get.datasources():
if ds not in config.datasources:
continue
config.datasources[ds].id = ds.id

for datasource in config.datasources:
if not datasource.id:
logging.error('!! Datasource not found in Tableau Online: %s / %s',
datasource.project_name, datasource.name)
continue
dsid = datasource.id
# github_conn = BaseHook.get_connection(self.github_conn_id)
# config = cfg.Config(
# githup_token=github_conn.password,
# repo_name=github_conn.extra_dejson.get('repo_name'),
# repo_branch=github_conn.extra_dejson.get('repo_branch'),
# subfolder=github_conn.extra_dejson.get('subfolder')
# )
#
# ts = get_tableau_server(self.tableau_conn_id)
# expected_conn_attrs = self.__set_connection_attributes()
#
# # Get the ID for each datasource in the config
# for ds in ts.get.datasources():
# if ds not in config.datasources:
# continue
# config.datasources[ds].id = ds.id
#
# # for datasource in config.datasources:
# if not datasource.id:
# logging.error('!! Datasource not found in Tableau Online: %s / %s',
# datasource.project_name, datasource.name)
# continue
# dsid = datasource.id
# Set default dict attributes for tasks, for each datasource
self.tasks[dsid] = {a: [] for a in UPDATE_ACTIONS}
self.tasks[dsid]['project'] = datasource.project_name
self.tasks[dsid]['datasource_name'] = datasource.name
if not config.in_maintenance_window and AIRFLOW_ENV not in ['STAGING', 'DEV']:
self.tasks[dsid]['skip'] = 'Outside maintenance window'
logging.info('(SKIP) Outside maintenance window: %s', datasource.name)
continue
elif datasource.name in EXCLUDED_DATASOURCES:
self.tasks[dsid]['skip'] = 'Marked to exclude'
logging.info('(SKIP) Marked to exclude: %s', datasource.name)
continue
logging.info('Checking Datasource: %s', datasource.name)
# Download the Datasource for comparison
dl_path = f"downloads/{dsid}/"
os.makedirs(dl_path, exist_ok=True)
ds_path = ts.download.datasource(dsid, file_dir=dl_path, include_extract=False)
tds = Datasource(ds_path)
# Cleanup downloaded file after assigning the Datasource
shutil.rmtree(dl_path, ignore_errors=True)
# Add connection task, if there is a difference
self.__compare_connection(dsid, datasource.name, tds.connection, expected_conn_attrs)
# Add folder tasks, if folders need to be added/deleted
self.__compare_folders(dsid, tds.folders_common, datasource.folders)
# Add Column tasks, if there are missing columns, or columns need to be updated
# if not config.in_maintenance_window and AIRFLOW_ENV not in ['STAGING', 'DEV']:
# self.tasks[dsid]['skip'] = 'Outside maintenance window'
# logging.info('(SKIP) Outside maintenance window: %s', datasource.name)
# continue
# elif datasource.name in EXCLUDED_DATASOURCES:
# self.tasks[dsid]['skip'] = 'Marked to exclude'
# logging.info('(SKIP) Marked to exclude: %s', datasource.name)
# continue
# logging.info('Checking Datasource: %s', datasource.name)
# # Download the Datasource for comparison
# dl_path = f"downloads/{dsid}/"
# os.makedirs(dl_path, exist_ok=True)
# ds_path = ts.download.datasource(dsid, file_dir=dl_path, include_extract=False)
# tds = Datasource(ds_path)
# # Cleanup downloaded file after assigning the Datasource
# shutil.rmtree(dl_path, ignore_errors=True)
# # Add connection task, if there is a difference
# self.__compare_connection(dsid, datasource.name, tds.connection, expected_conn_attrs)
# # Add folder tasks, if folders need to be added/deleted
# self.__compare_folders(dsid, tds.folders_common, datasource.folders)
# # Add Column tasks, if there are missing columns, or columns need to be updated
for column in datasource.columns:
# Check if the column metadata needs to be updated
self.__compare_column_metadata(dsid, tds, column)
Expand Down
30 changes: 15 additions & 15 deletions tableau_utilities/scripts/apply_configs.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,17 @@
from tableau_utilities.scripts.datasource import add_metadata_records_as_columns

class ApplyConfigs:
def __init__(self, datasource_name, datasource_path, column_config, calculated_column_config, debugging_logs):
""" Applies a set of configs to a datasource. Configs prefixed with target_ will be applied to the datasource.
Configs prefixed with datasource_ represent the current state of the datasource before changes.

"""
def __init__(self, datasource_name, datasource_path, target_column_config, target_calculated_column_config, debugging_logs):
self.datasource_name = datasource_name
self.datasource_path = datasource_path
self.column_config = column_config
self.calculated_column_config = calculated_column_config
self.target_column_config = target_column_config
self.target_calculated_column_config = target_calculated_column_config
self.debugging_logs = debugging_logs



def invert_config(self, config):
""" Helper function to invert the column config and calc config.
Output -> {datasource: {column: info}}
Expand Down Expand Up @@ -97,7 +99,8 @@ def execute_changes(self, column_config, calculated_field_config, datasource):
pass

def apply_config_to_datasource(self):
""" Applies changes to make
""" Applies a set of configs (column_config and calculated_column_config) to a datasource.
If a column is in a datasource but NOT in the config that column will be unchanged.

Args:
datasource_name:
Expand All @@ -119,18 +122,15 @@ def apply_config_to_datasource(self):
datasource = add_metadata_records_as_columns(datasource, self.debugging_logs)

# Build the config dictionaries from the datasource
datasource_current_column_config, datasource_current_calculated_column_config = build_configs(datasource,
self.datasource_name)
datasource_column_config, datasource_calculated_column_config = build_configs(datasource, self.datasource_name)
# Prepare the configs by inverting, combining and removing configs for other datasources
config = self.prepare_configs()
current_datasource_config = self.prepare_configs()
target_config = self.prepare_configs(self.target_column_config, self.target_calculated_column_config)
datasource_config = self.prepare_configs(datasource_column_config, datasource_calculated_column_config)

# datasource = self.invert_config(self.column_config)
# self.invert_config(self.calculated_column_config)
# combined_config = {**dict1, **dict2}



# Get the changes to make for the column config
# Get the changes to make for the calculation config

Expand All @@ -148,9 +148,9 @@ def apply_configs(args):
debugging_logs = args.debugging_logs
datasource_name = args.name
datasource_path = args.file_path
column_config = args.column_config
calculated_column_config = args.calculated_column_config
target_column_config = args.column_config
target_calculated_column_config = args.calculated_column_config

AC = ApplyConfigs(datasource_name, datasource_path, column_config, calculated_column_config, debugging_logs)
AC = ApplyConfigs(datasource_name, datasource_path, target_column_config, target_calculated_column_config, debugging_logs)

AC.apply_config_to_datasource()