CLI to Compare and Apply Configs, Base Datasource Feature Additions #92

Draft · wants to merge 27 commits into base: main
Changes from all commits (27 commits)
aaf7a63
add where functions go
jaybythebay May 31, 2024
6478902
bump version to include in 2.2.12
jaybythebay Jun 2, 2024
1645ec2
removing empty folder works
jaybythebay Jun 2, 2024
d1f664d
mock test works
jaybythebay Jun 2, 2024
8de989c
3 tests passed
jaybythebay Jun 2, 2024
a747c19
rename
jaybythebay Jun 2, 2024
de1ade3
rename
jaybythebay Jun 2, 2024
803aeed
add new testing to the ci
jaybythebay Jun 2, 2024
acc1d27
move generate config dicts to their own function to make it reusable
jaybythebay Jun 2, 2024
20daaf7
add comments and delete duplicate code
jaybythebay Jun 2, 2024
292546c
getting args validation error as expected
jaybythebay Jun 2, 2024
57e63a8
wip
jaybythebay Jun 2, 2024
835402b
Move column_init to function
jaybythebay Jun 3, 2024
71559be
clean comments and docs
jaybythebay Jun 3, 2024
0924b79
define color and symbols as globals so it will all work in functions
jaybythebay Jun 3, 2024
1b774f9
define color and symbols as globals so it will all work in functions …
jaybythebay Jun 3, 2024
ccb8c59
more code
jaybythebay Jun 4, 2024
779781d
Clean naming
jaybythebay Jun 4, 2024
10921fe
Add lots of stuff
jaybythebay Jun 4, 2024
9c2c5cc
passing tests and printing the number of tests to console
jaybythebay Jun 4, 2024
04ffd52
works with typing
jaybythebay Jun 4, 2024
0582861
invert configs working with typing
jaybythebay Jun 4, 2024
5f56687
invert configs working with typing
jaybythebay Jun 4, 2024
30cb25e
invert configs working with typing
jaybythebay Jun 4, 2024
f49bc3e
still getting attribute errors:
jaybythebay Jun 4, 2024
856aba0
ignoring getting that test to work for now
jaybythebay Jun 4, 2024
ebd6e96
WIP
jaybythebay Jun 4, 2024
4 changes: 4 additions & 0 deletions .github/workflows/python-package.yml
@@ -32,6 +32,10 @@ jobs:
flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
# exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
- name: Run Pytest on the tests in tests/
run:
pytest tests/ -p no:warnings
- name: Run pytest on tableau-utilities
run: |
cd tableau_utilities && pytest -v

1 change: 1 addition & 0 deletions .gitignore
@@ -4,6 +4,7 @@ sheets.googleapis.com-python.json
.idea
.DS_Store
tmp_tdsx_and_config/
development_test_files/

# Byte-compiled / optimized / DLL files
__pycache__/
@@ -314,54 +314,54 @@ def __compare_folders(self, datasource_id, tds_folders, cfg_folders):
def execute(self, context):
""" Update Tableau datasource according to config. """

github_conn = BaseHook.get_connection(self.github_conn_id)
config = cfg.Config(
githup_token=github_conn.password,
repo_name=github_conn.extra_dejson.get('repo_name'),
repo_branch=github_conn.extra_dejson.get('repo_branch'),
subfolder=github_conn.extra_dejson.get('subfolder')
)

ts = get_tableau_server(self.tableau_conn_id)
expected_conn_attrs = self.__set_connection_attributes()

# Get the ID for each datasource in the config
for ds in ts.get.datasources():
if ds not in config.datasources:
continue
config.datasources[ds].id = ds.id

for datasource in config.datasources:
if not datasource.id:
logging.error('!! Datasource not found in Tableau Online: %s / %s',
datasource.project_name, datasource.name)
continue
dsid = datasource.id
# github_conn = BaseHook.get_connection(self.github_conn_id)
# config = cfg.Config(
# githup_token=github_conn.password,
# repo_name=github_conn.extra_dejson.get('repo_name'),
# repo_branch=github_conn.extra_dejson.get('repo_branch'),
# subfolder=github_conn.extra_dejson.get('subfolder')
# )
#
# ts = get_tableau_server(self.tableau_conn_id)
# expected_conn_attrs = self.__set_connection_attributes()
#
# # Get the ID for each datasource in the config
# for ds in ts.get.datasources():
# if ds not in config.datasources:
# continue
# config.datasources[ds].id = ds.id
#
# # for datasource in config.datasources:
# if not datasource.id:
# logging.error('!! Datasource not found in Tableau Online: %s / %s',
# datasource.project_name, datasource.name)
# continue
# dsid = datasource.id
# Set default dict attributes for tasks, for each datasource
self.tasks[dsid] = {a: [] for a in UPDATE_ACTIONS}
self.tasks[dsid]['project'] = datasource.project_name
self.tasks[dsid]['datasource_name'] = datasource.name
if not config.in_maintenance_window and AIRFLOW_ENV not in ['STAGING', 'DEV']:
self.tasks[dsid]['skip'] = 'Outside maintenance window'
logging.info('(SKIP) Outside maintenance window: %s', datasource.name)
continue
elif datasource.name in EXCLUDED_DATASOURCES:
self.tasks[dsid]['skip'] = 'Marked to exclude'
logging.info('(SKIP) Marked to exclude: %s', datasource.name)
continue
logging.info('Checking Datasource: %s', datasource.name)
# Download the Datasource for comparison
dl_path = f"downloads/{dsid}/"
os.makedirs(dl_path, exist_ok=True)
ds_path = ts.download.datasource(dsid, file_dir=dl_path, include_extract=False)
tds = Datasource(ds_path)
# Cleanup downloaded file after assigning the Datasource
shutil.rmtree(dl_path, ignore_errors=True)
# Add connection task, if there is a difference
self.__compare_connection(dsid, datasource.name, tds.connection, expected_conn_attrs)
# Add folder tasks, if folders need to be added/deleted
self.__compare_folders(dsid, tds.folders_common, datasource.folders)
# Add Column tasks, if there are missing columns, or columns need to be updated
# if not config.in_maintenance_window and AIRFLOW_ENV not in ['STAGING', 'DEV']:
# self.tasks[dsid]['skip'] = 'Outside maintenance window'
# logging.info('(SKIP) Outside maintenance window: %s', datasource.name)
# continue
# elif datasource.name in EXCLUDED_DATASOURCES:
# self.tasks[dsid]['skip'] = 'Marked to exclude'
# logging.info('(SKIP) Marked to exclude: %s', datasource.name)
# continue
# logging.info('Checking Datasource: %s', datasource.name)
# # Download the Datasource for comparison
# dl_path = f"downloads/{dsid}/"
# os.makedirs(dl_path, exist_ok=True)
# ds_path = ts.download.datasource(dsid, file_dir=dl_path, include_extract=False)
# tds = Datasource(ds_path)
# # Cleanup downloaded file after assigning the Datasource
# shutil.rmtree(dl_path, ignore_errors=True)
# # Add connection task, if there is a difference
# self.__compare_connection(dsid, datasource.name, tds.connection, expected_conn_attrs)
# # Add folder tasks, if folders need to be added/deleted
# self.__compare_folders(dsid, tds.folders_common, datasource.folders)
# # Add Column tasks, if there are missing columns, or columns need to be updated
for column in datasource.columns:
# Check if the column metadata needs to be updated
self.__compare_column_metadata(dsid, tds, column)
2 changes: 1 addition & 1 deletion setup.py
@@ -12,7 +12,7 @@
long_description=readme,
long_description_content_type='text/markdown',
name="tableau_utilities",
version="2.2.11",
version="2.2.12",
requires_python=">=3.8",
packages=[
'tableau_utilities',
184 changes: 184 additions & 0 deletions tableau_utilities/scripts/apply_configs.py
@@ -0,0 +1,184 @@
from copy import deepcopy
import pprint
from typing import Dict, Any

from tableau_utilities.tableau_file.tableau_file import Datasource
from tableau_utilities.scripts.gen_config import build_configs
from tableau_utilities.scripts.datasource import add_metadata_records_as_columns

class ApplyConfigs:
"""Applies a set of configs to a datasource. Configs prefixed with target_ will be applied to the datasource.
Configs prefixed with datasource_ represent the current state of the datasource before changes.
"""

def __init__(self,
datasource_name: str,
datasource_path: str,
target_column_config: Dict[str, Any],
target_calculated_column_config: Dict[str, Any],
debugging_logs: bool) -> None:
self.datasource_name: str = datasource_name
self.datasource_path: str = datasource_path
self.target_column_config: Dict[str, Any] = target_column_config
self.target_calculated_column_config: Dict[str, Any] = target_calculated_column_config
self.debugging_logs: bool = debugging_logs


def select_matching_datasource_config(self, config: Dict[str, Any]) -> Dict[str, Any]:
""" Limit

Args:
comfig:

Returns:
A config with any datasource that is not self.datasource_name removed

"""

config = config[self.datasource_name]
return config

def invert_config(self, config: Dict[str, Any]) -> Dict[str, Any]:
"""Helper function to invert the column config and calc config.
Output -> {datasource: {column: info}}

Args:
config (dict): The config to invert.

Returns:
dict: The inverted config.
"""

inverted_config = {}

for column, i in config.items():
for datasource in i['datasources']:
new_info = deepcopy(i)
del new_info['datasources']
new_info['local-name'] = datasource['local-name']
new_info['remote_name'] = datasource['sql_alias'] if 'sql_alias' in datasource else None
inverted_config.setdefault(datasource['name'], {column: new_info})
inverted_config[datasource['name']].setdefault(column, new_info)

if self.debugging_logs:
pp = pprint.PrettyPrinter(indent=4, width=80, depth=None, compact=False)
pp.pprint(inverted_config)

return inverted_config


def prepare_configs(self, config_A: Dict[str, Any], config_B: Dict[str, Any]) -> Dict[str, Any]:
""" Takes 2 configs to invert, combine, and remove irrelevant datasource information. Columns in a main config
can be in 1 or many Tableau datasources. So when managing multiple datasources it's likely to have columns that
need removal

Args:
config_A:
config_B:

Returns:

"""

# invert the configs
config_A = self.invert_config(config_A)
config_B = self.invert_config(config_B)

# Get only the configs to the current datasource
config_A = self.select_matching_datasource_config(config_A)
config_B = self.select_matching_datasource_config(config_B)

# Combine configs
combined_config = {**config_A, **config_B}

return combined_config

def compare_columns(self):
""" Compares the config to a datasource. Generates a list of changes to make the datasource match the config

Returns:
dict: a dictionary with the columns that need updating

"""

# compare the caption. If the caption matches compare the attributes
pass

def compare_configs(self, config, datasource_current_config, datasource_name):
""" Compares the config to a datasource. Generates a list of changes to make the datasource match the config

Returns:
dict: a dictionary with the columns that need updating

"""

# compare the caption. If the caption matches compare the attributes
pass


def execute_changes(self, column_config, calculated_field_config, datasource):
""" Applies changes to make

Args:
config:
datasource:

Returns:

"""
pass

def apply_config_to_datasource(self):
""" Applies a set of configs (column_config and calculated_column_config) to a datasource.
If a column is in a datasource but NOT in the config that column will be unchanged.

Args:
datasource_name:
datasource_path:
column_config:
calculated_field_config:
debugging_logs:



Returns:
None

"""

datasource = Datasource(self.datasource_path)

# Run column init on the datasource to make sure columns aren't hiding in Metadata records
datasource = add_metadata_records_as_columns(datasource, self.debugging_logs)

# Build the config dictionaries from the datasource
datasource_column_config, datasource_calculated_column_config = build_configs(datasource, self.datasource_name)
# Prepare the configs by inverting, combining and removing configs for other datasources
target_config = self.prepare_configs(self.target_column_config, self.target_calculated_column_config)
datasource_config = self.prepare_configs(datasource_column_config, datasource_calculated_column_config)



# Get the changes to make for the column config
# Get the changes to make for the calculation config

# Apply the changes for the column config
# Apply the changes for the calc config

# Clean up the empty folders

# Save the file
pass


def apply_configs(args):
# Set variables from the args
debugging_logs = args.debugging_logs
datasource_name = args.name
datasource_path = args.file_path
target_column_config = args.column_config
target_calculated_column_config = args.calculated_column_config

AC = ApplyConfigs(datasource_name, datasource_path, target_column_config, target_calculated_column_config, debugging_logs)

AC.apply_config_to_datasource()
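For reference, a minimal standalone sketch of what the inversion step in prepare_configs produces. The sample config below is hypothetical and only assumes the keys that invert_config itself reads (datasources, local-name, sql_alias); selecting inverted["Sales"] afterwards is what select_matching_datasource_config does.

from copy import deepcopy
from pprint import pprint

# Hypothetical column config entry, keyed by column caption. Only the fields that
# invert_config reads are assumed here.
column_config = {
    "Order ID": {
        "description": "Unique identifier for the order.",
        "folder": "IDs",
        "persona": "string_dimension",
        "datasources": [
            {"name": "Sales", "local-name": "ORDER_ID", "sql_alias": "ORDER_ID"},
            {"name": "Returns", "local-name": "ORDER_ID"},
        ],
    }
}


def invert(config):
    """Re-key the config by datasource name, mirroring the logic of ApplyConfigs.invert_config."""
    inverted = {}
    for column, info in config.items():
        for datasource in info["datasources"]:
            new_info = deepcopy(info)
            del new_info["datasources"]
            new_info["local-name"] = datasource["local-name"]
            new_info["remote_name"] = datasource.get("sql_alias")
            inverted.setdefault(datasource["name"], {})[column] = new_info
    return inverted


pprint(invert(column_config))
# {'Returns': {'Order ID': {..., 'local-name': 'ORDER_ID', 'remote_name': None}},
#  'Sales':   {'Order ID': {..., 'local-name': 'ORDER_ID', 'remote_name': 'ORDER_ID'}}}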
19 changes: 19 additions & 0 deletions tableau_utilities/scripts/cli.py
@@ -15,6 +15,7 @@
from tableau_utilities.scripts.server_operate import server_operate
from tableau_utilities.scripts.datasource import datasource
from tableau_utilities.scripts.csv_config import csv_config
from tableau_utilities.scripts.apply_configs import apply_configs

__version__ = importlib.metadata.version('tableau_utilities')

@@ -163,6 +164,9 @@
help='Deletes data from the extract based on the condition string provided. '
"""E.g. "CREATED_AT" < '1/1/2024'""")
parser_datasource.add_argument('-ci', '--column_init', action='store_true', help="Adds Columns from all Metadata Records, if they don't already exist.")
parser_datasource.add_argument('-cf', '--clean_folders', action='store_true', help="Removes any empty folders without columns")
# parser_datasource.add_argument('-cc', '--column_config', help='The path to the column configs file')
# parser_datasource.add_argument('-cac', '--calculated_column_config', help='The path to the calculated field config file.')
parser_datasource.set_defaults(func=datasource)

# GENERATE CONFIG
@@ -200,6 +204,14 @@
'Use with --merge_with generate_merge_all')
parser_config_merge.set_defaults(func=merge_configs)

# APPLY CONFIGS
parser_config_apply = subparsers.add_parser(
'apply_configs', help='Applies a config to a datasource. Writes over any datasource attributes to make it '
'conform to the config.', formatter_class=RawTextHelpFormatter)
parser_config_apply.add_argument('-cc', '--column_config', help='The path to the column configs file')
parser_config_apply.add_argument('-cac', '--calculated_column_config', help='The path to the calculated field config file.')
parser_config_apply.set_defaults(func=apply_configs)


def validate_args_server_operate(args):
""" Validate that combinations of args are present """
@@ -263,6 +275,11 @@ def validate_args_command_merge_config(args):
parser.error(f'--merge_with {args.merge_with} requires --target_directory')


def validate_args_command_apply_configs(args):
if args.file_path is None or args.name is None or args.column_config is None or args.calculated_column_config is None:
parser.error(f'{args.command} requires --name and --file_path for a datasource and --column_config and --calculated_column_config')


def validate_subpackage_hyper():
""" Checks that the hyper subpackage is installed for functions that use it """

@@ -455,6 +472,8 @@ def main():
validate_args_command_datasource(args)
if args.command == 'merge_config':
validate_args_command_merge_config(args)
if args.command == 'apply_configs':
validate_args_command_apply_configs(args)

# Set/Reset the directory
tmp_folder = args.output_dir
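For reference, a minimal standalone sketch of the new subcommand's argument surface and the combination that validate_args_command_apply_configs enforces. The parser below is rebuilt in isolation and the file names are hypothetical; it does not reproduce how the real CLI shares --name and --file_path across subcommands.

from argparse import ArgumentParser

# Standalone sketch: a stripped-down parser with just the apply_configs flags.
parser = ArgumentParser(prog="tableau_utilities")
subparsers = parser.add_subparsers(dest="command")
parser_config_apply = subparsers.add_parser("apply_configs")
parser_config_apply.add_argument("-n", "--name", help="The datasource name")
parser_config_apply.add_argument("-f", "--file_path", help="The path to the datasource file")
parser_config_apply.add_argument("-cc", "--column_config", help="The path to the column configs file")
parser_config_apply.add_argument("-cac", "--calculated_column_config", help="The path to the calculated field config file")

args = parser.parse_args([
    "apply_configs",
    "--name", "Sales",
    "--file_path", "datasources/Sales.tdsx",
    "--column_config", "column_config.json",
    "--calculated_column_config", "tableau_calc_config.json",
])

# validate_args_command_apply_configs rejects the command unless --name, --file_path,
# --column_config, and --calculated_column_config are all provided.
print(args.command, args.name, args.column_config)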