From 240ed1f6595c2d62fadc1dff6cc83d9fd796780f Mon Sep 17 00:00:00 2001 From: Lukas Trippe Date: Mon, 16 Sep 2024 13:43:33 +0200 Subject: [PATCH] refactor: improve structure and adds custom no logs error (#21) * refactor: improve structure * fix * fix * fix: dont fail for missing logs --- action.yml | 6 +- .../comment_components.py | 193 ++---------------- src/draft_comment.py | 173 ++++++++++++++++ {scripts => src}/metrics.py | 22 +- {scripts => src}/plot_benchmarks.py | 5 +- {scripts => src}/utils.py | 0 6 files changed, 209 insertions(+), 190 deletions(-) rename scripts/draft_comment.py => src/comment_components.py (78%) create mode 100644 src/draft_comment.py rename {scripts => src}/metrics.py (89%) rename {scripts => src}/plot_benchmarks.py (98%) rename {scripts => src}/utils.py (100%) diff --git a/action.yml b/action.yml index 2ea9951..82269b3 100644 --- a/action.yml +++ b/action.yml @@ -290,7 +290,7 @@ runs: # Get static plot list (from input) read -a plots_array_input <<< ${{ inputs.plots }} # Get dynamic plot list (from comment script) - read -a plots_array_dynamic <<< "$(python scripts/draft_comment.py plots)" + read -a plots_array_dynamic <<< "$(python src/draft_comment.py plots)" plots_array=("${plots_array_input[@]}" "${plots_array_dynamic[@]}") @@ -315,7 +315,7 @@ runs: done # Get benchmark plot list (from benchmark script) - read -a plots_array_benchmark <<< "$(python scripts/plot_benchmarks.py)" + read -a plots_array_benchmark <<< "$(python src/plot_benchmarks.py)" mkdir -p _validation-images/benchmarks @@ -356,7 +356,7 @@ runs: # Create comment # Note: The script uses many env variables. See the script for more details. - python scripts/draft_comment.py > $GITHUB_WORKSPACE/comment.txt + python src/draft_comment.py > $GITHUB_WORKSPACE/comment.txt cat $GITHUB_WORKSPACE/comment.txt >> $GITHUB_STEP_SUMMARY diff --git a/scripts/draft_comment.py b/src/comment_components.py similarity index 78% rename from scripts/draft_comment.py rename to src/comment_components.py index 25dc576..6306ef7 100644 --- a/scripts/draft_comment.py +++ b/src/comment_components.py @@ -1,10 +1,5 @@ -""" -Draft comment for pypsa-validator GitHub PRs. +"""Text components to generate validator report.""" -Script can be called via command line or imported as a module. -""" - -import argparse import os import re from dataclasses import dataclass @@ -12,29 +7,11 @@ import numpy as np import pandas as pd + from metrics import min_max_normalized_mae, normalized_root_mean_square_error -from numpy.typing import ArrayLike from utils import get_env_var -def create_numeric_mask(arr: ArrayLike) -> np.ndarray: - """ - Create a mask where True indicates numeric and finite values. - - Parameters - ---------- - arr : array-like - Input array - - Returns - ------- - np.ndarray: Numeric mask where True indicates numeric and finite sort_values - - """ - arr = np.array(arr) - return np.vectorize(lambda x: isinstance(x, (int, float)) and np.isfinite(x))(arr) - - @dataclass class CommentData: """Class to store data for comment generation.""" @@ -74,12 +51,15 @@ def errors(self, branch_type: str) -> list: logs = list( Path(f"{self.dir_artifacts}/logs/{branch_type}/.snakemake/log/").glob("*") ) - if len(logs) != 1: + if len(logs) > 1: msg = ( - "Expected exactly one log file in snakemake/log directory " - "({branch_type} branch)." + "Expected exactly one log fiie in snakemake/log directory " + f"({branch_type} branch). Found {len(logs)}." ) raise ValueError(msg) + elif len(logs) == 0: + inpt_erros = ['no_logs_found'] + return inpt_erros with logs[0].open() as file: log = file.read() @@ -150,7 +130,7 @@ def create_details_block(summary: str, content: str) -> str: return "" -class RunSuccessfull(CommentData): +class RunSuccessfullComponent(CommentData): """Class to generate successfull run component.""" def __init__(self): @@ -315,20 +295,22 @@ def _format_csvs_dir_files(self, df: pd.DataFrame) -> pd.DataFrame: # Set header df.columns = df.iloc[header_row_index] # Fill nan values in header - df.columns = [ - f"col{i+1}" if pd.isna(col) or col == "" or col is None else col - for i, col in enumerate(df.columns) - ] + df.columns = pd.Index( + [ + f"col{i+1}" if pd.isna(col) or col == "" or col is None else col + for i, col in enumerate(df.columns) + ] + ) # Remove all rows before header df = df.iloc[header_row_index + 1 :] # Make non-numeric values the index non_numeric = df.apply( - lambda col: pd.to_numeric(col, errors="coerce").isna().all() + lambda col: pd.to_numeric(col, errors="coerce").isna().all() # type: ignore ) if non_numeric.any(): - df = df.set_index(df.columns[non_numeric].to_list()) + df = df.set_index(df.columns[non_numeric].to_list()) # type: ignore else: df = df.set_index("planning_horizon") @@ -507,7 +489,7 @@ def __call__(self) -> str: return self.body -class RunFailed(CommentData): +class RunFailedComponent(CommentData): """Class to generate failed run component.""" def __init__(self): @@ -545,7 +527,7 @@ def __call__(self) -> str: return self.body() -class ModelMetrics(CommentData): +class ModelMetricsComponent(CommentData): """Class to generate model metrics component.""" def __init__(self): @@ -575,140 +557,3 @@ def body(self) -> str: def __call__(self) -> str: """Return text for model metrics component.""" return self.body() - - -class Comment(CommentData): - """Class to generate pypsa validator comment for GitHub PRs.""" - - def __init__(self) -> None: - """Initialize comment class. It will put all text components together.""" - super().__init__() - - @property - def header(self) -> str: - """ - Header text. - - Contains the title, identifier, and short description. - """ - return ( - f"" - f"\n" - f"## Validator Report\n" - f"I am the Validator. Download all artifacts [here](https://github.com/" - f"{self.github_repository}/actions/runs/{self.github_run_id}).\n" - f"I'll be back and edit this comment for each new commit.\n\n" - ) - - @property - def config_diff(self) -> str: - """ - Config diff text. - - Only use when there are changes in the config. - """ - return ( - f"
\n" - f" :warning: Config changes detected!\n" - f"\n" - f"Results may differ due to these changes:\n" - f"```diff\n" - f"{self.git_diff_config}\n" - f"```\n" - f"
\n\n" - ) - - @property - def subtext(self) -> str: - """Subtext for the comment.""" - if self.hash_feature: - hash_feature = ( - f"([{self.hash_feature[:7]}](https://github.com/" - f"{self.github_repository}/commits/{self.hash_feature})) " - ) - if self.hash_main: - hash_main = ( - f"([{self.hash_main[:7]}](https://github.com/" - f"{self.github_repository}/commits/{self.hash_main}))" - ) - time = ( - pd.Timestamp.now() - .tz_localize("UTC") - .tz_convert("Europe/Berlin") - .strftime("%Y-%m-%d %H:%M:%S %Z") - ) - return ( - f"Comparing `{self.github_head_ref}` {hash_feature}with " - f"`{self.github_base_ref}` {hash_main}.\n" - f"Branch is {self.ahead_count} commits ahead and {self.behind_count} " - f"commits behind.\n" - f"Last updated on `{time}`." - ) - - def dynamic_plots(self) -> str: - """ - Return a list of dynamic results plots needed for the comment. - - Returns - ------- - str: Space separated list of dynamic plots. - - """ - if self.sucessfull_run: - body_sucessfull = RunSuccessfull() - plots_string = " ".join(body_sucessfull.variables_plot_strings) - return plots_string - else: - "" - - def __repr__(self) -> str: - """Return full formatted comment.""" - body_benchmarks = ModelMetrics() - if self.sucessfull_run: - body_sucessfull = RunSuccessfull() - - return ( - f"{self.header}" - f"{self.config_diff if self.git_diff_config else ''}" - f"{body_sucessfull()}" - f"{body_benchmarks()}" - f"{self.subtext}" - ) - - else: - body_failed = RunFailed() - - return ( - f"{self.header}" - f"{body_failed()}" - f"{self.config_diff if self.git_diff_config else ''}" - f"{body_benchmarks()}" - f"{self.subtext}" - ) - - -def main(): - """ - Run draft comment script. - - Command line interface for the draft comment script. Use no arguments to print the - comment, or use the "plots" argument to print the dynamic plots which will be needed - for the comment. - """ - parser = argparse.ArgumentParser(description="Process some comments.") - parser.add_argument( - "command", nargs="?", default="", help='Command to run, e.g., "plots".' - ) - args = parser.parse_args() - - comment = Comment() - - if args.command == "plots": - print(comment.dynamic_plots()) - - else: - print(comment) # noqa T201 - - -if __name__ == "__main__": - main() diff --git a/src/draft_comment.py b/src/draft_comment.py new file mode 100644 index 0000000..99d5c68 --- /dev/null +++ b/src/draft_comment.py @@ -0,0 +1,173 @@ +""" +Draft comment for pypsa-validator GitHub PRs. + +Script can be called via command line or imported as a module. +""" + +import argparse + +import numpy as np +import pandas as pd +from numpy.typing import ArrayLike + +from comment_components import ( + CommentData, + ModelMetricsComponent, + RunFailedComponent, + RunSuccessfullComponent, +) + + +def create_numeric_mask(arr: ArrayLike) -> np.ndarray: + """ + Create a mask where True indicates numeric and finite values. + + Parameters + ---------- + arr : array-like + Input array + + Returns + ------- + np.ndarray: Numeric mask where True indicates numeric and finite sort_values + + """ + arr = np.array(arr) + return np.vectorize(lambda x: isinstance(x, (int, float)) and np.isfinite(x))(arr) + + +class Comment(CommentData): + """Class to generate pypsa validator comment for GitHub PRs.""" + + def __init__(self) -> None: + """Initialize comment class. It will put all text components together.""" + super().__init__() + + @property + def header(self) -> str: + """ + Header text. + + Contains the title, identifier, and short description. + """ + return ( + f"" + f"\n" + f"## Validator Report\n" + f"I am the Validator. Download all artifacts [here](https://github.com/" + f"{self.github_repository}/actions/runs/{self.github_run_id}).\n" + f"I'll be back and edit this comment for each new commit.\n\n" + ) + + @property + def config_diff(self) -> str: + """ + Config diff text. + + Only use when there are changes in the config. + """ + return ( + f"
\n" + f" :warning: Config changes detected!\n" + f"\n" + f"Results may differ due to these changes:\n" + f"```diff\n" + f"{self.git_diff_config}\n" + f"```\n" + f"
\n\n" + ) + + @property + def subtext(self) -> str: + """Subtext for the comment.""" + if self.hash_feature: + hash_feature = ( + f"([{self.hash_feature[:7]}](https://github.com/" + f"{self.github_repository}/commits/{self.hash_feature})) " + ) + if self.hash_main: + hash_main = ( + f"([{self.hash_main[:7]}](https://github.com/" + f"{self.github_repository}/commits/{self.hash_main}))" + ) + time = ( + pd.Timestamp.now() + .tz_localize("UTC") + .tz_convert("Europe/Berlin") + .strftime("%Y-%m-%d %H:%M:%S %Z") + ) + return ( + f"Comparing `{self.github_head_ref}` {hash_feature}with " + f"`{self.github_base_ref}` {hash_main}.\n" + f"Branch is {self.ahead_count} commits ahead and {self.behind_count} " + f"commits behind.\n" + f"Last updated on `{time}`." + ) + + def dynamic_plots(self) -> str: + """ + Return a list of dynamic results plots needed for the comment. + + Returns + ------- + str: Space separated list of dynamic plots. + + """ + if self.sucessfull_run: + body_sucessfull = RunSuccessfullComponent() + plots_string = " ".join(body_sucessfull.variables_plot_strings) + return plots_string + else: + return "" + + def __repr__(self) -> str: + """Return full formatted comment.""" + body_benchmarks = ModelMetricsComponent() + if self.sucessfull_run: + body_sucessfull = RunSuccessfullComponent() + + return ( + f"{self.header}" + f"{self.config_diff if self.git_diff_config else ''}" + f"{body_sucessfull()}" + f"{body_benchmarks()}" + f"{self.subtext}" + ) + + else: + body_failed = RunFailedComponent() + + return ( + f"{self.header}" + f"{body_failed()}" + f"{self.config_diff if self.git_diff_config else ''}" + f"{body_benchmarks()}" + f"{self.subtext}" + ) + + +def main(): + """ + Run draft comment script. + + Command line interface for the draft comment script. Use no arguments to print the + comment, or use the "plots" argument to print the dynamic plots which will be needed + for the comment. + """ + parser = argparse.ArgumentParser(description="Process some comments.") + parser.add_argument( + "command", nargs="?", default="", help='Command to run, e.g., "plots".' + ) + args = parser.parse_args() + + comment = Comment() + + if args.command == "plots": + print(comment.dynamic_plots()) + + else: + print(comment) # noqa T201 + + +if __name__ == "__main__": + main() diff --git a/scripts/metrics.py b/src/metrics.py similarity index 89% rename from scripts/metrics.py rename to src/metrics.py index 768d189..611731d 100644 --- a/scripts/metrics.py +++ b/src/metrics.py @@ -21,15 +21,15 @@ def min_max_normalized_mae(y_true: ArrayLike, y_pred: ArrayLike) -> float: float: Min-max normalized MAE """ - y_true = np.array(y_true) - y_pred = np.array(y_pred) + y_true_ = np.array(y_true) + y_pred_ = np.array(y_pred) # Ignore -inf and inf values in y_true - y_true = y_true[np.isfinite(y_true)] - y_pred = y_pred[np.isfinite(y_pred)] + y_true_ = y_true_[np.isfinite(y_true_)] + y_pred_ = y_pred_[np.isfinite(y_pred_)] # Calculate the absolute errors - abs_errors = np.abs(y_true - y_pred) + abs_errors = np.abs(y_true_ - y_pred_) # Check if all errors are the same if np.all(abs_errors == abs_errors[0]): @@ -72,19 +72,19 @@ def mean_absolute_percentage_error( float: MAPE """ - y_true = np.array(y_true) - y_pred = np.array(y_pred) + y_true_ = np.array(y_true) + y_pred_ = np.array(y_pred) # Ignore -inf and inf values if ignore_inf: - y_true = y_true[np.isfinite(y_true)] - y_pred = y_pred[np.isfinite(y_pred)] + y_true_ = y_true_[np.isfinite(y_true)] + y_pred_ = y_pred_[np.isfinite(y_pred)] # Avoid division by zero - y_pred = y_pred + epsilon + y_pred_ = y_pred_ + epsilon # Calculate the absolute percentage errors - mape = np.abs((y_true - y_pred) / y_true) + mape = np.abs((y_true_ - y_pred_) / y_true_) if aggregate: mape = np.mean(mape) diff --git a/scripts/plot_benchmarks.py b/src/plot_benchmarks.py similarity index 98% rename from scripts/plot_benchmarks.py rename to src/plot_benchmarks.py index 8a96523..b119d7c 100644 --- a/scripts/plot_benchmarks.py +++ b/src/plot_benchmarks.py @@ -7,6 +7,7 @@ import numpy as np import pandas as pd import seaborn as sns + from utils import get_env_var DIR_ARTIFACTS: Path = Path( @@ -79,7 +80,7 @@ def create_bar_chart_comparison( xlabel: str, filename: str, ignore_stacked_plot: bool = False, -): +) -> str: """ Create a horizontal bar chart comparing execution time or memory peak. @@ -188,7 +189,7 @@ def create_bar_chart_comparison( return filename -def create_scatter_memory(df: pd.DataFrame, filename: str) -> None: +def create_scatter_memory(df: pd.DataFrame, filename: str) -> str: """ Create a scatter plot of max_rss vs max_uss. diff --git a/scripts/utils.py b/src/utils.py similarity index 100% rename from scripts/utils.py rename to src/utils.py