From 240ed1f6595c2d62fadc1dff6cc83d9fd796780f Mon Sep 17 00:00:00 2001
From: Lukas Trippe <lkstrp@pm.me>
Date: Mon, 16 Sep 2024 13:43:33 +0200
Subject: [PATCH] refactor: improve structure and adds custom no logs error
 (#21)

* refactor: improve structure

* fix

* fix

* fix: dont fail for missing logs
---
 action.yml                                    |   6 +-
 .../comment_components.py                     | 193 ++----------------
 src/draft_comment.py                          | 173 ++++++++++++++++
 {scripts => src}/metrics.py                   |  22 +-
 {scripts => src}/plot_benchmarks.py           |   5 +-
 {scripts => src}/utils.py                     |   0
 6 files changed, 209 insertions(+), 190 deletions(-)
 rename scripts/draft_comment.py => src/comment_components.py (78%)
 create mode 100644 src/draft_comment.py
 rename {scripts => src}/metrics.py (89%)
 rename {scripts => src}/plot_benchmarks.py (98%)
 rename {scripts => src}/utils.py (100%)

diff --git a/action.yml b/action.yml
index 2ea9951..82269b3 100644
--- a/action.yml
+++ b/action.yml
@@ -290,7 +290,7 @@ runs:
       # Get static plot list (from input)
       read -a plots_array_input <<< ${{ inputs.plots }}
       # Get dynamic plot list (from comment script)
-      read -a plots_array_dynamic <<< "$(python scripts/draft_comment.py plots)"
+      read -a plots_array_dynamic <<< "$(python src/draft_comment.py plots)"
 
       plots_array=("${plots_array_input[@]}" "${plots_array_dynamic[@]}")
 
@@ -315,7 +315,7 @@ runs:
       done
 
       # Get benchmark plot list (from benchmark script)
-      read -a plots_array_benchmark <<< "$(python scripts/plot_benchmarks.py)"
+      read -a plots_array_benchmark <<< "$(python src/plot_benchmarks.py)"
 
       mkdir -p _validation-images/benchmarks
 
@@ -356,7 +356,7 @@ runs:
 
       # Create comment
       # Note: The script uses many env variables. See the script for more details.
-      python scripts/draft_comment.py > $GITHUB_WORKSPACE/comment.txt
+      python src/draft_comment.py > $GITHUB_WORKSPACE/comment.txt
 
       cat $GITHUB_WORKSPACE/comment.txt >> $GITHUB_STEP_SUMMARY
 
diff --git a/scripts/draft_comment.py b/src/comment_components.py
similarity index 78%
rename from scripts/draft_comment.py
rename to src/comment_components.py
index 25dc576..6306ef7 100644
--- a/scripts/draft_comment.py
+++ b/src/comment_components.py
@@ -1,10 +1,5 @@
-"""
-Draft comment for pypsa-validator GitHub PRs.
+"""Text components to generate validator report."""
 
-Script can be called via command line or imported as a module.
-"""
-
-import argparse
 import os
 import re
 from dataclasses import dataclass
@@ -12,29 +7,11 @@
 
 import numpy as np
 import pandas as pd
+
 from metrics import min_max_normalized_mae, normalized_root_mean_square_error
-from numpy.typing import ArrayLike
 from utils import get_env_var
 
 
-def create_numeric_mask(arr: ArrayLike) -> np.ndarray:
-    """
-    Create a mask where True indicates numeric and finite values.
-
-    Parameters
-    ----------
-    arr : array-like
-        Input array
-
-    Returns
-    -------
-    np.ndarray: Numeric mask where True indicates numeric and finite sort_values
-
-    """
-    arr = np.array(arr)
-    return np.vectorize(lambda x: isinstance(x, (int, float)) and np.isfinite(x))(arr)
-
-
 @dataclass
 class CommentData:
     """Class to store data for comment generation."""
@@ -74,12 +51,15 @@ def errors(self, branch_type: str) -> list:
         logs = list(
             Path(f"{self.dir_artifacts}/logs/{branch_type}/.snakemake/log/").glob("*")
         )
-        if len(logs) != 1:
+        if len(logs) > 1:
             msg = (
-                "Expected exactly one log file in snakemake/log directory "
-                "({branch_type} branch)."
+                "Expected exactly one log fiie in snakemake/log directory "
+                f"({branch_type} branch). Found {len(logs)}."
             )
             raise ValueError(msg)
+        elif len(logs) == 0:
+            inpt_erros = ['no_logs_found']
+            return inpt_erros
 
         with logs[0].open() as file:
             log = file.read()
@@ -150,7 +130,7 @@ def create_details_block(summary: str, content: str) -> str:
         return ""
 
 
-class RunSuccessfull(CommentData):
+class RunSuccessfullComponent(CommentData):
     """Class to generate successfull run component."""
 
     def __init__(self):
@@ -315,20 +295,22 @@ def _format_csvs_dir_files(self, df: pd.DataFrame) -> pd.DataFrame:
         # Set header
         df.columns = df.iloc[header_row_index]
         # Fill nan values in header
-        df.columns = [
-            f"col{i+1}" if pd.isna(col) or col == "" or col is None else col
-            for i, col in enumerate(df.columns)
-        ]
+        df.columns = pd.Index(
+            [
+                f"col{i+1}" if pd.isna(col) or col == "" or col is None else col
+                for i, col in enumerate(df.columns)
+            ]
+        )
         # Remove all rows before header
         df = df.iloc[header_row_index + 1 :]
 
         # Make non-numeric values the index
         non_numeric = df.apply(
-            lambda col: pd.to_numeric(col, errors="coerce").isna().all()
+            lambda col: pd.to_numeric(col, errors="coerce").isna().all()  # type: ignore
         )
 
         if non_numeric.any():
-            df = df.set_index(df.columns[non_numeric].to_list())
+            df = df.set_index(df.columns[non_numeric].to_list())  # type: ignore
         else:
             df = df.set_index("planning_horizon")
 
@@ -507,7 +489,7 @@ def __call__(self) -> str:
         return self.body
 
 
-class RunFailed(CommentData):
+class RunFailedComponent(CommentData):
     """Class to generate failed run component."""
 
     def __init__(self):
@@ -545,7 +527,7 @@ def __call__(self) -> str:
         return self.body()
 
 
-class ModelMetrics(CommentData):
+class ModelMetricsComponent(CommentData):
     """Class to generate model metrics component."""
 
     def __init__(self):
@@ -575,140 +557,3 @@ def body(self) -> str:
     def __call__(self) -> str:
         """Return text for model metrics component."""
         return self.body()
-
-
-class Comment(CommentData):
-    """Class to generate pypsa validator comment for GitHub PRs."""
-
-    def __init__(self) -> None:
-        """Initialize comment class. It will put all text components together."""
-        super().__init__()
-
-    @property
-    def header(self) -> str:
-        """
-        Header text.
-
-        Contains the title, identifier, and short description.
-        """
-        return (
-            f""
-            f"<!-- _val-bot-id-keyword_ -->\n"
-            f"## Validator Report\n"
-            f"I am the Validator. Download all artifacts [here](https://github.com/"
-            f"{self.github_repository}/actions/runs/{self.github_run_id}).\n"
-            f"I'll be back and edit this comment for each new commit.\n\n"
-        )
-
-    @property
-    def config_diff(self) -> str:
-        """
-        Config diff text.
-
-        Only use when there are changes in the config.
-        """
-        return (
-            f"<details>\n"
-            f"    <summary>:warning: Config changes detected!</summary>\n"
-            f"\n"
-            f"Results may differ due to these changes:\n"
-            f"```diff\n"
-            f"{self.git_diff_config}\n"
-            f"```\n"
-            f"</details>\n\n"
-        )
-
-    @property
-    def subtext(self) -> str:
-        """Subtext for the comment."""
-        if self.hash_feature:
-            hash_feature = (
-                f"([{self.hash_feature[:7]}](https://github.com/"
-                f"{self.github_repository}/commits/{self.hash_feature})) "
-            )
-        if self.hash_main:
-            hash_main = (
-                f"([{self.hash_main[:7]}](https://github.com/"
-                f"{self.github_repository}/commits/{self.hash_main}))"
-            )
-        time = (
-            pd.Timestamp.now()
-            .tz_localize("UTC")
-            .tz_convert("Europe/Berlin")
-            .strftime("%Y-%m-%d %H:%M:%S %Z")
-        )
-        return (
-            f"Comparing `{self.github_head_ref}` {hash_feature}with "
-            f"`{self.github_base_ref}` {hash_main}.\n"
-            f"Branch is {self.ahead_count} commits ahead and {self.behind_count} "
-            f"commits behind.\n"
-            f"Last updated on `{time}`."
-        )
-
-    def dynamic_plots(self) -> str:
-        """
-        Return a list of dynamic results plots needed for the comment.
-
-        Returns
-        -------
-            str: Space separated list of dynamic plots.
-
-        """
-        if self.sucessfull_run:
-            body_sucessfull = RunSuccessfull()
-            plots_string = " ".join(body_sucessfull.variables_plot_strings)
-            return plots_string
-        else:
-            ""
-
-    def __repr__(self) -> str:
-        """Return full formatted comment."""
-        body_benchmarks = ModelMetrics()
-        if self.sucessfull_run:
-            body_sucessfull = RunSuccessfull()
-
-            return (
-                f"{self.header}"
-                f"{self.config_diff if self.git_diff_config else ''}"
-                f"{body_sucessfull()}"
-                f"{body_benchmarks()}"
-                f"{self.subtext}"
-            )
-
-        else:
-            body_failed = RunFailed()
-
-            return (
-                f"{self.header}"
-                f"{body_failed()}"
-                f"{self.config_diff if self.git_diff_config else ''}"
-                f"{body_benchmarks()}"
-                f"{self.subtext}"
-            )
-
-
-def main():
-    """
-    Run draft comment script.
-
-    Command line interface for the draft comment script. Use no arguments to print the
-    comment, or use the "plots" argument to print the dynamic plots which will be needed
-    for the comment.
-    """
-    parser = argparse.ArgumentParser(description="Process some comments.")
-    parser.add_argument(
-        "command", nargs="?", default="", help='Command to run, e.g., "plots".'
-    )
-    args = parser.parse_args()
-
-    comment = Comment()
-
-    if args.command == "plots":
-        print(comment.dynamic_plots())
-
-    else:
-        print(comment)  # noqa T201
-
-
-if __name__ == "__main__":
-    main()
diff --git a/src/draft_comment.py b/src/draft_comment.py
new file mode 100644
index 0000000..99d5c68
--- /dev/null
+++ b/src/draft_comment.py
@@ -0,0 +1,173 @@
+"""
+Draft comment for pypsa-validator GitHub PRs.
+
+Script can be called via command line or imported as a module.
+"""
+
+import argparse
+
+import numpy as np
+import pandas as pd
+from numpy.typing import ArrayLike
+
+from comment_components import (
+    CommentData,
+    ModelMetricsComponent,
+    RunFailedComponent,
+    RunSuccessfullComponent,
+)
+
+
+def create_numeric_mask(arr: ArrayLike) -> np.ndarray:
+    """
+    Create a mask where True indicates numeric and finite values.
+
+    Parameters
+    ----------
+    arr : array-like
+        Input array
+
+    Returns
+    -------
+    np.ndarray: Numeric mask where True indicates numeric and finite sort_values
+
+    """
+    arr = np.array(arr)
+    return np.vectorize(lambda x: isinstance(x, (int, float)) and np.isfinite(x))(arr)
+
+
+class Comment(CommentData):
+    """Class to generate pypsa validator comment for GitHub PRs."""
+
+    def __init__(self) -> None:
+        """Initialize comment class. It will put all text components together."""
+        super().__init__()
+
+    @property
+    def header(self) -> str:
+        """
+        Header text.
+
+        Contains the title, identifier, and short description.
+        """
+        return (
+            f""
+            f"<!-- _val-bot-id-keyword_ -->\n"
+            f"## Validator Report\n"
+            f"I am the Validator. Download all artifacts [here](https://github.com/"
+            f"{self.github_repository}/actions/runs/{self.github_run_id}).\n"
+            f"I'll be back and edit this comment for each new commit.\n\n"
+        )
+
+    @property
+    def config_diff(self) -> str:
+        """
+        Config diff text.
+
+        Only use when there are changes in the config.
+        """
+        return (
+            f"<details>\n"
+            f"    <summary>:warning: Config changes detected!</summary>\n"
+            f"\n"
+            f"Results may differ due to these changes:\n"
+            f"```diff\n"
+            f"{self.git_diff_config}\n"
+            f"```\n"
+            f"</details>\n\n"
+        )
+
+    @property
+    def subtext(self) -> str:
+        """Subtext for the comment."""
+        if self.hash_feature:
+            hash_feature = (
+                f"([{self.hash_feature[:7]}](https://github.com/"
+                f"{self.github_repository}/commits/{self.hash_feature})) "
+            )
+        if self.hash_main:
+            hash_main = (
+                f"([{self.hash_main[:7]}](https://github.com/"
+                f"{self.github_repository}/commits/{self.hash_main}))"
+            )
+        time = (
+            pd.Timestamp.now()
+            .tz_localize("UTC")
+            .tz_convert("Europe/Berlin")
+            .strftime("%Y-%m-%d %H:%M:%S %Z")
+        )
+        return (
+            f"Comparing `{self.github_head_ref}` {hash_feature}with "
+            f"`{self.github_base_ref}` {hash_main}.\n"
+            f"Branch is {self.ahead_count} commits ahead and {self.behind_count} "
+            f"commits behind.\n"
+            f"Last updated on `{time}`."
+        )
+
+    def dynamic_plots(self) -> str:
+        """
+        Return a list of dynamic results plots needed for the comment.
+
+        Returns
+        -------
+            str: Space separated list of dynamic plots.
+
+        """
+        if self.sucessfull_run:
+            body_sucessfull = RunSuccessfullComponent()
+            plots_string = " ".join(body_sucessfull.variables_plot_strings)
+            return plots_string
+        else:
+            return ""
+
+    def __repr__(self) -> str:
+        """Return full formatted comment."""
+        body_benchmarks = ModelMetricsComponent()
+        if self.sucessfull_run:
+            body_sucessfull = RunSuccessfullComponent()
+
+            return (
+                f"{self.header}"
+                f"{self.config_diff if self.git_diff_config else ''}"
+                f"{body_sucessfull()}"
+                f"{body_benchmarks()}"
+                f"{self.subtext}"
+            )
+
+        else:
+            body_failed = RunFailedComponent()
+
+            return (
+                f"{self.header}"
+                f"{body_failed()}"
+                f"{self.config_diff if self.git_diff_config else ''}"
+                f"{body_benchmarks()}"
+                f"{self.subtext}"
+            )
+
+
+def main():
+    """
+    Run draft comment script.
+
+    Command line interface for the draft comment script. Use no arguments to print the
+    comment, or use the "plots" argument to print the dynamic plots which will be needed
+    for the comment.
+    """
+    parser = argparse.ArgumentParser(description="Process some comments.")
+    parser.add_argument(
+        "command", nargs="?", default="", help='Command to run, e.g., "plots".'
+    )
+    args = parser.parse_args()
+
+    comment = Comment()
+
+    if args.command == "plots":
+        print(comment.dynamic_plots())
+
+    else:
+        print(comment)  # noqa T201
+
+
+if __name__ == "__main__":
+    main()
diff --git a/scripts/metrics.py b/src/metrics.py
similarity index 89%
rename from scripts/metrics.py
rename to src/metrics.py
index 768d189..611731d 100644
--- a/scripts/metrics.py
+++ b/src/metrics.py
@@ -21,15 +21,15 @@ def min_max_normalized_mae(y_true: ArrayLike, y_pred: ArrayLike) -> float:
     float: Min-max normalized MAE
 
     """
-    y_true = np.array(y_true)
-    y_pred = np.array(y_pred)
+    y_true_ = np.array(y_true)
+    y_pred_ = np.array(y_pred)
 
     # Ignore -inf and inf values in y_true
-    y_true = y_true[np.isfinite(y_true)]
-    y_pred = y_pred[np.isfinite(y_pred)]
+    y_true_ = y_true_[np.isfinite(y_true_)]
+    y_pred_ = y_pred_[np.isfinite(y_pred_)]
 
     # Calculate the absolute errors
-    abs_errors = np.abs(y_true - y_pred)
+    abs_errors = np.abs(y_true_ - y_pred_)
 
     # Check if all errors are the same
     if np.all(abs_errors == abs_errors[0]):
@@ -72,19 +72,19 @@ def mean_absolute_percentage_error(
     float: MAPE
 
     """
-    y_true = np.array(y_true)
-    y_pred = np.array(y_pred)
+    y_true_ = np.array(y_true)
+    y_pred_ = np.array(y_pred)
 
     # Ignore -inf and inf values
     if ignore_inf:
-        y_true = y_true[np.isfinite(y_true)]
-        y_pred = y_pred[np.isfinite(y_pred)]
+        y_true_ = y_true_[np.isfinite(y_true)]
+        y_pred_ = y_pred_[np.isfinite(y_pred)]
 
     # Avoid division by zero
-    y_pred = y_pred + epsilon
+    y_pred_ = y_pred_ + epsilon
 
     # Calculate the absolute percentage errors
-    mape = np.abs((y_true - y_pred) / y_true)
+    mape = np.abs((y_true_ - y_pred_) / y_true_)
 
     if aggregate:
         mape = np.mean(mape)
diff --git a/scripts/plot_benchmarks.py b/src/plot_benchmarks.py
similarity index 98%
rename from scripts/plot_benchmarks.py
rename to src/plot_benchmarks.py
index 8a96523..b119d7c 100644
--- a/scripts/plot_benchmarks.py
+++ b/src/plot_benchmarks.py
@@ -7,6 +7,7 @@
 import numpy as np
 import pandas as pd
 import seaborn as sns
+
 from utils import get_env_var
 
 DIR_ARTIFACTS: Path = Path(
@@ -79,7 +80,7 @@ def create_bar_chart_comparison(
     xlabel: str,
     filename: str,
     ignore_stacked_plot: bool = False,
-):
+) -> str:
     """
     Create a horizontal bar chart comparing execution time or memory peak.
 
@@ -188,7 +189,7 @@ def create_bar_chart_comparison(
     return filename
 
 
-def create_scatter_memory(df: pd.DataFrame, filename: str) -> None:
+def create_scatter_memory(df: pd.DataFrame, filename: str) -> str:
     """
     Create a scatter plot of max_rss vs max_uss.
 
diff --git a/scripts/utils.py b/src/utils.py
similarity index 100%
rename from scripts/utils.py
rename to src/utils.py