From 88ef47eb94cabc99f6a91f145d186e845a59adac Mon Sep 17 00:00:00 2001 From: Chris Date: Fri, 3 Feb 2023 12:57:52 -0500 Subject: [PATCH 01/24] ADD: added if __name__ block for testing --- route1io_connectors/google/google_analytics.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/route1io_connectors/google/google_analytics.py b/route1io_connectors/google/google_analytics.py index 0477a7b..d427442 100644 --- a/route1io_connectors/google/google_analytics.py +++ b/route1io_connectors/google/google_analytics.py @@ -220,4 +220,7 @@ def _process_dimensions(dimensions: List[str]) -> List[Dict[str, str]]: def _process_metrics(metrics: List[str]) -> List[Dict[str, str]]: """Return list of dictionary's containing the metrics formatted for Google Analytics Reporting API to accept the request""" - return [{"expression": f"ga:{metric}"} for metric in metrics] \ No newline at end of file + return [{"expression": f"ga:{metric}"} for metric in metrics] + +if __name__ == "__main__": + pass \ No newline at end of file From ceb33cafa930e3ce184e8cafb175ddd480aa0c90 Mon Sep 17 00:00:00 2001 From: Chris Date: Fri, 3 Feb 2023 13:07:07 -0500 Subject: [PATCH 02/24] ADD: added environment setup to if __name__ --- route1io_connectors/google/google_analytics.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/route1io_connectors/google/google_analytics.py b/route1io_connectors/google/google_analytics.py index d427442..e7185c1 100644 --- a/route1io_connectors/google/google_analytics.py +++ b/route1io_connectors/google/google_analytics.py @@ -223,4 +223,9 @@ def _process_metrics(metrics: List[str]) -> List[Dict[str, str]]: return [{"expression": f"ga:{metric}"} for metric in metrics] if __name__ == "__main__": - pass \ No newline at end of file + import dotenv + import os + + dotenv.load_dotenv("../../.env") + + \ No newline at end of file From 6c3cce446537848718bd7f5049b059d2f6ef8d68 Mon Sep 17 00:00:00 2001 From: Chris Date: Fri, 3 Feb 2023 
13:09:37 -0500 Subject: [PATCH 03/24] FIX: fixed docs and var name to reflect Google Analytics connection --- route1io_connectors/google/google_analytics.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/route1io_connectors/google/google_analytics.py b/route1io_connectors/google/google_analytics.py index e7185c1..1de372d 100644 --- a/route1io_connectors/google/google_analytics.py +++ b/route1io_connectors/google/google_analytics.py @@ -8,6 +8,7 @@ import numpy as np import pandas as pd from googleapiclient.discovery import build +from google.analytics.data_v1beta import BetaAnalyticsDataClient def connect_to_google_analytics(credentials: "google.oauth2.credentials.Credentials" ) -> "googleapiclient.discovery.Resource": @@ -20,11 +21,11 @@ def connect_to_google_analytics(credentials: "google.oauth2.credentials.Credenti Returns ------- - google_drive_conn : googleapiclient.discovery.Resource - Connection to Google Drive API + google_conn : googleapiclient.discovery.Resource + Connection to Google Analytics API """ - google_drive_conn = build('analyticsreporting', 'v4', credentials=credentials) - return google_drive_conn + google_conn = build('analyticsreporting', 'v4', credentials=credentials) + return google_conn def get_google_analytics_data( analytics, From 66308d5b5badb1fde9effa96f3eb53c360990b9d Mon Sep 17 00:00:00 2001 From: Chris Date: Fri, 3 Feb 2023 13:12:50 -0500 Subject: [PATCH 04/24] ADD: added ga4 flag to connect_to_google_analytics --- route1io_connectors/google/google_analytics.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/route1io_connectors/google/google_analytics.py b/route1io_connectors/google/google_analytics.py index 1de372d..aa6ab84 100644 --- a/route1io_connectors/google/google_analytics.py +++ b/route1io_connectors/google/google_analytics.py @@ -10,8 +10,10 @@ from googleapiclient.discovery import build from google.analytics.data_v1beta import BetaAnalyticsDataClient -def 
connect_to_google_analytics(credentials: "google.oauth2.credentials.Credentials" - ) -> "googleapiclient.discovery.Resource": +def connect_to_google_analytics( + credentials: "google.oauth2.credentials.Credentials", + ga4: bool = False + ) -> "googleapiclient.discovery.Resource": """Return a connection to Google Drive Parameters From 50f5f268882f8754687c382f0a216064470be51f Mon Sep 17 00:00:00 2001 From: Chris Date: Fri, 3 Feb 2023 13:14:11 -0500 Subject: [PATCH 05/24] ADD: added call to BetaAnalyticsDataClient for Data API --- route1io_connectors/google/google_analytics.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/route1io_connectors/google/google_analytics.py b/route1io_connectors/google/google_analytics.py index aa6ab84..773adc4 100644 --- a/route1io_connectors/google/google_analytics.py +++ b/route1io_connectors/google/google_analytics.py @@ -26,7 +26,10 @@ def connect_to_google_analytics( google_conn : googleapiclient.discovery.Resource Connection to Google Analytics API """ - google_conn = build('analyticsreporting', 'v4', credentials=credentials) + if ga4: + google_conn = BetaAnalyticsDataClient(credentials=credentials) + else: + google_conn = build('analyticsreporting', 'v4', credentials=credentials) return google_conn def get_google_analytics_data( From 1f461b0a031cb6ab3c1a7f30bb9963f7f61ada2c Mon Sep 17 00:00:00 2001 From: Chris Date: Fri, 3 Feb 2023 15:11:59 -0500 Subject: [PATCH 06/24] ADD: added _process_universal_analytics_data function --- .../google/google_analytics.py | 21 ++++++++++++------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/route1io_connectors/google/google_analytics.py b/route1io_connectors/google/google_analytics.py index 773adc4..c39b8b2 100644 --- a/route1io_connectors/google/google_analytics.py +++ b/route1io_connectors/google/google_analytics.py @@ -62,6 +62,18 @@ def get_google_analytics_data( ------- df : pd.DataFrame """ + pass + +def _process_universal_analytics_data( + analytics, 
+ view_id: str, + dimensions: List[str] = None, + metrics: List[str] = None, + start_date: str = "7daysAgo", + end_date: str = "today" + ) -> "pd.DataFrame": + """Return pd.DataFrame of Universal Analytics data pulled via the + Google Analytics Reporting API""" resp_df_arr = [] next_page_token = None while True: @@ -84,6 +96,7 @@ def get_google_analytics_data( df = pd.concat(resp_df_arr) return df + def _get_next_page_token(resp: Dict[str, str]) -> Union[str, None]: """Return Boolean indicating if paginated data exists""" return resp["reports"][0].get("nextPageToken") @@ -227,11 +240,3 @@ def _process_metrics(metrics: List[str]) -> List[Dict[str, str]]: """Return list of dictionary's containing the metrics formatted for Google Analytics Reporting API to accept the request""" return [{"expression": f"ga:{metric}"} for metric in metrics] - -if __name__ == "__main__": - import dotenv - import os - - dotenv.load_dotenv("../../.env") - - \ No newline at end of file From 20cb5b193abd2650753d41cc345b39cdc865dc8a Mon Sep 17 00:00:00 2001 From: Chris Date: Fri, 3 Feb 2023 15:19:09 -0500 Subject: [PATCH 07/24] ADD: added reference to _process_universal_analytics_data --- route1io_connectors/google/google_analytics.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/route1io_connectors/google/google_analytics.py b/route1io_connectors/google/google_analytics.py index c39b8b2..72659d2 100644 --- a/route1io_connectors/google/google_analytics.py +++ b/route1io_connectors/google/google_analytics.py @@ -62,7 +62,14 @@ def get_google_analytics_data( ------- df : pd.DataFrame """ - pass + df = _process_universal_analytics_data( + analytics=analytics, + view_id=view_id, + dimensions=dimensions, + metrics=metrics, + start_date=start_date, + end_date=end_date + ) def _process_universal_analytics_data( analytics, From 1c12bb94c2aa98d779efd1357d342443d0ea0d22 Mon Sep 17 00:00:00 2001 From: Chris Date: Fri, 3 Feb 2023 15:20:45 -0500 Subject: [PATCH 08/24] ADD: 
added _process_ga4_data connector --- route1io_connectors/google/google_analytics.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/route1io_connectors/google/google_analytics.py b/route1io_connectors/google/google_analytics.py index 72659d2..262582b 100644 --- a/route1io_connectors/google/google_analytics.py +++ b/route1io_connectors/google/google_analytics.py @@ -71,6 +71,18 @@ def get_google_analytics_data( end_date=end_date ) +def _process_ga4_data( + analytics, + view_id: str, + dimensions: List[str] = None, + metrics: List[str] = None, + start_date: str = "7daysAgo", + end_date: str = "today" + ) -> "pd.DataFrame": + """Return pd.DataFrame of GA4 data pulled via the + Google Analytics Data API""" + pass + def _process_universal_analytics_data( analytics, view_id: str, From 81f3086bd91d8841096b424cfd669c50c3653444 Mon Sep 17 00:00:00 2001 From: chris-greening Date: Mon, 6 Feb 2023 09:25:00 -0500 Subject: [PATCH 09/24] ADD: added processing_func ternary op for setting which processor to use --- route1io_connectors/google/google_analytics.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/route1io_connectors/google/google_analytics.py b/route1io_connectors/google/google_analytics.py index 262582b..fdd129e 100644 --- a/route1io_connectors/google/google_analytics.py +++ b/route1io_connectors/google/google_analytics.py @@ -62,7 +62,9 @@ def get_google_analytics_data( ------- df : pd.DataFrame """ - df = _process_universal_analytics_data( + is_ga4_data = isinstance(analytics, BetaAnalyticsDataClient) + processing_func = _process_ga4_data if is_ga4_data else _process_universal_analytics_data + df = processing_func( analytics=analytics, view_id=view_id, dimensions=dimensions, From f66d7d740be4763955f0669dc4867ed4932c3a4a Mon Sep 17 00:00:00 2001 From: chris-greening Date: Mon, 6 Feb 2023 10:53:56 -0500 Subject: [PATCH 10/24] MODIFY: renamed _request_google_analytics_data -> _request_universal_analytics_data --- 
route1io_connectors/google/google_analytics.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/route1io_connectors/google/google_analytics.py b/route1io_connectors/google/google_analytics.py index fdd129e..8ac588f 100644 --- a/route1io_connectors/google/google_analytics.py +++ b/route1io_connectors/google/google_analytics.py @@ -83,7 +83,7 @@ def _process_ga4_data( ) -> "pd.DataFrame": """Return pd.DataFrame of GA4 data pulled via the Google Analytics Data API""" - pass + def _process_universal_analytics_data( analytics, @@ -98,7 +98,7 @@ def _process_universal_analytics_data( resp_df_arr = [] next_page_token = None while True: - resp = _request_google_analytics_data( + resp = _request_universal_analytics_data( analytics=analytics, view_id=view_id, dimensions=dimensions, @@ -122,7 +122,7 @@ def _get_next_page_token(resp: Dict[str, str]) -> Union[str, None]: """Return Boolean indicating if paginated data exists""" return resp["reports"][0].get("nextPageToken") -def _request_google_analytics_data( +def _request_universal_analytics_data( analytics, view_id: str, dimensions: List[str] = None, From e69852e78a531958e4afb79f95ebe4fb1939e9dd Mon Sep 17 00:00:00 2001 From: chris-greening Date: Mon, 6 Feb 2023 10:54:31 -0500 Subject: [PATCH 11/24] MODIFY: renamed _process_raw_google_analytics_data -> _process_raw_universal_analytics_data --- route1io_connectors/google/google_analytics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/route1io_connectors/google/google_analytics.py b/route1io_connectors/google/google_analytics.py index 8ac588f..f5623de 100644 --- a/route1io_connectors/google/google_analytics.py +++ b/route1io_connectors/google/google_analytics.py @@ -107,7 +107,7 @@ def _process_universal_analytics_data( end_date=end_date, next_page_token=next_page_token ) - resp_df = _process_raw_google_analytics_data(resp=resp) + resp_df = _process_raw_universal_analytics_data(resp=resp) resp_df_arr.append(resp_df) next_page_token = 
_get_next_page_token(resp=resp) From d160edb168331648b09ff3193ae51534cc17cc83 Mon Sep 17 00:00:00 2001 From: chris-greening Date: Mon, 6 Feb 2023 10:55:07 -0500 Subject: [PATCH 12/24] MODIFY: renamed function def to universal analytics --- route1io_connectors/google/google_analytics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/route1io_connectors/google/google_analytics.py b/route1io_connectors/google/google_analytics.py index f5623de..5ab991e 100644 --- a/route1io_connectors/google/google_analytics.py +++ b/route1io_connectors/google/google_analytics.py @@ -164,7 +164,7 @@ def _request_universal_analytics_data( )} ).execute() -def _process_raw_google_analytics_data(resp: Dict[str, Union[str, List, Dict, bool]]) -> "pd.DataFrame": +def _process_raw_universal_analytics_data(resp: Dict[str, Union[str, List, Dict, bool]]) -> "pd.DataFrame": """ Return a DataFrame parsed and constructed from the raw response from Google Analytics""" resp_data = resp['reports'][0] From 0db8252d2c41707c5d10efe78fd9cdabd9a95234 Mon Sep 17 00:00:00 2001 From: chris-greening Date: Mon, 6 Feb 2023 10:59:51 -0500 Subject: [PATCH 13/24] ADD: created utils directory for processing functions and module for universal analytics --- route1io_connectors/google/google_analytics.py | 4 +++- route1io_connectors/google/utils/_universal_analytics.py | 2 ++ 2 files changed, 5 insertions(+), 1 deletion(-) create mode 100644 route1io_connectors/google/utils/_universal_analytics.py diff --git a/route1io_connectors/google/google_analytics.py b/route1io_connectors/google/google_analytics.py index 5ab991e..1cb0ba1 100644 --- a/route1io_connectors/google/google_analytics.py +++ b/route1io_connectors/google/google_analytics.py @@ -10,6 +10,8 @@ from googleapiclient.discovery import build from google.analytics.data_v1beta import BetaAnalyticsDataClient +from .utils import _universal_analytics + def connect_to_google_analytics( credentials: "google.oauth2.credentials.Credentials", ga4: bool 
= False @@ -63,7 +65,7 @@ def get_google_analytics_data( df : pd.DataFrame """ is_ga4_data = isinstance(analytics, BetaAnalyticsDataClient) - processing_func = _process_ga4_data if is_ga4_data else _process_universal_analytics_data + processing_func = _process_ga4_data if is_ga4_data else _process_universal_analytics_data df = processing_func( analytics=analytics, view_id=view_id, diff --git a/route1io_connectors/google/utils/_universal_analytics.py b/route1io_connectors/google/utils/_universal_analytics.py new file mode 100644 index 0000000..7577ccd --- /dev/null +++ b/route1io_connectors/google/utils/_universal_analytics.py @@ -0,0 +1,2 @@ +"""Processing scripts for Universal Analytics data""" + From ca698af53f333ce5dd44f1eec75edaf2003b6d00 Mon Sep 17 00:00:00 2001 From: chris-greening Date: Mon, 6 Feb 2023 11:11:01 -0500 Subject: [PATCH 14/24] ADD: moved all UA code over into separate module --- .../google/utils/_universal_analytics.py | 176 ++++++++++++++++++ 1 file changed, 176 insertions(+) diff --git a/route1io_connectors/google/utils/_universal_analytics.py b/route1io_connectors/google/utils/_universal_analytics.py index 7577ccd..0451dfd 100644 --- a/route1io_connectors/google/utils/_universal_analytics.py +++ b/route1io_connectors/google/utils/_universal_analytics.py @@ -1,2 +1,178 @@ """Processing scripts for Universal Analytics data""" +def _process_universal_analytics_data( + analytics, + view_id: str, + dimensions: List[str] = None, + metrics: List[str] = None, + start_date: str = "7daysAgo", + end_date: str = "today" + ) -> "pd.DataFrame": + """Return pd.DataFrame of Universal Analytics data pulled via the + Google Analytics Reporting API""" + resp_df_arr = [] + next_page_token = None + while True: + resp = _request_universal_analytics_data( + analytics=analytics, + view_id=view_id, + dimensions=dimensions, + metrics=metrics, + start_date=start_date, + end_date=end_date, + next_page_token=next_page_token + ) + resp_df = 
_process_raw_universal_analytics_data(resp=resp) + resp_df_arr.append(resp_df) + + next_page_token = _get_next_page_token(resp=resp) + if next_page_token is None: + break + + df = pd.concat(resp_df_arr) + return df + + +def _get_next_page_token(resp: Dict[str, str]) -> Union[str, None]: + """Return Boolean indicating if paginated data exists""" + return resp["reports"][0].get("nextPageToken") + +def _request_universal_analytics_data( + analytics, + view_id: str, + dimensions: List[str] = None, + metrics: List[str] = None, + start_date: str = "7daysAgo", + end_date: str = "today", + next_page_token = Union[str, None] + ) -> Dict[str, Union[str, List, Dict, bool]]: + """Returns response from reporting request to the Google Analytics Reporting API + built from arguments + + Parameters + ---------- + view_id : str + View ID that we want to view + dimensions : List[str] + List of dimensions + https://ga-dev-tools.web.app/dimensions-metrics-explorer/ + metrics : List[str] + List of metrics + https://ga-dev-tools.web.app/dimensions-metrics-explorer/ + start_date : str + Dynamic preset such as 7daysago or YYYY-MM-DD + end_date : str + Dynamic preset such as today or YYYY-MM-DD + + Returns + ------- + resp : Dict[str, Union[str, List, Dict, bool]] + """ + return analytics.reports().batchGet( + body={'reportRequests': _process_report_requests( + view_id=view_id, + dimensions=dimensions, + metrics=metrics, + start_date=start_date, + end_date=end_date, + next_page_token=next_page_token + )} + ).execute() + +def _process_raw_universal_analytics_data(resp: Dict[str, Union[str, List, Dict, bool]]) -> "pd.DataFrame": + """ Return a DataFrame parsed and constructed from the raw response from + Google Analytics""" + resp_data = resp['reports'][0] + columns_metadata = _process_columns(resp_data['columnHeader']) + columns = list(columns_metadata) + values = _process_rows(resp_data['data']) + df = pd.DataFrame(values, columns=columns) + df = df.astype(columns_metadata) + return df + 
+def _process_rows(values_resp) -> List[List[str]]: + """Return list of lists containing values parsed from API response""" + rows = values_resp['rows'] + processed_rows = [] + for row in rows: + try: + dimensions = row['dimensions'] + except KeyError: + dimensions = [] + + metrics = [metric['values'] for metric in row['metrics']] + metrics = list(itertools.chain.from_iterable(metrics)) + + processed_rows.append([*dimensions, *metrics]) + return processed_rows + +def _process_columns(column_header_resp: Dict[str, str]) -> List[Tuple[str]]: + """Return a dictionary containing column name and associated dtype as parsed + from the Google Analytics API + """ + dimensions_cols = _process_dimensions_columns(column_header_resp=column_header_resp) + metrics_cols = _process_metrics_columns(column_header_resp=column_header_resp) + columns_metadata = [*dimensions_cols, *metrics_cols] + return {key.replace("ga:", ""): val for key, val in columns_metadata} + +def _process_metrics_columns(column_header_resp) -> List[Tuple]: + """Return list of tuple's containing metrics and their associated dtype""" + metrics_col_data = column_header_resp['metricHeader']['metricHeaderEntries'] + metrics_cols = [(metric['name'], _lookup_dtype(metric['type'])) + for metric in metrics_col_data] + return metrics_cols + +def _process_dimensions_columns(column_header_resp) -> List[Tuple[str, str]]: + """Return list of tuple's containing dimensions and their associated dtype""" + try: + dimensions_col_data = column_header_resp['dimensions'] + except KeyError: + dimensions_cols = [] + else: + dimensions_cols = [(dimension, str) for dimension in dimensions_col_data] + return dimensions_cols + +def _lookup_dtype(resp_type: str) -> Dict[str, str]: + """Return dtype for pd.DataFrame associated with column as determined + from the API response + """ + dtypes = { + "INTEGER": np.int32, + "FLOAT": np.float32, + "TIME": str, + "CURRENCY": np.float32 + } + return dtypes[resp_type] + +def 
_process_report_requests( + view_id: str, + dimensions: Union[List[str], None], + metrics: Union[List[str], None], + start_date: str, + end_date: str, + next_page_token: Union[str, None] + ) -> Dict[str, str]: + """Return a dictionary containing formatted data request to Google Analytics + API""" + report_requests = { + "viewId": f"ga:{view_id}", + "dateRanges": [{"startDate": start_date, "endDate": end_date}], + "pageSize": 100_000 + } + if next_page_token is not None: + report_requests["pageToken"] = next_page_token + if dimensions is not None: + report_requests['dimensions'] = _process_dimensions(dimensions) + if metrics is not None: + report_requests['metrics'] = _process_metrics(metrics) + return [report_requests] + +def _process_dimensions(dimensions: List[str]) -> List[Dict[str, str]]: + """Return list of dictionary's containing the dimensions formatted for Google + Analytics Reporting API to accept the request""" + return [{"name": f"ga:{dimension}"} for dimension in dimensions] + +def _process_metrics(metrics: List[str]) -> List[Dict[str, str]]: + """Return list of dictionary's containing the metrics formatted for Google + Analytics Reporting API to accept the request""" + return [{"expression": f"ga:{metric}"} for metric in metrics] From a3ff59d7f138db8853f6486f9818b8bde43ee618 Mon Sep 17 00:00:00 2001 From: chris-greening Date: Mon, 6 Feb 2023 11:26:23 -0500 Subject: [PATCH 15/24] MODIFY: moved appropriate imports over to refactored _universal_analytics module --- .../google/google_analytics.py | 183 +----------------- .../google/utils/_universal_analytics.py | 6 + 2 files changed, 7 insertions(+), 182 deletions(-) diff --git a/route1io_connectors/google/google_analytics.py b/route1io_connectors/google/google_analytics.py index 1cb0ba1..c95ce12 100644 --- a/route1io_connectors/google/google_analytics.py +++ b/route1io_connectors/google/google_analytics.py @@ -2,11 +2,8 @@ This module contains functions for interacting with Google Analytics reporting """ 
-from typing import List, Union, Dict, Tuple -import itertools +from typing import List -import numpy as np -import pandas as pd from googleapiclient.discovery import build from google.analytics.data_v1beta import BetaAnalyticsDataClient @@ -85,181 +82,3 @@ def _process_ga4_data( ) -> "pd.DataFrame": """Return pd.DataFrame of GA4 data pulled via the Google Analytics Data API""" - - -def _process_universal_analytics_data( - analytics, - view_id: str, - dimensions: List[str] = None, - metrics: List[str] = None, - start_date: str = "7daysAgo", - end_date: str = "today" - ) -> "pd.DataFrame": - """Return pd.DataFrame of Universal Analytics data pulled via the - Google Analytics Reporting API""" - resp_df_arr = [] - next_page_token = None - while True: - resp = _request_universal_analytics_data( - analytics=analytics, - view_id=view_id, - dimensions=dimensions, - metrics=metrics, - start_date=start_date, - end_date=end_date, - next_page_token=next_page_token - ) - resp_df = _process_raw_universal_analytics_data(resp=resp) - resp_df_arr.append(resp_df) - - next_page_token = _get_next_page_token(resp=resp) - if next_page_token is None: - break - - df = pd.concat(resp_df_arr) - return df - - -def _get_next_page_token(resp: Dict[str, str]) -> Union[str, None]: - """Return Boolean indicating if paginated data exists""" - return resp["reports"][0].get("nextPageToken") - -def _request_universal_analytics_data( - analytics, - view_id: str, - dimensions: List[str] = None, - metrics: List[str] = None, - start_date: str = "7daysAgo", - end_date: str = "today", - next_page_token = Union[str, None] - ) -> Dict[str, Union[str, List, Dict, bool]]: - """Returns response from reporting request to the Google Analytics Reporting API - built from arguments - - Parameters - ---------- - view_id : str - View ID that we want to view - dimensions : List[str] - List of dimensions - https://ga-dev-tools.web.app/dimensions-metrics-explorer/ - metrics : List[str] - List of metrics - 
https://ga-dev-tools.web.app/dimensions-metrics-explorer/ - start_date : str - Dynamic preset such as 7daysago or YYYY-MM-DD - end_date : str - Dynamic preset such as today or YYYY-MM-DD - - Returns - ------- - resp : Dict[str, Union[str, List, Dict, bool]] - """ - return analytics.reports().batchGet( - body={'reportRequests': _process_report_requests( - view_id=view_id, - dimensions=dimensions, - metrics=metrics, - start_date=start_date, - end_date=end_date, - next_page_token=next_page_token - )} - ).execute() - -def _process_raw_universal_analytics_data(resp: Dict[str, Union[str, List, Dict, bool]]) -> "pd.DataFrame": - """ Return a DataFrame parsed and constructed from the raw response from - Google Analytics""" - resp_data = resp['reports'][0] - columns_metadata = _process_columns(resp_data['columnHeader']) - columns = list(columns_metadata) - values = _process_rows(resp_data['data']) - df = pd.DataFrame(values, columns=columns) - df = df.astype(columns_metadata) - return df - -def _process_rows(values_resp) -> List[List[str]]: - """Return list of lists containing values parsed from API response""" - rows = values_resp['rows'] - processed_rows = [] - for row in rows: - try: - dimensions = row['dimensions'] - except KeyError: - dimensions = [] - - metrics = [metric['values'] for metric in row['metrics']] - metrics = list(itertools.chain.from_iterable(metrics)) - - processed_rows.append([*dimensions, *metrics]) - return processed_rows - -def _process_columns(column_header_resp: Dict[str, str]) -> List[Tuple[str]]: - """Return a dictionary containing column name and associated dtype as parsed - from the Google Analytics API - """ - dimensions_cols = _process_dimensions_columns(column_header_resp=column_header_resp) - metrics_cols = _process_metrics_columns(column_header_resp=column_header_resp) - columns_metadata = [*dimensions_cols, *metrics_cols] - return {key.replace("ga:", ""): val for key, val in columns_metadata} - -def 
_process_metrics_columns(column_header_resp) -> List[Tuple]: - """Return list of tuple's containing metrics and their associated dtype""" - metrics_col_data = column_header_resp['metricHeader']['metricHeaderEntries'] - metrics_cols = [(metric['name'], _lookup_dtype(metric['type'])) - for metric in metrics_col_data] - return metrics_cols - -def _process_dimensions_columns(column_header_resp) -> List[Tuple[str, str]]: - """Return list of tuple's containing dimensions and their associated dtype""" - try: - dimensions_col_data = column_header_resp['dimensions'] - except KeyError: - dimensions_cols = [] - else: - dimensions_cols = [(dimension, str) for dimension in dimensions_col_data] - return dimensions_cols - -def _lookup_dtype(resp_type: str) -> Dict[str, str]: - """Return dtype for pd.DataFrame associated with column as determined - from the API response - """ - dtypes = { - "INTEGER": np.int32, - "FLOAT": np.float32, - "TIME": str, - "CURRENCY": np.float32 - } - return dtypes[resp_type] - -def _process_report_requests( - view_id: str, - dimensions: Union[List[str], None], - metrics: Union[List[str], None], - start_date: str, - end_date: str, - next_page_token: Union[str, None] - ) -> Dict[str, str]: - """Return a dictionary containing formatted data request to Google Analytics - API""" - report_requests = { - "viewId": f"ga:{view_id}", - "dateRanges": [{"startDate": start_date, "endDate": end_date}], - "pageSize": 100_000 - } - if next_page_token is not None: - report_requests["pageToken"] = next_page_token - if dimensions is not None: - report_requests['dimensions'] = _process_dimensions(dimensions) - if metrics is not None: - report_requests['metrics'] = _process_metrics(metrics) - return [report_requests] - -def _process_dimensions(dimensions: List[str]) -> List[Dict[str, str]]: - """Return list of dictionary's containing the dimensions formatted for Google - Analytics Reporting API to accept the request""" - return [{"name": f"ga:{dimension}"} for dimension in 
dimensions] - -def _process_metrics(metrics: List[str]) -> List[Dict[str, str]]: - """Return list of dictionary's containing the metrics formatted for Google - Analytics Reporting API to accept the request""" - return [{"expression": f"ga:{metric}"} for metric in metrics] diff --git a/route1io_connectors/google/utils/_universal_analytics.py b/route1io_connectors/google/utils/_universal_analytics.py index 0451dfd..c0a847d 100644 --- a/route1io_connectors/google/utils/_universal_analytics.py +++ b/route1io_connectors/google/utils/_universal_analytics.py @@ -1,5 +1,11 @@ """Processing scripts for Universal Analytics data""" +from typing import List, Union, Dict, Tuple +import itertools + +import pandas as pd +import numpy as np + def _process_universal_analytics_data( analytics, view_id: str, From 42621ff8861de8bf23799c8a149579272b79298f Mon Sep 17 00:00:00 2001 From: chris-greening Date: Mon, 6 Feb 2023 11:27:14 -0500 Subject: [PATCH 16/24] FIX: added correct import for process_universal_analytics_data --- route1io_connectors/google/google_analytics.py | 2 +- route1io_connectors/google/utils/_universal_analytics.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/route1io_connectors/google/google_analytics.py b/route1io_connectors/google/google_analytics.py index c95ce12..7f56b85 100644 --- a/route1io_connectors/google/google_analytics.py +++ b/route1io_connectors/google/google_analytics.py @@ -62,7 +62,7 @@ def get_google_analytics_data( df : pd.DataFrame """ is_ga4_data = isinstance(analytics, BetaAnalyticsDataClient) - processing_func = _process_ga4_data if is_ga4_data else _process_universal_analytics_data + processing_func = _process_ga4_data if is_ga4_data else _universal_analytics.process_universal_analytics_data df = processing_func( analytics=analytics, view_id=view_id, diff --git a/route1io_connectors/google/utils/_universal_analytics.py b/route1io_connectors/google/utils/_universal_analytics.py index c0a847d..d5bccf7 100644 --- 
a/route1io_connectors/google/utils/_universal_analytics.py +++ b/route1io_connectors/google/utils/_universal_analytics.py @@ -6,7 +6,7 @@ import pandas as pd import numpy as np -def _process_universal_analytics_data( +def process_universal_analytics_data( analytics, view_id: str, dimensions: List[str] = None, From 0ea1b12ba2dfab0e50dea6269d45a2cce544ad72 Mon Sep 17 00:00:00 2001 From: chris-greening Date: Tue, 7 Feb 2023 11:12:41 -0500 Subject: [PATCH 17/24] MODIFY: moved GA4 processing code over into its own module --- route1io_connectors/google/google_analytics.py | 14 ++------------ route1io_connectors/google/utils/_ga4.py | 14 ++++++++++++++ 2 files changed, 16 insertions(+), 12 deletions(-) create mode 100644 route1io_connectors/google/utils/_ga4.py diff --git a/route1io_connectors/google/google_analytics.py b/route1io_connectors/google/google_analytics.py index 7f56b85..44435c8 100644 --- a/route1io_connectors/google/google_analytics.py +++ b/route1io_connectors/google/google_analytics.py @@ -7,7 +7,7 @@ from googleapiclient.discovery import build from google.analytics.data_v1beta import BetaAnalyticsDataClient -from .utils import _universal_analytics +from .utils import _universal_analytics, _ga4 def connect_to_google_analytics( credentials: "google.oauth2.credentials.Credentials", @@ -62,7 +62,7 @@ def get_google_analytics_data( df : pd.DataFrame """ is_ga4_data = isinstance(analytics, BetaAnalyticsDataClient) - processing_func = _process_ga4_data if is_ga4_data else _universal_analytics.process_universal_analytics_data + processing_func = _ga4.process_ga4_data if is_ga4_data else _universal_analytics.process_universal_analytics_data df = processing_func( analytics=analytics, view_id=view_id, @@ -72,13 +72,3 @@ def get_google_analytics_data( end_date=end_date ) -def _process_ga4_data( - analytics, - view_id: str, - dimensions: List[str] = None, - metrics: List[str] = None, - start_date: str = "7daysAgo", - end_date: str = "today" - ) -> "pd.DataFrame": - 
"""Return pd.DataFrame of GA4 data pulled via the - Google Analytics Data API""" diff --git a/route1io_connectors/google/utils/_ga4.py b/route1io_connectors/google/utils/_ga4.py new file mode 100644 index 0000000..1df5f5c --- /dev/null +++ b/route1io_connectors/google/utils/_ga4.py @@ -0,0 +1,14 @@ +"""Code for requesting and processing data from GA4""" + +from typing import List + +def process_ga4_data( + analytics, + view_id: str, + dimensions: List[str] = None, + metrics: List[str] = None, + start_date: str = "7daysAgo", + end_date: str = "today" + ) -> "pd.DataFrame": + """Return pd.DataFrame of GA4 data pulled via the + Google Analytics Data API""" From 6f46a4c3c924b7ab7ae4cd5bd715241c63de1da2 Mon Sep 17 00:00:00 2001 From: chris-greening Date: Tue, 7 Feb 2023 11:21:51 -0500 Subject: [PATCH 18/24] REFACTOR: rewrote ternary op as if-else block for readability --- route1io_connectors/google/google_analytics.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/route1io_connectors/google/google_analytics.py b/route1io_connectors/google/google_analytics.py index 44435c8..43e745c 100644 --- a/route1io_connectors/google/google_analytics.py +++ b/route1io_connectors/google/google_analytics.py @@ -62,7 +62,10 @@ def get_google_analytics_data( df : pd.DataFrame """ is_ga4_data = isinstance(analytics, BetaAnalyticsDataClient) - processing_func = _ga4.process_ga4_data if is_ga4_data else _universal_analytics.process_universal_analytics_data + if is_ga4_data: + processing_func = _ga4.process_ga4_data + else: + processing_func = _universal_analytics.process_universal_analytics_data df = processing_func( analytics=analytics, view_id=view_id, From 52f7d5f1531493fbf1035ffee59ad85d3b76d97e Mon Sep 17 00:00:00 2001 From: chris-greening Date: Tue, 7 Feb 2023 11:28:51 -0500 Subject: [PATCH 19/24] ADD: added import of RunReportRequest and various other necessary objects --- route1io_connectors/google/utils/_ga4.py | 31 ++++++++++++++++++++++++ 1 file changed, 31 
insertions(+) diff --git a/route1io_connectors/google/utils/_ga4.py b/route1io_connectors/google/utils/_ga4.py index 1df5f5c..42ee0a4 100644 --- a/route1io_connectors/google/utils/_ga4.py +++ b/route1io_connectors/google/utils/_ga4.py @@ -2,6 +2,13 @@ from typing import List +from google.analytics.data_v1beta.types import ( + DateRange, + Dimension, + Metric, + RunReportRequest, +) + def process_ga4_data( analytics, view_id: str, @@ -12,3 +19,27 @@ def process_ga4_data( ) -> "pd.DataFrame": """Return pd.DataFrame of GA4 data pulled via the Google Analytics Data API""" + resp = _request_ga4_data( + analytics=analytics, + view_id=view_id, + dimensions=dimensions, + metrics=metrics, + start_date=start_date, + end_date=end_date + ) + +def _request_ga4_data( + analytics, + view_id: str, + dimensions: List[str] = None, + metrics: List[str] = None, + start_date: str = "7daysAgo", + end_date: str = "today" + ): + """Return response from reporting request to Google Analytics Data API""" + RunReportRequest( + property=f"properties/{os.environ.get('GCP_PROPERTY_ID')}", + dimensions=[Dimension(name="date"), Dimension(name="city")], + metrics=[Metric(name="newUsers"), Metric(name="totalUsers")], + date_ranges=[DateRange(start_date="2020-03-31", end_date="today")], + ) \ No newline at end of file From 4602c82a5f76e530f7769b9ac73289ae3b38b97e Mon Sep 17 00:00:00 2001 From: chris-greening Date: Tue, 7 Feb 2023 11:31:13 -0500 Subject: [PATCH 20/24] ADD: added request and response call --- route1io_connectors/google/utils/_ga4.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/route1io_connectors/google/utils/_ga4.py b/route1io_connectors/google/utils/_ga4.py index 42ee0a4..ee99db5 100644 --- a/route1io_connectors/google/utils/_ga4.py +++ b/route1io_connectors/google/utils/_ga4.py @@ -37,9 +37,10 @@ def _request_ga4_data( end_date: str = "today" ): """Return response from reporting request to Google Analytics Data API""" - RunReportRequest( - 
property=f"properties/{os.environ.get('GCP_PROPERTY_ID')}", - dimensions=[Dimension(name="date"), Dimension(name="city")], - metrics=[Metric(name="newUsers"), Metric(name="totalUsers")], - date_ranges=[DateRange(start_date="2020-03-31", end_date="today")], - ) \ No newline at end of file + request = RunReportRequest( + property=f"properties/{view_id}", + dimensions=[Dimension(dim) for dim in dimensions], + metrics=[Metric(metric) for metric in metrics], + date_ranges=[DateRange(start_date=start_date, end_date=end_date)], + ) + return analytics.run_report(request) \ No newline at end of file From 9a107d17751290025b00dfdaf5e54876adf10604 Mon Sep 17 00:00:00 2001 From: chris-greening Date: Tue, 7 Feb 2023 17:35:24 -0500 Subject: [PATCH 21/24] ADD: added DataFrame creation and processing and returned back through callstack --- .../google/google_analytics.py | 2 +- route1io_connectors/google/utils/_ga4.py | 39 +++++++++++++++++-- 2 files changed, 37 insertions(+), 4 deletions(-) diff --git a/route1io_connectors/google/google_analytics.py b/route1io_connectors/google/google_analytics.py index 43e745c..bb56aa5 100644 --- a/route1io_connectors/google/google_analytics.py +++ b/route1io_connectors/google/google_analytics.py @@ -74,4 +74,4 @@ def get_google_analytics_data( start_date=start_date, end_date=end_date ) - + return df diff --git a/route1io_connectors/google/utils/_ga4.py b/route1io_connectors/google/utils/_ga4.py index ee99db5..c7db57b 100644 --- a/route1io_connectors/google/utils/_ga4.py +++ b/route1io_connectors/google/utils/_ga4.py @@ -1,7 +1,10 @@ """Code for requesting and processing data from GA4""" -from typing import List +from typing import List, Dict +import itertools +import pandas as pd +import numpy as np from google.analytics.data_v1beta.types import ( DateRange, Dimension, @@ -27,6 +30,36 @@ def process_ga4_data( start_date=start_date, end_date=end_date ) + resp_df = _process_raw_ga4_data(resp=resp) + return resp_df + +def _process_raw_ga4_data(resp) 
-> "pd.DataFrame": + """Return a DataFrame containing the processed data extracted from GA4""" + rows = [] + keys = _build_list_from_resp(resp.dimension_headers, resp.metric_headers, attr_name = "name") + metric_dtypes = _build_metric_type_list_from_resp(resp) + for row in resp.rows: + values = _build_list_from_resp(row.dimension_values, row.metric_values, attr_name = "value") + row_dict = dict(zip(keys, values)) + rows.append(row_dict) + df = pd.DataFrame(rows) + df = df.astype(metric_dtypes) + return df + +def _build_list_from_resp(*args, attr_name: str) -> List[str]: + """Return list of strings of values parsed from header information in response""" + return [getattr(val, attr_name) for val in list(itertools.chain.from_iterable(args))] + +def _build_metric_type_list_from_resp(resp) -> Dict[str, str]: + """Return a dict of strings detailing data type of the returned metric""" + return {val.name: _lookup_dtype(val.type_.name) for val in resp.metric_headers} + +def _lookup_dtype(resp_type: str) -> str: + """Return dtype for pd.DataFrame column associated with Google's provided dtype""" + dtype_lookup_table = { + "TYPE_INTEGER": np.int32 + } + return dtype_lookup_table.get(resp_type, str) def _request_ga4_data( analytics, @@ -39,8 +72,8 @@ def _request_ga4_data( """Return response from reporting request to Google Analytics Data API""" request = RunReportRequest( property=f"properties/{view_id}", - dimensions=[Dimension(dim) for dim in dimensions], - metrics=[Metric(metric) for metric in metrics], + dimensions=[Dimension(name=dim) for dim in dimensions], + metrics=[Metric(name=metric) for metric in metrics], date_ranges=[DateRange(start_date=start_date, end_date=end_date)], ) return analytics.run_report(request) \ No newline at end of file From 54c582a023cb4161c171fecc86bb6d4fd718865c Mon Sep 17 00:00:00 2001 From: chris-greening Date: Tue, 7 Feb 2023 17:57:18 -0500 Subject: [PATCH 22/24] ADD: added warning about Universal Analytics sunset ---
route1io_connectors/google/google_analytics.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/route1io_connectors/google/google_analytics.py b/route1io_connectors/google/google_analytics.py index bb56aa5..ca9ed53 100644 --- a/route1io_connectors/google/google_analytics.py +++ b/route1io_connectors/google/google_analytics.py @@ -3,6 +3,7 @@ This module contains functions for interacting with Google Analytics reporting """ from typing import List +import warnings from googleapiclient.discovery import build from google.analytics.data_v1beta import BetaAnalyticsDataClient @@ -29,6 +30,7 @@ def connect_to_google_analytics( google_conn = BetaAnalyticsDataClient(credentials=credentials) else: google_conn = build('analyticsreporting', 'v4', credentials=credentials) + warnings.warn("Google is sunsetting Universal Analytics on July 1st, 2023 and is recommending you migrate to Google Analytics 4. More information can be found here: https://support.google.com/analytics/answer/11583528?hl=en", DeprecationWarning) return google_conn def get_google_analytics_data( From 17e80748bd6c162bcc58f00af4a8e826cc0367d7 Mon Sep 17 00:00:00 2001 From: chris-greening Date: Tue, 7 Feb 2023 17:58:47 -0500 Subject: [PATCH 23/24] ADD: added install requirement of google-analytics-data --- setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 9d3cd9d..3fa9f88 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ setup( name="route1io-connectors", - version="0.14.1", + version="0.15.0", description="Connectors for interacting with popular API's used in marketing analytics using clean and concise Python code.", long_description=README, long_description_content_type="text/markdown", @@ -23,6 +23,6 @@ install_requires=["boto3", "google-api-python-client", "google-auth-httplib2", "google-auth-oauthlib", "numpy", "pandas", "requests", "facebook-business", "pyjwt==1.7.1", - "aiohttp", "pysftp"], + "aiohttp", "pysftp", "google-analytics-data"], ) From 
915040f9b0cad7b2154e291123808f280a39d004 Mon Sep 17 00:00:00 2001 From: chris-greening Date: Tue, 7 Feb 2023 18:05:33 -0500 Subject: [PATCH 24/24] FIX: fixed next_page_token default arg as None --- route1io_connectors/google/utils/_universal_analytics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/route1io_connectors/google/utils/_universal_analytics.py b/route1io_connectors/google/utils/_universal_analytics.py index d5bccf7..5056a94 100644 --- a/route1io_connectors/google/utils/_universal_analytics.py +++ b/route1io_connectors/google/utils/_universal_analytics.py @@ -50,7 +50,7 @@ def _request_universal_analytics_data( metrics: List[str] = None, start_date: str = "7daysAgo", end_date: str = "today", - next_page_token = Union[str, None] + next_page_token: Union[str, None] = None ) -> Dict[str, Union[str, List, Dict, bool]]: """Returns response from reporting request to the Google Analytics Reporting API built from arguments