From 24b15a919e98b5d66ec11f7b4023b1947f7059d8 Mon Sep 17 00:00:00 2001 From: Fabian Jakobs Date: Fri, 13 Sep 2024 17:44:38 +0200 Subject: [PATCH 1/7] Show progress bar for spark jobs from DB Connect --- .../resources/python/00-databricks-init.py | 64 +++++++++++++++++++ 1 file changed, 64 insertions(+) diff --git a/packages/databricks-vscode/resources/python/00-databricks-init.py b/packages/databricks-vscode/resources/python/00-databricks-init.py index 57d9be4e0..be9c1ee9d 100644 --- a/packages/databricks-vscode/resources/python/00-databricks-init.py +++ b/packages/databricks-vscode/resources/python/00-databricks-init.py @@ -3,6 +3,7 @@ import json from typing import Any, Union, List import os +import time import shlex import warnings import tempfile @@ -24,6 +25,7 @@ def logError(function_name: str, e: Union[str, Exception]): try: from IPython import get_ipython + from IPython.display import display from IPython.core.magic import magics_class, Magics, line_magic, needs_local_scope except Exception as e: logError("Ipython Imports", e) @@ -357,6 +359,67 @@ def df_html(df): html_formatter.for_type(SparkConnectDataframe, df_html) html_formatter.for_type(DataFrame, df_html) +@logErrorAndContinue +@disposable +def register_spark_progress(spark): + try: + from pyspark.sql.connect.shell.progress import Progress + import ipywidgets as widgets + except Exception as e: + return + + class JupyterProgressHandler: + def __init__(self): + self.op_id = "" + self.p = None + + def reset(self): + self.p = Progress(enabled=False, handlers=[]) + self.init_ui() + + def init_ui(self): + self.w_progress = widgets.IntProgress( + value=0, + min=0, + max=100, + bar_style='success', + orientation='horizontal' + ) + self.w_status = widgets.Label(value="") + display(widgets.HBox([self.w_progress, self.w_status])) + + def __call__(self, + stages, + inflight_tasks: int, + operation_id, + done: bool + ): + # print("ProgressHandler", stages, inflight_tasks, operation_id, done) + if len(stages) == 0: + return + + if self.op_id != operation_id or self.p is None: + self.op_id = operation_id + self.reset() + + self.p.update_ticks(stages, inflight_tasks, operation_id) + self.output() + if done: + self.wip = False + + def output(self) -> None: + p = self.p + if p._tick is not None and p._ticks is not None: + percent_complete = (p._tick / p._ticks) * 100 + elapsed = int(time.time() - p._started) + scanned = p._bytes_to_string(p._bytes_read) + running = p._running + self.w_progress.value = percent_complete + self.w_status.value = f"{percent_complete:.2f}% Complete ({running} Tasks running, {elapsed}s, Scanned {scanned})" + + spark.clearProgressHandlers() + spark.registerProgressHandler(JupyterProgressHandler()) + @logErrorAndContinue @disposable @@ -385,6 +448,7 @@ def make_matplotlib_inline(): create_and_register_databricks_globals() register_magics(cfg) register_formatters(cfg) + register_spark_progress(globals()["spark"]) update_sys_path(cfg) make_matplotlib_inline() From b1ceb526d53d94da56a52d21c184c968bad869ce Mon Sep 17 00:00:00 2001 From: Fabian Jakobs Date: Mon, 16 Sep 2024 11:08:43 +0200 Subject: [PATCH 2/7] Inline code from pyspark --- .../resources/python/00-databricks-init.py | 80 +++++++++++++------ 1 file changed, 56 insertions(+), 24 deletions(-) diff --git a/packages/databricks-vscode/resources/python/00-databricks-init.py b/packages/databricks-vscode/resources/python/00-databricks-init.py index be9c1ee9d..fa65916b5 100644 --- a/packages/databricks-vscode/resources/python/00-databricks-init.py +++ 
b/packages/databricks-vscode/resources/python/00-databricks-init.py @@ -368,13 +368,18 @@ def register_spark_progress(spark): except Exception as e: return - class JupyterProgressHandler: - def __init__(self): - self.op_id = "" - self.p = None - - def reset(self): - self.p = Progress(enabled=False, handlers=[]) + class Progress: + SI_BYTE_SIZES = (1 << 60, 1 << 50, 1 << 40, 1 << 30, 1 << 20, 1 << 10, 1) + SI_BYTE_SUFFIXES = ("EiB", "PiB", "TiB", "GiB", "MiB", "KiB", "B") + + def __init__( + self, + ) -> None: + self._ticks = None + self._tick = None + self._started = time.time() + self._bytes_read = 0 + self._running = 0 self.init_ui() def init_ui(self): @@ -388,37 +393,64 @@ def init_ui(self): self.w_status = widgets.Label(value="") display(widgets.HBox([self.w_progress, self.w_status])) + def update_ticks( + self, + stages, + inflight_tasks: int + ) -> None: + total_tasks = sum(map(lambda x: x.num_tasks, stages)) + completed_tasks = sum(map(lambda x: x.num_completed_tasks, stages)) + if total_tasks > 0: + self._ticks = total_tasks + self._tick = completed_tasks + self._bytes_read = sum(map(lambda x: x.num_bytes_read, stages)) + if self._tick is not None and self._tick >= 0: + self.output() + self._running = inflight_tasks + + def output(self) -> None: + if self._tick is not None and self._ticks is not None: + percent_complete = (self._tick / self._ticks) * 100 + elapsed = int(time.time() - self._started) + scanned = self._bytes_to_string(self._bytes_read) + running = self._running + self.w_progress.value = percent_complete + self.w_status.value = f"{percent_complete:.2f}% Complete ({running} Tasks running, {elapsed}s, Scanned {scanned})" + + @staticmethod + def _bytes_to_string(size: int) -> str: + """Helper method to convert a numeric bytes value into a human-readable representation""" + i = 0 + while i < len(Progress.SI_BYTE_SIZES) - 1 and size < 2 * Progress.SI_BYTE_SIZES[i]: + i += 1 + result = float(size) / Progress.SI_BYTE_SIZES[i] + return f"{result:.1f} {Progress.SI_BYTE_SUFFIXES[i]}" + + + class ProgressHandler: + def __init__(self): + self.op_id = "" + + def reset(self): + self.p = Progress() + def __call__(self, stages, inflight_tasks: int, operation_id, done: bool ): - # print("ProgressHandler", stages, inflight_tasks, operation_id, done) if len(stages) == 0: return - if self.op_id != operation_id or self.p is None: + if self.op_id != operation_id: self.op_id = operation_id self.reset() - self.p.update_ticks(stages, inflight_tasks, operation_id) - self.output() - if done: - self.wip = False - - def output(self) -> None: - p = self.p - if p._tick is not None and p._ticks is not None: - percent_complete = (p._tick / p._ticks) * 100 - elapsed = int(time.time() - p._started) - scanned = p._bytes_to_string(p._bytes_read) - running = p._running - self.w_progress.value = percent_complete - self.w_status.value = f"{percent_complete:.2f}% Complete ({running} Tasks running, {elapsed}s, Scanned {scanned})" + self.p.update_ticks(stages, inflight_tasks) spark.clearProgressHandlers() - spark.registerProgressHandler(JupyterProgressHandler()) + spark.registerProgressHandler(ProgressHandler()) @logErrorAndContinue From ec6797b1b856154ef939dfdc2c527ff3ecf6c4c3 Mon Sep 17 00:00:00 2001 From: Fabian Jakobs Date: Mon, 16 Sep 2024 11:19:23 +0200 Subject: [PATCH 3/7] Add option to disable progress through an environment variable --- .../resources/python/00-databricks-init.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git 
a/packages/databricks-vscode/resources/python/00-databricks-init.py b/packages/databricks-vscode/resources/python/00-databricks-init.py index fa65916b5..97ae5313c 100644 --- a/packages/databricks-vscode/resources/python/00-databricks-init.py +++ b/packages/databricks-vscode/resources/python/00-databricks-init.py @@ -391,7 +391,12 @@ def init_ui(self): orientation='horizontal' ) self.w_status = widgets.Label(value="") - display(widgets.HBox([self.w_progress, self.w_status])) + if self.is_enabled(): + display(widgets.HBox([self.w_progress, self.w_status])) + + def is_enabled(self): + env_var = os.getenv("DATABRICKS_CONNECT_PROGRESS") + return env_var is None or (env_var.lower() != "false" and env_var.lower() != "0") def update_ticks( self, From 14e71af2d69ce1dcda2d4ac33f0f2c94df124df1 Mon Sep 17 00:00:00 2001 From: Fabian Jakobs Date: Mon, 16 Sep 2024 14:15:00 +0200 Subject: [PATCH 4/7] Expose db connect progress as a settings --- packages/databricks-vscode/package.json | 8 ++++++-- .../resources/python/00-databricks-init.py | 20 +++++++++++-------- .../utils/JobRunStateUtils.ts | 2 ++ .../src/utils/envVarGenerators.ts | 4 ++++ .../src/vscode-objs/WorkspaceConfigs.ts | 8 ++++++++ 5 files changed, 32 insertions(+), 10 deletions(-) diff --git a/packages/databricks-vscode/package.json b/packages/databricks-vscode/package.json index 68c490291..eef799526 100644 --- a/packages/databricks-vscode/package.json +++ b/packages/databricks-vscode/package.json @@ -856,7 +856,6 @@ "views.workspace" ], "enumDescriptions": [ - "Limited local notebook support using DB Connect v2.", "Show cluster view in the explorer.", "Show workspace browser in the explorer." ], @@ -870,6 +869,11 @@ "default": true, "description": "Enable/disable rearranging cells in wrapper files created when using `workspace` as the sync destination. **Note:** It is recommended to NOT disable this setting. If you do disable it, you will need to manually handle sys.path for local imports in your notebooks." }, + "databricks.connect.progress": { + "type": "boolean", + "default": true, + "description": "Show PySpark progress bar when using Databricks Connect from notebooks." + }, "databricks.ipythonDir": { "type": "string", "description": "Absolute path to a directory for storing IPython files. Defaults to IPYTHONDIR environment variable (if set) or ~/.ipython." 
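The `databricks.connect.progress` setting above (boolean, default `true`) is the user-facing switch for the toggle that PATCH 3 wires to the `DATABRICKS_CONNECT_PROGRESS` environment variable: the widget is shown unless the variable is explicitly set to "false" or "0", case-insensitively. A minimal standalone sketch of that check follows; the helper name `progress_enabled` is illustrative, while the init script implements the same logic as `is_enabled()` on its Progress widget class.

```python
import os

def progress_enabled() -> bool:
    # Progress reporting stays on unless the variable explicitly disables it.
    env_var = os.getenv("DATABRICKS_CONNECT_PROGRESS")
    return env_var is None or (env_var.lower() != "false" and env_var.lower() != "0")

os.environ.pop("DATABRICKS_CONNECT_PROGRESS", None)
assert progress_enabled()                        # unset: enabled by default

os.environ["DATABRICKS_CONNECT_PROGRESS"] = "False"
assert not progress_enabled()                    # "false" in any casing disables it

os.environ["DATABRICKS_CONNECT_PROGRESS"] = "0"
assert not progress_enabled()                    # "0" disables it as well
```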
@@ -1008,4 +1012,4 @@ ], "report-dir": "coverage" } -} +} \ No newline at end of file diff --git a/packages/databricks-vscode/resources/python/00-databricks-init.py b/packages/databricks-vscode/resources/python/00-databricks-init.py index 97ae5313c..a7c26b49c 100644 --- a/packages/databricks-vscode/resources/python/00-databricks-init.py +++ b/packages/databricks-vscode/resources/python/00-databricks-init.py @@ -103,7 +103,13 @@ def __init__(self, env_name: str, default: any = None, required: bool = False): def __get__(self, instance, owner): if self.env_name in os.environ: - return self.transform(os.environ[self.env_name]) + if self.transform is not bool: + return self.transform(os.environ[self.env_name]) + + if os.environ[self.env_name].lower() == "true" or os.environ[self.env_name] == "1": + return True + elif os.environ[self.env_name].lower() == "false" or os.environ[self.env_name] == "0": + return False if self.required: raise AttributeError( @@ -119,6 +125,7 @@ def __set__(self, instance, value): class LocalDatabricksNotebookConfig: project_root: str = EnvLoader("DATABRICKS_PROJECT_ROOT", required=True) dataframe_display_limit: int = EnvLoader("DATABRICKS_DF_DISPLAY_LIMIT", 20) + show_progress: bool = EnvLoader("DATABRICKS_CONNECT_PROGRESS", default=True) def __new__(cls): annotations = cls.__dict__['__annotations__'] @@ -363,7 +370,6 @@ def df_html(df): @disposable def register_spark_progress(spark): try: - from pyspark.sql.connect.shell.progress import Progress import ipywidgets as widgets except Exception as e: return @@ -374,12 +380,14 @@ class Progress: def __init__( self, + cfg: LocalDatabricksNotebookConfig ) -> None: self._ticks = None self._tick = None self._started = time.time() self._bytes_read = 0 self._running = 0 + self.show_progress = cfg.show_progress self.init_ui() def init_ui(self): @@ -391,13 +399,9 @@ def init_ui(self): orientation='horizontal' ) self.w_status = widgets.Label(value="") - if self.is_enabled(): + if self.show_progress: display(widgets.HBox([self.w_progress, self.w_status])) - def is_enabled(self): - env_var = os.getenv("DATABRICKS_CONNECT_PROGRESS") - return env_var is None or (env_var.lower() != "false" and env_var.lower() != "0") - def update_ticks( self, stages, @@ -437,7 +441,7 @@ def __init__(self): self.op_id = "" def reset(self): - self.p = Progress() + self.p = Progress(cfg) def __call__(self, stages, diff --git a/packages/databricks-vscode/src/ui/bundle-resource-explorer/utils/JobRunStateUtils.ts b/packages/databricks-vscode/src/ui/bundle-resource-explorer/utils/JobRunStateUtils.ts index 0045419d7..bcbe3b4d8 100644 --- a/packages/databricks-vscode/src/ui/bundle-resource-explorer/utils/JobRunStateUtils.ts +++ b/packages/databricks-vscode/src/ui/bundle-resource-explorer/utils/JobRunStateUtils.ts @@ -43,4 +43,6 @@ export function getSimplifiedRunState(run?: Run): SimplifiedRunState { } return "Terminated"; } + + return "Unknown"; } diff --git a/packages/databricks-vscode/src/utils/envVarGenerators.ts b/packages/databricks-vscode/src/utils/envVarGenerators.ts index cc5711c35..6478f7f3d 100644 --- a/packages/databricks-vscode/src/utils/envVarGenerators.ts +++ b/packages/databricks-vscode/src/utils/envVarGenerators.ts @@ -5,6 +5,7 @@ import {logging, Headers} from "@databricks/databricks-sdk"; import {ConnectionManager} from "../configuration/ConnectionManager"; import {ConfigModel} from "../configuration/models/ConfigModel"; import {TerraformMetadata} from "./terraformUtils"; +import {workspaceConfigs} from "../vscode-objs/WorkspaceConfigs"; // 
eslint-disable-next-line @typescript-eslint/no-var-requires const packageJson = require("../../package.json"); @@ -115,11 +116,14 @@ export async function getDbConnectEnvVars( ) { const userAgent = getUserAgent(connectionManager); const existingSparkUa = process.env.SPARK_CONNECT_USER_AGENT ?? ""; + /* eslint-disable @typescript-eslint/naming-convention */ return { //We append our user agent to any existing SPARK_CONNECT_USER_AGENT defined in the //environment of the parent process of VS Code. SPARK_CONNECT_USER_AGENT: [existingSparkUa, userAgent].join(" ").trim(), + DATABRICKS_CONNECT_PROGRESS: + workspaceConfigs.showDbConnectProgress.toString(), DATABRICKS_PROJECT_ROOT: workspacePath.fsPath, ...((await getSparkRemoteEnvVar(connectionManager)) || {}), }; diff --git a/packages/databricks-vscode/src/vscode-objs/WorkspaceConfigs.ts b/packages/databricks-vscode/src/vscode-objs/WorkspaceConfigs.ts index 0bb248573..8bbe82176 100644 --- a/packages/databricks-vscode/src/vscode-objs/WorkspaceConfigs.ts +++ b/packages/databricks-vscode/src/vscode-objs/WorkspaceConfigs.ts @@ -105,6 +105,14 @@ export const workspaceConfigs = { ); }, + get showDbConnectProgress(): boolean { + return ( + workspace + .getConfiguration("databricks") + .get("connect.progress") ?? true + ); + }, + get ipythonDir(): string | undefined { const dir = workspace .getConfiguration("databricks") From 6e5d67529340545decf95b25cfc924edfda223dc Mon Sep 17 00:00:00 2001 From: Fabian Jakobs Date: Mon, 16 Sep 2024 14:45:47 +0200 Subject: [PATCH 5/7] Reload ipy kernel on settings change --- .../file-managers/DatabricksEnvFileManager.ts | 27 +++++++++++++++++-- .../src/utils/envVarGenerators.test.ts | 16 ++++++++--- .../src/utils/envVarGenerators.ts | 7 +++-- .../src/vscode-objs/WorkspaceConfigs.ts | 2 +- 4 files changed, 41 insertions(+), 11 deletions(-) diff --git a/packages/databricks-vscode/src/file-managers/DatabricksEnvFileManager.ts b/packages/databricks-vscode/src/file-managers/DatabricksEnvFileManager.ts index 62f65c58d..360deb580 100644 --- a/packages/databricks-vscode/src/file-managers/DatabricksEnvFileManager.ts +++ b/packages/databricks-vscode/src/file-managers/DatabricksEnvFileManager.ts @@ -19,6 +19,7 @@ export class DatabricksEnvFileManager implements Disposable { private userEnvFileWatcherDisposables: Disposable[] = []; private mutex = new Mutex(); private userEnvPath?: Uri; + private showDatabricksConnectProgess = true; get databricksEnvPath() { return Uri.joinPath( @@ -44,7 +45,23 @@ export class DatabricksEnvFileManager implements Disposable { private readonly featureManager: FeatureManager, private readonly connectionManager: ConnectionManager, private readonly configModel: ConfigModel - ) {} + ) { + this.showDatabricksConnectProgess = + workspaceConfigs.showDatabricksConnectProgress; + } + + private async updateShowDatabricksConnectProgessWatcher() { + if ( + this.showDatabricksConnectProgess === + workspaceConfigs.showDatabricksConnectProgress + ) { + return; + } + + this.showDatabricksConnectProgess = + workspaceConfigs.showDatabricksConnectProgress; + await this.writeFile(); + } private updateUserEnvFileWatcher() { const userEnvPath = workspaceConfigs.msPythonEnvFile @@ -111,6 +128,11 @@ export class DatabricksEnvFileManager implements Disposable { this, this.disposables ), + workspace.onDidChangeConfiguration( + this.updateShowDatabricksConnectProgessWatcher, + this, + this.disposables + ), this.featureManager.onDidChangeState( "environment.dependencies", () => { @@ -160,7 +182,8 @@ export class 
DatabricksEnvFileManager implements Disposable { ...(this.getDatabrickseEnvVars() || {}), ...((await EnvVarGenerators.getDbConnectEnvVars( this.connectionManager, - this.workspacePath + this.workspacePath, + this.showDatabricksConnectProgess )) || {}), ...this.getIdeEnvVars(), ...((await this.getUserEnvVars()) || {}), diff --git a/packages/databricks-vscode/src/utils/envVarGenerators.test.ts b/packages/databricks-vscode/src/utils/envVarGenerators.test.ts index 7b75754bb..b40f521ab 100644 --- a/packages/databricks-vscode/src/utils/envVarGenerators.test.ts +++ b/packages/databricks-vscode/src/utils/envVarGenerators.test.ts @@ -103,12 +103,14 @@ describe(__filename, () => { const actual = await getDbConnectEnvVars( instance(mockConnectionManager), - mockWorkspacePath + mockWorkspacePath, + true ); assert.deepEqual(actual, { SPARK_CONNECT_USER_AGENT: "test/0.0.1", DATABRICKS_PROJECT_ROOT: mockWorkspacePath.fsPath, + DATABRICKS_CONNECT_PROGRESS: "true", }); }); @@ -118,12 +120,14 @@ describe(__filename, () => { const actual = await getDbConnectEnvVars( instance(mockConnectionManager), - mockWorkspacePath + mockWorkspacePath, + true ); assert.deepEqual(actual, { SPARK_CONNECT_USER_AGENT: "existing test/0.0.1", DATABRICKS_PROJECT_ROOT: mockWorkspacePath.fsPath, + DATABRICKS_CONNECT_PROGRESS: "true", }); }); @@ -139,12 +143,14 @@ describe(__filename, () => { const actual = await getDbConnectEnvVars( instance(mockConnectionManager), - mockWorkspacePath + mockWorkspacePath, + true ); assert.deepEqual(actual, { SPARK_CONNECT_USER_AGENT: "test/0.0.1", DATABRICKS_PROJECT_ROOT: mockWorkspacePath.fsPath, + DATABRICKS_CONNECT_PROGRESS: "true", SPARK_REMOTE: `sc://${ Uri.parse(mockHost).authority }:443/;token=token;use_ssl=true;x-databricks-cluster-id=${mockClusterId}`, @@ -163,12 +169,14 @@ describe(__filename, () => { const actual = await getDbConnectEnvVars( instance(mockConnectionManager), - mockWorkspacePath + mockWorkspacePath, + true ); assert.deepEqual(actual, { SPARK_CONNECT_USER_AGENT: "test/0.0.1", DATABRICKS_PROJECT_ROOT: mockWorkspacePath.fsPath, + DATABRICKS_CONNECT_PROGRESS: "true", }); }); }); diff --git a/packages/databricks-vscode/src/utils/envVarGenerators.ts b/packages/databricks-vscode/src/utils/envVarGenerators.ts index 6478f7f3d..3a039cb48 100644 --- a/packages/databricks-vscode/src/utils/envVarGenerators.ts +++ b/packages/databricks-vscode/src/utils/envVarGenerators.ts @@ -5,7 +5,6 @@ import {logging, Headers} from "@databricks/databricks-sdk"; import {ConnectionManager} from "../configuration/ConnectionManager"; import {ConfigModel} from "../configuration/models/ConfigModel"; import {TerraformMetadata} from "./terraformUtils"; -import {workspaceConfigs} from "../vscode-objs/WorkspaceConfigs"; // eslint-disable-next-line @typescript-eslint/no-var-requires const packageJson = require("../../package.json"); @@ -112,7 +111,8 @@ async function getSparkRemoteEnvVar(connectionManager: ConnectionManager) { export async function getDbConnectEnvVars( connectionManager: ConnectionManager, - workspacePath: Uri + workspacePath: Uri, + showDatabricksConnectProgess: boolean ) { const userAgent = getUserAgent(connectionManager); const existingSparkUa = process.env.SPARK_CONNECT_USER_AGENT ?? ""; @@ -122,8 +122,7 @@ export async function getDbConnectEnvVars( //We append our user agent to any existing SPARK_CONNECT_USER_AGENT defined in the //environment of the parent process of VS Code. 
SPARK_CONNECT_USER_AGENT: [existingSparkUa, userAgent].join(" ").trim(), - DATABRICKS_CONNECT_PROGRESS: - workspaceConfigs.showDbConnectProgress.toString(), + DATABRICKS_CONNECT_PROGRESS: showDatabricksConnectProgess.toString(), DATABRICKS_PROJECT_ROOT: workspacePath.fsPath, ...((await getSparkRemoteEnvVar(connectionManager)) || {}), }; diff --git a/packages/databricks-vscode/src/vscode-objs/WorkspaceConfigs.ts b/packages/databricks-vscode/src/vscode-objs/WorkspaceConfigs.ts index 8bbe82176..9a23a4d29 100644 --- a/packages/databricks-vscode/src/vscode-objs/WorkspaceConfigs.ts +++ b/packages/databricks-vscode/src/vscode-objs/WorkspaceConfigs.ts @@ -105,7 +105,7 @@ export const workspaceConfigs = { ); }, - get showDbConnectProgress(): boolean { + get showDatabricksConnectProgress(): boolean { return ( workspace .getConfiguration("databricks") From 75bd65693f474b36cbb6e4c5346e42c7b915f0c6 Mon Sep 17 00:00:00 2001 From: Fabian Jakobs Date: Tue, 17 Sep 2024 10:20:24 +0200 Subject: [PATCH 6/7] Also show progress for running python files --- packages/databricks-vscode/package.json | 2 +- .../resources/python/00-databricks-init.py | 20 +++++++++++-------- .../src/utils/envVarGenerators.test.ts | 8 ++++---- .../src/utils/envVarGenerators.ts | 4 +++- 4 files changed, 20 insertions(+), 14 deletions(-) diff --git a/packages/databricks-vscode/package.json b/packages/databricks-vscode/package.json index eef799526..4ce8362d3 100644 --- a/packages/databricks-vscode/package.json +++ b/packages/databricks-vscode/package.json @@ -872,7 +872,7 @@ "databricks.connect.progress": { "type": "boolean", "default": true, - "description": "Show PySpark progress bar when using Databricks Connect from notebooks." + "description": "Show PySpark progress bar when using Databricks Connect." 
}, "databricks.ipythonDir": { "type": "string", diff --git a/packages/databricks-vscode/resources/python/00-databricks-init.py b/packages/databricks-vscode/resources/python/00-databricks-init.py index a7c26b49c..3292b43f5 100644 --- a/packages/databricks-vscode/resources/python/00-databricks-init.py +++ b/packages/databricks-vscode/resources/python/00-databricks-init.py @@ -125,7 +125,7 @@ def __set__(self, instance, value): class LocalDatabricksNotebookConfig: project_root: str = EnvLoader("DATABRICKS_PROJECT_ROOT", required=True) dataframe_display_limit: int = EnvLoader("DATABRICKS_DF_DISPLAY_LIMIT", 20) - show_progress: bool = EnvLoader("DATABRICKS_CONNECT_PROGRESS", default=True) + show_progress: bool = EnvLoader("SPARK_CONNECT_PROGRESS_BAR_ENABLED", default=False) def __new__(cls): annotations = cls.__dict__['__annotations__'] @@ -368,7 +368,7 @@ def df_html(df): @logErrorAndContinue @disposable -def register_spark_progress(spark): +def register_spark_progress(spark, show_progress: bool): try: import ipywidgets as widgets except Exception as e: @@ -379,15 +379,13 @@ class Progress: SI_BYTE_SUFFIXES = ("EiB", "PiB", "TiB", "GiB", "MiB", "KiB", "B") def __init__( - self, - cfg: LocalDatabricksNotebookConfig + self ) -> None: self._ticks = None self._tick = None self._started = time.time() self._bytes_read = 0 self._running = 0 - self.show_progress = cfg.show_progress self.init_ui() def init_ui(self): @@ -399,7 +397,7 @@ def init_ui(self): orientation='horizontal' ) self.w_status = widgets.Label(value="") - if self.show_progress: + if show_progress: display(widgets.HBox([self.w_progress, self.w_status])) def update_ticks( @@ -441,7 +439,7 @@ def __init__(self): self.op_id = "" def reset(self): - self.p = Progress(cfg) + self.p = Progress() def __call__(self, stages, @@ -486,10 +484,16 @@ def make_matplotlib_inline(): if not load_env_from_leaf(os.getcwd()): sys.exit(1) cfg = LocalDatabricksNotebookConfig() + + # disable build-in progress bar + show_progress = cfg.show_progress + if "SPARK_CONNECT_PROGRESS_BAR_ENABLED" in os.environ: + del os.environ["SPARK_CONNECT_PROGRESS_BAR_ENABLED"] + create_and_register_databricks_globals() register_magics(cfg) register_formatters(cfg) - register_spark_progress(globals()["spark"]) + register_spark_progress(globals()["spark"], show_progress) update_sys_path(cfg) make_matplotlib_inline() diff --git a/packages/databricks-vscode/src/utils/envVarGenerators.test.ts b/packages/databricks-vscode/src/utils/envVarGenerators.test.ts index b40f521ab..4c55b00d3 100644 --- a/packages/databricks-vscode/src/utils/envVarGenerators.test.ts +++ b/packages/databricks-vscode/src/utils/envVarGenerators.test.ts @@ -110,7 +110,7 @@ describe(__filename, () => { assert.deepEqual(actual, { SPARK_CONNECT_USER_AGENT: "test/0.0.1", DATABRICKS_PROJECT_ROOT: mockWorkspacePath.fsPath, - DATABRICKS_CONNECT_PROGRESS: "true", + SPARK_CONNECT_PROGRESS_BAR_ENABLED: "true", }); }); @@ -127,7 +127,7 @@ describe(__filename, () => { assert.deepEqual(actual, { SPARK_CONNECT_USER_AGENT: "existing test/0.0.1", DATABRICKS_PROJECT_ROOT: mockWorkspacePath.fsPath, - DATABRICKS_CONNECT_PROGRESS: "true", + SPARK_CONNECT_PROGRESS_BAR_ENABLED: "true", }); }); @@ -150,7 +150,7 @@ describe(__filename, () => { assert.deepEqual(actual, { SPARK_CONNECT_USER_AGENT: "test/0.0.1", DATABRICKS_PROJECT_ROOT: mockWorkspacePath.fsPath, - DATABRICKS_CONNECT_PROGRESS: "true", + SPARK_CONNECT_PROGRESS_BAR_ENABLED: "true", SPARK_REMOTE: `sc://${ Uri.parse(mockHost).authority 
}:443/;token=token;use_ssl=true;x-databricks-cluster-id=${mockClusterId}`, @@ -176,7 +176,7 @@ describe(__filename, () => { assert.deepEqual(actual, { SPARK_CONNECT_USER_AGENT: "test/0.0.1", DATABRICKS_PROJECT_ROOT: mockWorkspacePath.fsPath, - DATABRICKS_CONNECT_PROGRESS: "true", + SPARK_CONNECT_PROGRESS_BAR_ENABLED: "true", }); }); }); diff --git a/packages/databricks-vscode/src/utils/envVarGenerators.ts b/packages/databricks-vscode/src/utils/envVarGenerators.ts index 3a039cb48..3160e0030 100644 --- a/packages/databricks-vscode/src/utils/envVarGenerators.ts +++ b/packages/databricks-vscode/src/utils/envVarGenerators.ts @@ -122,7 +122,9 @@ export async function getDbConnectEnvVars( //We append our user agent to any existing SPARK_CONNECT_USER_AGENT defined in the //environment of the parent process of VS Code. SPARK_CONNECT_USER_AGENT: [existingSparkUa, userAgent].join(" ").trim(), - DATABRICKS_CONNECT_PROGRESS: showDatabricksConnectProgess.toString(), + SPARK_CONNECT_PROGRESS_BAR_ENABLED: showDatabricksConnectProgess + ? "1" + : "0", DATABRICKS_PROJECT_ROOT: workspacePath.fsPath, ...((await getSparkRemoteEnvVar(connectionManager)) || {}), }; From 013e5132a1cfa74f4cd332165abb8e1b7112e095 Mon Sep 17 00:00:00 2001 From: Fabian Jakobs Date: Tue, 17 Sep 2024 10:23:56 +0200 Subject: [PATCH 7/7] fix tests --- databricks-vscode.code-workspace | 4 +++- .../databricks-vscode/src/utils/envVarGenerators.test.ts | 8 ++++---- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/databricks-vscode.code-workspace b/databricks-vscode.code-workspace index 3a2047c52..7bbbdc42b 100644 --- a/databricks-vscode.code-workspace +++ b/databricks-vscode.code-workspace @@ -22,6 +22,8 @@ }, "[typescript]": { "editor.defaultFormatter": "esbenp.prettier-vscode" - } + }, + "jupyter.interactiveWindow.cellMarker.codeRegex": "^# COMMAND ----------|^# Databricks notebook source|^(#\\s*%%|#\\s*\\|#\\s*In\\[\\d*?\\]|#\\s*In\\[ \\])", + "jupyter.interactiveWindow.cellMarker.default": "# COMMAND ----------" } } diff --git a/packages/databricks-vscode/src/utils/envVarGenerators.test.ts b/packages/databricks-vscode/src/utils/envVarGenerators.test.ts index 4c55b00d3..e3538cd93 100644 --- a/packages/databricks-vscode/src/utils/envVarGenerators.test.ts +++ b/packages/databricks-vscode/src/utils/envVarGenerators.test.ts @@ -110,7 +110,7 @@ describe(__filename, () => { assert.deepEqual(actual, { SPARK_CONNECT_USER_AGENT: "test/0.0.1", DATABRICKS_PROJECT_ROOT: mockWorkspacePath.fsPath, - SPARK_CONNECT_PROGRESS_BAR_ENABLED: "true", + SPARK_CONNECT_PROGRESS_BAR_ENABLED: "1", }); }); @@ -127,7 +127,7 @@ describe(__filename, () => { assert.deepEqual(actual, { SPARK_CONNECT_USER_AGENT: "existing test/0.0.1", DATABRICKS_PROJECT_ROOT: mockWorkspacePath.fsPath, - SPARK_CONNECT_PROGRESS_BAR_ENABLED: "true", + SPARK_CONNECT_PROGRESS_BAR_ENABLED: "1", }); }); @@ -150,7 +150,7 @@ describe(__filename, () => { assert.deepEqual(actual, { SPARK_CONNECT_USER_AGENT: "test/0.0.1", DATABRICKS_PROJECT_ROOT: mockWorkspacePath.fsPath, - SPARK_CONNECT_PROGRESS_BAR_ENABLED: "true", + SPARK_CONNECT_PROGRESS_BAR_ENABLED: "1", SPARK_REMOTE: `sc://${ Uri.parse(mockHost).authority }:443/;token=token;use_ssl=true;x-databricks-cluster-id=${mockClusterId}`, @@ -176,7 +176,7 @@ describe(__filename, () => { assert.deepEqual(actual, { SPARK_CONNECT_USER_AGENT: "test/0.0.1", DATABRICKS_PROJECT_ROOT: mockWorkspacePath.fsPath, - SPARK_CONNECT_PROGRESS_BAR_ENABLED: "true", + SPARK_CONNECT_PROGRESS_BAR_ENABLED: "1", }); }); });
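Taken together, the patches rely on the progress-handler hook that Databricks Connect exposes on the Spark session: a callable registered with `registerProgressHandler` receives `(stages, inflight_tasks, operation_id, done)` callbacks, where each stage reports `num_tasks`, `num_completed_tasks` and `num_bytes_read`. The sketch below condenses what the final init script does into a minimal handler; it assumes an existing Databricks Connect `SparkSession` is passed in, and it leaves out the byte formatting, scan statistics and enable/disable gate that the real script adds.

```python
import time

import ipywidgets as widgets
from IPython.display import display


class MinimalProgressHandler:
    def __init__(self):
        self.op_id = ""

    def _new_bar(self):
        # Render a fresh bar whenever a new Spark Connect operation id shows up.
        self.bar = widgets.IntProgress(value=0, min=0, max=100, bar_style="success")
        self.status = widgets.Label(value="")
        self.started = time.time()
        display(widgets.HBox([self.bar, self.status]))

    def __call__(self, stages, inflight_tasks: int, operation_id, done: bool):
        if len(stages) == 0:
            return
        if self.op_id != operation_id:
            self.op_id = operation_id
            self._new_bar()

        total = sum(s.num_tasks for s in stages)
        completed = sum(s.num_completed_tasks for s in stages)
        if total > 0:
            percent = (completed / total) * 100
            elapsed = int(time.time() - self.started)
            self.bar.value = int(percent)
            self.status.value = (
                f"{percent:.2f}% Complete ({inflight_tasks} Tasks running, {elapsed}s)"
            )


def register_minimal_progress(spark):
    # Mirror the init script: drop previously registered handlers, then install ours.
    spark.clearProgressHandlers()
    spark.registerProgressHandler(MinimalProgressHandler())
```

Calling `register_minimal_progress(spark)` once per session is enough; each DataFrame action executed through Databricks Connect then gets its own bar, since a new operation id triggers a fresh widget.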
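On the configuration side, the later patches settle on Spark Connect's own `SPARK_CONNECT_PROGRESS_BAR_ENABLED` variable as the transport: the extension exports it as `"1"` or `"0"` from the `databricks.connect.progress` setting, and the init script parses it as a boolean (treating "true"/"1" as on and "false"/"0" as off, with off as the fallback), then deletes it so PySpark's built-in progress bar is not triggered alongside the widget. A rough kernel-side sketch of that handshake, with `parse_flag` standing in for the script's `EnvLoader` descriptor:

```python
import os


def parse_flag(name: str, default: bool = False) -> bool:
    if name not in os.environ:
        return default
    value = os.environ[name].lower()
    if value in ("true", "1"):
        return True
    if value in ("false", "0"):
        return False
    return default


show_progress = parse_flag("SPARK_CONNECT_PROGRESS_BAR_ENABLED", default=False)

# Consume the variable before any Spark work, as PATCH 6 does, so only the
# ipywidgets handler reports progress.
os.environ.pop("SPARK_CONNECT_PROGRESS_BAR_ENABLED", None)
```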
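Finally, the `_bytes_to_string` helper that PATCH 2 inlines from pyspark's Spark Connect shell code picks the largest IEC unit for which the formatted value is at least 2.0 (falling back to plain bytes), so sizes only switch units once they reach twice the unit boundary. A standalone copy with a few checked examples, assuming the same constants as the patch:

```python
SI_BYTE_SIZES = (1 << 60, 1 << 50, 1 << 40, 1 << 30, 1 << 20, 1 << 10, 1)
SI_BYTE_SUFFIXES = ("EiB", "PiB", "TiB", "GiB", "MiB", "KiB", "B")


def bytes_to_string(size: int) -> str:
    # Advance to smaller units until the value would be at least 2x the unit.
    i = 0
    while i < len(SI_BYTE_SIZES) - 1 and size < 2 * SI_BYTE_SIZES[i]:
        i += 1
    return f"{float(size) / SI_BYTE_SIZES[i]:.1f} {SI_BYTE_SUFFIXES[i]}"


assert bytes_to_string(1536) == "1536.0 B"       # below 2 KiB stays in bytes
assert bytes_to_string(2048) == "2.0 KiB"        # 2 KiB is the first KiB value
assert bytes_to_string(3 * (1 << 20)) == "3.0 MiB"
```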