From 6eebcfc7c4255f4ff73c7b47b7cf209fced46b3e Mon Sep 17 00:00:00 2001 From: Mario Linker <15095261+maldag@users.noreply.github.com> Date: Tue, 10 Sep 2024 09:28:48 +0200 Subject: [PATCH 01/13] Revert Split Tokenize This is not included in 2024.4.8 ! So code would not work --- distributed/client.py | 2 +- distributed/diagnostics/progress.py | 2 +- distributed/protocol/serialize.py | 2 +- distributed/scheduler.py | 2 +- distributed/shuffle/_merge.py | 2 +- distributed/shuffle/_rechunk.py | 2 +- distributed/shuffle/_shuffle.py | 2 +- distributed/tests/test_client.py | 4 ++-- 8 files changed, 9 insertions(+), 9 deletions(-) diff --git a/distributed/client.py b/distributed/client.py index b038c0e46fb..75e8d976eec 100644 --- a/distributed/client.py +++ b/distributed/client.py @@ -49,7 +49,7 @@ from dask.highlevelgraph import HighLevelGraph from dask.layers import Layer from dask.optimization import SubgraphCallable -from dask.tokenize import tokenize +from dask.base import tokenize from dask.typing import Key, NoDefault, no_default from dask.utils import ( apply, diff --git a/distributed/diagnostics/progress.py b/distributed/diagnostics/progress.py index 1712cc5df32..3fd7bbbd07f 100644 --- a/distributed/diagnostics/progress.py +++ b/distributed/diagnostics/progress.py @@ -9,7 +9,7 @@ from tlz import groupby, valmap -from dask.tokenize import tokenize +from dask.base import tokenize from dask.utils import key_split from distributed.diagnostics.plugin import SchedulerPlugin diff --git a/distributed/protocol/serialize.py b/distributed/protocol/serialize.py index 4ad5c3f98a2..300e27631b2 100644 --- a/distributed/protocol/serialize.py +++ b/distributed/protocol/serialize.py @@ -15,7 +15,7 @@ import dask from dask.sizeof import sizeof -from dask.tokenize import normalize_token +from dask.base import normalize_token from dask.utils import typename from distributed.metrics import context_meter diff --git a/distributed/scheduler.py b/distributed/scheduler.py index 64ad6d964f0..9b9828f8045 100644 --- a/distributed/scheduler.py +++ b/distributed/scheduler.py @@ -55,7 +55,7 @@ import dask import dask.utils from dask.core import get_deps, iskey, validate_key -from dask.tokenize import TokenizationError, normalize_token, tokenize +from dask.base import TokenizationError, normalize_token, tokenize from dask.typing import Key, no_default from dask.utils import ( _deprecated, diff --git a/distributed/shuffle/_merge.py b/distributed/shuffle/_merge.py index 0fb403a3f82..735c8f637fa 100644 --- a/distributed/shuffle/_merge.py +++ b/distributed/shuffle/_merge.py @@ -8,7 +8,7 @@ from dask.base import is_dask_collection from dask.highlevelgraph import HighLevelGraph from dask.layers import Layer -from dask.tokenize import tokenize +from dask.base import tokenize from distributed.shuffle._arrow import check_minimal_arrow_version from distributed.shuffle._core import ShuffleId, barrier_key, get_worker_plugin diff --git a/distributed/shuffle/_rechunk.py b/distributed/shuffle/_rechunk.py index 15e3ea1d78a..ef233e4ed36 100644 --- a/distributed/shuffle/_rechunk.py +++ b/distributed/shuffle/_rechunk.py @@ -123,7 +123,7 @@ import dask.config from dask.highlevelgraph import HighLevelGraph from dask.layers import Layer -from dask.tokenize import tokenize +from dask.base import tokenize from dask.typing import Key from dask.utils import parse_bytes diff --git a/distributed/shuffle/_shuffle.py b/distributed/shuffle/_shuffle.py index 154f25c4bab..cced6a50190 100644 --- a/distributed/shuffle/_shuffle.py +++ b/distributed/shuffle/_shuffle.py @@ -23,7 +23,7 @@ import dask from dask.highlevelgraph import HighLevelGraph from dask.layers import Layer -from dask.tokenize import tokenize +from dask.base import tokenize from dask.typing import Key from distributed.core import PooledRPCCall diff --git a/distributed/tests/test_client.py b/distributed/tests/test_client.py index 0016294e708..fd1d9563e1e 100644 --- a/distributed/tests/test_client.py +++ b/distributed/tests/test_client.py @@ -43,7 +43,7 @@ import dask.bag as db from dask import delayed from dask.optimization import SubgraphCallable -from dask.tokenize import tokenize +from dask.base import tokenize from dask.utils import get_default_shuffle_method, parse_timedelta, tmpfile from distributed import ( @@ -1127,7 +1127,7 @@ async def test_scatter_non_list(c, s, a, b): @gen_cluster(client=True) async def test_scatter_tokenize_local(c, s, a, b): - from dask.tokenize import normalize_token + from dask.base import normalize_token class MyObj: pass From 24cc26d74b14bf0438da1fad6d0be7e2faaf6f36 Mon Sep 17 00:00:00 2001 From: Mario Linker <15095261+maldag@users.noreply.github.com> Date: Tue, 10 Sep 2024 09:38:00 +0200 Subject: [PATCH 02/13] Fix #5918 The method clean_dashboard_address will also return an empty string. In this case the start_address should be used which will account for the "host" parameter used when specifying a Cluster This will fix the bug a firewall popup will be created even if `Localcluster(host=localhost)` is specified --- distributed/node.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/distributed/node.py b/distributed/node.py index d41a3409850..5cb45c95f4e 100644 --- a/distributed/node.py +++ b/distributed/node.py @@ -145,7 +145,7 @@ def start_http_server( http_addresses = clean_dashboard_address(dashboard_address or default_port) for http_address in http_addresses: - if http_address["address"] is None: + if http_address["address"] is None or http_address["address"] == '': address = self._start_address if isinstance(address, (list, tuple)): address = address[0] From 226b860c9549389d9f98ef82c918bb3a94612bcc Mon Sep 17 00:00:00 2001 From: Mario Linker <15095261+maldag@users.noreply.github.com> Date: Tue, 10 Sep 2024 09:48:07 +0200 Subject: [PATCH 03/13] Revert "Revert Split Tokenize" This reverts commit 6eebcfc7c4255f4ff73c7b47b7cf209fced46b3e. --- distributed/client.py | 2 +- distributed/diagnostics/progress.py | 2 +- distributed/protocol/serialize.py | 2 +- distributed/scheduler.py | 2 +- distributed/shuffle/_merge.py | 2 +- distributed/shuffle/_rechunk.py | 2 +- distributed/shuffle/_shuffle.py | 2 +- distributed/tests/test_client.py | 4 ++-- 8 files changed, 9 insertions(+), 9 deletions(-) diff --git a/distributed/client.py b/distributed/client.py index 75e8d976eec..b038c0e46fb 100644 --- a/distributed/client.py +++ b/distributed/client.py @@ -49,7 +49,7 @@ from dask.highlevelgraph import HighLevelGraph from dask.layers import Layer from dask.optimization import SubgraphCallable -from dask.base import tokenize +from dask.tokenize import tokenize from dask.typing import Key, NoDefault, no_default from dask.utils import ( apply, diff --git a/distributed/diagnostics/progress.py b/distributed/diagnostics/progress.py index 3fd7bbbd07f..1712cc5df32 100644 --- a/distributed/diagnostics/progress.py +++ b/distributed/diagnostics/progress.py @@ -9,7 +9,7 @@ from tlz import groupby, valmap -from dask.base import tokenize +from dask.tokenize import tokenize from dask.utils import key_split from distributed.diagnostics.plugin import SchedulerPlugin diff --git a/distributed/protocol/serialize.py b/distributed/protocol/serialize.py index 300e27631b2..4ad5c3f98a2 100644 --- a/distributed/protocol/serialize.py +++ b/distributed/protocol/serialize.py @@ -15,7 +15,7 @@ import dask from dask.sizeof import sizeof -from dask.base import normalize_token +from dask.tokenize import normalize_token from dask.utils import typename from distributed.metrics import context_meter diff --git a/distributed/scheduler.py b/distributed/scheduler.py index 9b9828f8045..64ad6d964f0 100644 --- a/distributed/scheduler.py +++ b/distributed/scheduler.py @@ -55,7 +55,7 @@ import dask import dask.utils from dask.core import get_deps, iskey, validate_key -from dask.base import TokenizationError, normalize_token, tokenize +from dask.tokenize import TokenizationError, normalize_token, tokenize from dask.typing import Key, no_default from dask.utils import ( _deprecated, diff --git a/distributed/shuffle/_merge.py b/distributed/shuffle/_merge.py index 735c8f637fa..0fb403a3f82 100644 --- a/distributed/shuffle/_merge.py +++ b/distributed/shuffle/_merge.py @@ -8,7 +8,7 @@ from dask.base import is_dask_collection from dask.highlevelgraph import HighLevelGraph from dask.layers import Layer -from dask.base import tokenize +from dask.tokenize import tokenize from distributed.shuffle._arrow import check_minimal_arrow_version from distributed.shuffle._core import ShuffleId, barrier_key, get_worker_plugin diff --git a/distributed/shuffle/_rechunk.py b/distributed/shuffle/_rechunk.py index ef233e4ed36..15e3ea1d78a 100644 --- a/distributed/shuffle/_rechunk.py +++ b/distributed/shuffle/_rechunk.py @@ -123,7 +123,7 @@ import dask.config from dask.highlevelgraph import HighLevelGraph from dask.layers import Layer -from dask.base import tokenize +from dask.tokenize import tokenize from dask.typing import Key from dask.utils import parse_bytes diff --git a/distributed/shuffle/_shuffle.py b/distributed/shuffle/_shuffle.py index cced6a50190..154f25c4bab 100644 --- a/distributed/shuffle/_shuffle.py +++ b/distributed/shuffle/_shuffle.py @@ -23,7 +23,7 @@ import dask from dask.highlevelgraph import HighLevelGraph from dask.layers import Layer -from dask.base import tokenize +from dask.tokenize import tokenize from dask.typing import Key from distributed.core import PooledRPCCall diff --git a/distributed/tests/test_client.py b/distributed/tests/test_client.py index fd1d9563e1e..0016294e708 100644 --- a/distributed/tests/test_client.py +++ b/distributed/tests/test_client.py @@ -43,7 +43,7 @@ import dask.bag as db from dask import delayed from dask.optimization import SubgraphCallable -from dask.base import tokenize +from dask.tokenize import tokenize from dask.utils import get_default_shuffle_method, parse_timedelta, tmpfile from distributed import ( @@ -1127,7 +1127,7 @@ async def test_scatter_non_list(c, s, a, b): @gen_cluster(client=True) async def test_scatter_tokenize_local(c, s, a, b): - from dask.base import normalize_token + from dask.tokenize import normalize_token class MyObj: pass From 9ffcb8d47192459021bdf145e9cfa07d307cc5bd Mon Sep 17 00:00:00 2001 From: Mario Linker <15095261+maldag@users.noreply.github.com> Date: Tue, 10 Sep 2024 10:14:11 +0200 Subject: [PATCH 04/13] Linting / pre-commit hooks (pull_request) --- distributed/node.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/distributed/node.py b/distributed/node.py index 5cb45c95f4e..c137f22f5b9 100644 --- a/distributed/node.py +++ b/distributed/node.py @@ -145,7 +145,7 @@ def start_http_server( http_addresses = clean_dashboard_address(dashboard_address or default_port) for http_address in http_addresses: - if http_address["address"] is None or http_address["address"] == '': + if http_address["address"] is None or http_address["address"] == "": address = self._start_address if isinstance(address, (list, tuple)): address = address[0] From 9cd91a7a46e02bddb9b833e61046c3cf5a032638 Mon Sep 17 00:00:00 2001 From: Mario Linker <15095261+maldag@users.noreply.github.com> Date: Tue, 10 Sep 2024 10:54:48 +0200 Subject: [PATCH 05/13] Specify defaultcase for dashboard address Iff the dashboard should listen to all interfaces this should be expressed here since empty strings should respect the given host argument --- distributed/node.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/distributed/node.py b/distributed/node.py index c137f22f5b9..08a8dd7cd67 100644 --- a/distributed/node.py +++ b/distributed/node.py @@ -143,8 +143,13 @@ def start_http_server( self.http_server = HTTPServer(self.http_application, ssl_options=ssl_options) http_addresses = clean_dashboard_address(dashboard_address or default_port) - for http_address in http_addresses: + # Handle default case for dashboard address + # In case dashboard_address is given, e.g. ":8787" + # the address is empty and it is intended to listen to all interfaces + if dashboard_address is not None and http_address["address"] == "": + http_address["address"] = "0.0.0.0" + if http_address["address"] is None or http_address["address"] == "": address = self._start_address if isinstance(address, (list, tuple)): From 93579153f5c3b331939d96bccc72f5173fe96b4b Mon Sep 17 00:00:00 2001 From: Mario Linker <15095261+maldag@users.noreply.github.com> Date: Tue, 10 Sep 2024 12:29:58 +0200 Subject: [PATCH 06/13] pr linting --- distributed/node.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/distributed/node.py b/distributed/node.py index 08a8dd7cd67..56d0a1882c0 100644 --- a/distributed/node.py +++ b/distributed/node.py @@ -149,7 +149,7 @@ def start_http_server( # the address is empty and it is intended to listen to all interfaces if dashboard_address is not None and http_address["address"] == "": http_address["address"] = "0.0.0.0" - + if http_address["address"] is None or http_address["address"] == "": address = self._start_address if isinstance(address, (list, tuple)): From cf3ac827221c559ded9666148ff1862a11db3177 Mon Sep 17 00:00:00 2001 From: Mario Linker <15095261+maldag@users.noreply.github.com> Date: Tue, 10 Sep 2024 12:48:38 +0200 Subject: [PATCH 07/13] fix for 9cd91a7a --- distributed/node.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/distributed/node.py b/distributed/node.py index 56d0a1882c0..b1036ea4648 100644 --- a/distributed/node.py +++ b/distributed/node.py @@ -147,7 +147,7 @@ def start_http_server( # Handle default case for dashboard address # In case dashboard_address is given, e.g. ":8787" # the address is empty and it is intended to listen to all interfaces - if dashboard_address is not None and http_address["address"] == "": + if dashboard_address is None and http_address["address"] == "": http_address["address"] = "0.0.0.0" if http_address["address"] is None or http_address["address"] == "": From 90f30d55bd952c7a6b0b4adf3b73a4ec67dab4de Mon Sep 17 00:00:00 2001 From: Mario Linker <15095261+maldag@users.noreply.github.com> Date: Tue, 10 Sep 2024 14:46:38 +0200 Subject: [PATCH 08/13] Revert "fix for 9cd91a7a" This reverts commit cf3ac827221c559ded9666148ff1862a11db3177. --- distributed/node.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/distributed/node.py b/distributed/node.py index b1036ea4648..56d0a1882c0 100644 --- a/distributed/node.py +++ b/distributed/node.py @@ -147,7 +147,7 @@ def start_http_server( # Handle default case for dashboard address # In case dashboard_address is given, e.g. ":8787" # the address is empty and it is intended to listen to all interfaces - if dashboard_address is None and http_address["address"] == "": + if dashboard_address is not None and http_address["address"] == "": http_address["address"] = "0.0.0.0" if http_address["address"] is None or http_address["address"] == "": From 8a77345ea308b9a07a42c9985acff91a4c07178c Mon Sep 17 00:00:00 2001 From: Mario Linker <15095261+maldag@users.noreply.github.com> Date: Tue, 10 Sep 2024 14:47:32 +0200 Subject: [PATCH 09/13] Fix expected host binding for dashboard If None is given to dashboard_address, the binding should now take the given parameter on "host" if set. --- distributed/tests/test_scheduler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/distributed/tests/test_scheduler.py b/distributed/tests/test_scheduler.py index cfb6fdefdb8..8f72267e6bf 100644 --- a/distributed/tests/test_scheduler.py +++ b/distributed/tests/test_scheduler.py @@ -1969,7 +1969,7 @@ async def test_scheduler_file(): @pytest.mark.parametrize( "dashboard_address,expect", [ - (None, ("::", "0.0.0.0")), + (None, ("::", "0.0.0.0","127.0.0.1")), ("127.0.0.1:0", ("127.0.0.1",)), ], ) From cca2b1be073babbf17ce7961992b04d2cc8b3940 Mon Sep 17 00:00:00 2001 From: Mario Linker <15095261+maldag@users.noreply.github.com> Date: Tue, 10 Sep 2024 14:50:26 +0200 Subject: [PATCH 10/13] fix linting --- distributed/tests/test_scheduler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/distributed/tests/test_scheduler.py b/distributed/tests/test_scheduler.py index 8f72267e6bf..dada3e3fc95 100644 --- a/distributed/tests/test_scheduler.py +++ b/distributed/tests/test_scheduler.py @@ -1969,7 +1969,7 @@ async def test_scheduler_file(): @pytest.mark.parametrize( "dashboard_address,expect", [ - (None, ("::", "0.0.0.0","127.0.0.1")), + (None, ("::", "0.0.0.0", "127.0.0.1")), ("127.0.0.1:0", ("127.0.0.1",)), ], ) From 5db15c579260dff8a27c4510a3e535b867931c0e Mon Sep 17 00:00:00 2001 From: Mario Linker <15095261+maldag@users.noreply.github.com> Date: Wed, 11 Sep 2024 09:11:04 +0200 Subject: [PATCH 11/13] Fix test for new behaviour --- distributed/tests/test_worker.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/distributed/tests/test_worker.py b/distributed/tests/test_worker.py index 8bfcd8347b9..0a2fed3219a 100644 --- a/distributed/tests/test_worker.py +++ b/distributed/tests/test_worker.py @@ -1115,7 +1115,7 @@ async def test_service_hosts_match_worker(s): async with Worker(s.address, host="tcp://127.0.0.1") as w: sock = first(w.http_server._sockets.values()) - assert sock.getsockname()[0] in ("::", "0.0.0.0") + assert sock.getsockname()[0] in ("::", "127.0.0.1") # See what happens with e.g. `dask worker --listen-address tcp://:8811` async with Worker(s.address, host="") as w: From 21fdf7863705da09f613991759e69640e9075e00 Mon Sep 17 00:00:00 2001 From: Mario Linker <15095261+maldag@users.noreply.github.com> Date: Wed, 11 Sep 2024 09:20:46 +0200 Subject: [PATCH 12/13] Add documentation for LocalCluster --- distributed/deploy/local.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/distributed/deploy/local.py b/distributed/deploy/local.py index 69f5d8af35d..db7e864d73d 100644 --- a/distributed/deploy/local.py +++ b/distributed/deploy/local.py @@ -62,6 +62,9 @@ class LocalCluster(SpecCluster): 'localhost:8787' or '0.0.0.0:8787'. Defaults to ':8787'. Set to ``None`` to disable the dashboard. Use ':0' for a random port. + When using ':8787' the dashboard will bind to the given interface from parameter "host". + If "host" is empty, binding will occur on all interfaces '0.0.0.0'. + To avoid firewall issues when deploying locally specify "host" parameter to localhost. worker_dashboard_address: str Address on which to listen for the Bokeh worker diagnostics server like 'localhost:8787' or '0.0.0.0:8787'. Defaults to None which disables the dashboard. From cb9f62cdc7953e1ed65035522baed79b366dcdbb Mon Sep 17 00:00:00 2001 From: Mario Linker <15095261+maldag@users.noreply.github.com> Date: Thu, 12 Sep 2024 13:55:04 +0200 Subject: [PATCH 13/13] Update distributed/deploy/local.py Co-authored-by: Hendrik Makait --- distributed/deploy/local.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/distributed/deploy/local.py b/distributed/deploy/local.py index db7e864d73d..b7f736e1ed5 100644 --- a/distributed/deploy/local.py +++ b/distributed/deploy/local.py @@ -62,9 +62,9 @@ class LocalCluster(SpecCluster): 'localhost:8787' or '0.0.0.0:8787'. Defaults to ':8787'. Set to ``None`` to disable the dashboard. Use ':0' for a random port. - When using ':8787' the dashboard will bind to the given interface from parameter "host". - If "host" is empty, binding will occur on all interfaces '0.0.0.0'. - To avoid firewall issues when deploying locally specify "host" parameter to localhost. + When specifying only a port like ':8787', the dashboard will bind to the given interface from the ``host`` parameter. + If ``host`` is empty, binding will occur on all interfaces '0.0.0.0'. + To avoid firewall issues when deploying locally, set ``host`` to 'localhost'. worker_dashboard_address: str Address on which to listen for the Bokeh worker diagnostics server like 'localhost:8787' or '0.0.0.0:8787'. Defaults to None which disables the dashboard.