diff --git a/.github/workflows/deploy_recipe.yaml b/.github/workflows/deploy_recipe.yaml index 31e7f34..67c7f0c 100644 --- a/.github/workflows/deploy_recipe.yaml +++ b/.github/workflows/deploy_recipe.yaml @@ -48,7 +48,8 @@ jobs: --job_name $JOB_NAME \ --region us-central1 \ --machine_type t2d-standard-16 \ - --disk_size_gb 50 \ - --num_workers 5 \ + --disk_size_gb 50 \ + --max_num_workers 100 \ + --experiments=minNumWorkers=5 \ --temp_location gs://leap-persistent/norlandrhagen/dataflow_temp/temp/ \ --staging_location gs://leap-persistent/norlandrhagen/dataflow_temp/staging/ diff --git a/xbeam_virtualizarr/recipe.py b/xbeam_virtualizarr/recipe.py index f9a1515..168d983 100644 --- a/xbeam_virtualizarr/recipe.py +++ b/xbeam_virtualizarr/recipe.py @@ -25,7 +25,7 @@ def run(argv=None, save_main_session=True): combined_ds = xr.open_dataset(reference_path, engine="kerchunk", chunks=None) # subset the reference zarr - source_dataset = combined_ds.isel(day=slice(0, 10000))[ + source_dataset = combined_ds.isel(day=slice(0, 20000))[ ["air_temperature"] ] # all vars # source_chunks = dict(source_dataset.sizes) # this is total size. Hardcode for now