From 00a5084238cffb1278b6ae0f8395ff270a28d08c Mon Sep 17 00:00:00 2001
From: Raphael Hagen
Date: Wed, 12 Jun 2024 09:35:57 -0600
Subject: [PATCH] adds pyramid.py for generating metaflux pyramids

---
Review notes (commentary only; not part of the commit message or of any hunk):

 1. feedstock/pyramid.py passes a literal store URL to
    pattern_from_file_sequence. The same string is the url of the existing
    "metaflux-monthly" catalog entry, and the script already loads
    catalog_store_urls to look up the Copy target, so the input looks like it
    could be read from the catalog as well (TODO confirm that
    get_catalog_store_urls maps every store id to its url).
 2. The catalog gains "metaflux-daily-pyramid", but pyramid.py only looks up
    "metaflux-monthly-pyramid"; nothing in this patch writes the daily
    pyramid (presumably produced separately - verify).
 3. The monthly URLs are spelled "meatflux_..." while the daily pyramid URL
    is spelled "metaflux_...". The new monthly pyramid URL follows the
    existing monthly entry; NOTE(review): confirm the spelling is intended
    before data is written there.
 4. requirements.txt installs pangeo-forge-ndpyramid from git with no ref,
    whereas pangeo-forge-recipes and leap-data-management-utils in the same
    file are pinned with "==".
 5. The Beam pipeline sits in a module-level "with beam.Pipeline()" block,
    i.e. it is not wrapped in a function or a __main__ guard.
 6. The commented-out local-testing lines use os, gcsfs and FSSpecTarget,
    none of which pyramid.py imports.
 7. Leading whitespace in the hunks below was reconstructed; check the
    catalog.yaml context lines against the tree before applying.

 feedstock/catalog.yaml     |  8 ++++++
 feedstock/pyramid.py       | 56 ++++++++++++++++++++++++++++++++++++++
 feedstock/requirements.txt |  1 +
 3 files changed, 65 insertions(+)
 create mode 100644 feedstock/pyramid.py

diff --git a/feedstock/catalog.yaml b/feedstock/catalog.yaml
index 1b78674..ef0d8ec 100644
--- a/feedstock/catalog.yaml
+++ b/feedstock/catalog.yaml
@@ -11,3 +11,11 @@ stores:
   - id: "metaflux-monthly"
     name: "METAFLUX_GPP_RECO_monthly"
     url: "gs://leap-persistent-ro/data-library/feedstocks/metaflux_feedstock/meatflux_monthly.zarr"
+
+  - id: "metaflux-daily-pyramid"
+    name: "METAFLUX_GPP_RECO_daily_pyramid_3_lvl"
+    url: "gs://leap-persistent-ro/data-library/feedstocks/metaflux_feedstock/metaflux_daily_pyramid_3_lvl.zarr"
+
+  - id: "metaflux-monthly-pyramid"
+    name: "METAFLUX_GPP_RECO_monthly_pyramid_3_lvl"
+    url: "gs://leap-persistent-ro/data-library/feedstocks/metaflux_feedstock/meatflux_monthly_pyramid_3_lvl.zarr"
diff --git a/feedstock/pyramid.py b/feedstock/pyramid.py
new file mode 100644
index 0000000..9a5e78e
--- /dev/null
+++ b/feedstock/pyramid.py
@@ -0,0 +1,56 @@
+import xarray as xr
+from dataclasses import dataclass
+import apache_beam as beam
+
+from pangeo_forge_recipes.patterns import pattern_from_file_sequence, FileType
+from pangeo_forge_recipes.transforms import (
+    OpenWithXarray,
+)
+from pangeo_forge_ndpyramid.transforms import StoreToPyramid
+from leap_data_management_utils.data_management_transforms import (
+    Copy,
+    get_catalog_store_urls,
+)
+
+
+## local testing
+# user_path = f"gs://leap-scratch/{os.environ['JUPYTERHUB_USER']}/pgf/"
+# fs_target = gcsfs.GCSFileSystem()
+# target_root = FSSpecTarget(fs_target, user_path)
+
+
+catalog_store_urls = get_catalog_store_urls("feedstock/catalog.yaml")
+
+pattern = pattern_from_file_sequence(
+    [
+        "gs://leap-persistent-ro/data-library/feedstocks/metaflux_feedstock/meatflux_monthly.zarr"
+    ],
+    concat_dim="time",
+)
+
+
+@dataclass
+class SelVars(beam.PTransform):
+    def _selvars(self, ds: xr.Dataset) -> xr.Dataset:
+        return ds[["GPP", "RECO"]]
+
+    def expand(self, pcoll):
+        return pcoll | "sel" >> beam.MapTuple(lambda k, v: (k, self._selvars(v)))
+
+
+with beam.Pipeline() as p:
+    (
+        p
+        | beam.Create(pattern.items())
+        | OpenWithXarray(file_type=FileType("zarr"), xarray_open_kwargs={"chunks": {}})
+        | SelVars()
+        | "Write Pyramid Levels"
+        >> StoreToPyramid(
+            # target_root=target_root,
+            store_name="metaflux_monthly_3_lvl.zarr",
+            epsg_code="4326",
+            levels=3,
+            combine_dims=pattern.combine_dim_keys,
+        )
+        | Copy(target=catalog_store_urls["metaflux-monthly-pyramid"])
+    )
diff --git a/feedstock/requirements.txt b/feedstock/requirements.txt
index 3a17e20..a47ca82 100644
--- a/feedstock/requirements.txt
+++ b/feedstock/requirements.txt
@@ -2,3 +2,4 @@ pangeo-forge-recipes==0.10.7
 apache-beam[gcp]
 gcsfs
 leap-data-management-utils==0.0.5
+git+https://github.com/carbonplan/pangeo-forge-ndpyramid