Skip to content

Commit

Permalink
adds pyramid.py for generating metaflux pyramids
Browse files Browse the repository at this point in the history
  • Loading branch information
norlandrhagen committed Jun 12, 2024
1 parent 60e150c commit 00a5084
Show file tree
Hide file tree
Showing 3 changed files with 65 additions and 0 deletions.
8 changes: 8 additions & 0 deletions feedstock/catalog.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,11 @@ stores:
- id: "metaflux-monthly"
name: "METAFLUX_GPP_RECO_monthly"
url: "gs://leap-persistent-ro/data-library/feedstocks/metaflux_feedstock/meatflux_monthly.zarr"

- id: "metaflux-daily-pyramid"
name: "METAFLUX_GPP_RECO_daily_pyramid_3_lvl"
url: "gs://leap-persistent-ro/data-library/feedstocks/metaflux_feedstock/metaflux_daily_pyramid_3_lvl.zarr"

- id: "metaflux-monthly-pyramid"
name: "METAFLUX_GPP_RECO_monthly_pyramid_3_lvl"
url: "gs://leap-persistent-ro/data-library/feedstocks/metaflux_feedstock/meatflux_monthly_pyramid_3_lvl.zarr"
56 changes: 56 additions & 0 deletions feedstock/pyramid.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
import xarray as xr
from dataclasses import dataclass
import apache_beam as beam

from pangeo_forge_recipes.patterns import pattern_from_file_sequence, FileType
from pangeo_forge_recipes.transforms import (
OpenWithXarray,
)
from pangeo_forge_ndpyramid.transforms import StoreToPyramid
from leap_data_management_utils.data_management_transforms import (
Copy,
get_catalog_store_urls,
)


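## local testing: uncomment the lines below (this also requires importing os, gcsfs
## and FSSpecTarget) and re-enable the commented target_root argument of StoreToPyramid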
# user_path = f"gs://leap-scratch/{os.environ['JUPYTERHUB_USER']}/pgf/"
# fs_target = gcsfs.GCSFileSystem()
# target_root = FSSpecTarget(fs_target, user_path)


# Store URLs read from the feedstock catalog file. The pipeline below indexes this
# by catalog store id ("metaflux-monthly-pyramid") to obtain the copy target.
catalog_store_urls = get_catalog_store_urls("feedstock/catalog.yaml")

# File pattern with a single entry: the existing monthly Zarr store, with "time"
# declared as the concat dimension. Its items feed the Beam pipeline below.
pattern = pattern_from_file_sequence(
[
# NOTE(review): this URL duplicates the one recorded for the "metaflux-monthly"
# store in feedstock/catalog.yaml, including the "meatflux" spelling. Confirm the
# real object path before "correcting" the spelling in either place.
"gs://leap-persistent-ro/data-library/feedstocks/metaflux_feedstock/meatflux_monthly.zarr"
],
concat_dim="time",
)


@dataclass
class SelVars(beam.PTransform):
    """Beam transform that keeps only the ``GPP`` and ``RECO`` variables.

    Elements are expected to be ``(key, dataset)`` tuples; the key is passed
    through untouched and only the dataset is subset.
    """

    def _selvars(self, ds: xr.Dataset) -> xr.Dataset:
        """Return ``ds`` indexed by the variable list ``["GPP", "RECO"]``."""
        wanted = ["GPP", "RECO"]
        return ds[wanted]

    def expand(self, pcoll):
        """Apply the variable selection to the value of every element."""

        def _subset_value(key, dataset):
            # Keep the key as-is so downstream transforms still see it.
            return (key, self._selvars(dataset))

        return pcoll | "sel" >> beam.MapTuple(_subset_value)


# Build and run the pyramid pipeline; the pipeline executes when the ``with``
# block exits.
with beam.Pipeline() as p:
    # One element per item of the file pattern.
    keyed_paths = p | beam.Create(pattern.items())

    # Open each item as a Zarr store; ``chunks={}`` is forwarded to xarray's open call.
    opened = keyed_paths | OpenWithXarray(
        file_type=FileType("zarr"), xarray_open_kwargs={"chunks": {}}
    )

    # Drop everything except the GPP and RECO variables.
    gpp_reco = opened | SelVars()

    # Write a 3-level pyramid store.
    pyramid_store = gpp_reco | "Write Pyramid Levels" >> StoreToPyramid(
        # target_root=target_root,
        store_name="metaflux_monthly_3_lvl.zarr",
        epsg_code="4326",
        levels=3,
        combine_dims=pattern.combine_dim_keys,
    )

    # Hand the result to Copy, targeting the URL registered in the catalog
    # under the "metaflux-monthly-pyramid" id.
    pyramid_store | Copy(target=catalog_store_urls["metaflux-monthly-pyramid"])
1 change: 1 addition & 0 deletions feedstock/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@ pangeo-forge-recipes==0.10.7
apache-beam[gcp]
gcsfs
leap-data-management-utils==0.0.5
git+https://github.com/carbonplan/pangeo-forge-ndpyramid

0 comments on commit 00a5084

Please sign in to comment.