Commit 070b3fd

Attempted delayed replication factor fix + removal of Dockerfile + new csv2bufr/pymetdecoder versions
RoryPTB committed Feb 15, 2024
1 parent 0004cc8 commit 070b3fd
Showing 9 changed files with 72 additions and 102 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/tests.yml
@@ -7,7 +7,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: [3.10, 3.11, 3.12]
+        python-version: [3.9, 3.10]
     env:
       BUFR_ORIGINATING_CENTRE: 123
       BUFR_ORIGINATING_SUBCENTRE: 123
2 changes: 2 additions & 0 deletions .gitignore
@@ -44,6 +44,8 @@ logs
 .vscode/settings.json
 # Ignore decoded CSV files
 decoded_*.csv
+# Ignore extra mapping files in data folders generated by synop2bufr
+data/**/*.json
 # Ignore bash scripts in data folder
 data/*.sh

35 changes: 0 additions & 35 deletions Dockerfile

This file was deleted.

16 changes: 6 additions & 10 deletions README.md
@@ -17,14 +17,7 @@ Dependencies are listed in [requirements.txt](https://github.com/wmo-im/synop2bufr…

 Before using synop2bufr, we highly encourage you to set the `BUFR_ORIGINATING_CENTRE` and `BUFR_ORIGINATING_SUBCENTRE` environment variables. These variables are used to specify the originating centre and subcentre of the SYNOP messages. **Without these set, they will default to missing (255).**

-It is recommended that you set these environment variables in the Dockerfile, by editing the following lines with your originating centre and subcentre values:
-
-```bash
-ENV BUFR_ORIGINATING_CENTRE=<centre_value>
-ENV BUFR_ORIGINATING_SUBCENTRE=<subcentre_value>
-```
-
-Alternatively, you can set these environment variables in your shell if you want to run synop2bufr on your local machine. Here's how you can do it in a Bash shell:
+You can set these environment variables in your shell if you want to run synop2bufr on your local machine. Here's how you can do it in a Bash shell:

 ```bash
 export BUFR_ORIGINATING_CENTRE=<centre_value>

@@ -36,8 +29,11 @@ export BUFR_ORIGINATING_SUBCENTRE=<subcentre_value>

 To run synop2bufr from a Docker container:

 ```console
-docker build -t synop2bufr:local .
-docker run -it -v ${pwd}:/local synop2bufr
+docker run -it -v ${pwd}:/local wmoim/dim_eccodes_baseimage:2.34.0 bash
+apt-get update && apt-get install -y git
+cd /local
+python3 setup.py install
+synop2bufr --help
 ```

 Example data can be found in `data` directory, with the corresponding reference BUFR4 in `data/bufr`.
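For context on the fallback described above, the behaviour can be pictured with a minimal sketch (illustrative only; the variable handling below is an assumption, not synop2bufr's actual code):

```python
import os

# Fall back to 255 ("missing" in BUFR) when the variables are unset,
# mirroring the default documented in the README.
centre = int(os.environ.get("BUFR_ORIGINATING_CENTRE", 255))
subcentre = int(os.environ.get("BUFR_ORIGINATING_SUBCENTRE", 255))

print(f"originating centre: {centre}, subcentre: {subcentre}")
```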
16 changes: 0 additions & 16 deletions data/reinstall.sh

This file was deleted.

15 changes: 7 additions & 8 deletions docs/source/installation.rst
@@ -45,21 +45,20 @@ Alternatively, synop2bufr can be installed from source. First clone the repository…

 git clone https://github.com/wmo-im/synop2bufr.git
 cd synop2bufr

-If running in a Docker environment, build the Docker image and run the container:
+You can then run synop2bufr from an ecCodes base image as follows:

 .. code-block:: bash

-   docker build -t synop2bufr .
-   docker run -it -v ${pwd}:/app synop2bufr
-   cd /app
+   docker run -it -v ${pwd}:/local wmoim/dim_eccodes_baseimage:2.34.0 bash
+   apt-get update && apt-get install -y git
+   cd /local
+   python3 setup.py install
+   synop2bufr --help

-The above step can be skipped if not using Docker. If not using Docker the module and dependencies needs to be installed:

 .. code-block:: bash

    pip3 install -r requirements.txt
-   pip3 install --no-cache-dir https://github.com/wmo-im/csv2bufr/archive/refs/tags/v0.3.1.zip
-   pip3 install --no-cache-dir https://github.com/wmo-im/pymetdecoder/archive/refs/tags/v0.1.0.zip
    python3 setup.py install
    synop2bufr --help
5 changes: 2 additions & 3 deletions requirements.txt
@@ -1,4 +1,3 @@
-attrs==22.2.0
-numpy==1.24.0
 click
-csv2bufr
+pymetdecoder @ git+https://github.com/wmo-im/pymetdecoder.git@v0.1.11
+csv2bufr @ git+https://github.com/wmo-im/csv2bufr.git@v0.8.1
71 changes: 48 additions & 23 deletions synop2bufr/__init__.py
@@ -1156,7 +1156,6 @@ def rad_convert(rad, time):

         # Name the array of section 4 items
         genus_array = decoded['section4']
-        print("genus_array", genus_array)

         # Get the number of section 4 groups in the SYNOP message
         num_s4_clouds = len(genus_array)
@@ -1242,7 +1241,7 @@ def extract_individual_synop(data: str) -> list:
         raise ValueError((
             "Delimiters (=) are not present in the string,"
             " thus unable to identify separate SYNOP reports."
-             ))
+        ))

         d = re.sub(r"\n+", " ", d)
         d = re.sub(r"\x03", "", d)
@@ -1563,7 +1562,7 @@ def transform(data: str, metadata: str, year: int,
         def update_data_mapping(mapping: list, update: dict):
             match = False
             for idx in range(len(mapping)):
-                if mapping[idx]['eccodes_key'] == update['eccodes_key']: # noqa
+                if mapping[idx]['eccodes_key'] == update['eccodes_key']:  # noqa
                     match = True
                     break
             if match:
@@ -1572,6 +1571,19 @@ def update_data_mapping(mapping: list, update: dict):
                 mapping.append(update)
             return mapping

+            # Add delayed descriptor replication factor (0 31 001)
+            # to represent the number of section 3 cloud groups
+            if num_s3_clouds > 0:
+                s3_delayed_replication = {
+                    "eccodes_key":
+                        "#1#delayedDescriptorReplicationFactor",
+                    "value": f"const:{num_s3_clouds}"
+                }
+                mapping['data'] = update_data_mapping(
+                    mapping=mapping['data'],
+                    update=s3_delayed_replication)
+
+            # Now add the rest of the mappings for section 3 clouds
             for idx in range(num_s3_clouds):
                 # Build the dictionary of mappings for section 3
                 # group 8NsChshs
@@ -1584,13 +1596,13 @@ def update_data_mapping(mapping: list, update: dict):
                 # - verticalSignificance: used 7 times (for N,
                 #   low-high cloud amount, low-high cloud drift)
                 s3_mappings = [
-                    # {"eccodes_key":
-                    #  f"#{idx+8}#verticalSignificanceSurfaceObservations",  # noqa
-                    #  "value": f"data:vs_s3_{idx+1}"},
-                    # {"eccodes_key": f"#{idx+3}#cloudAmount",
-                    #  "value": f"data:cloud_amount_s3_{idx+1}",
-                    #  "valid_min": "const:0",
-                    #  "valid_max": "const:8"},
+                    {"eccodes_key":
+                        f"#{idx+8}#verticalSignificanceSurfaceObservations",  # noqa
+                     "value": f"data:vs_s3_{idx+1}"},
+                    {"eccodes_key": f"#{idx+3}#cloudAmount",
+                     "value": f"data:cloud_amount_s3_{idx+1}",
+                     "valid_min": "const:0",
+                     "valid_max": "const:8"},
                     {"eccodes_key": f"#{idx+5}#cloudType",
                      "value": f"data:cloud_genus_s3_{idx+1}"},
                     {"eccodes_key": f"#{idx+2}#heightOfBaseOfCloud",
@@ -1600,6 +1612,19 @@ def update_data_mapping(mapping: list, update: dict):
                     mapping['data'] = update_data_mapping(
                         mapping=mapping['data'], update=m)

+            # Add delayed descriptor replication factor (0 31 001)
+            # to represent the number of section 4 cloud groups
+            if num_s4_clouds > 0:
+                s4_delayed_replication = {
+                    "eccodes_key":
+                        "#2#delayedDescriptorReplicationFactor",
+                    "value": f"const:{num_s4_clouds}"
+                }
+                mapping['data'] = update_data_mapping(
+                    mapping=mapping['data'],
+                    update=s4_delayed_replication)
+
+            # Now add the rest of the mappings for section 4 clouds
             for idx in range(num_s4_clouds):
                 # Based upon the station height metadata, the
                 # value of vertical significance for section 4
@@ -1619,14 +1644,14 @@ def update_data_mapping(mapping: list, update: dict):
                 # NOTE: Some of the ecCodes keys are used in
                 # the above, so we must add 'num_s3_clouds'
                 s4_mappings = [
-                    # {"eccodes_key":
-                    #  f"#{idx+num_s3_clouds+8}#verticalSignificanceSurfaceObservations",  # noqa
-                    #  "value": f"const:{vs_s4}"},
-                    # {"eccodes_key":
-                    #  f"#{idx+num_s3_clouds+3}#cloudAmount",
-                    #  "value": f"data:cloud_amount_s4_{idx+1}",
-                    #  "valid_min": "const:0",
-                    #  "valid_max": "const:8"},
+                    {"eccodes_key":
+                        f"#{idx+num_s3_clouds+8}#verticalSignificanceSurfaceObservations",  # noqa
+                     "value": f"const:{vs_s4}"},
+                    {"eccodes_key":
+                        f"#{idx+num_s3_clouds+3}#cloudAmount",
+                     "value": f"data:cloud_amount_s4_{idx+1}",
+                     "valid_min": "const:0",
+                     "valid_max": "const:8"},
                     {"eccodes_key":
                         f"#{idx+num_s3_clouds+5}#cloudType",
                      "value": f"data:cloud_genus_s4_{idx+1}"},
@@ -1640,17 +1665,17 @@ def update_data_mapping(mapping: list, update: dict):
                 for m in s4_mappings:
                     mapping['data'] = update_data_mapping(
                         mapping=mapping['data'], update=m)
-            # Now section 3 and 4 cloud groups have been
-            # added to the mapping file, write the file
-            # for debugging purposes
-            with open('updated_mappings.json', 'w') as f:
-                json.dump(mapping, f, indent=2)
         except Exception as e:
             LOGGER.error(e)
             LOGGER.error(f"Missing station height for station {tsi}")
             error_msgs.append(
                 f"Missing station height for station {tsi}")
             conversion_success[tsi] = False
+        # Now section 3 and 4 cloud groups have been
+        # added to the mapping file, write the file
+        # for debugging purposes
+        with open('updated_mappings.json', 'w') as f:
+            json.dump(mapping, f, indent=2)

         if conversion_success[tsi]:
             # At this point we have a dictionary for the data, a
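The substantive change in this file is seeding the mapping list with delayed descriptor replication factors (0 31 001) before the per-group cloud mappings are added. Below is a small self-contained sketch of how `update_data_mapping` de-duplicates such an entry — the `else` branch is collapsed between the hunks above and reconstructed here, and the toy list and `num_s3_clouds = 2` are illustrative values only:

```python
def update_data_mapping(mapping: list, update: dict):
    # Replace an entry whose eccodes_key already exists,
    # otherwise append it (logic mirrored from the diff above).
    match = False
    for idx in range(len(mapping)):
        if mapping[idx]['eccodes_key'] == update['eccodes_key']:
            match = True
            break
    if match:
        mapping[idx] = update
    else:
        mapping.append(update)
    return mapping


# Toy mapping list holding a stale replication factor
data_mappings = [{"eccodes_key": "#1#delayedDescriptorReplicationFactor",
                  "value": "const:1"}]

num_s3_clouds = 2  # illustrative; derived from the decoded SYNOP in practice
update_data_mapping(data_mappings, {
    "eccodes_key": "#1#delayedDescriptorReplicationFactor",
    "value": f"const:{num_s3_clouds}"})

print(data_mappings)
# [{'eccodes_key': '#1#delayedDescriptorReplicationFactor', 'value': 'const:2'}]
```

The replace-don't-append behaviour matters here: without it, updating the template again would leave two replication factors under the same key.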
12 changes: 6 additions & 6 deletions tests/test_synop2bufr.py
@@ -149,9 +149,9 @@ def test_bufr_307080(multiple_reports_307080, metadata_string):
     for item in result:
         msgs[item['_meta']['id']] = item
     # Test the md5 keys
-    assert msgs['WIGOS_0-20000-0-15015_20220321T120000']['_meta']['properties']['md5'] == '1e564e1ec2d679bbc120141ba031ab7a'  # noqa
-    assert msgs['WIGOS_0-20000-0-15020_20220321T120000']['_meta']['properties']['md5'] == 'db62277233118df3f1cf7b6a073f1cbe'  # noqa
-    assert msgs['WIGOS_0-20000-0-15090_20220321T120000']['_meta']['properties']['md5'] == '538db43645fb4b2459edfcb467048b7a'  # noqa
+    assert msgs['WIGOS_0-20000-0-15015_20220321T120000']['_meta']['properties']['md5'] == 'db1d075059a70e978647eb1cb0a3f4d2'  # noqa
+    assert msgs['WIGOS_0-20000-0-15020_20220321T120000']['_meta']['properties']['md5'] == '82035d667ce986cb528d0b11c2c8bd77'  # noqa
+    assert msgs['WIGOS_0-20000-0-15090_20220321T120000']['_meta']['properties']['md5'] == '598aaead55964cc01ba5a58e53a59e9f'  # noqa

     # Test the bufr template used for all the reports
     # (they should be the same for every report)

@@ -168,9 +168,9 @@ def test_bufr_307096(multiple_reports_307096, metadata_string):
     for item in result:
         msgs[item['_meta']['id']] = item
     # Test the md5 keys
-    assert msgs['WIGOS_0-20000-0-15015_20220321T120000']['_meta']['properties']['md5'] == '5f1744ec26875630efca0e1583cddca9'  # noqa
-    assert msgs['WIGOS_0-20000-0-15020_20220321T120000']['_meta']['properties']['md5'] == 'e2dc1199d4e38fae25d26ded815597da'  # noqa
-    assert msgs['WIGOS_0-20000-0-15090_20220321T120000']['_meta']['properties']['md5'] == '7c352acb43530946f2445a95eb349e68'  # noqa
+    assert msgs['WIGOS_0-20000-0-15015_20220321T120000']['_meta']['properties']['md5'] == '30200eed11629d03562aafb899bb7729'  # noqa
+    assert msgs['WIGOS_0-20000-0-15020_20220321T120000']['_meta']['properties']['md5'] == '7a368736fb403aa75408633fa17366e3'  # noqa
+    assert msgs['WIGOS_0-20000-0-15090_20220321T120000']['_meta']['properties']['md5'] == '91fef5babfcba6af9358b7b7e38f6960'  # noqa

     # Test the bufr template used for all the reports
     # (they should be the same for every report)
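The six md5 values above change because the encoded messages now include the replication factors added in `synop2bufr/__init__.py`. As a sketch of what this kind of regression check does — assuming the `md5` property is the hex digest of the encoded BUFR bytes, an assumption this diff does not itself confirm:

```python
import hashlib

def bufr_fingerprint(message: bytes) -> str:
    # Hex digest used to detect any change in an encoded message
    # (assumed basis of the 'md5' property asserted above).
    return hashlib.md5(message).hexdigest()

# Placeholder bytes, illustrative only; a real test would hash the
# encoder's output and compare it against the expected digest.
assert bufr_fingerprint(b"BUFR\x00\x01") != bufr_fingerprint(b"BUFR\x00\x02")
```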
