Skip to content

Commit

Permalink
Merge pull request #169 from Ouranosinc/rdrs_addvars
Browse files Browse the repository at this point in the history
Rdrs addvars
  • Loading branch information
Zeitsperre authored May 21, 2024
2 parents c97fef4 + 6fc3ff3 commit 79c4cdc
Show file tree
Hide file tree
Showing 13 changed files with 296 additions and 68 deletions.
10 changes: 10 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,21 @@ Changelog

v0.6.0 (unreleased)
-------------------
Contributors to this version: Travis Logan (:user:`tlogan2000`), Trevor James Smith (:user:`Zeitsperre`).

Announcements
^^^^^^^^^^^^^
* `miranda` boilerplate code is now versioned with `cruft <https://cruft.github.io/cruft>`_ and the `Ouranosinc/cookiecutter-pypackage <https://github.com/Ouranosinc/cookiecutter-pypackage>`_ template.

New features
^^^^^^^^^^^^
* Aggregation operations now support more variables (`hur`, `hurs`, `huss`, `rlds`, `ta`, `tdp`, `ua`, `uas`, `va`, `vas`).
* `RDRSv21` has been added as a dataset to be converted.

Bug fixes
^^^^^^^^^
* Transformation docstrings are now only updated when the transformation is actually applied.

Internal changes
^^^^^^^^^^^^^^^^
* `miranda` now has a security policy (`SECURITY.md`) for disclosing sensitive issues using secure communication channels. This has also been added to the documentation.
Expand Down
1 change: 0 additions & 1 deletion docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,6 @@
"navigation_with_keys": True,
"source_branch": "main",
"source_repository": "https://github.com/Ouranosinc/miranda/",
"top_of_page_button": "edit" if not on_rtd else None,
}

# Add any paths that contain custom themes here, relative to this directory.
Expand Down
3 changes: 1 addition & 2 deletions miranda/convert/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,4 @@
from ._aggregation import *
from ._data_corrections import *
from ._data_definitions import *

# from ._reconstruction import *
from ._reconstruction import *
17 changes: 14 additions & 3 deletions miranda/convert/_aggregation.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,25 +39,29 @@ def aggregations_possible(ds: xr.Dataset, freq: str = "day") -> dict[str, set[st
offset, meaning = get_time_frequency(ds, minimum_continuous_period="1h")

aggregation_legend = dict()
for v in ["tas", "tdps"]:
for v in ["tas", "tdps", "hurs"]:
if freq == meaning:
if not hasattr(ds, v) and (
hasattr(ds, f"{v}max") and hasattr(ds, f"{v}min")
):
aggregation_legend[f"_{v}"] = {"mean"}
for variable in ds.data_vars:
if variable in ["tas", "tdps"]:
if variable in ["tas", "ta", "tdps", "tdp", "hurs", "hur"]:
aggregation_legend[variable] = {"max", "mean", "min"}
elif variable in ["sfcWind"]:
aggregation_legend[variable] = {"max", "mean"}
elif variable in [
"evspsblpot",
"hfls",
"hfss",
"hur",
"huss",
"hus",
"pr",
"prsn",
"prmod",
"ps",
"psl",
"rlds",
"rsds",
"rss",
"rlds",
Expand All @@ -66,6 +70,13 @@ def aggregations_possible(ds: xr.Dataset, freq: str = "day") -> dict[str, set[st
"snr",
"snw",
"swe",
"uas",
"ua",
"vas",
"va",
"40mWind",
"zcrd10000",
"zcrd09944",
]:
aggregation_legend[variable] = {"mean"}

Expand Down
55 changes: 29 additions & 26 deletions miranda/convert/_data_corrections.py
Original file line number Diff line number Diff line change
Expand Up @@ -409,18 +409,15 @@ def _transform(d: xr.Dataset, p: str, m: dict) -> xr.Dataset:
converted.append(vv)
else:
raise NotImplementedError(f"Unknown transformation: {trans}")
prev_history = d.attrs.get("history", "")
history = f"Transformed variable `{vv}` values using method `{trans}`. {prev_history}"
d_out.attrs.update(dict(history=history))
elif trans is False:
logging.info(
f"No transformations needed for `{vv}` (Explicitly set to False)."
)
continue

prev_history = d.attrs.get("history", "")
history = (
f"Transformed variable `{vv}` values using method `{trans}`. {prev_history}"
)
d_out.attrs.update(dict(history=history))

# Copy unconverted variables
for vv in d.data_vars:
if vv not in converted:
Expand Down Expand Up @@ -461,14 +458,14 @@ def _offset_time(d: xr.Dataset, p: str, m: dict) -> xr.Dataset:
out["time"] = out.time - np.timedelta64(offset[0], offset[1])
d_out[vv] = out
converted.append(vv)
prev_history = d.attrs.get("history", "")
history = f"Offset variable `{vv}` values by `{offset[0]} {offset_meaning}(s). {prev_history}"
d_out.attrs.update(dict(history=history))
elif offs is False:
logging.info(
f"No time offsetting needed for `{vv}` in `{p}` (Explicitly set to False)."
)
continue
prev_history = d.attrs.get("history", "")
history = f"Offset variable `{vv}` values by `{offset[0]} {offset_meaning}(s). {prev_history}"
d_out.attrs.update(dict(history=history))

# Copy unconverted variables
for vv in d.data_vars:
Expand All @@ -488,14 +485,14 @@ def _invert_sign(d: xr.Dataset, p: str, m: dict) -> xr.Dataset:
out = d[vv]
d_out[out.name] = -out
converted.append(vv)
prev_history = d.attrs.get("history", "")
history = f"Inverted sign for variable `{vv}` (switched direction of values). {prev_history}"
d_out.attrs.update(dict(history=history))
elif inv_sign is False:
logging.info(
f"No sign inversion needed for `{vv}` in `{p}` (Explicitly set to False)."
)
continue
prev_history = d.attrs.get("history", "")
history = f"Inverted sign for variable `{vv}` (switched direction of values). {prev_history}"
d_out.attrs.update(dict(history=history))

# Copy unconverted variables
for vv in d.data_vars:
Expand Down Expand Up @@ -549,6 +546,9 @@ def _clip_values(d: xr.Dataset, p: str, m: dict) -> xr.Dataset:
out = d[vv]
d_out[out.name] = out.clip(min_value, max_value)
converted.append(vv)
prev_history = d.attrs.get("history", "")
history = f"Clipped variable `{vv}` with `min={min_value}` and `max={max_value}`. {prev_history}"
d_out.attrs.update(dict(history=history))
elif clip_values is False:
logging.info(
f"No clipping of values needed for `{vv}` in `{p}` (Explicitly set to False)."
Expand All @@ -558,10 +558,6 @@ def _clip_values(d: xr.Dataset, p: str, m: dict) -> xr.Dataset:
logging.info(f"No clipping of values needed for `{vv}` in `{p}`.")
continue

prev_history = d.attrs.get("history", "")
history = f"Clipped variable `{vv}` with `min={min_value}` and `max={max_value}`. {prev_history}"
d_out.attrs.update(dict(history=history))

# Copy unconverted variables
for vv in d.data_vars:
if vv not in converted:
Expand Down Expand Up @@ -626,17 +622,18 @@ def _ensure_correct_time(d: xr.Dataset, p: str, m: dict) -> xr.Dataset:
raise ValueError(error_msg)

logging.info(f"Resampling dataset with time frequency: {freq_found}.")

with xr.set_options(keep_attrs=True):
d_out = d.assign_coords(
time=d.time.resample(time=freq_found).mean(dim="time").time
)
d_out.time.attrs.update(d.time.attrs)

prev_history = d.attrs.get("history", "")
history = f"Resampled time with `freq={freq_found}`. {prev_history}"
d_out.attrs.update(dict(history=history))
return d_out
if any(d_out.time != d.time):
prev_history = d.attrs.get("history", "")
history = f"Resampled time with `freq={freq_found}`. {prev_history}"
d_out.attrs.update(dict(history=history))

return d_out
return d


Expand Down Expand Up @@ -665,7 +662,11 @@ def dims_conversion(d: xr.Dataset, p: str, m: dict) -> xr.Dataset:
)
if cf_name:
rename_dims[dim] = cf_name
d = d.rename(rename_dims)
if rename_dims:
d = d.rename(rename_dims)
prev_history = d.attrs.get("history", "")
history = f"Renamed dimensons ({'; '.join([f'{k} : {i}' for k, i in rename_dims.items()])}). {prev_history}"
d.attrs.update(dict(history=history))
for new in ["lon", "lat"]:
if new == "lon" and "lon" in d.coords:
if np.any(d.lon > 180):
Expand All @@ -685,8 +686,14 @@ def dims_conversion(d: xr.Dataset, p: str, m: dict) -> xr.Dataset:
if "time" in d.dims and transpose_order:
transpose_order.insert(0, "time")
transpose_order.extend(list(set(d.dims) - set(transpose_order)))

d = d.transpose(*transpose_order)
d = d.sortby(transpose_order)
# add history only when we actually changed something
if any([list(d[v].dims) != transpose_order for v in d.data_vars]):
prev_history = d.attrs.get("history", "")
history = f"Transposed dimension order to {transpose_order}. {prev_history}"
d.attrs.update(dict(history=history))

# Add dimension original name and update attrs
dim_descriptions = m["dimensions"]
Expand All @@ -701,10 +708,6 @@ def dims_conversion(d: xr.Dataset, p: str, m: dict) -> xr.Dataset:
if not field.startswith("_"):
d[cf_name].attrs.update({field: dim_descriptions[dim][field]})

prev_history = d.attrs.get("history", "")
history = f"Transposed and renamed dimensions. {prev_history}"
d.attrs.update(dict(history=history))

return d


Expand Down
2 changes: 1 addition & 1 deletion miranda/convert/_data_definitions.py
Original file line number Diff line number Diff line change
Expand Up @@ -267,7 +267,7 @@ def gather_rdrs(
source=path.joinpath(vv),
glob_pattern="{variable}_*_{name}_*.{suffix}",
suffix=suffix,
recursive=True,
recursive=False,
)
files[name][vv] = tmp[name]
return files
Expand Down
4 changes: 3 additions & 1 deletion miranda/convert/_reconstruction.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,9 @@ def reanalysis_processing(
file_name = "_".join([var, time_freq, institute, project])
if domain != "not-specified":
file_name = f"{file_name}_{domain}"

if not chunks:
chunks = dict(time=24 * 10, lon=50, lat=50)
print(chunks)
xr_kwargs = dict(
chunks=chunks,
engine=engine,
Expand Down
Loading

0 comments on commit 79c4cdc

Please sign in to comment.