Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
155 changes: 102 additions & 53 deletions unseen/eva.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ def fit_gev(
fc=None,
floc=None,
fscale=None,
bounds=None,
retry_fit=False,
assert_good_fit=False,
goodness_of_fit_kwargs={},
Expand Down Expand Up @@ -100,6 +101,8 @@ def fit_gev(
Initial guess of trend parameters. If None, the trend is fixed at zero.
fc, floc, fscale : float, default None
Fixed values for the shape, location and scale parameters.
bounds : list of tuples, optional
Custom bounds for the shape, loc0, loc1, scale0 and scale1 parameters.
retry_fit : bool, default True
Retry fit with initial estimate generated by passing data[::2] to
fitstart. The best fit is returned. See notes.
Expand Down Expand Up @@ -172,7 +175,18 @@ def fit_gev(
def check_support(dparams, covariate=None):
    """Warn if the GEV parameters fall outside the distribution's support.

    Parameters
    ----------
    dparams : array_like
        GEV distribution parameters (stationary: shape, loc, scale;
        nonstationary: shape, loc0, loc1, scale0, scale1).
    covariate : array_like, optional
        Covariate values used to evaluate a nonstationary location/scale.

    Notes
    -----
    Location and scale may be scalars (stationary fit) or arrays
    (nonstationary fit evaluated per covariate value).  ``np.all`` is
    used so both cases are handled uniformly — the chained
    ``(scale > 0).all()`` form fails with ``AttributeError`` when scale
    is a plain Python float.  A warning (not an exception) is issued so
    one bad 1D fit does not abort a vectorized apply over many cells.
    """
    _shape, loc, scale = unpack_gev_params(dparams, covariate)
    supported = (
        np.all(np.isfinite(loc)) and np.all(np.isfinite(scale)) and np.all(scale > 0)
    )
    if not supported:
        warnings.warn(f"GEV parameters are not supported: {dparams}.")

def ns_gev_parameter_bounds():
    """Return default optimizer bounds for the nonstationary GEV parameters.

    Returns
    -------
    bounds : list of tuple
        Five ``(min, max)`` pairs, one per parameter, in the order
        shape, loc0, loc1, scale0, scale1.  All parameters are
        unbounded except scale0, which must be strictly positive.
    """
    bounds = [(-np.inf, np.inf)] * 5
    bounds[3] = (1e-6, np.inf)  # scale0 intercept must stay positive
    return bounds

def _fit_1d(
data,
Expand All @@ -184,6 +198,7 @@ def _fit_1d(
fc,
floc,
fscale,
bounds,
retry_fit,
assert_good_fit,
pick_best_model,
Expand All @@ -192,6 +207,7 @@ def _fit_1d(
use_basinhopping,
basinhopping_kwargs,
goodness_of_fit_kwargs,
scipy_fit_kwargs,
):
"""Estimate distribution parameters."""
if np.all(~np.isfinite(data)):
Expand All @@ -203,15 +219,9 @@ def _fit_1d(
# Drop NaNs in data
mask = np.isfinite(data)
data = data[mask]
if not stationary:
if not stationary and not np.isscalar(covariate):
covariate = covariate[mask]

scipy_fit_kwargs = {}

for kw in ["fc", "floc", "fscale"]:
if eval(kw) is not None:
scipy_fit_kwargs[kw] = eval(kw)

# Initial estimates of distribution parameters for MLE
if isinstance(fitstart, str):
dparams_i = _fitstart_1d(data, fitstart, scipy_fit_kwargs)
Expand Down Expand Up @@ -256,11 +266,7 @@ def _fit_1d(
if not stationary:
dparams_ns_i = [dparams_i[0], dparams_i[1], loc1, dparams_i[2], scale1]

# Define parameter bounds and fix
bounds = [(-np.inf, np.inf)] * 5
# bounds[0] = (-1, 1)
bounds[3] = (1e-6, np.inf) # Allow positive scale parameter
# # Set initial value and bounds of fixed parameters
# Set initial value and bounds of fixed parameters
for i, fixed in zip([0, 1, 3], [fc, floc, fscale]):
if fixed is not None:
dparams_ns_i[i] = fixed
Expand Down Expand Up @@ -289,7 +295,6 @@ def _fit_1d(
_gev_nllf, dparams_ns_i, args=(data, covariate), bounds=bounds
)

# todo: add minimize success checks
dparams_ns = np.array([i for i in res.x], dtype="float64")

# Stationary and nonstationary model relative goodness of fit
Expand Down Expand Up @@ -334,6 +339,9 @@ def _fit_1d(
warnings.warn(f"Data fit failed (p-value={pvalue} NLL={nll:.3f}).")

# check_support(dparams, covariate)
if not np.isnan([dparams]).any():
check_support(dparams, covariate)

return dparams

if stationary and pick_best_model:
Expand All @@ -344,12 +352,26 @@ def _fit_1d(
if covariate is not None:
covariate = _format_covariate(data, covariate, core_dim)
else:
# check if stationary or loca1 and scale1 are None using np.all
assert stationary is True or (
loc1 is None and scale1 is None
), "Covariate must be provided for a nonstationary fit."
# Check if stationary or loc1 and scale1 are None using np.all
assert stationary is True, "Covariate must be provided for a nonstationary fit."
covariate = 0 # No covariate (can't be None)

if stationary and use_basinhopping:
# Restrict the trend parameters to zero if stationary
stationary = False
loc1, scale1 = None, None
kwargs.update({"stationary": False, "loc1": loc1, "scale1": scale1})

if bounds is None:
kwargs["bounds"] = ns_gev_parameter_bounds()

scipy_fit_kwargs = {}

for kw in ["fc", "floc", "fscale"]:
if eval(kw) is not None:
scipy_fit_kwargs[kw] = eval(kw)

kwargs["scipy_fit_kwargs"] = scipy_fit_kwargs
# Input core dimensions
if core_dim is not None and hasattr(covariate, core_dim):
# Covariate has the same core dimension as data
Expand All @@ -359,6 +381,7 @@ def _fit_1d(
input_core_dims = [[core_dim], []]

n_params = 3 if stationary else 5

# Fit data to distribution parameters
dparams = apply_ufunc(
_fit_1d,
Expand All @@ -373,13 +396,26 @@ def _fit_1d(
dask_gufunc_kwargs={"output_sizes": {"dparams": n_params}},
)

# Format output (consistent with xclim)
# Format output
if loc1 is None and scale1 is None:
# Remove both trend parameters if not used
stationary = True
if hasattr(dparams, "isel"):
dparams = dparams.isel(dparams=[0, 1, 3], drop=True)
else:
dparams = dparams[[0, 1, 3]]

if isinstance(data, DataArray):
if n_params == 3:
# todo: change loc to location
dparams.coords["dparams"] = ["c", "loc", "scale"]
if dparams["dparams"].size == 3:
dparams.coords["dparams"] = ["c", "location", "scale"]
else:
dparams.coords["dparams"] = ["c", "loc0", "loc1", "scale0", "scale1"]
dparams.coords["dparams"] = [
"c",
"location_0",
"location_1",
"scale_0",
"scale_1",
]

# Add coordinates for the distribution parameters
dist_name = "genextreme" if stationary else "nonstationary genextreme"
Expand Down Expand Up @@ -417,7 +453,7 @@ def penalised_sum(x):
return total + penalty_sum


def _gev_nllf(dparams, x, covariate=None):
def _gev_nllf(dparams, x, covariate=0):
"""GEV penalised negative log-likelihood function.

Parameters
Expand Down Expand Up @@ -448,10 +484,10 @@ def _gev_nllf(dparams, x, covariate=None):
The NLLF is not finite when the shape is nonzero and Z is negative because
the PDF is zero (i.e., ``log(0)=inf)``).
"""

shape, loc, scale = unpack_gev_params(dparams, covariate)
shape = -shape # Reverse shape sign for consistency with scipy.stats results

# transform scale parameter so that it is always positive
valid = scale > 0
s = (x - loc) / scale

Expand Down Expand Up @@ -559,8 +595,8 @@ def __call__(self, x):
"""take a random step but ensure the new position is within the bounds"""
min_step = np.maximum(self.xmin - x, -self.stepsize)
max_step = np.minimum(self.xmax - x, self.stepsize)
x = x + self.rng.uniform(low=min_step, high=max_step, size=x.shape)

x += self.rng.uniform(low=min_step, high=max_step, size=x.shape)
return x


Expand Down Expand Up @@ -739,7 +775,7 @@ def get_best_GEV_model_1d(
return dparams


def unpack_gev_params(dparams, covariate=None):
def unpack_gev_params(dparams, covariate=0):
"""Unpack shape, loc, scale from dparams.

Parameters
Expand Down Expand Up @@ -779,7 +815,7 @@ def unpack_gev_params(dparams, covariate=None):
return shape, loc, scale


def get_return_period(event, dparams=None, covariate=None, **kwargs):
def get_return_period(event, dparams=None, covariate=0, **kwargs):
"""Get return periods for a given events.

Parameters
Expand Down Expand Up @@ -818,7 +854,7 @@ def get_return_period(event, dparams=None, covariate=None, **kwargs):
return 1.0 / probability


def get_return_level(return_period, dparams=None, covariate=None, **kwargs):
def get_return_level(return_period, dparams=None, covariate=0, **kwargs):
"""Get the return levels for given return periods.

Parameters
Expand Down Expand Up @@ -899,6 +935,13 @@ def _empirical_return_period(da, event):
return ri


def empirical_return_level(da, return_period, **kwargs):
"""Empirical return level of an event (1D)."""
cdf = ecdf(da).cdf
probability = 1 - (1 / return_period)
return np.interp(probability, cdf.probabilities, cdf.quantiles)


def get_empirical_return_level(da, return_period, core_dim="time"):
"""Calculate the empirical return period of an event.

Expand All @@ -917,14 +960,8 @@ def get_empirical_return_level(da, return_period, core_dim="time"):
The event return level (e.g., 100 mm of rainfall)
"""

def _empirical_return_level(da, period):
"""Empirical return level of an event (1D)."""
sf = ecdf(da).sf
probability = 1 - (1 / period)
return np.interp(probability, sf.probabilities, sf.quantiles)

return_level = apply_ufunc(
_empirical_return_level,
empirical_return_level,
da,
return_period,
input_core_dims=[[core_dim], []],
Expand Down Expand Up @@ -967,6 +1004,7 @@ def gev_confidence_interval(
n_resamples=1000,
ci=0.95,
core_dim="time",
covariate=0,
fit_kwargs={},
):
"""
Expand Down Expand Up @@ -999,10 +1037,10 @@ def gev_confidence_interval(
ci_bounds : xarray.DataArray
Confidence intervals with lower and upper bounds along dim 'quantile'
"""
# todo: add max_shape_ratio?

# Replace core dim with the one from the fit_kwargs if it exists
core_dim = fit_kwargs.pop("core_dim", core_dim)
covariate = fit_kwargs.pop("covariate", None)
covariate = fit_kwargs.pop("covariate", covariate)

rng = np.random.default_rng(seed=0)
if dparams is None:
Expand Down Expand Up @@ -1040,11 +1078,11 @@ def gev_confidence_interval(

if return_period is not None:
result = get_return_level(
return_period, gev_params_resampled, core_dim=core_dim
return_period, gev_params_resampled, core_dim=core_dim, covariate=covariate
)
elif return_level is not None:
result = get_return_period(
return_level, gev_params_resampled, core_dim=core_dim
return_level, gev_params_resampled, core_dim=core_dim, covariate=covariate
)

# Bounds of confidence intervals
Expand Down Expand Up @@ -1548,9 +1586,9 @@ def spatial_plot_gev_parameters(
)
# Add coastlines and lat/lon labels
ax.coastlines()
ax.set_title(f"{params[i]}")
ax.set_xlabel(None)
ax.set_ylabel(None)
ax.set_title(f"{params[i].title()} parameter")
ax.xaxis.set_major_formatter(LongitudeFormatter())
ax.yaxis.set_major_formatter(LatitudeFormatter())
ax.xaxis.set_minor_locator(AutoMinorLocator())
Expand All @@ -1563,11 +1601,11 @@ def spatial_plot_gev_parameters(
ax.yaxis.set_visible(True)

if dataset_name:
fig.suptitle(f"{dataset_name} GEV parameters", y=0.8 if stationary else 0.99)
fig.suptitle(f"{dataset_name} GEV parameters", y=0.75 if stationary else 0.97)

if not stationary:
# Hide the empty subplot
axes[-1].set_visible(False)
# Hide empty subplots
for ax in [ax for ax in axes if not ax.collections]:
ax.axis("off")

plt.tight_layout()
if outfile:
Expand All @@ -1587,10 +1625,9 @@ def _parse_command_line():
parser.add_argument("outfile", type=str, help="Output file")
parser.add_argument(
"--stack_dims",
type=str,
nargs="*",
# default=["ensemble", "init_date", "lead_time"],
help="Dimensions to stack",
action="store_true",
default=False,
help="Stack ensemble, init_date, and lead_time dimensions",
)
parser.add_argument("--core_dim", type=str, default="time", help="Core dimension")
parser.add_argument(
Expand Down Expand Up @@ -1626,6 +1663,12 @@ def _parse_command_line():
default=False,
help="Retry fit if it doesn't pass the goodness of fit test",
)
parser.add_argument(
"--use_basinhopping",
action="store_true",
default=False,
help="Use basinhopping optimizer to find the best fit",
)
parser.add_argument(
"--assert_good_fit",
action="store_true",
Expand Down Expand Up @@ -1729,11 +1772,16 @@ def _main():
else:
ds = ds.where(ds[args.lead_dim] >= args.min_lead)

# Stack dimensions along new "sample" dimension
if all([dim in ds[args.var].dims for dim in args.stack_dims]):
ds = ds.stack(**{"sample": args.stack_dims}, create_index=False)
# Stack ensemble, init and lead dimensions along new "sample" dimension
if args.stack_dims:
# Check if the dimensions exist in the dataset
dims = []
for dim in (args.ensemble_dim, args.init_dim, args.lead_dim):
if dim in ds.dims:
dims.append(dim)
ds = ds.stack(**{"sample": dims}, create_index=False)
ds = ds.chunk(dict(sample=-1)) # fixes CAFE large chunk error
args.core_dim = "sample"
args.core_dim = "sample" # Set core dimension to the new stacked dimension

# Drop the maximum value before fitting
if args.drop_max:
Expand All @@ -1754,6 +1802,7 @@ def _main():
fitstart=args.fitstart,
covariate=covariate,
retry_fit=args.retry_fit,
use_basinhopping=args.use_basinhopping,
assert_good_fit=args.assert_good_fit,
pick_best_model=args.pick_best_model,
)
Expand Down
Loading
Loading