Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
155 changes: 102 additions & 53 deletions unseen/eva.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ def fit_gev(
fc=None,
floc=None,
fscale=None,
bounds=None,
retry_fit=False,
assert_good_fit=False,
goodness_of_fit_kwargs={},
Expand Down Expand Up @@ -100,6 +101,8 @@ def fit_gev(
Initial guess of trend parameters. If None, the trend is fixed at zero.
fc, floc, fscale : float, default None
Fixed values for the shape, location and scale parameters.
bounds : list of tuples, optional
Custom bounds for the shape, loc0, loc1, scale0 and scale1 parameters.
retry_fit : bool, default True
Retry fit with initial estimate generated by passing data[::2] to
fitstart. The best fit is returned. See notes.
Expand Down Expand Up @@ -172,7 +175,18 @@ def fit_gev(
def check_support(dparams, covariate=None):
    """Warn if the GEV parameters fall outside the distribution's support.

    Parameters
    ----------
    dparams : array_like
        GEV distribution parameters (stationary: shape, loc, scale;
        nonstationary: shape, loc0, loc1, scale0, scale1).
    covariate : array_like, optional
        Covariate values used to evaluate a nonstationary location/scale.

    Notes
    -----
    Location and scale may be scalars (stationary fit) or arrays
    (nonstationary fit evaluated per covariate value).  ``np.all`` is
    used so both cases are handled uniformly — the chained
    ``(scale > 0).all()`` form fails with ``AttributeError`` when scale
    is a plain Python float.  A warning (not an exception) is issued so
    one bad 1D fit does not abort a vectorized apply over many cells.
    """
    _shape, loc, scale = unpack_gev_params(dparams, covariate)
    supported = (
        np.all(np.isfinite(loc)) and np.all(np.isfinite(scale)) and np.all(scale > 0)
    )
    if not supported:
        warnings.warn(f"GEV parameters are not supported: {dparams}.")

def ns_gev_parameter_bounds():
    """Return default optimizer bounds for the nonstationary GEV parameters.

    Returns
    -------
    bounds : list of tuple
        Five ``(min, max)`` pairs, one per parameter, in the order
        shape, loc0, loc1, scale0, scale1.  All parameters are
        unbounded except scale0, which must be strictly positive.
    """
    bounds = [(-np.inf, np.inf)] * 5
    bounds[3] = (1e-6, np.inf)  # scale0 intercept must stay positive
    return bounds

def _fit_1d(
data,
Expand All @@ -184,6 +198,7 @@ def _fit_1d(
fc,
floc,
fscale,
bounds,
retry_fit,
assert_good_fit,
pick_best_model,
Expand All @@ -192,6 +207,7 @@ def _fit_1d(
use_basinhopping,
basinhopping_kwargs,
goodness_of_fit_kwargs,
scipy_fit_kwargs,
):
"""Estimate distribution parameters."""
if np.all(~np.isfinite(data)):
Expand All @@ -203,15 +219,9 @@ def _fit_1d(
# Drop NaNs in data
mask = np.isfinite(data)
data = data[mask]
if not stationary:
if not stationary and not np.isscalar(covariate):
covariate = covariate[mask]

scipy_fit_kwargs = {}

for kw in ["fc", "floc", "fscale"]:
if eval(kw) is not None:
scipy_fit_kwargs[kw] = eval(kw)

# Initial estimates of distribution parameters for MLE
if isinstance(fitstart, str):
dparams_i = _fitstart_1d(data, fitstart, scipy_fit_kwargs)
Expand Down Expand Up @@ -256,11 +266,7 @@ def _fit_1d(
if not stationary:
dparams_ns_i = [dparams_i[0], dparams_i[1], loc1, dparams_i[2], scale1]

# Define parameter bounds and fix
bounds = [(-np.inf, np.inf)] * 5
# bounds[0] = (-1, 1)
bounds[3] = (1e-6, np.inf) # Allow positive scale parameter
# # Set initial value and bounds of fixed parameters
# Set initial value and bounds of fixed parameters
for i, fixed in zip([0, 1, 3], [fc, floc, fscale]):
if fixed is not None:
dparams_ns_i[i] = fixed
Expand Down Expand Up @@ -289,7 +295,6 @@ def _fit_1d(
_gev_nllf, dparams_ns_i, args=(data, covariate), bounds=bounds
)

# todo: add minimize success checks
dparams_ns = np.array([i for i in res.x], dtype="float64")

# Stationary and nonstationary model relative goodness of fit
Expand Down Expand Up @@ -334,6 +339,9 @@ def _fit_1d(
warnings.warn(f"Data fit failed (p-value={pvalue} NLL={nll:.3f}).")

# check_support(dparams, covariate)
if not np.isnan([dparams]).any():
check_support(dparams, covariate)

return dparams

if stationary and pick_best_model:
Expand All @@ -344,12 +352,26 @@ def _fit_1d(
if covariate is not None:
covariate = _format_covariate(data, covariate, core_dim)
else:
# check if stationary or loca1 and scale1 are None using np.all
assert stationary is True or (
loc1 is None and scale1 is None
), "Covariate must be provided for a nonstationary fit."
# Check if stationary or loc1 and scale1 are None using np.all
assert stationary is True, "Covariate must be provided for a nonstationary fit."
covariate = 0 # No covariate (can't be None)

if stationary and use_basinhopping:
# Restrict the trend parameters to zero if stationary
stationary = False
loc1, scale1 = None, None
kwargs.update({"stationary": False, "loc1": loc1, "scale1": scale1})

if bounds is None:
kwargs["bounds"] = ns_gev_parameter_bounds()

scipy_fit_kwargs = {}

for kw in ["fc", "floc", "fscale"]:
if eval(kw) is not None:
scipy_fit_kwargs[kw] = eval(kw)

kwargs["scipy_fit_kwargs"] = scipy_fit_kwargs
# Input core dimensions
if core_dim is not None and hasattr(covariate, core_dim):
# Covariate has the same core dimension as data
Expand All @@ -359,6 +381,7 @@ def _fit_1d(
input_core_dims = [[core_dim], []]

n_params = 3 if stationary else 5

# Fit data to distribution parameters
dparams = apply_ufunc(
_fit_1d,
Expand All @@ -373,13 +396,26 @@ def _fit_1d(
dask_gufunc_kwargs={"output_sizes": {"dparams": n_params}},
)

# Format output (consistent with xclim)
# Format output
if loc1 is None and scale1 is None:
# Remove both trend parameters if not used
stationary = True
if hasattr(dparams, "isel"):
dparams = dparams.isel(dparams=[0, 1, 3], drop=True)
else:
dparams = dparams[[0, 1, 3]]

if isinstance(data, DataArray):
if n_params == 3:
# todo: change loc to location
dparams.coords["dparams"] = ["c", "loc", "scale"]
if dparams["dparams"].size == 3:
dparams.coords["dparams"] = ["c", "location", "scale"]
else:
dparams.coords["dparams"] = ["c", "loc0", "loc1", "scale0", "scale1"]
dparams.coords["dparams"] = [
"c",
"location_0",
"location_1",
"scale_0",
"scale_1",
]

# Add coordinates for the distribution parameters
dist_name = "genextreme" if stationary else "nonstationary genextreme"
Expand Down Expand Up @@ -417,7 +453,7 @@ def penalised_sum(x):
return total + penalty_sum


def _gev_nllf(dparams, x, covariate=None):
def _gev_nllf(dparams, x, covariate=0):
"""GEV penalised negative log-likelihood function.

Parameters
Expand Down Expand Up @@ -448,10 +484,10 @@ def _gev_nllf(dparams, x, covariate=None):
The NLLF is not finite when the shape is nonzero and Z is negative because
the PDF is zero (i.e., ``log(0)=inf)``).
"""

shape, loc, scale = unpack_gev_params(dparams, covariate)
shape = -shape # Reverse shape sign for consistency with scipy.stats results

# transform scale parameter so that it is always positive
valid = scale > 0
s = (x - loc) / scale

Expand Down Expand Up @@ -559,8 +595,8 @@ def __call__(self, x):
"""take a random step but ensure the new position is within the bounds"""
min_step = np.maximum(self.xmin - x, -self.stepsize)
max_step = np.minimum(self.xmax - x, self.stepsize)
x = x + self.rng.uniform(low=min_step, high=max_step, size=x.shape)

x += self.rng.uniform(low=min_step, high=max_step, size=x.shape)
return x


Expand Down Expand Up @@ -739,7 +775,7 @@ def get_best_GEV_model_1d(
return dparams


def unpack_gev_params(dparams, covariate=None):
def unpack_gev_params(dparams, covariate=0):
"""Unpack shape, loc, scale from dparams.

Parameters
Expand Down Expand Up @@ -779,7 +815,7 @@ def unpack_gev_params(dparams, covariate=None):
return shape, loc, scale


def get_return_period(event, dparams=None, covariate=None, **kwargs):
def get_return_period(event, dparams=None, covariate=0, **kwargs):
"""Get return periods for a given events.

Parameters
Expand Down Expand Up @@ -818,7 +854,7 @@ def get_return_period(event, dparams=None, covariate=None, **kwargs):
return 1.0 / probability


def get_return_level(return_period, dparams=None, covariate=None, **kwargs):
def get_return_level(return_period, dparams=None, covariate=0, **kwargs):
"""Get the return levels for given return periods.

Parameters
Expand Down Expand Up @@ -899,6 +935,13 @@ def _empirical_return_period(da, event):
return ri


def empirical_return_level(da, return_period, **kwargs):
"""Empirical return level of an event (1D)."""
cdf = ecdf(da).cdf
probability = 1 - (1 / return_period)
return np.interp(probability, cdf.probabilities, cdf.quantiles)


def get_empirical_return_level(da, return_period, core_dim="time"):
"""Calculate the empirical return period of an event.

Expand All @@ -917,14 +960,8 @@ def get_empirical_return_level(da, return_period, core_dim="time"):
The event return level (e.g., 100 mm of rainfall)
"""

def _empirical_return_level(da, period):
"""Empirical return level of an event (1D)."""
sf = ecdf(da).sf
probability = 1 - (1 / period)
return np.interp(probability, sf.probabilities, sf.quantiles)

return_level = apply_ufunc(
_empirical_return_level,
empirical_return_level,
da,
return_period,
input_core_dims=[[core_dim], []],
Expand Down Expand Up @@ -967,6 +1004,7 @@ def gev_confidence_interval(
n_resamples=1000,
ci=0.95,
core_dim="time",
covariate=0,
fit_kwargs={},
):
"""
Expand Down Expand Up @@ -999,10 +1037,10 @@ def gev_confidence_interval(
ci_bounds : xarray.DataArray
Confidence intervals with lower and upper bounds along dim 'quantile'
"""
# todo: add max_shape_ratio?

# Replace core dim with the one from the fit_kwargs if it exists
core_dim = fit_kwargs.pop("core_dim", core_dim)
covariate = fit_kwargs.pop("covariate", None)
covariate = fit_kwargs.pop("covariate", covariate)

rng = np.random.default_rng(seed=0)
if dparams is None:
Expand Down Expand Up @@ -1040,11 +1078,11 @@ def gev_confidence_interval(

if return_period is not None:
result = get_return_level(
return_period, gev_params_resampled, core_dim=core_dim
return_period, gev_params_resampled, core_dim=core_dim, covariate=covariate
)
elif return_level is not None:
result = get_return_period(
return_level, gev_params_resampled, core_dim=core_dim
return_level, gev_params_resampled, core_dim=core_dim, covariate=covariate
)

# Bounds of confidence intervals
Expand Down Expand Up @@ -1548,9 +1586,9 @@ def spatial_plot_gev_parameters(
)
# Add coastlines and lat/lon labels
ax.coastlines()
ax.set_title(f"{params[i]}")
ax.set_xlabel(None)
ax.set_ylabel(None)
ax.set_title(f"{params[i].title()} parameter")
ax.xaxis.set_major_formatter(LongitudeFormatter())
ax.yaxis.set_major_formatter(LatitudeFormatter())
ax.xaxis.set_minor_locator(AutoMinorLocator())
Expand All @@ -1563,11 +1601,11 @@ def spatial_plot_gev_parameters(
ax.yaxis.set_visible(True)

if dataset_name:
fig.suptitle(f"{dataset_name} GEV parameters", y=0.8 if stationary else 0.99)
fig.suptitle(f"{dataset_name} GEV parameters", y=0.75 if stationary else 0.97)

if not stationary:
# Hide the empty subplot
axes[-1].set_visible(False)
# Hide empty subplots
for ax in [ax for ax in axes if not ax.collections]:
ax.axis("off")

plt.tight_layout()
if outfile:
Expand All @@ -1587,10 +1625,9 @@ def _parse_command_line():
parser.add_argument("outfile", type=str, help="Output file")
parser.add_argument(
"--stack_dims",
type=str,
nargs="*",
# default=["ensemble", "init_date", "lead_time"],
help="Dimensions to stack",
action="store_true",
default=False,
help="Stack ensemble, init_date, and lead_time dimensions",
)
parser.add_argument("--core_dim", type=str, default="time", help="Core dimension")
parser.add_argument(
Expand Down Expand Up @@ -1626,6 +1663,12 @@ def _parse_command_line():
default=False,
help="Retry fit if it doesn't pass the goodness of fit test",
)
parser.add_argument(
"--use_basinhopping",
action="store_true",
default=False,
help="Use basinhopping optimizer to find the best fit",
)
parser.add_argument(
"--assert_good_fit",
action="store_true",
Expand Down Expand Up @@ -1729,11 +1772,16 @@ def _main():
else:
ds = ds.where(ds[args.lead_dim] >= args.min_lead)

# Stack dimensions along new "sample" dimension
if all([dim in ds[args.var].dims for dim in args.stack_dims]):
ds = ds.stack(**{"sample": args.stack_dims}, create_index=False)
# Stack ensemble, init and lead dimensions along new "sample" dimension
if args.stack_dims:
# Check if the dimensions exist in the dataset
dims = []
for dim in (args.ensemble_dim, args.init_dim, args.lead_dim):
if dim in ds.dims:
dims.append(dim)
ds = ds.stack(**{"sample": dims}, create_index=False)
ds = ds.chunk(dict(sample=-1)) # fixes CAFE large chunk error
args.core_dim = "sample"
args.core_dim = "sample" # Set core dimension to the new stacked dimension

# Drop the maximum value before fitting
if args.drop_max:
Expand All @@ -1754,6 +1802,7 @@ def _main():
fitstart=args.fitstart,
covariate=covariate,
retry_fit=args.retry_fit,
use_basinhopping=args.use_basinhopping,
assert_good_fit=args.assert_good_fit,
pick_best_model=args.pick_best_model,
)
Expand Down
Loading
Loading