Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 5 additions & 4 deletions dp/generate_climos.py
Original file line number Diff line number Diff line change
Expand Up @@ -442,11 +442,12 @@ def convert_flux_var_units(input_file, climo_data):
flux_variable = input_file.variables[flux_var]
units = Unit.from_udunits_str(flux_variable.units)

if units in [Unit('kg / m**2 / s'), Unit('mm / s')]:
logger.info("Converting {} variable to units mm/day".format(flux_var))
precip_units = [Unit('kg / m**2 / s'), Unit('mm / s'), Unit('g / cm**2 / s'), Unit('cm / s')]
if units in precip_units:
# Update units attribute
attributes['units'] = (units * Unit('s / day')).to_udunits_str()
# Multiply values by 86400 to convert from mm/s to mm/day
logger.info(f"Converting {flux_var} variable to units {attributes['units']}")
# Multiply values by 86400 to convert from mm/s to mm/day or from cm/s to cm/day
seconds_per_day = 86400

if hasattr(flux_variable, 'scale_factor') or hasattr(flux_variable, 'add_offset'):
Expand All @@ -463,7 +464,7 @@ def convert_flux_var_units(input_file, climo_data):
# This is not a packed file; modify the values proper
# Extract variable
var_only = cdo.select('name={}'.format(flux_var), input=climo_data)
# Multiply values by 86400 to convert from mm/s to mm/day
# Multiply values by 86400 to convert from mm/s to mm/day or from cm/s to cm/day
var_only = cdo.mulc(str(seconds_per_day), input=var_only)
# Replace pr in all-variables file
climo_data = cdo.replace(input=[climo_data, var_only])
Expand Down
17 changes: 10 additions & 7 deletions dp/generate_prsn.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ def create_prsn_netcdf_from_source(src, dst, to_delete=['original_name, comment'

prsn_var.standard_name = 'snowfall_flux'
prsn_var.long_name = 'Precipitation as Snow'
prsn_var.units = 'g cm-2 s-1'

for item in set(to_delete):
if hasattr(prsn_var, item):
Expand Down Expand Up @@ -163,12 +164,9 @@ def convert_temperature_units(data, units_from, units_to):

def check_pr_units(pr_units):
'''Ensure we have expected pr units'''
valid_units = [
Unit('kg / m**2 / s'),
Unit('mm / s'),
Unit('kg / d / m**2'),
Unit('kg / m**2 / d')
]
valid_units = ['kg / m**2 / s', 'kg / m**2 / d', 'kg / d / m**2', 'mm / s', 'mm / d', \
'g / cm**2 / s', 'g / cm**2 / d', 'g / d / cm**2', 'cm / s', 'cm / d']
valid_units = [Unit(u) for u in valid_units]
units = Unit.from_udunits_str(pr_units)
return units in valid_units

Expand Down Expand Up @@ -222,7 +220,12 @@ def process_to_prsn(variables, output_dataset, max_chunk_len):
}
means = np.mean([chunk_data['tasmin'], chunk_data['tasmax']], axis=0)
prsn_data = np.where(means < freezing, chunk_data['pr'], 0)
output_dataset.variables['prsn'][chunk] = prsn_data

prsn_units_from = ureg.parse_units('kg/m^2/s')
prsn_units_to = ureg.parse_units('g/cm^2/s')
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you tell me more about using this as the target unit? You've changed both the mass (kg -> g) and the area (per square meter to per square centimeter). Is that definitely what we want? And is it definitely correct?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Converting from kg -> g multiplies the data by 1000 and converting from /m^2 -> /cm^2 divides the data by 10000, giving the net effect of dividing the data by 10. Since converting from mm to cm also divides by 10, I think this will work. I can check with Lee when he's back.


output_dataset.variables['prsn'][chunk] = prsn_data * \
Q_(1.0, prsn_units_from).to(prsn_units_to).magnitude


def generate_prsn_file(filepaths, chunk_size, outdir, output_file=None):
Expand Down
12 changes: 11 additions & 1 deletion tests/test_generate_prsn.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,10 @@
pr_freezing_from_units, create_prsn_netcdf_from_source, \
create_filepath_from_source, has_required_vars, matching_datasets, \
check_pr_units, process_to_prsn, convert_temperature_units
from pint import UnitRegistry

ureg = UnitRegistry()
Q_ = ureg.Quantity

@pytest.mark.parametrize('arrays', [
({'shape1': np.arange(10).reshape(2, 5), 'shape2': np.arange(10).reshape(2, 5)}),
Expand Down Expand Up @@ -67,7 +70,7 @@ def test_create_prsn_netcdf_from_source(tiny_dataset, fake_dataset):
assert np.shape(fake_dataset.variables['prsn']) == (11688, 4, 2)
assert fake_dataset.variables['prsn'].standard_name == 'snowfall_flux'
assert fake_dataset.variables['prsn'].long_name == 'Precipitation as Snow'

assert fake_dataset.variables['prsn'].units == 'g cm-2 s-1'
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There's not actually any check on the data values in this test, so it's hard to know whether it's doing the conversion correctly. Could you work out what the sum snowfall in this data should be an add that to the test?

Copy link
Contributor Author

@eyvorchuk eyvorchuk May 12, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I believe the dataset used for test_process_to_prsn results in a prsn array of all zeros since these tests pass.

result = fake_dataset.variables['prsn'][:] 
result = np.where(result != 0)
for array in result:
    assert len(array) == 0

Is there possibly another dataset I could use to test the conversion?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh right, you had mentioned that. Well that's not a great testing dataset, then is it?! :)

Maybe you could throw a few strategic low temperature values into the test data set... it might throw some other tests out of whack, but you could adjust as necessary.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since we're only concerned about the conversion, would it be ok if I store the pr dataset in two variables, perform the conversion on one of them, and check to make sure that each value in the two datasets differs by a factor of 10? Or would you prefer I modify the data set and tests where needed?


@pytest.mark.parametrize('tiny_dataset, new_var, expected', [
('downscaled_pr', 'prsn', 'prsn_day_BCCAQ2_ACCESS1-0_ACCESS1-0+historical+rcp45+r1i1p1_r1i1p1_19600101-19911231.nc'),
Expand Down Expand Up @@ -151,6 +154,13 @@ def test_process_to_prsn(pr, tasmin, tasmax, fake_dataset):
for array in result:
assert len(array) == 0

# Test conversion from mm to cm
pr_mm = pr_dataset.variables['pr'][:]
pr_units_from = ureg.parse_units('kg/m^2/s')
pr_units_to = ureg.parse_units('g/cm^2/s')
pr_cm = pr_mm * Q_(1.0, pr_units_from).to(pr_units_to).magnitude
assert pr_cm == pytest.approx(pr_mm/10.0)


@pytest.mark.parametrize('units_from, units_to, expected', [
('degC', 'K', 273.15),
Expand Down