diff --git a/fre/cmor/cmor_helpers.py b/fre/cmor/cmor_helpers.py index a7e6c9033..0ef98bbbf 100644 --- a/fre/cmor/cmor_helpers.py +++ b/fre/cmor/cmor_helpers.py @@ -736,6 +736,12 @@ def filter_brands( brands: list, :return: The single brand string that survived disambiguation. :rtype: str """ + # guard: brands list must be non-empty + if not brands: + raise ValueError( + f'filter_brands called with an empty brands list for ' + f'{target_var!r} \u2014 there is nothing to disambiguate') + # map input vertical dim to MIP equivalent expected_mip_vert = None if input_vert_dim != 0: @@ -772,13 +778,25 @@ def filter_brands( brands: list, if len(filtered_brands) == 0: fre_logger.error('cmip7 brand disambiguation eliminated all candidates ' - 'from %s', brands) + 'from %s for target_var %s ' + '(has_time_bnds=%s, input_vert_dim=%s)', + brands, target_var, has_time_bnds, input_vert_dim) raise ValueError( - f'multiple brands {brands} found for {target_var}, ' - f'but none survived disambiguation filtering') + f'all candidate brands {brands} for {target_var!r} were eliminated ' + f'during disambiguation (has_time_bnds={has_time_bnds}, ' + f'input_vert_dim={input_vert_dim!r}). none survived filtering. ' + f'verify that the input file has the expected time bounds and ' + f'vertical coordinate, and that the MIP table entries for ' + f'{[f"{target_var}_{b}" for b in brands]} are correct') fre_logger.error('cmip7 brand disambiguation could not resolve between ' - '%s', filtered_brands) + '%s for target_var %s ' + '(has_time_bnds=%s, input_vert_dim=%s)', + filtered_brands, target_var, has_time_bnds, input_vert_dim) raise ValueError( - f'multiple brands {filtered_brands} remain for {target_var} after ' - f'disambiguation \u2014 cannot determine which brand to use') + f'multiple brands {filtered_brands} remain for {target_var!r} after ' + f'disambiguation (has_time_bnds={has_time_bnds}, ' + f'input_vert_dim={input_vert_dim!r}) \u2014 cannot determine which ' + f'brand to use. verify the input file dimensions and check whether ' + f'the MIP table has duplicate entries that share the same time-type ' + f'and vertical coordinate') diff --git a/fre/cmor/cmor_mixer.py b/fre/cmor/cmor_mixer.py index 43aa55d66..d4184ebc8 100755 --- a/fre/cmor/cmor_mixer.py +++ b/fre/cmor/cmor_mixer.py @@ -125,16 +125,21 @@ def rewrite_netcdf_file_var( mip_var_cfgs: dict = None, var_brand = None exp_cfg_mip_era = get_json_file_data(json_exp_config)['mip_era'].upper() if exp_cfg_mip_era == 'CMIP7': + brand_prefix = target_var + '_' brands = [] for mip_var in mip_var_cfgs["variable_entry"].keys(): - if all([ target_var == mip_var.split('_')[0], - var_dim == len(mip_var_cfgs["variable_entry"][mip_var]['dimensions']) ]): - brands.append(mip_var.split('_')[1]) - - if len(brands)>0: - if len(brands)==1: - var_brand=brands[0] - fre_logger.debug('cmip7 case, extracted brand %s',var_brand) + if not mip_var.startswith(brand_prefix): + continue + brand = mip_var[len(brand_prefix):] + if not brand: + continue + if var_dim == len(mip_var_cfgs["variable_entry"][mip_var]['dimensions']): + brands.append(brand) + + if len(brands) > 0: + if len(brands) == 1: + var_brand = brands[0] + fre_logger.debug('cmip7 case, extracted brand %s', var_brand) else: fre_logger.warning('cmip7 case, extracted multiple brands %s, attempting disambiguation', brands) @@ -144,9 +149,19 @@ def rewrite_netcdf_file_var( mip_var_cfgs: dict = None, input_vert_dim = get_vertical_dimension(ds, target_var) ) else: - fre_logger.error('cmip7 case detected, but dimensions of input data do not match ' - 'any of those found for the associated brands.') - raise ValueError + fre_logger.error( + 'CMIP7 branded-variable error for %s: no brands in the MIP ' + 'table matched with %d dimension(s). available branded entries ' + 'for this variable: %s', + target_var, var_dim, + [k for k in mip_var_cfgs["variable_entry"] if k.startswith(brand_prefix)] + ) + raise ValueError( + f'no brands in the MIP table match {target_var!r} with ' + f'{var_dim} dimension(s). check that the MIP table contains a ' + f'branded entry (e.g. {target_var}_) whose dimension ' + f'count matches the input data' + ) else: fre_logger.debug('non-cmip7 case detected, skipping variable brands') diff --git a/fre/cmor/tests/test_cmor_helpers.py b/fre/cmor/tests/test_cmor_helpers.py index 8ca3ab060..e2e22d6c2 100644 --- a/fre/cmor/tests/test_cmor_helpers.py +++ b/fre/cmor/tests/test_cmor_helpers.py @@ -412,7 +412,7 @@ def test_filter_brands_all_eliminated(): "sos_a": "longitude latitude time1", "sos_b": "longitude latitude time1", }) - with pytest.raises(ValueError, match='none survived'): + with pytest.raises(ValueError, match='none survived filtering'): filter_brands( brands=["a", "b"], target_var="sos", @@ -422,13 +422,44 @@ def test_filter_brands_all_eliminated(): ) +def test_filter_brands_all_eliminated_message_contains_properties(): + ''' the error message should include the input properties that led to elimination ''' + mip = _make_mip_var_cfgs({ + "sos_a": "longitude latitude time1", + }) + with pytest.raises(ValueError, match=r'has_time_bnds=True.*input_vert_dim=0'): + filter_brands( + brands=["a"], + target_var="sos", + mip_var_cfgs=mip, + has_time_bnds=True, + input_vert_dim=0, + ) + + def test_filter_brands_multiple_remain(): ''' should raise ValueError when multiple brands survive filtering ''' mip = _make_mip_var_cfgs({ "sos_a": "longitude latitude time", "sos_b": "longitude latitude time", }) - with pytest.raises(ValueError, match='remain for sos'): + with pytest.raises(ValueError, match="remain for 'sos'"): + filter_brands( + brands=["a", "b"], + target_var="sos", + mip_var_cfgs=mip, + has_time_bnds=True, + input_vert_dim=0, + ) + + +def test_filter_brands_multiple_remain_message_contains_properties(): + ''' the error message for multiple remaining brands should include input properties ''' + mip = _make_mip_var_cfgs({ + "sos_a": "longitude latitude time", + "sos_b": "longitude latitude time", + }) + with pytest.raises(ValueError, match=r'has_time_bnds=True.*input_vert_dim=0'): filter_brands( brands=["a", "b"], target_var="sos", @@ -436,3 +467,16 @@ def test_filter_brands_multiple_remain(): has_time_bnds=True, input_vert_dim=0, ) + + +def test_filter_brands_empty_brands_list(): + ''' should raise ValueError when brands list is empty ''' + mip = _make_mip_var_cfgs({}) + with pytest.raises(ValueError, match='empty brands list'): + filter_brands( + brands=[], + target_var="sos", + mip_var_cfgs=mip, + has_time_bnds=True, + input_vert_dim=0, + )