Standardize pvgis_tmy to return (data,meta) (#2470)

AdamRJensen · web-flow · commit d7023536c414 · 2025-06-05T13:20:21.000-04:00
* Initial commit

* Fix tests, whatsnew, linter

* Feedback from review
diff --git a/docs/sphinx/source/user_guide/modeling_topics/weather_data.rst b/docs/sphinx/source/user_guide/modeling_topics/weather_data.rst
@@ -76,7 +76,7 @@ them.
 Usage
 -----
 
-With some exceptions, the :py:mod:`pvlib.iotools` functions
+The :py:mod:`pvlib.iotools` functions
 provide a uniform interface for accessing data across many formats.
 Specifically, :py:mod:`pvlib.iotools` functions usually return two objects:
 a :py:class:`pandas.DataFrame` of the actual dataset, plus a metadata
@@ -89,8 +89,7 @@ Typical usage looks something like this:
 
 .. code-block:: python
 
-    # get_pvgis_tmy returns two additional values besides df and metadata
-    df, _, _, metadata = pvlib.iotools.get_pvgis_tmy(latitude, longitude, map_variables=True)
+    df, metadata = pvlib.iotools.get_pvgis_tmy(latitude, longitude, map_variables=True)
 
 This code will fetch a Typical Meteorological Year (TMY) dataset from PVGIS,
 returning a :py:class:`pandas.DataFrame` containing the hourly weather data
diff --git a/docs/sphinx/source/whatsnew/v0.12.1.rst b/docs/sphinx/source/whatsnew/v0.12.1.rst
@@ -11,6 +11,10 @@ Breaking Changes
   following the iotools convention instead of ``(data,inputs,meta)``.
   The ``inputs`` dictionary is now included in ``meta``, which
   has changed structure to accommodate it. (:pull:`2462`)
+* The functions :py:func:`~pvlib.iotools.read_pvgis_tmy` and
+  :py:func:`~pvlib.iotools.get_pvgis_tmy` now return ``(data,meta)``
+  following the iotools convention instead of ``(data,months_selected,inputs,meta)``.
+  (:pull:`2470`)
 * Remove ``outputformat='basic'`` option in :py:func:`~pvlib.iotools.get_pvgis_tmy`.
   (:pull:`2416`)
 
diff --git a/pvlib/iotools/pvgis.py b/pvlib/iotools/pvgis.py
@@ -261,7 +261,7 @@ def _parse_pvgis_hourly_json(src, map_variables):
 
 def _parse_pvgis_hourly_csv(src, map_variables):
     # The first 4 rows are latitude, longitude, elevation, radiation database
-    metadata = {'inputs': {}}
+    metadata = {'inputs': {}, 'descriptions': {}}
     # 'location' metadata
     # 'Latitude (decimal degrees): 45.000\r\n'
     metadata['inputs']['latitude'] = float(src.readline().split(':')[1])
@@ -440,6 +440,13 @@ def get_pvgis_tmy(latitude, longitude, outputformat='json', usehorizon=True,
 
     For more information see the PVGIS [1]_ TMY tool documentation [2]_.
 
+        .. versionchanged:: 0.13.0
+           The function now returns two items ``(data,meta)``. Previous
+           versions of this function returned four elements
+           ``(data,months_selected,inputs,meta)``. The ``inputs`` dictionary
+           and ``months_selected`` are now included in ``meta``, which has
+           changed structure to accommodate them.
+
     Parameters
     ----------
     latitude : float
@@ -478,10 +485,6 @@ def get_pvgis_tmy(latitude, longitude, outputformat='json', usehorizon=True,
     -------
     data : pandas.DataFrame
         the weather data
-    months_selected : list
-        TMY year for each month, ``None`` for EPW
-    inputs : dict
-        the inputs, ``None`` for EPW
     metadata : list or dict
         file metadata
 
@@ -527,17 +530,16 @@ def get_pvgis_tmy(latitude, longitude, outputformat='json', usehorizon=True,
         else:
             raise requests.HTTPError(err_msg['message'])
     # initialize data to None in case API fails to respond to bad outputformat
-    data = None, None, None, None
+    data = None, None
     if outputformat == 'json':
         src = res.json()
-        data, months_selected, inputs, meta = _parse_pvgis_tmy_json(src)
+        data, meta = _parse_pvgis_tmy_json(src)
     elif outputformat == 'csv':
         with io.BytesIO(res.content) as src:
-            data, months_selected, inputs, meta = _parse_pvgis_tmy_csv(src)
+            data, meta = _parse_pvgis_tmy_csv(src)
     elif outputformat == 'epw':
         with io.StringIO(res.content.decode('utf-8')) as src:
             data, meta = read_epw(src)
-            months_selected, inputs = None, None
     elif outputformat == 'basic':
         err_msg = ("outputformat='basic' is no longer supported by pvlib, "
                    "please use outputformat='csv' instead.")
@@ -551,34 +553,37 @@ def get_pvgis_tmy(latitude, longitude, outputformat='json', usehorizon=True,
         coerce_year = coerce_year or 1990
         data = _coerce_and_roll_tmy(data, roll_utc_offset, coerce_year)
 
-    return data, months_selected, inputs, meta
+    return data, meta
 
 
 def _parse_pvgis_tmy_json(src):
-    inputs = src['inputs']
-    meta = src['meta']
-    months_selected = src['outputs']['months_selected']
+    meta = src['meta'].copy()
+    # Replace the descriptive "inputs" in metadata with the actual inputs
+    meta['inputs'] = src['inputs']
+    # Re-add the original metadata "inputs" one layer down, as descriptions
+    meta['inputs']['descriptions'] = src['meta']['inputs']
+    meta['months_selected'] = src['outputs']['months_selected']
     data = pd.DataFrame(src['outputs']['tmy_hourly'])
     data.index = pd.to_datetime(
         data['time(UTC)'], format='%Y%m%d:%H%M', utc=True)
     data = data.drop('time(UTC)', axis=1)
-    return data, months_selected, inputs, meta
+    return data, meta
 
 
 def _parse_pvgis_tmy_csv(src):
     # the first 3 rows are latitude, longitude, elevation
-    inputs = {}
+    meta = {'inputs': {}, 'descriptions': {}}
     # 'Latitude (decimal degrees): 45.000\r\n'
-    inputs['latitude'] = float(src.readline().split(b':')[1])
+    meta['inputs']['latitude'] = float(src.readline().split(b':')[1])
     # 'Longitude (decimal degrees): 8.000\r\n'
-    inputs['longitude'] = float(src.readline().split(b':')[1])
+    meta['inputs']['longitude'] = float(src.readline().split(b':')[1])
     # Elevation (m): 1389.0\r\n
-    inputs['elevation'] = float(src.readline().split(b':')[1])
+    meta['inputs']['elevation'] = float(src.readline().split(b':')[1])
 
     # TMY has an extra line here: Irradiance Time Offset (h): 0.1761\r\n
     line = src.readline()
     if line.startswith(b'Irradiance Time Offset'):
-        inputs['irradiance time offset'] = float(line.split(b':')[1])
+        meta['inputs']['irradiance time offset'] = float(line.split(b':')[1])
         src.readline()  # skip over the "month,year\r\n"
     else:
         # `line` is already the "month,year\r\n" line, so nothing to do
@@ -589,6 +594,7 @@ def _parse_pvgis_tmy_csv(src):
     for month in range(12):
         months_selected.append(
             {'month': month+1, 'year': int(src.readline().split(b',')[1])})
+    meta['months_selected'] = months_selected
     # then there's the TMY (typical meteorological year) data
     # first there's a header row:
     #    time(UTC),T2m,RH,G(h),Gb(n),Gd(h),IR(h),WS10m,WD10m,SP
@@ -601,14 +607,26 @@ def _parse_pvgis_tmy_csv(src):
     data = pd.DataFrame(data, dtype=float)
     data.index = dtidx
     # finally there's some meta data
-    meta = [line.decode('utf-8').strip() for line in src.readlines()]
-    return data, months_selected, inputs, meta
+    meta['descriptions'] = {}
+    for line in src.readlines():
+        line = line.decode('utf-8').strip()
+        if ':' in line:
+            meta['descriptions'][line.split(':')[0]] = \
+                line.split(':')[1].strip()
+    return data, meta
 
 
 def read_pvgis_tmy(filename, pvgis_format=None, map_variables=True):
     """
     Read a TMY file downloaded from PVGIS.
 
+        .. versionchanged:: 0.13.0
+           The function now returns two items ``(data,meta)``. Previous
+           versions of this function returned four elements
+           ``(data,months_selected,inputs,meta)``. The ``inputs`` dictionary
+           and ``months_selected`` are now included in ``meta``, which has
+           changed structure to accommodate them.
+
     Parameters
     ----------
     filename : str, pathlib.Path, or file-like buffer
@@ -629,10 +647,6 @@ def read_pvgis_tmy(filename, pvgis_format=None, map_variables=True):
     -------
     data : pandas.DataFrame
         the weather data
-    months_selected : list
-        TMY year for each month, ``None`` for EPW
-    inputs : dict
-        the inputs, ``None`` for EPW
     metadata : list or dict
         file metadata
 
@@ -662,7 +676,6 @@ def read_pvgis_tmy(filename, pvgis_format=None, map_variables=True):
     # EPW: use the EPW parser from the pvlib.iotools epw.py module
     if outputformat == 'epw':
         data, meta = read_epw(filename)
-        months_selected, inputs = None, None
 
     # NOTE: json and csv output formats have parsers defined as private
     # functions in this module
@@ -676,16 +689,14 @@ def read_pvgis_tmy(filename, pvgis_format=None, map_variables=True):
         except AttributeError:  # str/path has no .read() attribute
             with open(str(filename), 'r') as fbuf:
                 src = json.load(fbuf)
-        data, months_selected, inputs, meta = _parse_pvgis_tmy_json(src)
+        data, meta = _parse_pvgis_tmy_json(src)
 
     elif outputformat == 'csv':
         try:
-            data, months_selected, inputs, meta = \
-                _parse_pvgis_tmy_csv(filename)
+            data, meta = _parse_pvgis_tmy_csv(filename)
         except AttributeError:  # str/path has no .read() attribute
             with open(str(filename), 'rb') as fbuf:
-                data, months_selected, inputs, meta = \
-                    _parse_pvgis_tmy_csv(fbuf)
+                data, meta = _parse_pvgis_tmy_csv(fbuf)
 
     elif outputformat == 'basic':
         err_msg = "outputformat='basic' is no longer supported, please use " \
@@ -702,7 +713,7 @@ def read_pvgis_tmy(filename, pvgis_format=None, map_variables=True):
     if map_variables:
         data = data.rename(columns=VARIABLE_MAP)
 
-    return data, months_selected, inputs, meta
+    return data, meta
 
 
 def get_pvgis_horizon(latitude, longitude, url=URL, **kwargs):
diff --git a/tests/iotools/test_pvgis.py b/tests/iotools/test_pvgis.py
@@ -371,9 +371,7 @@ def meta_expected():
 
 @pytest.fixture
 def csv_meta(meta_expected):
-    return [
-        f"{k}: {v['description']} ({v['units']})" for k, v
-        in meta_expected['outputs']['tmy_hourly']['variables'].items()]
+    return meta_expected['outputs']['tmy_hourly']['variables']
 
 
 @pytest.fixture
@@ -393,7 +391,15 @@ def test_get_pvgis_tmy(expected, month_year_expected, inputs_expected,
 
 def _compare_pvgis_tmy_json(expected, month_year_expected, inputs_expected,
                             meta_expected, pvgis_data):
-    data, months_selected, inputs, meta = pvgis_data
+    data, meta = pvgis_data
+
+    # Re-create original outputs (prior to #2470)
+    months_selected = meta['months_selected']
+    inputs = meta['inputs'].copy()
+    del inputs['descriptions']
+    meta['inputs'] = meta['inputs']['descriptions']
+    del meta['months_selected']
+
     # check each column of output separately
     for outvar in meta_expected['outputs']['tmy_hourly']['variables'].keys():
         assert np.allclose(data[outvar], expected[outvar])
@@ -419,28 +425,27 @@ def _compare_pvgis_tmy_json(expected, month_year_expected, inputs_expected,
 @pytest.mark.remote_data
 @pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY)
 def test_get_pvgis_tmy_kwargs(userhorizon_expected):
-    _, _, inputs, _ = get_pvgis_tmy(45, 8, usehorizon=False,
-                                    map_variables=False)
-    assert inputs['meteo_data']['use_horizon'] is False
-    data, _, _, _ = get_pvgis_tmy(
+    _, meta = get_pvgis_tmy(45, 8, usehorizon=False, map_variables=False)
+    assert meta['inputs']['meteo_data']['use_horizon'] is False
+    data, _ = get_pvgis_tmy(
         45, 8, userhorizon=[0, 10, 20, 30, 40, 15, 25, 5], map_variables=False)
     assert np.allclose(
         data['G(h)'], userhorizon_expected['G(h)'].values)
     assert np.allclose(
         data['Gb(n)'], userhorizon_expected['Gb(n)'].values)
     assert np.allclose(
         data['Gd(h)'], userhorizon_expected['Gd(h)'].values)
-    _, _, inputs, _ = get_pvgis_tmy(45, 8, startyear=2005, map_variables=False)
-    assert inputs['meteo_data']['year_min'] == 2005
-    _, _, inputs, _ = get_pvgis_tmy(45, 8, endyear=2016, map_variables=False)
-    assert inputs['meteo_data']['year_max'] == 2016
+    _, meta = get_pvgis_tmy(45, 8, startyear=2005, map_variables=False)
+    assert meta['inputs']['meteo_data']['year_min'] == 2005
+    _, meta = get_pvgis_tmy(45, 8, endyear=2016, map_variables=False)
+    assert meta['inputs']['meteo_data']['year_max'] == 2016
 
 
 @pytest.mark.remote_data
 @pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY)
 def test_get_pvgis_tmy_coerce_year():
     """test utc_offset and coerce_year work as expected"""
-    base_case, _, _, _ = get_pvgis_tmy(45, 8)  # Turin
+    base_case, _ = get_pvgis_tmy(45, 8)  # Turin
     assert str(base_case.index.tz) == 'UTC'
     assert base_case.index.name == 'time(UTC)'
     noon_test_data = [
@@ -449,9 +454,9 @@ def test_get_pvgis_tmy_coerce_year():
     cet_tz = 1  # Turin time is CET
     cet_name = 'Etc/GMT-1'
     # check indices of rolled data after converting timezone
-    pvgis_data, _, _, _ = get_pvgis_tmy(45, 8, roll_utc_offset=cet_tz)
-    jan1_midnight = pd.Timestamp('1990-01-01 00:00:00', tz=cet_name)
-    dec31_midnight = pd.Timestamp('1990-12-31 23:00:00', tz=cet_name)
+    pvgis_data, _ = get_pvgis_tmy(45, 8, roll_utc_offset=cet_tz)
+    jan1_midnight = pd.Timestamp('1990-01-01 00', tz=cet_name)
+    dec31_midnight = pd.Timestamp('1990-12-31 23', tz=cet_name)
     assert pvgis_data.index[0] == jan1_midnight
     assert pvgis_data.index[-1] == dec31_midnight
     assert pvgis_data.index.name == f'time({cet_name})'
@@ -461,20 +466,20 @@ def test_get_pvgis_tmy_coerce_year():
         assert all(test_case == expected)
     # repeat tests with year coerced
     test_yr = 2021
-    pvgis_data, _, _, _ = get_pvgis_tmy(
+    pvgis_data, _ = get_pvgis_tmy(
         45, 8, roll_utc_offset=cet_tz, coerce_year=test_yr)
-    jan1_midnight = pd.Timestamp(f'{test_yr}-01-01 00:00:00', tz=cet_name)
-    dec31_midnight = pd.Timestamp(f'{test_yr}-12-31 23:00:00', tz=cet_name)
+    jan1_midnight = pd.Timestamp(f'{test_yr}-01-01 00', tz=cet_name)
+    dec31_midnight = pd.Timestamp(f'{test_yr}-12-31 23', tz=cet_name)
     assert pvgis_data.index[0] == jan1_midnight
     assert pvgis_data.index[-1] == dec31_midnight
     assert pvgis_data.index.name == f'time({cet_name})'
     for m, test_case in enumerate(noon_test_data):
         expected = pvgis_data[pvgis_data.index.month == m+1].iloc[12+cet_tz]
         assert all(test_case == expected)
     # repeat tests with year coerced but utc offset none or zero
-    pvgis_data, _, _, _ = get_pvgis_tmy(45, 8, coerce_year=test_yr)
-    jan1_midnight = pd.Timestamp(f'{test_yr}-01-01 00:00:00', tz='UTC')
-    dec31_midnight = pd.Timestamp(f'{test_yr}-12-31 23:00:00', tz='UTC')
+    pvgis_data, _ = get_pvgis_tmy(45, 8, coerce_year=test_yr)
+    jan1_midnight = pd.Timestamp(f'{test_yr}-01-01 00', tz='UTC')
+    dec31_midnight = pd.Timestamp(f'{test_yr}-12-31 23', tz='UTC')
     assert pvgis_data.index[0] == jan1_midnight
     assert pvgis_data.index[-1] == dec31_midnight
     assert pvgis_data.index.name == 'time(UTC)'
@@ -494,7 +499,13 @@ def test_get_pvgis_tmy_csv(expected, month_year_expected, inputs_expected,
 
 def _compare_pvgis_tmy_csv(expected, month_year_expected, inputs_expected,
                            meta_expected, csv_meta, pvgis_data):
-    data, months_selected, inputs, meta = pvgis_data
+    data, meta = pvgis_data
+
+    # Re-create original outputs (prior to #2470)
+    months_selected = meta['months_selected']
+    inputs = meta['inputs'].copy()
+    meta = meta['descriptions']
+
     # check each column of output separately
     for outvar in meta_expected['outputs']['tmy_hourly']['variables'].keys():
         assert np.allclose(data[outvar], expected[outvar])
@@ -526,7 +537,7 @@ def test_get_pvgis_tmy_epw(expected, epw_meta):
 
 
 def _compare_pvgis_tmy_epw(expected, epw_meta, pvgis_data):
-    data, _, _, meta = pvgis_data
+    data, meta = pvgis_data
     assert np.allclose(data.ghi, expected['G(h)'])
     assert np.allclose(data.dni, expected['Gb(n)'])
     assert np.allclose(data.dhi, expected['Gd(h)'])
@@ -556,8 +567,8 @@ def test_get_pvgis_tmy_basic():
 
 @pytest.mark.remote_data
 @pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY)
-def test_get_pvgis_map_variables(pvgis_tmy_mapped_columns):
-    actual, _, _, _ = get_pvgis_tmy(45, 8, map_variables=True)
+def test_get_pvgis_tmy_map_variables(pvgis_tmy_mapped_columns):
+    actual, _ = get_pvgis_tmy(45, 8, map_variables=True)
     assert all(c in pvgis_tmy_mapped_columns for c in actual.columns)
 
 
@@ -580,7 +591,7 @@ def test_read_pvgis_horizon_invalid_coords():
 
 def test_read_pvgis_tmy_map_variables(pvgis_tmy_mapped_columns):
     fn = TESTS_DATA_DIR / 'tmy_45.000_8.000_2005_2023.json'
-    actual, _, _, _ = read_pvgis_tmy(fn, map_variables=True)
+    actual, _ = read_pvgis_tmy(fn, map_variables=True)
     assert all(c in pvgis_tmy_mapped_columns for c in actual.columns)