Skip to content

Commit d702353

Browse files
authored
Standardize pvgis_tmy to return (data,meta) (#2470)
* Initial commit
* Fix tests, whatsnew, linter
* Feedback from review
1 parent: 6e6a71b; commit: d702353

File tree

4 files changed, with 87 additions and 62 deletions.

docs/sphinx/source/user_guide/modeling_topics/weather_data.rst

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ them.
7676
Usage
7777
-----
7878

79-
With some exceptions, the :py:mod:`pvlib.iotools` functions
79+
The :py:mod:`pvlib.iotools` functions
8080
provide a uniform interface for accessing data across many formats.
8181
Specifically, :py:mod:`pvlib.iotools` functions usually return two objects:
8282
a :py:class:`pandas.DataFrame` of the actual dataset, plus a metadata
@@ -89,8 +89,7 @@ Typical usage looks something like this:
8989

9090
.. code-block:: python
9191
92-
# get_pvgis_tmy returns two additional values besides df and metadata
93-
df, _, _, metadata = pvlib.iotools.get_pvgis_tmy(latitude, longitude, map_variables=True)
92+
df, metadata = pvlib.iotools.get_pvgis_tmy(latitude, longitude, map_variables=True)
9493
9594
This code will fetch a Typical Meteorological Year (TMY) dataset from PVGIS,
9695
returning a :py:class:`pandas.DataFrame` containing the hourly weather data

docs/sphinx/source/whatsnew/v0.12.1.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,10 @@ Breaking Changes
1111
following the iotools convention instead of ``(data,inputs,meta)``.
1212
The ``inputs`` dictionary is now included in ``meta``, which
1313
has changed structure to accommodate it. (:pull:`2462`)
14+
* The functions :py:func:`~pvlib.iotools.read_pvgis_tmy` and
15+
:py:func:`~pvlib.iotools.get_pvgis_tmy` now return ``(data,meta)``
16+
following the iotools convention instead of ``(data,months_selected,inputs,meta)``.
17+
(:pull:`2470`)
1418
* Remove ``outputformat='basic'`` option in :py:func:`~pvlib.iotools.get_pvgis_tmy`.
1519
(:pull:`2416`)
1620

pvlib/iotools/pvgis.py

Lines changed: 43 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -261,7 +261,7 @@ def _parse_pvgis_hourly_json(src, map_variables):
261261

262262
def _parse_pvgis_hourly_csv(src, map_variables):
263263
# The first 4 rows are latitude, longitude, elevation, radiation database
264-
metadata = {'inputs': {}}
264+
metadata = {'inputs': {}, 'descriptions': {}}
265265
# 'location' metadata
266266
# 'Latitude (decimal degrees): 45.000\r\n'
267267
metadata['inputs']['latitude'] = float(src.readline().split(':')[1])
@@ -440,6 +440,13 @@ def get_pvgis_tmy(latitude, longitude, outputformat='json', usehorizon=True,
440440
441441
For more information see the PVGIS [1]_ TMY tool documentation [2]_.
442442
443+
.. versionchanged:: 0.13.0
444+
The function now returns two items ``(data,meta)``. Previous
445+
versions of this function returned four elements
446+
``(data,months_selected,inputs,meta)``. The ``inputs`` dictionary
447+
and ``months_selected`` are now included in ``meta``, which has
448+
changed structure to accommodate it.
449+
443450
Parameters
444451
----------
445452
latitude : float
@@ -478,10 +485,6 @@ def get_pvgis_tmy(latitude, longitude, outputformat='json', usehorizon=True,
478485
-------
479486
data : pandas.DataFrame
480487
the weather data
481-
months_selected : list
482-
TMY year for each month, ``None`` for EPW
483-
inputs : dict
484-
the inputs, ``None`` for EPW
485488
metadata : list or dict
486489
file metadata
487490
@@ -527,17 +530,16 @@ def get_pvgis_tmy(latitude, longitude, outputformat='json', usehorizon=True,
527530
else:
528531
raise requests.HTTPError(err_msg['message'])
529532
# initialize data to None in case API fails to respond to bad outputformat
530-
data = None, None, None, None
533+
data = None, None
531534
if outputformat == 'json':
532535
src = res.json()
533-
data, months_selected, inputs, meta = _parse_pvgis_tmy_json(src)
536+
data, meta = _parse_pvgis_tmy_json(src)
534537
elif outputformat == 'csv':
535538
with io.BytesIO(res.content) as src:
536-
data, months_selected, inputs, meta = _parse_pvgis_tmy_csv(src)
539+
data, meta = _parse_pvgis_tmy_csv(src)
537540
elif outputformat == 'epw':
538541
with io.StringIO(res.content.decode('utf-8')) as src:
539542
data, meta = read_epw(src)
540-
months_selected, inputs = None, None
541543
elif outputformat == 'basic':
542544
err_msg = ("outputformat='basic' is no longer supported by pvlib, "
543545
"please use outputformat='csv' instead.")
@@ -551,34 +553,37 @@ def get_pvgis_tmy(latitude, longitude, outputformat='json', usehorizon=True,
551553
coerce_year = coerce_year or 1990
552554
data = _coerce_and_roll_tmy(data, roll_utc_offset, coerce_year)
553555

554-
return data, months_selected, inputs, meta
556+
return data, meta
555557

556558

557559
def _parse_pvgis_tmy_json(src):
558-
inputs = src['inputs']
559-
meta = src['meta']
560-
months_selected = src['outputs']['months_selected']
560+
meta = src['meta'].copy()
561+
# Override the "inputs" in metadata
562+
meta['inputs'] = src['inputs']
563+
# Re-add the inputs in metadata one-layer down
564+
meta['inputs']['descriptions'] = src['meta']['inputs']
565+
meta['months_selected'] = src['outputs']['months_selected']
561566
data = pd.DataFrame(src['outputs']['tmy_hourly'])
562567
data.index = pd.to_datetime(
563568
data['time(UTC)'], format='%Y%m%d:%H%M', utc=True)
564569
data = data.drop('time(UTC)', axis=1)
565-
return data, months_selected, inputs, meta
570+
return data, meta
566571

567572

568573
def _parse_pvgis_tmy_csv(src):
569574
# the first 3 rows are latitude, longitude, elevation
570-
inputs = {}
575+
meta = {'inputs': {}, 'descriptions': {}}
571576
# 'Latitude (decimal degrees): 45.000\r\n'
572-
inputs['latitude'] = float(src.readline().split(b':')[1])
577+
meta['inputs']['latitude'] = float(src.readline().split(b':')[1])
573578
# 'Longitude (decimal degrees): 8.000\r\n'
574-
inputs['longitude'] = float(src.readline().split(b':')[1])
579+
meta['inputs']['longitude'] = float(src.readline().split(b':')[1])
575580
# Elevation (m): 1389.0\r\n
576-
inputs['elevation'] = float(src.readline().split(b':')[1])
581+
meta['inputs']['elevation'] = float(src.readline().split(b':')[1])
577582

578583
# TMY has an extra line here: Irradiance Time Offset (h): 0.1761\r\n
579584
line = src.readline()
580585
if line.startswith(b'Irradiance Time Offset'):
581-
inputs['irradiance time offset'] = float(line.split(b':')[1])
586+
meta['inputs']['irradiance time offset'] = float(line.split(b':')[1])
582587
src.readline() # skip over the "month,year\r\n"
583588
else:
584589
# `line` is already the "month,year\r\n" line, so nothing to do
@@ -589,6 +594,7 @@ def _parse_pvgis_tmy_csv(src):
589594
for month in range(12):
590595
months_selected.append(
591596
{'month': month+1, 'year': int(src.readline().split(b',')[1])})
597+
meta['months_selected'] = months_selected
592598
# then there's the TMY (typical meteorological year) data
593599
# first there's a header row:
594600
# time(UTC),T2m,RH,G(h),Gb(n),Gd(h),IR(h),WS10m,WD10m,SP
@@ -601,14 +607,26 @@ def _parse_pvgis_tmy_csv(src):
601607
data = pd.DataFrame(data, dtype=float)
602608
data.index = dtidx
603609
# finally there's some meta data
604-
meta = [line.decode('utf-8').strip() for line in src.readlines()]
605-
return data, months_selected, inputs, meta
610+
meta['descriptions'] = {}
611+
for line in src.readlines():
612+
line = line.decode('utf-8').strip()
613+
if ':' in line:
614+
meta['descriptions'][line.split(':')[0]] = \
615+
line.split(':')[1].strip()
616+
return data, meta
606617

607618

608619
def read_pvgis_tmy(filename, pvgis_format=None, map_variables=True):
609620
"""
610621
Read a TMY file downloaded from PVGIS.
611622
623+
.. versionchanged:: 0.13.0
624+
The function now returns two items ``(data,meta)``. Previous
625+
versions of this function returned four elements
626+
``(data,months_selected,inputs,meta)``. The ``inputs`` dictionary
627+
and ``months_selected`` are now included in ``meta``, which has
628+
changed structure to accommodate it.
629+
612630
Parameters
613631
----------
614632
filename : str, pathlib.Path, or file-like buffer
@@ -629,10 +647,6 @@ def read_pvgis_tmy(filename, pvgis_format=None, map_variables=True):
629647
-------
630648
data : pandas.DataFrame
631649
the weather data
632-
months_selected : list
633-
TMY year for each month, ``None`` for EPW
634-
inputs : dict
635-
the inputs, ``None`` for EPW
636650
metadata : list or dict
637651
file metadata
638652
@@ -662,7 +676,6 @@ def read_pvgis_tmy(filename, pvgis_format=None, map_variables=True):
662676
# EPW: use the EPW parser from the pvlib.iotools epw.py module
663677
if outputformat == 'epw':
664678
data, meta = read_epw(filename)
665-
months_selected, inputs = None, None
666679

667680
# NOTE: json and csv output formats have parsers defined as private
668681
# functions in this module
@@ -676,16 +689,14 @@ def read_pvgis_tmy(filename, pvgis_format=None, map_variables=True):
676689
except AttributeError: # str/path has no .read() attribute
677690
with open(str(filename), 'r') as fbuf:
678691
src = json.load(fbuf)
679-
data, months_selected, inputs, meta = _parse_pvgis_tmy_json(src)
692+
data, meta = _parse_pvgis_tmy_json(src)
680693

681694
elif outputformat == 'csv':
682695
try:
683-
data, months_selected, inputs, meta = \
684-
_parse_pvgis_tmy_csv(filename)
696+
data, meta = _parse_pvgis_tmy_csv(filename)
685697
except AttributeError: # str/path has no .read() attribute
686698
with open(str(filename), 'rb') as fbuf:
687-
data, months_selected, inputs, meta = \
688-
_parse_pvgis_tmy_csv(fbuf)
699+
data, meta = _parse_pvgis_tmy_csv(fbuf)
689700

690701
elif outputformat == 'basic':
691702
err_msg = "outputformat='basic' is no longer supported, please use " \
@@ -702,7 +713,7 @@ def read_pvgis_tmy(filename, pvgis_format=None, map_variables=True):
702713
if map_variables:
703714
data = data.rename(columns=VARIABLE_MAP)
704715

705-
return data, months_selected, inputs, meta
716+
return data, meta
706717

707718

708719
def get_pvgis_horizon(latitude, longitude, url=URL, **kwargs):

tests/iotools/test_pvgis.py

Lines changed: 38 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -371,9 +371,7 @@ def meta_expected():
371371

372372
@pytest.fixture
373373
def csv_meta(meta_expected):
374-
return [
375-
f"{k}: {v['description']} ({v['units']})" for k, v
376-
in meta_expected['outputs']['tmy_hourly']['variables'].items()]
374+
return meta_expected['outputs']['tmy_hourly']['variables']
377375

378376

379377
@pytest.fixture
@@ -393,7 +391,15 @@ def test_get_pvgis_tmy(expected, month_year_expected, inputs_expected,
393391

394392
def _compare_pvgis_tmy_json(expected, month_year_expected, inputs_expected,
395393
meta_expected, pvgis_data):
396-
data, months_selected, inputs, meta = pvgis_data
394+
data, meta = pvgis_data
395+
396+
# Re-create original outputs (prior to #2470)
397+
months_selected = meta['months_selected']
398+
inputs = meta['inputs'].copy()
399+
del inputs['descriptions']
400+
meta['inputs'] = meta['inputs']['descriptions']
401+
del meta['months_selected']
402+
397403
# check each column of output separately
398404
for outvar in meta_expected['outputs']['tmy_hourly']['variables'].keys():
399405
assert np.allclose(data[outvar], expected[outvar])
@@ -419,28 +425,27 @@ def _compare_pvgis_tmy_json(expected, month_year_expected, inputs_expected,
419425
@pytest.mark.remote_data
420426
@pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY)
421427
def test_get_pvgis_tmy_kwargs(userhorizon_expected):
422-
_, _, inputs, _ = get_pvgis_tmy(45, 8, usehorizon=False,
423-
map_variables=False)
424-
assert inputs['meteo_data']['use_horizon'] is False
425-
data, _, _, _ = get_pvgis_tmy(
428+
_, meta = get_pvgis_tmy(45, 8, usehorizon=False, map_variables=False)
429+
assert meta['inputs']['meteo_data']['use_horizon'] is False
430+
data, _ = get_pvgis_tmy(
426431
45, 8, userhorizon=[0, 10, 20, 30, 40, 15, 25, 5], map_variables=False)
427432
assert np.allclose(
428433
data['G(h)'], userhorizon_expected['G(h)'].values)
429434
assert np.allclose(
430435
data['Gb(n)'], userhorizon_expected['Gb(n)'].values)
431436
assert np.allclose(
432437
data['Gd(h)'], userhorizon_expected['Gd(h)'].values)
433-
_, _, inputs, _ = get_pvgis_tmy(45, 8, startyear=2005, map_variables=False)
434-
assert inputs['meteo_data']['year_min'] == 2005
435-
_, _, inputs, _ = get_pvgis_tmy(45, 8, endyear=2016, map_variables=False)
436-
assert inputs['meteo_data']['year_max'] == 2016
438+
_, meta = get_pvgis_tmy(45, 8, startyear=2005, map_variables=False)
439+
assert meta['inputs']['meteo_data']['year_min'] == 2005
440+
_, meta = get_pvgis_tmy(45, 8, endyear=2016, map_variables=False)
441+
assert meta['inputs']['meteo_data']['year_max'] == 2016
437442

438443

439444
@pytest.mark.remote_data
440445
@pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY)
441446
def test_get_pvgis_tmy_coerce_year():
442447
"""test utc_offset and coerce_year work as expected"""
443-
base_case, _, _, _ = get_pvgis_tmy(45, 8) # Turin
448+
base_case, _ = get_pvgis_tmy(45, 8) # Turin
444449
assert str(base_case.index.tz) == 'UTC'
445450
assert base_case.index.name == 'time(UTC)'
446451
noon_test_data = [
@@ -449,9 +454,9 @@ def test_get_pvgis_tmy_coerce_year():
449454
cet_tz = 1 # Turin time is CET
450455
cet_name = 'Etc/GMT-1'
451456
# check indices of rolled data after converting timezone
452-
pvgis_data, _, _, _ = get_pvgis_tmy(45, 8, roll_utc_offset=cet_tz)
453-
jan1_midnight = pd.Timestamp('1990-01-01 00:00:00', tz=cet_name)
454-
dec31_midnight = pd.Timestamp('1990-12-31 23:00:00', tz=cet_name)
457+
pvgis_data, _ = get_pvgis_tmy(45, 8, roll_utc_offset=cet_tz)
458+
jan1_midnight = pd.Timestamp('1990-01-01 00', tz=cet_name)
459+
dec31_midnight = pd.Timestamp('1990-12-31 23', tz=cet_name)
455460
assert pvgis_data.index[0] == jan1_midnight
456461
assert pvgis_data.index[-1] == dec31_midnight
457462
assert pvgis_data.index.name == f'time({cet_name})'
@@ -461,20 +466,20 @@ def test_get_pvgis_tmy_coerce_year():
461466
assert all(test_case == expected)
462467
# repeat tests with year coerced
463468
test_yr = 2021
464-
pvgis_data, _, _, _ = get_pvgis_tmy(
469+
pvgis_data, _ = get_pvgis_tmy(
465470
45, 8, roll_utc_offset=cet_tz, coerce_year=test_yr)
466-
jan1_midnight = pd.Timestamp(f'{test_yr}-01-01 00:00:00', tz=cet_name)
467-
dec31_midnight = pd.Timestamp(f'{test_yr}-12-31 23:00:00', tz=cet_name)
471+
jan1_midnight = pd.Timestamp(f'{test_yr}-01-01 00', tz=cet_name)
472+
dec31_midnight = pd.Timestamp(f'{test_yr}-12-31 23', tz=cet_name)
468473
assert pvgis_data.index[0] == jan1_midnight
469474
assert pvgis_data.index[-1] == dec31_midnight
470475
assert pvgis_data.index.name == f'time({cet_name})'
471476
for m, test_case in enumerate(noon_test_data):
472477
expected = pvgis_data[pvgis_data.index.month == m+1].iloc[12+cet_tz]
473478
assert all(test_case == expected)
474479
# repeat tests with year coerced but utc offset none or zero
475-
pvgis_data, _, _, _ = get_pvgis_tmy(45, 8, coerce_year=test_yr)
476-
jan1_midnight = pd.Timestamp(f'{test_yr}-01-01 00:00:00', tz='UTC')
477-
dec31_midnight = pd.Timestamp(f'{test_yr}-12-31 23:00:00', tz='UTC')
480+
pvgis_data, _ = get_pvgis_tmy(45, 8, coerce_year=test_yr)
481+
jan1_midnight = pd.Timestamp(f'{test_yr}-01-01 00', tz='UTC')
482+
dec31_midnight = pd.Timestamp(f'{test_yr}-12-31 23', tz='UTC')
478483
assert pvgis_data.index[0] == jan1_midnight
479484
assert pvgis_data.index[-1] == dec31_midnight
480485
assert pvgis_data.index.name == 'time(UTC)'
@@ -494,7 +499,13 @@ def test_get_pvgis_tmy_csv(expected, month_year_expected, inputs_expected,
494499

495500
def _compare_pvgis_tmy_csv(expected, month_year_expected, inputs_expected,
496501
meta_expected, csv_meta, pvgis_data):
497-
data, months_selected, inputs, meta = pvgis_data
502+
data, meta = pvgis_data
503+
504+
# Re-create original outputs (prior to #2470)
505+
months_selected = meta['months_selected']
506+
inputs = meta['inputs'].copy()
507+
meta = meta['descriptions']
508+
498509
# check each column of output separately
499510
for outvar in meta_expected['outputs']['tmy_hourly']['variables'].keys():
500511
assert np.allclose(data[outvar], expected[outvar])
@@ -526,7 +537,7 @@ def test_get_pvgis_tmy_epw(expected, epw_meta):
526537

527538

528539
def _compare_pvgis_tmy_epw(expected, epw_meta, pvgis_data):
529-
data, _, _, meta = pvgis_data
540+
data, meta = pvgis_data
530541
assert np.allclose(data.ghi, expected['G(h)'])
531542
assert np.allclose(data.dni, expected['Gb(n)'])
532543
assert np.allclose(data.dhi, expected['Gd(h)'])
@@ -556,8 +567,8 @@ def test_get_pvgis_tmy_basic():
556567

557568
@pytest.mark.remote_data
558569
@pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY)
559-
def test_get_pvgis_map_variables(pvgis_tmy_mapped_columns):
560-
actual, _, _, _ = get_pvgis_tmy(45, 8, map_variables=True)
570+
def test_get_pvgis_tmy_map_variables(pvgis_tmy_mapped_columns):
571+
actual, _ = get_pvgis_tmy(45, 8, map_variables=True)
561572
assert all(c in pvgis_tmy_mapped_columns for c in actual.columns)
562573

563574

@@ -580,7 +591,7 @@ def test_read_pvgis_horizon_invalid_coords():
580591

581592
def test_read_pvgis_tmy_map_variables(pvgis_tmy_mapped_columns):
582593
fn = TESTS_DATA_DIR / 'tmy_45.000_8.000_2005_2023.json'
583-
actual, _, _, _ = read_pvgis_tmy(fn, map_variables=True)
594+
actual, _ = read_pvgis_tmy(fn, map_variables=True)
584595
assert all(c in pvgis_tmy_mapped_columns for c in actual.columns)
585596

586597

0 commit comments

Comments
 (0)