Skip to content
This repository was archived by the owner on Sep 11, 2023. It is now read-only.

Commit d9d83fd

Browse files
Merge pull request #714 from openclimatefix/issue/update-use-pv-live
get gsp list from pvlive, not url
2 parents ca58f19 + 43ea8f0 commit d9d83fd

File tree

4 files changed

+61
-51
lines changed

4 files changed

+61
-51
lines changed

nowcasting_dataset/data_sources/gsp/eso.py

Lines changed: 2 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -8,22 +8,19 @@
88
99
get_gsp_metadata_from_eso: gets the gsp metadata
1010
get_gsp_shape_from_eso: gets the shape of the gsp regions
11-
get_list_of_gsp_ids: gets a list of gsp_ids, by using 'get_gsp_metadata_from_eso'
1211
1312
Peter Dudfield
1413
2021-09-13
1514
"""
1615

17-
import json
1816
import logging
1917
import os
20-
import urllib
21-
from typing import List, Optional
2218
from urllib.request import urlopen
2319

2420
import geopandas as gpd
2521
import pandas as pd
2622

23+
from nowcasting_dataset.data_sources.gsp.pvlive import get_list_of_gsp_ids
2724
from nowcasting_dataset.geospatial import osgb_to_lat_lon
2825

2926
logger = logging.getLogger(__name__)
@@ -70,13 +67,7 @@ def get_gsp_metadata_from_eso(
7067
logger.debug("loading local file for ESO metadata:done")
7168
else:
7269
# we now get this from pvlive
73-
url = "https://api0.solar.sheffield.ac.uk/pvlive/api/v4/gsp_list"
74-
# TODO need to replace this, but not quite sure what it will be for the moment.
75-
with urllib.request.urlopen(url) as fileobj:
76-
d = json.loads(fileobj.read())
77-
78-
# make dataframe
79-
metadata = pd.DataFrame(data=d["data"], columns=d["meta"])
70+
metadata = get_list_of_gsp_ids(return_dataframe=True, return_national=False)
8071

8172
# drop duplicates
8273
metadata = metadata.drop_duplicates(subset=["gsp_id"])
@@ -212,36 +203,3 @@ def get_gsp_shape_from_eso(
212203
shape_gpd["RegionID"] = range(1, len(shape_gpd) + 1)
213204

214205
return shape_gpd
215-
216-
217-
def get_list_of_gsp_ids(maximum_number_of_gsp: Optional[int] = None) -> List[int]:
218-
"""
219-
Get list of gsp ids from ESO metadata
220-
221-
Args:
222-
maximum_number_of_gsp: Truncate list of GSPs to be no larger than this number of GSPs.
223-
Set to None to disable truncation.
224-
225-
Returns: list of gsp ids
226-
227-
"""
228-
# get a lit of gsp ids
229-
metadata = get_gsp_metadata_from_eso(calculate_centroid=False)
230-
231-
# get rid of nans, and duplicates
232-
metadata = metadata[~metadata["gsp_id"].isna()]
233-
metadata.drop_duplicates(subset=["gsp_id"], inplace=True)
234-
235-
# make into list
236-
gsp_ids = metadata["gsp_id"].to_list()
237-
gsp_ids = [int(gsp_id) for gsp_id in gsp_ids]
238-
239-
# adjust number of gsp_ids
240-
if maximum_number_of_gsp is None:
241-
maximum_number_of_gsp = len(metadata)
242-
if maximum_number_of_gsp > len(metadata):
243-
logger.warning(f"Only {len(metadata)} gsp available to load")
244-
if maximum_number_of_gsp < len(metadata):
245-
gsp_ids = gsp_ids[0:maximum_number_of_gsp]
246-
247-
return gsp_ids

nowcasting_dataset/data_sources/gsp/pvlive.py

Lines changed: 40 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,20 +2,57 @@
22
import logging
33
from concurrent import futures
44
from datetime import datetime, timedelta
5-
from typing import Optional
5+
from typing import List, Optional, Union
66

77
import pandas as pd
88
import pytz
99
from pvlive_api import PVLive
1010
from tqdm import tqdm
1111

12-
from nowcasting_dataset.data_sources.gsp.eso import get_list_of_gsp_ids
13-
1412
logger = logging.getLogger(__name__)
1513

1614
CHUNK_DURATION = timedelta(days=30)
1715

1816

17+
def get_list_of_gsp_ids(
18+
maximum_number_of_gsp: Optional[int] = None,
19+
return_dataframe: bool = False,
20+
return_national: bool = True,
21+
) -> Union[List[int], pd.DataFrame]:
22+
"""
23+
Get list of gsp ids from PVLive
24+
25+
Args:
26+
maximum_number_of_gsp: Truncate list of GSPs to be no larger than this number of GSPs.
27+
Set to None to disable truncation.
28+
return_dataframe: Return as a dataframe with columns 'gsp_id', 'gsp_name', 'pes_id'
29+
return_national: Return gsp_id=0 in this data
30+
31+
Returns: list of gsp ids
32+
33+
"""
34+
35+
# set up the PVLive class, although here we are getting historic data
36+
pvl = PVLive()
37+
gsp_ids = pvl.gsp_list
38+
39+
if not return_national:
40+
gsp_ids = gsp_ids[gsp_ids["gsp_id"] != 0]
41+
42+
# adjust number of gsp_ids
43+
if maximum_number_of_gsp is None:
44+
maximum_number_of_gsp = len(gsp_ids)
45+
if maximum_number_of_gsp > len(gsp_ids):
46+
logger.warning(f"Only {len(gsp_ids)} gsp available to load")
47+
if maximum_number_of_gsp < len(gsp_ids):
48+
gsp_ids = gsp_ids[0:maximum_number_of_gsp]
49+
50+
if return_dataframe:
51+
return gsp_ids
52+
else:
53+
return list(gsp_ids["gsp_id"])
54+
55+
1956
def load_pv_gsp_raw_data_from_pvlive(
2057
start: datetime, end: datetime, number_of_gsp: int = None, normalize_data: bool = True
2158
) -> pd.DataFrame:

nowcasting_dataset/manager/manager.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -82,11 +82,14 @@ def configure_loggers(
8282
def create_files_specifying_spatial_and_temporal_locations_of_each_example_if_necessary(
8383
self,
8484
) -> None:
85-
"""Creates CSV files specifying the locations of each example if those files don't exist yet.
85+
"""Creates CSV files specifying the locations of each example
86+
87+
This only happens if those files don't exist yet.
8688
8789
Creates one file per split, in this location:
8890
89-
`<output_data.filepath> / <split_name> / spatial_and_temporal_locations_of_each_example.csv`
91+
`<output_data.filepath> / <split_name> /
92+
spatial_and_temporal_locations_of_each_example.csv`
9093
9194
Creates the output directory if it does not exist.
9295

tests/data_sources/gsp/test_gsp_pvlive.py

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,20 @@
66

77
from nowcasting_dataset.data_sources.gsp.pvlive import (
88
get_installed_capacity,
9+
get_list_of_gsp_ids,
910
load_pv_gsp_raw_data_from_pvlive,
1011
)
1112

1213

14+
def test_get_list_of_gsp_ids():
15+
"""Test get lis of gsp ids"""
16+
gsp_id = get_list_of_gsp_ids(maximum_number_of_gsp=10)
17+
assert len(gsp_id) == 10
18+
19+
gsp_id = get_list_of_gsp_ids()
20+
assert len(gsp_id) == 318
21+
22+
1323
def test_load_gsp_raw_data_from_pvlive_one_gsp_one_day():
1424
"""
1525
Test that one gsp system data can be loaded, just for one day
@@ -58,7 +68,7 @@ def test_load_gsp_raw_data_from_pvlive_one_gsp():
5868

5969
assert isinstance(gsp_pv_df, pd.DataFrame)
6070
print(gsp_pv_df)
61-
assert len(gsp_pv_df) == (48 * 30)
71+
assert len(gsp_pv_df) == (48 * 30) + 1
6272
# 30 days in january,
6373
assert "datetime_gmt" in gsp_pv_df.columns
6474
assert "generation_mw" in gsp_pv_df.columns
@@ -89,4 +99,6 @@ def test_get_installed_capacity():
8999

90100
assert len(installed_capacity) == 3
91101
assert "installedcapacity_mwp" == installed_capacity.name
92-
assert installed_capacity.iloc[0] == 177.0772
102+
103+
# look at first GSP
104+
assert installed_capacity.iloc[1] == 177.0772

0 commit comments

Comments
 (0)