Skip to content

Commit

Permalink
0.5.1 - rename_epw_files fixed and improved
Browse files Browse the repository at this point in the history
  • Loading branch information
dsanchez-garcia committed Dec 18, 2022
1 parent 1f410a1 commit d4a8112
Show file tree
Hide file tree
Showing 47 changed files with 201,694 additions and 8,890 deletions.
8,768 changes: 0 additions & 8,768 deletions RCP26-2050_Macapa.epw

This file was deleted.

16 changes: 11 additions & 5 deletions accim/data/datawrangling.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ def __init__(
:param filelist: A list of the EPW files. If omitted, it will rename all sample_EPWs in that folder.
:param confirm_renaming: True or False, #to skip renaming confirmation on prompt command or console
:param confirm_deletion: True or False #to skip deletion confirmation on prompt command or console
:param match_cities: True or False. Default is False. It takes the possible city names and checks it is in an extensive list of cities. It's computatinally very expensive.
:param match_cities: True or False. Default is False. It takes the possible city names and checks it is in an extensive list of cities. It's computationally very expensive.
"""
import glob
import pandas as pd
Expand Down Expand Up @@ -342,7 +342,7 @@ def __init__(
amendments_list = list(
int(i)
for i
in input('\nIf any of the city or subcountry names need some amendment '
in input('\nIf any of the city or subcountry names needs some amendment '
'(if you are not happy with any of the available options, '
'you can exclude it from renaming at the next stage), '
'please enter the EPW IDs separated by space:').split()
Expand Down Expand Up @@ -391,13 +391,13 @@ def __init__(
print(epw_df.loc[i, "EPW_mod_filtered"])
print("If you haven't found yet the correct city or subcountry, it may be in the following address:")
print(epw_df.loc[i, "location_address"])
epw_df.loc[i, 'amended_city_or_subcountry'] = input('Please enter the amended city or subcountry, which must be unique: ')
epw_df.loc[i, 'amended_city_or_subcountry'] = input('Please enter the amended city or subcountry, which must be unique: ').replace(' ', '-')
# todo check again if there are repeated combinations EPW_country-EPW_city_or_subcountry
temp_name = f'{epw_df.loc[i, "EPW_country"]}_{epw_df.loc[i, "amended_city_or_subcountry"]}_{epw_df.loc[i, "EPW_scenario_year"]}'
epw_df.loc[i, 'EPW_new_names'] = temp_name
while list(epw_df['EPW_new_names']).count(temp_name) > 1:
print(f"{epw_df.loc[i, 'EPW_new_names']} already exists in the EPW file list, therefore you need to select a different city or subcountry name.")
epw_df.loc[i, 'amended_city_or_subcountry'] = input('Please enter again the amended city or subcountry, which must be unique: ')
epw_df.loc[i, 'amended_city_or_subcountry'] = input('Please enter again the amended city or subcountry, which must be unique: ').replace(' ', '-')
temp_name = f'{epw_df.loc[i, "EPW_country"]}_{epw_df.loc[i, "amended_city_or_subcountry"]}_{epw_df.loc[i, "EPW_scenario_year"]}'
epw_df.loc[i, 'EPW_new_names'] = temp_name

Expand All @@ -406,7 +406,13 @@ def __init__(
print(f'ID: {i} / {epw_df.loc[i, "EPW_names"]} / {epw_df.loc[i, "EPW_new_names"]}')


exclusion_list = list(name for name in input('\nIf you want to exclude some EPWs from renaming, please enter the new names separated by space:').split())
exclusion_list = list(
name
for name
in input('\nIf you want to exclude some EPWs from renaming, '
'please enter the new names separated by space, '
'otherwise, hit enter to continue:').split()
)

if confirm_renaming is None:
proceed = input('\nDo you want to rename the file or files? [y/n]:')
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
"filelist=list_of_files_to_rename, # if omitted, it will rename all EPWs in that folder \n",
"confirm_renaming=True or False, #to skip renaming confirmation on prompt command or console \n",
"confirm_deletion=True or False #to skip deletion confirmation on prompt command or console \n",
"match_cities: True or False. Default is False. It's computationally very expensive. \n",
")"
]
},
Expand All @@ -39,7 +40,7 @@
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": 8,
"id": "8bcf4ced",
"metadata": {},
"outputs": [
Expand All @@ -49,10 +50,12 @@
"['.ipynb_checkpoints',\n",
" 'GBR_Aberdeen.Dyce.030910_IWEC.epw',\n",
" 'GBR_London.Gatwick.037760_IWEC.epw',\n",
" 'using_rename_epw_files.ipynb']"
" 'RCP26_2100_GC03_Ponta_Grossa.epw',\n",
" 'using_rename_epw_files.ipynb',\n",
" '__init__.py']"
]
},
"execution_count": 12,
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -67,20 +70,20 @@
"id": "c282206a",
"metadata": {},
"source": [
"You can see there are 2 EPW files, which are:"
"You can see there are 3 EPW files, which are:"
]
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 9,
"id": "33e64dbd",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['GBR_Aberdeen.Dyce.030910_IWEC.epw', 'GBR_London.Gatwick.037760_IWEC.epw']\n"
"['GBR_Aberdeen.Dyce.030910_IWEC.epw', 'GBR_London.Gatwick.037760_IWEC.epw', 'RCP26_2100_GC03_Ponta_Grossa.epw']\n"
]
}
],
Expand All @@ -99,18 +102,10 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 13,
"id": "4254ac77",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\users\\sanga\\appdata\\local\\programs\\python\\python39\\lib\\site-packages\\accim\\data\\datawrangling.py:44: FutureWarning: The default value of regex will change from True to False in a future version.\n",
" self.epw_df['EPW_names'] = self.epw_df['EPW_file_names'].str.replace('.epw', '')\n"
]
},
{
"name": "stdout",
"output_type": "stream",
Expand All @@ -120,71 +115,86 @@
"GBR_London.Gatwick.037760_IWEC.epw\n",
"Since no match has been found between scenarios and EPW file name, Present year has been assigned to the following EPW files:\n",
"GBR_Aberdeen.Dyce.030910_IWEC.epw\n",
"GBR_London.Gatwick.037760_IWEC.epw\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\users\\sanga\\appdata\\local\\programs\\python\\python39\\lib\\site-packages\\accim\\data\\datawrangling.py:124: FutureWarning: The default value of regex will change from True to False in a future version. In addition, single character regular expressions will*not* be treated as literal strings when regex=True.\n",
" self.epw_df['EPW_mod'] = self.epw_df['EPW_names'].str.replace('-', '_').str.replace('.', '_').str.split('_')\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"The previous names of the EPW files are:\n",
"GBR_Aberdeen.Dyce.030910_IWEC\n",
"GBR_London.Gatwick.037760_IWEC\n",
"And the new names of the EPW files are going to be:\n",
"United-Kingdom_Aberdeen_Present\n",
"United-Kingdom_London_Present\n"
"GBR_London.Gatwick.037760_IWEC.epw\n",
"\n",
"The previous and new names of the EPW files and their unique IDs are:\n",
"ID: 0 / GBR_Aberdeen.Dyce.030910_IWEC / United-Kingdom_Aberdeen_Present\n",
"ID: 1 / GBR_London.Gatwick.037760_IWEC / United-Kingdom_Gatwick_Present\n",
"ID: 2 / RCP26_2100_GC03_Ponta_Grossa / Brazil_Grossa_RCP26-2100\n",
"\n",
"If any of the city or subcountry names needs some amendment (if you are not happy with any of the available options, you can exclude it from renaming at the next stage), please enter the EPW IDs separated by space:1 2\n",
"\n",
"Regarding the file ID: 1 / old name: GBR_London.Gatwick.037760_IWEC / new name: United-Kingdom_Gatwick_Present, the available options for city or subcountry are:\n",
"['GBR', 'London', 'Gatwick', '037760', 'IWEC']\n",
"If you haven't found yet the correct city or subcountry, it may be in the following address:\n",
"London Gatwick Airport, North Terminal Approach, Crawley, West Sussex, England, RH6 0PH, United Kingdom\n",
"Please enter the amended city or subcountry, which must be unique: London\n",
"\n",
"Regarding the file ID: 2 / old name: RCP26_2100_GC03_Ponta_Grossa / new name: Brazil_Grossa_RCP26-2100, the available options for city or subcountry are:\n",
"['GC03', 'Ponta', 'Grossa', 'Ponta Grossa', 'Regiao Geografica Imediata de Ponta Grossa']\n",
"If you haven't found yet the correct city or subcountry, it may be in the following address:\n",
"Praça Barão de Guaraúna, Centro, Ponta Grossa, Região Geográfica Imediata de Ponta Grossa, Região Geográfica Intermediária de Ponta Grossa, Paraná, Região Sul, 84010-050, Brasil\n",
"Please enter the amended city or subcountry, which must be unique: Ponta Grossa\n",
"\n",
"The previous and new names of the EPW files after city or subcountry name amendments and their unique IDs are:\n",
"ID: 1 / GBR_London.Gatwick.037760_IWEC / United-Kingdom_London_Present\n",
"ID: 2 / RCP26_2100_GC03_Ponta_Grossa / Brazil_Ponta-Grossa_RCP26-2100\n",
"\n",
"If you want to exclude some EPWs from renaming, please enter the new names separated by space, otherwise, hit enter to continue:\n",
"\n",
"Do you want to rename the file or files? [y/n]:y\n",
"The file GBR_Aberdeen.Dyce.030910_IWEC has been renamed to United-Kingdom_Aberdeen_Present\n",
"The file GBR_London.Gatwick.037760_IWEC has been renamed to United-Kingdom_London_Present\n",
"The file RCP26_2100_GC03_Ponta_Grossa has been renamed to Brazil_Ponta-Grossa_RCP26-2100\n"
]
},
{
"data": {
"text/plain": [
"<accim.data.datawrangling.rename_epw_files at 0x20b3aa45fd0>"
"<accim.data.datawrangling.rename_epw_files at 0x1e0d61a3520>"
]
},
"execution_count": 8,
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from accim.data.datawrangling import rename_epw_files\n",
"rename_epw_files(confirm_renaming=True, confirm_deletion=False)"
"rename_epw_files(\n",
" confirm_deletion=False,\n",
")"
]
},
{
"cell_type": "markdown",
"id": "34031be9",
"metadata": {},
"source": [
"You can see above that there was no reference to RCP scenarios in the original EPW file name, therefore these has been considered as Present scenario. The same applies to the Year field. Finally, states the previous and the new names of the EPWs. So, now, let's see what files we do have in the folder."
"You can see above that 2 of the original EPW file names contained no reference to an RCP scenario, therefore these have been assigned the Present scenario. The same applies to the Year field. Finally, the output states the previous and the new names of the EPWs. So, now, let's see what files we have in the folder."
]
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 14,
"id": "16ae976e",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['.ipynb_checkpoints',\n",
" 'Brazil_Ponta-Grossa_RCP26-2100.epw',\n",
" 'GBR_Aberdeen.Dyce.030910_IWEC.epw',\n",
" 'GBR_London.Gatwick.037760_IWEC.epw',\n",
" 'RCP26_2100_GC03_Ponta_Grossa.epw',\n",
" 'United-Kingdom_Aberdeen_Present.epw',\n",
" 'United-Kingdom_London_Present.epw',\n",
" 'using_rename_epw_files.ipynb']"
" 'using_rename_epw_files.ipynb',\n",
" '__init__.py']"
]
},
"execution_count": 9,
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -203,15 +213,15 @@
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": 15,
"id": "6e80b813",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['United-Kingdom_Aberdeen_Present.epw', 'United-Kingdom_London_Present.epw']\n"
"['Brazil_Ponta-Grossa_RCP26-2100.epw', 'United-Kingdom_Aberdeen_Present.epw', 'United-Kingdom_London_Present.epw']\n"
]
}
],
Expand All @@ -230,14 +240,22 @@
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": 16,
"id": "dd09f3f6",
"metadata": {},
"outputs": [],
"source": [
"for i in new_epws:\n",
" os.remove(i)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9d38da62",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
Expand Down
Loading

0 comments on commit d4a8112

Please sign in to comment.