From 88cc3537a37ca23ddb45d47c0d985b08b639d72d Mon Sep 17 00:00:00 2001 From: Alexei Schwab Date: Mon, 25 Nov 2024 15:23:41 +0000 Subject: [PATCH 1/5] Fix: Generalising score field based on user input --- src/create_webmap.py | 24 ++++++++++++++---------- src/points_to_h3.py | 11 ++++++++++- 2 files changed, 24 insertions(+), 11 deletions(-) diff --git a/src/create_webmap.py b/src/create_webmap.py index 3040d6a..f544c09 100644 --- a/src/create_webmap.py +++ b/src/create_webmap.py @@ -29,6 +29,10 @@ def main( Path, typer.Argument(help="Path to file containing h3 polygons."), ], + score_field: Annotated[ + str, + typer.Argument(help="Name of field containing GVI score."), + ], filename: Annotated[ str, typer.Argument(help="(Optional) Path to file for HTML output.") ] = "./data/processed/gvi_webmap.html", @@ -49,7 +53,7 @@ def main( gdf = gdf.to_crs("EPSG:4326") # Round GVI score to make map labels more readable - gdf["gvi_score"] = round(gdf["gvi_score"], 2) + gdf[score_field] = round(gdf[score_field], 2) # get central coordinates of all features centre = ( @@ -88,8 +92,8 @@ def main( # Lookup the colourmap values for each GVI score cmap = matplotlib.colormaps["Greens"] - gdf["gvi_norm"] = (gdf.gvi_score - np.min(gdf.gvi_score)) / ( - np.max(gdf.gvi_score) - np.min(gdf.gvi_score) + gdf["gvi_norm"] = (gdf[score_field] - np.min(gdf[score_field])) / ( + np.max(gdf[score_field]) - np.min(gdf[score_field]) ) gdf["html_color"] = gdf["gvi_norm"].apply( lambda x: matplotlib.colors.rgb2hex(cmap(x)) @@ -99,25 +103,25 @@ def main( # Pick 4 evenly-spaced values from the gvi scores to use in the legend legend_gvi = list( np.arange( - gdf.gvi_score.min(), - gdf.gvi_score.max(), - (gdf.gvi_score.max() - gdf.gvi_score.min()) / 4, + gdf[score_field].min(), + gdf[score_field].max(), + (gdf[score_field].max() - gdf[score_field].min()) / 4, dtype=float, ) ) # Generate labels by looking up what the GVI score would be for those values legend_label_1 = round( - 
np.linspace(gdf.gvi_score.min(), gdf.gvi_score.max(), 100)[0], 1 + np.linspace(gdf[score_field].min(), gdf[score_field].max(), 100)[0], 1 ) legend_label_2 = round( - np.linspace(gdf.gvi_score.min(), gdf.gvi_score.max(), 100)[33], 1 + np.linspace(gdf[score_field].min(), gdf[score_field].max(), 100)[33], 1 ) legend_label_3 = round( - np.linspace(gdf.gvi_score.min(), gdf.gvi_score.max(), 100)[66], 1 + np.linspace(gdf[score_field].min(), gdf[score_field].max(), 100)[66], 1 ) legend_label_4 = round( - np.linspace(gdf.gvi_score.min(), gdf.gvi_score.max(), 100)[99], 1 + np.linspace(gdf[score_field].min(), gdf[score_field].max(), 100)[99], 1 ) # Normalise the label values to lookup against the colourmap diff --git a/src/points_to_h3.py b/src/points_to_h3.py index 03323b2..0c25727 100644 --- a/src/points_to_h3.py +++ b/src/points_to_h3.py @@ -20,6 +20,10 @@ def main( Path, typer.Argument(help="Path to file containing point layer with GVI scores."), ], + score_field: Annotated[ + str, + typer.Argument(help="Field containing the score"), + ], output_file: Annotated[ Path, typer.Argument( @@ -39,6 +43,7 @@ def main( Args: input_file: Path to file containing point layer with GVI scores. 
+ score_field: The field name from the input data containing the score cell_resolution: H3 cell resolution to aggregate to, between 0 (largest) and 15 (smallest) aggregation_operations: @@ -67,7 +72,11 @@ def main( else: raise Exception("Expected point data in interim data file but none found") - # Check data contains numeric gvi_score field + # Check data contains score field + if score_field in gpd.read_file(input_file).columns: + pass + else: + raise Exception("Specified score field not found in input file") # Load input data gdf = gpd.read_file(input_file) From d9940994675c47a2f60888bc0275578d8db0b808 Mon Sep 17 00:00:00 2001 From: Alexei Schwab Date: Mon, 2 Dec 2024 11:02:22 +0000 Subject: [PATCH 2/5] Fix: add pinned h3 dependency to fix h3pandas error --- pyproject.toml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 71c4c9a..c89e5c4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -17,7 +17,8 @@ dependencies = [ "folium", "geopandas", "geopy", - "h3pandas", + "h3 <4", + "h3pandas, "loguru", "mapclassify", "matplotlib", From ef718d7cc330a86761051b729cae9f6010df2aa6 Mon Sep 17 00:00:00 2001 From: Alexei Schwab Date: Mon, 2 Dec 2024 11:03:31 +0000 Subject: [PATCH 3/5] Fix: allow user to specify score field when creating h3 polygons --- src/points_to_h3.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/points_to_h3.py b/src/points_to_h3.py index 0c25727..65c69a8 100644 --- a/src/points_to_h3.py +++ b/src/points_to_h3.py @@ -2,6 +2,7 @@ from pathlib import Path import geopandas as gpd +import pandas as pd import h3pandas # noqa: F401 import typer @@ -82,14 +83,21 @@ def main( gdf = gpd.read_file(input_file) # Exclude points with no GVI score - gdf = gdf[~gdf.gvi_score.isna()] + gdf = gdf[~gdf[score_field].isna()] + + # Check score field is numeric - if not, convert + if gdf[score_field].dtype == pd.StringDtype: + try: + gdf[score_field] = 
pd.to_numeric(gdf[score_field]) + except Exception as e: + raise Exception("Could not convert score field to numeric data type") # Assign points to h3 cells at the selected resolution gdf_h3 = gdf.h3.geo_to_h3(cell_resolution).reset_index() # Aggregate the points to the assigned h3 cell gvi_mean = gdf_h3.groupby("h3_" + f"{cell_resolution:02}").agg( - {"gvi_score": "mean"} + {score_field: "mean"} ) # Convert the h3 cells to polygons From 399a15a9a9f94683d18943a642e589f53e50d7ac Mon Sep 17 00:00:00 2001 From: Alexei Schwab Date: Mon, 2 Dec 2024 11:06:30 +0000 Subject: [PATCH 4/5] docs: update readme to reflect custom score fields for points_to_h3 and create_webmap --- README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 99f90ab..5c94255 100644 --- a/README.md +++ b/README.md @@ -116,8 +116,8 @@ We can generate an H3 polygon layer from the point layer. As an overlay it may m ### Example ```bash -# python -m src.create_webmap path/to/input_file.gpkg path/to/output_file.gpkg cell_resolution -python -m src.points_to_h3 data/processed/Three_Rivers_GVI.gpkg data/processed/Three_Rivers_h3_polygons_10.gpkg 10 +# python -m src.points_to_h3 path/to/input_file.gpkg "score_field" path/to/output_file.gpkg cell_resolution +python -m src.points_to_h3 data/processed/Three_Rivers_GVI.gpkg "gvi_score" data/processed/Three_Rivers_h3_polygons_10.gpkg 10 ``` The larger the number for the [H3 cell resolution](https://h3geo.org/docs/core-library/restable/), the smaller the individual hexagons. 
@@ -131,8 +131,8 @@ To display an OpenStreetMap basemap under the data, you will need an API key fro ### Example ```bash -# python -m src.create_webmap path/to/input_file.gpkg path/to/output/output_file.html default_zoom_for_webmap -python -m src.create_webmap data/processed/Three_Rivers_h3_polygons_10.gpkg data/processed/Three_Rivers_gvi_webmap.html 10 +# python -m src.create_webmap path/to/input_file.gpkg "score_field" path/to/output/output_file.html default_zoom_for_webmap +python -m src.create_webmap data/processed/Three_Rivers_h3_polygons_10.gpkg "gvi_score" data/processed/Three_Rivers_gvi_webmap.html 10 ``` ## Config files From a8ff114138b231b9a1a0090ccbebf815e6e4604b Mon Sep 17 00:00:00 2001 From: Alexei Schwab Date: Mon, 2 Dec 2024 11:22:41 +0000 Subject: [PATCH 5/5] style: reformat points_to_h3 with ruff --- pyproject.toml | 2 +- src/points_to_h3.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index c89e5c4..570192a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,7 +18,7 @@ dependencies = [ "geopandas", "geopy", "h3 <4", - "h3pandas, + "h3pandas", "loguru", "mapclassify", "matplotlib", diff --git a/src/points_to_h3.py b/src/points_to_h3.py index 65c69a8..c2f9851 100644 --- a/src/points_to_h3.py +++ b/src/points_to_h3.py @@ -22,7 +22,7 @@ def main( typer.Argument(help="Path to file containing point layer with GVI scores."), ], score_field: Annotated[ - str, + str, typer.Argument(help="Field containing the score"), ], output_file: Annotated[ @@ -74,7 +74,7 @@ def main( raise Exception("Expected point data in interim data file but none found") # Check data contains score field - if score_field in gpd.read_file(input_file).columns: + if score_field in gpd.read_file(input_file).columns: pass else: raise Exception("Specified score field not found in input file") @@ -86,8 +86,8 @@ def main( gdf = gdf[~gdf[score_field].isna()] # Check score field is numeric - if not, convert - if 
gdf[score_field].dtype == pd.StringDtype: - try: + if gdf[score_field].dtype == pd.StringDtype: + try: gdf[score_field] = pd.to_numeric(gdf[score_field]) except Exception as e: raise Exception("Could not convert score field to numeric data type")