diff --git a/README.md b/README.md index 99f90ab..5c94255 100644 --- a/README.md +++ b/README.md @@ -116,8 +116,8 @@ We can generate an H3 polygon layer from the point layer. As an overlay it may m ### Example ```bash -# python -m src.create_webmap path/to/input_file.gpkg path/to/output_file.gpkg cell_resolution -python -m src.points_to_h3 data/processed/Three_Rivers_GVI.gpkg data/processed/Three_Rivers_h3_polygons_10.gpkg 10 +# python -m src.points_to_h3 path/to/input_file.gpkg "score_field" path/to/output_file.gpkg cell_resolution +python -m src.points_to_h3 data/processed/Three_Rivers_GVI.gpkg "gvi_score" data/processed/Three_Rivers_h3_polygons_10.gpkg 10 ``` The larger the number for the [H3 cell resolution](https://h3geo.org/docs/core-library/restable/), the smaller the individual hexagons. @@ -131,8 +131,8 @@ To display an OpenStreetMap basemap under the data, you will need an API key fro ### Example ```bash -# python -m src.create_webmap path/to/input_file.gpkg path/to/output/output_file.html default_zoom_for_webmap -python -m src.create_webmap data/processed/Three_Rivers_h3_polygons_10.gpkg data/processed/Three_Rivers_gvi_webmap.html 10 +# python -m src.create_webmap path/to/input_file.gpkg "score_field" path/to/output/output_file.html default_zoom_for_webmap +python -m src.create_webmap data/processed/Three_Rivers_h3_polygons_10.gpkg "gvi_score" data/processed/Three_Rivers_gvi_webmap.html 10 ``` ## Config files diff --git a/pyproject.toml b/pyproject.toml index 71c4c9a..570192a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -17,6 +17,7 @@ dependencies = [ "folium", "geopandas", "geopy", + "h3 <4", "h3pandas", "loguru", "mapclassify", diff --git a/src/create_webmap.py b/src/create_webmap.py index 3040d6a..f544c09 100644 --- a/src/create_webmap.py +++ b/src/create_webmap.py @@ -29,6 +29,10 @@ def main( Path, typer.Argument(help="Path to file containing h3 polygons."), ], + score_field: Annotated[ + str, + typer.Argument(help="Name of field containing 
GVI score."), + ], filename: Annotated[ str, typer.Argument(help="(Optional) Path to file for HTML output.") ] = "./data/processed/gvi_webmap.html", @@ -49,7 +53,7 @@ def main( gdf = gdf.to_crs("EPSG:4326") # Round GVI score to make map labels more readable - gdf["gvi_score"] = round(gdf["gvi_score"], 2) + gdf[score_field] = round(gdf[score_field], 2) # get central coordinates of all features centre = ( @@ -88,8 +92,8 @@ def main( # Lookup the colourmap values for each GVI score cmap = matplotlib.colormaps["Greens"] - gdf["gvi_norm"] = (gdf.gvi_score - np.min(gdf.gvi_score)) / ( - np.max(gdf.gvi_score) - np.min(gdf.gvi_score) + gdf["gvi_norm"] = (gdf[score_field] - np.min(gdf[score_field])) / ( + np.max(gdf[score_field]) - np.min(gdf[score_field]) ) gdf["html_color"] = gdf["gvi_norm"].apply( lambda x: matplotlib.colors.rgb2hex(cmap(x)) @@ -99,25 +103,25 @@ def main( # Pick 4 evenly-spaced values from the gvi scores to use in the legend legend_gvi = list( np.arange( - gdf.gvi_score.min(), - gdf.gvi_score.max(), - (gdf.gvi_score.max() - gdf.gvi_score.min()) / 4, + gdf[score_field].min(), + gdf[score_field].max(), + (gdf[score_field].max() - gdf[score_field].min()) / 4, dtype=float, ) ) # Generate labels by looking up what the GVI score would be for those values legend_label_1 = round( - np.linspace(gdf.gvi_score.min(), gdf.gvi_score.max(), 100)[0], 1 + np.linspace(gdf[score_field].min(), gdf[score_field].max(), 100)[0], 1 ) legend_label_2 = round( - np.linspace(gdf.gvi_score.min(), gdf.gvi_score.max(), 100)[33], 1 + np.linspace(gdf[score_field].min(), gdf[score_field].max(), 100)[33], 1 ) legend_label_3 = round( - np.linspace(gdf.gvi_score.min(), gdf.gvi_score.max(), 100)[66], 1 + np.linspace(gdf[score_field].min(), gdf[score_field].max(), 100)[66], 1 ) legend_label_4 = round( - np.linspace(gdf.gvi_score.min(), gdf.gvi_score.max(), 100)[99], 1 + np.linspace(gdf[score_field].min(), gdf[score_field].max(), 100)[99], 1 ) # Normalise the label values to lookup against 
the colourmap diff --git a/src/points_to_h3.py b/src/points_to_h3.py index 03323b2..c2f9851 100644 --- a/src/points_to_h3.py +++ b/src/points_to_h3.py @@ -2,6 +2,7 @@ from pathlib import Path import geopandas as gpd +import pandas as pd import h3pandas # noqa: F401 import typer @@ -20,6 +21,10 @@ def main( Path, typer.Argument(help="Path to file containing point layer with GVI scores."), ], + score_field: Annotated[ + str, + typer.Argument(help="Field containing the score"), + ], output_file: Annotated[ Path, typer.Argument( @@ -39,6 +44,7 @@ def main( Args: input_file: Path to file containing point layer with GVI scores. + score_field: The field name from the input data containing the score cell_resolution: H3 cell resolution to aggregate to, between 0 (largest) and 15 (smallest) aggregation_operations: @@ -67,20 +73,31 @@ def main( else: raise Exception("Expected point data in interim data file but none found") - # Check data contains numeric gvi_score field + # Check data contains score field + if score_field in gpd.read_file(input_file).columns: + pass + else: + raise Exception("Specified score field not found in input file") # Load input data gdf = gpd.read_file(input_file) # Exclude points with no GVI score - gdf = gdf[~gdf.gvi_score.isna()] + gdf = gdf[~gdf[score_field].isna()] + + # Check score field is numeric - if not, convert + if gdf[score_field].dtype == pd.StringDtype: + try: + gdf[score_field] = pd.to_numeric(gdf[score_field]) + except Exception as e: + raise Exception("Could not convert score field to numeric data type") # Assign points to h3 cells at the selected resolution gdf_h3 = gdf.h3.geo_to_h3(cell_resolution).reset_index() # Aggregate the points to the assigned h3 cell gvi_mean = gdf_h3.groupby("h3_" + f"{cell_resolution:02}").agg( - {"gvi_score": "mean"} + {score_field: "mean"} ) # Convert the h3 cells to polygons