From 88cc3537a37ca23ddb45d47c0d985b08b639d72d Mon Sep 17 00:00:00 2001
From: Alexei Schwab <alexeischwab@gmail.com>
Date: Mon, 25 Nov 2024 15:23:41 +0000
Subject: [PATCH 1/5] Fix: Generalising score field based on user input

---
 src/create_webmap.py | 24 ++++++++++++++----------
 src/points_to_h3.py  | 11 ++++++++++-
 2 files changed, 24 insertions(+), 11 deletions(-)

diff --git a/src/create_webmap.py b/src/create_webmap.py
index 3040d6a..f544c09 100644
--- a/src/create_webmap.py
+++ b/src/create_webmap.py
@@ -29,6 +29,10 @@ def main(
         Path,
         typer.Argument(help="Path to file containing h3 polygons."),
     ],
+    score_field: Annotated[
+        str,
+        typer.Argument(help="Name of field containing GVI score."),
+    ],
     filename: Annotated[
         str, typer.Argument(help="(Optional) Path to file for HTML output.")
     ] = "./data/processed/gvi_webmap.html",
@@ -49,7 +53,7 @@ def main(
         gdf = gdf.to_crs("EPSG:4326")
 
     # Round GVI score to make map labels more readable
-    gdf["gvi_score"] = round(gdf["gvi_score"], 2)
+    gdf[score_field] = round(gdf[score_field], 2)
 
     # get central coordinates of all features
     centre = (
@@ -88,8 +92,8 @@ def main(
 
     # Lookup the colourmap values for each GVI score
     cmap = matplotlib.colormaps["Greens"]
-    gdf["gvi_norm"] = (gdf.gvi_score - np.min(gdf.gvi_score)) / (
-        np.max(gdf.gvi_score) - np.min(gdf.gvi_score)
+    gdf["gvi_norm"] = (gdf[score_field] - np.min(gdf[score_field])) / (
+        np.max(gdf[score_field]) - np.min(gdf[score_field])
     )
     gdf["html_color"] = gdf["gvi_norm"].apply(
         lambda x: matplotlib.colors.rgb2hex(cmap(x))
@@ -99,25 +103,25 @@ def main(
     # Pick 4 evenly-spaced values from the gvi scores to use in the legend
     legend_gvi = list(
         np.arange(
-            gdf.gvi_score.min(),
-            gdf.gvi_score.max(),
-            (gdf.gvi_score.max() - gdf.gvi_score.min()) / 4,
+            gdf[score_field].min(),
+            gdf[score_field].max(),
+            (gdf[score_field].max() - gdf[score_field].min()) / 4,
             dtype=float,
         )
     )
 
     # Generate labels by looking up what the GVI score would be for those values
     legend_label_1 = round(
-        np.linspace(gdf.gvi_score.min(), gdf.gvi_score.max(), 100)[0], 1
+        np.linspace(gdf[score_field].min(), gdf[score_field].max(), 100)[0], 1
     )
     legend_label_2 = round(
-        np.linspace(gdf.gvi_score.min(), gdf.gvi_score.max(), 100)[33], 1
+        np.linspace(gdf[score_field].min(), gdf[score_field].max(), 100)[33], 1
     )
     legend_label_3 = round(
-        np.linspace(gdf.gvi_score.min(), gdf.gvi_score.max(), 100)[66], 1
+        np.linspace(gdf[score_field].min(), gdf[score_field].max(), 100)[66], 1
     )
     legend_label_4 = round(
-        np.linspace(gdf.gvi_score.min(), gdf.gvi_score.max(), 100)[99], 1
+        np.linspace(gdf[score_field].min(), gdf[score_field].max(), 100)[99], 1
     )
 
     # Normalise the label values to lookup against the colourmap
diff --git a/src/points_to_h3.py b/src/points_to_h3.py
index 03323b2..0c25727 100644
--- a/src/points_to_h3.py
+++ b/src/points_to_h3.py
@@ -20,6 +20,10 @@ def main(
         Path,
         typer.Argument(help="Path to file containing point layer with GVI scores."),
     ],
+    score_field: Annotated[
+        str, 
+        typer.Argument(help="Field containing the score"),
+    ],
     output_file: Annotated[
         Path,
         typer.Argument(
@@ -39,6 +43,7 @@ def main(
 
     Args:
             input_file: Path to file containing point layer with GVI scores.
+            score_field: The field name from the input data containing the score
             cell_resolution: H3 cell resolution to aggregate to,
                 between 0 (largest) and 15 (smallest)
             aggregation_operations:
@@ -67,7 +72,11 @@ def main(
     else:
         raise Exception("Expected point data in interim data file but none found")
 
-    # Check data contains numeric gvi_score field
+    # Check data contains score field
+    if score_field in  gpd.read_file(input_file).columns: 
+        pass
+    else:
+        raise Exception("Specified score field not found in input file")
 
     # Load input data
     gdf = gpd.read_file(input_file)

From d9940994675c47a2f60888bc0275578d8db0b808 Mon Sep 17 00:00:00 2001
From: Alexei Schwab <alexeischwab@gmail.com>
Date: Mon, 2 Dec 2024 11:02:22 +0000
Subject: [PATCH 2/5] Fix: add pinned h3 dependency to fix h3pandas error

---
 pyproject.toml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 71c4c9a..c89e5c4 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -17,7 +17,8 @@ dependencies = [
   "folium",
   "geopandas",
   "geopy",
-  "h3pandas",
+  "h3 <4",
+  "h3pandas,
   "loguru",
   "mapclassify",
   "matplotlib",

From ef718d7cc330a86761051b729cae9f6010df2aa6 Mon Sep 17 00:00:00 2001
From: Alexei Schwab <alexeischwab@gmail.com>
Date: Mon, 2 Dec 2024 11:03:31 +0000
Subject: [PATCH 3/5] Fix: allow user to specify score field when creating h3
 polygons

---
 src/points_to_h3.py | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/src/points_to_h3.py b/src/points_to_h3.py
index 0c25727..65c69a8 100644
--- a/src/points_to_h3.py
+++ b/src/points_to_h3.py
@@ -2,6 +2,7 @@
 from pathlib import Path
 
 import geopandas as gpd
+import pandas as pd
 import h3pandas  # noqa: F401
 import typer
 
@@ -82,14 +83,21 @@ def main(
     gdf = gpd.read_file(input_file)
 
     # Exclude points with no GVI score
-    gdf = gdf[~gdf.gvi_score.isna()]
+    gdf = gdf[~gdf[score_field].isna()]
+
+    # Check score field is numeric - if not, convert
+    if gdf[score_field].dtype == pd.StringDtype: 
+        try: 
+            gdf[score_field] = pd.to_numeric(gdf[score_field])
+        except Exception as e:
+            raise Exception("Could not convert score field to numeric data type")
 
     # Assign points to h3 cells at the selected resolution
     gdf_h3 = gdf.h3.geo_to_h3(cell_resolution).reset_index()
 
     # Aggregate the points to the assigned h3 cell
     gvi_mean = gdf_h3.groupby("h3_" + f"{cell_resolution:02}").agg(
-        {"gvi_score": "mean"}
+        {score_field: "mean"}
     )
 
     # Convert the h3 cells to polygons

From 399a15a9a9f94683d18943a642e589f53e50d7ac Mon Sep 17 00:00:00 2001
From: Alexei Schwab <alexeischwab@gmail.com>
Date: Mon, 2 Dec 2024 11:06:30 +0000
Subject: [PATCH 4/5] docs: update readme to reflect custom score fields for
 points_to_h3 and create_webmap

---
 README.md | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/README.md b/README.md
index 99f90ab..5c94255 100644
--- a/README.md
+++ b/README.md
@@ -116,8 +116,8 @@ We can generate an H3 polygon layer from the point layer. As an overlay it may m
 ### Example
 
 ```bash
-# python -m src.create_webmap path/to/input_file.gpkg path/to/output_file.gpkg cell_resolution 
-python -m src.points_to_h3 data/processed/Three_Rivers_GVI.gpkg data/processed/Three_Rivers_h3_polygons_10.gpkg 10
+# python -m src.points_to_h3 path/to/input_file.gpkg "score_field" path/to/output_file.gpkg cell_resolution
+python -m src.points_to_h3 data/processed/Three_Rivers_GVI.gpkg "gvi_score" data/processed/Three_Rivers_h3_polygons_10.gpkg 10
 ```
 
 The larger the number for the [H3 cell resolution](https://h3geo.org/docs/core-library/restable/), the smaller the individual hexagons. 
@@ -131,8 +131,8 @@ To display an OpenStreetMap basemap under the data, you will need an API key fro
 ### Example
 
 ```bash
-# python -m src.create_webmap path/to/input_file.gpkg path/to/output/output_file.html default_zoom_for_webmap 
-python -m src.create_webmap data/processed/Three_Rivers_h3_polygons_10.gpkg data/processed/Three_Rivers_gvi_webmap.html 10
+# python -m src.create_webmap path/to/input_file.gpkg "score_field" path/to/output/output_file.html default_zoom_for_webmap 
+python -m src.create_webmap data/processed/Three_Rivers_h3_polygons_10.gpkg "gvi_score" data/processed/Three_Rivers_gvi_webmap.html 10
 ```
 
 ## Config files

From a8ff114138b231b9a1a0090ccbebf815e6e4604b Mon Sep 17 00:00:00 2001
From: Alexei Schwab <alexeischwab@gmail.com>
Date: Mon, 2 Dec 2024 11:22:41 +0000
Subject: [PATCH 5/5] style: reformat points_to_h3 with ruff

---
 pyproject.toml      | 2 +-
 src/points_to_h3.py | 8 ++++----
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index c89e5c4..570192a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -18,7 +18,7 @@ dependencies = [
   "geopandas",
   "geopy",
   "h3 <4",
-  "h3pandas,
+  "h3pandas",
   "loguru",
   "mapclassify",
   "matplotlib",
diff --git a/src/points_to_h3.py b/src/points_to_h3.py
index 65c69a8..c2f9851 100644
--- a/src/points_to_h3.py
+++ b/src/points_to_h3.py
@@ -22,7 +22,7 @@ def main(
         typer.Argument(help="Path to file containing point layer with GVI scores."),
     ],
     score_field: Annotated[
-        str, 
+        str,
         typer.Argument(help="Field containing the score"),
     ],
     output_file: Annotated[
@@ -74,7 +74,7 @@ def main(
         raise Exception("Expected point data in interim data file but none found")
 
     # Check data contains score field
-    if score_field in  gpd.read_file(input_file).columns: 
+    if score_field in gpd.read_file(input_file).columns:
         pass
     else:
         raise Exception("Specified score field not found in input file")
@@ -86,8 +86,8 @@ def main(
     gdf = gdf[~gdf[score_field].isna()]
 
     # Check score field is numeric - if not, convert
-    if gdf[score_field].dtype == pd.StringDtype: 
-        try: 
+    if gdf[score_field].dtype == pd.StringDtype:
+        try:
             gdf[score_field] = pd.to_numeric(gdf[score_field])
         except Exception as e:
             raise Exception("Could not convert score field to numeric data type")