Skip to content

Commit

Permalink
2 add support for pulling down raster data from linz data service (#33)
Browse files Browse the repository at this point in the history
Based on code from Matt Wilkins that can be viewed at (restricted access) https://git.niwa.co.nz/flood-resilience-aotearoa/cylc-create-flood-domains/-/blob/main/bin/download_slope.py. This code was also reviewed by Matt.

Changes:
* Add module for downloading tiff rasters using the Koordinates export API
* Add two tests - LINZ and LRIS
* Update .gitignore - had to update API keys to be more permissive
* update package version

Co-authored-by: github-actions <[email protected]>
  • Loading branch information
rosepearson and github-actions authored Nov 8, 2022
1 parent e7b2b31 commit 9f2bb8a
Show file tree
Hide file tree
Showing 15 changed files with 590 additions and 37 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@ celerybeat.pid

# Environments
.env
.env.b64
.venv
env/
venv/
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "geoapis"
version = "0.2.9"
version = "0.3.0"
description = "A package for downloading geospatial data from web APIs."
readme = "README.md"
authors = [{ name = "Rose pearson", email = "[email protected]" }]
Expand Down
2 changes: 1 addition & 1 deletion src/geoapis/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@
@author: pearsonra
"""
__version__ = "0.2.9"
__version__ = "0.3.0"
4 changes: 2 additions & 2 deletions src/geoapis/geometry.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,13 @@ def __init__(
self._set_up()

def _set_up(self):
"""Set CRS and select all tiles partially within the catchment, and look up the file column name"""
"""Set CRS and select all tiles partially within the catchment, and look up the
file column name"""

if self.catchment_polygon is not None:
self._tile_info = self._tile_info.to_crs(self.catchment_polygon.crs)
self._tile_info = geopandas.sjoin(self._tile_info, self.catchment_polygon)
self._tile_info = self._tile_info.reset_index(drop=True)

# Try workout the name of the column containing file name information.
column_names = self._tile_info.columns
column_name_matches = [
Expand Down
3 changes: 2 additions & 1 deletion src/geoapis/lidar.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,8 @@ def query_for_datasets_inside_catchment(self):
return response.json()

def download_dataset(self, dataset_prefix, client):
"""Download all files within an optional search polygon of a given dataset_prefix"""
"""Download all files within an optional search polygon of a given
dataset_prefix"""

if self.verbose:
print(f"Check files in dataset {dataset_prefix}")
Expand Down
241 changes: 241 additions & 0 deletions src/geoapis/raster.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,241 @@
# -*- coding: utf-8 -*-
"""
Created on Fri Nov 7 10:10:55 2022
@author: pearsonra
"""

import abc
import io
import logging
import pathlib
import time
import typing
import urllib
import urllib.parse
import zipfile

import geopandas
import numpy
import requests


class KoordinatesExportsQueryBase(abc.ABC):
    """An abstract class to manage fetching raster data using the Koordinates
    exports API. Downloads the GeoTiff specified in the run routine.

    API details at: https://help.koordinates.com/site-admin-apis/export-api/

    Parameters
    ----------
    key: str
        The API key. Must have Search, view and download exported data
        permissions.
    cache_path: pathlib.Path
        The location to download all GeoTiffs queried in the run method.
    crs: int
        The CRS EPSG code for the GeoTiffs to be downloaded as.
    bounding_polygon: geopandas.geodataframe.GeoDataFrame
        An optional geometry to clip the downloaded GeoTiffs within.
    """

    @property
    @abc.abstractmethod
    def NETLOC_API(self):
        """The netloc of the data service. Must be provided by the child class."""

        raise NotImplementedError("NETLOC_API must be instantiated in the child class")

    SCHEME = "https"
    PATH = "services/api/v1"
    PATH_API_END = "/exports"
    # The bounding polygon is reprojected to this CRS before being sent as the
    # export extent.
    K_CRS = "EPSG:4326"

    def __init__(
        self,
        key: str,
        cache_path: typing.Union[str, pathlib.Path],
        crs: int = None,
        bounding_polygon: geopandas.geodataframe.GeoDataFrame = None,
    ):
        """Store the API key, cache path and optional CRS/bounding_polygon, and
        build the base URL of the data service. The layer to download is
        specified when calling run."""

        self.key = key
        self.cache_path = pathlib.Path(cache_path)
        self.bounding_polygon = bounding_polygon
        self.crs = crs

        self.base_url = urllib.parse.urlunparse(
            (
                self.SCHEME,
                self.NETLOC_API,
                self.PATH,
                "",
                "",
                "",
            )
        )

        self._set_up()

    def _set_up(self):
        """Ensure the bounding_polygon and CRS are in agreement, and that the
        bounding_polygon is a single geometry with fewer than 1000 points."""

        if self.bounding_polygon is None:
            return

        polygon_epsg = self.bounding_polygon.crs.to_epsg()
        if self.crs is None:
            # Set the crs from the bounding_polygon if it's not been set
            logging.info("The download CRS is being set from the bounding_polygon")
            self.crs = polygon_epsg
        elif self.crs != polygon_epsg:
            logging.info(
                "The bounding_polygon is being transformed to the specified "
                "download CRS"
            )
            # to_crs returns a new object rather than transforming in place, so
            # the result must be kept.
            self.bounding_polygon = self.bounding_polygon.to_crs(self.crs)

        # Enforce the bounding_polygon must be a single geometry
        assert (
            len(self.bounding_polygon) == 1
        ), "The bounding polygon must be a single geometry"
        # Select by position so a frame whose index label is not 0 still works
        exterior = self.bounding_polygon.exterior.iloc[0]
        assert (
            len(exterior.coords) < 1000
        ), "The bounding polygon must be less than 1000 points"

    def run(self, layer: int) -> typing.Optional[typing.List[pathlib.Path]]:
        """Request an export of the specified raster layer as a GeoTiff, wait for
        the export to complete, then download and unzip it into a folder named
        after the layer under the cache path. If a bounding_polygon is specified
        it is sent as the extent of the export.

        Parameters
        ----------
        layer: int
            The id of the layer to export.

        Returns
        -------
        A list of the paths of all '.tif' files in the layer folder, or None if
        the export ended in a state other than 'complete'.

        Raises
        ------
        requests.HTTPError
            If any request to the data service returns an error status.
        IndexError
            If the triggered export is not in the list of exports returned by
            the data service.
        """

        headers = {"Authorization": f"key {self.key}"}

        # Create the initial request
        api_query = {
            "crs": f"EPSG:{self.crs}",
            "formats": {"grid": "image/tiff;subtype=geotiff"},
            "items": [{"item": f"{self.base_url}/layers/{layer}/"}],
        }
        if self.bounding_polygon is not None:
            exterior = self.bounding_polygon.to_crs(self.K_CRS).exterior.iloc[0]
            api_query["extent"] = {
                "type": self.bounding_polygon.geom_type.iloc[0],
                "coordinates": [list(exterior.coords)],
            }
        logging.info("Send initial request to download image")
        response = requests.post(
            url=f"{self.base_url}/exports/", headers=headers, json=api_query
        )
        # Fail with the HTTP error rather than a confusing KeyError on 'id'
        response.raise_for_status()
        query_id = response.json()["id"]

        # Check the state of your exports until the triggered raster export completes
        logging.info("Check status of download request")
        while True:
            response = requests.get(
                f"{self.base_url}/exports/",
                headers=headers,
            )
            response.raise_for_status()
            # find the triggered export
            element = next(
                (export for export in response.json() if export["id"] == query_id),
                None,
            )
            if element is None:
                raise IndexError(
                    f"The export {query_id} was not found in the list of exports"
                )
            logging.info(f"/texport state is {element['state']}")
            if element["state"] == "processing":
                logging.info("Not complete - check again in 20s")
                time.sleep(20)
                continue
            if element["state"] == "complete":
                logging.info("/tCompleted - move to download")
                break
            logging.warning(
                f"Could not download raster. Ended with status {element['state']}"
            )
            return None

        # Download the completed export
        layer_path = self.cache_path / f"{layer}"
        logging.info(f"Downloading {element['download_url']} to {self.cache_path}")
        with requests.get(
            element["download_url"],
            headers=headers,
            stream=True,
        ) as response:
            response.raise_for_status()
            # Close the archive once its contents have been extracted
            with zipfile.ZipFile(io.BytesIO(response.content)) as zip_object:
                zip_object.extractall(layer_path)

        # Return the file names of the downloaded rasters
        return [
            file_name for file_name in layer_path.iterdir() if file_name.suffix == ".tif"
        ]


class Linz(KoordinatesExportsQueryBase):
    """A class to manage fetching raster data from LINZ.

    The LINZ data service can be accessed at: https://data.linz.govt.nz/
    Note that only rasters supporting the grid image/tiff geotiff are supported.
    """

    NETLOC_API = "data.linz.govt.nz"


class Lris(KoordinatesExportsQueryBase):
    """A class to manage fetching raster data from LRIS.

    The LRIS data service can be accessed at: https://lris.scinfo.org.nz/
    Note that only rasters supporting the grid image/tiff geotiff are supported.
    """

    NETLOC_API = "lris.scinfo.org.nz"


class StatsNz(KoordinatesExportsQueryBase):
    """A class to manage fetching raster data from the Stats NZ datafinder.

    The Stats NZ data service can be accessed at: https://datafinder.stats.govt.nz/
    Note that only rasters supporting the grid image/tiff geotiff are supported.
    """

    NETLOC_API = "datafinder.stats.govt.nz"


class KoordinatesQuery(KoordinatesExportsQueryBase):
    """A class to manage fetching raster data from any generic data portal
    supporting the Koordinates exports API.

    Parameters
    ----------
    key: str
        The API key for the data portal.
    netloc_url: str
        The netloc of the data portal, e.g. 'data.linz.govt.nz'.
    crs: int
        The CRS EPSG code for the GeoTiffs to be downloaded as.
    bounding_polygon: geopandas.geodataframe.GeoDataFrame
        An optional geometry to clip the downloaded GeoTiffs within.
    cache_path: pathlib.Path
        The location to download the GeoTiffs to. Keyword-only, so the
        positional order of the other parameters is unchanged.
    """

    def __init__(
        self,
        key: str,
        netloc_url: str,
        crs: int = None,
        bounding_polygon: geopandas.geodataframe.GeoDataFrame = None,
        *,
        cache_path: typing.Union[str, pathlib.Path],
    ):
        """Set NETLOC_API and instantiate the KoordinatesExportsQueryBase"""

        # Must be set before the base class is initialised, as the base class
        # reads NETLOC_API when constructing the base URL.
        self.netloc_url = netloc_url

        # The base class requires cache_path, so it has to be forwarded here.
        super().__init__(
            key=key,
            cache_path=cache_path,
            crs=crs,
            bounding_polygon=bounding_polygon,
        )

    @property
    def NETLOC_API(self):
        """Return the netloc of the data service given at construction."""

        return self.netloc_url
Loading

0 comments on commit 9f2bb8a

Please sign in to comment.