-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathschemas.py
128 lines (109 loc) · 3.92 KB
/
schemas.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
"""Pydantic models for GeoParquet metadata."""
import ast
from pydantic import BeforeValidator, Field, BaseModel, field_validator, model_validator
from typing import Annotated, Optional, Literal, Union
from pyproj import CRS
# Allowed values for the ``edges`` field of a geometry column.
EdgeType = Literal["planar", "spherical"]

# 2D geometry type names accepted in a column's ``geometry_types`` list.
FlatGeometryTypes = Annotated[
    Literal[
        "Point",
        "MultiPoint",
        "LineString",
        "MultiLineString",
        "Polygon",
        "MultiPolygon",
        "GeometryCollection",
    ],
    Field(description="The geometry types supported by the column"),
]

# 3D geometry type names accepted in a column's ``geometry_types`` list.
#
# The GeoParquet specification spells 3D types with a " Z" suffix (for example
# "Point Z"), so those spellings are accepted. The space-less variants
# ("PointZ", ...) are kept only so that existing callers keep validating.
# NOTE(review): the space-less names are not spec-conformant; consider
# normalising them before metadata is written to a file.
ZGeometryTypes = Annotated[
    Literal[
        "Point Z",
        "MultiPoint Z",
        "LineString Z",
        "MultiLineString Z",
        "Polygon Z",
        "MultiPolygon Z",
        "GeometryCollection Z",
        "PointZ",
        "MultiPointZ",
        "LineStringZ",
        "MultiLineStringZ",
        "PolygonZ",
        "MultiPolygonZ",
        "GeometryCollectionZ",
    ],
    Field(description="3D geometry types supported by the column"),
]

# Any geometry type name, 2D or 3D.
GeometryTypes = Union[FlatGeometryTypes, ZGeometryTypes]
class GeometryColumnMetadata(BaseModel):
    """Metadata describing a single geometry column.

    Validation performed on construction:

    * ``crs``, when supplied, is parsed with pyproj and stored as a PROJJSON
      string.
    * ``geometry_types`` must not contain duplicate entries.
    * ``bbox``, when supplied, must contain exactly four values.
    """

    encoding: Literal["WKB"]
    geometry_types: list[GeometryTypes]
    # NOTE(review): pydantic does not run field validators on default values
    # unless ``validate_default`` is enabled, so an omitted ``crs`` stays the
    # literal "OGC:CRS84" instead of PROJJSON -- confirm this is intended.
    crs: Annotated[
        str,
        Field(
            description="The CRS of the geometry column in a string format readable by pyproj. Is the converted to PROJJSON format"
        ),
    ] = "OGC:CRS84"
    edges: Annotated[
        EdgeType, Field(description="The type of edges of the geometries")
    ] = "planar"
    bbox: Optional[
        Annotated[list[float], Field(description="The bounding box of the geometries")]
    ] = None
    epoch: Optional[
        Annotated[
            Union[int, float],
            Field(description="Coordinate epoch in case of a dynamic CRS"),
        ]
    ] = None
    orientation: Literal["counterclockwise"] = "counterclockwise"

    @field_validator("crs")
    @classmethod
    def convert_crs_to_projjson(cls, v) -> str:
        """Parse a CRS string and return a PROJJSON string.

        Raises:
            ValueError: If pyproj cannot interpret ``v`` as a CRS.
        """
        try:
            return CRS.from_string(v).to_json()
        except Exception as e:
            # Surface any pyproj failure as a ValueError so pydantic reports it
            # as a validation error; chain the cause to keep the original
            # traceback available.
            raise ValueError(f"Invalid CRS string: {e}") from e

    @field_validator("geometry_types")
    @classmethod
    def only_unique_types(cls, v):
        """Reject a ``geometry_types`` list that contains duplicates.

        Raises:
            ValueError: If any geometry type appears more than once.
        """
        if len(set(v)) != len(v):
            raise ValueError("geometry_types items must be unique!")
        return v

    @field_validator("bbox")
    @classmethod
    def must_be_length_4(cls, v):
        """Require ``bbox`` to be ``None`` or a list of exactly four values.

        NOTE(review): 3D columns may need a six-value bbox -- confirm against
        the GeoParquet specification before relaxing this check.

        Raises:
            ValueError: If ``v`` is not ``None`` and its length is not 4.
        """
        if v is not None and len(v) != 4:
            raise ValueError("bbox must be a list of 4 floats!")
        return v
class GeoParquetMetadata(BaseModel):
    """Top-level GeoParquet metadata: version, primary column and columns.

    After field validation, the ``columns`` entry named by ``primary_column``
    is normalised to a ``GeometryColumnMetadata`` instance. Entries for other
    columns are left exactly as supplied.
    """

    version: Annotated[
        str, Field(description="The version of the GeoParquet format")
    ] = "1.1.0-dev"
    primary_column: Annotated[
        str, Field(description="The name of the geometry primary column")
    ] = "geometry"
    columns: Annotated[
        dict[str, GeometryColumnMetadata | dict | str],
        Field(description="Metadata for each column (keys)"),
    ]

    @model_validator(mode="after")
    def contains_primary_col(self) -> "GeoParquetMetadata":
        """Ensure ``primary_column`` is a key of ``columns``.

        Raises:
            ValueError: If ``primary_column`` is missing from ``columns``.
        """
        if self.primary_column not in self.columns:
            raise ValueError(
                f"primary column={self.primary_column} not in arg:columns={self.columns}"
            )
        return self

    @model_validator(mode="after")
    def convert_geo_to_class(self) -> "GeoParquetMetadata":
        """Normalise the primary column's metadata to ``GeometryColumnMetadata``.

        A string value is first parsed as a Python literal; the resulting (or
        directly supplied) dict is then used to build the model instance, which
        replaces the entry in ``columns``.

        Raises:
            ValueError: If a string value cannot be parsed as a Python literal,
                or if the value is not (and does not parse to) a dict.
        """
        column_meta = self.columns[self.primary_column]
        if isinstance(column_meta, GeometryColumnMetadata):
            return self

        if isinstance(column_meta, str):
            try:
                column_meta = ast.literal_eval(column_meta)
            except (SyntaxError, TypeError, ValueError) as e:
                # literal_eval can raise SyntaxError/TypeError, which pydantic
                # would let escape as raw exceptions; re-raise as ValueError so
                # the failure is reported as a validation error.
                raise ValueError(
                    f"Invalid primary column metadata: {self.columns[self.primary_column]}"
                ) from e

        if not isinstance(column_meta, dict):
            raise ValueError(f"Invalid primary column metadata: {column_meta}")

        self.columns[self.primary_column] = GeometryColumnMetadata(**column_meta)
        return self