Skip to content

Commit

Permalink
Updating to incorporate the 'amenity' type in the PK; adjusting data …
Browse files Browse the repository at this point in the history
…set to focus on the areas the app considers containing a relevant 'amenity' value
  • Loading branch information
mgoddard committed Oct 8, 2021
1 parent 4fc3f42 commit 00903be
Show file tree
Hide file tree
Showing 9 changed files with 31 additions and 27 deletions.
28 changes: 14 additions & 14 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ corresponding to the area shown in the figure below. The result of this operati
was a 36 GB Bzip'd XML file (not included here). This intermediate file was then
processed using [this Perl script](./osm/extract_points_from_osm_xml.pl), with the
result being piped through grep and, finally, gzip to produce a [smaller data
set](https://storage.googleapis.com/crl-goddard-gis/osm_475k_eu.txt.gz) containing
set](https://storage.googleapis.com/crl-goddard-gis/osm_50k_eu.txt.gz) containing
only the points which lie in the areas the app focuses on.

![Boundary of OSM data extract](./osm/OSM_extracted_region.jpg)
Expand All @@ -71,7 +71,8 @@ CREATE TABLE osm
, key_value TEXT[]
, ref_point GEOGRAPHY
, geohash4 TEXT -- First 4 characters of geohash, corresponding to a box of about +/- 20 km
, CONSTRAINT "primary" PRIMARY KEY (geohash4 ASC, id ASC)
, amenity TEXT
, CONSTRAINT "primary" PRIMARY KEY (geohash4 ASC, amenity ASC, id ASC)
);
CREATE INDEX ON osm USING GIN(ref_point);
```
Expand Down Expand Up @@ -125,20 +126,19 @@ index would be preferable since it permits far more complex comparisons.
12 WHERE
13 """
14 if useGeohash:
15 sql += "geohash4 = SUBSTRING(%s FOR 4)"
15 sql += "geohash4 = SUBSTRING(%s FOR 4) AND amenity = %s"
16 else:
17 sql += "ST_DWithin(ST_MakePoint(%s, %s)::GEOGRAPHY, ref_point, 5.0E+03, TRUE)"
17 sql += "ST_DWithin(ST_MakePoint(%s, %s)::GEOGRAPHY, ref_point, 5.0E+03, TRUE) AND key_value && ARRAY[%s]"
18 sql += """
19 AND key_value && ARRAY[%s]
20 )
21 SELECT * FROM q1
22 """
23 if useGeohash:
24 sql += "WHERE dist_m < 5.0E+03"
25 sql += """
26 ORDER BY dist_m ASC
27 LIMIT 10;
28 """
19 )
20 SELECT * FROM q1
21 """
22 if useGeohash:
23 sql += "WHERE dist_m < 5.0E+03"
24 sql += """
25 ORDER BY dist_m ASC
26 LIMIT 10;
27 """
```

## Run the app in one of 3 ways: (1) locally, (2) locally, but with the app in a Docker container, (3) in Kubernetes (K8s)
Expand Down
2 changes: 1 addition & 1 deletion docker_include.sh
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
tag=1.2
tag=1.3
docker_id="mgoddard"
img_name="crdb-geo-tourist"
2 changes: 1 addition & 1 deletion k8s/cockroachdb.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ spec:
memory: "12Gi"
tlsEnabled: true
image:
name: cockroachdb/cockroach:v21.1.5
name: cockroachdb/cockroach:v21.1.7
nodes: 3

---
Expand Down
2 changes: 1 addition & 1 deletion k8s/crdb-geo-tourist.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ spec:
spec:
containers:
- name: crdb-geo-tourist
image: mgoddard/crdb-geo-tourist:1.2
image: mgoddard/crdb-geo-tourist:1.3
imagePullPolicy: Always
env:
- name: PGHOST
Expand Down
4 changes: 2 additions & 2 deletions k8s/data-loader.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ metadata:
spec:
containers:
- name: crdb-geo-loader
image: mgoddard/crdb-geo-tourist:1.0
image: mgoddard/crdb-geo-tourist:1.3
imagePullPolicy: Always
env:
- name: PGHOST
Expand All @@ -19,7 +19,7 @@ spec:
- name: PGPASSWORD
value: "tourist"
- name: DATA_URL
value: "https://storage.googleapis.com/crl-goddard-gis/osm_475k_eu.txt.gz"
value: "https://storage.googleapis.com/crl-goddard-gis/osm_50k_eu.txt.gz"
command: ["/bin/bash", "-c"]
args: ["curl -s -k ${DATA_URL} | gunzip - | ./load_osm_stdin.py"]
restartPolicy: Never
Expand Down
2 changes: 1 addition & 1 deletion k8s/deploy_k8s.sh
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ echo "Apply the CustomResourceDefinition (CRD) for the Operator"
run_cmd kubectl apply -f https://raw.githubusercontent.com/cockroachdb/cockroach-operator/master/config/crd/bases/crdb.cockroachlabs.com_crdbclusters.yaml

echo "Apply the Operator manifest"
OPERATOR_YAML="https://raw.githubusercontent.com/cockroachdb/cockroach-operator/master/manifests/operator.yaml"
OPERATOR_YAML="https://raw.githubusercontent.com/cockroachdb/cockroach-operator/master/install/operator.yaml"
run_cmd kubectl apply -f $OPERATOR_YAML

echo "Validate that the Operator is running"
Expand Down
2 changes: 1 addition & 1 deletion k8s/rolling_upgrade.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,6 @@ spec:
memory: "12Gi"
tlsEnabled: true
image:
name: cockroachdb/cockroach:v21.1.6
name: cockroachdb/cockroach:v21.1.9
nodes: 4

9 changes: 7 additions & 2 deletions load_osm_stdin.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,8 @@ def setup_db():
, key_value TEXT[]
, ref_point GEOGRAPHY
, geohash4 TEXT
, CONSTRAINT "primary" PRIMARY KEY (geohash4 ASC, id ASC)
, amenity TEXT
, CONSTRAINT "primary" PRIMARY KEY (geohash4 ASC, amenity ASC, id ASC)
);
"""
print("Creating osm table")
Expand Down Expand Up @@ -120,7 +121,7 @@ def setup_db():
cur.execute(sql, (s["name"], s["lat"], s["lon"]))
conn.commit()

sql = "INSERT INTO osm (id, date_time, uid, name, key_value, ref_point, geohash4) VALUES "
sql = "INSERT INTO osm (id, date_time, uid, name, key_value, ref_point, geohash4, amenity) VALUES "

vals = []
llre = re.compile(r"^-?\d+\.\d+$")
Expand Down Expand Up @@ -152,14 +153,18 @@ def setup_db():
for w in re.split(r"\W+", name.lower()):
if len(w) > 0:
kv.append(w)
amenity = ""
for x in kvagg.split('|'):
if len(x) == 0:
continue;
x = html.unescape(x)
x = re.sub(r"['\",{}]", "", x)
kv.append(x)
if x.startswith("amenity"):
amenity = x.split("=")[1]
row += ", '{" + ','.join(kv) + "}'"
row += ", ST_MakePoint(" + lon + ", " + lat + ")::GEOGRAPHY, '" + geohash[0:4] + "'"
row += ", '" + amenity + "'"
vals.append("(" + row + ")")
if len(vals) % rows_per_batch == 0:
print("Running INSERT for batch %d of %d rows" % (n_batch, rows_per_batch))
Expand Down
7 changes: 3 additions & 4 deletions map_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,11 +81,10 @@ def features():
WHERE
"""
if useGeohash:
sql += "geohash4 = SUBSTRING(%s FOR 4)"
sql += "geohash4 = SUBSTRING(%s FOR 4) AND amenity = %s"
else:
sql += "ST_DWithin(ST_MakePoint(%s, %s)::GEOGRAPHY, ref_point, 5.0E+03, TRUE)"
sql += "ST_DWithin(ST_MakePoint(%s, %s)::GEOGRAPHY, ref_point, 5.0E+03, TRUE) AND key_value && ARRAY[%s]"
sql += """
AND key_value && ARRAY[%s]
)
SELECT * FROM q1
"""
Expand All @@ -101,7 +100,7 @@ def features():
with conn.cursor() as cur:
try:
if useGeohash:
cur.execute(sql, (lon, lat, geohash, "amenity=" + amenity))
cur.execute(sql, (lon, lat, geohash, amenity))
else:
cur.execute(sql, (lon, lat, lon, lat, "amenity=" + amenity))
for row in cur:
Expand Down

0 comments on commit 00903be

Please sign in to comment.