Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
93 changes: 91 additions & 2 deletions hestia/bot.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from telegram.ext import filters, MessageHandler, ApplicationBuilder, CommandHandler, CallbackQueryHandler, ContextTypes

import hestia_utils.db as db
import hestia_utils.geocode as geocode
import hestia_utils.meta as meta
import hestia_utils.secrets as secrets
import hestia_utils.strings as strings
Expand Down Expand Up @@ -272,7 +273,24 @@ async def filter(update: telegram.Update, context: ContextTypes.DEFAULT_TYPE) ->
for c in sub["filter_cities"]:
cities_str += f"{c.title()}, "

message = strings.get("filter", update.effective_chat.id, [sub['filter_min_price'], sub['filter_max_price'], sub['filter_min_sqm'], cities_str[:-2]])
if sub.get("filter_radius_km") is not None:
location_str = strings.get(
"filter_location_value",
update.effective_chat.id,
[
f"{float(sub['filter_center_lat']):.5f}",
f"{float(sub['filter_center_lon']):.5f}",
f"{float(sub['filter_radius_km']):g}",
],
)
else:
location_str = strings.get("filter_location_none", update.effective_chat.id)

message = strings.get(
"filter",
update.effective_chat.id,
[sub['filter_min_price'], sub['filter_max_price'], sub['filter_min_sqm'], cities_str[:-2], location_str],
)

# Set minprice filter
elif len(cmd) == 3 and cmd[1] in ["minprice", "min"]:
Expand Down Expand Up @@ -380,9 +398,80 @@ async def filter(update: telegram.Update, context: ContextTypes.DEFAULT_TYPE) ->

if len(sub_filter_cities) == 0:
message += strings.get("filter_city_empty", update.effective_chat.id)

# Show location filter
elif len(cmd) == 2 and cmd[1] == "location":
if sub.get("filter_radius_km") is not None:
message = strings.get(
"filter_location_value",
update.effective_chat.id,
[
f"{float(sub['filter_center_lat']):.5f}",
f"{float(sub['filter_center_lon']):.5f}",
f"{float(sub['filter_radius_km']):g}",
],
)
else:
message = strings.get("filter_location_none", update.effective_chat.id)

# Clear location filter
elif len(cmd) == 3 and cmd[1] == "location" and cmd[2] in ["clear", "off", "disable"]:
db.clear_filter_location(update.effective_chat)
message = strings.get("filter_location_cleared", update.effective_chat.id)

# Set location by raw lat/lon: /filter location <radius_km> <lat> <lon>
elif len(cmd) == 5 and cmd[1] == "location":
try:
radius_km = float(cmd[2])
lat = float(cmd[3])
lon = float(cmd[4])
except ValueError:
await context.bot.send_message(update.effective_chat.id, strings.get("filter_location_invalid", update.effective_chat.id))
return
if not (-90.0 <= lat <= 90.0) or not (-180.0 <= lon <= 180.0) or radius_km <= 0:
await context.bot.send_message(update.effective_chat.id, strings.get("filter_location_invalid", update.effective_chat.id))
return
db.set_filter_location(update.effective_chat, lat, lon, radius_km)
message = strings.get(
"filter_location_set",
update.effective_chat.id,
[f"{lat:.5f}", f"{lon:.5f}", f"{radius_km:g}"],
)

# Set location by place name: /filter location <radius_km> <place name...>
elif len(cmd) >= 4 and cmd[1] == "location":
try:
radius_km = float(cmd[2])
except ValueError:
await context.bot.send_message(update.effective_chat.id, strings.get("filter_location_invalid", update.effective_chat.id))
return
if radius_km <= 0:
await context.bot.send_message(update.effective_chat.id, strings.get("filter_location_invalid", update.effective_chat.id))
return
# Use the raw text so we don't distort city names with the earlier .lower().
raw_tokens = update.message.text.split(' ')[3:]
place = ' '.join(t.replace(';', '').replace('"', '').replace("'", '') for t in raw_tokens).strip()
if not place:
await context.bot.send_message(update.effective_chat.id, strings.get("filter_location_invalid", update.effective_chat.id))
return
geo = geocode.geocode(place, "")
if geo is None:
await context.bot.send_message(
update.effective_chat.id,
strings.get("filter_location_not_found", update.effective_chat.id, [place]),
)
return
lat, lon, _ = geo
db.set_filter_location(update.effective_chat, lat, lon, radius_km)
message = strings.get(
"filter_location_set_place",
update.effective_chat.id,
[place, f"{lat:.5f}", f"{lon:.5f}", f"{radius_km:g}"],
)

else:
message = strings.get("filter_invalid_command", update.effective_chat.id)

await context.bot.send_message(update.effective_chat.id, message, parse_mode="Markdown")


Expand Down
35 changes: 32 additions & 3 deletions hestia/hestia_utils/db.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,20 @@ def _write(query: str, params: list[str] = []) -> None:
if conn: conn.close()

def add_home(url: str, address: str, city: str, price: int, agency: str, date_added: str, sqm: int = -1) -> None:
    """Insert a home into hestia.homes, geocoding its address on a best-effort basis.

    The row is written exactly once. When geocoding raises or yields no
    result, lat, lon and geocode_confidence are stored as NULL.

    Args:
        url: Listing URL.
        address: Street address, also passed to the geocoder.
        city: City name, also passed to the geocoder.
        price: Listing price; stored via ``str(price)``.
        agency: Agency identifier.
        date_added: Value for the date_added column.
        sqm: Floor area; stored via ``str(sqm)``. Defaults to -1.
    """
    # NOTE(review): the repository owner objected to geocoding at this point:
    # db.py functions are meant to be simple abstractions over the database,
    # and this adds a synchronous call with a third-party API dependency.
    # Consider resolving coordinates in the caller and passing them in.

    # Imported here to avoid a circular import (geocode imports db).
    from hestia_utils import geocode as _geocode

    lat = lon = confidence = None
    try:
        geo = _geocode.geocode(address, city)
    except Exception as e:
        # Deliberately broad: a geocoding failure must never block the insert.
        logging.warning(f"Geocoding failed for {address!r}, {city!r}: {repr(e)}")
        geo = None
    if geo is not None:
        lat, lon, confidence = geo

    _write(
        "INSERT INTO hestia.homes (url, address, city, price, agency, date_added, sqm, lat, lon, geocode_confidence) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)",
        [url, address, city, str(price), agency, date_added, str(sqm), lat, lon, confidence],
    )
def add_user(telegram_id: int) -> None:
    """Insert a Telegram-enabled subscriber row for *telegram_id*."""
    # Use an explicit column list so this stays valid when new columns are added to hestia.subscribers.
    query = "INSERT INTO hestia.subscribers (telegram_enabled, telegram_id) VALUES (true, %s)"
    params = [str(telegram_id)]
    _write(query, params)
Expand Down Expand Up @@ -219,13 +232,23 @@ def set_filter_agencies(telegram_chat: Chat, agencies: set[str]) -> None:
_write("UPDATE hestia.subscribers SET filter_agencies = %s WHERE telegram_id = %s", [str(list(agencies)).replace("'", '"'), str(telegram_chat.id)])
def set_filter_minsqm(telegram_chat: Chat, min_sqm: int) -> None:
    """Store *min_sqm* as filter_min_sqm for the subscriber matching this chat's id."""
    params = [str(min_sqm), str(telegram_chat.id)]
    _write("UPDATE hestia.subscribers SET filter_min_sqm = %s WHERE telegram_id = %s", params)
def set_filter_location(telegram_chat: Chat, lat: float, lon: float, radius_km: float) -> None:
    """Store the location filter (center lat/lon and radius) for this chat's subscriber row.

    The three values are passed to the query unconverted; only the chat id is
    stringified.
    """
    query = "UPDATE hestia.subscribers SET filter_center_lat = %s, filter_center_lon = %s, filter_radius_km = %s WHERE telegram_id = %s"
    chat_id = str(telegram_chat.id)
    _write(query, [lat, lon, radius_km, chat_id])
def clear_filter_location(telegram_chat: Chat) -> None:
    """Reset the location filter columns to NULL for this chat's subscriber row."""
    query = "UPDATE hestia.subscribers SET filter_center_lat = NULL, filter_center_lon = NULL, filter_radius_km = NULL WHERE telegram_id = %s"
    chat_id = str(telegram_chat.id)
    _write(query, [chat_id])

def set_user_lang(telegram_chat: Chat, lang: Literal["en", "nl"]) -> None:
    """Persist *lang* for this chat's subscriber row and mirror it in LANG_CACHE."""
    chat_id = telegram_chat.id
    _write("UPDATE hestia.subscribers SET lang = %s WHERE telegram_id = %s", [lang, str(chat_id)])
    # Update the in-memory cache only after the database write was issued.
    LANG_CACHE[chat_id] = lang


FILTER_COLUMNS = ["filter_min_price", "filter_max_price", "filter_cities", "filter_agencies", "filter_min_sqm"]
FILTER_COLUMNS = ["filter_min_price", "filter_max_price", "filter_cities", "filter_agencies", "filter_min_sqm", "filter_center_lat", "filter_center_lon", "filter_radius_km"]


def _load_filter_defaults(cur) -> dict:
Expand Down Expand Up @@ -308,7 +331,10 @@ def link_account(telegram_id: int, code: str) -> Literal["success", "invalid_cod
filter_max_price = %s,
filter_cities = %s,
filter_agencies = %s,
filter_min_sqm = %s
filter_min_sqm = %s,
filter_center_lat = %s,
filter_center_lon = %s,
filter_radius_km = %s
WHERE telegram_id = %s
""",
[
Expand All @@ -317,6 +343,9 @@ def link_account(telegram_id: int, code: str) -> Literal["success", "invalid_cod
json.dumps(web_sub["filter_cities"]),
json.dumps(web_sub["filter_agencies"]),
web_sub["filter_min_sqm"],
web_sub.get("filter_center_lat"),
web_sub.get("filter_center_lon"),
web_sub.get("filter_radius_km"),
str(telegram_id),
],
)
Expand Down
159 changes: 159 additions & 0 deletions hestia/hestia_utils/geocode.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
"""Address geocoding for Dutch addresses via PDOK Locatieserver.

Results are cached in hestia.geocode_cache to avoid repeat PDOK calls for the
same address. Scrapers remain untouched; geocoding happens at DB-insert time
(see hestia_utils.db.add_home) and is best-effort — a miss stores NULL coords
so broadcast() can skip the radius check rather than dropping the home.
"""

import logging
import math
import re
from typing import Optional, Tuple

import requests

import hestia_utils.db as db


# PDOK Locatieserver free-text search endpoint and the settings used to call it.
PDOK_URL = "https://api.pdok.nl/bzk/locatieserver/search/v3_1/free"
PDOK_TIMEOUT = 5  # handed to requests.get as its timeout argument
MIN_SCORE = 7.0  # a "type:adres" hit scoring below this makes geocode() try its fallback lookup
USER_AGENT = "hestia-geocoder/1.0 (+https://hestia.bot)"

# Trailing unit/floor suffixes that normalize_address() tries to strip, in this order.
_UNIT_SUFFIX_RES = [
    re.compile(pattern, flags)
    for pattern, flags in (
        (r"\s+\d+(?:hg|bg|vg)\s*$", re.IGNORECASE),  # "3hg", "2bg"
        (r"\s+[A-Z]\d+\s*$", 0),  # "B2"
        (r"\s+(?:I{1,3}|IV|V|VI{1,3})\s*$", 0),  # Roman numerals for floor
        (r"\s+bis\s*$", re.IGNORECASE),  # NL addition
    )
]
# Captures the two numeric fields of a "POINT(<first> <second>)" string.
_POINT_RE = re.compile(r"POINT\s*\(\s*([-\d.]+)\s+([-\d.]+)\s*\)")


def normalize_address(address: str) -> str:
    """Return *address* with whitespace collapsed and at most one unit/floor suffix removed.

    The patterns in ``_UNIT_SUFFIX_RES`` are tried in order. The first one
    whose removal changes the string while still leaving a digit behind is
    applied, and no further patterns are tried. A removal that would leave no
    digit is rejected so the house number itself is never destroyed.

    Falsy input yields ``""``.
    """
    if not address:
        return ""
    collapsed = re.sub(r"\s+", " ", address.strip())
    for suffix_re in _UNIT_SUFFIX_RES:
        stripped = suffix_re.sub("", collapsed).strip()
        if stripped == collapsed:
            continue  # this pattern did not match
        if re.search(r"\d", stripped) is None:
            continue  # stripping would drop the only number
        # One pass is enough for the suffixes we recognize.
        return stripped
    return collapsed


def _parse_point(point_str: str) -> Optional[Tuple[float, float]]:
    """Parse a ``POINT(lon lat)`` string into ``(lat, lon)``.

    PDOK returns ``centroide_ll`` in this form; note that the order is swapped
    on the way out.

    Returns:
        ``(lat, lon)`` as floats, or ``None`` when the input is empty, contains
        no ``POINT(...)`` group, or its numeric fields are not valid floats.
    """
    if not point_str:
        return None
    m = _POINT_RE.search(point_str)
    if m is None:
        return None
    try:
        lon, lat = float(m.group(1)), float(m.group(2))
    except ValueError:
        # The character class in _POINT_RE also admits tokens such as "-" or
        # "1.2.3", which float() rejects; treat those as unparseable.
        return None
    return (lat, lon)


def _pdok_lookup(address: str, city: str, fq: str = "type:adres") -> Optional[Tuple[float, float, float]]:
    """Query PDOK Locatieserver for the top match of ``"<address> <city>"``.

    Args:
        address: Address part of the free-text query.
        city: City part of the query; may be empty.
        fq: Value sent to PDOK as the ``fq`` filter parameter.

    Returns:
        ``(lat, lon, score)`` for the first returned document, or ``None``
        when the query is empty, the request fails, the status is not 200,
        the body is not JSON of the expected shape, no document is returned,
        or the top document has no parseable ``centroide_ll``. A missing or
        non-numeric ``score`` is reported as ``0.0``.
    """
    query = f"{address} {city}".strip()
    if not query:
        return None
    try:
        r = requests.get(
            PDOK_URL,
            params={"q": query, "fq": fq, "rows": 1},
            headers={"User-Agent": USER_AGENT, "Accept": "application/json"},
            timeout=PDOK_TIMEOUT,
        )
    except requests.RequestException as e:
        logging.warning(f"PDOK request failed for {query!r}: {repr(e)}")
        return None

    if r.status_code != 200:
        logging.warning(f"PDOK returned {r.status_code} for {query!r}")
        return None

    try:
        payload = r.json()
    except ValueError:
        logging.warning(f"PDOK returned non-JSON body for {query!r}")
        return None

    # Walk the payload defensively: callers rely on "tuple or None", so a
    # structurally unexpected body must not surface as AttributeError/TypeError.
    response = payload.get("response", {}) if isinstance(payload, dict) else None
    docs = response.get("docs", []) if isinstance(response, dict) else None
    if not isinstance(docs, list):
        logging.warning(f"PDOK returned an unexpected JSON structure for {query!r}")
        return None
    if not docs:
        return None

    top = docs[0]
    if not isinstance(top, dict):
        logging.warning(f"PDOK returned an unexpected JSON structure for {query!r}")
        return None

    try:
        score = float(top.get("score", 0.0))
    except (TypeError, ValueError):
        # e.g. an explicit null score; treat it like a missing one.
        score = 0.0

    centroid = top.get("centroide_ll", "")
    coords = _parse_point(centroid) if isinstance(centroid, str) else None
    if coords is None:
        return None
    lat, lon = coords
    return (lat, lon, score)


def geocode(address: str, city: str) -> Optional[Tuple[float, float, float]]:
    """Resolve ``(address, city)`` to ``(lat, lon, confidence)``, or ``None``.

    Lookup order:
      1. hestia.geocode_cache, keyed on the raw address and city. A cached row
         with NULL coordinates is a remembered miss and yields ``None``.
      2. PDOK with ``fq="type:adres"`` on the normalized address.
      3. If step 2 found nothing or scored below ``MIN_SCORE``, a second PDOK
         query with ``fq="type:weergavenaam"``. It replaces the first result
         only when it exists and scores higher (or there was no first result),
         and is then reported with confidence ``0.0``.

    Whatever the outcome of steps 2-3, it is written to the cache before
    returning. An empty address returns ``None`` without touching the cache.
    """
    if not address:
        return None
    city = city or ""

    row = db.fetch_one(
        "SELECT lat, lon, confidence FROM hestia.geocode_cache WHERE address = %s AND city = %s",
        [address, city],
    )
    if row:
        has_coords = row["lat"] is not None and row["lon"] is not None
        if not has_coords:
            return None
        return (row["lat"], row["lon"], row.get("confidence") or 0.0)

    street = normalize_address(address)
    best = _pdok_lookup(street, city, fq="type:adres")
    needs_fallback = best is None or best[2] < MIN_SCORE
    if needs_fallback:
        # NOTE(review): confirm that "type:weergavenaam" is a filter value PDOK accepts.
        alt = _pdok_lookup(street, city, fq="type:weergavenaam")
        if alt is not None:
            if best is None or alt[2] > best[2]:
                best = (alt[0], alt[1], 0.0)

    if best is None:
        # NOTE(review): _pdok_lookup also returns None on transport errors, so
        # a transient outage is cached as a miss here -- confirm this is intended.
        _store_cache(address, city, None, None, None)
        return None

    lat, lon, score = best
    _store_cache(address, city, lat, lon, score)
    return (lat, lon, score)


def _store_cache(address: str, city: str, lat, lon, confidence) -> None:
    """Insert or refresh the hestia.geocode_cache row keyed by (address, city).

    lat, lon and confidence are written as given; geocode() passes None for
    all three to record a miss. fetched_at is set to now() on every write.
    """
    # NOTE(review): the repository owner asked why this write is an exception
    # to the rule that all writes go through helpers in db.py. It reaches into
    # the private db._write directly; consider exposing a helper there instead.
    db._write(
        """
        INSERT INTO hestia.geocode_cache (address, city, lat, lon, confidence, fetched_at)
        VALUES (%s, %s, %s, %s, %s, now())
        ON CONFLICT (address, city) DO UPDATE SET
        lat = EXCLUDED.lat,
        lon = EXCLUDED.lon,
        confidence = EXCLUDED.confidence,
        fetched_at = EXCLUDED.fetched_at
        """,
        [address, city, lat, lon, confidence],
    )


def haversine_km(lat1: float, lon1: float, lat2: float, lon2: float) -> float:
    """Return the great-circle distance in kilometers between two WGS84 points.

    Args:
        lat1, lon1: First point, in decimal degrees.
        lat2, lon2: Second point, in decimal degrees.

    Returns:
        Distance in kilometers along a sphere of radius 6371.0088 km.
    """
    r = 6371.0088
    phi1 = math.radians(lat1)
    phi2 = math.radians(lat2)
    dphi = math.radians(lat2 - lat1)
    dlambda = math.radians(lon2 - lon1)
    a = math.sin(dphi / 2) ** 2 + math.cos(phi1) * math.cos(phi2) * math.sin(dlambda / 2) ** 2
    # Rounding can push `a` an ulp outside [0, 1] (e.g. near-antipodal points),
    # which would make sqrt/asin raise a math domain error. Clamp it first.
    a = min(1.0, max(0.0, a))
    return 2 * r * math.asin(math.sqrt(a))
4 changes: 3 additions & 1 deletion hestia/hestia_utils/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,15 @@


class Home:
def __init__(self, address: str = '', city: str = '', url: str = '', agency: str = '', price: int = -1, sqm: int = -1):
def __init__(self, address: str = '', city: str = '', url: str = '', agency: str = '', price: int = -1, sqm: int = -1, lat: float = None, lon: float = None):
self.address = address
self.city = city
self.url = url
self.agency = agency
self.price = price
self.sqm = sqm
self.lat = lat
self.lon = lon

def __repr__(self) -> str:
return str(self)
Expand Down
Loading