Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Geocoder #654

Open
wants to merge 11 commits into
base: master
Choose a base branch
from
2 changes: 1 addition & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ before_install:
- redis-server --version
install:
- pip install --upgrade pip
- pip install mock nose>=0.10.1 pep8 flake8 coveralls
- pip install mock nose>=0.10.1 pep8 flake8 coveralls geocoder
- travis_retry pip install .
script:
- export AG_CONFIG=`pwd`/ag_config.txt.example
Expand Down
38 changes: 37 additions & 1 deletion amgut/lib/data_access/ag_data_access.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
from passlib.hash import bcrypt

from amgut.lib.data_access.sql_connection import TRN
from amgut.lib.geocode import geocode_aglogins


# character sets for kit id, passwords and verification codes
Expand Down Expand Up @@ -120,7 +121,8 @@ def check_login_exists(self, email):
value = value[0][0]
return None if value == [] else value

def addAGLogin(self, email, name, address, city, state, zip_, country):
def addAGLogin(self, email, name, address, city, state, zip_, country,
geocode=True):
"""Adds a new login or returns the login_id if email already exists

Parameters
Expand All @@ -139,6 +141,10 @@ def addAGLogin(self, email, name, address, city, state, zip_, country):
Postal code to register for user
country : str
Country to register for user
geocode : bool
Use address to obtain lat,lng,elev via geocoding API.
Switch off useful for unit testing.
Default: True

Returns
-------
Expand All @@ -157,6 +163,9 @@ def addAGLogin(self, email, name, address, city, state, zip_, country):
TRN.add(sql, [clean_email, name, address, city, state, zip_,
country])
ag_login_id = TRN.execute_fetchlast()
# geocode new address to retrieve lat,lng and elevation
if geocode:
geocode_aglogins(ag_login_id)
return ag_login_id

def getAGBarcodeDetails(self, barcode):
Expand Down Expand Up @@ -1304,3 +1313,30 @@ def ut_get_ag_login_id_from_barcode(self, barcode):
if not info:
raise ValueError('Barcode "%s" not in DB' % barcode)
return info[0][0]

def ut_get_location(self, ag_login_id):
    """Return the stored geolocation columns of one login (unit test helper)

    Parameters
    ----------
    ag_login_id : str
        A valid login ID, expected to be a UUID.

    Returns
    -------
    dict
        The latitude, longitude, elevation and cannot_geocode values of
        the first ag_login row matching the given login ID.

    Raises
    ------
    ValueError
        Unknown ag_login_id passed
    """
    query = """SELECT latitude, longitude, elevation, cannot_geocode
               FROM ag_login
               WHERE ag_login_id = %s"""
    with TRN:
        TRN.add(query, [ag_login_id])
        rows = TRN.execute_fetchindex()
        if rows:
            # only the first matching row is reported back
            return dict(rows[0])
        raise ValueError('ag_login_id not in database: %s' %
                         ag_login_id)
26 changes: 23 additions & 3 deletions amgut/lib/data_access/test/test_ag_data_access.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,20 +60,40 @@ def test_addAGLogin(self):
# make sure the ag_login_id is a UUID4 string
ag_login_id = self.ag_data.addAGLogin(
new_email, 'TESTDUDE', '123 fake test street', 'testcity',
'teststate', '1L2 2G3', 'United Kingdom')
'teststate', '1L2 2G3', 'United Kingdom', geocode=False)
as_uuid = UUID(ag_login_id)
self.assertTrue(as_uuid.version, 4)

# test existing user
ag_login_id = self.ag_data.addAGLogin(
'[email protected]', 'TESTOTHER', '123 fake test street', 'testcity',
'teststate', '1L2 2G3', 'United Kingdom')
'teststate', '1L2 2G3', 'United Kingdom', geocode=False)

obs = self.ag_data.addAGLogin(
'[email protected]', 'TESTDUDE', '123 fake test street', 'testcity',
'teststate', '1L2 2G3', 'United Kingdom')
'teststate', '1L2 2G3', 'United Kingdom', geocode=False)
self.assertEqual(ag_login_id, obs)

@rollback
def test_addAGLogin_geocode(self):
    """addAGLogin with geocode=True stores a location for the new login."""
    # insert a new user and automatically geocode
    ag_login = self.ag_data.addAGLogin(
        '[email protected]', 'TESTDUDE', '9500 Gilman Drive',
        'San Diego', 'CA', '', '', geocode=True)
    obs = self.ag_data.ut_get_location(ag_login)
    self.assertEqual(sorted(obs.keys()),
                     ['cannot_geocode', 'elevation', 'latitude',
                      'longitude'])
    self.assertIsNone(obs['cannot_geocode'])
    # The geocoding service may shift the coordinates of a broad address
    # slightly over time, so we only require the result to be in the
    # vicinity (0.05 degrees, i.e. a few km) of the expected position
    # instead of comparing floats for exact equality.
    self.assertAlmostEqual(obs['latitude'], 32.8747486, delta=0.05)
    self.assertAlmostEqual(obs['longitude'], -117.2420258, delta=0.05)
    self.assertIsNotNone(obs['elevation'])

    # insert a new user, which cannot be located, and automatically geocode
    ag_login2 = self.ag_data.addAGLogin(
        '[email protected]', 'TESTDUDE2', '',
        '', '', '', '', geocode=True)
    obs = self.ag_data.ut_get_location(ag_login2)
    exp = {'latitude': None, 'cannot_geocode': 'Y',
           'elevation': None, 'longitude': None}
    self.assertEqual(obs, exp)

def test_getAGBarcodeDetails_bad_barcode(self):
# test non-existant barcode
with self.assertRaises(ValueError):
Expand Down
120 changes: 120 additions & 0 deletions amgut/lib/geocode.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
import geocoder
import requests
from time import sleep

from amgut.lib.data_access.sql_connection import TRN


class GoogleAPILimitExceeded(Exception):
    """Raised when a geocoding lookup reports "OVER_QUERY_LIMIT"."""


def _lookup_location(address_str, session):
    """Geocode one address string via the geocoder package.

    Parameters
    ----------
    address_str : str
        Free text address to look up.
    session : requests.Session
        HTTP session that is re-used for all service calls.

    Returns
    -------
    tuple
        (lat, lng, elev, cannot_geocode). cannot_geocode is 'Y' if either
        the position or the elevation lookup failed, otherwise None.

    Raises
    ------
    GoogleAPILimitExceeded
        If either lookup reports "OVER_QUERY_LIMIT".
    """
    # lookup lat,lng by address
    g = geocoder.google(address_str, session=session)
    if g.error == "OVER_QUERY_LIMIT":
        raise GoogleAPILimitExceeded()
    if g.error is not None:
        return None, None, None, 'Y'
    lat, lng = g.latlng

    # lookup elevation in a second call
    e = geocoder.elevation(g.latlng, session=session)
    # the quota check must inspect the elevation result (e), not the
    # geocoding result (g), whose error is known to be None at this point
    if e.error == "OVER_QUERY_LIMIT":
        raise GoogleAPILimitExceeded()
    if e.error is not None:
        return lat, lng, None, 'Y'
    return lat, lng, e.elevation, None


def geocode_aglogins(ag_login_ids, force=False, sleepduration=0.1):
    """Retrieve locations for one or more ag_login_ids and store them in DB

    Parameters
    ----------
    ag_login_ids : str or [str]
        A single ag_login_id or a list of ag_login_ids for which locations
        should be retrieved.
    force : bool
        If True, locations are retrieved from the geoservice even if we
        already have them in our DB. Useful, if locations need to be
        updated.
        Default = False.
    sleepduration : float
        Number of seconds to sleep after each address lookup. This is
        necessary to avoid excessive google API calls within a too short
        period of time, which would be blocked by google.
        Default: 0.1

    Returns
    -------
    Stats about location lookups: dict {successful, cannot_geocode, checked,
    provided}, where
    - provided: is the number of passed ag_login_ids
    - checked: the number of ag_login_ids for which location retrieval was
               executed (successful or not). This number might be smaller
               than "provided", since unless force is set we only look up
               ag_login_ids that lack latitude, longitude or elevation in
               our DB.
    - successful: number of successfully retrieved locations,
                  which is <= "checked"
    - cannot_geocode: number of lookups that failed, i.e. that were flagged
                      with cannot_geocode = 'Y', which is <= "checked".

    Raises
    ------
    GoogleAPILimitExceeded
        If the geoservice reports "OVER_QUERY_LIMIT" for a lookup.
    """
    # if only one ag_login_id is passed as a string, we convert it to a one
    # element list to be compatible with the following code. type(u"") is
    # unicode on Python 2 and str on Python 3, so both string kinds match.
    if isinstance(ag_login_ids, (str, type(u""))):
        ag_login_ids = [ag_login_ids]

    stats = {'successful': 0,
             'cannot_geocode': 0,
             'checked': 0,
             'provided': len(ag_login_ids)}

    # nothing to look up; an empty tuple would also render as the invalid
    # SQL fragment "in ()"
    if not ag_login_ids:
        return stats

    # check which ag_logins are present in our DB for the given list of
    # ag_login_ids
    sql = """SELECT ag_login_id, address, zip, city, state, country
             FROM ag.ag_login
             WHERE ag_login_id in %s"""
    # skip ag_logins if we already have lat,long,elev in our DB unless we
    # enforce an update
    if not force:
        # the leading blank separates the condition from the "%s" above
        sql += """ AND (latitude IS NULL
                        OR longitude IS NULL
                        OR elevation IS NULL)"""

    sql_update = """UPDATE ag.ag_login
                    SET latitude = %s,
                        longitude = %s,
                        elevation = %s,
                        cannot_geocode = %s
                    WHERE ag_login_id = %s"""

    with TRN:
        TRN.add(sql, [tuple(ag_login_ids)])

        # A persistent HTTP connection is re-used for all lookups, as
        # recommended by the requests library for several calls:
        # http://docs.python-requests.org/en/master/user/advanced/
        # #session-objects
        with requests.Session() as session:
            for address in TRN.execute_fetchindex():
                # address[0] is the ag_login_id, the remaining columns form
                # the free text address
                address_str = " ".join([x for x in address[1:]
                                        if x is not None])
                lat, lng, elev, cannot_geocode = _lookup_location(
                    address_str, session)

                if cannot_geocode == 'Y':
                    stats['cannot_geocode'] += 1
                else:
                    stats['successful'] += 1
                stats['checked'] += 1

                # update the database with results we just obtained
                TRN.add(sql_update,
                        [lat, lng, elev, cannot_geocode, address[0]])

                # currently necessary to avoid exceeding max calls per second
                sleep(sleepduration)
        TRN.execute()

    return stats
155 changes: 155 additions & 0 deletions amgut/lib/test/test_geocoder.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
from unittest import TestCase, main

from amgut.lib.util import (rollback)
from amgut.lib.geocode import geocode_aglogins
from amgut.lib.data_access.ag_data_access import AGDataAccess
from math import radians, cos, sin, asin, sqrt


class TestGeocoder(TestCase):
    """Tests for geocode_aglogins against the test database.

    geocode_aglogins is called without any stubbing here, so these tests
    exercise the real geocoding lookups.
    """

    def setUp(self):
        self.ag_data = AGDataAccess()

    def tearDown(self):
        del self.ag_data

    def haversine(self, lon1, lat1, lon2, lat2):
        """
        Calculate the great circle distance between two points
        on the earth (specified in decimal degrees)

        Returns the distance in km, using an earth radius of 6367 km.
        """
        # convert decimal degrees to radians
        lon1, lat1, lon2, lat2 = map(radians, [lon1, lat1, lon2, lat2])
        # haversine formula
        dlon = lon2 - lon1
        dlat = lat2 - lat1
        a = sin(dlat/2)**2 + cos(lat1) * cos(lat2) * sin(dlon/2)**2
        c = 2 * asin(sqrt(a))
        km = 6367 * c
        return km

    @rollback
    def test_force(self):
        # test if force leads to updating existing locations in DB
        logins = ["578f5c16-c8e3-40a4-a618-9661605678b0",
                  "d8592c74-8037-2135-e040-8a80115d6401",
                  "d8592c74-803a-2135-e040-8a80115d6401",
                  "884cba01-9d8a-4beb-816f-c74d85fb7227"]
        login_id = self.ag_data.addAGLogin('[email protected]',
                                           'kurtjuergen_t1',
                                           '9500 Gilman Drive',
                                           'San Diego', 'CA', '', 'USA',
                                           geocode=False)
        logins.append(login_id)
        obs = geocode_aglogins(logins, force=True)
        exp = {'successful': 1, 'provided': 5, 'cannot_geocode': 4,
               'checked': 5}
        self.assertEqual(obs, exp)

    @rollback
    def test_multiple(self):
        # without force, only logins lacking a location are looked up
        logins = ["00164c87-73b3-deb2-e050-8a800c5d54e1",
                  "001b21ee-85a2-457c-adf6-492b42134376",
                  "0023cc03-3332-eec6-e050-8a800c5d3c04",
                  "d8592c74-967c-2135-e040-8a80115d6401",
                  "15370442-313f-452f-bf5b-cd155e3deefe"]
        login_id = self.ag_data.addAGLogin('[email protected]',
                                           'kurtjuergen_t2',
                                           '9500 Gilman Drive',
                                           'San Diego', 'CA', '', 'USA',
                                           geocode=False)
        logins.append(login_id)
        obs = geocode_aglogins(logins)
        exp = {'successful': 1, 'provided': 6, 'cannot_geocode': 3,
               'checked': 4}
        self.assertEqual(obs, exp)

    @rollback
    def test_cannot_geocode(self):
        # check that a fantasy address cannot be geocoded.
        # Therefore we first need to insert a new ag_login_id
        login_id = self.ag_data.addAGLogin('[email protected]',
                                           'kurtjuergen_t4',
                                           'skdgsisdf', '', '', '', '',
                                           geocode=False)
        old_loc = self.ag_data.ut_get_location(login_id)
        self.assertEqual(old_loc, {'latitude': None,
                                   'cannot_geocode': None,
                                   'elevation': None,
                                   'longitude': None})
        obs = geocode_aglogins(login_id)
        exp = {'successful': 0, 'provided': 1, 'cannot_geocode': 1,
               'checked': 1}
        # assertEqual compares keys AND values; assertItemsEqual on two
        # dicts would only compare their keys
        self.assertEqual(obs, exp)
        new_loc = self.ag_data.ut_get_location(login_id)
        exp_loc = {'latitude': None,
                   'cannot_geocode': 'Y',
                   'elevation': None,
                   'longitude': None}
        # a failed lookup stores no coordinates, so there is no distance to
        # compute here (haversine would fail on None); the stored row must
        # match exactly
        self.assertEqual(new_loc, exp_loc)

        # test that geocoding is re-done
        obs = geocode_aglogins(login_id)
        self.assertEqual(obs, exp)

    @rollback
    def test_update(self):
        # an ag_login_id without a location gets a new location assigned
        login_id = self.ag_data.addAGLogin('[email protected]',
                                           'kurtjuergen_t3',
                                           '9500 Gilman Drive',
                                           'San Diego', 'CA', '', 'USA',
                                           geocode=False)
        old_loc = self.ag_data.ut_get_location(login_id)
        self.assertEqual(old_loc, {'latitude': None,
                                   'cannot_geocode': None,
                                   'elevation': None,
                                   'longitude': None})
        obs = geocode_aglogins(login_id)
        exp = {'successful': 1, 'provided': 1, 'cannot_geocode': 0,
               'checked': 1}
        self.assertEqual(obs, exp)
        new_loc = self.ag_data.ut_get_location(login_id)
        exp_loc = {'latitude': 32.8747486,
                   'cannot_geocode': None,
                   'elevation': 126.171813964844,
                   'longitude': -117.2420258}
        distance = self.haversine(exp_loc['longitude'],
                                  exp_loc['latitude'],
                                  new_loc['longitude'],
                                  new_loc['latitude'])
        # broad addresses like "Gilman Drive 9500" might return different
        # locations over time, since it is the address of the whole UCSD
        # campus. Therefore, checking exact lat,lng will fail once the map
        # service slightly changes its algorithms. Instead we check here if
        # the location is in close vicinity of what we expect: less than 5km
        self.assertLess(distance, 5.0)

    @rollback
    def test_noupdate(self):
        # an ag_login_id already with location does not get updated
        login_id = "000fc4cd-8fa4-db8b-e050-8a800c5d02b5"
        old_loc = self.ag_data.ut_get_location(login_id)
        self.assertEqual(old_loc, {'latitude': 22.28661,
                                   'cannot_geocode': None,
                                   'elevation': 232.096176147461,
                                   'longitude': -80.73577})
        obs = geocode_aglogins(login_id)
        exp = {'successful': 0, 'provided': 1, 'cannot_geocode': 0,
               'checked': 0}
        self.assertEqual(obs, exp)
        new_loc = self.ag_data.ut_get_location(login_id)
        self.assertEqual(new_loc, old_loc)


# Run the test cases of this module when it is executed as a script.
if __name__ == '__main__':
    main()