diff --git a/.gitignore b/.gitignore index cb4ccda..54e5a81 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,6 @@ data node_modules package-lock.json +.npm-cache +.geonames-build +.DS_Store diff --git a/README.md b/README.md index 554bbcc..3ddcbd7 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,8 @@ # Offline Geocoder -Node library for reverse geocoding. Designed to be used offline (for example -embedded in a desktop or mobile application) - no web requests are made to -perform a lookup. +Node and React Native library for offline geocoding. Designed to be used +offline (for example embedded in a desktop or mobile application) — no web +requests are made to perform a lookup. ## Data @@ -32,20 +32,33 @@ lookups per second with a single process. npm install --save offline-geocoder ``` +For Node you also need `sqlite3`: + +``` +npm install --save sqlite3 +``` + +For Expo / React Native, install `expo-sqlite` instead: + +``` +npx expo install expo-sqlite +``` + You also need to obtain a database which isn't included in the package, to -generate your own take a look in `scripts`. +generate your own take a look at the [Generating the database](#generating-the-database) +section below. ## Usage When you initialize the library you need to pass the location of the database: ```javascript -const geocoder = require('offline-geocoder')({ database: 'data/geodata.db' }) +const geocoder = require('offline-geocoder')({ database: 'data/geocoder.sqlite' }) ``` ### Reverse Geocoding -To perform a revese geocode lookup just pass the coordinates: +To perform a reverse geocode lookup just pass the coordinates: ```javascript geocoder.reverse(41.89, 12.49) @@ -76,6 +89,75 @@ geocoder.reverse(41.89, 12.49, function(error, result) { }) ``` +### Forward Geocoding + +Forward geocoding matches a city name to its canonical entry. Requires a +database generated with the updated schema (see below). 
+ +```javascript +geocoder.forward('rome') + .then(function(result) { + console.log(result) + }) +``` + +Resolves to `undefined` when no match is found, or when using an older database +without the required columns. + +### Location Lookup + +Look up a city by its GeoNames id: + +```javascript +geocoder.location().find(3169070) +geocoder.location.find('geonames:3169070') +``` + +Resolves to `undefined` when the id doesn't exist. Both numeric ids and +`geonames:` strings are accepted — use the prefixed form as a stable +grouping key across datasets. + +## Expo / React Native + +The React Native entrypoint avoids Node-only modules and takes an already-opened database (`SQLite` below is the `expo-sqlite` module): + +```javascript +const createGeocoder = require('offline-geocoder/expo') + +const db = await SQLite.openDatabaseAsync('geocoder.sqlite') +const geocoder = createGeocoder({ db: db }) + +geocoder.reverse(41.89, 12.49) + .then(function(result) { + console.log(result) + }) +``` + +You'll need to bundle the SQLite database file with your app assets and copy +it to a location accessible by `expo-sqlite` on first launch. + +## Generating the database + +The repo includes a script to generate a SQLite database from GeoNames dumps: + +```bash +./scripts/generate_geonames.sh data/geocoder.sqlite +``` + +Environment variables for customization: + +| Variable | Default | Description | +|---|---|---| +| `GEONAMES_DATASET` | `cities1000` | GeoNames dump file to use | +| `GEONAMES_WORKDIR` | current directory | Working directory for temp files | +| `GEONAMES_DOWNLOAD` | `1` | Set to `0` to skip downloads | +| `GEONAMES_FEATURE_CODES` | `PPLA,PPLA2,PPLA3,PPLA4,PPLA5,PPLC` | Feature codes to keep | +| `GEONAMES_MIN_POPULATION` | `0` | Minimum population filter | +| `GEONAMES_INCLUDE_ADMIN1` | `1` | Set to `0` to skip admin1 data | + +The default feature codes exclude `PPL`, which can include neighbourhood-like +populated places. The schema is defined in [`scripts/schema.sql`](scripts/schema.sql). 
+ ## License This library is licensed under [the MIT license](https://github.com/lucaspiller/offline-geocoder/blob/master/LICENSE). diff --git a/package.json b/package.json index 767a09c..e9af3f0 100644 --- a/package.json +++ b/package.json @@ -1,14 +1,30 @@ { "name": "offline-geocoder", "version": "1.0.0", - "description": "Node library for offline geocoding", + "description": "Offline reverse and forward geocoding for Node and React Native", "repository": "https://github.com/lucaspiller/offline-geocoder", "main": "src/index.js", - "dependencies": { - "sqlite3": "^4.0.0" + "react-native": "src/expo.js", + "exports": { + ".": { + "react-native": "./src/expo.js", + "require": "./src/index.js", + "default": "./src/index.js" + }, + "./expo": "./src/expo.js" }, + "peerDependencies": { + "sqlite3": "^5.1.7" + }, + "peerDependenciesMeta": { + "sqlite3": { + "optional": true + } + }, + "dependencies": {}, "devDependencies": { - "jasmine": "^3.1.0" + "jasmine": "^5.12.0", + "sqlite3": "^5.1.7" }, "scripts": { "test": "jasmine" diff --git a/scripts/generate_geonames.sh b/scripts/generate_geonames.sh index 20f4413..ebb1666 100755 --- a/scripts/generate_geonames.sh +++ b/scripts/generate_geonames.sh @@ -1,104 +1,159 @@ #!/bin/bash +set -euo pipefail + +# Generates a geocoder SQLite database from GeoNames dump files. +# Usage: +# ./scripts/generate_geonames.sh [output_db_path] +# +# Environment variables: +# GEONAMES_DATASET cities dump name without extension (default: cities1000) +# GEONAMES_WORKDIR working dir for downloads and temp files (default: current dir) +# GEONAMES_DOWNLOAD set to 0 to skip downloads and use existing local files +# GEONAMES_FEATURE_CODES comma-separated GeoNames feature codes to keep +# (default: PPLA,PPLA2,PPLA3,PPLA4,PPLA5,PPLC) +# Note: PPL can include neighborhood-like entries. 
+# GEONAMES_MIN_POPULATION minimum population to keep (default: 0) +# GEONAMES_INCLUDE_ADMIN1 set to 0 to skip admin1 import entirely (default: 1) + +GEONAMES_DATASET="${GEONAMES_DATASET:-cities1000}" +GEONAMES_WORKDIR="${GEONAMES_WORKDIR:-$(pwd)}" +GEONAMES_DOWNLOAD="${GEONAMES_DOWNLOAD:-1}" +GEONAMES_FEATURE_CODES="${GEONAMES_FEATURE_CODES:-PPLA,PPLA2,PPLA3,PPLA4,PPLA5,PPLC}" +GEONAMES_MIN_POPULATION="${GEONAMES_MIN_POPULATION:-0}" +GEONAMES_INCLUDE_ADMIN1="${GEONAMES_INCLUDE_ADMIN1:-1}" +OUTPUT="${1:-db.sqlite}" + +# Resolve to absolute so the later cd into GEONAMES_WORKDIR doesn't break it +case "${OUTPUT}" in + /*) ;; + *) OUTPUT="$(pwd)/${OUTPUT}" ;; +esac + +DATA_FILE="${GEONAMES_DATASET}.txt" +ADMIN1_FILE="admin1CodesASCII.txt" +COUNTRY_FILE="countryInfo.txt" + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +SCHEMA_FILE="${SCRIPT_DIR}/schema.sql" +TMP_DIR="${GEONAMES_WORKDIR}/.geonames-build" +SOURCE_DIR="${TMP_DIR}/source" + +mkdir -p "${GEONAMES_WORKDIR}" "${TMP_DIR}" "${SOURCE_DIR}" + +download_if_missing() { + local file="$1" + local url="$2" + + if [[ -f "${SOURCE_DIR}/${file}" ]]; then + echo "Using existing ${file}" + return + fi -DATA="cities1000.txt" -ADMIN1="admin1CodesASCII.txt" -COUNTRIES="countryInfo.txt" -OUTPUT="db.sqlite" - -if [ ! -f "$DATA" ]; then - echo "Downloading cities from Geonames..." - wget "http://download.geonames.org/export/dump/cities1000.zip" - unzip "cities1000.zip" -else - echo "Using existing $DATA" -fi + if [[ "${GEONAMES_DOWNLOAD}" != "1" ]]; then + echo "Missing ${file} and GEONAMES_DOWNLOAD=${GEONAMES_DOWNLOAD}." >&2 + echo "Provide local files in ${SOURCE_DIR} or enable downloads." >&2 + exit 1 + fi -if [ ! -f "$ADMIN1" ]; then - echo "Downloading admin1 from Geonames..." - wget "http://download.geonames.org/export/dump/admin1CodesASCII.txt" -else - echo "Using existing $ADMIN1" -fi + echo "Downloading ${file}..." + curl -fsSL "${url}" -o "${SOURCE_DIR}/${file}" +} -if [ ! 
-f "$COUNTRIES" ]; then - echo "Downloading countries from Geonames..." - wget "http://download.geonames.org/export/dump/countryInfo.txt" -else - echo "Using existing $COUNTRIES" -fi +download_and_extract_dataset_if_missing() { + if [[ -f "${SOURCE_DIR}/${DATA_FILE}" ]]; then + echo "Using existing ${DATA_FILE}" + return + fi -if [ -f "$OUTPUT" ]; then - echo - echo "The file $OUTPUT already exists." - read -p "Do you want to override it? (y/N) " -n 1 -r - echo - if [[ ! $REPLY =~ ^[Yy]$ ]]; then + if [[ "${GEONAMES_DOWNLOAD}" != "1" ]]; then + echo "Missing ${DATA_FILE} and GEONAMES_DOWNLOAD=${GEONAMES_DOWNLOAD}." >&2 + echo "Provide local files in ${SOURCE_DIR} or enable downloads." >&2 exit 1 fi - rm "$OUTPUT" + local zip_file="${GEONAMES_DATASET}.zip" + echo "Downloading ${zip_file}..." + curl -fsSL "https://download.geonames.org/export/dump/${zip_file}" -o "${SOURCE_DIR}/${zip_file}" + unzip -o -q "${SOURCE_DIR}/${zip_file}" -d "${SOURCE_DIR}" +} + +download_and_extract_dataset_if_missing +download_if_missing "${COUNTRY_FILE}" "https://download.geonames.org/export/dump/${COUNTRY_FILE}" +if [[ "${GEONAMES_INCLUDE_ADMIN1}" == "1" ]]; then + download_if_missing "${ADMIN1_FILE}" "https://download.geonames.org/export/dump/${ADMIN1_FILE}" fi -echo -echo "Generating..." 
- -awk 'BEGIN { FS="\t"; OFS=";" } { gsub("\"", "", $2); gsub(";", "", $2); print $1,$2,$9,$11 }' $DATA > features.tsv -awk 'BEGIN { FS="\t"; OFS=";" } { print $1,$5,$6 }' $DATA > coordinates.tsv -awk 'BEGIN { FS="\t"; OFS=";" } { split($1, id, "."); gsub("\"", "", $2); gsub(";", "", $2); print id[1],id[2],$2 }' $ADMIN1 > admin1.tsv -grep -vE '^#' $COUNTRIES | awk 'BEGIN { FS="\t"; OFS=";" } { print $1,$5 }' > countries.tsv - -echo ' -CREATE TABLE coordinates( - feature_id INTEGER, - latitude REAL, - longitude REAL, - PRIMARY KEY (feature_id) -); - -CREATE TABLE features( - id INTEGER, - name TEXT, - country_id TEXT, - admin1_id INTEGER, - PRIMARY KEY (id) -); - -CREATE TABLE admin1( - country_id TEXT, - id INTEGER, - name TEXT, - PRIMARY KEY (country_id, id) -); - -CREATE TABLE countries( - id TEXT, - name TEXT, - PRIMARY KEY (id) -); - -CREATE VIEW everything AS - SELECT - features.id, - features.name, - admin1.id AS admin1_id, - admin1.name AS admin1_name, - countries.id AS country_id, - countries.name AS country_name, - coordinates.latitude AS latitude, - coordinates.longitude AS longitude - FROM features - LEFT JOIN countries ON features.country_id = countries.id - LEFT JOIN admin1 ON features.country_id = admin1.country_id AND features.admin1_id = admin1.id - JOIN coordinates ON features.id = coordinates.feature_id; +echo "Preparing TSV files in ${TMP_DIR}..." 
+echo "Feature codes: ${GEONAMES_FEATURE_CODES}" +echo "Minimum population: ${GEONAMES_MIN_POPULATION}" +echo "Include admin1: ${GEONAMES_INCLUDE_ADMIN1}" +rm -f "${TMP_DIR}/features.tsv" "${TMP_DIR}/coordinates.tsv" +awk -v feature_codes="${GEONAMES_FEATURE_CODES}" -v min_population="${GEONAMES_MIN_POPULATION}" -v include_admin1="${GEONAMES_INCLUDE_ADMIN1}" -v features_out="${TMP_DIR}/features.tsv" -v coordinates_out="${TMP_DIR}/coordinates.tsv" 'BEGIN { + FS="\t"; + OFS=";"; + split(feature_codes, raw_codes, ","); + for (i in raw_codes) { + code = raw_codes[i]; + gsub(/^[[:space:]]+|[[:space:]]+$/, "", code); + if (code != "") { + allowed_codes[code] = 1; + } + } +} +{ + if (!($8 in allowed_codes)) { + next; + } + + population = ($15 == "" ? 0 : $15); + if (population < min_population) { + next; + } + + gsub("\"", "", $2); + gsub(";", "", $2); + gsub("\"", "", $3); + gsub(";", "", $3); + admin1_id = (include_admin1 == "1" ? $11 : ""); + print $1,$2,$3,$9,admin1_id,population >> features_out; + print $1,$5,$6 >> coordinates_out; +}' "${SOURCE_DIR}/${DATA_FILE}" + +if [[ "${GEONAMES_INCLUDE_ADMIN1}" == "1" ]]; then + awk 'BEGIN { FS="\t"; OFS=";" } + { + split($1, id, "."); + gsub("\"", "", $2); + gsub(";", "", $2); + print id[1],id[2],$2 + }' "${SOURCE_DIR}/${ADMIN1_FILE}" > "${TMP_DIR}/admin1.tsv" +else + : > "${TMP_DIR}/admin1.tsv" +fi -.separator ";" -.import coordinates.tsv coordinates -.import features.tsv features -.import admin1.tsv admin1 -.import countries.tsv countries +grep -vE '^#' "${SOURCE_DIR}/${COUNTRY_FILE}" | awk 'BEGIN { FS="\t"; OFS=";" } +{ + gsub("\"", "", $5); + gsub(";", "", $5); + print $1,$5 +}' > "${TMP_DIR}/countries.tsv" -CREATE INDEX coordinates_lat_lng ON coordinates (latitude, longitude); -' | sqlite3 "$OUTPUT" +rm -f "${OUTPUT}" +echo "Building ${OUTPUT}..." -COUNT=`sqlite3 "$OUTPUT" "SELECT COUNT(*) FROM features;"` -echo "Created $OUTPUT with $COUNT features." 
+{ + cat "${SCHEMA_FILE}" + cat <<'SQL' +.separator ";" +.import .geonames-build/coordinates.tsv coordinates +.import .geonames-build/features.tsv features +.import .geonames-build/admin1.tsv admin1 +.import .geonames-build/countries.tsv countries +SQL +} | ( + cd "${GEONAMES_WORKDIR}" && + sqlite3 "${OUTPUT}" +) + +COUNT="$(sqlite3 "${OUTPUT}" "SELECT COUNT(*) FROM features;")" +echo "Created ${OUTPUT} with ${COUNT} features." diff --git a/scripts/schema.sql b/scripts/schema.sql new file mode 100644 index 0000000..4457d55 --- /dev/null +++ b/scripts/schema.sql @@ -0,0 +1,48 @@ +CREATE TABLE coordinates( + feature_id INTEGER PRIMARY KEY, + latitude REAL NOT NULL, + longitude REAL NOT NULL +); + +CREATE TABLE features( + id INTEGER PRIMARY KEY, + name TEXT NOT NULL, + asciiname TEXT, + country_id TEXT NOT NULL, + admin1_id INTEGER, + population INTEGER NOT NULL DEFAULT 0 +); + +CREATE TABLE admin1( + country_id TEXT NOT NULL, + id INTEGER NOT NULL, + name TEXT NOT NULL, + PRIMARY KEY (country_id, id) +); + +CREATE TABLE countries( + id TEXT PRIMARY KEY, + name TEXT NOT NULL +); + +CREATE VIEW everything AS + SELECT + features.id AS id, + features.name AS name, + features.asciiname AS asciiname, + features.population AS population, + admin1.id AS admin1_id, + admin1.name AS admin1_name, + countries.id AS country_id, + countries.name AS country_name, + coordinates.latitude AS latitude, + coordinates.longitude AS longitude + FROM features + LEFT JOIN countries ON features.country_id = countries.id + LEFT JOIN admin1 ON features.country_id = admin1.country_id AND features.admin1_id = admin1.id + JOIN coordinates ON features.id = coordinates.feature_id; + +CREATE INDEX coordinates_lat_lng ON coordinates (latitude, longitude); +CREATE INDEX features_name_nocase ON features (name COLLATE NOCASE); +CREATE INDEX features_asciiname_nocase ON features (asciiname COLLATE NOCASE); +CREATE INDEX features_population_desc ON features (population DESC); diff --git 
a/spec/expo_adapter_spec.js b/spec/expo_adapter_spec.js new file mode 100644 index 0000000..0500da4 --- /dev/null +++ b/spec/expo_adapter_spec.js @@ -0,0 +1,28 @@ +const createExpoGeocoder = require('../src/expo.js'); +const fixtureDb = require('./helpers/fixture_db'); + +describe('expo adapter', () => { + var fixture; + + beforeAll((done) => { + fixtureDb.createFixtureDatabase().then(function(f) { + fixture = f; + done(); + }); + }); + + afterAll(() => { + fixture.cleanup(); + }); + + it('runs queries through the getAllAsync shim', (done) => { + var db = fixtureDb.createExpoDb(fixture.databasePath); + var geocoder = createExpoGeocoder({ db: db }); + + geocoder.reverse(41.89, 12.49) + .then(function(result) { + expect(result.id).toEqual(3169070); + done(); + }); + }); +}); diff --git a/spec/forward_spec.js b/spec/forward_spec.js new file mode 100644 index 0000000..8746659 --- /dev/null +++ b/spec/forward_spec.js @@ -0,0 +1,49 @@ +const createGeocoder = require('../src/index.js'); +const fixtureDb = require('./helpers/fixture_db'); + +describe('geocoder.forward', () => { + var fixture, geocoder; + + beforeAll((done) => { + fixtureDb.createFixtureDatabase().then(function(f) { + fixture = f; + geocoder = createGeocoder({ database: fixture.databasePath }); + done(); + }); + }); + + afterAll(() => { + fixture.cleanup(); + }); + + it('returns the best match for an exact query', (done) => { + geocoder.forward('Rome') + .then(function(result) { + expect(result).toEqual({ + id: 3169070, + name: 'Rome', + formatted: 'Rome, Latium, Italy', + country: { id: 'IT', name: 'Italy' }, + admin1: { id: 7, name: 'Latium' }, + coordinates: { latitude: 41.89193, longitude: 12.51133 } + }); + done(); + }); + }); + + it('falls back to fuzzy matching', (done) => { + geocoder.forward('angeles') + .then(function(result) { + expect(result.id).toEqual(5368361); + done(); + }); + }); + + it('returns undefined when nothing matches', (done) => { + geocoder.forward('xyzzy-not-a-city') + 
.then(function(result) { + expect(result).toBeUndefined(); + done(); + }); + }); +}); diff --git a/spec/helpers/fixture_db.js b/spec/helpers/fixture_db.js new file mode 100644 index 0000000..e5c8ba1 --- /dev/null +++ b/spec/helpers/fixture_db.js @@ -0,0 +1,78 @@ +"use strict"; + +const fs = require('fs') +const os = require('os') +const path = require('path') +const sqlite3 = require('sqlite3') + +const schemaSql = fs.readFileSync(path.join(__dirname, '../../scripts/schema.sql'), 'utf8') + +const fixtureSql = ` +INSERT INTO countries(id, name) VALUES ('IT', 'Italy'), ('FR', 'France'), ('US', 'United States'); +INSERT INTO admin1(country_id, id, name) VALUES + ('IT', 7, 'Latium'), + ('FR', 11, 'Ile-de-France'), + ('US', 36, 'New York'), + ('US', 5, 'California'); +INSERT INTO features(id, name, asciiname, country_id, admin1_id, population) VALUES + (3169070, 'Rome', 'Rome', 'IT', 7, 2873000), + (2988507, 'Paris', 'Paris', 'FR', 11, 2138551), + (5128581, 'New York City', 'New York City', 'US', 36, 8175133), + (5368361, 'Los Angeles', 'Los Angeles', 'US', 5, 3792621); +INSERT INTO coordinates(feature_id, latitude, longitude) VALUES + (3169070, 41.89193, 12.51133), + (2988507, 48.85341, 2.3488), + (5128581, 40.71427, -74.00597), + (5368361, 34.05223, -118.24368); +` + +function exec(db, sql) { + return new Promise(function(resolve, reject) { + db.exec(sql, function(err) { err ? reject(err) : resolve() }) + }) +} + +function close(db) { + return new Promise(function(resolve, reject) { + db.close(function(err) { err ? 
reject(err) : resolve() }) + }) +} + +function createFixtureDatabase() { + var dir = fs.mkdtempSync(path.join(os.tmpdir(), 'offline-geocoder-')) + var dbPath = path.join(dir, 'fixture.sqlite') + var db = new sqlite3.Database(dbPath) + + return exec(db, schemaSql) + .then(function() { return exec(db, fixtureSql) }) + .then(function() { return close(db) }) + .then(function() { + return { + databasePath: dbPath, + cleanup: function() { + fs.rmSync(dir, { recursive: true, force: true }) + } + } + }) +} + +// Minimal shim that looks like an expo-sqlite database so we can test the +// Expo adapter without pulling in the real package. +function createExpoDb(dbPath) { + var db = new sqlite3.Database(dbPath) + return { + getAllAsync: function(sql, params) { + return new Promise(function(resolve, reject) { + db.all(sql, params || [], function(err, rows) { + err ? reject(err) : resolve(rows || []) + }) + }) + }, + closeAsync: function() { return close(db) } + } +} + +module.exports = { + createFixtureDatabase: createFixtureDatabase, + createExpoDb: createExpoDb +} diff --git a/spec/location_spec.js b/spec/location_spec.js index 72c9de1..b53a637 100644 --- a/spec/location_spec.js +++ b/spec/location_spec.js @@ -1,6 +1,21 @@ -const geocoder = require('../src/index.js')(); +const createGeocoder = require('../src/index.js'); +const fixtureDb = require('./helpers/fixture_db'); describe('geocoder.location', () => { + var fixture, geocoder; + + beforeAll((done) => { + fixtureDb.createFixtureDatabase().then(function(f) { + fixture = f; + geocoder = createGeocoder({ database: fixture.databasePath }); + done(); + }); + }); + + afterAll(() => { + fixture.cleanup(); + }); + describe('.find', () => { it('performs a lookup by id', (done) => { geocoder.location().find(3169070) @@ -17,6 +32,14 @@ describe('geocoder.location', () => { }); }); + it('accepts geonames: prefixed ids', (done) => { + geocoder.location.find('geonames:3169070') + .then(function(result) { + 
expect(result.id).toEqual(3169070); + done(); + }); + }); + it("resolves undefined when a location can't be found", (done) => { geocoder.location().find(-1) .then(function(result) { diff --git a/spec/reverse_spec.js b/spec/reverse_spec.js index a5c40e0..673c45d 100644 --- a/spec/reverse_spec.js +++ b/spec/reverse_spec.js @@ -1,6 +1,21 @@ -const geocoder = require('../src/index.js')(); +const createGeocoder = require('../src/index.js'); +const fixtureDb = require('./helpers/fixture_db'); describe('geocoder.reverse', () => { + var fixture, geocoder; + + beforeAll((done) => { + fixtureDb.createFixtureDatabase().then(function(f) { + fixture = f; + geocoder = createGeocoder({ database: fixture.databasePath }); + done(); + }); + }); + + afterAll(() => { + fixture.cleanup(); + }); + it('performs reverse geocoding on a latitude and longitude', (done) => { geocoder.reverse(41.89, 12.49) .then(function(result) { diff --git a/spec/schema_spec.js b/spec/schema_spec.js new file mode 100644 index 0000000..667f07e --- /dev/null +++ b/spec/schema_spec.js @@ -0,0 +1,45 @@ +const sqlite3 = require('sqlite3'); +const fixtureDb = require('./helpers/fixture_db'); + +describe('generated schema', () => { + var fixture, db; + + beforeAll((done) => { + fixtureDb.createFixtureDatabase().then(function(f) { + fixture = f; + db = new sqlite3.Database(fixture.databasePath); + done(); + }); + }); + + afterAll((done) => { + db.close(function() { + fixture.cleanup(); + done(); + }); + }); + + it('has asciiname and population in the everything view', (done) => { + db.all('PRAGMA table_info(everything)', [], function(err, cols) { + var names = cols.map(function(c) { return c.name }); + expect(names).toContain('asciiname'); + expect(names).toContain('population'); + done(); + }); + }); + + it('creates indexes for reverse and forward lookups', (done) => { + db.all("PRAGMA index_list('coordinates')", [], function(err, coordIndexes) { + db.all("PRAGMA index_list('features')", [], function(err, 
featIndexes) { + var coordNames = coordIndexes.map(function(i) { return i.name }); + var featNames = featIndexes.map(function(i) { return i.name }); + + expect(coordNames).toContain('coordinates_lat_lng'); + expect(featNames).toContain('features_name_nocase'); + expect(featNames).toContain('features_asciiname_nocase'); + expect(featNames).toContain('features_population_desc'); + done(); + }); + }); + }); +}); diff --git a/src/expo.js b/src/expo.js new file mode 100644 index 0000000..0d66f67 --- /dev/null +++ b/src/expo.js @@ -0,0 +1,75 @@ +"use strict"; + +const reverse = require('./reverse') +const forward = require('./forward') +const findLocation = require('./location').find + +// Wraps an expo-sqlite database to match the node-sqlite3 callback +// interface that reverse.js, forward.js and location.js expect. +function wrapExpoDb(expoDb) { + return { + all: function(sql, params, callback) { + expoDb.getAllAsync(sql, params || []) + .then(function(rows) { callback(null, rows) }) + .catch(function(err) { callback(err) }) + }, + close: function(callback) { + if (typeof expoDb.closeAsync === 'function') { + expoDb.closeAsync() + .then(function() { if (callback) callback(null) }) + .catch(function(err) { if (callback) callback(err) }) + } else if (callback) { + callback(null) + } + } + } +} + +function ExpoGeocoder(options) { + var opts = options || {} + var expoDb = opts.db || opts.database + + if (!expoDb || typeof expoDb.getAllAsync !== 'function') { + throw new Error('Pass an opened expo-sqlite db via { db }.') + } + + this.db = wrapExpoDb(expoDb) +} + +ExpoGeocoder.prototype.reverse = function(latitude, longitude, callback) { + return reverse(this, latitude, longitude, callback) +} + +ExpoGeocoder.prototype.forward = function(query, callback) { + return forward(this, query, callback) +} + +ExpoGeocoder.prototype.location = function() { + const _this = this + + return { + find: function(locationId) { + return findLocation(_this, locationId) + } + } +} + +function 
createExpoGeocoder(options) { + var instance = new ExpoGeocoder(options) + + var locationFn = function() { + return { + find: function(locationId) { + return findLocation(instance, locationId) + } + } + } + locationFn.find = function(locationId) { + return findLocation(instance, locationId) + } + instance.location = locationFn + + return instance +} + +module.exports = createExpoGeocoder; diff --git a/src/forward.js b/src/forward.js new file mode 100644 index 0000000..8203d97 --- /dev/null +++ b/src/forward.js @@ -0,0 +1,131 @@ +"use strict"; + +const formatLocation = require('./location').format + +// Forward geocoding: tries an exact match on name/asciiname first, then +// falls back to prefix and substring matching. +// +// Requires the updated schema with asciiname and population columns. +// Returns undefined on databases without those columns. +function findByName(geocoder, query, callback) { + return new Promise(function(resolve, reject) { + var q = typeof query === 'string' ? query.trim() : '' + if (!q) { + if (typeof(callback) == 'function') { + callback(undefined, undefined) + } else { + resolve(undefined) + } + return + } + + // Check if the database supports forward search (cached per geocoder) + if (geocoder._forwardSupported === false) { + if (typeof(callback) == 'function') { + callback(undefined, undefined) + } else { + resolve(undefined) + } + return + } + + function doSearch() { + var exactQuery = `SELECT * FROM everything + WHERE name = ? COLLATE NOCASE OR asciiname = ? COLLATE NOCASE + ORDER BY + CASE WHEN name = ? COLLATE NOCASE THEN 0 + WHEN asciiname = ? 
COLLATE NOCASE THEN 1 + ELSE 2 END, + population DESC, id ASC + LIMIT 1` + + geocoder.db.all(exactQuery, [q, q, q, q], function(err, rows) { + if (err) { + if (typeof(callback) == 'function') { + callback(err, undefined) + } else if (typeof(reject) == 'function') { + reject(err) + } + return + } + + if (rows && rows[0]) { + const result = formatLocation(rows[0]) + if (typeof(callback) == 'function') { + callback(undefined, result) + } else { + resolve(result) + } + return + } + + // Fall back to prefix / substring match + var prefix = q + '%' + var contains = '%' + q + '%' + var fuzzyQuery = `SELECT * FROM everything + WHERE name LIKE ? COLLATE NOCASE + OR name LIKE ? COLLATE NOCASE + OR asciiname LIKE ? COLLATE NOCASE + OR asciiname LIKE ? COLLATE NOCASE + ORDER BY + CASE WHEN name LIKE ? COLLATE NOCASE THEN 0 + WHEN asciiname LIKE ? COLLATE NOCASE THEN 1 + ELSE 2 END, + population DESC, LENGTH(name) ASC, id ASC + LIMIT 1` + + geocoder.db.all(fuzzyQuery, [prefix, contains, prefix, contains, prefix, prefix], function(err, rows) { + if (err) { + if (typeof(callback) == 'function') { + callback(err, undefined) + } else if (typeof(reject) == 'function') { + reject(err) + } + } else { + const result = formatResult(rows) + if (typeof(callback) == 'function') { + callback(undefined, result) + } else { + resolve(result) + } + } + }) + }) + } + + if (geocoder._forwardSupported === true) { + doSearch() + return + } + + // Probe for the asciiname column (first call only) + geocoder.db.all('SELECT asciiname FROM everything LIMIT 0', [], function(err) { + geocoder._forwardSupported = !err + if (err) { + if (typeof(callback) == 'function') { + callback(undefined, undefined) + } else { + resolve(undefined) + } + } else { + doSearch() + } + }) + }) +} + +function formatResult(rows) { + const row = rows[0] + + if (row === undefined) { + return undefined + } else { + return formatLocation(row) + } +} + +function Forward(geocoder, query, callback) { + return findByName(geocoder, 
query, callback) +} + +module.exports = Forward; diff --git a/src/index.js b/src/index.js index 9ba44e7..1d4d555 100644 --- a/src/index.js +++ b/src/index.js @@ -1,25 +1,42 @@ "use strict"; const path = require('path') -const sqlite3 = require('sqlite3').verbose() const reverse = require('./reverse') +const forward = require('./forward') const findLocation = require('./location').find function Geocoder(options) { var geocoder = function(options) { this.options = options || {} - if (this.options.database === undefined) { - this.options.database = path.join(__filename, '../../data/db.sqlite') - } + if (this.options.db) { + // Accept a pre-opened database object (must have .all(sql, params, cb)) + this.db = this.options.db + } else { + var sqlite3 + try { + sqlite3 = (this.options.sqlite3 || require('sqlite3')).verbose() + } catch (err) { + err.message = 'sqlite3 is required for Node usage. Install it with `npm install sqlite3`.' + throw err + } + + if (this.options.database === undefined) { + this.options.database = path.join(__dirname, '../data/db.sqlite') + } - this.db = new sqlite3.Database(this.options.database) + this.db = new sqlite3.Database(this.options.database) + } } geocoder.prototype.reverse = function(latitude, longitude, callback) { return reverse(this, latitude, longitude, callback) } + geocoder.prototype.forward = function(query, callback) { + return forward(this, query, callback) + } + geocoder.prototype.location = function() { const _this = this @@ -30,7 +47,22 @@ function Geocoder(options) { } } - return new geocoder(options) + var instance = new geocoder(options) + + // Also support geocoder.location.find(id) without calling location() + var locationFn = function() { + return { + find: function(locationId) { + return findLocation(instance, locationId) + } + } + } + locationFn.find = function(locationId) { + return findLocation(instance, locationId) + } + instance.location = locationFn + + return instance } module.exports = Geocoder; diff --git 
a/src/location.js b/src/location.js index e24affe..1d4d097 100644 --- a/src/location.js +++ b/src/location.js @@ -1,25 +1,23 @@ "use strict"; +function normalizeId(value) { + if (typeof value === 'string') { + var match = /^geonames:(\d+)$/i.exec(value.trim()) + if (match) return Number(match[1]) + return Number(value) + } + return value +} + function find(geocoder, locationId) { return new Promise(function(resolve, reject) { - const query = `SELECT * FROM everything WHERE id = $id LIMIT 1` + const query = `SELECT * FROM everything WHERE id = ? LIMIT 1` - geocoder.db.all(query, { - $id: locationId - }, function(err, rows) { + geocoder.db.all(query, [normalizeId(locationId)], function(err, rows) { if (err) { - if (typeof(callback) == 'function') { - callback(err, undefined) - } else if (typeof(reject) == 'function') { - reject(err) - } + reject(err) } else { - const result = formatResult(rows) - if (typeof(callback) == 'function') { - callback(undefined, result) - } else if (typeof(resolve) == 'function') { - resolve(result) - } + resolve(formatResult(rows)) } }) }) diff --git a/src/reverse.js b/src/reverse.js index db92cd9..c7d030a 100644 --- a/src/reverse.js +++ b/src/reverse.js @@ -17,22 +17,21 @@ function findFeature(geocoder, latitude, longitude, callback) { const query = `SELECT * FROM everything WHERE id IN ( SELECT feature_id FROM coordinates - WHERE latitude BETWEEN $lat - 1.5 AND $lat + 1.5 - AND longitude BETWEEN $lon - 1.5 AND $lon + 1.5 + WHERE latitude BETWEEN ? - 1.5 AND ? + 1.5 + AND longitude BETWEEN ? - 1.5 AND ? + 1.5 ORDER BY ( - ($lat - latitude) * ($lat - latitude) + - ($lon - longitude) * ($lon - longitude) * $scale + (? - latitude) * (? - latitude) + + (? - longitude) * (? - longitude) * ? 
) ASC LIMIT 1 )` const scale = Math.pow(Math.cos(latitude * Math.PI / 180), 2) - geocoder.db.all(query, { - $lat: latitude, - $lon: longitude, - $scale: scale - }, function(err, rows) { + geocoder.db.all(query, [ + latitude, latitude, longitude, longitude, + latitude, latitude, longitude, longitude, scale + ], function(err, rows) { if (err) { if (typeof(callback) == 'function') { callback(err, undefined)