-
Notifications
You must be signed in to change notification settings - Fork 0
/
load_OpenNames.py
101 lines (82 loc) · 3.5 KB
/
load_OpenNames.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
#
# Open addresses Spatial Research
# OS Open Names Script
#
#
# Version 1.0 (Python) in progress
# Author John Muray
# Licence MIT
#
# Purpose Load OS Open Names to database
#
import csv
import glob
import MySQLdb
import string
import collections
from bulkinsert import *
import codecs
# Database configuration
username = "****"
password = "******"
hostname = "******"
database = "****"
dbConn = MySQLdb.connect(host=hostname,user=username,passwd=password,db=database, use_unicode=True, charset='utf8')
cur = dbConn.cursor()
dbConn.set_character_set('utf8')
cur.execute('SET NAMES utf8;')
cur.execute('SET CHARACTER SET utf8;')
cur.execute('SET character_set_connection=utf8;')
query = "TRUNCATE TABLE `gaz_opennames`;"
cur.execute(query)
openNames_fields = ['OS_ID','NAMES_URI','NAME1','NAME1_LANG','NAME2','NAME2_LANG','TYPE','LOCAL_TYPE','GEOMETRY_X','GEOMETRY_Y','GEOMETRY','MOST_DETAIL_VIEW_RES','LEAST_DETAIL_VIEW_RES','MBR_XMIN','MBR_YMIN','MBR_XMAX','MBR_YMAX','MBR','POSTCODE_DISTRICT','POSTCODE_DISTRICT_URI','POPULATED_PLACE','POPULATED_PLACE_URI','POPULATED_PLACE_TYPE','DISTRICT_BOROUGH','DISTRICT_BOROUGH_URI','DISTRICT_BOROUGH_TYPE','COUNTY_UNITARY','COUNTY_UNITARY_URI','COUNTY_UNITARY_TYPE','REGION','REGION_URI','COUNTRY','COUNTRY_URI','RELATED_SPATIAL_OBJECT','SAME_AS_DBPEDIA','SAME_AS_GEONAMES']
openNames_bi = BulkInsert(cur,"gaz_opennames",openNames_fields, max=100, ignore=True)
file = "DOC/OS_Open_Names_Header.csv"
csvfile = open(file, 'r')
reader = csv.reader(csvfile, delimiter=',', quotechar='"')
for row in reader:
csv_names = [unicode(cell, 'utf-8-sig') for cell in row]
csvfile.close()
print csv_names
nwrit = 0;
for file in sorted(glob.glob("DATA/*.csv")):
print file
nrecs = 0
csvfile = open(file, 'r')
reader = csv.reader(csvfile, delimiter=',', quotechar='"')
for row in reader:
row = dict(zip(csv_names,[unicode(cell, 'utf-8-sig') for cell in row]))
nrecs += 1
# print row
if (nrecs % 10000) == 0:
print "Records read: " + str(nrecs)
line = []
for field in csv_names:
# print field
# print row[field]
if field[0:4] == u'GEOM' or field[0:3] == u'MBR':
if row[field] == '':
line.extend([u'0'])
else:
line.extend([row[field]])
else:
line.extend([row[field]])
if field == 'GEOMETRY_Y':
if row[field] == '':
line.extend([u"GeomFromText('Point(0 0)')"])
else:
line.extend([u"GeomFromText('Point("+row['GEOMETRY_X']+" "+row['GEOMETRY_Y']+")')"])
if field == 'MBR_YMAX':
if row[field] == '':
line.extend([u"GeomFromText('Polygon((0 0,0 0,0 0,0 0,0 0))')"])
else:
line.extend([u"GeomFromText('Polygon(("+row['MBR_XMIN']+" "+row['MBR_YMIN']+","+row['MBR_XMIN']+" "+row['MBR_YMAX']+","+row['MBR_XMAX']+" "+row['MBR_YMAX']+","+row['MBR_XMAX']+" "+row['MBR_YMIN']+","+row['MBR_XMIN']+" "+row['MBR_YMIN']+"))')"])
# print line
openNames_bi.addRow(line)
nwrit += 1
print "Records read: " + str(nrecs) + " Written: " + str(nwrit)
csvfile.close()
openNames_bi.close()
dbConn.commit()
print "Writing changes to database"
dbConn.close()