From a1e9152674c4abc20ade1dccc5da510ff731ea0b Mon Sep 17 00:00:00 2001 From: nickpo Date: Sun, 12 Jun 2016 22:59:53 -0400 Subject: [PATCH 1/6] added events --- server/events/__init__.py | 0 server/events/admin.py | 3 + server/events/importers/somervillema.py | 140 ++++++++++++++++++++++++ server/events/migrations/__init__.py | 0 server/events/models.py | 3 + server/events/tests.py | 3 + server/events/views.py | 3 + 7 files changed, 152 insertions(+) create mode 100644 server/events/__init__.py create mode 100644 server/events/admin.py create mode 100644 server/events/importers/somervillema.py create mode 100644 server/events/migrations/__init__.py create mode 100644 server/events/models.py create mode 100644 server/events/tests.py create mode 100644 server/events/views.py diff --git a/server/events/__init__.py b/server/events/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/server/events/admin.py b/server/events/admin.py new file mode 100644 index 0000000..8c38f3f --- /dev/null +++ b/server/events/admin.py @@ -0,0 +1,3 @@ +from django.contrib import admin + +# Register your models here. diff --git a/server/events/importers/somervillema.py b/server/events/importers/somervillema.py new file mode 100644 index 0000000..f7e9cf4 --- /dev/null +++ b/server/events/importers/somervillema.py @@ -0,0 +1,140 @@ +''' +Cornerwise project + +Data importer for Somerville's Public Minutes and Agendas. This script extracts +event information for public meetings from Somerville's website. 
+ +Usage: + +from events.importers import somervillema +print(json.dumps(somervillema.get_data(), sort_keys=True, indent=4)) +''' + +from datetime import datetime +from bs4 import BeautifulSoup +import logging +import requests +import re +import json + + +logger = logging.getLogger(__name__) + + +def get_date(soup, tr): + '''Get the date that the event was posted on the page.''' + + css = ('html > body > center > table > tbody > tr:nth-of-type(2) > ' + 'td > table > tbody > tr:nth-of-type(1) > td:nth-of-type(3) > ' + 'table > tbody > tr:nth-of-type(2) > td:nth-of-type(1) > div > ' + 'div:nth-of-type(3) > * > tbody > tr:nth-of-type({}) > ' + 'td:nth-of-type(1) > span'.format(tr)) + date = soup.select(css)[0].get_text() + date_ret = datetime.strptime(str(date), "%b %d, %Y") + + logger.info('get_date returned {}'.format(date)) + return date_ret + + +def get_link(soup, tr): + '''Get the link to the page containing the details for the event.''' + + css = ('html > body > center > table > tbody > tr:nth-of-type(2) > ' + 'td > table > tbody > tr:nth-of-type(1) > td:nth-of-type(3) > ' + 'table > tbody > tr:nth-of-type(2) > td:nth-of-type(1) > div > ' + 'div:nth-of-type(3) > * > tbody > tr:nth-of-type({}) > ' + 'td:nth-of-type(2) > a'.format(tr)) + link = soup.select(css)[0].attrs['href'] + + logger.info('get_link returned {}'.format(link)) + return link + + +def scrape_page(url, parent_event_date): + '''Scrape the data from the event detail page.''' + + out_dict = {} + new_page = requests.get(url) + new_soup = BeautifulSoup(new_page.content, 'html.parser') + event_addr = new_soup.select('#event_map > a')[0].attrs['href'] + event_loc = new_soup.select('#event_address')[0].get_text('|').split('|') + + try: + event_date = new_soup.select('#page_main > * > b')[0].get_text() + out_dict['date'] = event_date + + except IndexError: + event_time = new_soup.select('#page_main > p:nth-of-type(3)')[0].get_text().strip().split()[8:10] + out_dict['date'] = parent_event_date.strftime( \ + "%A, 
%B %d, %Y, {} {}".format(event_time[0], + event_time[1])) + + # This pulls the first text item under 'location' and strips it + # of whitespace characters. + tmploc = [event_loc[i].strip() for i in range(len(event_loc)) \ + if event_loc[i].strip() != ''] + out_dict['location'] = tmploc[1] + + # This gets the address from the google maps url in the href that's + # attached to the map icon on the page. + out_dict['address'] = event_addr[event_addr.find('&q=')+3:] + + # Some pages have limited information, and will result in these two + # keys having the same value. This clears out one of them. + if out_dict['location'] == out_dict['address']: out_dict['address'] = '' + + try: + event_cnt = new_soup.select('#event_contact_wrapper')[0].get_text('|').split('|') + tmpcnt = [event_cnt[i].strip() for i in range(len(event_cnt)) \ + if event_cnt[i].strip() != ''] + + out_dict['contact_name'] = tmpcnt[1] + out_dict['contact_phone'] = tmpcnt[2] + out_dict['contact_email'] = tmpcnt[3] + + try: + if len(tmpcnt) > 3: + out_dict['contact2_name'] = tmpcnt[4] + out_dict['contact2_phone'] = tmpcnt[5] + out_dict['contact2_email'] = tmpcnt[6] + + except IndexError: + pass + + except IndexError: + out_dict['contact_name'] = '' + out_dict['contact_phone'] = '' + out_dict['contact_email'] = '' + + return out_dict + + +def get_data(): + '''Run through all the upcoming events, and scrape each page. 
+ Return the raw data.''' + + base_url = 'http://www.somervillema.gov' + page = requests.get(base_url + '/government/public-minutes') + soup = BeautifulSoup(page.content, 'html.parser') + + data = {'events': []} + a = 1 + event_date = get_date(soup, a) + + while event_date > datetime.now(): + link = get_link(soup, a) + new_url = base_url + link + + logger.info('new_url={}'.format(new_url)) + data['events'].append(scrape_page(new_url, event_date)) + + a += 1 + event_date = get_date(soup, a) + logger.info('Run #{} complete.'.format(a)) + + return data + + +if __name__ == '__main__': + get_data() + #print json.dumps(get_data(), sort_keys=True, indent=3) diff --git a/server/events/migrations/__init__.py b/server/events/migrations/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/server/events/models.py b/server/events/models.py new file mode 100644 index 0000000..71a8362 --- /dev/null +++ b/server/events/models.py @@ -0,0 +1,3 @@ +from django.db import models + +# Create your models here. diff --git a/server/events/tests.py b/server/events/tests.py new file mode 100644 index 0000000..7ce503c --- /dev/null +++ b/server/events/tests.py @@ -0,0 +1,3 @@ +from django.test import TestCase + +# Create your tests here. diff --git a/server/events/views.py b/server/events/views.py new file mode 100644 index 0000000..91ea44a --- /dev/null +++ b/server/events/views.py @@ -0,0 +1,3 @@ +from django.shortcuts import render + +# Create your views here. 
From d5ac648477e29d3879705d75aebd1ad82d0aa0f1 Mon Sep 17 00:00:00 2001 From: nickpo Date: Wed, 20 Jul 2016 18:10:16 -0400 Subject: [PATCH 2/6] added layers --- server/layers/__init__.py | 0 server/layers/admin.py | 3 +++ server/layers/migrations/__init__.py | 0 server/layers/models.py | 3 +++ server/layers/tests.py | 3 +++ server/layers/views.py | 3 +++ 6 files changed, 12 insertions(+) create mode 100644 server/layers/__init__.py create mode 100644 server/layers/admin.py create mode 100644 server/layers/migrations/__init__.py create mode 100644 server/layers/models.py create mode 100644 server/layers/tests.py create mode 100644 server/layers/views.py diff --git a/server/layers/__init__.py b/server/layers/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/server/layers/admin.py b/server/layers/admin.py new file mode 100644 index 0000000..8c38f3f --- /dev/null +++ b/server/layers/admin.py @@ -0,0 +1,3 @@ +from django.contrib import admin + +# Register your models here. diff --git a/server/layers/migrations/__init__.py b/server/layers/migrations/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/server/layers/models.py b/server/layers/models.py new file mode 100644 index 0000000..71a8362 --- /dev/null +++ b/server/layers/models.py @@ -0,0 +1,3 @@ +from django.db import models + +# Create your models here. diff --git a/server/layers/tests.py b/server/layers/tests.py new file mode 100644 index 0000000..7ce503c --- /dev/null +++ b/server/layers/tests.py @@ -0,0 +1,3 @@ +from django.test import TestCase + +# Create your tests here. diff --git a/server/layers/views.py b/server/layers/views.py new file mode 100644 index 0000000..91ea44a --- /dev/null +++ b/server/layers/views.py @@ -0,0 +1,3 @@ +from django.shortcuts import render + +# Create your views here. 
From 6e1ddb8eb9ed9c301aedafd54c4c6af2b94c9bde Mon Sep 17 00:00:00 2001 From: nickpo Date: Wed, 20 Jul 2016 18:42:20 -0400 Subject: [PATCH 3/6] added models --- server/layers/models.py | 31 ++++++++++++++++++++++++++++++- server/layers/views.py | 5 ++++- 2 files changed, 34 insertions(+), 2 deletions(-) diff --git a/server/layers/models.py b/server/layers/models.py index 71a8362..0dea9e5 100644 --- a/server/layers/models.py +++ b/server/layers/models.py @@ -1,3 +1,32 @@ from django.db import models +from django.conf import settings -# Create your models here. +class Layers(models.Model): + source = models.CharField(max_length=128, + help_text="The source of the layer data.") + layer_id = models.CharField(max_length=64) + icon = models.CharField(max_length=64) + icon_credit = models.CharField(max_lenth=128) + region_name = models.CharField(max_Length=128, + default=settings.GEO_REGION, + null=True, + help_text="") + title = models.CharField(max_length=128, + help_text="The name of the layer.") + short_name = models.CharField(max_length=64, + help_text="The shortened name of the layer.") + info = models.CharField(max_length=512, + help_text="A general summary of what the layer represents.") + template = models.TextField(default="", + help_text="The template used to display the layer. 
NOTE: possibly to be removed and made in to a distinct Django template.") + color = models.CharField(max_length=24, + help_text="The color of the layer.") + shown = models.BooleanField(default=False, + help_text="Switch for whether or not the layer is shown.") + marker_type = models.CharField(max_length="24", + help_text="The type of marker to display on the layer.") + marker_color = models.CharField(max_length=24, + help_text="The color of the marker being displayed.") + marker_fillcolor = models.CharField(max_length=24) + marker_radius = models.IntegerField(default=0) + marker_fillopacity = models.IntegerField(default=0) diff --git a/server/layers/views.py b/server/layers/views.py index 91ea44a..d6d91de 100644 --- a/server/layers/views.py +++ b/server/layers/views.py @@ -1,3 +1,6 @@ from django.shortcuts import render -# Create your views here. +# TODO: +# * Pull data from the model. +# * Accept get parameters (filter_by_region_name, etc..) +# * Render data as raw JSON. From 77e38c4cbcbdd0da71dd614294bc66401de8b941 Mon Sep 17 00:00:00 2001 From: nickpo Date: Wed, 20 Jul 2016 19:06:26 -0400 Subject: [PATCH 4/6] updated models/views --- server/layers/models.py | 2 +- server/layers/views.py | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/server/layers/models.py b/server/layers/models.py index 0dea9e5..b53bbac 100644 --- a/server/layers/models.py +++ b/server/layers/models.py @@ -1,7 +1,7 @@ from django.db import models from django.conf import settings -class Layers(models.Model): +class Layer(models.Model): source = models.CharField(max_length=128, help_text="The source of the layer data.") layer_id = models.CharField(max_length=64) diff --git a/server/layers/views.py b/server/layers/views.py index d6d91de..e9bedbd 100644 --- a/server/layers/views.py +++ b/server/layers/views.py @@ -1,6 +1,11 @@ from django.shortcuts import render +from django.http import HttpResponseDirect +from .models import Layer # TODO: # * Pull data from the model. 
# * Accept get parameters (filter_by_region_name, etc..) # * Render data as raw JSON. + +def layers_json(req): + layers = Layer. From 985c1c38f81d67df637cd3d32dc7bed7cbf59328 Mon Sep 17 00:00:00 2001 From: nickpo Date: Wed, 20 Jul 2016 19:31:25 -0400 Subject: [PATCH 5/6] updated models/views --- server/layers/models.py | 1 - server/layers/views.py | 17 ++++++++++++++++- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/server/layers/models.py b/server/layers/models.py index b53bbac..fe1adae 100644 --- a/server/layers/models.py +++ b/server/layers/models.py @@ -4,7 +4,6 @@ class Layer(models.Model): source = models.CharField(max_length=128, help_text="The source of the layer data.") - layer_id = models.CharField(max_length=64) icon = models.CharField(max_length=64) icon_credit = models.CharField(max_lenth=128) region_name = models.CharField(max_Length=128, diff --git a/server/layers/views.py b/server/layers/views.py index e9bedbd..797126b 100644 --- a/server/layers/views.py +++ b/server/layers/views.py @@ -1,5 +1,7 @@ from django.shortcuts import render from django.http import HttpResponseDirect +from django.forms.models import model_to_dict + from .models import Layer # TODO: @@ -7,5 +9,18 @@ # * Accept get parameters (filter_by_region_name, etc..) # * Render data as raw JSON. + +# URL FORMAT: +# cornerwise.org/layers/list?region=Somerville&region=Cambridge +# +# PARAMETERS (FILTERS): +# region='region name' +# def layers_json(req): - layers = Layer. 
+ regions = req.GET.getlist("region") + layers = Layer.objects.filter(region_name__in=regions) + mdict = model_to_dict(layers) + + return HttpResponse(mdict) + + From 335a75727126ce5111bf69a4fdc0361279bec23a Mon Sep 17 00:00:00 2001 From: nickpo Date: Wed, 20 Jul 2016 19:51:35 -0400 Subject: [PATCH 6/6] added urls and modified cornerwise/urls --- server/cornerwise/urls.py | 1 + server/layers/urls.py | 7 +++++++ server/layers/views.py | 6 ++---- 3 files changed, 10 insertions(+), 4 deletions(-) create mode 100644 server/layers/urls.py diff --git a/server/cornerwise/urls.py b/server/cornerwise/urls.py index 1e71611..10f8160 100644 --- a/server/cornerwise/urls.py +++ b/server/cornerwise/urls.py @@ -33,6 +33,7 @@ url(r'^proposal/', include(proposal_urls)), url(r"^doc/", include(doc_urls)), url(r"^user/", include(user_urls)), + url(r"^layers/", inclue(layers_urls)), url(r"^$", index), url(r"^" + settings.MEDIA_URL + "(?P<path>.*)$", diff --git a/server/layers/urls.py b/server/layers/urls.py new file mode 100644 index 0000000..4f57454 --- /dev/null +++ b/server/layers/urls.py @@ -0,0 +1,7 @@ +from django.conf.urls import url + +from . import views + +urlpatterns = [ + url(r"^list$", views.layers_json, name="list-layers"), +] diff --git a/server/layers/views.py b/server/layers/views.py index 797126b..d63063b 100644 --- a/server/layers/views.py +++ b/server/layers/views.py @@ -19,8 +19,6 @@ def layers_json(req): regions = req.GET.getlist("region") layers = Layer.objects.filter(region_name__in=regions) - mdict = model_to_dict(layers) - - return HttpResponse(mdict) - - + mlist = [model_to_dict(l) for l in layers] + return HttpResponse(json.dumps(mlist, sort_keys=True, indent=4))