Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions server/cornerwise/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
url(r'^proposal/', include(proposal_urls)),
url(r"^doc/", include(doc_urls)),
url(r"^user/", include(user_urls)),
url(r"^layers/", include(layers_urls)),
url(r"^$", index),

url(r"^" + settings.MEDIA_URL + "(?P<path>.*)$",
Expand Down
Empty file added server/events/__init__.py
Empty file.
3 changes: 3 additions & 0 deletions server/events/admin.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from django.contrib import admin

# Register your models here.
140 changes: 140 additions & 0 deletions server/events/importers/somervillema.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
'''
Cornerwise project

Data importer for Somerville's Public Minutes and Agendas. This script extracts
event information for public meetings from Somerville's website.

Usage:

import json
from events.importers import somervillema
print(json.dumps(somervillema.get_data(), sort_keys=True, indent=4))
'''

from datetime import datetime
from bs4 import BeautifulSoup
import logging
import requests
import re
import json


# Module-level logger, named after this module so its output can be
# configured separately by the logging configuration.
logger = logging.getLogger(__name__)


def get_date(soup, tr):
    '''Return the date shown in row ``tr`` of the event listing.

    ``soup`` is the parsed listing page and ``tr`` is the 1-based row
    number substituted into the ``tr:nth-of-type({})`` part of the
    selector. The text of the ``span`` in the row's first cell is parsed
    with the format ``"%b %d, %Y"`` and returned as a ``datetime``.

    Raises IndexError when the selector matches nothing (for example when
    ``tr`` is past the last row) and ValueError when the text does not
    match the expected date format.
    '''

    selector = (
        'html > body > center > table > tbody > tr:nth-of-type(2) > td > '
        'table > tbody > tr:nth-of-type(1) > td:nth-of-type(3) > '
        'table > tbody > tr:nth-of-type(2) > td:nth-of-type(1) > '
        'div > div:nth-of-type(3) > * > tbody > tr:nth-of-type({}) > '
        'td:nth-of-type(1) > span'
    ).format(tr)

    matches = soup.select(selector)
    raw_text = matches[0].get_text()
    parsed = datetime.strptime(str(raw_text), "%b %d, %Y")

    # The raw page text (not the parsed value) is what gets logged.
    logger.info('get_date returned {}'.format(raw_text))
    return parsed


def get_link(soup, tr):
    '''Return the ``href`` of the detail-page link in row ``tr``.

    ``soup`` is the parsed listing page and ``tr`` is the 1-based row
    number substituted into the ``tr:nth-of-type({})`` part of the
    selector. The link is the ``a`` element in the row's second cell.

    Raises IndexError when the selector matches nothing and KeyError when
    the matched anchor has no ``href`` attribute.
    '''

    selector = (
        'html > body > center > table > tbody > tr:nth-of-type(2) > td > '
        'table > tbody > tr:nth-of-type(1) > td:nth-of-type(3) > '
        'table > tbody > tr:nth-of-type(2) > td:nth-of-type(1) > '
        'div > div:nth-of-type(3) > * > tbody > tr:nth-of-type({}) > '
        'td:nth-of-type(2) > a'
    ).format(tr)

    anchor = soup.select(selector)[0]
    href = anchor.attrs['href']

    logger.info('get_link returned {}'.format(href))
    return href


def _nonblank_parts(element):
    '''Return the stripped, non-empty text fragments found in ``element``.'''
    fragments = (part.strip() for part in element.get_text('|').split('|'))
    return [fragment for fragment in fragments if fragment != '']


def scrape_page(url, parent_event_date):
    '''Scrape the data from the event detail page.

    ``url`` is the address of the detail page, which is fetched with
    ``requests``. ``parent_event_date`` is the ``datetime`` taken from the
    listing page; it is only used to build the date string when the
    detail page has no bold date element.

    Returns a dict with the keys ``date``, ``location``, ``address``,
    ``contact_name``, ``contact_phone`` and ``contact_email``. The keys
    ``contact2_name``, ``contact2_phone`` and ``contact2_email`` are added
    one by one for as many extra contact fragments as the page provides.

    Raises IndexError when the page has no ``#event_map > a`` or
    ``#event_address`` element, when the location block has fewer than two
    text fragments, or when the fallback time text cannot be found.
    '''

    out_dict = {}
    new_page = requests.get(url)
    new_soup = BeautifulSoup(new_page.content, 'html.parser')
    event_addr = new_soup.select('#event_map > a')[0].attrs['href']
    location_parts = _nonblank_parts(new_soup.select('#event_address')[0])

    date_nodes = new_soup.select('#page_main > * > b')
    if date_nodes:
        out_dict['date'] = date_nodes[0].get_text()
    else:
        # No bold date on the page: combine the listing date with the two
        # words at positions 8 and 9 of the third paragraph, which are
        # presumably the time and AM/PM marker (verify against live pages).
        paragraph = new_soup.select('#page_main > p:nth-of-type(3)')[0]
        event_time = paragraph.get_text().strip().split()[8:10]
        # Format the date first and append the scraped text afterwards so
        # a '%' in the page text is never interpreted as a strftime code.
        out_dict['date'] = (parent_event_date.strftime("%A, %B %d, %Y") +
                            ", {} {}".format(event_time[0], event_time[1]))

    # The second non-blank fragment under 'location' is used as the
    # location; the first one is skipped.
    out_dict['location'] = location_parts[1]

    # This gets the address from the google maps url in the href that's
    # attached to the map icon on the page. When the url has no '&q='
    # parameter the address is left empty instead of slicing an arbitrary
    # piece of the url.
    _, marker, query = event_addr.partition('&q=')
    out_dict['address'] = query if marker else ''

    # Some pages have limited information, and will result in these two
    # keys having the same value. This clears out one of them.
    if out_dict['location'] == out_dict['address']:
        out_dict['address'] = ''

    contact_nodes = new_soup.select('#event_contact_wrapper')
    contact_parts = _nonblank_parts(contact_nodes[0]) if contact_nodes else []

    if len(contact_parts) > 3:
        # Fragment 0 is skipped; 1-3 are the primary contact.
        out_dict['contact_name'] = contact_parts[1]
        out_dict['contact_phone'] = contact_parts[2]
        out_dict['contact_email'] = contact_parts[3]

        # Fragments 4-6, where present, describe a second contact. zip
        # stops at the shorter sequence, so only available values are set.
        second_contact_keys = ('contact2_name', 'contact2_phone',
                               'contact2_email')
        for key, value in zip(second_contact_keys, contact_parts[4:7]):
            out_dict[key] = value
    else:
        # Missing or incomplete contact block: blank out all three fields.
        out_dict['contact_name'] = ''
        out_dict['contact_phone'] = ''
        out_dict['contact_email'] = ''

    return out_dict


def get_data():
    '''Run through all the upcoming events, and scrape each page.
    Return the raw data.

    Rows of the listing page are visited in order, starting at row 1, for
    as long as the row's date is later than the current time. Each such
    row's detail page is scraped with ``scrape_page``.

    Returns a dict of the form ``{'events': [event_dict, ...]}``.

    Raises IndexError when the very first row cannot be found, since that
    points at a changed page layout rather than at the end of the list.
    '''

    base_url = 'http://www.somervillema.gov'
    page = requests.get(base_url + '/government/public-minutes')
    soup = BeautifulSoup(page.content, 'html.parser')

    data = {'events': []}
    row = 1

    while True:
        try:
            event_date = get_date(soup, row)
        except IndexError:
            if row == 1:
                raise
            # Every listed row was an upcoming event and there are no more
            # rows to inspect: this is the normal end of the table.
            logger.info('No row #{} on the page; stopping.'.format(row))
            break

        if event_date <= datetime.now():
            break

        link = get_link(soup, row)
        new_url = base_url + link

        logger.info('new_url={}'.format(new_url))
        data['events'].append(scrape_page(new_url, event_date))

        # Log before incrementing so the number matches the row scraped.
        logger.info('Run #{} complete.'.format(row))
        row += 1

    return data


if __name__ == '__main__':
    # When run as a script, print the scraped events as JSON so the run
    # produces visible output instead of discarding the result.
    print(json.dumps(get_data(), sort_keys=True, indent=4))
Empty file.
3 changes: 3 additions & 0 deletions server/events/models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from django.db import models

# Create your models here.
3 changes: 3 additions & 0 deletions server/events/tests.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from django.test import TestCase

# Create your tests here.
3 changes: 3 additions & 0 deletions server/events/views.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from django.shortcuts import render

# Create your views here.
Empty file added server/layers/__init__.py
Empty file.
3 changes: 3 additions & 0 deletions server/layers/admin.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from django.contrib import admin

# Register your models here.
Empty file.
31 changes: 31 additions & 0 deletions server/layers/models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
from django.db import models
from django.conf import settings

class Layer(models.Model):
    """A map layer together with its display and marker settings."""

    source = models.CharField(max_length=128,
                              help_text="The source of the layer data.")
    icon = models.CharField(max_length=64)
    icon_credit = models.CharField(max_length=128)
    # Defaults to the region configured in settings.GEO_REGION.
    region_name = models.CharField(max_length=128,
                                   default=settings.GEO_REGION,
                                   null=True,
                                   help_text="")
    title = models.CharField(max_length=128,
                             help_text="The name of the layer.")
    short_name = models.CharField(max_length=64,
                                  help_text="The shortened name of the layer.")
    info = models.CharField(max_length=512,
                            help_text="A general summary of what the layer represents.")
    template = models.TextField(default="",
                                help_text="The template used to display the layer. NOTE: possibly to be removed and made in to a distinct Django template.")
    color = models.CharField(max_length=24,
                             help_text="The color of the layer.")
    shown = models.BooleanField(default=False,
                                help_text="Switch for whether or not the layer is shown.")
    # max_length must be an integer, not a string.
    marker_type = models.CharField(max_length=24,
                                   help_text="The type of marker to display on the layer.")
    marker_color = models.CharField(max_length=24,
                                    help_text="The color of the marker being displayed.")
    marker_fillcolor = models.CharField(max_length=24)
    marker_radius = models.IntegerField(default=0)
    # NOTE(review): stored as an integer; if opacity is meant to be a
    # fraction between 0 and 1 this needs a different field type - confirm.
    marker_fillopacity = models.IntegerField(default=0)
3 changes: 3 additions & 0 deletions server/layers/tests.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from django.test import TestCase

# Create your tests here.
7 changes: 7 additions & 0 deletions server/layers/urls.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
from django.conf.urls import url

from . import views

# URL routes for the layers app, relative to wherever this module is included.
urlpatterns = [
    # "list" -> JSON listing of layers, reversible as "list-layers".
    url(
        r"^list$",
        views.layers_json,
        name="list-layers",
    ),
]
24 changes: 24 additions & 0 deletions server/layers/views.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
import json

from django.forms.models import model_to_dict
from django.http import HttpResponse
from django.shortcuts import render

from .models import Layer

# TODO:
# * Pull data from the model.
# * Accept get parameters (filter_by_region_name, etc..)
# * Render data as raw JSON.


# URL FORMAT:
# cornerwise.org/layers/list?region=Somerville&region=Cambridge
#
# PARAMETERS (FILTERS):
# region='region name'
#
def layers_json(req):
    """Return the layers for the requested regions as a JSON document.

    The regions are read from the repeatable ``region`` GET parameter,
    e.g. ``?region=Somerville&region=Cambridge``. Only layers whose
    ``region_name`` is one of the given values are returned, so a request
    without any ``region`` parameter yields an empty JSON list.

    Returns an ``HttpResponse`` whose body is a JSON array with one object
    per matching layer, served as ``application/json``.
    """
    regions = req.GET.getlist("region")
    layers = Layer.objects.filter(region_name__in=regions)
    layer_dicts = [model_to_dict(layer) for layer in layers]

    return HttpResponse(json.dumps(layer_dicts, sort_keys=True, indent=4),
                        content_type="application/json")