Skip to content

Commit

Permalink
cf brutasse#37 - Poor man's client-side clustering algorithm
Browse files Browse the repository at this point in the history
Basically:
- divide the world into a grid, sized according to the zoom level
- for each point, calculate the closest grid node
- take a "random" point of the cluster to set its center

But well, we have a POC working, let's refine it now :)
  • Loading branch information
yohanboniface committed Oct 12, 2012
1 parent 3e9a748 commit 9ad5d27
Show file tree
Hide file tree
Showing 6 changed files with 190 additions and 114 deletions.
203 changes: 97 additions & 106 deletions djangopeople/djangopeople/clustering.py
Original file line number Diff line number Diff line change
@@ -1,106 +1,97 @@
import math
import json

from django.db.models import Q
from django.http import HttpResponse

from .models import DjangoPerson, ClusteredPoint
from ..clusterlizard.clusterer import Clusterer


def latlong_to_mercator(lat, long):
    """
    Project a latitude/longitude pair onto Mercator coordinates.

    Returns an ``(x, y)`` tuple. Inputs are presumably expressed in
    degrees (the formula offsets the latitude by 90 and scales by
    pi / 360) -- confirm against the callers.
    """
    # Value both axes are scaled by: 180 maps onto this many units.
    half_extent = 20037508.34
    half_angle = (90 + lat) * math.pi / 360
    northing = math.log(math.tan(half_angle)) / (math.pi / 180)
    return long * half_extent / 180, northing * half_extent / 180


def mercator_to_latlong(x, y):
    """
    Convert Mercator ``(x, y)`` coordinates back to latitude/longitude.

    Note the order of the result: ``(lat, lon)``, i.e. the vertical
    component comes first even though ``x`` is the first argument.
    """
    # Same scaling constant as the forward projection uses.
    half_extent = 20037508.34
    lon = (x / half_extent) * 180
    scaled_y = (y / half_extent) * 180
    lat = 180 / math.pi * (
        2 * math.atan(math.exp(scaled_y * math.pi / 180)) - math.pi / 2
    )
    return lat, lon


def input_generator():
    """
    Feed ClusterLizard with the people stored in the database.

    ClusterLizard expects a generator of ``(mx, my, id)`` tuples; this
    one yields the Mercator projection of every DjangoPerson followed by
    that person's id.
    """
    for entry in DjangoPerson.objects.all():
        projected = latlong_to_mercator(entry.latitude, entry.longitude)
        mx, my = projected
        yield (mx, my, entry.id)


def save_clusters(clusters, zoom):
    """
    Persist a set of clusters as ClusteredPoint rows.

    This is the output function handed to ClusterLizard: it receives
    'clusters', a set of clusters, and 'zoom', the integer Google zoom
    level. Each cluster's mean is converted back to latitude/longitude
    before being stored. A cluster made of exactly one point also records
    the id of that point (third item of the point tuple) so the row can
    link to the person; bigger clusters store no person.
    """
    for cluster in clusters:
        lat, lng = mercator_to_latlong(*cluster.mean)
        size = len(cluster)
        # A conditional expression instead of ``cond and a or b``: the
        # latter silently turns a falsy id into None.
        person_id = list(cluster.points)[0][2] if size == 1 else None
        ClusteredPoint.objects.create(
            latitude=lat,
            longitude=lng,
            number=size,
            zoom=zoom,
            djangoperson_id=person_id,
        )


def progress(done, left, took, zoom, eta):
    """
    Optional progress callback that can be passed to the clusterer.

    Prints one status line built from the iteration number ('done'), the
    number of clusters ('left'), the duration in seconds ('took'), the
    zoom level and the ETA.
    """
    # Parenthesised so the line is valid on Python 3 as well; with a
    # single argument Python 2's print statement emits the same output.
    print("Iter %s (%s clusters) [%.3f secs] [zoom: %s] [ETA %s]" % (
        done, left, took, zoom, eta,
    ))


def as_json(request, x2, y1, x1, y2, z):
    """
    View that returns clusters for the given zoom level as JSON.

    ``x1``/``x2`` bound the longitude and ``y1``/``y2`` the latitude of
    the requested view; ``z`` is the zoom level. The response body is a
    JSON list of ``[longitude, latitude, number, url]`` entries, where
    ``url`` is the person's absolute URL for clusters linked to a person
    and ``null`` otherwise.
    """
    x1, y1, x2, y2 = map(float, (x1, y1, x2, y2))
    if y1 > y2:
        y1, y2 = y2, y1

    # The latitude band applies to every point, whichever side of the
    # date line it sits on.
    in_latitude_band = Q(latitude__gt=y1, latitude__lt=y2)
    if x1 < x2:  # View not crossing the date line
        in_longitude_span = Q(longitude__gt=x1, longitude__lt=x2)
    else:  # View crossing the date line
        # The visible span runs from x1 up to the date line and from the
        # date line up to x2, hence "east of x1 OR west of x2".
        in_longitude_span = Q(longitude__gt=x1) | Q(longitude__lt=x2)

    query = ClusteredPoint.objects.filter(
        in_latitude_band, in_longitude_span, zoom=z)

    points = [
        (
            cluster.longitude,
            cluster.latitude,
            cluster.number,
            (cluster.djangoperson.get_absolute_url()
             if cluster.djangoperson else None),
        )
        for cluster in query
    ]
    return HttpResponse(json.dumps(points))


def run():
    """
    Rebuild every stored cluster from scratch.

    All existing ClusteredPoint rows are deleted first, then a Clusterer
    is run with the module's input generator, output function and
    progress callback.
    """
    # Wipe the previous results so stale clusters never mix with new ones.
    ClusteredPoint.objects.all().delete()
    Clusterer(input_generator(), save_clusters, progress).run()
from django.core.cache import cache

from .models import DjangoPerson


class Cluster(object):
    """
    Grid-based ("poor man") clustering of people, cached per zoom level.

    Points are grouped by the grid cell they fall into; the grid step and
    the minimum cluster size depend on the zoom level. The result is a
    GeoJSON FeatureCollection stored in the cache under ``CACHE_KEY``.
    """

    # Must contain a %d for the zoom level
    CACHE_KEY = "clusters_for_%d"

    ZOOM_LEVELS = {
        0: {
            "step": 70,  # Grid step
            "min": 20  # Min markers in a cluster to display it
        },
        1: {
            "step": 50,
            "min": 10
        },
        2: {
            "step": 45,
            "min": 5
        },
        3: {
            "step": 30,
            "min": 3
        },
        4: {
            "step": 15,
            "min": 2
        },
        5: {
            "step": 5,
            "min": 2
        },
    }

    def get_points(self):
        """Return the id, latitude and longitude of every DjangoPerson."""
        return DjangoPerson.objects.values('id', 'latitude', 'longitude')

    def mass_populate_cache(self):
        """Runs server-side clustering for each zoom level."""
        points = self.get_points()
        for zoom_level in self.ZOOM_LEVELS:
            self.populate_cache(points, zoom_level)

    def _build_geojson(self, points, zoom_level):
        """
        Cluster ``points`` for ``zoom_level`` and return the GeoJSON dict.

        ``points`` is an iterable of mappings with 'latitude' and
        'longitude' keys. ``zoom_level`` may be an int or a numeric
        string. A zoom level without an entry in ``ZOOM_LEVELS`` yields
        an empty FeatureCollection. Pure computation: the cache is not
        touched here.
        """
        geojson = {'type': 'FeatureCollection', 'features': []}
        config = self.ZOOM_LEVELS.get(int(zoom_level))
        if config is None:
            return geojson
        step = config['step']
        min_size = config['min']

        cells = {}
        for point in points:
            lat = point['latitude']
            lng = point['longitude']
            # Snap the point down to the grid node of its cell.
            grid_point = (lat - lat % step, lng - lng % step)
            cell = cells.setdefault(grid_point, {'points': []})
            cell['points'].append(point)
            # Ungrid the cluster center artificially: the last point seen
            # in the cell is used as the displayed center.
            cell['lat'] = lat
            cell['lng'] = lng

        for cell in cells.values():
            # Cells with too few markers are not displayed at this zoom.
            if len(cell['points']) >= min_size:
                geojson['features'].append(self.make_feature(cell))
        return geojson

    def populate_cache(self, points, zoom_level):
        """
        Runs the server-side clustering and caches it for front-end usage.

        Poor man's algorithm:
        - divide the world into a grid, sized according to the zoom level
        - for each point, calculate the closest grid node
        - take a "random" point in the cluster to set the center
        (For better rendering, these clusters could be reclustered,
        as they are now not numerous. But it's not done right now.)

        Returns the GeoJSON dict that was stored in the cache.
        """
        # The zoom level may arrive as a string captured from the URL;
        # both the config lookup and the "%d" cache key need an int.
        zoom_level = int(zoom_level)
        geojson = self._build_geojson(points, zoom_level)
        cache.set(self.CACHE_KEY % zoom_level, geojson)
        return geojson

    def make_feature(self, cluster):
        """
        Build the GeoJSON Feature for one cluster.

        The geometry is a Point at the cluster's 'lng'/'lat' (GeoJSON
        order: longitude first) and the 'len' property holds the number
        of points in the cluster.
        """
        return {
            'type': 'Feature',
            'properties': {'len': len(cluster['points'])},
            'geometry': {
                "type": "Point",
                "coordinates": [cluster['lng'], cluster['lat']],
            },
        }

    def get_for_zoom(self, zoom_level):
        """
        Return the clusters for ``zoom_level``, computing them on a miss.

        ``zoom_level`` may be an int or a numeric string; it is normalised
        once so the cache read and the fallback computation agree.
        """
        zoom_level = int(zoom_level)
        clusters = cache.get(self.CACHE_KEY % zoom_level)
        if clusters is None:
            clusters = self.populate_cache(self.get_points(), zoom_level)
        return clusters
5 changes: 3 additions & 2 deletions djangopeople/djangopeople/management/commands/recluster.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
from django.core.management.base import NoArgsCommand

from ... import clustering
from ...clustering import Cluster


# Management command whose help text reads "Re-runs the server-side
# clustering".
class Command(NoArgsCommand):
help = "Re-runs the server-side clustering"

# NOTE(review): this listing is a flattened diff (its header reports
# "3 additions & 2 deletions"), so `clustering.run()` is presumably the
# removed call and the two `Cluster` lines its replacement -- confirm
# against the real file before assuming both are executed.
def handle_noargs(self, **options):
clustering.run()
cluster = Cluster()
cluster.mass_populate_cache()
74 changes: 70 additions & 4 deletions djangopeople/djangopeople/static/djangopeople/js/maps.js
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,21 @@ L.PeopleMarker = L.Marker.extend({

});

// Marker used for server-side clusters: clicking it re-centres the map on
// the marker and zooms in by one level.
L.ClusterMarker = L.Marker.extend({

    onAdd: function (map) {
        L.Marker.prototype.onAdd.call(this, map);
        this.on("click", this._onClick);
    },

    _onClick: function () {
        var nextZoom = this._map.getZoom() + 1;
        this._map.setView(this.getLatLng(), nextZoom);
    }

});

L.TileLayer.ClusteredGeoJSONTile = L.TileLayer.extend({

initClusterMarker: function (map) {
Expand All @@ -54,6 +69,44 @@ L.TileLayer.ClusteredGeoJSONTile = L.TileLayer.extend({
});
},

initServerClustersLayer: function (map) {
if (this.serverClusterLayer) {
map.removeLayer(this.serverClusterLayer);
}
this.serverClusterLayer = new L.GeoJSON(null, {
pointToLayer: function (feature, latlng) {
var childCount = feature.properties.len
var c = ' marker-cluster-';
if (childCount < 10) {
c += 'small';
} else if (childCount < 100) {
c += 'medium';
} else {
c += 'large';
}
var marker = new L.ClusterMarker(latlng, {
icon: new L.DivIcon({ html: '<div><span>' + childCount + '</span></div>', className: 'marker-cluster' + c, iconSize: new L.Point(40, 40) })
});
return marker;
}
});
},

detachServerClusterLayer: function () {
if (typeof this._map !== "undefined" && this._map.hasLayer(this.serverClusterLayer)) {
this._map.removeLayer(this.serverClusterLayer)
}
},

loadServerClusters: function () {
this._map.addLayer(this.serverClusterLayer)
var self = this;
$.getJSON("/clusters/" + this._getZoomForUrl(), function (data) {
DATA = data;
self.serverClusterLayer.addData(data);
});
},

initTmpLayer: function (e) {
// Goal: add markers to cluster in one shot
this.tmpLayer = new L.GeoJSON(null, {
Expand Down Expand Up @@ -81,10 +134,18 @@ L.TileLayer.ClusteredGeoJSONTile = L.TileLayer.extend({
onAdd: function (map) {
this._map = map;
var self = this;
this.zoomSwitchAt = 5
map.on('zoomstart', function (e) {
// Delete the cluster to prevent from having several times
// Delete the clusters to prevent from having several times
// the same people
self.initClusterMarker(map);
self.initServerClustersLayer(map);

});
map.on('zoomend', function (e) {
if (self._getZoomForUrl() <= self.zoomSwitchAt) {
self.loadServerClusters();
}
});
this.on({
'geojsonloadinit': this.initTmpLayer,
Expand All @@ -93,9 +154,7 @@ L.TileLayer.ClusteredGeoJSONTile = L.TileLayer.extend({
L.TileLayer.prototype.onAdd.call(this, map);
},

_addTile: function (tilePoint, container) {
L.TileLayer.prototype._addTile.call(this, tilePoint, container)

_addJSONTile: function (tilePoint, container) {
var z = this._getZoomForUrl(),
x = tilePoint.x,
y = tilePoint.y;
Expand All @@ -122,6 +181,13 @@ L.TileLayer.ClusteredGeoJSONTile = L.TileLayer.extend({
self.fire("geojsonloadend")
}
});
},

_addTile: function (tilePoint, container) {
L.TileLayer.prototype._addTile.call(this, tilePoint, container)
if (this._getZoomForUrl() > this.zoomSwitchAt) {
this._addJSONTile(tilePoint, container);
}
}

});
2 changes: 1 addition & 1 deletion djangopeople/djangopeople/templates/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
{% block nav_li_class_home %} class="current"{% endblock %}

{% block post_map_init %}
MAP.locate({setView: true});
MAP.locate({setView: true, maxZoom: 6});
{% endblock %}

{% block map %}
Expand Down
18 changes: 17 additions & 1 deletion djangopeople/djangopeople/views.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import re
import json
import datetime
import operator
import re

from django.contrib import auth
from django.core import signing
Expand All @@ -12,6 +13,7 @@
from django.utils import timezone
from django.utils.translation import ugettext_lazy as _
from django.views import generic
from django.http import HttpResponse

from password_reset.views import Recover
from tagging.models import Tag, TaggedItem
Expand All @@ -24,6 +26,7 @@
LocationForm, FindingForm, AccountForm, PasswordForm,
DeletionRequestForm, AccountDeletionForm)
from .models import DjangoPerson, Country, User, Region, PortfolioSite
from .clustering import Cluster

from ..django_openidauth.models import associate_openid, UserOpenID
from ..machinetags.utils import tagdict
Expand Down Expand Up @@ -437,6 +440,19 @@ class ProfilePopupView(generic.DetailView):
profile_popup = ProfilePopupView.as_view()


class GeoClustersView(generic.TemplateView):
    """
    Serve the clusters of one zoom level as a JSON document.

    No template is rendered: ``render_to_response`` is overridden to
    build a plain HttpResponse carrying the serialized clusters.
    """

    response_class = HttpResponse

    def render_to_response(self, context, **response_kwargs):
        """
        Return the JSON response for the zoom level found in
        ``context['params']['zoom_level']`` (presumably the value captured
        by the URL pattern -- confirm against the URLconf).
        """
        response_kwargs.update(content_type='application/json')
        requested_zoom = context['params']['zoom_level']
        payload = json.dumps(Cluster().get_for_zoom(requested_zoom))
        return self.response_class(payload, **response_kwargs)
geoclusters = GeoClustersView.as_view()


class DjangoPersonEditViewBase(generic.UpdateView):
def get_object(self):
return get_object_or_404(DjangoPerson,
Expand Down
2 changes: 2 additions & 0 deletions djangopeople/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,8 @@ def perm_redirect(url):

url(r'^search/$', views.search, name='search'),

url(r'^clusters/(?P<zoom_level>\d{1})$', views.geoclusters, name='geoclusters'),

url(r'^skills/(?P<tag>.*)/$', views.skill, name='skill_detail'),
url(r'^skills/$', views.skill_cloud, name='skill_cloud'),

Expand Down

0 comments on commit 9ad5d27

Please sign in to comment.