Skip to content

Website traffic stats gathering #1679

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Mar 23, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions config/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@
"libraries",
"mailing_list",
"news",
"reports",
"core",
"slack",
]
Expand Down
93 changes: 4 additions & 89 deletions libraries/forms.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,16 @@
import io
import base64
from functools import cached_property
from itertools import groupby, chain
from operator import attrgetter
from dataclasses import dataclass, field
from datetime import date, timedelta

import psycopg2
from wordcloud import WordCloud, STOPWORDS
from matplotlib import pyplot as plt

from django.template.loader import render_to_string
from django.db.models import F, Q, Count, OuterRef, Sum, When, Value, Case
from django.forms import Form, ModelChoiceField, ModelForm, BooleanField
from django.conf import settings

from core.models import RenderedContent, SiteSettings
from libraries.utils import batched, boost_normalize_words, grey_color_func
from core.models import RenderedContent
from reports.generation import generate_wordcloud
from slack.models import Channel, SlackActivityBucket, SlackUser
from versions.models import Version
from .models import (
Expand All @@ -25,10 +19,10 @@
Issue,
Library,
LibraryVersion,
WordcloudMergeWord,
)
from libraries.constants import SUB_LIBRARIES
from mailing_list.models import EmailData
from .utils import batched


class LibraryForm(ModelForm):
Expand Down Expand Up @@ -448,73 +442,6 @@ def _get_top_contributors_for_library_version(self, library_order):
)
return top_contributors_release

def _get_mail_content(self, version):
    """Yield the body of each mailing-list email sent during a release cycle.

    The window starts at the release date of the most recently released
    minor version whose ``version_array`` is lower than ``version``'s
    (inclusive) and ends at ``version``'s own release date (exclusive).

    Nothing is yielded when no such prior version exists or when
    ``settings.HYPERKITTY_DATABASE_NAME`` is unset.

    This is a generator: the database connection is opened on first
    iteration and closed once iteration finishes, fails, or the generator
    is closed early.
    """
    prior_version = (
        Version.objects.minor_versions()
        .filter(version_array__lt=version.cleaned_version_parts_int)
        .order_by("-release_date")
        .first()
    )
    if not prior_version or not settings.HYPERKITTY_DATABASE_NAME:
        # A bare return simply ends the generator; a returned value would
        # never reach callers that iterate over it.
        return
    conn = psycopg2.connect(settings.HYPERKITTY_DATABASE_URL)
    try:
        # psycopg2 treats a named cursor as a server-side cursor, so rows are
        # streamed rather than loaded into memory all at once.
        with conn.cursor(name="fetch-mail-content") as cursor:
            cursor.execute(
                """
                SELECT content FROM hyperkitty_email
                WHERE date >= %(start)s AND date < %(end)s;
                """,
                {"start": prior_version.release_date, "end": version.release_date},
            )
            for [content] in cursor:
                yield content
    finally:
        # The cursor context manager closes only the cursor; the connection
        # has to be closed explicitly or it leaks on every report run.
        conn.close()

def _generate_hyperkitty_word_cloud(self, version):
    """Generates a wordcloud png and returns it as a base64 string.

    Returns a ``(png_base64, word_frequencies)`` tuple. When no words are
    found for ``version`` the result is ``(None, {})``.

    Words shorter than two characters are dropped, counts are accumulated
    case-insensitively, and ``WordcloudMergeWord`` rows are applied through
    ``boost_normalize_words`` before rendering.
    """
    wc = WordCloud(
        mode="RGBA",
        background_color=None,
        width=1400,
        height=700,
        stopwords=STOPWORDS | SiteSettings.load().wordcloud_ignore_set,
        font_path=settings.BASE_DIR / "static" / "font" / "notosans_mono.woff",
    )
    word_frequencies = {}
    for content in self._get_mail_content(version):
        for key, val in wc.process_text(content).items():
            if len(key) < 2:
                continue
            key_lower = key.lower()
            word_frequencies[key_lower] = word_frequencies.get(key_lower, 0) + val
    if not word_frequencies:
        return None, {}

    word_frequencies = boost_normalize_words(
        word_frequencies,
        {x.from_word: x.to_word for x in WordcloudMergeWord.objects.all()},
    )

    wc.generate_from_frequencies(word_frequencies)
    figure = plt.figure(figsize=(14, 7), facecolor=None)
    try:
        plt.imshow(
            wc.recolor(color_func=grey_color_func, random_state=3),
            interpolation="bilinear",
        )
        plt.axis("off")
        image_bytes = io.BytesIO()
        plt.savefig(
            image_bytes,
            format="png",
            dpi=100,
            bbox_inches="tight",
            pad_inches=0,
            transparent=True,
        )
    finally:
        # pyplot keeps every figure alive until it is closed explicitly;
        # without this each generated report leaks a figure.
        plt.close(figure)
    return base64.b64encode(image_bytes.getvalue()).decode(), word_frequencies

def _count_mailinglist_contributors(self, version):
version_lt = list(
Version.objects.minor_versions()
Expand Down Expand Up @@ -838,19 +765,6 @@ def get_stats(self):
Channel.objects.filter(name__istartswith="boost").order_by("name"), 10
)
committee_members = version.financial_committee_members.all()
wordcloud_base64, word_frequencies = self._generate_hyperkitty_word_cloud(
version
)
# first sort by number, then sort the top 200 alphabetically
word_frequencies = {
key: val
for key, val in sorted(
word_frequencies.items(),
key=lambda x: x[1],
reverse=True,
)
}
wordcloud_top_words = sorted(list(word_frequencies.keys())[:200])
library_index_library_data = []
for library in self._get_libraries_by_quality():
library_index_library_data.append(
Expand All @@ -859,6 +773,7 @@ def get_stats(self):
library in [lib["library"] for lib in library_data],
)
)
wordcloud_base64, wordcloud_top_words = generate_wordcloud(version)

return {
"committee_members": committee_members,
Expand Down
149 changes: 90 additions & 59 deletions libraries/management/commands/release_tasks.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import traceback
from contextlib import suppress
from dataclasses import dataclass
from typing import Callable

import djclick as click

Expand All @@ -13,6 +15,7 @@
from core.githubhelper import GithubAPIClient
from libraries.forms import CreateReportForm
from libraries.tasks import update_commits
from reports.models import WebsiteStatReport
from slack.management.commands.fetch_slack_activity import get_my_channels, locked
from versions.models import Version

Expand All @@ -34,67 +37,94 @@ def progress_message(message: str):
return f"{timezone.now()}: {message}"


@dataclass
class ReleaseTask:
"""
A distinct task to be completed.

Action can be a callable or a list of string arguments to pass to `call_command`
"""

description: str
action: Callable | list[str]

def run(self):
if isinstance(self.action, Callable):
self.action()
else:
call_command(*self.action)


class ReleaseTasksManager:
    """Runs the release tasks in order and records timestamped progress.

    ``import_versions`` is the first task because it sets ``latest_version``,
    which several later tasks read.
    """

    latest_version: Version | None = None
    # Declared here for type checkers only; the containers are created per
    # instance in __init__ so that separate managers never share state.
    progress_messages: list[str]
    handled_commits: dict[str, int]

    def __init__(self, should_generate_report: bool = False):
        """Build the ordered task list.

        Args:
            should_generate_report: when false, the final "Generating report"
                task only records a "skipped" progress message.
        """
        self.should_generate_report = should_generate_report
        self.progress_messages = []
        self.handled_commits = {}
        self.tasks = [
            ReleaseTask("Importing versions", self.import_versions),
            ReleaseTask(
                "Importing most recent beta version",
                ["import_beta_release", "--delete-versions"],
            ),
            ReleaseTask("Importing libraries", ["update_libraries"]),
            ReleaseTask(
                "Saving library-version relationships", self.import_library_versions
            ),
            ReleaseTask("Adding library maintainers", ["update_maintainers"]),
            ReleaseTask("Adding library authors", ["update_authors"]),
            ReleaseTask(
                "Adding library version authors", ["update_library_version_authors"]
            ),
            ReleaseTask("Importing git commits", self.handle_commits),
            ReleaseTask("Syncing mailinglist statistics", ["sync_mailinglist_stats"]),
            ReleaseTask("Updating github issues", ["update_issues"]),
            ReleaseTask("Updating slack activity buckets", ["fetch_slack_activity"]),
            ReleaseTask("Updating website statistics", self.update_website_statistics),
            ReleaseTask("Generating report", self.generate_report),
        ]

    def update_release_data(self) -> dict[str, int]:
        """Run every task in order, logging a start and a finish message for each.

        Returns:
            The value stored by ``handle_commits`` (the result of
            ``update_commits``); an empty dict if that task stored nothing.

        An exception raised by a task propagates; messages recorded up to that
        point remain in ``progress_messages``.
        """
        for task in self.tasks:
            self.progress_messages.append(progress_message(f"{task.description}..."))
            task.run()
            self.progress_messages.append(
                progress_message(f"Finished {task.description.lower()}")
            )
        return self.handled_commits

    def import_versions(self):
        """Import new versions, then remember the most recent one for later tasks."""
        call_command("import_versions", "--new")
        self.latest_version = Version.objects.most_recent()

    def import_library_versions(self):
        """Import library versions starting from the latest release."""
        # removeprefix drops the literal "boost-" prefix; lstrip("boost-") would
        # instead strip any leading run of the characters b, o, s, t and "-".
        latest_version_number = self.latest_version.name.removeprefix("boost-")
        call_command("import_library_versions", min_release=latest_version_number)

    def handle_commits(self):
        """Import commits from the latest version onwards and keep the result."""
        self.handled_commits = update_commits(min_version=self.latest_version.name)

    def update_website_statistics(self):
        """Create the latest version's website stat report if missing, then fill it."""
        report, _ = WebsiteStatReport.objects.get_or_create(version=self.latest_version)
        report.populate_from_api()

    def generate_report(self):
        """Render and cache the release report, if report generation was requested."""
        if not self.should_generate_report:
            self.progress_messages.append(
                progress_message("Skipped - report generation not requested")
            )
            return
        form = CreateReportForm({"version": self.latest_version.id})
        form.cache_html()


@locked(1138692)
def run_commands(progress: list[str], generate_report: bool = False):
    """Run every release task and report progress through ``progress``.

    Args:
        progress: list that receives the timestamped progress messages; it is
            mutated in place.
        generate_report: whether the final task should generate and cache the
            release report.

    Returns:
        The handled-commits mapping returned by
        ``ReleaseTasksManager.update_release_data``.

    Raises:
        ValueError: if ``settings.SLACK_BOT_TOKEN`` is not set.
    """
    if not settings.SLACK_BOT_TOKEN:
        raise ValueError("SLACK_BOT_TOKEN is not set.")

    # All task steps live in ReleaseTasksManager; repeating them inline here
    # would run every import twice.
    manager = ReleaseTasksManager(should_generate_report=generate_report)
    try:
        return manager.update_release_data()
    finally:
        # Copy the messages back even when a task raises, so the caller still
        # sees how far the run got before the failure.
        progress.extend(manager.progress_messages)

Expand Down Expand Up @@ -196,4 +226,5 @@ def command(user_id=None, generate_report=False):
send_notification(
user,
"\n\n".join(message),
subject="Task Complete: release_tasks",
)
20 changes: 1 addition & 19 deletions libraries/utils.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from itertools import islice
import random
import string
import re
from itertools import islice

import structlog
import tempfile
Expand Down Expand Up @@ -296,21 +296,3 @@ def parse_line(line: str):
"Some library versions were skipped during artifact parsing.",
skipped_library_versions=skipped_library_versions,
)


def boost_normalize_words(frequencies, word_map):
    """Merge word counts according to ``word_map`` and return ``frequencies``.

    Args:
        frequencies: mapping of word -> count; modified in place.
        word_map: mapping of "from" word -> "to" word. The count of each
            "from" word present with a non-zero count is added to the "to"
            word, and the "from" entry is removed.

    Returns:
        The same ``frequencies`` object, after merging.
    """
    for from_word, to_word in word_map.items():
        if from_word == to_word:
            # A self-mapping is a no-op; merging it would double the count
            # and then delete the word altogether.
            continue
        from_count = frequencies.get(from_word, 0)
        if not from_count:
            continue
        frequencies[to_word] = frequencies.get(to_word, 0) + from_count
        del frequencies[from_word]
    return frequencies


def grey_color_func(
    word, font_size, position, orientation, random_state=None, **kwargs
):
    """Return a grey ``hsl()`` colour string with a lightness between 10% and 80%.

    The signature follows the wordcloud ``color_func`` callback; only
    ``random_state`` is used. When it is a ``random.Random`` instance it is
    the source of randomness, any other non-None value is used as a seed, and
    ``None`` falls back to the module-level generator. Honouring it makes a
    seeded recolor reproducible.
    """
    if random_state is None:
        rng = random
    elif isinstance(random_state, random.Random):
        rng = random_state
    else:
        rng = random.Random(random_state)
    return "hsl(0, 0%%, %d%%)" % rng.randint(10, 80)
Empty file added reports/__init__.py
Empty file.
32 changes: 32 additions & 0 deletions reports/admin.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
from django.contrib import admin

from reports.models import WebsiteStatReport, WebsiteStatItem


class StatInline(admin.StackedInline):
    """Read-only inline listing the ``value`` of each WebsiteStatItem."""

    model = WebsiteStatItem
    # Only the value is shown, and it is not editable from the admin.
    fields = ("value",)
    readonly_fields = ("value",)
    # Existing rows cannot be removed and no blank extra forms are rendered.
    can_delete = False
    extra = 0


@admin.register(WebsiteStatReport)
class WebsiteStatReportAdmin(admin.ModelAdmin):
    """Admin for WebsiteStatReport, with its stat items shown inline."""

    inlines = (StatInline,)
    list_display = ("version", "pageviews", "unique_visitors", "period")
    ordering = ("-version",)

    def get_queryset(self, request):
        """Prefetch stat items so the list columns need no per-row queries."""
        return super().get_queryset(request).prefetch_related("stats")

    @staticmethod
    def _format_stat(obj, code_name):
        """Return the named stat as a thousands-separated integer, or "-"."""
        # Scan the prefetched items rather than calling .get(): .get() runs a
        # query per cell and raises DoesNotExist when a report has no stats
        # yet, which would break the whole changelist page.
        for stat in obj.stats.all():
            if stat.code_name == code_name:
                return f"{int(stat.value):,}"
        return "-"

    def pageviews(self, obj):
        """List column: the report's ``pageviews`` stat."""
        return self._format_stat(obj, "pageviews")

    def unique_visitors(self, obj):
        """List column: the report's ``visitors`` stat."""
        return self._format_stat(obj, "visitors")


@admin.register(WebsiteStatItem)
class WebsiteStatItemAdmin(admin.ModelAdmin):
    """Registers WebsiteStatItem with the default ModelAdmin options."""
6 changes: 6 additions & 0 deletions reports/apps.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
from django.apps import AppConfig


class ReportsConfig(AppConfig):
    """Django application configuration for the ``reports`` app."""

    name = "reports"
    # Models without an explicit primary key get a BigAutoField.
    default_auto_field = "django.db.models.BigAutoField"
Loading