Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,210 @@
"""Revise annotation count view

Revision ID: 42933d84aa52
Revises: e88e4e962dc7
Create Date: 2025-12-26 15:27:30.368862

"""
from typing import Sequence, Union

from alembic import op
import sqlalchemy as sa

Check warning on line 11 in alembic/versions/2025_12_26_1527-42933d84aa52_revise_annotation_count_view.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] alembic/versions/2025_12_26_1527-42933d84aa52_revise_annotation_count_view.py#L11 <401>

'sqlalchemy as sa' imported but unused
Raw output
./alembic/versions/2025_12_26_1527-42933d84aa52_revise_annotation_count_view.py:11:1: F401 'sqlalchemy as sa' imported but unused


# revision identifiers, used by Alembic.
revision: str = '42933d84aa52'
down_revision: Union[str, None] = 'e88e4e962dc7'
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None


def upgrade() -> None:
    """Recreate ``url_annotation_count_view`` with per-source annotation counts.

    The existing view (if any) is dropped and a new one is created. The new
    view yields one row per row of ``urls`` and exposes, for each of the
    ``auto``, ``user`` and ``anon`` annotation sources, the number of agency,
    location, record-type and URL-type annotation rows attached to that URL,
    plus ``total_anno_count`` as the sum of all twelve counts. URLs without
    any annotation of a given kind report ``0`` for that column.
    """
    op.execute("""DROP VIEW IF EXISTS url_annotation_count_view""")
    # Each CTE counts annotation rows per URL for one (source, kind) pair.
    # The final SELECT LEFT JOINs every CTE back onto ``urls`` so that URLs
    # with no annotations are still present; every join must be keyed on its
    # own CTE alias, otherwise rows are multiplied and counts are wrong.
    op.execute(
        """
        CREATE VIEW url_annotation_count_view AS
        WITH
        auto_location_count AS (
            SELECT
                u_1.id,
                count(anno.url_id) AS cnt
            FROM
                urls u_1
                JOIN annotation__location__auto__subtasks anno
                    ON u_1.id = anno.url_id
            GROUP BY
                u_1.id
        )
        , auto_agency_count AS (
            SELECT
                u_1.id,
                count(anno.url_id) AS cnt
            FROM
                urls u_1
                JOIN annotation__agency__auto__subtasks anno
                    ON u_1.id = anno.url_id
            GROUP BY
                u_1.id
        )
        , auto_url_type_count AS (
            SELECT
                u_1.id,
                count(anno.url_id) AS cnt
            FROM
                urls u_1
                JOIN annotation__url_type__auto anno
                    ON u_1.id = anno.url_id
            GROUP BY
                u_1.id
        )
        , auto_record_type_count AS (
            SELECT
                u_1.id,
                count(anno.url_id) AS cnt
            FROM
                urls u_1
                JOIN annotation__record_type__auto anno
                    ON u_1.id = anno.url_id
            GROUP BY
                u_1.id
        )
        , user_location_count AS (
            SELECT
                u_1.id,
                count(anno.url_id) AS cnt
            FROM
                urls u_1
                JOIN annotation__location__user anno
                    ON u_1.id = anno.url_id
            GROUP BY
                u_1.id
        )
        , user_agency_count AS (
            SELECT
                u_1.id,
                count(anno.url_id) AS cnt
            FROM
                urls u_1
                JOIN annotation__agency__user anno
                    ON u_1.id = anno.url_id
            GROUP BY
                u_1.id
        )
        , user_url_type_count AS (
            SELECT
                u_1.id,
                count(anno.url_id) AS cnt
            FROM
                urls u_1
                JOIN annotation__url_type__user anno
                    ON u_1.id = anno.url_id
            GROUP BY
                u_1.id
        )
        , user_record_type_count AS (
            SELECT
                u_1.id,
                count(anno.url_id) AS cnt
            FROM
                urls u_1
                JOIN annotation__record_type__user anno
                    ON u_1.id = anno.url_id
            GROUP BY
                u_1.id
        )
        , anon_location_count AS (
            SELECT
                u_1.id,
                count(anno.url_id) AS cnt
            FROM
                urls u_1
                JOIN annotation__location__anon anno
                    ON u_1.id = anno.url_id
            GROUP BY
                u_1.id
        )
        , anon_agency_count AS (
            SELECT
                u_1.id,
                count(anno.url_id) AS cnt
            FROM
                urls u_1
                JOIN annotation__agency__anon anno
                    ON u_1.id = anno.url_id
            GROUP BY
                u_1.id
        )
        , anon_url_type_count AS (
            SELECT
                u_1.id,
                count(anno.url_id) AS cnt
            FROM
                urls u_1
                JOIN annotation__url_type__anon anno
                    ON u_1.id = anno.url_id
            GROUP BY
                u_1.id
        )
        , anon_record_type_count AS (
            SELECT
                u_1.id,
                count(anno.url_id) AS cnt
            FROM
                urls u_1
                JOIN annotation__record_type__anon anno
                    ON u_1.id = anno.url_id
            GROUP BY
                u_1.id
        )
        SELECT
            u.id AS url_id,
            COALESCE(auto_ag.cnt, 0::bigint) AS auto_agency_count,
            COALESCE(auto_loc.cnt, 0::bigint) AS auto_location_count,
            COALESCE(auto_rec.cnt, 0::bigint) AS auto_record_type_count,
            COALESCE(auto_typ.cnt, 0::bigint) AS auto_url_type_count,
            COALESCE(user_ag.cnt, 0::bigint) AS user_agency_count,
            COALESCE(user_loc.cnt, 0::bigint) AS user_location_count,
            COALESCE(user_rec.cnt, 0::bigint) AS user_record_type_count,
            COALESCE(user_typ.cnt, 0::bigint) AS user_url_type_count,
            COALESCE(anon_ag.cnt, 0::bigint) AS anon_agency_count,
            COALESCE(anon_loc.cnt, 0::bigint) AS anon_location_count,
            COALESCE(anon_rec.cnt, 0::bigint) AS anon_record_type_count,
            COALESCE(anon_typ.cnt, 0::bigint) AS anon_url_type_count,
            COALESCE(auto_ag.cnt, 0::bigint) + COALESCE(auto_loc.cnt, 0::bigint) + COALESCE(auto_rec.cnt, 0::bigint) +
            COALESCE(auto_typ.cnt, 0::bigint) + COALESCE(user_ag.cnt, 0::bigint) + COALESCE(user_loc.cnt, 0::bigint) +
            COALESCE(user_rec.cnt, 0::bigint) + COALESCE(user_typ.cnt, 0::bigint) + COALESCE(anon_ag.cnt, 0::bigint) +
            COALESCE(anon_loc.cnt, 0::bigint) + COALESCE(anon_rec.cnt, 0::bigint) + COALESCE(anon_typ.cnt, 0::bigint) AS total_anno_count

        FROM
            urls u
            LEFT JOIN auto_agency_count auto_ag
                ON auto_ag.id = u.id
            LEFT JOIN auto_location_count auto_loc
                ON auto_loc.id = u.id
            LEFT JOIN auto_record_type_count auto_rec
                ON auto_rec.id = u.id
            LEFT JOIN auto_url_type_count auto_typ
                ON auto_typ.id = u.id
            LEFT JOIN user_agency_count user_ag
                ON user_ag.id = u.id
            LEFT JOIN user_location_count user_loc
                ON user_loc.id = u.id
            LEFT JOIN user_record_type_count user_rec
                ON user_rec.id = u.id
            LEFT JOIN user_url_type_count user_typ
                ON user_typ.id = u.id
            LEFT JOIN anon_agency_count anon_ag
                ON anon_ag.id = u.id
            LEFT JOIN anon_location_count anon_loc
                ON anon_loc.id = u.id
            LEFT JOIN anon_record_type_count anon_rec
                ON anon_rec.id = u.id
            LEFT JOIN anon_url_type_count anon_typ
                ON anon_typ.id = u.id

        """
    )


def downgrade() -> None:
    """Do nothing when this revision is rolled back.

    The view created by ``upgrade`` is left in place; the definition that
    existed before this revision is not recreated here.
    """
    # NOTE(review): rolling back keeps the revised view rather than restoring
    # the prior one - confirm this is intentional.
    pass
79 changes: 46 additions & 33 deletions src/api/endpoints/annotate/_shared/queries/helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
This module contains helper functions for the annotate GET queries
"""

from sqlalchemy import Select, case, exists, select
from sqlalchemy import Select, case, CTE, ColumnElement
from sqlalchemy.orm import joinedload

from src.collectors.enums import URLStatus
Expand All @@ -15,10 +15,9 @@
from src.db.models.views.url_annotations_flags import URLAnnotationFlagsView


def get_select() -> Select:
return (
Select(URL)

def add_joins(query: Select) -> Select:

Check warning on line 18 in src/api/endpoints/annotate/_shared/queries/helper.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] src/api/endpoints/annotate/_shared/queries/helper.py#L18 <103>

Missing docstring in public function
Raw output
./src/api/endpoints/annotate/_shared/queries/helper.py:18:1: D103 Missing docstring in public function
query = (
query
.join(
URLAnnotationFlagsView,
URLAnnotationFlagsView.url_id == URL.id
Expand All @@ -28,10 +27,12 @@
URLAnnotationCount.url_id == URL.id
)
)
return query

def conclude(query: Select) -> Select:
# Add common where conditions
query = query.where(
def add_common_where_conditions(

Check warning on line 32 in src/api/endpoints/annotate/_shared/queries/helper.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] src/api/endpoints/annotate/_shared/queries/helper.py#L32 <103>

Missing docstring in public function
Raw output
./src/api/endpoints/annotate/_shared/queries/helper.py:32:1: D103 Missing docstring in public function
query: Select,
) -> Select:
return query.where(
URL.status == URLStatus.OK.value,
not_exists_url(
FlagURLSuspended
Expand All @@ -42,29 +43,41 @@
)
)


query = (
# Add load options
query.options(
joinedload(URL.html_content),
joinedload(URL.user_url_type_suggestions),
joinedload(URL.user_record_type_suggestions),
joinedload(URL.anon_record_type_suggestions),
joinedload(URL.anon_url_type_suggestions),
)
# Sorting Priority
.order_by(
# Privilege manually submitted URLs first
case(
(URL.source == URLSource.MANUAL, 0),
else_=1
).asc(),
# Break ties by favoring URL with higher total annotations
URLAnnotationCount.total_anno_count.desc(),
# Break additional ties by favoring least recently created URLs
URL.id.asc()
)
# Limit to 1 result
.limit(1)
def add_load_options(
    query: Select
) -> Select:
    """Return ``query`` with joined eager-loading enabled for URL relationships.

    The HTML content plus the user and anonymous URL-type / record-type
    suggestion relationships of ``URL`` are loaded via ``joinedload``.
    """
    eager_loads = (
        joinedload(URL.html_content),
        joinedload(URL.user_url_type_suggestions),
        joinedload(URL.user_record_type_suggestions),
        joinedload(URL.anon_record_type_suggestions),
        joinedload(URL.anon_url_type_suggestions),
    )
    return query.options(*eager_loads)
return query

def bool_sort(
    condition: ColumnElement[bool]
) -> ColumnElement[int]:
    """Build an ascending sort key that orders rows matching ``condition`` first."""
    # Rows satisfying the condition map to 0, all others to 1, so an
    # ascending sort places the matching rows ahead of the rest.
    rank = case((condition, 0), else_=1)
    return rank.asc()

def common_sorts(
    base_cte: CTE
) -> list[ColumnElement[int]]:
    """Return the shared ORDER BY expressions, highest priority first.

    :param base_cte: CTE exposing a ``followed_by_any_user`` column.
    :return: Sort expressions to be applied in the listed order.
    """
    # Rows flagged ``followed_by_any_user`` first (per the original author's
    # note: URLs whose batches are associated with locations followed by any
    # user).
    followed_first = bool_sort(base_cte.c.followed_by_any_user)
    # Then manually submitted URLs.
    manual_first = bool_sort(URL.source == URLSource.MANUAL)
    return [
        followed_first,
        manual_first,
        # Then by descending URL-type annotation counts: user, anon, auto.
        # NOTE(review): only the *_url_type_count columns are used here, not
        # the per-source totals - confirm this is the intended ranking.
        URLAnnotationCount.user_url_type_count.desc(),
        URLAnnotationCount.anon_url_type_count.desc(),
        URLAnnotationCount.auto_url_type_count.desc(),
        # Finally break remaining ties by ascending URL id.
        URL.id.asc(),
    ]

Check warning on line 83 in src/api/endpoints/annotate/_shared/queries/helper.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] src/api/endpoints/annotate/_shared/queries/helper.py#L83 <391>

blank line at end of file
Raw output
./src/api/endpoints/annotate/_shared/queries/helper.py:83:1: W391 blank line at end of file
Loading