Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion alembic.ini
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ script_location = alembic
# Uncomment the line below if you want the files to be prepended with date and time
# see https://alembic.sqlalchemy.org/en/latest/tutorial.html#editing-the-ini-file
# for all available tokens
# file_template = %%(year)d_%%(month).2d_%%(day).2d_%%(hour).2d%%(minute).2d-%%(rev)s_%%(slug)s
file_template = %%(year)d_%%(month).2d_%%(day).2d_%%(hour).2d%%(minute).2d-%%(rev)s_%%(slug)s

# sys.path path, will be prepended to sys.path if present.
# defaults to the current working directory.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,6 @@ def downgrade() -> None:
op.drop_constraint("url_error_info_task_id_fkey", 'url_error_info', type_='foreignkey')
op.drop_constraint('uq_url_id_error', 'url_error_info', type_='unique')
op.drop_column('url_error_info', 'task_id')
op.drop_column('url_metadata', 'notes')
op.drop_table('link_task_urls')
op.drop_table('task_errors')
op.drop_table('tasks')
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,13 +43,13 @@ def upgrade() -> None:

def downgrade() -> None:
validation_status.create(op.get_bind())

op.alter_column(
table_name="url_metadata",
column_name="validation_status",
existing_type=metadata_validation_status,
type_=validation_status,
postgresql_using="validation_status::text::validation_status"
)
#
# op.alter_column(
# table_name="url_metadata",
# column_name="validation_status",
# existing_type=metadata_validation_status,
# type_=validation_status,
# postgresql_using="validation_status::text::validation_status"
# )

metadata_validation_status.drop(op.get_bind(), checkfirst=True)
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
"""Update confirmed_url_agency unique constraint to be only url_id

Revision ID: 0c6dc00806ce
Revises: 76f902fe18cd
Create Date: 2025-02-23 08:55:07.046607

"""
from typing import Sequence, Union

from alembic import op
import sqlalchemy as sa

Check warning on line 11 in alembic/versions/2025_02_23_0855-0c6dc00806ce_update_confirmed_url_agency_unique_.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] alembic/versions/2025_02_23_0855-0c6dc00806ce_update_confirmed_url_agency_unique_.py#L11 <401>

'sqlalchemy as sa' imported but unused
Raw output
./alembic/versions/2025_02_23_0855-0c6dc00806ce_update_confirmed_url_agency_unique_.py:11:1: F401 'sqlalchemy as sa' imported but unused


# revision identifiers, used by Alembic.
revision: str = '0c6dc00806ce'
down_revision: Union[str, None] = '76f902fe18cd'
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None


def upgrade() -> None:
    """Narrow ``uq_confirmed_url_agency`` so it covers ``url_id`` alone.

    The existing constraint is dropped and re-created under the same name
    with ``url_id`` as its only column, so a given ``url_id`` can appear in
    at most one ``confirmed_url_agency`` row afterwards.
    """
    op.drop_constraint(
        constraint_name="uq_confirmed_url_agency",
        table_name="confirmed_url_agency",
        # Alembic only requires the constraint type on MySQL; stating it
        # keeps the call explicit and backend-independent.
        type_="unique",
    )

    op.create_unique_constraint(
        constraint_name="uq_confirmed_url_agency",
        table_name="confirmed_url_agency",
        columns=["url_id"],
    )


def downgrade() -> None:
    """Restore ``uq_confirmed_url_agency`` to ``(url_id, agency_id)``.

    The single-column constraint is dropped and re-created under the same
    name over both ``url_id`` and ``agency_id``.
    """
    op.drop_constraint(
        constraint_name="uq_confirmed_url_agency",
        table_name="confirmed_url_agency",
        # Alembic only requires the constraint type on MySQL; stating it
        # keeps the call explicit and backend-independent.
        type_="unique",
    )

    op.create_unique_constraint(
        constraint_name="uq_confirmed_url_agency",
        table_name="confirmed_url_agency",
        columns=["url_id", "agency_id"],
    )

Check warning on line 44 in alembic/versions/2025_02_23_0855-0c6dc00806ce_update_confirmed_url_agency_unique_.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] alembic/versions/2025_02_23_0855-0c6dc00806ce_update_confirmed_url_agency_unique_.py#L44 <292>

no newline at end of file
Raw output
./alembic/versions/2025_02_23_0855-0c6dc00806ce_update_confirmed_url_agency_unique_.py:44:6: W292 no newline at end of file
Original file line number Diff line number Diff line change
@@ -0,0 +1,294 @@
"""Overhaul annotation organization

New Tables
- AutoRelevantSuggestions
- AutoRecordTypeSuggestions
- UserRelevantSuggestions
- UserRecordTypeSuggestions

New Columns for `URL`
- `agency_id`
- `record_type`
- `relevant`

Removed Tables
- `URLMetadata`
- `ConfirmedURLAgency`
- `MetadataAnnotation`

Update URL Status so that, besides ERROR and DUPLICATE, it has just three enum values:
- VALIDATED
- SUBMITTED
- PENDING

Revision ID: 33421c0590bb
Revises: 0c6dc00806ce
Create Date: 2025-02-23 10:23:19.696248

"""
from typing import Sequence, Union

from alembic import op
import sqlalchemy as sa
from sqlalchemy import UniqueConstraint

from util.alembic_helpers import switch_enum_type

# revision identifiers, used by Alembic.
revision: str = '33421c0590bb'
down_revision: Union[str, None] = '0c6dc00806ce'
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None

# Labels that make up the `record_type` database enum, in the order they are
# handed to `sa.Enum` below.  The data-migration SQL in this module casts
# `url_metadata.value` to this enum type.
record_type_values = [
"Accident Reports",
"Arrest Records",
"Calls for Service",
"Car GPS",
"Citations",
"Dispatch Logs",
"Dispatch Recordings",
"Field Contacts",
"Incident Reports",
"Misc Police Activity",
"Officer Involved Shootings",
"Stops",
"Surveys",
"Use of Force Reports",
"Vehicle Pursuits",
"Complaints & Misconduct",
"Daily Activity Logs",
"Training & Hiring Info",
"Personnel Records",
"Annual & Monthly Reports",
"Budgets & Finances",
"Contact Info & Agency Meta",
"Geographic",
"List of Data Sources",
"Policies & Contracts",
"Crime Maps & Reports",
"Crime Statistics",
"Media Bulletins",
"Records Request Info",
"Resources",
"Sex Offender Registry",
"Wanted Persons",
"Booking Reports",
"Court Cases",
"Incarceration Records",
"Other"
]


# Single Enum object shared by every `record_type` column this migration
# creates (two suggestion tables and `urls.record_type`); downgrade() drops
# it explicitly as its last step.
record_type_enum = sa.Enum(*record_type_values, name='record_type')

def run_data_migrations() -> None:
    """Copy existing annotations from the legacy tables into the new ones.

    Reads ``url_metadata`` (joined to ``metadata_annotations`` for the
    per-user rows) and fills the four ``*_suggestions`` tables.  It has to
    run after those tables have been created and before the legacy tables
    are dropped, which is where ``upgrade()`` calls it.
    """
    # Machine-generated relevance: 'Relevant' rows whose validation source is
    # 'Machine Learning'.  The text value is lower-cased, then cast to boolean.
    op.execute(
        """
        INSERT INTO AUTO_RELEVANT_SUGGESTIONS (url_id, relevant)
        SELECT url_id, LOWER(value)::boolean
        FROM public.url_metadata
        WHERE validation_source = 'Machine Learning'
        and attribute = 'Relevant'
        """
    )

    # Machine-generated record types: the text value is cast straight to the
    # `record_type` enum, so any value outside the enum makes the cast fail.
    op.execute(
        """
        INSERT INTO AUTO_RECORD_TYPE_SUGGESTIONS(url_id, record_type)
        SELECT url_id, value::record_type
        FROM public.url_metadata
        WHERE validation_source = 'Machine Learning'
        and attribute = 'Record Type'
        """
    )

    # Per-user relevance: one row for every annotation attached to a
    # 'Relevant' metadata row (no validation_source filter here).
    op.execute(
        """
        INSERT INTO USER_RELEVANT_SUGGESTIONS(url_id, relevant, user_id)
        SELECT um.url_id, LOWER(um.value)::boolean, ma.user_id
        FROM public.url_metadata um
        INNER join metadata_annotations ma on um.id = ma.metadata_id
        where um.attribute = 'Relevant'
        """
    )

    # Per-user record types: same join, for 'Record Type' metadata rows.
    op.execute(
        """
        INSERT INTO USER_RECORD_TYPE_SUGGESTIONS(url_id, record_type, user_id)
        SELECT um.url_id, um.value::record_type, ma.user_id
        FROM public.url_metadata um
        INNER join metadata_annotations ma on um.id = ma.metadata_id
        where um.attribute = 'Record Type'
        """
    )

def upgrade() -> None:
    """Replace the metadata/annotation tables with per-kind suggestion tables.

    Steps, in order:

    1. Create the four ``*_suggestions`` tables.
    2. Add ``record_type``, ``relevant`` and ``agency_id`` to ``urls``.
    3. Copy existing annotations across via ``run_data_migrations()``.
    4. Drop ``metadata_annotations``, ``url_metadata`` and
       ``confirmed_url_agency``.
    5. Switch the ``url_status`` enum used by ``urls.outcome`` to its new
       value list.
    """
    # NOTE(review): `onupdate` is applied by SQLAlchemy when it issues UPDATE
    # statements itself; it is not part of the emitted CREATE TABLE, so it
    # looks inert in these definitions -- confirm whether a trigger is needed.

    # Create the new tables
    op.create_table(
        'auto_relevant_suggestions',
        sa.Column('id', sa.Integer(), primary_key=True),
        sa.Column(
            'url_id',
            sa.Integer(),
            sa.ForeignKey('urls.id', ondelete='CASCADE'),
            nullable=False
        ),
        sa.Column('relevant', sa.Boolean(), nullable=False),
        sa.Column(
            'created_at',
            sa.TIMESTAMP(),
            nullable=False,
            server_default=sa.text('now()')
        ),
        sa.Column(
            'updated_at',
            sa.TIMESTAMP(),
            nullable=False,
            server_default=sa.text('now()'),
            onupdate=sa.text('now()')
        ),
        # One automatic relevance suggestion per URL.
        UniqueConstraint(
            'url_id',
            name='auto_relevant_suggestions_uq_url_id'
        )
    )

    op.create_table(
        'auto_record_type_suggestions',
        sa.Column('id', sa.Integer(), primary_key=True),
        sa.Column(
            'url_id',
            sa.Integer(),
            sa.ForeignKey('urls.id', ondelete='CASCADE'),
            nullable=False
        ),
        sa.Column('record_type', record_type_enum, nullable=False),
        sa.Column(
            'created_at',
            sa.TIMESTAMP(),
            nullable=False,
            server_default=sa.text('now()')
        ),
        sa.Column(
            'updated_at',
            sa.TIMESTAMP(),
            nullable=False,
            server_default=sa.text('now()'),
            onupdate=sa.text('now()')
        ),
        # One automatic record-type suggestion per URL.
        UniqueConstraint(
            'url_id',
            name='auto_record_type_suggestions_uq_url_id'
        )
    )

    op.create_table(
        'user_relevant_suggestions',
        sa.Column('id', sa.Integer(), primary_key=True),
        sa.Column(
            'url_id',
            sa.Integer(),
            sa.ForeignKey('urls.id', ondelete='CASCADE'),
            nullable=False
        ),
        # No foreign key is declared for user_id.
        sa.Column('user_id', sa.Integer(), nullable=False),
        sa.Column('relevant', sa.Boolean(), nullable=False),
        sa.Column(
            'created_at',
            sa.TIMESTAMP(),
            nullable=False,
            server_default=sa.text('now()')
        ),
        sa.Column(
            'updated_at',
            sa.TIMESTAMP(),
            nullable=False,
            server_default=sa.text('now()'),
            onupdate=sa.text('now()')
        ),
        # One relevance suggestion per (URL, user) pair.
        sa.UniqueConstraint(
            "url_id", "user_id", name="uq_user_relevant_suggestions"
        )
    )

    op.create_table(
        'user_record_type_suggestions',
        sa.Column('id', sa.Integer(), primary_key=True),
        sa.Column(
            'url_id',
            sa.Integer(),
            sa.ForeignKey('urls.id', ondelete='CASCADE'),
            nullable=False
        ),
        # No foreign key is declared for user_id.
        sa.Column('user_id', sa.Integer(), nullable=False),
        sa.Column('record_type', record_type_enum, nullable=False),
        sa.Column(
            'created_at',
            sa.TIMESTAMP(),
            nullable=False,
            server_default=sa.text('now()')
        ),
        sa.Column(
            'updated_at',
            sa.TIMESTAMP(),
            nullable=False,
            server_default=sa.text('now()'),
            onupdate=sa.text('now()')
        ),
        # One record-type suggestion per (URL, user) pair.
        sa.UniqueConstraint(
            "url_id", "user_id", name="uq_user_record_type_suggestions"
        )
    )

    # Add the new columns (all nullable, so existing rows need no backfill)
    op.add_column(
        'urls',
        sa.Column('record_type', record_type_enum, nullable=True)
    )

    op.add_column(
        'urls',
        sa.Column('relevant', sa.Boolean(), nullable=True)
    )

    op.add_column(
        'urls',
        sa.Column(
            'agency_id',
            sa.Integer(),
            sa.ForeignKey('agencies.agency_id', ondelete='NO ACTION'),
            nullable=True
        )
    )

    # Must happen while the legacy tables still exist.
    run_data_migrations()

    # NOTE(review): nothing in this migration copies `confirmed_url_agency`
    # rows into `urls.agency_id`, or fills `urls.record_type` /
    # `urls.relevant`; confirm that discarding the confirmed-agency data
    # here is intended.

    # Delete the old tables (metadata_annotations first: it references
    # url_metadata)
    op.drop_table('metadata_annotations')
    op.drop_table('url_metadata')
    op.drop_table('confirmed_url_agency')

    # New value list adds 'validated' and no longer contains
    # 'human_labeling' / 'rejected'.
    # NOTE(review): how existing rows holding a removed value are treated
    # depends on `switch_enum_type` -- verify against the helper.
    switch_enum_type(
        table_name='urls',
        column_name='outcome',
        enum_name='url_status',
        new_enum_values=[
            'pending', 'submitted', 'validated', 'error', 'duplicate'
        ]
    )


def downgrade() -> None:
    """Undo the schema changes made by ``upgrade()``.

    Drops the four ``*_suggestions`` tables and the three columns added to
    ``urls``, re-creates ``url_metadata``, ``confirmed_url_agency`` and
    ``metadata_annotations`` as empty tables, switches ``url_status`` back to
    its previous value list, and finally drops the ``record_type`` enum.

    No data is restored: the re-created tables start empty.
    """
    # Drop the new tables
    op.drop_table('auto_relevant_suggestions')
    op.drop_table('auto_record_type_suggestions')
    op.drop_table('user_relevant_suggestions')
    op.drop_table('user_record_type_suggestions')

    # Drop the new columns
    op.drop_column('urls', 'record_type')
    op.drop_column('urls', 'relevant')
    op.drop_column('urls', 'agency_id')

    # Create the old tables
    # NOTE(review): this `url_metadata` has no `validation_source` column,
    # yet the queries in run_data_migrations() filter on it, so running
    # upgrade() again after this downgrade looks like it would fail --
    # confirm and restore the original columns if a round trip must work.
    op.create_table(
        'url_metadata',
        sa.Column('id', sa.Integer(), primary_key=True),
        sa.Column(
            'url_id',
            sa.Integer(),
            sa.ForeignKey('urls.id', ondelete='CASCADE'),
            nullable=False
        ),
        sa.Column('attribute', sa.String(), nullable=False),
        sa.Column('value', sa.String(), nullable=False),
        sa.Column(
            'created_at',
            sa.TIMESTAMP(),
            nullable=False,
            server_default=sa.text('now()')
        ),
        sa.Column(
            'updated_at',
            sa.TIMESTAMP(),
            nullable=False,
            server_default=sa.text('now()'),
            onupdate=sa.text('now()')
        ),
        sa.UniqueConstraint(
            "url_id",
            "attribute",
            name="uq_url_id_attribute"),
    )

    op.create_table(
        'confirmed_url_agency',
        sa.Column('id', sa.Integer(), primary_key=True),
        sa.Column(
            'url_id',
            sa.Integer(),
            sa.ForeignKey('urls.id', ondelete='CASCADE'),
            nullable=False
        ),
        sa.Column(
            'agency_id',
            sa.Integer(),
            sa.ForeignKey('agencies.agency_id', ondelete='CASCADE'),
            nullable=False
        ),
        sa.Column(
            'created_at',
            sa.TIMESTAMP(),
            nullable=False,
            server_default=sa.text('now()')
        ),
        sa.Column(
            'updated_at',
            sa.TIMESTAMP(),
            nullable=False,
            server_default=sa.text('now()'),
            onupdate=sa.text('now()')
        ),
        # Unique on url_id only, i.e. the shape this table had immediately
        # before this revision's upgrade() dropped it.
        sa.UniqueConstraint("url_id", name="uq_confirmed_url_agency")
    )

    # Created after url_metadata because of the foreign key to it.
    op.create_table(
        'metadata_annotations',
        sa.Column('id', sa.Integer(), primary_key=True),
        sa.Column(
            'metadata_id',
            sa.Integer(),
            sa.ForeignKey('url_metadata.id', ondelete='CASCADE'),
            nullable=False
        ),
        sa.Column('user_id', sa.Integer(), nullable=False),
        sa.Column(
            'created_at',
            sa.TIMESTAMP(),
            nullable=False,
            server_default=sa.text('now()')
        ),
        sa.Column(
            'updated_at',
            sa.TIMESTAMP(),
            nullable=False,
            server_default=sa.text('now()'),
            onupdate=sa.text('now()')
        ),
        sa.UniqueConstraint(
            "user_id",
            "metadata_id",
            name="metadata_annotations_uq_user_id_metadata_id"),
    )

    # Previous value list: brings back 'human_labeling' / 'rejected' and no
    # longer contains 'validated'.
    # NOTE(review): how rows currently set to 'validated' are treated
    # depends on `switch_enum_type` -- verify against the helper.
    switch_enum_type(
        table_name='urls',
        column_name='outcome',
        enum_name='url_status',
        new_enum_values=[
            'pending', 'submitted', 'human_labeling', 'rejected',
            'duplicate', 'error'
        ]
    )

    # Drop enum (safe only now that every column using it is gone)
    record_type_enum.drop(op.get_bind())
Loading