Skip to content

Commit

Permalink
fix: delete more unused DB indices
Browse files Browse the repository at this point in the history
  • Loading branch information
raphael0202 committed Jul 18, 2024
1 parent c316e41 commit 4942c44
Show file tree
Hide file tree
Showing 3 changed files with 50 additions and 6 deletions.
40 changes: 40 additions & 0 deletions migrations/005_drop_other_indices.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
import peewee as pw
from peewee_migrate import Migrator


def migrate(migrator: Migrator, database: pw.Database, *, fake=False):
    """Drop unused indices.

    Sizes noted below are the ones recorded for the production database:
    - prediction_data: 6242 MB in prod
    - prediction_source_image: 1795 MB in prod
    - logo_confidence_threshold_type: size not recorded
    - logo_confidence_threshold_value: size not recorded
    - product_insight_bounding_box: 188 MB in prod
    - product_insight_confidence: 332 MB in prod
    - image_prediction_model_version: 533 MB in prod
    - product_insight_username: 661 MB in prod

    Every statement uses ``DROP INDEX IF EXISTS``, so a name that does not
    exist in a given database is silently skipped. For tables whose name
    contains underscores, the variant with the underscores removed from the
    table-name part is dropped as well.

    ``database`` and ``fake`` are not used by this migration.
    """
    index_names = (
        "prediction_data",
        "prediction_source_image",
        "logo_confidence_threshold_type",
        "logoconfidencethreshold_type",
        "logo_confidence_threshold_value",
        "logoconfidencethreshold_value",
        "product_insight_bounding_box",
        "productinsight_bounding_box",
        "product_insight_confidence",
        "productinsight_confidence",
        "image_prediction_model_version",
        # Following the pattern of the other pairs (only the table-name part
        # loses its underscores), the alternate name of this index is
        # `imageprediction_model_version`.
        "imageprediction_model_version",
        # NOTE(review): spelling used by the original migration; it looks
        # like a typo of the name above. Kept because dropping a missing
        # index is a no-op with IF EXISTS.
        "imagepredictionmodel_version",
        "product_insight_username",
        "productinsight_username",
    )
    for index_name in index_names:
        migrator.sql(f"DROP INDEX IF EXISTS {index_name}")

def rollback(migrator: Migrator, database: pw.Database, *, fake=False):
    """Do nothing: this migration has no automated rollback.

    Rebuilding the dropped indices takes too long to be done from a
    migration script, so restoring them has to be carried out manually.
    """
4 changes: 4 additions & 0 deletions robotoff/images.py
Original file line number Diff line number Diff line change
Expand Up @@ -264,6 +264,8 @@ def delete_images(product_id: ProductIdentifier, image_ids: list[str]):
.where(
Prediction.source_image.in_(source_images),
Prediction.server_type == server_type,
# Add barcode filter to speed up the query
Prediction.barcode == product_id.barcode,
)
.execute()
)
Expand All @@ -273,6 +275,8 @@ def delete_images(product_id: ProductIdentifier, image_ids: list[str]):
ProductInsight.source_image.in_(source_images),
ProductInsight.server_type == server_type,
ProductInsight.annotation.is_null(),
# Add barcode filter to speed up the query
ProductInsight.barcode == product_id.barcode,
)
.execute()
)
Expand Down
12 changes: 6 additions & 6 deletions robotoff/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ class ProductInsight(BaseModel):

# If the insight was annotated manually, this field stores the username of
# the annotator (or first annotator, if multiple votes were cast).
username = peewee.TextField(index=True, null=True)
username = peewee.TextField(null=True)

# Stores the list of countries that are associated with the product.
# E.g. possible values are "en:united-states" or "en:france".
Expand Down Expand Up @@ -194,7 +194,7 @@ class ProductInsight(BaseModel):
# tags
campaign = BinaryJSONField(null=True, index=True, default=list)
# Confidence score of the insight, may be null
confidence = peewee.FloatField(null=True, index=True)
confidence = peewee.FloatField(null=True)

# bounding box corresponding to the area of the image related
# to the insight that was detected.
Expand All @@ -212,11 +212,11 @@ def get_product_id(self) -> ProductIdentifier:
class Prediction(BaseModel):
barcode = peewee.CharField(max_length=100, null=False, index=True)
type = peewee.CharField(max_length=256, index=True)
data = BinaryJSONField(index=True)
data = BinaryJSONField()
timestamp = peewee.DateTimeField(index=True)
value_tag = peewee.TextField(null=True)
value = peewee.TextField(null=True)
source_image = peewee.TextField(null=True, index=True)
source_image = peewee.TextField(null=True)
automatic_processing = peewee.BooleanField(null=True)
predictor = peewee.CharField(max_length=100, null=True)
predictor_version = peewee.CharField(max_length=100, null=True)
Expand Down Expand Up @@ -392,8 +392,8 @@ class Meta:


class LogoConfidenceThreshold(BaseModel):
type = peewee.CharField(null=True, index=True)
value = peewee.CharField(null=True, index=True)
type = peewee.CharField(null=True)
value = peewee.CharField(null=True)
threshold = peewee.FloatField(null=False)


Expand Down

0 comments on commit 4942c44

Please sign in to comment.