diff --git a/migrations/005_drop_other_indices.py b/migrations/005_drop_other_indices.py
new file mode 100644
index 0000000000..2f942f7a71
--- /dev/null
+++ b/migrations/005_drop_other_indices.py
@@ -0,0 +1,50 @@
+import peewee as pw
+from peewee_migrate import Migrator
+
+
+def migrate(migrator: Migrator, database: pw.Database, *, fake=False):
+    """Drop unused indices:
+
+    - prediction_data: 6242 MB in prod
+    - prediction_source_image: 1795 MB in prod
+    - product_insight_bounding_box: 188 MB in prod
+    - product_insight_confidence: 332 MB in prod
+    - image_prediction_model_version: 533 MB in prod
+    - product_insight_username: 661 MB in prod
+    - logo_confidence_threshold_type
+    - logo_confidence_threshold_value
+
+    Most indices are dropped under two spellings of the table prefix (with
+    and without underscores); `IF EXISTS` turns whichever name does not
+    exist into a no-op.
+    """
+    migrator.sql("DROP INDEX IF EXISTS prediction_data")
+    migrator.sql("DROP INDEX IF EXISTS prediction_source_image")
+
+    migrator.sql("DROP INDEX IF EXISTS logo_confidence_threshold_type")
+    migrator.sql("DROP INDEX IF EXISTS logoconfidencethreshold_type")
+
+    migrator.sql("DROP INDEX IF EXISTS logo_confidence_threshold_value")
+    migrator.sql("DROP INDEX IF EXISTS logoconfidencethreshold_value")
+
+    migrator.sql("DROP INDEX IF EXISTS product_insight_bounding_box")
+    migrator.sql("DROP INDEX IF EXISTS productinsight_bounding_box")
+
+    migrator.sql("DROP INDEX IF EXISTS product_insight_confidence")
+    migrator.sql("DROP INDEX IF EXISTS productinsight_confidence")
+
+    migrator.sql("DROP INDEX IF EXISTS image_prediction_model_version")
+    # NOTE(review): every other pair only strips underscores from the table
+    # prefix, which would give `imageprediction_model_version` here; both
+    # spellings are dropped until the real index name is confirmed.
+    migrator.sql("DROP INDEX IF EXISTS imageprediction_model_version")
+    migrator.sql("DROP INDEX IF EXISTS imagepredictionmodel_version")
+
+    migrator.sql("DROP INDEX IF EXISTS product_insight_username")
+    migrator.sql("DROP INDEX IF EXISTS productinsight_username")
+
+
+def rollback(migrator: Migrator, database: pw.Database, *, fake=False):
+    """These indices take too long to build in a migration script; rollback
+    should be done manually."""
+    pass
diff --git a/robotoff/images.py b/robotoff/images.py
index c9016c1176..dd0cd3e607 100644
--- a/robotoff/images.py
+++ b/robotoff/images.py
@@ -264,6 +264,8 @@ def delete_images(product_id: ProductIdentifier, image_ids: list[str]):
             .where(
                 Prediction.source_image.in_(source_images),
                 Prediction.server_type == server_type,
+                # Add barcode filter to speed up the query
+                Prediction.barcode == product_id.barcode,
             )
             .execute()
         )
@@ -273,6 +275,8 @@ def delete_images(product_id: ProductIdentifier, image_ids: list[str]):
                 ProductInsight.source_image.in_(source_images),
                 ProductInsight.server_type == server_type,
                 ProductInsight.annotation.is_null(),
+                # Add barcode filter to speed up the query
+                ProductInsight.barcode == product_id.barcode,
             )
             .execute()
         )
diff --git a/robotoff/models.py b/robotoff/models.py
index 34204954c9..6c9f2ec713 100644
--- a/robotoff/models.py
+++ b/robotoff/models.py
@@ -130,7 +130,7 @@ class ProductInsight(BaseModel):
 
     # If the insight was annotated manually, this field stores the username of
     # the annotator (or first annotator, if multiple votes were cast).
-    username = peewee.TextField(index=True, null=True)
+    username = peewee.TextField(null=True)
 
     # Stores the list of countries that are associated with the product.
     # E.g. possible values are "en:united-states" or "en:france".
@@ -194,7 +194,7 @@ class ProductInsight(BaseModel):
     # tags
     campaign = BinaryJSONField(null=True, index=True, default=list)
     # Confidence score of the insight, may be null
-    confidence = peewee.FloatField(null=True, index=True)
+    confidence = peewee.FloatField(null=True)
 
     # bounding box corresponding to the area of the image related
     # to the insight that was detected.
@@ -212,11 +212,11 @@ def get_product_id(self) -> ProductIdentifier:
 class Prediction(BaseModel):
     barcode = peewee.CharField(max_length=100, null=False, index=True)
     type = peewee.CharField(max_length=256, index=True)
-    data = BinaryJSONField(index=True)
+    data = BinaryJSONField()
     timestamp = peewee.DateTimeField(index=True)
     value_tag = peewee.TextField(null=True)
     value = peewee.TextField(null=True)
-    source_image = peewee.TextField(null=True, index=True)
+    source_image = peewee.TextField(null=True)
     automatic_processing = peewee.BooleanField(null=True)
     predictor = peewee.CharField(max_length=100, null=True)
     predictor_version = peewee.CharField(max_length=100, null=True)
@@ -392,8 +392,8 @@ class Meta:
 
 
 class LogoConfidenceThreshold(BaseModel):
-    type = peewee.CharField(null=True, index=True)
-    value = peewee.CharField(null=True, index=True)
+    type = peewee.CharField(null=True)
+    value = peewee.CharField(null=True)
     threshold = peewee.FloatField(null=False)
 
 