Skip to content

django-lasuite malware detection integration #936

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,10 @@ and this project adheres to

## [Unreleased]

### Added

- ✨(backend) integrate malware_detection from django-lasuite

## [3.2.0] - 2025-05-05

## Added
Expand Down
2 changes: 2 additions & 0 deletions docs/env.md
Original file line number Diff line number Diff line change
Expand Up @@ -98,3 +98,5 @@ These are the environmental variables you can set for the impress-backend contai
| DJANGO_CSRF_TRUSTED_ORIGINS | CSRF trusted origins | [] |
| REDIS_URL | cache url | redis://redis:6379/1 |
| CACHES_DEFAULT_TIMEOUT | cache default timeout | 30 |
| MALWARE_DETECTION_BACKEND | The malware detection backend to use from the django-lasuite package | lasuite.malware_detection.backends.dummy.DummyBackend |
| MALWARE_DETECTION_PARAMETERS | A dict containing all the parameters used to initialize the malware detection backend | {"callback_path": "core.malware_detection.malware_detection_callback"} |
21 changes: 20 additions & 1 deletion src/backend/core/api/viewsets.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import requests
import rest_framework as drf
from botocore.exceptions import ClientError
from lasuite.malware_detection import malware_detection
from rest_framework import filters, status, viewsets
from rest_framework import response as drf_response
from rest_framework.permissions import AllowAny
Expand Down Expand Up @@ -1156,7 +1157,10 @@ def attachment_upload(self, request, *args, **kwargs):

# Prepare metadata for storage
extra_args = {
"Metadata": {"owner": str(request.user.id)},
"Metadata": {
"owner": str(request.user.id),
"status": enums.DocumentAttachmentStatus.PROCESSING,
},
"ContentType": serializer.validated_data["content_type"],
}
file_unsafe = ""
Expand Down Expand Up @@ -1188,6 +1192,8 @@ def attachment_upload(self, request, *args, **kwargs):
document.attachments.append(key)
document.save()

malware_detection.analyse_file(key, document_id=document.id)

return drf.response.Response(
{"file": f"{settings.MEDIA_URL:s}{key:s}"},
status=drf.status.HTTP_201_CREATED,
Expand Down Expand Up @@ -1271,6 +1277,19 @@ def media_auth(self, request, *args, **kwargs):
logger.debug("User '%s' lacks permission for attachment", user)
raise drf.exceptions.PermissionDenied()

# Check if the attachment is ready
s3_client = default_storage.connection.meta.client
bucket_name = default_storage.bucket_name
head_resp = s3_client.head_object(Bucket=bucket_name, Key=key)
metadata = head_resp.get("Metadata", {})
# In order to be compatible with existing upload without `status` metadata,
# we consider them as ready.
if (
metadata.get("status", enums.DocumentAttachmentStatus.READY)
!= enums.DocumentAttachmentStatus.READY
):
raise drf.exceptions.PermissionDenied()

# Generate S3 authorization headers using the extracted URL parameters
request = utils.generate_s3_authorization_headers(key)

Expand Down
8 changes: 8 additions & 0 deletions src/backend/core/enums.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
"""

import re
from enum import StrEnum

from django.conf import global_settings, settings
from django.db import models
Expand Down Expand Up @@ -38,3 +39,10 @@ class MoveNodePositionChoices(models.TextChoices):
LAST_SIBLING = "last-sibling", _("Last sibling")
LEFT = "left", _("Left")
RIGHT = "right", _("Right")


class DocumentAttachmentStatus(StrEnum):
"""Defines the possible statuses for an attachment."""

PROCESSING = "processing"
READY = "ready"
51 changes: 51 additions & 0 deletions src/backend/core/malware_detection.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
"""Malware detection callbacks"""

import logging

from django.core.files.storage import default_storage

from lasuite.malware_detection.enums import ReportStatus

from core.enums import DocumentAttachmentStatus
from core.models import Document

logger = logging.getLogger(__name__)


def malware_detection_callback(file_path, status, error_info, **kwargs):
    """Handle the result of a malware analysis for an uploaded attachment.

    When ``status`` is ``ReportStatus.SAFE`` the stored object's ``status``
    metadata is switched to ``DocumentAttachmentStatus.READY``. For any other
    status the file is removed from the document's attachment list and deleted
    from storage.

    Args:
        file_path: Storage key of the analysed file.
        status: Analysis result, compared against ``ReportStatus.SAFE``.
        error_info: Details reported by the detection backend; only logged.
        **kwargs: Extra context passed through by the caller. ``document_id``
            is read to locate the document that references the file.
    """
    if status == ReportStatus.SAFE:
        logger.info("File %s is safe", file_path)

        s3_client = default_storage.connection.meta.client
        bucket_name = default_storage.bucket_name
        head_resp = s3_client.head_object(Bucket=bucket_name, Key=file_path)

        # Start from the existing user metadata and only flip the status.
        metadata = dict(head_resp.get("Metadata", {}))
        metadata["status"] = DocumentAttachmentStatus.READY

        # MetadataDirective="REPLACE" replaces *all* object metadata with what
        # this request supplies, so every header set at upload time must be
        # forwarded explicitly. Forwarding only ContentType would drop
        # ContentDisposition, including the forced "attachment" disposition.
        preserved_headers = {
            header: head_resp[header]
            for header in (
                "ContentType",
                "ContentDisposition",
                "ContentEncoding",
                "ContentLanguage",
                "CacheControl",
            )
            if head_resp.get(header)
        }

        # Copy the object onto itself to rewrite its metadata in place.
        s3_client.copy_object(
            Bucket=bucket_name,
            CopySource={"Bucket": bucket_name, "Key": file_path},
            Key=file_path,
            Metadata=metadata,
            MetadataDirective="REPLACE",
            **preserved_headers,
        )
        return

    document_id = kwargs.get("document_id")
    logger.error(
        "File %s for document %s is infected with malware. Error info: %s",
        file_path,
        document_id,
        error_info,
    )

    # Detach the file from the document. A missing document or an attachment
    # that is no longer listed must not prevent the storage deletion below.
    try:
        document = Document.objects.get(pk=document_id)
    except Document.DoesNotExist:
        logger.warning(
            "Document %s not found while removing attachment %s",
            document_id,
            file_path,
        )
    else:
        if file_path in document.attachments:
            document.attachments.remove(file_path)
            document.save(update_fields=["attachments"])

    # Always delete the flagged file from the storage.
    default_storage.delete(file_path)
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

import re
import uuid
from unittest import mock

from django.core.files.storage import default_storage
from django.core.files.uploadedfile import SimpleUploadedFile
Expand All @@ -12,6 +13,7 @@
from rest_framework.test import APIClient

from core import factories
from core.api.viewsets import malware_detection
from core.tests.conftest import TEAM, USER, VIA

pytestmark = pytest.mark.django_db
Expand Down Expand Up @@ -59,7 +61,8 @@ def test_api_documents_attachment_upload_anonymous_success():
file = SimpleUploadedFile(name="test.png", content=PIXEL, content_type="image/png")

url = f"/api/v1.0/documents/{document.id!s}/attachment-upload/"
response = APIClient().post(url, {"file": file}, format="multipart")
with mock.patch.object(malware_detection, "analyse_file") as mock_analyse_file:
response = APIClient().post(url, {"file": file}, format="multipart")

assert response.status_code == 201

Expand All @@ -74,12 +77,13 @@ def test_api_documents_attachment_upload_anonymous_success():
assert document.attachments == [f"{document.id!s}/attachments/{file_id!s}.png"]

# Now, check the metadata of the uploaded file
key = file_path.replace("/media", "")
key = file_path.replace("/media/", "")
mock_analyse_file.assert_called_once_with(key, document_id=document.id)
file_head = default_storage.connection.meta.client.head_object(
Bucket=default_storage.bucket_name, Key=key
)

assert file_head["Metadata"] == {"owner": "None"}
assert file_head["Metadata"] == {"owner": "None", "status": "processing"}
assert file_head["ContentType"] == "image/png"
assert file_head["ContentDisposition"] == 'inline; filename="test.png"'

Expand Down Expand Up @@ -139,14 +143,19 @@ def test_api_documents_attachment_upload_authenticated_success(reach, role):
file = SimpleUploadedFile(name="test.png", content=PIXEL, content_type="image/png")

url = f"/api/v1.0/documents/{document.id!s}/attachment-upload/"
response = client.post(url, {"file": file}, format="multipart")
with mock.patch.object(malware_detection, "analyse_file") as mock_analyse_file:
response = client.post(url, {"file": file}, format="multipart")

assert response.status_code == 201

pattern = re.compile(rf"^/media/{document.id!s}/attachments/(.*)\.png")
match = pattern.search(response.json()["file"])
file_id = match.group(1)

mock_analyse_file.assert_called_once_with(
f"{document.id!s}/attachments/{file_id!s}.png", document_id=document.id
)

# Validate that file_id is a valid UUID
uuid.UUID(file_id)

Expand Down Expand Up @@ -210,7 +219,8 @@ def test_api_documents_attachment_upload_success(via, role, mock_user_teams):
file = SimpleUploadedFile(name="test.png", content=PIXEL, content_type="image/png")

url = f"/api/v1.0/documents/{document.id!s}/attachment-upload/"
response = client.post(url, {"file": file}, format="multipart")
with mock.patch.object(malware_detection, "analyse_file") as mock_analyse_file:
response = client.post(url, {"file": file}, format="multipart")

assert response.status_code == 201

Expand All @@ -226,11 +236,12 @@ def test_api_documents_attachment_upload_success(via, role, mock_user_teams):
assert document.attachments == [f"{document.id!s}/attachments/{file_id!s}.png"]

# Now, check the metadata of the uploaded file
key = file_path.replace("/media", "")
key = file_path.replace("/media/", "")
mock_analyse_file.assert_called_once_with(key, document_id=document.id)
file_head = default_storage.connection.meta.client.head_object(
Bucket=default_storage.bucket_name, Key=key
)
assert file_head["Metadata"] == {"owner": str(user.id)}
assert file_head["Metadata"] == {"owner": str(user.id), "status": "processing"}
assert file_head["ContentType"] == "image/png"
assert file_head["ContentDisposition"] == 'inline; filename="test.png"'

Expand Down Expand Up @@ -304,7 +315,8 @@ def test_api_documents_attachment_upload_fix_extension(
url = f"/api/v1.0/documents/{document.id!s}/attachment-upload/"

file = SimpleUploadedFile(name=name, content=content)
response = client.post(url, {"file": file}, format="multipart")
with mock.patch.object(malware_detection, "analyse_file") as mock_analyse_file:
response = client.post(url, {"file": file}, format="multipart")

assert response.status_code == 201

Expand All @@ -324,11 +336,16 @@ def test_api_documents_attachment_upload_fix_extension(
uuid.UUID(file_id)

# Now, check the metadata of the uploaded file
key = file_path.replace("/media", "")
key = file_path.replace("/media/", "")
mock_analyse_file.assert_called_once_with(key, document_id=document.id)
file_head = default_storage.connection.meta.client.head_object(
Bucket=default_storage.bucket_name, Key=key
)
assert file_head["Metadata"] == {"owner": str(user.id), "is_unsafe": "true"}
assert file_head["Metadata"] == {
"owner": str(user.id),
"is_unsafe": "true",
"status": "processing",
}
assert file_head["ContentType"] == content_type
assert file_head["ContentDisposition"] == f'attachment; filename="{name:s}"'

Expand Down Expand Up @@ -364,7 +381,8 @@ def test_api_documents_attachment_upload_unsafe():
file = SimpleUploadedFile(
name="script.exe", content=b"\x4d\x5a\x90\x00\x03\x00\x00\x00"
)
response = client.post(url, {"file": file}, format="multipart")
with mock.patch.object(malware_detection, "analyse_file") as mock_analyse_file:
response = client.post(url, {"file": file}, format="multipart")

assert response.status_code == 201

Expand All @@ -381,11 +399,16 @@ def test_api_documents_attachment_upload_unsafe():
file_id = file_id.replace("-unsafe", "")
uuid.UUID(file_id)

key = file_path.replace("/media/", "")
mock_analyse_file.assert_called_once_with(key, document_id=document.id)
# Now, check the metadata of the uploaded file
key = file_path.replace("/media", "")
file_head = default_storage.connection.meta.client.head_object(
Bucket=default_storage.bucket_name, Key=key
)
assert file_head["Metadata"] == {"owner": str(user.id), "is_unsafe": "true"}
assert file_head["Metadata"] == {
"owner": str(user.id),
"is_unsafe": "true",
"status": "processing",
}
assert file_head["ContentType"] == "application/octet-stream"
assert file_head["ContentDisposition"] == 'attachment; filename="script.exe"'
Loading
Loading