Skip to content

Commit 16872b4

Browse files
fix: refactor for audio description
1 parent 8095edc commit 16872b4

File tree

14 files changed

+333
-864
lines changed

14 files changed

+333
-864
lines changed
Lines changed: 28 additions & 190 deletions
Original file line numberDiff line numberDiff line change
@@ -1,57 +1,31 @@
11
"""
22
Storage handlers for audio description (AD) files.
33
4-
Audio description files can be large (hundreds of MB), so the upload flow
5-
follows the same direct-to-S3 pattern used for video uploads:
6-
7-
1. The browser asks the CMS for a pre-signed PUT URL.
8-
2. The browser PUTs the file directly to S3 with that URL — bytes never
9-
traverse the Django worker.
10-
3. The browser POSTs back to the CMS to mark the upload complete.
11-
12-
edx-val stores only the metadata (filename, format, S3 key, status); all
13-
boto3 / S3 interaction lives here so that edx-val remains free of any AWS
14-
dependencies, mirroring how `video_storage_handlers.py` already works for
15-
video files.
4+
Files are saved via edx-val's Django storage abstraction
5+
(FileSystemStorage locally, S3Boto3Storage in production).
6+
edx-val owns the file record and URL generation; this module
7+
handles validation, sanitisation, and delegates to edx-val's API.
168
"""
179

1810
import logging
1911
import os
2012
import re
21-
from uuid import uuid4
2213

2314
from django.conf import settings
15+
from django.core.files.base import ContentFile
2416
from edxval.api import (
25-
create_video_audio_description,
17+
create_or_update_video_audio_description,
2618
delete_video_audio_description,
27-
get_video_audio_description,
28-
mark_video_audio_description_ready,
19+
get_video_audio_description_url
2920
)
3021

31-
from .video_storage_handlers import get_s3_client
32-
3322
log = logging.getLogger(__name__)
3423

3524

36-
def _rewrite_devstack_presigned_url(url):
37-
"""
38-
Devstack: localstack uses an internal docker hostname that the browser
39-
can't resolve. Rewrite it to localhost so the browser can reach the
40-
pre-signed URL. localstack is permissive about the host header so the
41-
SigV4 signature still validates. No-op in production.
42-
"""
43-
endpoint = getattr(settings, 'AWS_S3_ENDPOINT_URL', '') or ''
44-
if 'edx.devstack.localstack' in endpoint:
45-
return url.replace('edx.devstack.localstack', 'localhost')
46-
return url
47-
48-
4925
class AudioDescriptionUploadError(Exception):
5026
"""Raised when an AD upload request is invalid or cannot be fulfilled."""
5127

5228

53-
# Maps allowed content types to canonical file_format strings stored in
54-
# edx-val's `VideoAudioDescription.file_format` column.
5529
_CONTENT_TYPE_TO_FORMAT = {
5630
'audio/mpeg': 'mp3',
5731
'audio/mp4': 'm4a',
@@ -60,26 +34,13 @@ class AudioDescriptionUploadError(Exception):
6034
'audio/aac': 'aac',
6135
}
6236

63-
64-
def _get_settings():
65-
"""Return the VIDEO_AUDIO_DESCRIPTION_SETTINGS dict."""
66-
return getattr(settings, 'VIDEO_AUDIO_DESCRIPTION_SETTINGS', {})
67-
68-
69-
def _get_bucket_name():
70-
"""
71-
Return the S3 bucket where AD files are stored. AD files share the
72-
VEM bucket with video files, namespaced by S3_KEY_PREFIX.
73-
"""
74-
return settings.VIDEO_UPLOAD_PIPELINE.get('VEM_S3_BUCKET', '')
37+
ALLOWED_FORMATS = {'mp3', 'm4a', 'wav', 'aac'}
7538

7639

7740
def _sanitize_file_name(file_name):
7841
"""
7942
Strip path components and any characters outside a safe subset.
80-
Mirrors the ASCII-only check used by the video upload flow.
8143
"""
82-
# Drop any directory components a client may have included.
8344
base = os.path.basename(file_name or '')
8445
if not base:
8546
raise AudioDescriptionUploadError('file_name is required')
@@ -91,187 +52,64 @@ def _sanitize_file_name(file_name):
9152
f'The file name for {base} must contain only ASCII characters.'
9253
) from exc
9354

94-
# Replace anything that isn't a safe URL/key character with an underscore.
9555
return re.sub(r'[^A-Za-z0-9._-]', '_', base)
9656

9757

9858
def _resolve_format(content_type, file_name):
9959
"""
10060
Pick the canonical file_format string for the given content type,
101-
falling back to the file extension when the content type alone is
102-
ambiguous (e.g. audio/mp4 → m4a).
61+
falling back to the file extension.
10362
"""
10463
fmt = _CONTENT_TYPE_TO_FORMAT.get(content_type)
10564
if fmt:
10665
return fmt
10766
ext = os.path.splitext(file_name or '')[1].lstrip('.').lower()
108-
if ext in {'mp3', 'm4a', 'wav', 'aac'}:
67+
if ext in ALLOWED_FORMATS:
10968
return ext
11069
raise AudioDescriptionUploadError(
11170
f'Unsupported audio description content type: {content_type}'
11271
)
11372

11473

115-
def generate_audio_description_upload_url(edx_video_id, file_name, content_type, file_size):
74+
def upload_audio_description(edx_video_id, file_name, content_type, file_data):
11675
"""
117-
Step 1 of the upload flow.
118-
119-
Validates the request, generates a pre-signed PUT URL against the VEM
120-
S3 bucket, and creates a pending VideoAudioDescription record in
121-
edx-val (status='upload').
76+
Validate and save an audio description file via edx-val.
12277
123-
Returns a dict with `upload_url`, `s3_key`, `edx_video_id`, and
124-
`expires_in` suitable for serialization to the browser.
78+
Returns the storage URL for the saved file.
12579
"""
12680
if not edx_video_id:
12781
raise AudioDescriptionUploadError('edx_video_id is required')
12882

129-
ad_settings = _get_settings()
130-
allowed_types = ad_settings.get('ALLOWED_CONTENT_TYPES', [])
131-
max_bytes = ad_settings.get('MAX_BYTES', 0)
132-
key_prefix = ad_settings.get('S3_KEY_PREFIX', 'audio_descriptions/')
133-
expires_in = ad_settings.get('PRESIGNED_PUT_EXPIRATION_SECONDS', 3600)
134-
135-
if content_type not in allowed_types:
136-
raise AudioDescriptionUploadError(
137-
f'Unsupported audio description content type: {content_type}'
138-
)
139-
140-
try:
141-
file_size = int(file_size)
142-
except (TypeError, ValueError) as exc:
143-
raise AudioDescriptionUploadError('file_size must be an integer') from exc
144-
145-
if file_size <= 0:
146-
raise AudioDescriptionUploadError('file_size must be greater than zero')
147-
if max_bytes and file_size > max_bytes:
148-
raise AudioDescriptionUploadError(
149-
f'Audio description file exceeds maximum allowed size of {max_bytes} bytes'
150-
)
151-
15283
safe_name = _sanitize_file_name(file_name)
15384
file_format = _resolve_format(content_type, safe_name)
154-
s3_key = f'{key_prefix}{edx_video_id}/{uuid4().hex}_{safe_name}'
15585

156-
bucket = _get_bucket_name()
157-
if not bucket:
86+
max_bytes = getattr(settings, 'VIDEO_AUDIO_DESCRIPTION_SETTINGS', {}).get(
87+
'VIDEO_AUDIO_DESCRIPTION_MAX_BYTES', 0
88+
)
89+
if max_bytes and hasattr(file_data, 'size') and file_data.size > max_bytes:
15890
raise AudioDescriptionUploadError(
159-
'VIDEO_UPLOAD_PIPELINE.VEM_S3_BUCKET is not configured'
91+
f'Audio description file exceeds maximum allowed size of {max_bytes} bytes'
16092
)
16193

162-
s3_client = get_s3_client()
163-
upload_url = s3_client.generate_presigned_url(
164-
ClientMethod='put_object',
165-
Params={
166-
'Bucket': bucket,
167-
'Key': s3_key,
168-
'ContentType': content_type,
169-
},
170-
ExpiresIn=expires_in,
171-
)
172-
upload_url = _rewrite_devstack_presigned_url(upload_url)
94+
content = file_data if isinstance(file_data, ContentFile) else ContentFile(file_data.read())
17395

174-
create_video_audio_description(
96+
return create_or_update_video_audio_description(
17597
video_id=edx_video_id,
176-
file_name=safe_name,
177-
file_format=file_format,
178-
s3_key=s3_key,
179-
file_size=file_size,
98+
metadata={'file_name': safe_name, 'file_format': file_format},
99+
file_data=content,
180100
)
181101

182-
return {
183-
'upload_url': upload_url,
184-
's3_key': s3_key,
185-
'edx_video_id': edx_video_id,
186-
'expires_in': expires_in,
187-
}
188-
189-
190-
def complete_audio_description_upload(edx_video_id, s3_key):
191-
"""
192-
Step 3 of the upload flow.
193-
194-
Verifies the object exists in S3 (HEAD), then flips the edx-val
195-
record from 'upload' to 'ready'. Returns the serialized record.
196-
"""
197-
if not edx_video_id or not s3_key:
198-
raise AudioDescriptionUploadError('edx_video_id and s3_key are required')
199-
200-
record = get_video_audio_description(edx_video_id)
201-
if record is None:
202-
raise AudioDescriptionUploadError(
203-
f'No pending audio description found for video {edx_video_id}'
204-
)
205-
if record['s3_key'] != s3_key:
206-
raise AudioDescriptionUploadError(
207-
'Provided s3_key does not match the pending audio description record'
208-
)
209-
210-
bucket = _get_bucket_name()
211-
s3_client = get_s3_client()
212-
try:
213-
head = s3_client.head_object(Bucket=bucket, Key=s3_key)
214-
except Exception as exc:
215-
log.exception(
216-
'Audio description object missing or unreadable in S3 (bucket=%s key=%s)',
217-
bucket,
218-
s3_key,
219-
)
220-
raise AudioDescriptionUploadError(
221-
'Audio description object was not found in S3'
222-
) from exc
223-
224-
expected_size = record.get('file_size')
225-
actual_size = head.get('ContentLength')
226-
if expected_size and actual_size and int(expected_size) != int(actual_size):
227-
log.warning(
228-
'Audio description size mismatch for %s: expected=%s actual=%s',
229-
edx_video_id,
230-
expected_size,
231-
actual_size,
232-
)
233-
234-
return mark_video_audio_description_ready(edx_video_id)
235-
236102

237103
def delete_audio_description(edx_video_id):
238104
"""
239-
Delete the AD record from edx-val and the underlying object from S3.
240-
241-
Returns True if a record was deleted, False if there was nothing to
242-
delete.
105+
Delete the AD record and file from storage.
106+
Returns True if a record was deleted.
243107
"""
244-
record = get_video_audio_description(edx_video_id)
245-
if record is None:
246-
return False
108+
return delete_video_audio_description(edx_video_id)
247109

248-
bucket = _get_bucket_name()
249-
s3_client = get_s3_client()
250-
try:
251-
s3_client.delete_object(Bucket=bucket, Key=record['s3_key'])
252-
except Exception: # pylint: disable=broad-except
253-
# Log and continue — orphaned S3 objects are recoverable, but a
254-
# dangling DB row would block re-uploads.
255-
log.exception(
256-
'Failed to delete audio description object from S3 (bucket=%s key=%s)',
257-
bucket,
258-
record['s3_key'],
259-
)
260-
261-
delete_video_audio_description(edx_video_id)
262-
return True
263110

264-
265-
def generate_audio_description_download_url(edx_video_id):
111+
def get_audio_description_url(edx_video_id):
266112
"""
267-
Generate a fresh pre-signed GET URL for the AD file. Returns None if
268-
no ready record exists for the given video.
269-
270-
The actual S3 work lives in `xmodule.video_block.audio_description_urls`
271-
so the LMS video block can mint download URLs without importing
272-
`cms.djangoapps.contentstore` (which is not in INSTALLED_APPS for LMS).
113+
Return the download URL for the audio description, or None.
273114
"""
274-
from xmodule.video_block.audio_description_urls import ( # pylint: disable=import-outside-toplevel
275-
generate_audio_description_download_url as _generate,
276-
)
277-
return _generate(edx_video_id)
115+
return get_video_audio_description_url(edx_video_id)

cms/djangoapps/contentstore/rest_api/v1/serializers/course_waffle_flags.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -197,8 +197,6 @@ def get_enable_outline_component_creation(self, obj):
197197
def get_enable_audio_description_upload(self, obj):
198198
"""
199199
Method to get the enable_audio_description_upload waffle flag.
200-
201-
This is an instance-wide flag (not per-course) that gates the
202-
audio description upload UI in the Studio video editor.
203200
"""
204-
return toggles.audio_description_upload_enabled()
201+
course_key = self.get_course_key()
202+
return toggles.audio_description_upload_enabled(course_key)

0 commit comments

Comments
 (0)