11"""
22Storage handlers for audio description (AD) files.
33
4- Audio description files can be large (hundreds of MB), so the upload flow
5- follows the same direct-to-S3 pattern used for video uploads:
6-
7- 1. The browser asks the CMS for a pre-signed PUT URL.
8- 2. The browser PUTs the file directly to S3 with that URL — bytes never
9- traverse the Django worker.
10- 3. The browser POSTs back to the CMS to mark the upload complete.
11-
12- edx-val stores only the metadata (filename, format, S3 key, status); all
13- boto3 / S3 interaction lives here so that edx-val remains free of any AWS
14- dependencies, mirroring how `video_storage_handlers.py` already works for
15- video files.
4+ Files are saved via edx-val's Django storage abstraction
5+ (FileSystemStorage locally, S3Boto3Storage in production).
6+ edx-val owns the file record and URL generation; this module
7+ handles validation, sanitisation, and delegates to edx-val's API.
168"""
179
1810import logging
1911import os
2012import re
21- from uuid import uuid4
2213
2314from django .conf import settings
15+ from django .core .files .base import ContentFile
2416from edxval .api import (
25- create_video_audio_description ,
17+ create_or_update_video_audio_description ,
2618 delete_video_audio_description ,
27- get_video_audio_description ,
28- mark_video_audio_description_ready ,
19+ get_video_audio_description_url
2920)
3021
31- from .video_storage_handlers import get_s3_client
32-
3322log = logging .getLogger (__name__ )
3423
3524
36- def _rewrite_devstack_presigned_url (url ):
37- """
38- Devstack: localstack uses an internal docker hostname that the browser
39- can't resolve. Rewrite it to localhost so the browser can reach the
40- pre-signed URL. localstack is permissive about the host header so the
41- SigV4 signature still validates. No-op in production.
42- """
43- endpoint = getattr (settings , 'AWS_S3_ENDPOINT_URL' , '' ) or ''
44- if 'edx.devstack.localstack' in endpoint :
45- return url .replace ('edx.devstack.localstack' , 'localhost' )
46- return url
47-
48-
4925class AudioDescriptionUploadError (Exception ):
5026 """Raised when an AD upload request is invalid or cannot be fulfilled."""
5127
5228
53- # Maps allowed content types to canonical file_format strings stored in
54- # edx-val's `VideoAudioDescription.file_format` column.
5529_CONTENT_TYPE_TO_FORMAT = {
5630 'audio/mpeg' : 'mp3' ,
5731 'audio/mp4' : 'm4a' ,
@@ -60,26 +34,13 @@ class AudioDescriptionUploadError(Exception):
6034 'audio/aac' : 'aac' ,
6135}
6236
63-
64- def _get_settings ():
65- """Return the VIDEO_AUDIO_DESCRIPTION_SETTINGS dict."""
66- return getattr (settings , 'VIDEO_AUDIO_DESCRIPTION_SETTINGS' , {})
67-
68-
69- def _get_bucket_name ():
70- """
71- Return the S3 bucket where AD files are stored. AD files share the
72- VEM bucket with video files, namespaced by S3_KEY_PREFIX.
73- """
74- return settings .VIDEO_UPLOAD_PIPELINE .get ('VEM_S3_BUCKET' , '' )
37+ ALLOWED_FORMATS = {'mp3' , 'm4a' , 'wav' , 'aac' }
7538
7639
7740def _sanitize_file_name (file_name ):
7841 """
7942 Strip path components and any characters outside a safe subset.
80- Mirrors the ASCII-only check used by the video upload flow.
8143 """
82- # Drop any directory components a client may have included.
8344 base = os .path .basename (file_name or '' )
8445 if not base :
8546 raise AudioDescriptionUploadError ('file_name is required' )
@@ -91,187 +52,64 @@ def _sanitize_file_name(file_name):
9152 f'The file name for { base } must contain only ASCII characters.'
9253 ) from exc
9354
94- # Replace anything that isn't a safe URL/key character with an underscore.
9555 return re .sub (r'[^A-Za-z0-9._-]' , '_' , base )
9656
9757
9858def _resolve_format (content_type , file_name ):
9959 """
10060 Pick the canonical file_format string for the given content type,
101- falling back to the file extension when the content type alone is
102- ambiguous (e.g. audio/mp4 → m4a).
61+ falling back to the file extension.
10362 """
10463 fmt = _CONTENT_TYPE_TO_FORMAT .get (content_type )
10564 if fmt :
10665 return fmt
10766 ext = os .path .splitext (file_name or '' )[1 ].lstrip ('.' ).lower ()
108- if ext in { 'mp3' , 'm4a' , 'wav' , 'aac' } :
67+ if ext in ALLOWED_FORMATS :
10968 return ext
11069 raise AudioDescriptionUploadError (
11170 f'Unsupported audio description content type: { content_type } '
11271 )
11372
11473
115- def generate_audio_description_upload_url (edx_video_id , file_name , content_type , file_size ):
74+ def upload_audio_description (edx_video_id , file_name , content_type , file_data ):
11675 """
117- Step 1 of the upload flow.
118-
119- Validates the request, generates a pre-signed PUT URL against the VEM
120- S3 bucket, and creates a pending VideoAudioDescription record in
121- edx-val (status='upload').
76+ Validate and save an audio description file via edx-val.
12277
123- Returns a dict with `upload_url`, `s3_key`, `edx_video_id`, and
124- `expires_in` suitable for serialization to the browser.
78+ Returns the storage URL for the saved file.
12579 """
12680 if not edx_video_id :
12781 raise AudioDescriptionUploadError ('edx_video_id is required' )
12882
129- ad_settings = _get_settings ()
130- allowed_types = ad_settings .get ('ALLOWED_CONTENT_TYPES' , [])
131- max_bytes = ad_settings .get ('MAX_BYTES' , 0 )
132- key_prefix = ad_settings .get ('S3_KEY_PREFIX' , 'audio_descriptions/' )
133- expires_in = ad_settings .get ('PRESIGNED_PUT_EXPIRATION_SECONDS' , 3600 )
134-
135- if content_type not in allowed_types :
136- raise AudioDescriptionUploadError (
137- f'Unsupported audio description content type: { content_type } '
138- )
139-
140- try :
141- file_size = int (file_size )
142- except (TypeError , ValueError ) as exc :
143- raise AudioDescriptionUploadError ('file_size must be an integer' ) from exc
144-
145- if file_size <= 0 :
146- raise AudioDescriptionUploadError ('file_size must be greater than zero' )
147- if max_bytes and file_size > max_bytes :
148- raise AudioDescriptionUploadError (
149- f'Audio description file exceeds maximum allowed size of { max_bytes } bytes'
150- )
151-
15283 safe_name = _sanitize_file_name (file_name )
15384 file_format = _resolve_format (content_type , safe_name )
154- s3_key = f'{ key_prefix } { edx_video_id } /{ uuid4 ().hex } _{ safe_name } '
15585
156- bucket = _get_bucket_name ()
157- if not bucket :
86+ max_bytes = getattr (settings , 'VIDEO_AUDIO_DESCRIPTION_SETTINGS' , {}).get (
87+ 'VIDEO_AUDIO_DESCRIPTION_MAX_BYTES' , 0
88+ )
89+ if max_bytes and hasattr (file_data , 'size' ) and file_data .size > max_bytes :
15890 raise AudioDescriptionUploadError (
159- 'VIDEO_UPLOAD_PIPELINE.VEM_S3_BUCKET is not configured '
91+ f'Audio description file exceeds maximum allowed size of { max_bytes } bytes '
16092 )
16193
162- s3_client = get_s3_client ()
163- upload_url = s3_client .generate_presigned_url (
164- ClientMethod = 'put_object' ,
165- Params = {
166- 'Bucket' : bucket ,
167- 'Key' : s3_key ,
168- 'ContentType' : content_type ,
169- },
170- ExpiresIn = expires_in ,
171- )
172- upload_url = _rewrite_devstack_presigned_url (upload_url )
94+ content = file_data if isinstance (file_data , ContentFile ) else ContentFile (file_data .read ())
17395
174- create_video_audio_description (
96+ return create_or_update_video_audio_description (
17597 video_id = edx_video_id ,
176- file_name = safe_name ,
177- file_format = file_format ,
178- s3_key = s3_key ,
179- file_size = file_size ,
98+ metadata = {'file_name' : safe_name , 'file_format' : file_format },
99+ file_data = content ,
180100 )
181101
182- return {
183- 'upload_url' : upload_url ,
184- 's3_key' : s3_key ,
185- 'edx_video_id' : edx_video_id ,
186- 'expires_in' : expires_in ,
187- }
188-
189-
190- def complete_audio_description_upload (edx_video_id , s3_key ):
191- """
192- Step 3 of the upload flow.
193-
194- Verifies the object exists in S3 (HEAD), then flips the edx-val
195- record from 'upload' to 'ready'. Returns the serialized record.
196- """
197- if not edx_video_id or not s3_key :
198- raise AudioDescriptionUploadError ('edx_video_id and s3_key are required' )
199-
200- record = get_video_audio_description (edx_video_id )
201- if record is None :
202- raise AudioDescriptionUploadError (
203- f'No pending audio description found for video { edx_video_id } '
204- )
205- if record ['s3_key' ] != s3_key :
206- raise AudioDescriptionUploadError (
207- 'Provided s3_key does not match the pending audio description record'
208- )
209-
210- bucket = _get_bucket_name ()
211- s3_client = get_s3_client ()
212- try :
213- head = s3_client .head_object (Bucket = bucket , Key = s3_key )
214- except Exception as exc :
215- log .exception (
216- 'Audio description object missing or unreadable in S3 (bucket=%s key=%s)' ,
217- bucket ,
218- s3_key ,
219- )
220- raise AudioDescriptionUploadError (
221- 'Audio description object was not found in S3'
222- ) from exc
223-
224- expected_size = record .get ('file_size' )
225- actual_size = head .get ('ContentLength' )
226- if expected_size and actual_size and int (expected_size ) != int (actual_size ):
227- log .warning (
228- 'Audio description size mismatch for %s: expected=%s actual=%s' ,
229- edx_video_id ,
230- expected_size ,
231- actual_size ,
232- )
233-
234- return mark_video_audio_description_ready (edx_video_id )
235-
236102
237103def delete_audio_description (edx_video_id ):
238104 """
239- Delete the AD record from edx-val and the underlying object from S3.
240-
241- Returns True if a record was deleted, False if there was nothing to
242- delete.
105+ Delete the AD record and file from storage.
106+ Returns True if a record was deleted.
243107 """
244- record = get_video_audio_description (edx_video_id )
245- if record is None :
246- return False
108+ return delete_video_audio_description (edx_video_id )
247109
248- bucket = _get_bucket_name ()
249- s3_client = get_s3_client ()
250- try :
251- s3_client .delete_object (Bucket = bucket , Key = record ['s3_key' ])
252- except Exception : # pylint: disable=broad-except
253- # Log and continue — orphaned S3 objects are recoverable, but a
254- # dangling DB row would block re-uploads.
255- log .exception (
256- 'Failed to delete audio description object from S3 (bucket=%s key=%s)' ,
257- bucket ,
258- record ['s3_key' ],
259- )
260-
261- delete_video_audio_description (edx_video_id )
262- return True
263110
264-
265- def generate_audio_description_download_url (edx_video_id ):
111+ def get_audio_description_url (edx_video_id ):
266112 """
267- Generate a fresh pre-signed GET URL for the AD file. Returns None if
268- no ready record exists for the given video.
269-
270- The actual S3 work lives in `xmodule.video_block.audio_description_urls`
271- so the LMS video block can mint download URLs without importing
272- `cms.djangoapps.contentstore` (which is not in INSTALLED_APPS for LMS).
113+ Return the download URL for the audio description, or None.
273114 """
274- from xmodule .video_block .audio_description_urls import ( # pylint: disable=import-outside-toplevel
275- generate_audio_description_download_url as _generate ,
276- )
277- return _generate (edx_video_id )
115+ return get_video_audio_description_url (edx_video_id )
0 commit comments