diff --git a/cms/djangoapps/contentstore/git_export_utils.py b/cms/djangoapps/contentstore/git_export_utils.py index e0ac80a4627f..a2c292a34c23 100644 --- a/cms/djangoapps/contentstore/git_export_utils.py +++ b/cms/djangoapps/contentstore/git_export_utils.py @@ -13,10 +13,12 @@ from django.contrib.auth.models import User # lint-amnesty, pylint: disable=imported-auth-user from django.utils import timezone from django.utils.translation import gettext_lazy as _ +from opaque_keys.edx.locator import LibraryLocator, LibraryLocatorV2 +from openedx.core.djangoapps.content_libraries.api import extract_library_v2_zip_to_dir from xmodule.contentstore.django import contentstore from xmodule.modulestore.django import modulestore -from xmodule.modulestore.xml_exporter import export_course_to_xml +from xmodule.modulestore.xml_exporter import export_course_to_xml, export_library_to_xml log = logging.getLogger(__name__) @@ -66,10 +68,33 @@ def cmd_log(cmd, cwd): return output -def export_to_git(course_id, repo, user='', rdir=None): - """Export a course to git.""" +def export_to_git(content_key, repo, user='', rdir=None): + """ + Export a course or library to git. + + Args: + content_key: CourseKey or LibraryLocator for the content to export + repo (str): Git repository URL + user (str): Optional username for git commit identity + rdir (str): Optional custom directory name for the repository + + Raises: + GitExportError: For various git operation failures + """ # pylint: disable=too-many-statements + # Detect content type and select appropriate export function + content_type_label = "library" + is_library_v2 = isinstance(content_key, LibraryLocatorV2) + if is_library_v2: + # V2 libraries use backup API with zip extraction + content_export_func = extract_library_v2_zip_to_dir + elif isinstance(content_key, LibraryLocator): + content_export_func = export_library_to_xml + else: + content_export_func = export_course_to_xml + content_type_label = "course" + if not GIT_REPO_EXPORT_DIR: raise GitExportError(GitExportError.NO_EXPORT_DIR) @@ -128,15 +153,20 @@ def export_to_git(course_id, repo, user='', rdir=None): log.exception('Failed to pull git repository: %r', ex.output) raise GitExportError(GitExportError.CANNOT_PULL) from ex - # export course as xml before commiting and pushing + # export content as xml (or zip for v2 libraries) before commiting and pushing root_dir = os.path.dirname(rdirp) - course_dir = os.path.basename(rdirp).rsplit('.git', 1)[0] + content_dir = os.path.basename(rdirp).rsplit('.git', 1)[0] + try: - export_course_to_xml(modulestore(), contentstore(), course_id, - root_dir, course_dir) - except (OSError, AttributeError): - log.exception('Failed export to xml') - raise GitExportError(GitExportError.XML_EXPORT_FAIL) # lint-amnesty, pylint: disable=raise-missing-from + if is_library_v2: + content_export_func(content_key, root_dir, content_dir, user) + else: + # V1 libraries and courses: use XML export (no user parameter) + content_export_func(modulestore(), contentstore(), content_key, + root_dir, content_dir) + except (OSError, AttributeError) as ex: + log.exception('Failed to export %s', content_type_label) + raise GitExportError(GitExportError.XML_EXPORT_FAIL) from ex # Get current branch if not already set if not branch: @@ -160,9 +190,7 @@ def export_to_git(course_id, repo, user='', rdir=None): ident = GIT_EXPORT_DEFAULT_IDENT time_stamp = timezone.now() cwd = os.path.abspath(rdirp) - commit_msg = "Export from Studio at {time_stamp}".format( - time_stamp=time_stamp, - ) + commit_msg = f"Export {content_type_label} from Studio at {time_stamp}" try: cmd_log(['git', 'config', 'user.email', ident['email']], cwd) cmd_log(['git', 'config', 'user.name', ident['name']], cwd) @@ -180,3 +208,10 @@ def export_to_git(course_id, repo, user='', rdir=None): except subprocess.CalledProcessError as ex: log.exception('Error running git push command: %r', ex.output) raise GitExportError(GitExportError.CANNOT_PUSH) from ex + + log.info( + '%s %s exported to git repository %s successfully', + content_type_label.capitalize(), + content_key, + repo, + ) diff --git a/cms/djangoapps/contentstore/management/commands/tests/test_git_export.py b/cms/djangoapps/contentstore/management/commands/tests/test_git_export.py index 8a2334b34375..fb564ad850e8 100644 --- a/cms/djangoapps/contentstore/management/commands/tests/test_git_export.py +++ b/cms/djangoapps/contentstore/management/commands/tests/test_git_export.py @@ -9,13 +9,14 @@ import subprocess import unittest from io import StringIO +from unittest.mock import patch from uuid import uuid4 from django.conf import settings from django.core.management import call_command from django.core.management.base import CommandError from django.test.utils import override_settings -from opaque_keys.edx.locator import CourseLocator +from opaque_keys.edx.locator import CourseLocator, LibraryLocator, LibraryLocatorV2 import cms.djangoapps.contentstore.git_export_utils as git_export_utils from cms.djangoapps.contentstore.git_export_utils import GitExportError @@ -34,6 +35,9 @@ class TestGitExport(CourseTestCase): Excercise the git_export django management command with various inputs. """ + LIBRARY_V2_KEY = LibraryLocatorV2(org='TestOrg', slug='test-lib') + LIBRARY_V1_KEY = LibraryLocator(org='TestOrg', library='test-lib') + def setUp(self): """ Create/reinitialize bare repo and folders needed @@ -182,3 +186,52 @@ def test_no_change(self): with self.assertRaisesRegex(GitExportError, str(GitExportError.CANNOT_COMMIT)): git_export_utils.export_to_git( self.course.id, f'file://{self.bare_repo_dir}') + + @patch('cms.djangoapps.contentstore.git_export_utils.cmd_log', return_value=b'main') + @patch('cms.djangoapps.contentstore.git_export_utils.extract_library_v2_zip_to_dir') + def test_library_v2_export_selects_correct_function(self, mock_extract, mock_cmd_log): + """ + When ``export_to_git`` is given a LibraryLocatorV2 key it must call + ``extract_library_v2_zip_to_dir`` and must not call the v1 XML export + functions (``export_library_to_xml`` or ``export_course_to_xml``). + cmd_log is mocked so no real git subprocess or repo state is needed. + """ + mock_extract.return_value = None + repo_url = f'file://{self.bare_repo_dir}' + + with patch('cms.djangoapps.contentstore.git_export_utils.export_course_to_xml') as mock_course_xml, \ + patch('cms.djangoapps.contentstore.git_export_utils.export_library_to_xml') as mock_lib_xml: + git_export_utils.export_to_git(self.LIBRARY_V2_KEY, repo_url, self.user.username) + + assert mock_extract.call_args[0][0] == self.LIBRARY_V2_KEY + mock_course_xml.assert_not_called() + mock_lib_xml.assert_not_called() + + @patch('cms.djangoapps.contentstore.git_export_utils.cmd_log', return_value=b'main') + @patch('cms.djangoapps.contentstore.git_export_utils.export_library_to_xml') + def test_library_v1_export_selects_correct_function(self, mock_lib_xml, mock_cmd_log): + """ + When ``export_to_git`` is given a LibraryLocator (v1) key it must call + ``export_library_to_xml`` and must not call ``extract_library_v2_zip_to_dir``. + cmd_log is mocked so no real git subprocess or repo state is needed. + """ + mock_lib_xml.return_value = None + repo_url = f'file://{self.bare_repo_dir}' + + with patch('cms.djangoapps.contentstore.git_export_utils.extract_library_v2_zip_to_dir') as mock_v2, \ + patch('cms.djangoapps.contentstore.git_export_utils.export_course_to_xml') as mock_course_xml: + git_export_utils.export_to_git(self.LIBRARY_V1_KEY, repo_url, self.user.username) + + assert mock_lib_xml.called + mock_v2.assert_not_called() + mock_course_xml.assert_not_called() + + @patch('cms.djangoapps.contentstore.git_export_utils.extract_library_v2_zip_to_dir', + side_effect=OSError('disk full')) + def test_library_v2_export_failure_raises_xml_export_fail(self, mock_extract): + """ + If ``extract_library_v2_zip_to_dir`` raises, ``export_to_git`` should + wrap it in ``GitExportError.XML_EXPORT_FAIL``. + """ + with self.assertRaisesRegex(GitExportError, str(GitExportError.XML_EXPORT_FAIL)): + git_export_utils.export_to_git(self.LIBRARY_V2_KEY, f'file://{self.bare_repo_dir}') diff --git a/openedx/core/djangoapps/content_libraries/api/__init__.py b/openedx/core/djangoapps/content_libraries/api/__init__.py index 5f7db8b17f72..f6936f130023 100644 --- a/openedx/core/djangoapps/content_libraries/api/__init__.py +++ b/openedx/core/djangoapps/content_libraries/api/__init__.py @@ -1,6 +1,7 @@ """ Python API for working with content libraries """ +from .backup import * from .block_metadata import * from .collections import * from .container_metadata import * diff --git a/openedx/core/djangoapps/content_libraries/api/backup.py b/openedx/core/djangoapps/content_libraries/api/backup.py new file mode 100644 index 000000000000..97a3b3c5bd4c --- /dev/null +++ b/openedx/core/djangoapps/content_libraries/api/backup.py @@ -0,0 +1,77 @@ +""" +Public API for content library backup (zip export) utilities. +""" +from __future__ import annotations + +import os +from datetime import datetime +import shutil +from tempfile import mkdtemp +import zipfile + +from django.conf import settings +from django.contrib.auth import get_user_model +from django.utils.text import slugify +from opaque_keys.edx.locator import LibraryLocatorV2, log +from path import Path + +from openedx_content.api import create_zip_file as create_lib_zip_file + +__all__ = ["create_library_v2_zip", "extract_library_v2_zip_to_dir"] + + +def create_library_v2_zip(library_key: LibraryLocatorV2, user) -> tuple: + """ + Create a zip backup of a v2 library and return ``(temp_dir, zip_file_path)``. + + The caller is responsible for cleaning up ``temp_dir`` when done. + + Args: + library_key: LibraryLocatorV2 identifying the library to export. + user: User object passed to the backup API. + + Returns: + A tuple of ``(temp_dir as Path, zip_file_path as str)``. + """ + root_dir = Path(mkdtemp()) + sanitized_lib_key = str(library_key).replace(":", "-") + sanitized_lib_key = slugify(sanitized_lib_key, allow_unicode=True) + timestamp = datetime.now().strftime("%Y-%m-%d-%H%M%S") + filename = f'{sanitized_lib_key}-{timestamp}.zip' + file_path = os.path.join(root_dir, filename) + origin_server = getattr(settings, 'CMS_BASE', None) + create_lib_zip_file(lp_key=str(library_key), path=file_path, user=user, origin_server=origin_server) + return root_dir, file_path + + +def extract_library_v2_zip_to_dir(library_key, root_dir, library_dir, user=None): + """ + Export a v2 library to a directory by creating a zip backup and extracting it. + + V2 libraries are stored in Learning Core and use a zip-based backup mechanism. + This function creates a temporary zip backup, extracts its contents into + ``library_dir`` under ``root_dir``, then cleans up the temporary zip. + + Args: + library_key: LibraryLocatorV2 for the library to export + root_dir: Root directory where library_dir will be created + library_dir: Directory name under root_dir to extract the library into + user: Username string for the backup API (optional) + + Raises: + Exception: If backup creation or extraction fails + """ + # Get user object for backup API + user_obj = get_user_model().objects.filter(username=user).first() + temp_dir, zip_path = create_library_v2_zip(library_key, user_obj) + + try: + target_dir = os.path.join(root_dir, library_dir) + os.makedirs(target_dir, exist_ok=True) + # Extract zip contents (will overwrite existing files) + with zipfile.ZipFile(zip_path, 'r') as zip_ref: + zip_ref.extractall(target_dir) + log.info('Extracted library v2 backup to %s', target_dir) + finally: + if temp_dir.exists(): + shutil.rmtree(temp_dir) diff --git a/openedx/core/djangoapps/content_libraries/api/tests/__init__.py b/openedx/core/djangoapps/content_libraries/api/tests/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/openedx/core/djangoapps/content_libraries/api/tests/test_backup.py b/openedx/core/djangoapps/content_libraries/api/tests/test_backup.py new file mode 100644 index 000000000000..8947ba40a1dd --- /dev/null +++ b/openedx/core/djangoapps/content_libraries/api/tests/test_backup.py @@ -0,0 +1,89 @@ +""" +Tests for content library backup (zip export) utilities. +""" +from __future__ import annotations + +import shutil +import tempfile +import zipfile +from unittest.mock import MagicMock, patch + +import pytest +from django.test import TestCase +from opaque_keys.edx.locator import LibraryLocatorV2 +from path import Path + +from openedx.core.djangoapps.content_libraries.api.backup import extract_library_v2_zip_to_dir + + +LIBRARY_KEY = LibraryLocatorV2(org='TestOrg', slug='test-lib') + + +class TestExtractLibraryV2ZipToDir(TestCase): + """ + Tests for ``extract_library_v2_zip_to_dir``. + """ + + def _make_zip_in_temp_dir(self, contents=None): + """ + Helper: create a real temp dir + zip file and return (temp_dir_path, zip_path). + ``contents`` is a dict of {filename: bytes} to write into the zip. + """ + temp_dir = Path(tempfile.mkdtemp()) + zip_path = str(temp_dir / 'library.zip') + with zipfile.ZipFile(zip_path, 'w') as zf: + for name, data in (contents or {'data.xml': b''}).items(): + zf.writestr(name, data) + return temp_dir, zip_path + + @patch('openedx.core.djangoapps.content_libraries.api.backup.get_user_model') + @patch('openedx.core.djangoapps.content_libraries.api.backup.create_library_v2_zip') + def test_successful_extraction(self, mock_create_zip, mock_get_user_model): + """ + On a successful call the function should: + - resolve the username to a user object via the user model, + - pass that user object to ``create_library_v2_zip``, + - create the target directory if it does not already exist, + - extract the zip contents into /, + - clean up the temporary zip directory. + """ + root_dir = Path(tempfile.mkdtemp()) + temp_zip_dir, zip_path = self._make_zip_in_temp_dir({'content.xml': b''}) + mock_create_zip.return_value = (temp_zip_dir, zip_path) + mock_user = MagicMock() + mock_get_user_model.return_value.objects.filter.return_value.first.return_value = mock_user + + try: + target = root_dir / 'my-library' + assert not target.exists(), "Target dir should not exist before the call" + + extract_library_v2_zip_to_dir(LIBRARY_KEY, str(root_dir), 'my-library', user='testuser') + + mock_get_user_model.return_value.objects.filter.assert_called_once_with(username='testuser') + mock_create_zip.assert_called_once_with(LIBRARY_KEY, mock_user) + assert target.isdir(), "Target dir should have been created" + assert (target / 'content.xml').exists(), "Zip content should be extracted" + assert not temp_zip_dir.exists(), "Temp zip dir should have been removed" + finally: + shutil.rmtree(root_dir, ignore_errors=True) + shutil.rmtree(temp_zip_dir, ignore_errors=True) + + @patch('openedx.core.djangoapps.content_libraries.api.backup.get_user_model') + @patch('openedx.core.djangoapps.content_libraries.api.backup.create_library_v2_zip') + def test_temp_dir_cleaned_up_even_on_extraction_error(self, mock_create_zip, mock_get_user_model): + """ + The temporary directory must be cleaned up even when extraction raises. + """ + root_dir = Path(tempfile.mkdtemp()) + temp_zip_dir, zip_path = self._make_zip_in_temp_dir() + mock_create_zip.return_value = (temp_zip_dir, zip_path) + mock_get_user_model.return_value.objects.filter.return_value.first.return_value = None + + try: + with patch('zipfile.ZipFile.extractall', side_effect=OSError('disk full')): + with pytest.raises(OSError): + extract_library_v2_zip_to_dir(LIBRARY_KEY, str(root_dir), 'my-library', user=None) + assert not temp_zip_dir.exists(), "Temp dir should be cleaned up on error" + finally: + shutil.rmtree(root_dir, ignore_errors=True) + shutil.rmtree(temp_zip_dir, ignore_errors=True) diff --git a/openedx/core/djangoapps/content_libraries/tasks.py b/openedx/core/djangoapps/content_libraries/tasks.py index a28bf1e861dd..b52522e012d0 100644 --- a/openedx/core/djangoapps/content_libraries/tasks.py +++ b/openedx/core/djangoapps/content_libraries/tasks.py @@ -19,20 +19,17 @@ from io import StringIO import logging import os -from datetime import datetime -from tempfile import mkdtemp, NamedTemporaryFile +from tempfile import NamedTemporaryFile import json import shutil from django.core.files.base import ContentFile from django.contrib.auth import get_user_model from django.core.serializers.json import DjangoJSONEncoder -from django.conf import settings from celery import shared_task from celery.utils.log import get_task_logger from celery_utils.logged_task import LoggedTask from django.core.files import File -from django.utils.text import slugify from edx_django_utils.monitoring import ( set_code_owner_attribute, set_code_owner_attribute_from_module, @@ -58,9 +55,7 @@ LIBRARY_CONTAINER_UPDATED ) from openedx_content import api as content_api -from openedx_content.api import create_zip_file as create_lib_zip_file from openedx_content.models_api import DraftChangeLog, PublishLog -from path import Path from user_tasks.models import UserTaskArtifact from user_tasks.tasks import UserTask, UserTaskStatus from xblock.fields import Scope @@ -563,15 +558,8 @@ def backup_library(self, user_id: int, library_key_str: str) -> None: self.status.set_state('Exporting') set_custom_attribute("exporting_started", str(library_key)) - root_dir = Path(mkdtemp()) - sanitized_lib_key = str(library_key).replace(":", "-") - sanitized_lib_key = slugify(sanitized_lib_key, allow_unicode=True) - timestamp = datetime.now().strftime("%Y-%m-%d-%H%M%S") - filename = f'{sanitized_lib_key}-{timestamp}.zip' - file_path = os.path.join(root_dir, filename) user = User.objects.get(id=user_id) - origin_server = getattr(settings, 'CMS_BASE', None) - create_lib_zip_file(lp_key=str(library_key), path=file_path, user=user, origin_server=origin_server) + _root_dir, file_path = api.create_library_v2_zip(library_key, user) set_custom_attribute("exporting_completed", str(library_key)) with open(file_path, 'rb') as zipfile: