Skip to content
61 changes: 48 additions & 13 deletions cms/djangoapps/contentstore/git_export_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,12 @@
from django.contrib.auth.models import User # lint-amnesty, pylint: disable=imported-auth-user
from django.utils import timezone
from django.utils.translation import gettext_lazy as _
from opaque_keys.edx.locator import LibraryLocator, LibraryLocatorV2

from openedx.core.djangoapps.content_libraries.api import extract_library_v2_zip_to_dir
from xmodule.contentstore.django import contentstore
from xmodule.modulestore.django import modulestore
from xmodule.modulestore.xml_exporter import export_course_to_xml
from xmodule.modulestore.xml_exporter import export_course_to_xml, export_library_to_xml

log = logging.getLogger(__name__)

Expand Down Expand Up @@ -66,10 +68,33 @@ def cmd_log(cmd, cwd):
return output


def export_to_git(course_id, repo, user='', rdir=None):
"""Export a course to git."""
def export_to_git(content_key, repo, user='', rdir=None):
"""
Export a course or library to git.

Args:
content_key: CourseKey or LibraryLocator for the content to export
repo (str): Git repository URL
user (str): Optional username for git commit identity
rdir (str): Optional custom directory name for the repository

Raises:
GitExportError: For various git operation failures
"""
# pylint: disable=too-many-statements

# Detect content type and select appropriate export function
content_type_label = "library"
is_library_v2 = isinstance(content_key, LibraryLocatorV2)
if is_library_v2:
# V2 libraries use backup API with zip extraction
content_export_func = extract_library_v2_zip_to_dir
elif isinstance(content_key, LibraryLocator):
content_export_func = export_library_to_xml
else:
content_export_func = export_course_to_xml
content_type_label = "course"
Comment on lines +88 to +96
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: we could use default dict here.


if not GIT_REPO_EXPORT_DIR:
raise GitExportError(GitExportError.NO_EXPORT_DIR)

Expand Down Expand Up @@ -128,15 +153,20 @@ def export_to_git(course_id, repo, user='', rdir=None):
log.exception('Failed to pull git repository: %r', ex.output)
raise GitExportError(GitExportError.CANNOT_PULL) from ex

# export course as xml before commiting and pushing
# export content as xml (or zip for v2 libraries) before commiting and pushing
root_dir = os.path.dirname(rdirp)
course_dir = os.path.basename(rdirp).rsplit('.git', 1)[0]
content_dir = os.path.basename(rdirp).rsplit('.git', 1)[0]

try:
export_course_to_xml(modulestore(), contentstore(), course_id,
root_dir, course_dir)
except (OSError, AttributeError):
log.exception('Failed export to xml')
raise GitExportError(GitExportError.XML_EXPORT_FAIL) # lint-amnesty, pylint: disable=raise-missing-from
if is_library_v2:
content_export_func(content_key, root_dir, content_dir, user)
else:
# V1 libraries and courses: use XML export (no user parameter)
content_export_func(modulestore(), contentstore(), content_key,
root_dir, content_dir)
except (OSError, AttributeError) as ex:
log.exception('Failed to export %s', content_type_label)
raise GitExportError(GitExportError.XML_EXPORT_FAIL) from ex

# Get current branch if not already set
if not branch:
Expand All @@ -160,9 +190,7 @@ def export_to_git(course_id, repo, user='', rdir=None):
ident = GIT_EXPORT_DEFAULT_IDENT
time_stamp = timezone.now()
cwd = os.path.abspath(rdirp)
commit_msg = "Export from Studio at {time_stamp}".format(
time_stamp=time_stamp,
)
commit_msg = f"Export {content_type_label} from Studio at {time_stamp}"
try:
cmd_log(['git', 'config', 'user.email', ident['email']], cwd)
cmd_log(['git', 'config', 'user.name', ident['name']], cwd)
Expand All @@ -180,3 +208,10 @@ def export_to_git(course_id, repo, user='', rdir=None):
except subprocess.CalledProcessError as ex:
log.exception('Error running git push command: %r', ex.output)
raise GitExportError(GitExportError.CANNOT_PUSH) from ex

log.info(
'%s %s exported to git repository %s successfully',
content_type_label.capitalize(),
content_key,
repo,
)
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,14 @@
import subprocess
import unittest
from io import StringIO
from unittest.mock import patch
from uuid import uuid4

from django.conf import settings
from django.core.management import call_command
from django.core.management.base import CommandError
from django.test.utils import override_settings
from opaque_keys.edx.locator import CourseLocator
from opaque_keys.edx.locator import CourseLocator, LibraryLocator, LibraryLocatorV2

import cms.djangoapps.contentstore.git_export_utils as git_export_utils
from cms.djangoapps.contentstore.git_export_utils import GitExportError
Expand All @@ -34,6 +35,9 @@ class TestGitExport(CourseTestCase):
Excercise the git_export django management command with various inputs.
"""

LIBRARY_V2_KEY = LibraryLocatorV2(org='TestOrg', slug='test-lib')
LIBRARY_V1_KEY = LibraryLocator(org='TestOrg', library='test-lib')

def setUp(self):
"""
Create/reinitialize bare repo and folders needed
Expand Down Expand Up @@ -182,3 +186,52 @@ def test_no_change(self):
with self.assertRaisesRegex(GitExportError, str(GitExportError.CANNOT_COMMIT)):
git_export_utils.export_to_git(
self.course.id, f'file://{self.bare_repo_dir}')

@patch('cms.djangoapps.contentstore.git_export_utils.cmd_log', return_value=b'main')
@patch('cms.djangoapps.contentstore.git_export_utils.extract_library_v2_zip_to_dir')
def test_library_v2_export_selects_correct_function(self, mock_extract, mock_cmd_log):
"""
When ``export_to_git`` is given a LibraryLocatorV2 key it must call
``extract_library_v2_zip_to_dir`` and must not call the v1 XML export
functions (``export_library_to_xml`` or ``export_course_to_xml``).
cmd_log is mocked so no real git subprocess or repo state is needed.
"""
mock_extract.return_value = None
repo_url = f'file://{self.bare_repo_dir}'

with patch('cms.djangoapps.contentstore.git_export_utils.export_course_to_xml') as mock_course_xml, \
patch('cms.djangoapps.contentstore.git_export_utils.export_library_to_xml') as mock_lib_xml:
git_export_utils.export_to_git(self.LIBRARY_V2_KEY, repo_url, self.user.username)

assert mock_extract.call_args[0][0] == self.LIBRARY_V2_KEY
mock_course_xml.assert_not_called()
mock_lib_xml.assert_not_called()

@patch('cms.djangoapps.contentstore.git_export_utils.cmd_log', return_value=b'main')
@patch('cms.djangoapps.contentstore.git_export_utils.export_library_to_xml')
def test_library_v1_export_selects_correct_function(self, mock_lib_xml, mock_cmd_log):
"""
When ``export_to_git`` is given a LibraryLocator (v1) key it must call
``export_library_to_xml`` and must not call ``extract_library_v2_zip_to_dir``.
cmd_log is mocked so no real git subprocess or repo state is needed.
"""
mock_lib_xml.return_value = None
repo_url = f'file://{self.bare_repo_dir}'

with patch('cms.djangoapps.contentstore.git_export_utils.extract_library_v2_zip_to_dir') as mock_v2, \
patch('cms.djangoapps.contentstore.git_export_utils.export_course_to_xml') as mock_course_xml:
git_export_utils.export_to_git(self.LIBRARY_V1_KEY, repo_url, self.user.username)

assert mock_lib_xml.called
mock_v2.assert_not_called()
mock_course_xml.assert_not_called()

@patch('cms.djangoapps.contentstore.git_export_utils.extract_library_v2_zip_to_dir',
side_effect=OSError('disk full'))
def test_library_v2_export_failure_raises_xml_export_fail(self, mock_extract):
"""
If ``extract_library_v2_zip_to_dir`` raises, ``export_to_git`` should
wrap it in ``GitExportError.XML_EXPORT_FAIL``.
"""
with self.assertRaisesRegex(GitExportError, str(GitExportError.XML_EXPORT_FAIL)):
git_export_utils.export_to_git(self.LIBRARY_V2_KEY, f'file://{self.bare_repo_dir}')
1 change: 1 addition & 0 deletions openedx/core/djangoapps/content_libraries/api/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
Python API for working with content libraries
"""
from .backup import *
from .block_metadata import *
from .collections import *
from .container_metadata import *
Expand Down
77 changes: 77 additions & 0 deletions openedx/core/djangoapps/content_libraries/api/backup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
"""
Public API for content library backup (zip export) utilities.
"""
from __future__ import annotations

import os
from datetime import datetime
import shutil
from tempfile import mkdtemp
import zipfile

from django.conf import settings
from django.contrib.auth import get_user_model
from django.utils.text import slugify
from opaque_keys.edx.locator import LibraryLocatorV2, log
from path import Path

from openedx_content.api import create_zip_file as create_lib_zip_file

__all__ = ["create_library_v2_zip", "extract_library_v2_zip_to_dir"]


def create_library_v2_zip(library_key: LibraryLocatorV2, user) -> tuple:
"""
Create a zip backup of a v2 library and return ``(temp_dir, zip_file_path)``.

The caller is responsible for cleaning up ``temp_dir`` when done.

Args:
library_key: LibraryLocatorV2 identifying the library to export.
user: User object passed to the backup API.

Returns:
A tuple of ``(temp_dir as Path, zip_file_path as str)``.
"""
root_dir = Path(mkdtemp())
sanitized_lib_key = str(library_key).replace(":", "-")
sanitized_lib_key = slugify(sanitized_lib_key, allow_unicode=True)
timestamp = datetime.now().strftime("%Y-%m-%d-%H%M%S")
filename = f'{sanitized_lib_key}-{timestamp}.zip'
file_path = os.path.join(root_dir, filename)
origin_server = getattr(settings, 'CMS_BASE', None)
create_lib_zip_file(lp_key=str(library_key), path=file_path, user=user, origin_server=origin_server)
return root_dir, file_path


def extract_library_v2_zip_to_dir(library_key, root_dir, library_dir, user=None):
"""
Export a v2 library to a directory by creating a zip backup and extracting it.

V2 libraries are stored in Learning Core and use a zip-based backup mechanism.
This function creates a temporary zip backup, extracts its contents into
``library_dir`` under ``root_dir``, then cleans up the temporary zip.

Args:
library_key: LibraryLocatorV2 for the library to export
root_dir: Root directory where library_dir will be created
library_dir: Directory name under root_dir to extract the library into
user: Username string for the backup API (optional)

Raises:
Exception: If backup creation or extraction fails
"""
# Get user object for backup API
user_obj = get_user_model().objects.filter(username=user).first()
temp_dir, zip_path = create_library_v2_zip(library_key, user_obj)

try:
target_dir = os.path.join(root_dir, library_dir)
os.makedirs(target_dir, exist_ok=True)
# Extract zip contents (will overwrite existing files)
with zipfile.ZipFile(zip_path, 'r') as zip_ref:
zip_ref.extractall(target_dir)
log.info('Extracted library v2 backup to %s', target_dir)
finally:
if temp_dir.exists():
Comment on lines +64 to +76
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Some of the code comments are not needed here as the code is simple and readable that we don't really need. Also, there are some unnecessary blank lines as well.

shutil.rmtree(temp_dir)
Empty file.
89 changes: 89 additions & 0 deletions openedx/core/djangoapps/content_libraries/api/tests/test_backup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
"""
Tests for content library backup (zip export) utilities.
"""
from __future__ import annotations

import shutil
import tempfile
import zipfile
from unittest.mock import MagicMock, patch

import pytest
from django.test import TestCase
from opaque_keys.edx.locator import LibraryLocatorV2
from path import Path

from openedx.core.djangoapps.content_libraries.api.backup import extract_library_v2_zip_to_dir


LIBRARY_KEY = LibraryLocatorV2(org='TestOrg', slug='test-lib')


class TestExtractLibraryV2ZipToDir(TestCase):
"""
Tests for ``extract_library_v2_zip_to_dir``.
"""

def _make_zip_in_temp_dir(self, contents=None):
"""
Helper: create a real temp dir + zip file and return (temp_dir_path, zip_path).
``contents`` is a dict of {filename: bytes} to write into the zip.
"""
temp_dir = Path(tempfile.mkdtemp())
zip_path = str(temp_dir / 'library.zip')
with zipfile.ZipFile(zip_path, 'w') as zf:
for name, data in (contents or {'data.xml': b'<library/>'}).items():
zf.writestr(name, data)
return temp_dir, zip_path

@patch('openedx.core.djangoapps.content_libraries.api.backup.get_user_model')
@patch('openedx.core.djangoapps.content_libraries.api.backup.create_library_v2_zip')
def test_successful_extraction(self, mock_create_zip, mock_get_user_model):
"""
On a successful call the function should:
- resolve the username to a user object via the user model,
- pass that user object to ``create_library_v2_zip``,
- create the target directory if it does not already exist,
- extract the zip contents into <root_dir>/<library_dir>,
- clean up the temporary zip directory.
"""
root_dir = Path(tempfile.mkdtemp())
temp_zip_dir, zip_path = self._make_zip_in_temp_dir({'content.xml': b'<lib/>'})
mock_create_zip.return_value = (temp_zip_dir, zip_path)
mock_user = MagicMock()
mock_get_user_model.return_value.objects.filter.return_value.first.return_value = mock_user

try:
target = root_dir / 'my-library'
assert not target.exists(), "Target dir should not exist before the call"

extract_library_v2_zip_to_dir(LIBRARY_KEY, str(root_dir), 'my-library', user='testuser')

mock_get_user_model.return_value.objects.filter.assert_called_once_with(username='testuser')
mock_create_zip.assert_called_once_with(LIBRARY_KEY, mock_user)
assert target.isdir(), "Target dir should have been created"
assert (target / 'content.xml').exists(), "Zip content should be extracted"
assert not temp_zip_dir.exists(), "Temp zip dir should have been removed"
finally:
shutil.rmtree(root_dir, ignore_errors=True)
shutil.rmtree(temp_zip_dir, ignore_errors=True)

@patch('openedx.core.djangoapps.content_libraries.api.backup.get_user_model')
@patch('openedx.core.djangoapps.content_libraries.api.backup.create_library_v2_zip')
def test_temp_dir_cleaned_up_even_on_extraction_error(self, mock_create_zip, mock_get_user_model):
"""
The temporary directory must be cleaned up even when extraction raises.
"""
root_dir = Path(tempfile.mkdtemp())
temp_zip_dir, zip_path = self._make_zip_in_temp_dir()
mock_create_zip.return_value = (temp_zip_dir, zip_path)
mock_get_user_model.return_value.objects.filter.return_value.first.return_value = None

try:
with patch('zipfile.ZipFile.extractall', side_effect=OSError('disk full')):
with pytest.raises(OSError):
extract_library_v2_zip_to_dir(LIBRARY_KEY, str(root_dir), 'my-library', user=None)
assert not temp_zip_dir.exists(), "Temp dir should be cleaned up on error"
finally:
shutil.rmtree(root_dir, ignore_errors=True)
shutil.rmtree(temp_zip_dir, ignore_errors=True)
Loading
Loading