Skip to content

Commit

Permalink
feat!: update the import API so it can also download a course from a URL.
Browse files Browse the repository at this point in the history
  • Loading branch information
awais786 committed Jan 1, 2025
1 parent 953140a commit 228bd05
Show file tree
Hide file tree
Showing 12 changed files with 392 additions and 36 deletions.
51 changes: 47 additions & 4 deletions cms/djangoapps/contentstore/api/tests/test_import.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,18 +6,18 @@
import os
import tarfile
import tempfile
from unittest.mock import Mock, patch

from django.urls import reverse
from path import Path as path
from rest_framework import status
from rest_framework.test import APITestCase
from user_tasks.models import UserTaskStatus

from common.djangoapps.student.tests.factories import StaffFactory, UserFactory
from xmodule.modulestore.tests.django_utils import SharedModuleStoreTestCase
from xmodule.modulestore.tests.factories import CourseFactory

from common.djangoapps.student.tests.factories import StaffFactory
from common.djangoapps.student.tests.factories import UserFactory


class CourseImportViewTest(SharedModuleStoreTestCase, APITestCase):
"""
Expand Down Expand Up @@ -85,7 +85,7 @@ def test_student_import_fails(self):
resp = self.client.post(self.get_url(self.course_key), {'course_data': fp}, format='multipart')
self.assertEqual(resp.status_code, status.HTTP_403_FORBIDDEN)

def test_staff_with_access_import_succeeds(self):
def test_staff_with_access_import_course_by_file_succeeds(self):
"""
Test that a staff user can access the API and successfully upload a course
"""
Expand All @@ -94,6 +94,49 @@ def test_staff_with_access_import_succeeds(self):
resp = self.client.post(self.get_url(self.course_key), {'course_data': fp}, format='multipart')
self.assertEqual(resp.status_code, status.HTTP_200_OK)

def test_staff_with_access_import_course_by_url_succeeds(self):
    """
    A staff user with course access can import a course by passing ``file_url``.

    ``requests.get`` is patched so that no network traffic happens; the fake
    response streams the bytes of the known-good tarball instead. The test then
    polls the same endpoint with the returned ``task_id`` and ``filename`` and
    expects the import task to have succeeded.
    """
    self.client.login(username=self.staff.username, password=self.password)

    file_url = "https://example.com/test-course.tar.gz"
    with open(self.good_tar_fullpath, 'rb') as tarball:
        payload = tarball.read()

    def fake_iter_content(chunk_size):
        """Yield the tarball bytes in pieces of at most ``chunk_size`` bytes."""
        for start in range(0, len(payload), chunk_size):
            yield payload[start:start + chunk_size]

    # Stand-in for the streamed HTTP response of the download.
    fake_response = Mock()
    fake_response.status_code = 200
    fake_response.iter_content = fake_iter_content

    with patch('requests.get', return_value=fake_response):
        # Kick off the import by URL.
        endpoint = self.get_url(self.course_key)
        import_response = self.client.post(endpoint, {'file_url': file_url}, format='json')

        self.assertEqual(import_response.status_code, status.HTTP_200_OK)
        for expected_key in ('task_id', 'filename'):
            self.assertIn(expected_key, import_response.data)

        # Ask the same endpoint for the status of the task that was just started.
        status_query = {
            'task_id': import_response.data['task_id'],
            'filename': import_response.data['filename'],
        }
        status_response = self.client.get(self.get_url(self.course_key), status_query)

        self.assertEqual(status_response.status_code, status.HTTP_200_OK)
        self.assertEqual(status_response.data['state'], UserTaskStatus.SUCCEEDED)

def test_staff_has_no_access_import_fails(self):
"""
Test that a staff user can't access another course via the API
Expand Down
94 changes: 68 additions & 26 deletions cms/djangoapps/contentstore/api/views/course_import.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@
import base64
import logging
import os
from urllib.parse import urlparse

import requests
from django.conf import settings
from django.core.files import File
from edx_django_utils.monitoring import set_custom_attribute, set_custom_attributes_for_course_key
Expand Down Expand Up @@ -116,49 +118,89 @@ def post(self, request, course_key):
"""
set_custom_attribute('course_import_init', True)
set_custom_attributes_for_course_key(course_key)

try:
if 'course_data' not in request.FILES:
# Check for input source
if 'course_data' not in request.FILES and 'file_url' not in request.data:
raise self.api_error(
status_code=status.HTTP_400_BAD_REQUEST,
developer_message='Missing required parameter',
error_code='internal_error',
developer_message='Missing required parameter: course_data or file_url',
error_code='missing_parameter',
)

filename = request.FILES['course_data'].name
if not filename.endswith(IMPORTABLE_FILE_TYPES):
raise self.api_error(
status_code=status.HTTP_400_BAD_REQUEST,
developer_message='Parameter in the wrong format',
error_code='internal_error',
)
course_dir = path(settings.GITHUB_REPO_ROOT) / base64.urlsafe_b64encode(
repr(course_key).encode('utf-8')
).decode('utf-8')
temp_filepath = course_dir / filename
if not course_dir.isdir():
os.mkdir(course_dir)

log.debug(f'importing course to {temp_filepath}')
with open(temp_filepath, "wb+") as temp_file:
for chunk in request.FILES['course_data'].chunks():
temp_file.write(chunk)

log.info("Course import %s: Upload complete", course_key)
if not course_dir.isdir():
os.makedirs(course_dir)

if 'course_data' in request.FILES:
uploaded_file = request.FILES['course_data']
filename = uploaded_file.name
if not filename.endswith(IMPORTABLE_FILE_TYPES):
raise self.api_error(
status_code=status.HTTP_400_BAD_REQUEST,
developer_message=f'File type not supported: {filename}',
error_code='invalid_file_type',
)
temp_filepath = course_dir / filename

log.info(f"Course import {course_key}: Upload complete, file: {filename}")
with open(temp_filepath, "wb") as temp_file:
for chunk in uploaded_file.chunks():
temp_file.write(chunk)

# Handle file URL
elif 'file_url' in request.data:
file_url = request.data['file_url']
filename = os.path.basename(urlparse(file_url).path)
if not filename.endswith(IMPORTABLE_FILE_TYPES):
raise self.api_error(
status_code=status.HTTP_400_BAD_REQUEST,
developer_message=f'File type not supported: {filename}',
error_code='invalid_file_type',
)
response = requests.get(file_url, stream=True)
if response.status_code != 200:
raise self.api_error(
status_code=status.HTTP_400_BAD_REQUEST,
developer_message='Failed to download file from URL',
error_code='download_error',
)
temp_filepath = course_dir / filename
total_size = 0 # Track total size in bytes
with open(temp_filepath, "wb") as temp_file:
for chunk in response.iter_content(chunk_size=1024):
if chunk:
chunk_size = len(chunk)
total_size += chunk_size
temp_file.write(chunk)
log.info(f"Course import {course_key}: File downloaded from URL, file: {filename}")

# Save file to storage
with open(temp_filepath, 'rb') as local_file:
django_file = File(local_file)
storage_path = course_import_export_storage.save('olx_import/' + filename, django_file)
storage_path = course_import_export_storage.save(f'olx_import/{filename}', django_file)

# Start asynchronous task
async_result = import_olx.delay(
request.user.id, str(course_key), storage_path, filename, request.LANGUAGE_CODE)
return Response({
'task_id': async_result.task_id
})
request.user.id, str(course_key), storage_path, filename, request.LANGUAGE_CODE
)
return Response(
{
'task_id': async_result.task_id,
'filename': filename,
'storage_path': storage_path,
},
status=status.HTTP_200_OK
)
except Exception as e:
log.exception(f'Course import {course_key}: Unknown error in import')
log.exception(f"Course import {course_key}: Error during import")
raise self.api_error(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
developer_message=str(e),
error_code='internal_error'
error_code='internal_error',
)

@course_author_access_required
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,3 +26,4 @@
VideoUploadSerializer,
VideoUsageSerializer,
)
from .course_templates import CourseSerializer, CourseMetadataSerializer
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
from rest_framework import serializers

class CourseMetadataSerializer(serializers.Serializer):
    """
    Serializer for the metadata object attached to a course entry.

    It is used as the nested ``metadata`` field of ``CourseSerializer``.
    """
    # Identifier of the course, serialized as a plain string.
    course_id = serializers.CharField()
    # Title of the course.
    title = serializers.CharField()
    # Free-text description of the course.
    description = serializers.CharField()
    # URL of the course thumbnail image.
    thumbnail = serializers.URLField()
    # Boolean "active" flag for the course.
    active = serializers.BooleanField()

class CourseSerializer(serializers.Serializer):
    """
    Serializer for one course entry: its name, archive URL and nested metadata.
    """
    # Name of the course entry.
    courses_name = serializers.CharField()
    # URL from which the course archive can be downloaded.
    zip_url = serializers.URLField()
    # Nested metadata object, serialized with ``CourseMetadataSerializer``.
    metadata = CourseMetadataSerializer()
9 changes: 4 additions & 5 deletions cms/djangoapps/contentstore/rest_api/v1/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,13 @@
CourseDetailsView,
CourseTeamView,
CourseTextbooksView,
CourseTemplatesListView,
CourseIndexView,
CourseGradingView,
CourseGroupConfigurationsView,
CourseRerunView,
CourseSettingsView,
CourseVideosView,
CourseWaffleFlagsView,
HomePageView,
HomePageCoursesView,
HomePageLibrariesView,
Expand Down Expand Up @@ -133,11 +133,10 @@
name="container_vertical"
),
re_path(
fr'^course_waffle_flags(?:/{COURSE_ID_PATTERN})?$',
CourseWaffleFlagsView.as_view(),
name="course_waffle_flags"
fr'^course_templates/{settings.COURSE_ID_PATTERN}$',
CourseTemplatesListView.as_view(),
name="course_templates_api"
),

# Authoring API
# Do not use under v1 yet (Nov. 23). The Authoring API is still experimental and the v0 versions should be used
]
2 changes: 1 addition & 1 deletion cms/djangoapps/contentstore/rest_api/v1/views/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@
from .course_details import CourseDetailsView
from .course_index import CourseIndexView
from .course_rerun import CourseRerunView
from .course_waffle_flags import CourseWaffleFlagsView
from .course_team import CourseTeamView
from .course_templates import CourseTemplatesListView
from .grading import CourseGradingView
from .group_configurations import CourseGroupConfigurationsView
from .help_urls import HelpUrlsView
Expand Down
102 changes: 102 additions & 0 deletions cms/djangoapps/contentstore/rest_api/v1/views/course_templates.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
from rest_framework.views import APIView
from rest_framework.response import Response
from rest_framework.exceptions import NotFound
from opaque_keys.edx.keys import CourseKey # Import CourseKey if using edX
from django.conf import settings
import requests
from openedx.core.lib.api.view_utils import (
DeveloperErrorViewMixin,
view_auth_classes,
)
from rest_framework.request import Request
from ..serializers import CourseSerializer, CourseMetadataSerializer


# @view_auth_classes(is_authenticated=True)
class CourseTemplatesListView(DeveloperErrorViewMixin, APIView):
    """
    API endpoint that lists the course templates available for an organization.

    The organization is read from the course key in the URL and the matching
    directory of the templates GitHub repository is listed. Every sub-directory
    found there is treated as one course template: its ``.tar.gz`` file provides
    ``zip_url`` and its ``.json`` file provides ``metadata``.

    Example URL (as registered in the v1 urls module):
        course_templates/<course_id>

    Path Parameters:
        course_id (str): A course key string, e.g. ``course-v1:edX+DemoX+T2024``.

    Example Response:
        [
            {
                "courses_name": "AI Courses",
                "zip_url": "https://raw.githubusercontent.com/awais786/courses/main/edly/AI%20Courses/course._Rnm_t%20(1).tar.gz",
                "metadata": {
                    "course_id": "course-v1:edX+DemoX+T2024",
                    "title": "Introduction to Open edX",
                    "description": "Learn the fundamentals of the Open edX platform, including how to create and manage courses.",
                    "thumbnail": "https://discover.ilmx.org/wp-content/uploads/2024/01/Course-image-2.webp",
                    "active": true
                }
            }
        ]

    Raises:
        NotFound: If the course key is invalid or the repository data cannot be
            fetched or serialized.
    """
    # NOTE(review): the ``view_auth_classes`` decorator above this class is commented
    # out, so no view-level authentication is declared here. Confirm this is intended.

    def get(self, request: Request, course_id: str):
        """
        Handle GET requests by returning the course templates for the course's organization.

        Args:
            request: The HTTP request object.
            course_id (str): The course key string taken from the URL.

        Returns:
            Response: A list of serialized course templates. Directories that lack
            either a ``.tar.gz`` archive or a ``.json`` metadata file are left out.

        Raises:
            NotFound: On any failure while parsing the key, fetching or serializing.
        """
        try:
            organization = CourseKey.from_string(course_id).org

            # GitHub repository details. It should come from settings.
            templates_repo_url = f"https://api.github.com/repos/awais786/courses/contents/{organization}"

            courses = []
            for entry in fetch_contents(templates_repo_url):
                # The contents listing also includes plain files (README etc.);
                # only directories represent course templates.
                if entry.get("type") != "dir":
                    continue

                course_data = {'courses_name': entry["name"]}
                for item in fetch_contents(entry["url"]):
                    item_name = item['name']
                    if item_name.endswith('.tar.gz'):  # course archive
                        course_data['zip_url'] = item['download_url']
                    elif item_name.endswith('.json'):  # metadata file
                        course_data['metadata'] = fetch_contents(item['download_url'])

                # An incomplete directory would make serialization fail for the
                # whole list, so it is skipped instead of failing the request.
                if 'zip_url' in course_data and 'metadata' in course_data:
                    courses.append(course_data)

            serializer = CourseSerializer(courses, many=True)
            return Response(serializer.data)

        except Exception as err:
            # Keep the original cause attached for debugging.
            raise NotFound(f"Error fetching course data: {str(err)}") from err


def fetch_contents(url, timeout=10):
    """
    Fetch ``url`` with the course-templates GitHub token and return the decoded JSON.

    Failures are raised rather than returned, so callers never receive an error
    object where they expect JSON data.

    Args:
        url (str): The URL to request.
        timeout (float): Seconds to wait for the server before giving up.

    Returns:
        The parsed JSON body of the response (a list or a dict, depending on the URL).

    Raises:
        requests.RequestException: If the request fails, times out, or the server
            answers with a 4xx/5xx status.
        ValueError: If the response body is not valid JSON.
    """
    headers = {
        "Authorization": f"token {settings.GITHUB_TOKEN_COURSE_TEMPLATES}",
        "Accept": "application/vnd.github.v3+json",
    }
    # Without a timeout a stalled connection would block the request indefinitely.
    response = requests.get(url, headers=headers, timeout=timeout)
    response.raise_for_status()  # Raise error for 4xx/5xx responses
    return response.json()
Loading

0 comments on commit 228bd05

Please sign in to comment.