From c319a36a94206f899e1286aaafa459b098b6dd29 Mon Sep 17 00:00:00 2001 From: shaikh-ma <88078876+shaikh-ma@users.noreply.github.com> Date: Fri, 17 Jan 2025 19:53:35 +0530 Subject: [PATCH 1/3] updates to fork function --- scrunch/datasets.py | 12 +++++++++--- scrunch/tests/test_datasets.py | 35 ++++++++++++++++++---------------- 2 files changed, 28 insertions(+), 19 deletions(-) diff --git a/scrunch/datasets.py b/scrunch/datasets.py index 9c65a21c..bf0363dc 100644 --- a/scrunch/datasets.py +++ b/scrunch/datasets.py @@ -2425,9 +2425,15 @@ def fork(self, description=None, name=None, is_published=False, else: if project: # Create fork in given Project path. - body["project"] = ( - project if project.startswith("http") else get_project(project).url - ) + try: + project = get_project(project).url + except KeyError: + # Creating full project URL for sub-folders + connection = _default_connection(connection=None) + site_url = connection.session.site_url + project = site_url + "/projects/{}/".format(project) + finally: + body["project"] = project else: raise ValueError( "Project parameter should be provided when preserve_owner=False." diff --git a/scrunch/tests/test_datasets.py b/scrunch/tests/test_datasets.py index 3d7fe98c..a0bc7097 100644 --- a/scrunch/tests/test_datasets.py +++ b/scrunch/tests/test_datasets.py @@ -20,6 +20,7 @@ from io import StringIO from unittest import TestCase +from unittest.mock import patch from requests import Response import pytest @@ -1781,7 +1782,7 @@ def _create(*args): def test_fork_project(self): - project = 'https://test.crunch.io/api/projects/1234/' + project = '1234' sess = MagicMock() body = JSONObject({ 'name': 'ds name', @@ -1798,17 +1799,15 @@ def test_fork_project(self): 'body': { 'name': 'FORK #1 of ds name', 'description': 'ds description', - 'project': project, # Project added + 'project': "https://test.crunch.io/api/projects/{}/".format(project), # Project added 'is_published': False, } } - # Using project parameter - ds.fork(preserve_owner=False, project=project) - ds_res.forks.create.assert_called_with(expected_payload) - # Using owner parameter - ds.fork(preserve_owner=False, owner=project) - ds_res.forks.create.assert_called_with(expected_payload) + with patch('scrunch.datasets.get_project') as mock_get_project: + mock_get_project.return_value.url = project + ds.fork(preserve_owner=False, project=project) + # Validations err_msg1 = ( @@ -1816,28 +1815,29 @@ def test_fork_project(self): "Please try again by passing only 'project' parameter." ) with pytest.raises(ValueError, match=err_msg1): - ds.fork(owner="ABCD", project="1234") + ds.fork(owner="ABCD", project=project) err_msg2 = ("Cannot pass 'project' or 'owner' when preserve_owner=True.") with pytest.raises(ValueError, match=err_msg2): - ds.fork(preserve_owner=True, project="1234") + ds.fork(preserve_owner=True, project=project) with pytest.raises(ValueError, match=err_msg2): - ds.fork(preserve_owner=True, owner="1234") + ds.fork(preserve_owner=True, owner=project) def test_fork_preserve_owner(self): - project = "http://test.crunch.io/api/projects/123/" + project_url = "http://test.crunch.io/api/projects/123/" sess = MagicMock() + sess.site_url = "http://test.crunch.io/api/" body = JSONObject( { "name": "ds name", "description": "ds description", - "owner": project, + "owner": project_url, } ) ds_res = MagicMock(session=sess, body=body) - ds_res.project.self = project + ds_res.project.self = project_url ds_res.forks = MagicMock() ds_res.forks.index = {} ds = BaseDataset(ds_res) @@ -1849,14 +1849,17 @@ def test_fork_preserve_owner(self): ): ds.fork(preserve_owner=False, project=None) - ds.fork(preserve_owner=False, owner=project) + with patch('scrunch.datasets.get_project') as mock_get_project: + mock_get_project.return_value.url = project_url + ds.fork(preserve_owner=False, project="123") + ds_res.forks.create.assert_called_with( { "element": "shoji:entity", "body": { "name": "FORK #1 of ds name", "description": "ds description", - "project": project, # Project preserved + "project": project_url, "is_published": False, }, } From 47edc6aadec9b61d0f1766ece99f7045d3ff5eb9 Mon Sep 17 00:00:00 2001 From: shaikh-ma <88078876+shaikh-ma@users.noreply.github.com> Date: Fri, 17 Jan 2025 20:22:51 +0530 Subject: [PATCH 2/3] fix import error --- scrunch/tests/test_datasets.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/scrunch/tests/test_datasets.py b/scrunch/tests/test_datasets.py index a0bc7097..4fbb31a2 100644 --- a/scrunch/tests/test_datasets.py +++ b/scrunch/tests/test_datasets.py @@ -20,7 +20,6 @@ from io import StringIO from unittest import TestCase -from unittest.mock import patch from requests import Response import pytest @@ -1804,7 +1803,7 @@ def test_fork_project(self): } } - with patch('scrunch.datasets.get_project') as mock_get_project: + with mock.patch('scrunch.datasets.get_project') as mock_get_project: mock_get_project.return_value.url = project ds.fork(preserve_owner=False, project=project) @@ -1849,7 +1848,7 @@ def test_fork_preserve_owner(self): ): ds.fork(preserve_owner=False, project=None) - with patch('scrunch.datasets.get_project') as mock_get_project: + with mock.patch('scrunch.datasets.get_project') as mock_get_project: mock_get_project.return_value.url = project_url ds.fork(preserve_owner=False, project="123") From c69785e47aa0067a2eabef146d967e804782fc97 Mon Sep 17 00:00:00 2001 From: shaikh-ma <88078876+shaikh-ma@users.noreply.github.com> Date: Wed, 5 Feb 2025 12:10:14 +0530 Subject: [PATCH 3/3] updates as per code review --- scrunch/datasets.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/scrunch/datasets.py b/scrunch/datasets.py index bf0363dc..ecd79afc 100644 --- a/scrunch/datasets.py +++ b/scrunch/datasets.py @@ -2380,7 +2380,7 @@ def fork(self, description=None, name=None, is_published=False, :returns _fork: scrunch.datasets.BaseDataset """ from scrunch.mutable_dataset import MutableDataset - + # Handling project vs owner conflict owner = kwargs.get("owner") @@ -2426,14 +2426,15 @@ def fork(self, description=None, name=None, is_published=False, if project: # Create fork in given Project path. try: - project = get_project(project).url + project_url = get_project(project).url except KeyError: # Creating full project URL for sub-folders - connection = _default_connection(connection=None) - site_url = connection.session.site_url - project = site_url + "/projects/{}/".format(project) + project_url = "{}/projects/{}/".format( + self.resource.session.site_url, + project + ) finally: - body["project"] = project + body["project"] = project_url else: raise ValueError( "Project parameter should be provided when preserve_owner=False."