From bf388fda244b31b0ee768c1c835b7096849583bc Mon Sep 17 00:00:00 2001 From: krassowski <5832902+krassowski@users.noreply.github.com> Date: Thu, 28 Sep 2023 14:18:47 +0100 Subject: [PATCH 1/5] Draft of the proposed path resolver API --- jupyter_server/services/api/handlers.py | 33 +++++++++++++++++++ .../services/contents/filemanager.py | 13 ++++++++ jupyter_server/services/contents/manager.py | 4 +++ 3 files changed, 50 insertions(+) diff --git a/jupyter_server/services/api/handlers.py b/jupyter_server/services/api/handlers.py index eee655b9ac..78e6d4c566 100644 --- a/jupyter_server/services/api/handlers.py +++ b/jupyter_server/services/api/handlers.py @@ -105,8 +105,41 @@ def get(self): self.write(json.dumps(model)) +class PathResolverHandler(APIHandler): + """Path resolver handler.""" + + auth_resource = AUTH_RESOURCE + _track_activity = False + + @web.authenticated + @authorized + async def get(self): + """Resolve the path.""" + path = self.get_query_argument("path") + kernel_uuid = self.get_query_argument("kernel", default=None) + + scopes = [ + self.contents_manager, + # TODO vs multi_kernel_manager? + # TODO: kernel lives in client so we may need to implement it there. + self.kernel_manager.get_kernel(kernel_uuid), + # *self.get_additional_scopes(kernel_uuid) + ] + # TODO: maybe client should be able to define their own scope? this way + # ipykernel can return {"scope": "source"} or {scope: 'kernel'} + # TODO: how to plug in the additional scopes? a traitlet? maybe... + resolved = [ + {"scope": scope, "path": await scope.resolve_path(path)} + for scope in scopes + if hasattr(scope, "resolve_path") + ] + response = {"resolved": [entry for entry in resolved if entry["path"] is not None]} + self.finish(json.dumps(response)) + + default_handlers = [ (r"/api/spec.yaml", APISpecHandler), (r"/api/status", APIStatusHandler), (r"/api/me", IdentityHandler), + (r"/api/resolvePath", PathResolverHandler), ] diff --git a/jupyter_server/services/contents/filemanager.py b/jupyter_server/services/contents/filemanager.py index 6df2c3ebfa..ccb5ff5245 100644 --- a/jupyter_server/services/contents/filemanager.py +++ b/jupyter_server/services/contents/filemanager.py @@ -219,6 +219,19 @@ def exists(self, path): os_path = self._get_os_path(path=path) return exists(os_path) + def resolve_path(self, path: str): + """Resolve path relative to root resource.""" + # transform OS path to API path + relative_path = to_api_path(path, self.root_dir) + # check if the API path is within contents directory + try: + os_path = self._get_os_path(path=relative_path) + except web.HTTPError: + return None + if exists(os_path): + return relative_path + return None + def _base_model(self, path): """Build the common base of a contents model""" os_path = self._get_os_path(path) diff --git a/jupyter_server/services/contents/manager.py b/jupyter_server/services/contents/manager.py index b703395c25..6b196e2e9f 100644 --- a/jupyter_server/services/contents/manager.py +++ b/jupyter_server/services/contents/manager.py @@ -873,6 +873,10 @@ async def rename_file(self, old_path, new_path): # ContentsManager API part 2: methods that have useable default # implementations, but can be overridden in subclasses. + async def resolve_path(self, path: str): + """Resolve path relative to root resource.""" + return None + async def delete(self, path): """Delete a file/directory and any associated checkpoints.""" path = path.strip("/") From 3ec03380a51b5bc9b109ef863f2d4e72255bf50c Mon Sep 17 00:00:00 2001 From: krassowski <5832902+krassowski@users.noreply.github.com> Date: Wed, 31 Jan 2024 15:27:28 +0000 Subject: [PATCH 2/5] Add name, ensure async, make kernel optional --- jupyter_server/services/api/handlers.py | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/jupyter_server/services/api/handlers.py b/jupyter_server/services/api/handlers.py index 78e6d4c566..7edc265393 100644 --- a/jupyter_server/services/api/handlers.py +++ b/jupyter_server/services/api/handlers.py @@ -117,20 +117,12 @@ async def get(self): """Resolve the path.""" path = self.get_query_argument("path") kernel_uuid = self.get_query_argument("kernel", default=None) - - scopes = [ - self.contents_manager, - # TODO vs multi_kernel_manager? - # TODO: kernel lives in client so we may need to implement it there. - self.kernel_manager.get_kernel(kernel_uuid), - # *self.get_additional_scopes(kernel_uuid) - ] - # TODO: maybe client should be able to define their own scope? this way - # ipykernel can return {"scope": "source"} or {scope: 'kernel'} - # TODO: how to plug in the additional scopes? a traitlet? maybe... + scopes = {"server": self.contents_manager} + if kernel_uuid: + scopes["kernel"] = self.kernel_manager.get_kernel(kernel_uuid) resolved = [ - {"scope": scope, "path": await scope.resolve_path(path)} - for scope in scopes + {"scope": name, "path": await ensure_async(scope.resolve_path(path))} + for name, scope in scopes.items() if hasattr(scope, "resolve_path") ] response = {"resolved": [entry for entry in resolved if entry["path"] is not None]} From 03ce546c35225224e3c3fc2a1b9ed9419110cbc5 Mon Sep 17 00:00:00 2001 From: krassowski <5832902+krassowski@users.noreply.github.com> Date: Wed, 31 Jan 2024 15:28:03 +0000 Subject: [PATCH 3/5] Add initial swagger API description --- jupyter_server/services/api/api.yaml | 41 ++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/jupyter_server/services/api/api.yaml b/jupyter_server/services/api/api.yaml index 976e5726f2..4704b34f01 100644 --- a/jupyter_server/services/api/api.yaml +++ b/jupyter_server/services/api/api.yaml @@ -352,6 +352,26 @@ paths: responses: 204: description: Checkpoint deleted + /api/resolvePath: + parameters: + - name: path + required: true + in: query + description: file path + type: string + - name: kernel_id + required: false + in: query + description: kernel uuid + type: string + format: uuid + get: + summary: Resolve path to a file + responses: + 200: + description: Resolved path with scope details + schema: + $ref: "#/definitions/ResolvedPath" /api/sessions/{session}: parameters: - $ref: "#/parameters/session" @@ -947,3 +967,24 @@ definitions: ISO 8601 timestamp for the last-seen activity on this terminal. Use this to identify which terminals have been inactive since a given time. Timestamps will be UTC, indicated 'Z' suffix. + ResolvedPath: + description: Resolved file path details + type: object + required: + - resolved + properties: + resolved: + type: array + description: array of paths to which the query path resolves + items: + type: object + required: + - scope + - path + properties: + scope: + type: string + description: the scope which owns the path + path: + type: string + description: fully specified path for file From bfb5bfe41b93bd65777c211df8bf49fe8f1a2b8f Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 31 Jan 2024 15:28:44 +0000 Subject: [PATCH 4/5] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- jupyter_server/services/contents/manager.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jupyter_server/services/contents/manager.py b/jupyter_server/services/contents/manager.py index 6b196e2e9f..9179e9d2d2 100644 --- a/jupyter_server/services/contents/manager.py +++ b/jupyter_server/services/contents/manager.py @@ -875,7 +875,7 @@ async def rename_file(self, old_path, new_path): async def resolve_path(self, path: str): """Resolve path relative to root resource.""" - return None + return async def delete(self, path): """Delete a file/directory and any associated checkpoints.""" From 35a73e76da563a8ae9e3d8caff247c437cefb97c Mon Sep 17 00:00:00 2001 From: krassowski <5832902+krassowski@users.noreply.github.com> Date: Thu, 1 Feb 2024 15:49:36 +0000 Subject: [PATCH 5/5] Add tests for the `resolvePath` API --- tests/services/kernels/test_api.py | 147 +++++++++++++++++++++++++++++ 1 file changed, 147 insertions(+) diff --git a/tests/services/kernels/test_api.py b/tests/services/kernels/test_api.py index dfbf1a47bc..1254b9fe2f 100644 --- a/tests/services/kernels/test_api.py +++ b/tests/services/kernels/test_api.py @@ -183,6 +183,153 @@ async def test_main_kernel_handler(jp_fetch, jp_base_url, jp_serverapp, pending_ await pending_kernel_is_ready(kernel3["id"]) +@pytest.mark.timeout(TEST_TIMEOUT) +async def test_resolve_path_kernel(jp_fetch, jp_serverapp, jp_root_dir): + query_path = "hello.py" + contents = "(irrelevant)" + + # Put the file in a kernel subdirectory + kernel_root = jp_root_dir / "more" / "path" / "segments" + kernel_root.mkdir(parents=True) + kernel_path = kernel_root / query_path + kernel_path.write_text(contents) + + # Create a kernel in temp path + r = await jp_fetch( + "api", + "kernels", + method="POST", + body=json.dumps( + {"name": NATIVE_KERNEL_NAME, "path": str(kernel_root.relative_to(jp_root_dir))} + ), + ) + kernel_id = json.loads(r.body.decode())["id"] + + # Resolve the path + r = await jp_fetch( + "api", "resolvePath", params={"kernel": kernel_id, "path": query_path}, method="GET" + ) + + assert r.code == 200 + resolution = json.loads(r.body.decode()) + assert len(resolution["resolved"]) == 1 + path = resolution["resolved"][0] + assert path["scope"] == "kernel" + + # Should reveal the kernel path + assert path["path"] == str(kernel_path) + + +@pytest.mark.timeout(TEST_TIMEOUT) +async def test_resolve_path_server_traversal(jp_fetch, jp_serverapp, jp_root_dir): + query_path = "hello.py" + contents = "(irrelevant)" + + # Put the file above the root dir + server_path = jp_root_dir / ".." / query_path + server_path.write_text(contents) + + # Resolve the path + r = await jp_fetch("api", "resolvePath", params={"path": query_path}, method="GET") + + assert r.code == 200 + resolution = json.loads(r.body.decode()) + + # Should NOT disclose file existence on path traversal + assert len(resolution["resolved"]) == 0 + + +@pytest.mark.timeout(TEST_TIMEOUT) +async def test_resolve_path_server(jp_fetch, jp_serverapp, jp_root_dir): + query_path = "hello.py" + contents = "(irrelevant)" + + # Put the file in the root dir + server_path = jp_root_dir / query_path + server_path.write_text(contents) + + # Resolve the path + r = await jp_fetch("api", "resolvePath", params={"path": query_path}, method="GET") + + assert r.code == 200 + resolution = json.loads(r.body.decode()) + assert len(resolution["resolved"]) == 1 + path = resolution["resolved"][0] + assert path["scope"] == "server" + + # Should NOT reveal server path, just acknowledge the name + assert path["path"] == server_path.name + + +@pytest.mark.timeout(TEST_TIMEOUT) +async def test_resolve_path_server_and_kernel(jp_fetch, jp_serverapp, jp_root_dir): + query_path = "hello.py" + contents = "(irrelevant)" + + # Put a file for server in the root and for kernel space in a subdirectory + kernel_root = jp_root_dir / "more" / "path" / "segments" + kernel_root.mkdir(parents=True) + kernel_path = kernel_root / query_path + kernel_path.write_text(contents) + server_path = jp_root_dir / query_path + server_path.write_text(contents) + + # Create a kernel in temp path + r = await jp_fetch( + "api", + "kernels", + method="POST", + body=json.dumps( + {"name": NATIVE_KERNEL_NAME, "path": str(kernel_root.relative_to(jp_root_dir))} + ), + ) + kernel_id = json.loads(r.body.decode())["id"] + + # Resolve the path + r = await jp_fetch( + "api", "resolvePath", params={"kernel": kernel_id, "path": query_path}, method="GET" + ) + assert r.code == 200 + resolution = json.loads(r.body.decode()) + + # Should present candidates for both server and kernel + assert len(resolution["resolved"]) == 2 + assert {k["scope"] for k in resolution["resolved"]} == {"server", "kernel"} + + +@pytest.mark.timeout(TEST_TIMEOUT) +async def test_resolve_path_missing(jp_fetch, jp_serverapp, jp_root_dir): + query_path = "hello.py" + contents = "(irrelevant)" + + # Put a file for server in the root and for kernel space in a subdirectory + kernel_root = jp_root_dir / "more" / "path" / "segments" + kernel_root.mkdir(parents=True) + kernel_path = kernel_root / query_path + kernel_path.write_text(contents) + server_path = jp_root_dir / query_path + server_path.write_text(contents) + + # Create a kernel in temp path + r = await jp_fetch( + "api", + "kernels", + method="POST", + body=json.dumps( + {"name": NATIVE_KERNEL_NAME, "path": str(kernel_root.relative_to(jp_root_dir))} + ), + ) + kernel_id = json.loads(r.body.decode())["id"] + + # Resolve the path + r = await jp_fetch( + "api", "resolvePath", params={"kernel": kernel_id, "path": "not_hello"}, method="GET" + ) + assert r.code == 200 + resolution = json.loads(r.body.decode()) + assert len(resolution["resolved"]) == 0 + + @pytest.mark.timeout(TEST_TIMEOUT) async def test_kernel_handler(jp_fetch, jp_serverapp, pending_kernel_is_ready): # Create a kernel