From 2f1b7c82a85a6601f48f0b75850a41976207cc3d Mon Sep 17 00:00:00 2001 From: MaojiaSheng Date: Thu, 9 Apr 2026 12:16:13 +0800 Subject: [PATCH] =?UTF-8?q?Revert=20"fix(resources):=20implement=20trailin?= =?UTF-8?q?g=20slash=20semantics=20for=20resource=20URIs=20=E2=80=A6"?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit dec57bde80ed9a9937c22c48f9e57c6bd8d56db8. --- crates/ov_cli/src/client.rs | 18 +- crates/ov_cli/src/main.rs | 5 +- docs/en/api/02-resources.md | 21 +- docs/zh/api/02-resources.md | 21 +- openviking/parse/parsers/markdown.py | 12 +- openviking/parse/tree_builder.py | 65 +------ openviking/utils/media_processor.py | 22 +-- openviking/utils/resource_processor.py | 83 ++------ tests/api_test/api/client.py | 4 +- tests/api_test/resources/test_add_resource.py | 129 ------------- .../test_add_resource_to_uri_semantics.py | 182 ------------------ .../test_markdown_resource_name_override.py | 24 --- tests/server/test_api_resources.py | 173 ----------------- 13 files changed, 38 insertions(+), 721 deletions(-) delete mode 100644 tests/parse/test_add_resource_to_uri_semantics.py delete mode 100644 tests/parse/test_markdown_resource_name_override.py diff --git a/crates/ov_cli/src/client.rs b/crates/ov_cli/src/client.rs index 7d4beffa7..aed1ffef4 100644 --- a/crates/ov_cli/src/client.rs +++ b/crates/ov_cli/src/client.rs @@ -61,12 +61,6 @@ impl HttpClient { FileOptions::default().compression_method(CompressionMethod::Deflated); let walkdir = walkdir::WalkDir::new(dir_path); - let base_name = dir_path.file_name().and_then(|n| n.to_str()).ok_or_else(|| { - Error::InvalidPath(format!( - "Non-UTF-8 directory name: {}", - dir_path.to_string_lossy() - )) - })?; for entry in walkdir.into_iter().filter_map(|e| e.ok()) { let path = entry.path(); if path.is_file() { @@ -74,12 +68,7 @@ impl HttpClient { let name_str = name.to_str().ok_or_else(|| { Error::InvalidPath(format!("Non-UTF-8 path: {}", name.to_string_lossy())) })?; - let zip_name = if name_str.is_empty() { - base_name.to_string() - } else { - format!("{}/{}", base_name, name_str) - }; - zip.start_file(zip_name, options)?; + zip.start_file(name_str, options)?; let mut file = File::open(path)?; std::io::copy(&mut file, &mut zip)?; } @@ -607,15 +596,10 @@ impl HttpClient { self.post("/api/v1/resources", &body).await } else if path_obj.is_file() { - let source_name = path_obj - .file_stem() - .and_then(|n| n.to_str()) - .map(|s| s.to_string()); let temp_file_id = self.upload_temp_file(path_obj).await?; let body = serde_json::json!({ "temp_file_id": temp_file_id, - "source_name": source_name, "to": to, "parent": parent, "reason": reason, diff --git a/crates/ov_cli/src/main.rs b/crates/ov_cli/src/main.rs index c8fcf642a..8bcea277f 100644 --- a/crates/ov_cli/src/main.rs +++ b/crates/ov_cli/src/main.rs @@ -110,10 +110,7 @@ enum Commands { AddResource { /// Local path or URL to import path: String, - /// Target URI (cannot be used with --parent). - /// - If ends with '/': treated as a directory; preserves original file/dir name under it. - /// - Otherwise: treated as exact root URI (file path or directory root). - /// Note: 'viking://resources' (no trailing slash) is not allowed; use 'viking://resources/'. + /// Exact target URI (must not exist yet) (cannot be used with --parent) #[arg(long)] to: Option, /// Target parent URI (must already exist and be a directory) (cannot be used with --to) diff --git a/docs/en/api/02-resources.md b/docs/en/api/02-resources.md index 28fb7b68a..409a96d13 100644 --- a/docs/en/api/02-resources.md +++ b/docs/en/api/02-resources.md @@ -42,23 +42,12 @@ Add a resource to the knowledge base. |-----------|------|----------|---------|-------------| | path | str | Yes | - | SDK/CLI: local path, directory path, or URL. Raw HTTP: remote URL only | | temp_file_id | str | No | None | Upload ID returned by `POST /api/v1/resources/temp_upload` for raw HTTP local file ingestion | -| to | str | No | None | Target Viking URI (must be in `resources` scope; cannot be used with `parent`) | -| parent | str | No | None | Target parent URI (must exist and be a directory; cannot be used with `to`) | +| target | str | No | None | Target Viking URI (must be in `resources` scope) | | reason | str | No | "" | Why this resource is being added (improves search relevance) | | instruction | str | No | "" | Special processing instructions | | wait | bool | No | False | Wait for semantic processing to complete | | timeout | float | No | None | Timeout in seconds (only used when wait=True) | -| watch_interval | float | No | 0 | Watch interval (minutes). >0 enables/updates watch; <=0 disables watch. Only takes effect when `to` is provided | - -**Trailing slash semantics of `to` (applies after the resource is fetched; archives are extracted first)** - -- If the resource is a **file**: - - `to` ends with `/`: treat `to` as a directory; final location is `to/` - - `to` does not end with `/`: treat `to` as a file; final location is `to` -- If the resource is a **directory**: - - `to` ends with `/`: treat `to` as a directory; final location is `to//` - - `to` does not end with `/`: treat `to` as a directory; map the directory contents into `to` (no extra directory layer) -- If `to == viking://resources` and the request would map a file to a file or map directory contents into `viking://resources` (no trailing slash), the server returns an error and asks you to change `to` (e.g. add `/` or use a more specific path). +| watch_interval | float | No | 0 | Watch interval (minutes). >0 enables/updates watch; <=0 disables watch. Only takes effect when target is provided | **How local files and directories work** @@ -73,7 +62,7 @@ Add a resource to the knowledge base. When you call `add_resource()` repeatedly for the same resource URI, the system performs an incremental update instead of rebuilding everything from scratch: -- **Trigger**: `to` is provided and already exists in the knowledge base. +- **Trigger**: `target` is provided and already exists in the knowledge base. - **High-level idea**: each ingestion first builds a temporary resource tree from the new input. During asynchronous semantic processing, the temporary tree is compared against the existing tree at `target`, and only the changed parts are re-processed and synchronized. - **Incremental behavior in the semantic stage**: - **Unchanged files**: reuse existing L0 summaries and vector index records; skip vectorization. @@ -137,7 +126,7 @@ openviking add-resource ./documents/guide.md --reason "User guide documentation" ```python result = client.add_resource( "https://example.com/api-docs.md", - to="viking://resources/external/", + target="viking://resources/external/", reason="External API documentation" ) client.wait_processed() @@ -151,7 +140,7 @@ curl -X POST http://localhost:1933/api/v1/resources \ -H "X-API-Key: your-key" \ -d '{ "path": "https://example.com/api-docs.md", - "to": "viking://resources/external/", + "target": "viking://resources/external/", "reason": "External API documentation", "wait": true }' diff --git a/docs/zh/api/02-resources.md b/docs/zh/api/02-resources.md index 1639e584a..04a9c184d 100644 --- a/docs/zh/api/02-resources.md +++ b/docs/zh/api/02-resources.md @@ -42,23 +42,12 @@ Input -> Parser -> TreeBuilder -> AGFS -> SemanticQueue -> Vector Index |------|------|------|--------|------| | path | str | 是 | - | SDK/CLI 可传本地路径、目录路径或 URL;裸 HTTP 仅支持远端 URL | | temp_file_id | str | 否 | None | `POST /api/v1/resources/temp_upload` 返回的上传 ID,用于裸 HTTP 导入本地文件 | -| to | str | 否 | None | 目标 Viking URI(必须在 `resources` 作用域内;不可与 `parent` 同时使用) | -| parent | str | 否 | None | 目标父目录 URI(必须存在且为目录;不可与 `to` 同时使用) | +| target | str | 否 | None | 目标 Viking URI(必须在 `resources` 作用域内) | | reason | str | 否 | "" | 添加该资源的原因(可提升搜索相关性) | | instruction | str | 否 | "" | 特殊处理指令 | | wait | bool | 否 | False | 等待语义处理完成 | | timeout | float | 否 | None | 超时时间(秒),仅在 wait=True 时生效 | -| watch_interval | float | 否 | 0 | 定时更新间隔(分钟)。>0 开启/更新定时任务;<=0 关闭(停用)定时任务。仅在指定 `to` 时生效 | - -**`to` 的尾斜杠语义(资源获取完成后适用;压缩内容先解压)** - -- 资源为**文件**: - - `to` 以 `/` 结尾:`to` 视为目录,最终落点为 `to/<源文件名>` - - `to` 不以 `/` 结尾:`to` 视为文件,最终落点为 `to` -- 资源为**目录**: - - `to` 以 `/` 结尾:`to` 视为目录,最终落点为 `to/<源目录名>/` - - `to` 不以 `/` 结尾:`to` 视为目录,目录内容映射到 `to`(不额外新增一层目录) -- 当 `to == viking://resources` 且命中“文件映射到文件”或“目录内容映射到目录(无尾斜杠)”语义时,服务端会直接报错并提示修改 `to`(例如加 `/` 或指定更具体路径)。 +| watch_interval | float | 否 | 0 | 定时更新间隔(分钟)。>0 开启/更新定时任务;<=0 关闭(停用)定时任务。仅在指定 target 时生效 | **本地文件和目录如何处理** @@ -73,7 +62,7 @@ Input -> Parser -> TreeBuilder -> AGFS -> SemanticQueue -> Vector Index 当你为同一个资源 URI 反复调用 `add_resource()` 时,系统会走“增量更新”而不是每次全量重建: -- **触发条件**:请求里显式指定 `to`,且该 `to` 在知识库中已存在。 +- **触发条件**:请求里显式指定 `target`,且该 `target` 在知识库中已存在。 - **总体思路**:每次导入都会先把新内容解析/构建成一棵“临时资源树”,随后在异步语义处理阶段,将临时树与 `target` 对应的现有资源树进行对比,只对发生变化的部分做重算与同步。 - **语义阶段的增量**: - 对**未变化的文件**:复用已有 L0(摘要)与向量索引记录,跳过向量化。 @@ -137,7 +126,7 @@ openviking add-resource ./documents/guide.md --reason "User guide documentation" ```python result = client.add_resource( "https://example.com/api-docs.md", - to="viking://resources/external/", + target="viking://resources/external/", reason="External API documentation" ) client.wait_processed() @@ -151,7 +140,7 @@ curl -X POST http://localhost:1933/api/v1/resources \ -H "X-API-Key: your-key" \ -d '{ "path": "https://example.com/api-docs.md", - "to": "viking://resources/external/", + "target": "viking://resources/external/", "reason": "External API documentation", "wait": true }' diff --git a/openviking/parse/parsers/markdown.py b/openviking/parse/parsers/markdown.py index 44f5b7074..772a86445 100644 --- a/openviking/parse/parsers/markdown.py +++ b/openviking/parse/parsers/markdown.py @@ -174,10 +174,9 @@ async def parse_content( await viking_fs.mkdir(temp_uri) logger.debug(f"[MarkdownParser] Created temp directory: {temp_uri}") - name_hint = kwargs.get("resource_name") or kwargs.get("source_name") # Get document title doc_title = meta.get("frontmatter", {}).get( - "title", name_hint or (Path(source_path).stem if source_path else "Document") + "title", Path(source_path).stem if source_path else "Document" ) # Create root directory @@ -188,9 +187,7 @@ async def parse_content( logger.info(f"[MarkdownParser] Found {len(headings)} headings") # Parse and create directory structure - await self._parse_and_create_structure( - content, headings, root_dir, source_path, doc_name_override=name_hint - ) + await self._parse_and_create_structure(content, headings, root_dir, source_path) parse_time = time.time() - start_time logger.info(f"[MarkdownParser] Parse completed in {parse_time:.2f}s") @@ -368,7 +365,6 @@ async def _parse_and_create_structure( headings: List[Tuple[int, int, str, int]], root_dir: str, source_path: Optional[str] = None, - doc_name_override: Optional[str] = None, ) -> None: """ Parse markdown and create directory structure directly in VikingFS. @@ -399,9 +395,7 @@ async def _parse_and_create_structure( await viking_fs.mkdir(root_dir) # Get document name - doc_name = self._sanitize_for_path( - doc_name_override or (Path(source_path).stem if source_path else "content") - ) + doc_name = self._sanitize_for_path(Path(source_path).stem if source_path else "content") # Small document: save as single file (check both token and char limits) if estimated_tokens <= max_size and len(content) <= max_chars: diff --git a/openviking/parse/tree_builder.py b/openviking/parse/tree_builder.py index c8db0dc64..6f133f75c 100644 --- a/openviking/parse/tree_builder.py +++ b/openviking/parse/tree_builder.py @@ -29,7 +29,6 @@ from openviking.server.identity import RequestContext from openviking.storage.viking_fs import get_viking_fs from openviking.utils import parse_code_hosting_url -from openviking_cli.exceptions import InvalidArgumentError from openviking_cli.utils.uri import VikingURI logger = logging.getLogger(__name__) @@ -154,59 +153,9 @@ async def finalize_from_temp( # 2. Determine base_uri and final document name with org/repo for GitHub/GitLab auto_base_uri = self._get_base_uri(scope, source_path, source_format) base_uri = parent_uri or auto_base_uri - temp_artifact_uri = temp_doc_uri - artifact_kind = "dir" - - # 3. Determine candidate_uri / final_uri + # 3. Determine candidate_uri if to_uri: - candidate_uri = VikingURI.normalize(to_uri) - to_is_dir_semantics = candidate_uri.endswith("/") - to_is_protected_resources_root = candidate_uri == "viking://resources" - - entries = await viking_fs.ls(temp_doc_uri, ctx=ctx) - visible_entries = [ - e - for e in entries - if e.get("name") not in (".", "..") and not str(e.get("name", "")).startswith(".") - ] - force_directory_resource = source_format in ("directory", "repository") - is_single_file_resource = ( - not force_directory_resource - and len(visible_entries) == 1 - and not visible_entries[0].get("isDir", False) - ) - - if is_single_file_resource: - artifact_kind = "file" - source_filename = str(visible_entries[0].get("name", "")).strip() - if not source_filename: - raise ValueError(f"[TreeBuilder] Empty filename in {temp_doc_uri}") - temp_artifact_uri = f"{temp_doc_uri.rstrip('/')}/{source_filename}" - - if to_is_dir_semantics: - final_uri = VikingURI(candidate_uri).join(source_filename).uri - else: - if to_is_protected_resources_root: - raise InvalidArgumentError( - "`to` 不允许为 viking://resources(无尾斜杠)用于文件资源。" - "请修改 to:例如使用 viking://resources/ 以保留原文件名," - "或使用 viking://resources/<目录>/ 或 viking://resources/<目录>/<文件名>。" - ) - final_uri = candidate_uri - else: - artifact_kind = "dir" - temp_artifact_uri = temp_doc_uri - - if to_is_dir_semantics: - final_uri = VikingURI(candidate_uri).join(final_doc_name).uri.rstrip("/") + "/" - else: - if to_is_protected_resources_root: - raise InvalidArgumentError( - "`to` 不允许为 viking://resources(无尾斜杠)用于目录资源。" - "请修改 to:例如使用 viking://resources/ 以保留目录名," - "或使用 viking://resources/<目录名>/ 或 viking://resources/<目标目录>。" - ) - final_uri = candidate_uri + candidate_uri = to_uri else: if parent_uri: # Parent URI must exist and be a directory @@ -217,6 +166,10 @@ async def finalize_from_temp( if not stat_result.get("isDir"): raise ValueError(f"Parent URI is not a directory: {parent_uri}") candidate_uri = VikingURI(base_uri).join(final_doc_name).uri + + if to_uri: + final_uri = candidate_uri + else: final_uri = await self._resolve_unique_uri(candidate_uri) tree = BuildingTree( @@ -228,11 +181,7 @@ async def finalize_from_temp( tree._candidate_uri = candidate_uri # Create a minimal Context object for the root so that tree.root is not None - root_context = Context( - uri=final_uri, - temp_uri=temp_artifact_uri, - meta={"artifact_kind": artifact_kind}, - ) + root_context = Context(uri=final_uri, temp_uri=temp_doc_uri) tree.add_context(root_context) return tree diff --git a/openviking/utils/media_processor.py b/openviking/utils/media_processor.py index a8960acf6..1fb13b8ce 100644 --- a/openviking/utils/media_processor.py +++ b/openviking/utils/media_processor.py @@ -167,25 +167,7 @@ async def _process_file( try: with zipfile.ZipFile(file_path, "r") as zipf: safe_extract_zip(zipf, temp_dir) - visible = [ - p - for p in temp_dir.iterdir() - if p.name not in {".", ".."} and not p.name.startswith(".") - ] - dirs = [p for p in visible if p.is_dir()] - files = [p for p in visible if p.is_file()] - - if len(dirs) == 1 and not files: - inferred_root = dirs[0] - kwargs_for_dir = dict(kwargs) - kwargs_for_dir.pop("source_name", None) - return await self._process_directory(inferred_root, instruction, **kwargs_for_dir) - if len(files) == 1 and not dirs: - return await self._process_file(files[0], instruction, **kwargs) - - kwargs_for_dir = dict(kwargs) - kwargs_for_dir["source_name"] = kwargs_for_dir.get("source_name") or file_path.stem - return await self._process_directory(temp_dir, instruction, **kwargs_for_dir) + return await self._process_directory(temp_dir, instruction, **kwargs) finally: pass # Don't delete temp_dir yet, it will be used by TreeBuilder return await parse( @@ -193,5 +175,5 @@ async def _process_file( instruction=instruction, vlm_processor=self._get_vlm_processor(), storage=self.storage, - resource_name=kwargs.get("source_name") or kwargs.get("resource_name") or file_path.stem, + resource_name=file_path.stem, ) diff --git a/openviking/utils/resource_processor.py b/openviking/utils/resource_processor.py index 07498f016..01aaf75df 100644 --- a/openviking/utils/resource_processor.py +++ b/openviking/utils/resource_processor.py @@ -18,7 +18,6 @@ from openviking.telemetry import get_current_telemetry from openviking.utils.embedding_utils import index_resource from openviking.utils.summarizer import Summarizer -from openviking_cli.exceptions import InvalidArgumentError from openviking_cli.exceptions import OpenVikingError from openviking_cli.utils import get_logger from openviking_cli.utils.storage import StoragePath @@ -200,14 +199,6 @@ async def process_resource( "resource.finalize.duration_ms", round((time.perf_counter() - finalize_start) * 1000, 3), ) - except InvalidArgumentError as e: - telemetry.set_error("resource_processor.finalize", "INVALID_ARGUMENT", str(e)) - try: - if parse_result.temp_dir_path: - await get_viking_fs().delete_temp(parse_result.temp_dir_path, ctx=ctx) - except Exception: - pass - raise except Exception as e: result["status"] = "error" result["errors"].append(f"Finalize from temp error: {e}") @@ -234,12 +225,6 @@ async def process_resource( viking_fs = get_viking_fs() lock_manager = get_lock_manager() target_exists = await viking_fs.exists(root_uri, ctx=ctx) - temp_is_dir = False - try: - temp_stat = await viking_fs.stat(temp_uri, ctx=ctx) - temp_is_dir = bool(temp_stat.get("isDir", False)) if isinstance(temp_stat, dict) else False - except Exception: - temp_is_dir = False if not target_exists: # 第一次添加:锁保护下将 temp 移到 final @@ -264,7 +249,7 @@ async def process_resource( # 在 POINT 锁内获取 SUBTREE 锁(消除竞态窗口) lifecycle_lock_handle_id = await self._try_acquire_lifecycle_lock( - lock_manager, dst_path, is_dir=temp_is_dir + lock_manager, dst_path ) try: @@ -274,19 +259,10 @@ async def process_resource( result["temp_uri"] = root_uri else: - try: - existing_stat = await viking_fs.stat(root_uri, ctx=ctx) - existing_is_dir = ( - bool(existing_stat.get("isDir", False)) - if isinstance(existing_stat, dict) - else False - ) - except Exception: - existing_is_dir = True - + # 增量更新:对目标目录加 SUBTREE 锁 resource_path = viking_fs._uri_to_path(root_uri, ctx=ctx) lifecycle_lock_handle_id = await self._try_acquire_lifecycle_lock( - lock_manager, resource_path, is_dir=existing_is_dir + lock_manager, resource_path ) # ============ Phase 4: Optional Steps ============ @@ -298,43 +274,15 @@ async def process_resource( is_code_repo = parse_result.source_format == "repository" try: with telemetry.measure("resource.summarize"): - viking_fs = get_viking_fs() - root_stat = await viking_fs.stat(result["root_uri"], ctx=ctx) - root_is_dir = ( - bool(root_stat.get("isDir", False)) if isinstance(root_stat, dict) else False + await self._get_summarizer().summarize( + resource_uris=[result["root_uri"]], + ctx=ctx, + skip_vectorization=skip_vec, + lifecycle_lock_handle_id=lifecycle_lock_handle_id, + temp_uris=[temp_uri_for_summarize], + is_code_repo=is_code_repo, + **kwargs, ) - - if root_is_dir: - await self._get_summarizer().summarize( - resource_uris=[result["root_uri"]], - ctx=ctx, - skip_vectorization=skip_vec, - lifecycle_lock_handle_id=lifecycle_lock_handle_id, - temp_uris=[temp_uri_for_summarize], - is_code_repo=is_code_repo, - **kwargs, - ) - else: - if build_index: - from openviking.utils.embedding_utils import vectorize_file - from openviking_cli.utils.uri import VikingURI - - parent = VikingURI(result["root_uri"]).parent - parent_uri = parent.uri if parent else "viking://resources" - await vectorize_file( - file_path=result["root_uri"], - summary_dict={"name": result["root_uri"].split("/")[-1]}, - parent_uri=parent_uri, - ctx=ctx, - context_type="resource", - ) - if lifecycle_lock_handle_id: - from openviking.storage.transaction import get_lock_manager - - handle = get_lock_manager().get_handle(lifecycle_lock_handle_id) - if handle: - await get_lock_manager().release(handle) - lifecycle_lock_handle_id = "" except Exception as e: logger.error(f"Summarization failed: {e}") result["warnings"] = result.get("warnings", []) + [f"Summarization failed: {e}"] @@ -349,15 +297,10 @@ async def process_resource( return result @staticmethod - async def _try_acquire_lifecycle_lock(lock_manager, path: str, is_dir: bool = True) -> str: + async def _try_acquire_lifecycle_lock(lock_manager, path: str) -> str: """尝试获取 SUBTREE 生命周期锁,失败时优雅降级返回空字符串。""" handle = lock_manager.create_handle() - acquired = ( - await lock_manager.acquire_subtree(handle, path) - if is_dir - else await lock_manager.acquire_point(handle, path.rsplit("/", 1)[0] if "/" in path else path) - ) - if acquired: + if await lock_manager.acquire_subtree(handle, path): return handle.id logger.warning(f"[ResourceProcessor] Failed to acquire lifecycle lock on {path}") await lock_manager.release(handle) diff --git a/tests/api_test/api/client.py b/tests/api_test/api/client.py index 81bb1bb97..31f2e4a86 100644 --- a/tests/api_test/api/client.py +++ b/tests/api_test/api/client.py @@ -146,7 +146,7 @@ def _zip_directory_for_upload(self, directory_path: str) -> str: base_path = Path(directory_path) for file_path in base_path.rglob("*"): if file_path.is_file(): - zf.write(file_path, arcname=Path(base_path.name) / file_path.relative_to(base_path)) + zf.write(file_path, arcname=file_path.relative_to(base_path)) return temp_zip_path except Exception: Path(temp_zip_path).unlink(missing_ok=True) @@ -261,11 +261,9 @@ def add_resource( cleanup_path = None if os.path.isfile(path): payload["temp_file_id"] = self._upload_temp_file(path) - payload["source_name"] = Path(path).stem elif os.path.isdir(path): cleanup_path = self._zip_directory_for_upload(path) payload["temp_file_id"] = self._upload_temp_file(cleanup_path) - payload["source_name"] = Path(path).name else: payload["path"] = path if to: diff --git a/tests/api_test/resources/test_add_resource.py b/tests/api_test/resources/test_add_resource.py index 341111914..e25ce25f8 100644 --- a/tests/api_test/resources/test_add_resource.py +++ b/tests/api_test/resources/test_add_resource.py @@ -1,8 +1,6 @@ import json import os import tempfile -import uuid -import zipfile class TestAddResource: @@ -74,130 +72,3 @@ def test_add_resource_simple(self, api_client): except Exception as e: print(f"Error: {e}") raise - - def test_add_resource_to_dir_semantics_rule1_file(self, api_client): - with tempfile.TemporaryDirectory() as temp_dir: - test_file_path = os.path.join(temp_dir, "a.txt") - with open(test_file_path, "w", encoding="utf-8") as f: - f.write("rule1") - - to_dir = f"viking://resources/test-add-resource-{uuid.uuid4().hex}/" - response = api_client.add_resource(path=test_file_path, to=to_dir, wait=True) - assert response.status_code == 200 - data = response.json() - assert data.get("status") == "ok" - root_uri = data["result"]["root_uri"] - assert root_uri == f"{to_dir}a.txt" - - stat_resp = api_client.fs_stat(root_uri) - assert stat_resp.status_code == 200 - stat = stat_resp.json() - assert stat.get("status") == "ok" - - def test_add_resource_to_file_semantics_rule2_file(self, api_client): - with tempfile.TemporaryDirectory() as temp_dir: - test_file_path = os.path.join(temp_dir, "a.txt") - with open(test_file_path, "w", encoding="utf-8") as f: - f.write("rule2") - - to_file = f"viking://resources/test-add-resource-{uuid.uuid4().hex}/b.txt" - response = api_client.add_resource(path=test_file_path, to=to_file, wait=True) - assert response.status_code == 200 - data = response.json() - assert data.get("status") == "ok" - root_uri = data["result"]["root_uri"] - assert root_uri == to_file - - stat_resp = api_client.fs_stat(root_uri) - assert stat_resp.status_code == 200 - stat = stat_resp.json() - assert stat.get("status") == "ok" - - def test_add_resource_to_dir_semantics_rule3_directory(self, api_client): - with tempfile.TemporaryDirectory() as temp_dir: - myproj = os.path.join(temp_dir, "myproj") - os.makedirs(myproj, exist_ok=True) - with open(os.path.join(myproj, "README.md"), "w", encoding="utf-8") as f: - f.write("# myproj\n") - - to_dir = f"viking://resources/test-add-resource-{uuid.uuid4().hex}/" - response = api_client.add_resource(path=myproj, to=to_dir, wait=True) - assert response.status_code == 200 - data = response.json() - assert data.get("status") == "ok" - root_uri = data["result"]["root_uri"] - assert root_uri == f"{to_dir}myproj/" - - tree_resp = api_client.fs_tree(root_uri) - assert tree_resp.status_code == 200 - tree = tree_resp.json() - assert tree.get("status") == "ok" - - def test_add_resource_to_no_trailing_slash_rule4_directory(self, api_client): - with tempfile.TemporaryDirectory() as temp_dir: - myproj = os.path.join(temp_dir, "myproj") - os.makedirs(myproj, exist_ok=True) - with open(os.path.join(myproj, "README.md"), "w", encoding="utf-8") as f: - f.write("# myproj\n") - - to_dir = f"viking://resources/test-add-resource-{uuid.uuid4().hex}" - response = api_client.add_resource(path=myproj, to=to_dir, wait=True) - assert response.status_code == 200 - data = response.json() - assert data.get("status") == "ok" - root_uri = data["result"]["root_uri"] - assert root_uri == to_dir - - tree_resp = api_client.fs_tree(root_uri) - assert tree_resp.status_code == 200 - tree = tree_resp.json() - assert tree.get("status") == "ok" - - def test_add_resource_zip_then_apply_rules_rule5(self, api_client): - with tempfile.TemporaryDirectory() as temp_dir: - proj = os.path.join(temp_dir, "myproj") - os.makedirs(proj, exist_ok=True) - with open(os.path.join(proj, "README.md"), "w", encoding="utf-8") as f: - f.write("# zip\n") - - zip_path = os.path.join(temp_dir, "myproj.zip") - with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as zf: - zf.write(os.path.join(proj, "README.md"), arcname="myproj/README.md") - - to_dir = f"viking://resources/test-add-resource-{uuid.uuid4().hex}/" - response = api_client.add_resource(path=zip_path, to=to_dir, wait=True) - assert response.status_code == 200 - data = response.json() - assert data.get("status") == "ok" - root_uri = data["result"]["root_uri"] - assert root_uri == f"{to_dir}myproj/" - - tree_resp = api_client.fs_tree(root_uri) - assert tree_resp.status_code == 200 - tree = tree_resp.json() - assert tree.get("status") == "ok" - - def test_add_resource_protect_resources_root_rule6_file(self, api_client): - with tempfile.TemporaryDirectory() as temp_dir: - test_file_path = os.path.join(temp_dir, "a.txt") - with open(test_file_path, "w", encoding="utf-8") as f: - f.write("rule6-file") - - response = api_client.add_resource(path=test_file_path, to="viking://resources", wait=True) - assert response.status_code == 400 - data = response.json() - assert data.get("status") == "error" - assert data.get("error", {}).get("code") == "INVALID_ARGUMENT" - - def test_add_resource_protect_resources_root_rule6_dir(self, api_client): - with tempfile.TemporaryDirectory() as temp_dir: - myproj = os.path.join(temp_dir, "myproj") - os.makedirs(myproj, exist_ok=True) - with open(os.path.join(myproj, "README.md"), "w", encoding="utf-8") as f: - f.write("# myproj\n") - - response = api_client.add_resource(path=myproj, to="viking://resources", wait=True) - assert response.status_code == 400 - data = response.json() - assert data.get("status") == "error" - assert data.get("error", {}).get("code") == "INVALID_ARGUMENT" diff --git a/tests/parse/test_add_resource_to_uri_semantics.py b/tests/parse/test_add_resource_to_uri_semantics.py deleted file mode 100644 index ce00baba5..000000000 --- a/tests/parse/test_add_resource_to_uri_semantics.py +++ /dev/null @@ -1,182 +0,0 @@ -from unittest.mock import AsyncMock, MagicMock, patch - -import pytest - - -def _make_ctx(): - from openviking.server.identity import RequestContext, Role - from openviking_cli.session.user_id import UserIdentifier - - return RequestContext(user=UserIdentifier("default", "default", "default"), role=Role.USER) - - -def _make_viking_fs_mock(ls_map: dict[str, list[dict]]): - fs = MagicMock() - - async def _ls(uri: str, **kwargs): - return ls_map.get(uri, []) - - fs.ls = AsyncMock(side_effect=_ls) - fs.stat = AsyncMock(return_value={"isDir": True}) - return fs - - -class TestAddResourceToUriSemantics: - @pytest.mark.asyncio - async def test_file_resource_to_dir_semantics_rule1(self): - from openviking.parse.tree_builder import TreeBuilder - - temp_base = "viking://temp/t1" - temp_doc = f"{temp_base}/doc" - fs = _make_viking_fs_mock( - { - temp_base: [{"name": "doc", "isDir": True}], - temp_doc: [{"name": "a.txt", "isDir": False}], - } - ) - - builder = TreeBuilder() - with patch("openviking.parse.tree_builder.get_viking_fs", return_value=fs): - tree = await builder.finalize_from_temp( - temp_dir_path=temp_base, - ctx=_make_ctx(), - scope="resources", - to_uri="viking://resources/mydir/", - ) - - assert tree.root is not None - assert tree.root.uri == "viking://resources/mydir/a.txt" - assert tree.root.temp_uri == f"{temp_doc}/a.txt" - assert tree.root.meta.get("artifact_kind") == "file" - - @pytest.mark.asyncio - async def test_directory_resource_to_dir_semantics_rule3(self): - from openviking.parse.tree_builder import TreeBuilder - - temp_base = "viking://temp/t2" - temp_doc = f"{temp_base}/myproj" - fs = _make_viking_fs_mock( - { - temp_base: [{"name": "myproj", "isDir": True}], - temp_doc: [ - {"name": "README.md", "isDir": False}, - {"name": "src", "isDir": True}, - ], - } - ) - - builder = TreeBuilder() - with patch("openviking.parse.tree_builder.get_viking_fs", return_value=fs): - tree = await builder.finalize_from_temp( - temp_dir_path=temp_base, - ctx=_make_ctx(), - scope="resources", - to_uri="viking://resources/target/", - ) - - assert tree.root is not None - assert tree.root.uri == "viking://resources/target/myproj/" - assert tree.root.temp_uri == temp_doc - assert tree.root.meta.get("artifact_kind") == "dir" - - @pytest.mark.asyncio - async def test_directory_resource_to_no_trailing_slash_rule4(self): - from openviking.parse.tree_builder import TreeBuilder - - temp_base = "viking://temp/t3" - temp_doc = f"{temp_base}/myproj" - fs = _make_viking_fs_mock( - { - temp_base: [{"name": "myproj", "isDir": True}], - temp_doc: [{"name": "README.md", "isDir": False}, {"name": "src", "isDir": True}], - } - ) - - builder = TreeBuilder() - with patch("openviking.parse.tree_builder.get_viking_fs", return_value=fs): - tree = await builder.finalize_from_temp( - temp_dir_path=temp_base, - ctx=_make_ctx(), - scope="resources", - to_uri="viking://resources/target", - ) - - assert tree.root is not None - assert tree.root.uri == "viking://resources/target" - assert tree.root.temp_uri == temp_doc - - @pytest.mark.asyncio - async def test_force_directory_for_directory_source_even_single_file(self): - from openviking.parse.tree_builder import TreeBuilder - - temp_base = "viking://temp/t6" - temp_doc = f"{temp_base}/tt_a" - fs = _make_viking_fs_mock( - { - temp_base: [{"name": "tt_a", "isDir": True}], - temp_doc: [{"name": "aa.md", "isDir": False}], - } - ) - - builder = TreeBuilder() - with patch("openviking.parse.tree_builder.get_viking_fs", return_value=fs): - tree = await builder.finalize_from_temp( - temp_dir_path=temp_base, - ctx=_make_ctx(), - scope="resources", - to_uri="viking://resources/", - source_format="directory", - ) - - assert tree.root is not None - assert tree.root.uri == "viking://resources/tt_a/" - assert tree.root.temp_uri == temp_doc - assert tree.root.meta.get("artifact_kind") == "dir" - - @pytest.mark.asyncio - async def test_protect_resources_root_file_rule6(self): - from openviking.parse.tree_builder import TreeBuilder - from openviking_cli.exceptions import InvalidArgumentError - - temp_base = "viking://temp/t4" - temp_doc = f"{temp_base}/doc" - fs = _make_viking_fs_mock( - { - temp_base: [{"name": "doc", "isDir": True}], - temp_doc: [{"name": "a.txt", "isDir": False}], - } - ) - - builder = TreeBuilder() - with patch("openviking.parse.tree_builder.get_viking_fs", return_value=fs): - with pytest.raises(InvalidArgumentError): - await builder.finalize_from_temp( - temp_dir_path=temp_base, - ctx=_make_ctx(), - scope="resources", - to_uri="viking://resources", - ) - - @pytest.mark.asyncio - async def test_protect_resources_root_dir_rule6(self): - from openviking.parse.tree_builder import TreeBuilder - from openviking_cli.exceptions import InvalidArgumentError - - temp_base = "viking://temp/t5" - temp_doc = f"{temp_base}/myproj" - fs = _make_viking_fs_mock( - { - temp_base: [{"name": "myproj", "isDir": True}], - temp_doc: [{"name": "README.md", "isDir": False}, {"name": "src", "isDir": True}], - } - ) - - builder = TreeBuilder() - with patch("openviking.parse.tree_builder.get_viking_fs", return_value=fs): - with pytest.raises(InvalidArgumentError): - await builder.finalize_from_temp( - temp_dir_path=temp_base, - ctx=_make_ctx(), - scope="resources", - to_uri="viking://resources", - ) diff --git a/tests/parse/test_markdown_resource_name_override.py b/tests/parse/test_markdown_resource_name_override.py deleted file mode 100644 index 41b00a6fa..000000000 --- a/tests/parse/test_markdown_resource_name_override.py +++ /dev/null @@ -1,24 +0,0 @@ -from unittest.mock import AsyncMock, MagicMock, patch - -import pytest - - -@pytest.mark.asyncio -async def test_markdown_parser_uses_resource_name_for_output_naming(): - from openviking.parse.parsers.markdown import MarkdownParser - - fs = MagicMock() - fs.mkdir = AsyncMock() - fs.write_file = AsyncMock() - - parser = MarkdownParser() - with patch.object(parser, "_get_viking_fs", return_value=fs): - result = await parser.parse_content( - "# Title\n\nHello\n", - source_path="/tmp/upload_deadbeef.md", - resource_name="aa", - ) - - assert result is not None - written_paths = [call.args[0] for call in fs.write_file.call_args_list] - assert any(p.endswith("/aa.md") for p in written_paths) diff --git a/tests/server/test_api_resources.py b/tests/server/test_api_resources.py index 28d6a574d..2357fd5a4 100644 --- a/tests/server/test_api_resources.py +++ b/tests/server/test_api_resources.py @@ -3,20 +3,11 @@ """Tests for resource management endpoints.""" -import zipfile -from pathlib import Path - import httpx from openviking.telemetry import get_current_telemetry -def _write_zip(zip_path: Path, files: dict[str, str]) -> None: - with zipfile.ZipFile(zip_path, "w", compression=zipfile.ZIP_DEFLATED) as zf: - for arcname, content in files.items(): - zf.writestr(arcname, content) - - async def test_add_resource_success( client: httpx.AsyncClient, sample_markdown_file, @@ -216,170 +207,6 @@ async def test_add_resource_with_to( assert "custom" in body["result"]["root_uri"] -async def test_add_resource_file_to_trailing_slash_rule_1( - client: httpx.AsyncClient, - sample_markdown_file, - upload_temp_dir, -): - resp = await client.post( - "/api/v1/resources", - json={ - "temp_file_id": sample_markdown_file.name, - "to": "viking://resources/custom_dir/", - "reason": "rule 1", - "wait": True, - }, - ) - assert resp.status_code == 200 - body = resp.json() - assert body["status"] == "ok" - root_uri = body["result"]["root_uri"] - assert root_uri == "viking://resources/custom_dir/sample.md" - - stat_resp = await client.get("/api/v1/fs/stat", params={"uri": root_uri}) - assert stat_resp.status_code == 200 - assert stat_resp.json()["result"]["isDir"] is False - - -async def test_add_resource_file_to_no_trailing_slash_rule_2( - client: httpx.AsyncClient, - sample_markdown_file, - upload_temp_dir, -): - target = "viking://resources/custom_dir/renamed.md" - resp = await client.post( - "/api/v1/resources", - json={ - "temp_file_id": sample_markdown_file.name, - "to": target, - "reason": "rule 2", - "wait": True, - }, - ) - assert resp.status_code == 200 - body = resp.json() - assert body["status"] == "ok" - root_uri = body["result"]["root_uri"] - assert root_uri == target - - stat_resp = await client.get("/api/v1/fs/stat", params={"uri": root_uri}) - assert stat_resp.status_code == 200 - assert stat_resp.json()["result"]["isDir"] is False - - -async def test_add_resource_dir_to_trailing_slash_rule_3( - client: httpx.AsyncClient, - upload_temp_dir, -): - zip_path = upload_temp_dir / "demo_dir.zip" - _write_zip( - zip_path, - { - "demo_dir/a.md": "# a\n", - "demo_dir/b.md": "# b\n", - }, - ) - resp = await client.post( - "/api/v1/resources", - json={ - "temp_file_id": zip_path.name, - "to": "viking://resources/custom_dir/", - "reason": "rule 3", - "wait": True, - }, - ) - assert resp.status_code == 200 - body = resp.json() - assert body["status"] == "ok" - root_uri = body["result"]["root_uri"] - assert root_uri == "viking://resources/custom_dir/demo_dir/" - - stat_resp = await client.get("/api/v1/fs/stat", params={"uri": root_uri}) - assert stat_resp.status_code == 200 - assert stat_resp.json()["result"]["isDir"] is True - - -async def test_add_resource_dir_to_no_trailing_slash_rule_4( - client: httpx.AsyncClient, - upload_temp_dir, -): - zip_path = upload_temp_dir / "demo_dir.zip" - _write_zip( - zip_path, - { - "demo_dir/a.md": "# a\n", - "demo_dir/b.md": "# b\n", - }, - ) - target = "viking://resources/custom_dir/explicit_root" - resp = await client.post( - "/api/v1/resources", - json={ - "temp_file_id": zip_path.name, - "to": target, - "reason": "rule 4", - "wait": True, - }, - ) - assert resp.status_code == 200 - body = resp.json() - assert body["status"] == "ok" - root_uri = body["result"]["root_uri"] - assert root_uri == target - - stat_resp = await client.get("/api/v1/fs/stat", params={"uri": root_uri}) - assert stat_resp.status_code == 200 - assert stat_resp.json()["result"]["isDir"] is True - - -async def test_add_resource_zip_single_file_follows_rules_rule_5( - client: httpx.AsyncClient, - upload_temp_dir, -): - zip_path = upload_temp_dir / "single_file.zip" - _write_zip(zip_path, {"hello.md": "# hello\n"}) - resp = await client.post( - "/api/v1/resources", - json={ - "temp_file_id": zip_path.name, - "to": "viking://resources/custom_dir/", - "reason": "rule 5", - "wait": True, - }, - ) - assert resp.status_code == 200 - body = resp.json() - assert body["status"] == "ok" - root_uri = body["result"]["root_uri"] - assert root_uri == "viking://resources/custom_dir/hello.md" - - stat_resp = await client.get("/api/v1/fs/stat", params={"uri": root_uri}) - assert stat_resp.status_code == 200 - assert stat_resp.json()["result"]["isDir"] is False - - -async def test_add_resource_to_resources_root_is_rejected_rule_6( - client: httpx.AsyncClient, - sample_markdown_file, - upload_temp_dir, -): - resp = await client.post( - "/api/v1/resources", - json={ - "temp_file_id": sample_markdown_file.name, - "to": "viking://resources", - "reason": "rule 6", - "wait": True, - }, - ) - assert resp.status_code == 400 - body = resp.json() - assert body["status"] == "error" - assert body["error"]["code"] == "INVALID_ARGUMENT" - assert "viking://resources" in body["error"]["message"] - assert "viking://resources/" in body["error"]["message"] - - async def test_wait_processed_empty_queue(client: httpx.AsyncClient): resp = await client.post( "/api/v1/system/wait",