NVIDIA · charlesbluca · Apr 15, 2026 · Apr 8, 2026 · Apr 8, 2026 · Apr 8, 2026
@@ -0,0 +1,77 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024-25, NVIDIA CORPORATION & AFFILIATES.
+# All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+name: Library Mode Integration Tests (Windows & macOS)
+
+on:
+  workflow_dispatch:
+    inputs:
+      source-ref:
+        description: 'Git ref to test (branch, tag, or SHA). Defaults to the dispatched branch.'
+        required: false
+        type: string
+        default: ''
+
+jobs:
+  integration-test:
+    name: Integration Tests (${{ matrix.os-label }})
+    runs-on: ${{ matrix.runner }}
+    timeout-minutes: 90
+
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          - runner: windows-latest
+            os-label: windows-x64
+          - runner: macos-26
+            os-label: macos-arm64
+          - runner: macos-26-intel
+            os-label: macos-x64
+
+    env:
+      # NIM endpoint URLs — edit these directly to point at different deployments
+      PAGE_ELEMENTS_INVOKE_URL: "https://ai.api.nvidia.com/v1/cv/nvidia/nemotron-page-elements-v3"
+      OCR_INVOKE_URL: "https://ai.api.nvidia.com/v1/cv/nvidia/nemoretriever-ocr-v1"
+      GRAPHIC_ELEMENTS_INVOKE_URL: "https://ai.api.nvidia.com/v1/cv/nvidia/nemotron-graphic-elements-v1"
+      TABLE_STRUCTURE_INVOKE_URL: "https://ai.api.nvidia.com/v1/cv/nvidia/nemotron-table-structure-v1"
+      EMBED_INVOKE_URL: "https://integrate.api.nvidia.com/v1"
+      EMBED_MODEL_NAME: "nvidia/llama-nemotron-embed-1b-v2"
+
+    steps:
+      - name: Check out repository code
+        uses: actions/checkout@v4
+        with:
+          ref: ${{ inputs.source-ref != '' && inputs.source-ref || github.ref }}
+
+      - name: Set up Python 3.12
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.12'
+
+      - name: Install uv
+        run: pip install uv
-
-      - name: Set up Python 3.12
-        uses: actions/setup-python@v5
-        with:
-          python-version: '3.12'
-
-      - name: Install uv
-        run: pip install uv
+      - name: Check out repository code
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683  # v4.2.2
+        with:
+          ref: ${{ inputs.source-ref != '' && inputs.source-ref || github.ref }}
+
+      - name: Set up Python 3.12
+        uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065  # v5.6.0
+        with:
+          python-version: '3.12'
-
-      - name: Set up Python 3.12
-        uses: actions/setup-python@v5
-        with:
-          python-version: '3.12'
-
-      - name: Install uv
-        run: pip install uv
+      - name: Check out repository code
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683  # v4.2.2
+        with:
+          ref: ${{ inputs.source-ref != '' && inputs.source-ref || github.ref }}
+
+      - name: Set up Python 3.12
+        uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065  # v5.6.0
+        with:
+          python-version: '3.12'
+
+      - name: Install nemo-retriever and dependencies
+        shell: bash
+        run: |
+          uv pip install --system -e api/ -e client/ -e "nemo_retriever[remote]"
+
+      # Runs the example graph pipeline in-process on the PDFs under ./data, pointing every
+      # inference stage at the NIM endpoint URLs defined in the job-level `env` block.
+      - name: Run graph pipeline on PDFs
+        shell: bash
+        env:
+          PYTHONPATH: nemo_retriever/src
+          # Hand the secret to the shell through an environment variable instead of expanding
+          # it into the script text, so quotes, `$`, or backticks in the value cannot break or
+          # alter the command line.
+          PIPELINE_API_KEY: ${{ secrets.NGC_NV_DEVELOPER_NVCF }}
+        run: |
+          python -m nemo_retriever.examples.graph_pipeline ./data \
+            --run-mode inprocess \
+            --input-type pdf \
+            --api-key "$PIPELINE_API_KEY" \
+            --page-elements-invoke-url "$PAGE_ELEMENTS_INVOKE_URL" \
+            --ocr-invoke-url "$OCR_INVOKE_URL" \
+            --use-graphic-elements \
+            --graphic-elements-invoke-url "$GRAPHIC_ELEMENTS_INVOKE_URL" \
+            --use-table-structure \
+            --table-structure-invoke-url "$TABLE_STRUCTURE_INVOKE_URL" \
+            --embed-invoke-url "$EMBED_INVOKE_URL" \
+            --embed-model-name "$EMBED_MODEL_NAME"
@@ -26,8 +26,7 @@ jobs:
 
       - name: Install unit test dependencies
         run: |
-          uv pip install --system -e src/ -e api/ -e client/
-          uv pip install --system -e nemo_retriever
+          uv pip install --system -e nemo_retriever[all,dev]
 
       - name: Run retriever unit tests
         env:

diff --git a/DEPENDENCY_LAYERS.md b/DEPENDENCY_LAYERS.md
@@ -0,0 +1,121 @@
+# Dependency Layering Plan
+
+This document describes the restructured optional-extras model for `nemo_retriever/pyproject.toml`.
+
+## Problem
+
+The previous `pyproject.toml` listed ~50 packages as required dependencies, meaning every install
+pulled in torch, vLLM, CUDA wheels, nemotron models, GPU monitoring tooling, etc. — regardless of
+whether the user intended to run local models or simply call remote NIM endpoints. This made the
+package impossible to install on Intel Macs and unnecessarily heavy everywhere.
+
+## Solution: Layered Optional Extras
+
+Dependencies are now split into a slim base plus composable optional extras. Each tier builds on
+the previous via self-referencing extras.
+
+### Tier hierarchy
+
+```
+nemo_retriever          ← slim base: ray, fastapi, pydantic, HTTP clients, nv-ingest*
+ └── [remote]           ← adds: pypdfium2, pillow, nltk, markitdown, langchain-nvidia-ai-endpoints
+      └── [local-cpu]   ← adds: torch (CPU wheel on Mac, CUDA wheel on Linux), transformers, nemotron models (ARM Mac compatible)
+           └── [local-gpu]  ← adds: nvidia-ml-py, vLLM  (Linux/CUDA only)
+                └── [multimedia]  ← adds: soundfile + scipy (ASR), cairosvg (SVG)
+                                    (can also be combined with any tier independently)
+
+[stores]       ← lancedb, duckdb, duckdb-engine, neo4j  (independent, add to any tier)
+[benchmarks]   ← datasets, open-clip-torch  (BEIR evaluation only)
+[dev]          ← build, pytest
+[all]          ← local-gpu + multimedia + stores + benchmarks
+```
+
+### Install commands by use case
+
+| Use case | Platform | Command |
+|---|---|---|
+| All remote (NIM) inference | Intel Mac, any | `uv pip install "nemo_retriever[remote,stores]"` |
+| Local PDF ingestion, CPU | ARM Mac | `uv pip install "nemo_retriever[local-cpu,stores]"` |
+| Local PDF ingestion, GPU | Linux + CUDA | `uv pip install "nemo_retriever[local-gpu,stores]"` |
+| Full multimedia (GPU + audio + SVG) | Linux + CUDA | `uv pip install "nemo_retriever[local-gpu,multimedia,stores]"` |
+| Everything | Linux + CUDA | `uv pip install "nemo_retriever[all]"` |
+
+## What Each Extra Contains
+
+### Base (always installed)
+Pure framework infrastructure — no ML, no storage.
+
+- `ray[data,serve]` — pipeline orchestration
+- `pandas`, `numpy`, `tqdm` — data handling
+- `fastapi`, `uvicorn`, `python-multipart` — service API
+- `httpx`, `requests`, `urllib3` — HTTP clients
+- `pydantic`, `typer`, `pyyaml`, `rich` — config, CLI, output
+- `universal-pathlib`, `debugpy` — utilities
+- `nv-ingest`, `nv-ingest-api`, `nv-ingest-client` — core ingest packages
+
+### `[remote]`
+Everything needed to run the full pipeline via remote NIM endpoints. No GPU, no local models.
+Installs cleanly on Intel Macs.
+
+- `pypdfium2` — PDF page splitting and rendering
+- `pillow` — image I/O
+- `nltk` — text splitting utilities
+- `markitdown` — HTML/document-to-markdown conversion
+- `langchain-nvidia-ai-endpoints` — LLM/SQL via NVIDIA NIM
+
+### `[local-cpu]`
+Adds local HuggingFace model inference. On Linux, torch resolves to a CUDA wheel from the
+PyTorch index; on Mac it falls through to the PyPI CPU wheel.
+
+- `transformers`, `tokenizers`, `accelerate==1.12.0` — HuggingFace model loading
+- `torch~=2.9.1`, `torchvision` — PyTorch (CPU on Mac, CUDA on Linux)
+- `einops`, `easydict`, `addict`, `timm`, `albumentations`, `scikit-learn` — model utilities
+- `nemotron-page-elements-v3`, `nemotron-graphic-elements-v1`, `nemotron-table-structure-v1` — layout/table/chart detection
+- `nemotron-ocr` — end-to-end OCR (Linux only)
+
+### `[local-gpu]`
+Adds GPU monitoring and fast LLM inference on top of `[local-cpu]`.
+
+- `nvidia-ml-py` — GPU memory and utilization monitoring
+- `vllm==0.16.0` — fast GPU-accelerated LLM inference (Linux only)
+
+### `[multimedia]`
+Specialized media format support. Can be combined with any inference tier.
+
+- `soundfile`, `scipy` — audio file I/O and resampling for local Parakeet ASR
+- `cairosvg` — SVG-to-image rendering (requires `libcairo` system library)
+
+### `[stores]`
+Vector, SQL, and graph storage backends. Independent of inference tier.
+
+- `lancedb` — vector database for embedding storage and hybrid search
+- `duckdb`, `duckdb-engine` — SQL execution on structured/tabular data
+- `neo4j` — graph database for knowledge graph ingestion
+
+### `[benchmarks]`
+BEIR evaluation tools. Not needed for production use.
+
+- `datasets` — HuggingFace datasets (used in `recall/beir.py`)
+- `open-clip-torch` — OpenCLIP, an open-source implementation of OpenAI's CLIP model
+
+## Torch Index Configuration
+
+`[tool.uv.sources]` uses a platform marker so the right torch wheel is resolved automatically:
+
+```toml
+torch = [
+  { index = "pytorch-cu130", marker = "sys_platform == 'linux'" },
+  # Mac: falls through to PyPI CPU wheel
+]
+```
+
+No manual intervention needed — `uv` picks the right wheel per platform.
+
+## Cleanups Applied
+
+The following issues in the original flat dependency list were cleaned up:
+
+- `accelerate` was listed twice (`>=1.1.0` and `==1.12.0`) — kept `==1.12.0` only
+- `tqdm` was listed twice — deduplicated
+- `typer` was listed twice — deduplicated
+- `[svg]` extra merged into `[multimedia]` (cairosvg is a media format conversion tool)
@@ -25,11 +25,19 @@
 
 import pandas as pd
 import pypdfium2 as pdfium
-from unstructured_client import UnstructuredClient
-from unstructured_client.models import operations
-from unstructured_client.models import shared
-from unstructured_client.utils import BackoffStrategy
-from unstructured_client.utils import RetryConfig
+
+try:
+    from unstructured_client import UnstructuredClient
+    from unstructured_client.models import operations
+    from unstructured_client.models import shared
+    from unstructured_client.utils import BackoffStrategy
+    from unstructured_client.utils import RetryConfig
+except ImportError:
+    UnstructuredClient = None
+    operations = None
+    shared = None
+    BackoffStrategy = None
+    RetryConfig = None
 
 from nv_ingest_api.internal.enums.common import AccessLevelEnum, DocumentTypeEnum
 from nv_ingest_api.internal.enums.common import ContentTypeEnum

@@ -6,15 +6,19 @@
 from typing import Optional
 
 import backoff
-import cv2
 import numpy as np
 import requests
 
 from nv_ingest_api.internal.primitives.nim.model_interface.decorators import multiprocessing_cache
 from nv_ingest_api.util.image_processing.transforms import pad_image, normalize_image
 from nv_ingest_api.util.string_processing import generate_url, remove_url_endpoints
 
-cv2.setNumThreads(1)
+try:
+    import cv2
+
+    cv2.setNumThreads(1)
+except ImportError:
+    cv2 = None
 logger = logging.getLogger(__name__)
 
 

@@ -3,8 +3,6 @@
 # SPDX-License-Identifier: Apache-2.0
 
 
-import langdetect
-
 from nv_ingest_api.internal.enums.common import LanguageEnum
 from nv_ingest_api.util.exception_handlers.detectors import langdetect_exception_handler
 
@@ -24,6 +22,10 @@ def detect_language(text):
     LanguageEnum
         A value from `LanguageEnum` detected language code.
     """
+    try:
+        import langdetect
+    except ImportError:
+        return LanguageEnum.UNKNOWN
 
     try:
         language = langdetect.detect(text)

@@ -8,7 +8,10 @@
 from typing import Callable
 from typing import Dict
 
-from langdetect.lang_detect_exception import LangDetectException
+try:
+    from langdetect.lang_detect_exception import LangDetectException as _LangDetectException
+except ImportError:
+    _LangDetectException = None
 
 from nv_ingest_api.internal.enums.common import LanguageEnum
 
@@ -66,9 +69,10 @@ def langdetect_exception_handler(func: Callable, **kwargs: Dict[str, Any]) -> Ca
     def inner_function(*args, **kwargs):
         try:
             return func(*args, **kwargs)
-        except LangDetectException as e:
-            log_error_message = f"LangDetectException: {e}"
-            logger.warning(log_error_message)
-            return LanguageEnum.UNKNOWN
+        except Exception as e:
+            if _LangDetectException is not None and isinstance(e, _LangDetectException):
+                logger.warning(f"LangDetectException: {e}")
+                return LanguageEnum.UNKNOWN
+            raise
 
     return inner_function
@@ -8,7 +8,6 @@
 
 import numpy as np
 import pandas as pd
-from sklearn.cluster import DBSCAN
 
 
 logger = logging.getLogger(__name__)
@@ -173,10 +172,14 @@ def convert_ocr_response_to_psuedo_markdown(bboxes, texts):
     )
     preds_df = preds_df.sort_values("y0")
 
-    dbscan = DBSCAN(eps=10, min_samples=1)
-    dbscan.fit(preds_df["y0"].values[:, None])
+    try:
+        from sklearn.cluster import DBSCAN
 
-    preds_df["cluster"] = dbscan.labels_
+        dbscan = DBSCAN(eps=10, min_samples=1)
+        dbscan.fit(preds_df["y0"].values[:, None])
+        preds_df["cluster"] = dbscan.labels_
+    except ImportError:
+        preds_df["cluster"] = (preds_df["y0"] / 10).round().astype(int)
     preds_df = preds_df.sort_values(["cluster", "x0"])
 
     results = ""
@@ -483,12 +486,14 @@ def reorder_boxes(boxes, texts, confs, mode="top_left", dbscan_eps=10):
     if dbscan_eps:
         do_naive_sorting = False
         try:
+            from sklearn.cluster import DBSCAN
+
             dbscan = DBSCAN(eps=dbscan_eps, min_samples=1)
             dbscan.fit(df["y"].values[:, None])
             df["cluster"] = dbscan.labels_
             df["cluster_centers"] = df.groupby("cluster")["y"].transform("mean").astype(int)
             df = df.sort_values(["cluster_centers", "x"], ascending=[True, True], ignore_index=True)
-        except ValueError:
+        except (ImportError, ValueError):
             do_naive_sorting = True
     else:
         do_naive_sorting = True