datahub-project · shawnxiao105-afk · May 25, 2026
diff --git a/backend/src/analytics_agent/skills/datahub_skills.py b/backend/src/analytics_agent/skills/datahub_skills.py
@@ -356,6 +356,33 @@ def _save_correction_impl(
 # ---------------------------------------------------------------------------
 
 
+_EMPTY_FOLLOWUP_HINT = (  # stored under "_followup_hint" when every sub-search is empty
+    "No business context found across documentation, glossary terms, domains, "
+    "or data products. As a fallback, this skill also queried the catalog for "
+    "matching datasets — see `catalog_fallback` above. If it returned URNs, "
+    "call `get_entities` on them to read full metadata before answering. "
+    "Only conclude that the entity is absent from the catalog when "
+    "`catalog_fallback` is also empty."
+)
+
+
+def _all_results_empty(results: dict) -> bool:
+    """Return True iff no sub-search in `results` produced at least one hit.
+
+    A value counts as 'no hits' if it is not a dict, has an "error" key, has no
+    `searchResults` list, or its list is empty. An empty `results` is also True.
+    """
+    for value in results.values():
+        if not isinstance(value, dict):  # unexpected shape: cannot hold hits
+            continue
+        if "error" in value:  # a failed sub-search is never treated as a hit
+            continue
+        items = value.get("searchResults")
+        if isinstance(items, list) and len(items) > 0:
+            return False
+    return True
+
+
 def _search_business_context_impl(topic: str) -> dict:
     """Fan out to DataHub docs, glossary terms, domains, and data products for a topic."""
     from analytics_agent.context.datahub import get_datahub_client
@@ -394,6 +421,21 @@ def _search_business_context_impl(topic: str) -> dict:
             except Exception as e:
                 results[label] = {"error": str(e)}
 
+        # When all four business-context sub-searches return empty, the user's topic
+        # may still name a real entity that simply lacks governance metadata. Fire a
+        # dataset search by name so the agent doesn't conflate "no docs" with
+        # "doesn't exist" — see SKILL.md fall-through guidance.
+        if _all_results_empty(results):
+            try:
+                results["catalog_fallback"] = search(
+                    query=topic,
+                    filter="entity_type = dataset",
+                    num_results=10,
+                )
+            except Exception as e:
+                results["catalog_fallback"] = {"error": str(e)}
+            results["_followup_hint"] = _EMPTY_FOLLOWUP_HINT
+
     return results
 
 

diff --git a/backend/src/analytics_agent/skills/search-business-context/SKILL.md b/backend/src/analytics_agent/skills/search-business-context/SKILL.md
@@ -103,7 +103,25 @@ the doc, glossary term, or data product that led you there. If documentation
 and catalog results disagree, state the conflict explicitly and resolve it
 before proceeding.
 
-**If nothing is found**, note the gap and proceed with catalog search
-(`search` + `get_entities`), but flag to the user that no governed definition
-exists. After answering, suggest using `/improve-context` to capture what you
-learned.
+---
+
+### When all sub-searches return empty
+
+`search_business_context` covers documentation, glossary terms, domains, and
+data products — **not raw entities like datasets or dashboards**. An empty
+result therefore means *no governed definition or documentation* for the
+topic; it does **not** mean the entity is absent from the catalog.
+
+When this happens, the skill automatically fires a catalog dataset search
+by name and returns the results under a `catalog_fallback` key. Inspect it
+before concluding non-existence:
+
+- If `catalog_fallback.searchResults` is non-empty, call `get_entities` on
+  the returned URNs to read schema, ownership, and other metadata. Report
+  what you find and flag that no governed definition exists — then suggest
+  `/improve-context` to capture what you learn.
+- If `catalog_fallback.searchResults` is also empty, the entity is likely absent from the
+  catalog; you may tell the user so. If it holds an `error` instead, report the failed lookup.
+
+Do **not** call the SQL engine's `list_tables` to look for the entity — it
+searches the connected query database, not the DataHub catalog.
diff --git a/tests/unit/test_skill_business_context.py b/tests/unit/test_skill_business_context.py
@@ -0,0 +1,50 @@
+"""Tests for the search_business_context skill helpers."""
+
+from __future__ import annotations
+
+from analytics_agent.skills.datahub_skills import _all_results_empty
+
+
+def test_all_empty_when_every_subsearch_has_no_results():
+    results = {  # every sub-search succeeded but returned zero hits
+        "documentation": {"searchResults": [], "total": 0},
+        "glossary_terms": {"searchResults": [], "total": 0},
+        "domains": {"searchResults": [], "total": 0},
+        "data_products": {"searchResults": [], "total": 0},
+    }
+    assert _all_results_empty(results) is True
+
+
+def test_not_empty_when_any_subsearch_has_a_hit():
+    results = {
+        "documentation": {"searchResults": [], "total": 0},
+        "glossary_terms": {  # the only sub-search with a hit; it alone must yield False
+            "searchResults": [{"entity": {"urn": "urn:li:glossaryTerm:revenue"}}],
+            "total": 1,
+        },
+        "domains": {"searchResults": [], "total": 0},
+        "data_products": {"searchResults": [], "total": 0},
+    }
+    assert _all_results_empty(results) is False
+
+
+def test_errors_count_as_empty():
+    """An errored sub-search is skipped, so it cannot make the result non-empty."""
+    results = {
+        "documentation": {"error": "API down"},
+        "glossary_terms": {"searchResults": [], "total": 0},
+        "domains": {"searchResults": [], "total": 0},
+        "data_products": {"searchResults": [], "total": 0},
+    }
+    assert _all_results_empty(results) is True
+
+
+def test_missing_search_results_key_counts_as_empty():
+    """Dicts lacking a `searchResults` list are treated as empty rather than crashing."""
+    results = {
+        "documentation": {"facets": {}},  # has keys, but no searchResults
+        "glossary_terms": {},  # completely empty payload
+        "domains": {"searchResults": []},
+        "data_products": {"searchResults": []},
+    }
+    assert _all_results_empty(results) is True