Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 42 additions & 0 deletions backend/src/analytics_agent/skills/datahub_skills.py
Original file line number Diff line number Diff line change
Expand Up @@ -356,6 +356,33 @@ def _save_correction_impl(
# ---------------------------------------------------------------------------


_EMPTY_FOLLOWUP_HINT = (
"No business context found across documentation, glossary terms, domains, "
"or data products. As a fallback, this skill also queried the catalog for "
"matching datasets — see `catalog_fallback` above. If it returned URNs, "
"call `get_entities` on them to read full metadata before answering. "
"Only conclude that the entity is absent from the catalog when "
"`catalog_fallback` is also empty."
)


def _all_results_empty(results: dict) -> bool:
"""True iff every sub-search in `results` returned no hits.

A sub-search counts as 'no hits' if it errored, has no searchResults list,
or returned a list of length zero.
"""
for value in results.values():
if not isinstance(value, dict):
continue
if "error" in value:
continue
items = value.get("searchResults")
if isinstance(items, list) and len(items) > 0:
return False
return True


def _search_business_context_impl(topic: str) -> dict:
"""Fan out to DataHub docs, glossary terms, domains, and data products for a topic."""
from analytics_agent.context.datahub import get_datahub_client
Expand Down Expand Up @@ -394,6 +421,21 @@ def _search_business_context_impl(topic: str) -> dict:
except Exception as e:
results[label] = {"error": str(e)}

# When all four business-context sub-searches return empty, the user's topic
# may still name a real entity that simply lacks governance metadata. Fire a
# dataset search by name so the agent doesn't conflate "no docs" with
# "doesn't exist" — see SKILL.md fall-through guidance.
if _all_results_empty(results):
try:
results["catalog_fallback"] = search(
query=topic,
filter="entity_type = dataset",
num_results=10,
)
except Exception as e:
results["catalog_fallback"] = {"error": str(e)}
results["_followup_hint"] = _EMPTY_FOLLOWUP_HINT

return results


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,25 @@ the doc, glossary term, or data product that led you there. If documentation
and catalog results disagree, state the conflict explicitly and resolve it
before proceeding.

**If nothing is found**, note the gap and proceed with catalog search
(`search` + `get_entities`), but flag to the user that no governed definition
exists. After answering, suggest using `/improve-context` to capture what you
learned.
---

### When all sub-searches return empty

`search_business_context` covers documentation, glossary terms, domains, and
data products — **not raw entities like datasets or dashboards**. An empty
result therefore means *no governed definition or documentation* for the
topic; it does **not** mean the entity is absent from the catalog.

When this happens, the skill automatically fires a catalog dataset search
by name and returns the results under a `catalog_fallback` key. Inspect it
before concluding non-existence:

- If `catalog_fallback.searchResults` is non-empty, call `get_entities` on
the returned URNs to read schema, ownership, and other metadata. Report
what you find and flag that no governed definition exists — then suggest
`/improve-context` to capture what you learn.
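- If `catalog_fallback.searchResults` is also empty, no dataset matching the
topic was found and the entity is likely absent from the catalog; you may
tell the user so. If `catalog_fallback` contains an `error` instead, the
fallback search itself failed, so do not conclude that the entity is absent.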

Do **not** call the SQL engine's `list_tables` to look for the entity — it
searches the connected query database, not the DataHub catalog.
50 changes: 50 additions & 0 deletions tests/unit/test_skill_business_context.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
"""Tests for the search_business_context skill helpers."""

from __future__ import annotations

from analytics_agent.skills.datahub_skills import _all_results_empty


def test_all_empty_when_every_subsearch_has_no_results():
    """Four sub-searches that each return zero hits are reported as empty."""
    labels = ("documentation", "glossary_terms", "domains", "data_products")
    payload = {label: {"searchResults": [], "total": 0} for label in labels}

    assert _all_results_empty(payload) is True


def test_not_empty_when_any_subsearch_has_a_hit():
    """A single hit in one sub-search makes the overall result non-empty."""
    no_hits = {"searchResults": [], "total": 0}
    glossary_hit = {
        "searchResults": [{"entity": {"urn": "urn:li:glossaryTerm:revenue"}}],
        "total": 1,
    }
    payload = {
        "documentation": dict(no_hits),
        "glossary_terms": glossary_hit,
        "domains": dict(no_hits),
        "data_products": dict(no_hits),
    }

    assert _all_results_empty(payload) is False


def test_errors_count_as_empty():
    """An errored sub-search must not be mistaken for a hit."""
    payload = {"documentation": {"error": "API down"}}
    # The remaining sub-searches succeed but find nothing.
    for label in ("glossary_terms", "domains", "data_products"):
        payload[label] = {"searchResults": [], "total": 0}

    assert _all_results_empty(payload) is True


def test_missing_search_results_key_counts_as_empty():
    """Payloads lacking `searchResults` are treated as empty, not as a crash."""
    facets_only = {"facets": {}}
    bare = {}
    payload = dict(
        documentation=facets_only,
        glossary_terms=bare,
        domains={"searchResults": []},
        data_products={"searchResults": []},
    )

    assert _all_results_empty(payload) is True
Loading