diff --git a/backend/src/analytics_agent/skills/datahub_skills.py b/backend/src/analytics_agent/skills/datahub_skills.py
index a98ee58..10920f1 100644
--- a/backend/src/analytics_agent/skills/datahub_skills.py
+++ b/backend/src/analytics_agent/skills/datahub_skills.py
@@ -356,6 +356,38 @@ def _save_correction_impl(
 # ---------------------------------------------------------------------------
 
 
+# Follow-up guidance for the agent when every business-context sub-search
+# comes back without hits: check the dataset fallback before reporting the
+# entity as missing.
+_EMPTY_FOLLOWUP_HINT = (
+    "No business context was returned across documentation, glossary terms, "
+    "domains, or data products (a sub-search reporting an `error` failed; it "
+    "did not come back empty). As a fallback, this skill also queried the "
+    "catalog for matching datasets — see `catalog_fallback` above. If it "
+    "returned URNs, call `get_entities` on them to read full metadata before "
+    "answering. Only conclude that the entity is absent from the catalog when "
+    "`catalog_fallback` is also empty; if `catalog_fallback` reports an "
+    "`error`, say that the catalog lookup failed instead."
+)
+
+
+def _all_results_empty(results: dict) -> bool:
+    """Return True when no sub-search in `results` produced a hit.
+
+    An entry counts as 'no hits' when it is not a dict, carries an "error"
+    key, or has no non-empty `searchResults` list. An empty `results` mapping
+    therefore also counts as empty.
+    """
+    for entry in results.values():
+        # Non-dict payloads and errored sub-searches cannot contribute hits.
+        if not isinstance(entry, dict) or "error" in entry:
+            continue
+        hits = entry.get("searchResults")
+        if isinstance(hits, list) and hits:
+            return False
+    return True
+
+
 def _search_business_context_impl(topic: str) -> dict:
     """Fan out to DataHub docs, glossary terms, domains, and data products for a topic."""
     from analytics_agent.context.datahub import get_datahub_client
@@ -394,6 +426,21 @@ def _search_business_context_impl(topic: str) -> dict:
         except Exception as e:
             results[label] = {"error": str(e)}
 
+    # When all four business-context sub-searches yield no hits (errors count as
+    # none), the topic may still name a real entity that simply lacks governance
+    # metadata. Fire a dataset search by name so the agent doesn't conflate
+    # "no docs" with "doesn't exist" — see SKILL.md fall-through guidance.
+    if _all_results_empty(results):
+        try:
+            results["catalog_fallback"] = search(
+                query=topic,
+                filter="entity_type = dataset",
+                num_results=10,
+            )
+        except Exception as e:
+            results["catalog_fallback"] = {"error": str(e)}
+        results["_followup_hint"] = _EMPTY_FOLLOWUP_HINT
+
     return results
 
 
diff --git a/backend/src/analytics_agent/skills/search-business-context/SKILL.md b/backend/src/analytics_agent/skills/search-business-context/SKILL.md
index cd29e0b..eefab41 100644
--- a/backend/src/analytics_agent/skills/search-business-context/SKILL.md
+++ b/backend/src/analytics_agent/skills/search-business-context/SKILL.md
@@ -103,7 +103,28 @@ the doc, glossary term, or data product that led you there.
 If documentation and catalog results disagree, state the conflict explicitly
 and resolve it before proceeding.
 
-**If nothing is found**, note the gap and proceed with catalog search
-(`search` + `get_entities`), but flag to the user that no governed definition
-exists. After answering, suggest using `/improve-context` to capture what you
-learned.
+---
+
+### When all sub-searches return empty
+
+`search_business_context` covers documentation, glossary terms, domains, and
+data products — **not raw entities like datasets or dashboards**. An empty
+result therefore means *no governed definition or documentation* for the
+topic; it does **not** mean the entity is absent from the catalog.
+
+When this happens, the skill automatically fires a catalog dataset search
+by name and returns the results under a `catalog_fallback` key. Inspect it
+before concluding non-existence:
+
+- If `catalog_fallback.searchResults` is non-empty, call `get_entities` on
+  the returned URNs to read schema, ownership, and other metadata. Report
+  what you find and flag that no governed definition exists — then suggest
+  `/improve-context` to capture what you learn.
+- If `catalog_fallback.searchResults` is empty, no dataset matched the topic
+  and the entity is likely absent from the catalog; you may tell the user so.
+- If `catalog_fallback` contains an `error` instead, the fallback search
+  itself failed. Report that the catalog lookup failed rather than
+  concluding the entity is absent.
+
+Do **not** call the SQL engine's `list_tables` to look for the entity — it
+searches the connected query database, not the DataHub catalog.
diff --git a/tests/unit/test_skill_business_context.py b/tests/unit/test_skill_business_context.py
new file mode 100644
index 0000000..9032b46
--- /dev/null
+++ b/tests/unit/test_skill_business_context.py
@@ -0,0 +1,68 @@
+"""Tests for the search_business_context skill helpers."""
+
+from __future__ import annotations
+
+from analytics_agent.skills.datahub_skills import _all_results_empty
+
+
+def test_all_empty_when_every_subsearch_has_no_results():
+    """Four sub-searches with empty `searchResults` lists are 'all empty'."""
+    results = {
+        "documentation": {"searchResults": [], "total": 0},
+        "glossary_terms": {"searchResults": [], "total": 0},
+        "domains": {"searchResults": [], "total": 0},
+        "data_products": {"searchResults": [], "total": 0},
+    }
+    assert _all_results_empty(results) is True
+
+
+def test_not_empty_when_any_subsearch_has_a_hit():
+    """A single non-empty `searchResults` list is enough to be 'not empty'."""
+    results = {
+        "documentation": {"searchResults": [], "total": 0},
+        "glossary_terms": {
+            "searchResults": [{"entity": {"urn": "urn:li:glossaryTerm:revenue"}}],
+            "total": 1,
+        },
+        "domains": {"searchResults": [], "total": 0},
+        "data_products": {"searchResults": [], "total": 0},
+    }
+    assert _all_results_empty(results) is False
+
+
+def test_errors_count_as_empty():
+    """A sub-search that errored is not a 'found something'."""
+    results = {
+        "documentation": {"error": "API down"},
+        "glossary_terms": {"searchResults": [], "total": 0},
+        "domains": {"searchResults": [], "total": 0},
+        "data_products": {"searchResults": [], "total": 0},
+    }
+    assert _all_results_empty(results) is True
+
+
+def test_missing_search_results_key_counts_as_empty():
+    """Unknown / partial dict shape is treated as empty rather than crashing."""
+    results = {
+        "documentation": {"facets": {}},
+        "glossary_terms": {},
+        "domains": {"searchResults": []},
+        "data_products": {"searchResults": []},
+    }
+    assert _all_results_empty(results) is True
+
+
+def test_non_dict_entries_count_as_empty():
+    """Values that are not dicts are skipped instead of crashing or counting as hits."""
+    results = {
+        "documentation": None,
+        "glossary_terms": "unexpected",
+        "domains": {"searchResults": []},
+        "data_products": {"searchResults": []},
+    }
+    assert _all_results_empty(results) is True
+
+
+def test_empty_mapping_counts_as_empty():
+    """With no sub-search entries at all there is nothing that could be a hit."""
+    assert _all_results_empty({}) is True