From 1a8f552dff3f6b1e198bd4a4d653e3d03aa3a893 Mon Sep 17 00:00:00 2001
From: traut
Date: Fri, 1 Aug 2025 22:46:30 +0200
Subject: [PATCH 01/93] Style fixes

---
 detection_rules/rule.py | 19 +++++++++----------
 1 file changed, 9 insertions(+), 10 deletions(-)

diff --git a/detection_rules/rule.py b/detection_rules/rule.py
index 0c293141b70..983f1b04555 100644
--- a/detection_rules/rule.py
+++ b/detection_rules/rule.py
@@ -1432,15 +1432,14 @@ def get_packaged_integrations(

         # if both exist, rule tags are only used if defined in definitions for non-dataset packages
         # of machine learning analytic packages
-        rule_integrations = meta.get("integration", [])
-        if rule_integrations:
-            for integration in rule_integrations:
-                ineligible_integrations = [
-                    *definitions.NON_DATASET_PACKAGES,
-                    *map(str.lower, definitions.MACHINE_LEARNING_PACKAGES),
-                ]
-                if integration in ineligible_integrations or isinstance(data, MachineLearningRuleData):
-                    packaged_integrations.append({"package": integration, "integration": None})
+        rule_integrations = meta.get("integration") or []
+        for integration in rule_integrations:
+            ineligible_integrations = [
+                *definitions.NON_DATASET_PACKAGES,
+                *map(str.lower, definitions.MACHINE_LEARNING_PACKAGES),
+            ]
+            if integration in ineligible_integrations or isinstance(data, MachineLearningRuleData):
+                packaged_integrations.append({"package": integration, "integration": None})

         packaged_integrations.extend(parse_datasets(list(datasets), package_manifest))

@@ -1754,7 +1753,7 @@ def parse_datasets(datasets: list[str], package_manifest: dict[str, Any]) -> lis
             else:
                 package = value

-            if package in list(package_manifest):
+            if package in package_manifest:
                 packaged_integrations.append({"package": package, "integration": integration})
     return packaged_integrations


From aa21e969deedaa4660da06cfaf02db5daba18ccf Mon Sep 17 00:00:00 2001
From: traut
Date: Fri, 1 Aug 2025 22:48:10 +0200
Subject: [PATCH 02/93] Typo fix

---
 hunting/definitions.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hunting/definitions.py b/hunting/definitions.py
index 01e519958e3..717196b6712 100644
--- a/hunting/definitions.py
+++ b/hunting/definitions.py
@@ -59,5 +59,5 @@ def validate_esql_query(self, query: str) -> None:
         # Check if either "stats by" or "| keep" exists in the query
         if not stats_by_pattern.search(query) and not keep_pattern.search(query):
             raise ValueError(
-                f"Hunt: {self.name} contains an ES|QL query that mustcontain either 'stats by' or 'keep' functions."
+ f"Hunt: {self.name} contains an ES|QL query that must contain either 'stats by' or 'keep' functions" ) From 46248f7e8a45a9d8db44b320f36306ae2cc3d193 Mon Sep 17 00:00:00 2001 From: traut Date: Fri, 1 Aug 2025 22:52:03 +0200 Subject: [PATCH 03/93] Initial logic --- detection_rules/rule_validators.py | 185 ++++++++++++++++++++++++++++- detection_rules/utils.py | 13 ++ tests/test_rules_remote.py | 37 +++++- 3 files changed, 227 insertions(+), 8 deletions(-) diff --git a/detection_rules/rule_validators.py b/detection_rules/rule_validators.py index cbdd7fe2eb2..894c1499ecb 100644 --- a/detection_rules/rule_validators.py +++ b/detection_rules/rule_validators.py @@ -6,6 +6,7 @@ """Validation logic for rules containing queries.""" import re +import time import typing from collections.abc import Callable from enum import Enum @@ -15,16 +16,18 @@ import click import eql # type: ignore[reportMissingTypeStubs] import kql # type: ignore[reportMissingTypeStubs] +from elasticsearch import Elasticsearch # type: ignore[reportMissingTypeStubs] from eql import ast # type: ignore[reportMissingTypeStubs] from eql.parser import KvTree, LarkToEQL, NodeInfo, TypeHint # type: ignore[reportMissingTypeStubs] from eql.parser import _parse as base_parse # type: ignore[reportMissingTypeStubs] +from kibana import Kibana # type: ignore[reportMissingTypeStubs] from marshmallow import ValidationError from semver import Version -from . import ecs, endgame +from . import ecs, endgame, integrations, utils from .config import CUSTOM_RULES_DIR, load_current_package_version, parse_rules_config from .custom_schemas import update_auto_generated_schema -from .integrations import get_integration_schema_data, load_integrations_manifests +from .integrations import get_integration_schema_data, load_integrations_manifests, load_integrations_schemas from .rule import EQLRuleData, QueryRuleData, QueryValidator, RuleMeta, TOMLRuleContents, set_eql_config from .schemas import get_stack_schemas @@ -647,3 +650,181 @@ def extract_error_field(source: str, exc: eql.EqlParseError | kql.KqlParseError) start = exc.column # type: ignore[reportUnknownMemberType] stop = start + len(exc.caret.strip()) # type: ignore[reportUnknownVariableType] return re.sub(r"^\W+|\W+$", "", line[start:stop]) # type: ignore[reportUnknownArgumentType] + + +def validate_esql_rule(kibana_client: Kibana, elastic_client: Elasticsearch, contents: TOMLRuleContents) -> None: + + rule_id = contents.data.rule_id + + def log(val: str) -> None: + print(f"{rule_id}:", val) + + kibana_details = kibana_client.get("/api/status") + stack_version = kibana_details["version"]["number"] + + log(f"Validating against {stack_version} stack") + + indices_str, indices = utils.get_esql_query_indices(contents.data.query) + log(f"Extracted indices from query: {', '.join(indices)}") + + # Get mappings for all matching existing index templates + + existing_mappings = {} + + for index in indices: + index_tmpl_mappings = get_simulated_template_mappings(elastic_client, index) + merge_dicts(existing_mappings, index_tmpl_mappings) + + log(f"Collected mappigns: {len(existing_mappings)}") + + # Collect mappings for the integrations + + rule_integrations = [] + if contents.metadata.integration: + if isinstance(contents.metadata.integration, list): + rule_integrations = contents.metadata.integration + else: + rule_integrations = [contents.metadata.integration] + + if "endpoint." 
in index: + rule_integrations.append("endpoint") + + log(f"Working with rule integrations: {', '.join(rule_integrations)}") + + package_manifests = load_integrations_manifests() + integration_schemas = load_integrations_schemas() + + integration_mappings = {} + + for integration in rule_integrations: + # Assume the integration value is a package name + package = integration + + package_version, _ = integrations.find_latest_compatible_version( + package, None, Version.parse(stack_version), package_manifests, + ) + + package_schema = integration_schemas[package][package_version] + + for stream in package_schema: + flat_schema = package_schema[stream] + stream_mappings = flat_schema_to_mapping(flat_schema) + merge_dicts(integration_mappings, stream_mappings) + + log(f"Integration mappings prepared: {len(integration_mappings)}") + + combined_mappings = {} + merge_dicts(combined_mappings, existing_mappings) + merge_dicts(combined_mappings, integration_mappings) + + # Creating a test index with the test name + suffix = str(int(time.time())) + test_index = f"rule-test-index-{suffix}" + + # Setting up missing mapping properties + + # FIXME: `alias` types require `path` arguments that are missing in the integration mappings + # https://www.elastic.co/docs/reference/elasticsearch/mapping-reference/field-alias + + # add missing `scaling_factor` + # https://www.elastic.co/docs/reference/elasticsearch/mapping-reference/number#scaled-float-params + set_scaling_factor(combined_mappings) + + # add @timestamp to the mappings + combined_mappings["@timestamp"] = {"type": "date"} + + # creating an index + response = elastic_client.indices.create( + index=test_index, + mappings={"properties": combined_mappings}, + settings={ + "index.mapping.total_fields.limit": 10000, + "index.mapping.nested_fields.limit": 500, + "index.mapping.nested_objects.limit": 10000, + }, + ) + log(f"Index `{test_index}` created: {response}") + + # Replace all sources with the test index + query = contents.data.query + query = query.replace(indices_str, test_index) + + log(f"Executing a query against `{test_index}`") + + response = elastic_client.esql.query(query=query) + + query_columns = response.get("columns", []) + query_column_names = [c["name"] for c in query_columns] + + log(f"Got query columns: {', '.join(query_column_names)}") + + # FIXME: validate the dynamic columns + + +def get_simulated_template_mappings(elastic_client: Elasticsearch, name: str) -> dict[str, Any]: + """ + Return the mappings from the index configuration that would be applied + to the specified index from an existing index template + + https://elasticsearch-py.readthedocs.io/en/stable/api/indices.html#elasticsearch.client.IndicesClient.simulate_index_template + """ + template = elastic_client.indices.simulate_index_template(name=name) + if not template: + return {} + return template["template"]["mappings"]["properties"] + + +def get_indices(elastic_client: Kibana, index: str) -> list[str]: + """Fetch indices that match the provided name from Elasticsearch""" + # `index` supports wildcards + return [i["index"] for i in elastic_client.cat.indices(index=index, format="json")] + + +def merge_dicts(dest: dict[Any, Any], src: dict[Any, Any]) -> dict[Any, Any]: + """Merge two dictionaries recursively.""" + for k, v in src.items(): + if k in dest and isinstance(dest[k], dict) and isinstance(v, dict): + merge_dicts(dest[k], v) + else: + dest[k] = v + + +def flat_schema_to_mapping(flat_mapping: dict[str, str]) -> dict[str, Any]: + """ + Convert dicts with flat JSON 
paths and values into a nested mapping with + intermediary `properties` and `type` fields. + """ + + result = {} + + for key, value in flat_mapping.items(): + path = key.split(".") + + # add "properties" wrappers + extended_path = [] + for part in path: + extended_path.append(part) + extended_path.append("properties") + + # drop last `properties` + extended_path.pop() + extended_path.append("type") + + new_key = ".".join(extended_path) + utils.set_nested_value(result, new_key, value) + + return result + + +def set_scaling_factor(val: Any) -> None: + """ + Recursively set `scaling_factor` property for `scaled_float` field type. + """ + if isinstance(val, dict): + if "type" in val and val["type"] == "scaled_float": + val["scaling_factor"] = 1000 + return + + for v in val.values(): + set_scaling_factor(v) + diff --git a/detection_rules/utils.py b/detection_rules/utils.py index c1d3a9e4dfb..fac4fbebe29 100644 --- a/detection_rules/utils.py +++ b/detection_rules/utils.py @@ -527,3 +527,16 @@ def get_identifiers(self) -> list[str]: # another group we're not expecting raise ValueError("Unrecognized named group in pattern", self.pattern) return ids + + +FROM_SOURCES_REGEX = re.compile(r"^\s*FROM\s+(?P.+?)\s*(?:\||\bmetadata\b|//|$)", re.IGNORECASE | re.MULTILINE) + + +def get_esql_query_indices(query: str) -> tuple[str, list[str]]: + match = FROM_SOURCES_REGEX.search(query) + + if not match: + return "", [] + + sources_str = match.group("sources") + return sources_str, [source.strip() for source in sources_str.split(",")] diff --git a/tests/test_rules_remote.py b/tests/test_rules_remote.py index 11ff1c36be3..60dceb53dc2 100644 --- a/tests/test_rules_remote.py +++ b/tests/test_rules_remote.py @@ -5,8 +5,9 @@ import unittest -from detection_rules.misc import get_default_config -from detection_rules.remote_validation import RemoteValidator +from detection_rules import ecs +from detection_rules.misc import get_default_config, get_elasticsearch_client, get_kibana_client, getdefault +from detection_rules.rule_validators import validate_esql_rule from .base import BaseRuleTest @@ -15,9 +16,33 @@ class TestRemoteRules(BaseRuleTest): """Test rules against a remote Elastic stack instance.""" - @unittest.skip("Temporarily disabled") def test_esql_rules(self): - """Temporarily explicitly test all ES|QL rules remotely pending parsing lib.""" + """Test all ES|QL rules against a cluster.""" + esql_rules = [r for r in self.all_rules if r.contents.data.type == "esql"] - rv = RemoteValidator(parse_config=True) - rv.validate_rules(esql_rules) + + print("ESQL rules loaded:", len(esql_rules)) + + # Temporarily limit the number of rules + esql_rules = esql_rules[:10] + + if not esql_rules: + return + + kibana_client = get_kibana_client( + api_key=getdefault("api_key")(), + cloud_id=getdefault("cloud_id")(), + kibana_url=getdefault("kibana_url")(), + space=getdefault("space")(), + ignore_ssl_errors=getdefault("ignore_ssl_errors")(), + ) + + elastic_client = get_elasticsearch_client( + api_key=getdefault("api_key")(), + cloud_id=getdefault("cloud_id")(), + elasticsearch_url=getdefault("elasticsearch_url")(), + ignore_ssl_errors=getdefault("ignore_ssl_errors")(), + ) + + for r in esql_rules: + validate_esql_rule(kibana_client, elastic_client, r.contents) From 08b0c15c816d7ed633c2074402bf76bb21cab5cb Mon Sep 17 00:00:00 2001 From: traut Date: Sat, 2 Aug 2025 02:19:39 +0200 Subject: [PATCH 04/93] Comments and small fixes --- detection_rules/rule_validators.py | 107 +++++++++++++++-------------- tests/test_rules_remote.py | 
19 +++-- 2 files changed, 68 insertions(+), 58 deletions(-) diff --git a/detection_rules/rule_validators.py b/detection_rules/rule_validators.py index 894c1499ecb..983843d81cf 100644 --- a/detection_rules/rule_validators.py +++ b/detection_rules/rule_validators.py @@ -16,7 +16,7 @@ import click import eql # type: ignore[reportMissingTypeStubs] import kql # type: ignore[reportMissingTypeStubs] -from elasticsearch import Elasticsearch # type: ignore[reportMissingTypeStubs] +from elasticsearch import Elasticsearch # type: ignore[reportMissingTypeStubs] from eql import ast # type: ignore[reportMissingTypeStubs] from eql.parser import KvTree, LarkToEQL, NodeInfo, TypeHint # type: ignore[reportMissingTypeStubs] from eql.parser import _parse as base_parse # type: ignore[reportMissingTypeStubs] @@ -653,7 +653,6 @@ def extract_error_field(source: str, exc: eql.EqlParseError | kql.KqlParseError) def validate_esql_rule(kibana_client: Kibana, elastic_client: Elasticsearch, contents: TOMLRuleContents) -> None: - rule_id = contents.data.rule_id def log(val: str) -> None: @@ -669,7 +668,7 @@ def log(val: str) -> None: # Get mappings for all matching existing index templates - existing_mappings = {} + existing_mappings: dict[str, Any] = {} for index in indices: index_tmpl_mappings = get_simulated_template_mappings(elastic_client, index) @@ -686,10 +685,10 @@ def log(val: str) -> None: else: rule_integrations = [contents.metadata.integration] - if "endpoint." in index: - rule_integrations.append("endpoint") - - log(f"Working with rule integrations: {', '.join(rule_integrations)}") + if len(rule_integrations) > 0: + log(f"Working with rule integrations: {', '.join(rule_integrations)}") + else: + log("No integrations found in the rule") package_manifests = load_integrations_manifests() integration_schemas = load_integrations_schemas() @@ -701,11 +700,15 @@ def log(val: str) -> None: package = integration package_version, _ = integrations.find_latest_compatible_version( - package, None, Version.parse(stack_version), package_manifests, + package, + "", + Version.parse(stack_version), + package_manifests, ) package_schema = integration_schemas[package][package_version] + # Add schemas for all streams in the package for stream in package_schema: flat_schema = package_schema[stream] stream_mappings = flat_schema_to_mapping(flat_schema) @@ -717,21 +720,18 @@ def log(val: str) -> None: merge_dicts(combined_mappings, existing_mappings) merge_dicts(combined_mappings, integration_mappings) - # Creating a test index with the test name - suffix = str(int(time.time())) - test_index = f"rule-test-index-{suffix}" - - # Setting up missing mapping properties + if not combined_mappings: + log("ERROR: no mappings found for the rule") + raise ValueError("No mappings found") - # FIXME: `alias` types require `path` arguments that are missing in the integration mappings - # https://www.elastic.co/docs/reference/elasticsearch/mapping-reference/field-alias + import json - # add missing `scaling_factor` - # https://www.elastic.co/docs/reference/elasticsearch/mapping-reference/number#scaled-float-params - set_scaling_factor(combined_mappings) + with open("./mappings.json", "w") as f: + _ = f.write(json.dumps(combined_mappings, indent=4, sort_keys=True)) - # add @timestamp to the mappings - combined_mappings["@timestamp"] = {"type": "date"} + # Creating a test index with the test name + suffix = str(int(time.time() * 1000)) + test_index = f"rule-test-index-{suffix}" # creating an index response = elastic_client.indices.create( @@ 
-749,13 +749,16 @@ def log(val: str) -> None: query = contents.data.query query = query.replace(indices_str, test_index) - log(f"Executing a query against `{test_index}`") - - response = elastic_client.esql.query(query=query) + try: + log(f"Executing a query against `{test_index}`") + response = elastic_client.esql.query(query=query) + log(f"Got query response: {response}") + query_columns = response.get("columns", []) + finally: + response = elastic_client.indices.delete(index=test_index) + log(f"Test index `{test_index}` deleted: {response}") - query_columns = response.get("columns", []) query_column_names = [c["name"] for c in query_columns] - log(f"Got query columns: {', '.join(query_column_names)}") # FIXME: validate the dynamic columns @@ -776,11 +779,11 @@ def get_simulated_template_mappings(elastic_client: Elasticsearch, name: str) -> def get_indices(elastic_client: Kibana, index: str) -> list[str]: """Fetch indices that match the provided name from Elasticsearch""" - # `index` supports wildcards + # `index` arg here supports wildcards return [i["index"] for i in elastic_client.cat.indices(index=index, format="json")] -def merge_dicts(dest: dict[Any, Any], src: dict[Any, Any]) -> dict[Any, Any]: +def merge_dicts(dest: dict[Any, Any], src: dict[Any, Any]): """Merge two dictionaries recursively.""" for k, v in src.items(): if k in dest and isinstance(dest[k], dict) and isinstance(v, dict): @@ -789,42 +792,40 @@ def merge_dicts(dest: dict[Any, Any], src: dict[Any, Any]) -> dict[Any, Any]: dest[k] = v -def flat_schema_to_mapping(flat_mapping: dict[str, str]) -> dict[str, Any]: +def flat_schema_to_mapping(flat_schema: dict[str, str]) -> dict[str, Any]: """ Convert dicts with flat JSON paths and values into a nested mapping with - intermediary `properties` and `type` fields. + intermediary `properties`, `fields` and `type` fields. """ + # Sorting here ensures that 'a.b' processed before 'a.b.c', allowing us to correctly + # detect and handle multi-fields. + sorted_items = sorted(flat_schema.items()) result = {} - for key, value in flat_mapping.items(): - path = key.split(".") - - # add "properties" wrappers - extended_path = [] - for part in path: - extended_path.append(part) - extended_path.append("properties") + for field_path, field_type in sorted_items: - # drop last `properties` - extended_path.pop() - extended_path.append("type") + parts = field_path.split(".") + current_level = result - new_key = ".".join(extended_path) - utils.set_nested_value(result, new_key, value) + for part in parts[:-1]: + node = current_level.setdefault(part, {}) - return result + if "type" in node and node["type"] not in ("nested", "object"): + current_level = node.setdefault("fields", {}) + else: + current_level = node.setdefault("properties", {}) + leaf_key = parts[-1] + current_level[leaf_key] = {"type": field_type} -def set_scaling_factor(val: Any) -> None: - """ - Recursively set `scaling_factor` property for `scaled_float` field type. 
- """ - if isinstance(val, dict): - if "type" in val and val["type"] == "scaled_float": - val["scaling_factor"] = 1000 - return + # add `scaling_factor` field missing in the schema + # https://www.elastic.co/docs/reference/elasticsearch/mapping-reference/number#scaled-float-params + if field_type == "scaled_float": + current_level[leaf_key]["scaling_factor"] = 1000 - for v in val.values(): - set_scaling_factor(v) + # add `path` field for `alias` fields, set to a dummy value + if field_type == "alias": + current_level[leaf_key]["path"] = "@timestamp" + return result diff --git a/tests/test_rules_remote.py b/tests/test_rules_remote.py index 60dceb53dc2..52c781913a9 100644 --- a/tests/test_rules_remote.py +++ b/tests/test_rules_remote.py @@ -5,7 +5,6 @@ import unittest -from detection_rules import ecs from detection_rules.misc import get_default_config, get_elasticsearch_client, get_kibana_client, getdefault from detection_rules.rule_validators import validate_esql_rule @@ -23,9 +22,6 @@ def test_esql_rules(self): print("ESQL rules loaded:", len(esql_rules)) - # Temporarily limit the number of rules - esql_rules = esql_rules[:10] - if not esql_rules: return @@ -44,5 +40,18 @@ def test_esql_rules(self): ignore_ssl_errors=getdefault("ignore_ssl_errors")(), ) + failed_count = 0 + for r in esql_rules: - validate_esql_rule(kibana_client, elastic_client, r.contents) + print() + try: + validate_esql_rule(kibana_client, elastic_client, r.contents) + except Exception as e: + print(f"FAILURE: {e}") + failed_count += 1 + + print(f"Total rules: {len(esql_rules)}") + print(f"Failed rules: {failed_count}") + + if failed_count > 0: + self.fail(f"Found {failed_count} invalid rules") From 65e5eda4c9682fd620f60c1dfc297537ec1986bf Mon Sep 17 00:00:00 2001 From: eric-forte-elastic Date: Wed, 20 Aug 2025 12:46:30 -0400 Subject: [PATCH 05/93] Add initial dynamic field validation --- detection_rules/rule_validators.py | 74 ++++++++++++++++++++++++++++-- tests/test_rules_remote.py | 2 + 2 files changed, 72 insertions(+), 4 deletions(-) diff --git a/detection_rules/rule_validators.py b/detection_rules/rule_validators.py index 6907a7e9d2a..8d69727e45e 100644 --- a/detection_rules/rule_validators.py +++ b/detection_rules/rule_validators.py @@ -652,9 +652,69 @@ def extract_error_field(source: str, exc: eql.EqlParseError | kql.KqlParseError) return re.sub(r"^\W+|\W+$", "", line[start:stop]) # type: ignore[reportUnknownArgumentType] +def traverse_schema( + keys: list[str], current_schema: dict[str, Any] | None, traversed: bool = False +) -> tuple[str | None, bool]: + """Recursively traverse the schema to find the type of the column.""" + key = keys[0] + if not current_schema: + return None, traversed + column = current_schema.get(key) or {} + column_type = column.get("type") if column else None + if not column_type and len(keys) > 1: + return traverse_schema(keys[1:], current_schema=column.get("properties"), traversed=True) + return column_type, traversed + + +def get_column_type_from_schemas(column_name: str, schemas: dict[str, Any]) -> str | None: + """Check if a column is present in the provided schema. 
If present, returns its type.""" + keys = column_name.split(".") + schema_type, traversed = traverse_schema(keys, schemas) + # FIXME if suffix, check matches ESQL function mapping (reverse recursive) + # suffix = parse_suffix_type + suffix = None + # If matches function mapping, use this function mapping's type, instead of schema_type + # If suffix and traversed, then return type, else unknown type + if suffix and traversed: + return suffix + return schema_type + + +def validate_columns_input_mapping(query_columns: list[dict[str, str]], combined_mappings: dict[str, Any]): + """Validate that the columns in the ESQL query match the provided mappings.""" + mismatched_columns: list[str] = [] + + for column in query_columns: + column_name = column["name"] + if not (column_name.startswith("Esql.") or column_name.startswith("Esql_priv.")): + # FIXME do we want to validate the columns against the schemas separately from the stack? + continue + column_type = column["type"] + formatted_column_name = column_name.replace("Esql.", "").replace("Esql_priv.", "").replace("_", ".") + + # Check if the column exists in combined_mappings or a valid field generated from a function or operator + schema_type = get_column_type_from_schemas(formatted_column_name, combined_mappings) + if not schema_type: + mismatched_columns.append(f"Column `{column_name}` is not defined in the mappings.") + continue + + # Validate the type + if column_type != schema_type: + mismatched_columns.append( + f"Type mismatch for column `{column_name}`: expected `{schema_type}`, got `{column_type}`." + ) + + # Raise an error if there are mismatches + if mismatched_columns: + raise ValueError("Column validation errors:\n" + "\n".join(mismatched_columns)) + + return True + + def validate_esql_rule(kibana_client: Kibana, elastic_client: Elasticsearch, contents: TOMLRuleContents) -> None: rule_id = contents.data.rule_id + # FIXME perhaps move this to utils def log(val: str) -> None: print(f"{rule_id}:", val) @@ -674,7 +734,7 @@ def log(val: str) -> None: index_tmpl_mappings = get_simulated_template_mappings(elastic_client, index) merge_dicts(existing_mappings, index_tmpl_mappings) - log(f"Collected mappigns: {len(existing_mappings)}") + log(f"Collected mappings: {len(existing_mappings)}") # Collect mappings for the integrations @@ -719,6 +779,8 @@ def log(val: str) -> None: combined_mappings = {} merge_dicts(combined_mappings, existing_mappings) merge_dicts(combined_mappings, integration_mappings) + # NOTE non-ecs schema needs to have formatting updates prior to merge + # merge_dicts(combined_mappings, ecs.get_non_ecs_schema()) if not combined_mappings: log("ERROR: no mappings found for the rule") @@ -726,6 +788,7 @@ def log(val: str) -> None: import json + # FIXME update this with utils function with open("./mappings.json", "w") as f: _ = f.write(json.dumps(combined_mappings, indent=4, sort_keys=True)) @@ -761,7 +824,11 @@ def log(val: str) -> None: query_column_names = [c["name"] for c in query_columns] log(f"Got query columns: {', '.join(query_column_names)}") - # FIXME: validate the dynamic columns + # FIXME Perhaps update rule_validator's get_required_fields as well + if validate_columns_input_mapping(query_columns, combined_mappings): + log("All column types match the mappings.") + else: + log("All column types DO NOT match the mappings.") def get_simulated_template_mappings(elastic_client: Elasticsearch, name: str) -> dict[str, Any]: @@ -783,7 +850,7 @@ def get_indices(elastic_client: Kibana, index: str) -> list[str]: return 
[i["index"] for i in elastic_client.cat.indices(index=index, format="json")] -def merge_dicts(dest: dict[Any, Any], src: dict[Any, Any]): +def merge_dicts(dest: dict[Any, Any], src: dict[Any, Any]) -> None: """Merge two dictionaries recursively.""" for k, v in src.items(): if k in dest and isinstance(dest[k], dict) and isinstance(v, dict): @@ -804,7 +871,6 @@ def flat_schema_to_mapping(flat_schema: dict[str, str]) -> dict[str, Any]: result = {} for field_path, field_type in sorted_items: - parts = field_path.split(".") current_level = result diff --git a/tests/test_rules_remote.py b/tests/test_rules_remote.py index 52c781913a9..415eb0f0714 100644 --- a/tests/test_rules_remote.py +++ b/tests/test_rules_remote.py @@ -41,6 +41,7 @@ def test_esql_rules(self): ) failed_count = 0 + fail_list = [] for r in esql_rules: print() @@ -48,6 +49,7 @@ def test_esql_rules(self): validate_esql_rule(kibana_client, elastic_client, r.contents) except Exception as e: print(f"FAILURE: {e}") + fail_list.append(f"FAILURE: {e}") failed_count += 1 print(f"Total rules: {len(esql_rules)}") From 2046d63b2fd20e4d9cc9b50764f272668153b144 Mon Sep 17 00:00:00 2001 From: eric-forte-elastic Date: Wed, 20 Aug 2025 13:24:42 -0400 Subject: [PATCH 06/93] Update dynamic field validation --- detection_rules/rule_validators.py | 58 +++++++++++++++--------------- 1 file changed, 30 insertions(+), 28 deletions(-) diff --git a/detection_rules/rule_validators.py b/detection_rules/rule_validators.py index 8d69727e45e..af052e5b680 100644 --- a/detection_rules/rule_validators.py +++ b/detection_rules/rule_validators.py @@ -666,18 +666,18 @@ def traverse_schema( return column_type, traversed -def get_column_type_from_schemas(column_name: str, schemas: dict[str, Any]) -> str | None: +def check_expected_dynamic_field(column_name: str, schemas: dict[str, Any]) -> list[str] | None: """Check if a column is present in the provided schema. If present, returns its type.""" + errors: list[str] = [] keys = column_name.split(".") - schema_type, traversed = traverse_schema(keys, schemas) - # FIXME if suffix, check matches ESQL function mapping (reverse recursive) + column_type, traversed = traverse_schema(keys, schemas) + if not traversed and not column_type: + errors.append(f"Dynamic field `{column_name}` is not based on known field in schema.") + + # FIXME perhaps check suffix format? if suffix, check matches ESQL function mapping (reverse recursive) # suffix = parse_suffix_type - suffix = None - # If matches function mapping, use this function mapping's type, instead of schema_type - # If suffix and traversed, then return type, else unknown type - if suffix and traversed: - return suffix - return schema_type + # if suffix_invalid add error + return errors if errors else None def validate_columns_input_mapping(query_columns: list[dict[str, str]], combined_mappings: dict[str, Any]): @@ -686,23 +686,24 @@ def validate_columns_input_mapping(query_columns: list[dict[str, str]], combined for column in query_columns: column_name = column["name"] - if not (column_name.startswith("Esql.") or column_name.startswith("Esql_priv.")): - # FIXME do we want to validate the columns against the schemas separately from the stack? 
- continue - column_type = column["type"] - formatted_column_name = column_name.replace("Esql.", "").replace("Esql_priv.", "").replace("_", ".") - - # Check if the column exists in combined_mappings or a valid field generated from a function or operator - schema_type = get_column_type_from_schemas(formatted_column_name, combined_mappings) - if not schema_type: - mismatched_columns.append(f"Column `{column_name}` is not defined in the mappings.") - continue - - # Validate the type - if column_type != schema_type: - mismatched_columns.append( - f"Type mismatch for column `{column_name}`: expected `{schema_type}`, got `{column_type}`." - ) + if column_name.startswith("Esql.") or column_name.startswith("Esql_priv."): + column_name = column_name.replace("Esql.", "").replace("Esql_priv.", "").replace("_", ".") + errors = check_expected_dynamic_field(column_name, combined_mappings) + if errors: + mismatched_columns.extend(errors) + else: + column_type = column["type"] + + # Check if the column exists in combined_mappings or a valid field generated from a function or operator + keys = column_name.split(".") + schema_type, _ = traverse_schema(keys, combined_mappings) + + # Validate the type + if not schema_type or column_type != schema_type: + mismatched_columns.append( + f"Dynamic field `{column_name}` is not correctly mapped. " + f"If not dynamic: expected `{schema_type}`, got `{column_type}`." + ) # Raise an error if there are mismatches if mismatched_columns: @@ -825,10 +826,11 @@ def log(val: str) -> None: log(f"Got query columns: {', '.join(query_column_names)}") # FIXME Perhaps update rule_validator's get_required_fields as well + # to everything needs to either be directly mapped to schema or be annotated as dynamic field if validate_columns_input_mapping(query_columns, combined_mappings): - log("All column types match the mappings.") + log("All dynamic columns have proper formatting.") else: - log("All column types DO NOT match the mappings.") + log("Dynamic column(s) have improper formatting.") def get_simulated_template_mappings(elastic_client: Elasticsearch, name: str) -> dict[str, Any]: From 5a2e81cd4997ca3adf4cd209750c8b2c73312d7c Mon Sep 17 00:00:00 2001 From: eric-forte-elastic Date: Wed, 20 Aug 2025 14:50:51 -0400 Subject: [PATCH 07/93] Remove sub query ecs enforcement --- detection_rules/rule_validators.py | 76 ++++++++++-------------------- 1 file changed, 25 insertions(+), 51 deletions(-) diff --git a/detection_rules/rule_validators.py b/detection_rules/rule_validators.py index af052e5b680..aa4a717626f 100644 --- a/detection_rules/rule_validators.py +++ b/detection_rules/rule_validators.py @@ -652,32 +652,16 @@ def extract_error_field(source: str, exc: eql.EqlParseError | kql.KqlParseError) return re.sub(r"^\W+|\W+$", "", line[start:stop]) # type: ignore[reportUnknownArgumentType] -def traverse_schema( - keys: list[str], current_schema: dict[str, Any] | None, traversed: bool = False -) -> tuple[str | None, bool]: +def traverse_schema(keys: list[str], current_schema: dict[str, Any] | None) -> str | None: """Recursively traverse the schema to find the type of the column.""" key = keys[0] if not current_schema: - return None, traversed + return None column = current_schema.get(key) or {} column_type = column.get("type") if column else None if not column_type and len(keys) > 1: - return traverse_schema(keys[1:], current_schema=column.get("properties"), traversed=True) - return column_type, traversed - - -def check_expected_dynamic_field(column_name: str, schemas: dict[str, Any]) -> 
list[str] | None: - """Check if a column is present in the provided schema. If present, returns its type.""" - errors: list[str] = [] - keys = column_name.split(".") - column_type, traversed = traverse_schema(keys, schemas) - if not traversed and not column_type: - errors.append(f"Dynamic field `{column_name}` is not based on known field in schema.") - - # FIXME perhaps check suffix format? if suffix, check matches ESQL function mapping (reverse recursive) - # suffix = parse_suffix_type - # if suffix_invalid add error - return errors if errors else None + return traverse_schema(keys[1:], current_schema=column.get("properties")) + return column_type def validate_columns_input_mapping(query_columns: list[dict[str, str]], combined_mappings: dict[str, Any]): @@ -687,23 +671,19 @@ def validate_columns_input_mapping(query_columns: list[dict[str, str]], combined for column in query_columns: column_name = column["name"] if column_name.startswith("Esql.") or column_name.startswith("Esql_priv."): - column_name = column_name.replace("Esql.", "").replace("Esql_priv.", "").replace("_", ".") - errors = check_expected_dynamic_field(column_name, combined_mappings) - if errors: - mismatched_columns.extend(errors) - else: - column_type = column["type"] - - # Check if the column exists in combined_mappings or a valid field generated from a function or operator - keys = column_name.split(".") - schema_type, _ = traverse_schema(keys, combined_mappings) - - # Validate the type - if not schema_type or column_type != schema_type: - mismatched_columns.append( - f"Dynamic field `{column_name}` is not correctly mapped. " - f"If not dynamic: expected `{schema_type}`, got `{column_type}`." - ) + continue + column_type = column["type"] + + # Check if the column exists in combined_mappings or a valid field generated from a function or operator + keys = column_name.split(".") + schema_type = traverse_schema(keys, combined_mappings) + + # Validate the type + if not schema_type or column_type != schema_type: + mismatched_columns.append( + f"Dynamic field `{column_name}` is not correctly mapped. " + f"If not dynamic: expected `{schema_type}`, got `{column_type}`." 
+ ) # Raise an error if there are mismatches if mismatched_columns: @@ -733,7 +713,7 @@ def log(val: str) -> None: for index in indices: index_tmpl_mappings = get_simulated_template_mappings(elastic_client, index) - merge_dicts(existing_mappings, index_tmpl_mappings) + combine_dicts(existing_mappings, index_tmpl_mappings) log(f"Collected mappings: {len(existing_mappings)}") @@ -773,26 +753,20 @@ def log(val: str) -> None: for stream in package_schema: flat_schema = package_schema[stream] stream_mappings = flat_schema_to_mapping(flat_schema) - merge_dicts(integration_mappings, stream_mappings) + combine_dicts(integration_mappings, stream_mappings) log(f"Integration mappings prepared: {len(integration_mappings)}") combined_mappings = {} - merge_dicts(combined_mappings, existing_mappings) - merge_dicts(combined_mappings, integration_mappings) + combine_dicts(combined_mappings, existing_mappings) + combine_dicts(combined_mappings, integration_mappings) # NOTE non-ecs schema needs to have formatting updates prior to merge - # merge_dicts(combined_mappings, ecs.get_non_ecs_schema()) + # combine_dicts(combined_mappings, ecs.get_non_ecs_schema()) if not combined_mappings: log("ERROR: no mappings found for the rule") raise ValueError("No mappings found") - import json - - # FIXME update this with utils function - with open("./mappings.json", "w") as f: - _ = f.write(json.dumps(combined_mappings, indent=4, sort_keys=True)) - # Creating a test index with the test name suffix = str(int(time.time() * 1000)) test_index = f"rule-test-index-{suffix}" @@ -852,11 +826,11 @@ def get_indices(elastic_client: Kibana, index: str) -> list[str]: return [i["index"] for i in elastic_client.cat.indices(index=index, format="json")] -def merge_dicts(dest: dict[Any, Any], src: dict[Any, Any]) -> None: - """Merge two dictionaries recursively.""" +def combine_dicts(dest: dict[Any, Any], src: dict[Any, Any]) -> None: + """Combine two dictionaries recursively.""" for k, v in src.items(): if k in dest and isinstance(dest[k], dict) and isinstance(v, dict): - merge_dicts(dest[k], v) + combine_dicts(dest[k], v) else: dest[k] = v From 0cc54439e71c558e17de2b594297e0218be57de8 Mon Sep 17 00:00:00 2001 From: eric-forte-elastic Date: Tue, 26 Aug 2025 18:58:24 -0400 Subject: [PATCH 08/93] Add initial non ecs support --- detection_rules/rule_validators.py | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/detection_rules/rule_validators.py b/detection_rules/rule_validators.py index aa4a717626f..87808f1d976 100644 --- a/detection_rules/rule_validators.py +++ b/detection_rules/rule_validators.py @@ -642,6 +642,22 @@ def validate_integration( pass +def convert_to_nested_schema(flat_schemas: dict[str, str]) -> dict[str, Any]: + """Convert a flat schema to a nested schema with 'properties' for each sub-key.""" + nested_schema = {} + + for key, value in flat_schemas.items(): + parts = key.split(".") + current_level = nested_schema + + for part in parts[:-1]: + current_level = current_level.setdefault(part, {}).setdefault("properties", {}) + + current_level[parts[-1]] = {"type": value} + + return nested_schema + + def extract_error_field(source: str, exc: eql.EqlParseError | kql.KqlParseError) -> str | None: """Extract the field name from an EQL or KQL parse error.""" lines = source.splitlines() @@ -761,7 +777,16 @@ def log(val: str) -> None: combine_dicts(combined_mappings, existing_mappings) combine_dicts(combined_mappings, integration_mappings) # NOTE non-ecs schema needs to have formatting 
updates prior to merge - # combine_dicts(combined_mappings, ecs.get_non_ecs_schema()) + # NOTE non-ecs schema uses Kibana reserved word "properties" as a field name + # e.g. "azure.auditlogs.properties.target_resources.0.display_name": "keyword", + non_ecs_mapping = {} + non_ecs = ecs.get_non_ecs_schema() + for index in indices: + non_ecs_mapping.update(non_ecs.get(index, {})) + non_ecs_mapping = ecs.flatten(non_ecs_mapping) + non_ecs_mapping = convert_to_nested_schema(non_ecs_mapping) + if non_ecs_mapping: + combine_dicts(combined_mappings, non_ecs_mapping) if not combined_mappings: log("ERROR: no mappings found for the rule") From 6f018b5e6b7a92b8f4c657a923fa84ad6dc82b1f Mon Sep 17 00:00:00 2001 From: eric-forte-elastic Date: Thu, 28 Aug 2025 21:31:24 -0400 Subject: [PATCH 09/93] Add initial workflow --- .github/workflows/esql-validation.yml | 70 +++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) create mode 100644 .github/workflows/esql-validation.yml diff --git a/.github/workflows/esql-validation.yml b/.github/workflows/esql-validation.yml new file mode 100644 index 00000000000..baa27055964 --- /dev/null +++ b/.github/workflows/esql-validation.yml @@ -0,0 +1,70 @@ +name: ES|QL Validation +on: + push: + branches: [ "main", "7.*", "8.*", "9.*" ] + pull_request: + branches: [ "*" ] + paths: + - 'rules/**/*.toml' +jobs: + build-and-validate: + runs-on: ubuntu-latest + + steps: + - name: Check out repository + uses: actions/checkout@v4 + with: + path: elastic-container + repository: eric-forte-elastic/elastic-container + + - name: Build and run containers + run: | + cd elastic-container + GENERATED_PASSWORD=$(openssl rand -base64 16) + sed -i 's/changeme/$GENERATED_PASSWORD/' .env + echo "GENERATED_PASSWORD=$GENERATED_PASSWORD" >> $GITHUB_ENV + set -x + bash elastic-container.sh start + + + - name: Setup Detection Rules + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Set up Python 3.13 + uses: actions/setup-python@v5 + with: + python-version: '3.13' + + - name: Get API Key and setup auth + env: + DR_KIBANA_URL: "https://localhost:5601" + DR_ELASTICSEARCH_URL: "https://localhost:9200" + ES_USER: "elastic" + ES_PASSWORD: ${{ env.GENERATED_PASSWORD }} + run: | + cd detection-rules + response=$(curl -k -X POST -u "$ES_USER:$ES_PASSWORD" -H "Content-Type: application/json" -d '{ + "name": "tmp-api-key", + "expiration": "1d" + }' "$ELASTICSEARCH_URL/_security/api_key") + + DR_API_KEY=$(echo "$response" | jq -r '.api_key') + echo "DR_API_KEY=$DR_API_KEY" >> $GITHUB_ENV + + - name: Install dependencies + run: | + cd detection-rules + python -m pip install --upgrade pip + pip cache purge + pip install .[dev] + + - name: Validate Test ESQL Rule + env: + DR_KIBANA_URL: "https://localhost:5601" + DR_ES_USER: "elastic" + DR_API_KEY: ${{ env.DR_API_KEY }} + run: | + cd detection-rules + python -m pytest tests/test_rules_remote.py::TestRemoteRules::test_esql_rules From 632949300e024469c57175b6f56be1866a814e1c Mon Sep 17 00:00:00 2001 From: eric-forte-elastic Date: Thu, 4 Sep 2025 21:14:46 -0400 Subject: [PATCH 10/93] Add optional multi index method --- detection_rules/rule_validators.py | 72 ++++++++++++++++++++++++++---- 1 file changed, 63 insertions(+), 9 deletions(-) diff --git a/detection_rules/rule_validators.py b/detection_rules/rule_validators.py index 87808f1d976..3afbf2eb121 100644 --- a/detection_rules/rule_validators.py +++ b/detection_rules/rule_validators.py @@ -727,8 +727,14 @@ def log(val: str) -> None: existing_mappings: dict[str, Any] = {} + # TODO do we need an 
index mapping for each index in the query? This is accomplished via index_lookup: dict[str, Any] = {} + # Do we also need separate indexes for each integration? Probably, at least it is dynamic for each rule (do we really want to load these per rule?) + index_lookup: dict[str, Any] = {} + # NOTE do we need to cache what integration indexes have been loaded to prevent pushing tons into the evaluation? + for index in indices: index_tmpl_mappings = get_simulated_template_mappings(elastic_client, index) + index_lookup[index] = index_tmpl_mappings combine_dicts(existing_mappings, index_tmpl_mappings) log(f"Collected mappings: {len(existing_mappings)}") @@ -769,7 +775,14 @@ def log(val: str) -> None: for stream in package_schema: flat_schema = package_schema[stream] stream_mappings = flat_schema_to_mapping(flat_schema) + # NOTE perhaps we need to actually create many test indexes for this to work properly + # TODO update this for double defined cases like integration_mappings["aws"]["properties"]["inspector"]["properties"]["remediation"] + # FIXED VIA NESTED FIELDS + # which is both a keyword, and has fields + # "aws.properties.inspector.properties.remediation.type": "keyword", + # "aws.properties.inspector.properties.remediation.fields.recommendation.properties.text.type": "keyword", combine_dicts(integration_mappings, stream_mappings) + index_lookup[f"{integration}-{stream}"] = stream_mappings log(f"Integration mappings prepared: {len(integration_mappings)}") @@ -777,26 +790,47 @@ def log(val: str) -> None: combine_dicts(combined_mappings, existing_mappings) combine_dicts(combined_mappings, integration_mappings) # NOTE non-ecs schema needs to have formatting updates prior to merge - # NOTE non-ecs schema uses Kibana reserved word "properties" as a field name - # e.g. "azure.auditlogs.properties.target_resources.0.display_name": "keyword", + # NOTE non-ecs and ecs schema can conflict e.g. 
'authentication_details': {'type': 'flattened'} + # FIXED VIA NESTED FIELDS + # "azure.signinlogs.properties.authentication_details.authentication_method": "keyword" + # FAILURE: BadRequestError(400, 'illegal_argument_exception', "can't merge a non object mapping [azure.signinlogs.properties.authentication_details] with an object mapping") non_ecs_mapping = {} non_ecs = ecs.get_non_ecs_schema() for index in indices: non_ecs_mapping.update(non_ecs.get(index, {})) non_ecs_mapping = ecs.flatten(non_ecs_mapping) non_ecs_mapping = convert_to_nested_schema(non_ecs_mapping) - if non_ecs_mapping: - combine_dicts(combined_mappings, non_ecs_mapping) - - if not combined_mappings: + if not combined_mappings and not non_ecs_mapping: log("ERROR: no mappings found for the rule") raise ValueError("No mappings found") # Creating a test index with the test name suffix = str(int(time.time() * 1000)) test_index = f"rule-test-index-{suffix}" + test_non_ecs_index = f"rule-test-non-ecs-index-{suffix}" + # TODO if works, switch to non-ecs index only + # NOTE we will always have to have a base test index + # This test index could have the index_tmpl_mappings for example + full_index_str = test_index + if non_ecs_mapping: + full_index_str = test_non_ecs_index + + for index in index_lookup: + # log(f"Mappings for `{index}`: {index_lookup[index]}") + ind_index_str = f"test-{index.rstrip('*')}{suffix}" + response = elastic_client.indices.create( + index=ind_index_str, + mappings={"properties": index_lookup[index]}, + settings={ + "index.mapping.total_fields.limit": 10000, + "index.mapping.nested_fields.limit": 500, + "index.mapping.nested_objects.limit": 10000, + }, + ) + log(f"Index `{test_non_ecs_index}` created: {response}") + full_index_str = f"{full_index_str}, {ind_index_str}" - # creating an index + # create indexes response = elastic_client.indices.create( index=test_index, mappings={"properties": combined_mappings}, @@ -807,19 +841,39 @@ def log(val: str) -> None: }, ) log(f"Index `{test_index}` created: {response}") + test_index_str = test_index + if non_ecs_mapping: + response = elastic_client.indices.create( + index=test_non_ecs_index, + mappings={"properties": non_ecs_mapping}, + settings={ + "index.mapping.total_fields.limit": 10000, + "index.mapping.nested_fields.limit": 500, + "index.mapping.nested_objects.limit": 10000, + }, + ) + log(f"Index `{test_non_ecs_index}` created: {response}") + test_index_str = f"{test_index}, {test_non_ecs_index}" # Replace all sources with the test index query = contents.data.query - query = query.replace(indices_str, test_index) + query = query.replace(indices_str, full_index_str) try: - log(f"Executing a query against `{test_index}`") + log(f"Executing a query against `{test_index_str}`") response = elastic_client.esql.query(query=query) log(f"Got query response: {response}") query_columns = response.get("columns", []) finally: response = elastic_client.indices.delete(index=test_index) log(f"Test index `{test_index}` deleted: {response}") + if non_ecs_mapping: + response = elastic_client.indices.delete(index=test_non_ecs_index) + log(f"Test index `{test_non_ecs_index}` deleted: {response}") + for index in index_lookup: + ind_index_str = f"test-{index.rstrip('*')}{suffix}" + response = elastic_client.indices.delete(index=ind_index_str) + log(f"Test index `{ind_index_str}` deleted: {response}") query_column_names = [c["name"] for c in query_columns] log(f"Got query columns: {', '.join(query_column_names)}") From fd67c6590e247c9ca0e85f85a8e45d5543eaac3d Mon Sep 17 00:00:00 
2001 From: eric-forte-elastic Date: Fri, 5 Sep 2025 15:23:56 -0400 Subject: [PATCH 11/93] Code cleanup --- detection_rules/misc.py | 13 + detection_rules/rule_validators.py | 457 ++++++++++++----------------- detection_rules/utils.py | 76 +++++ tests/test_rules_remote.py | 6 +- 4 files changed, 274 insertions(+), 278 deletions(-) diff --git a/detection_rules/misc.py b/detection_rules/misc.py index 7992639ea72..ea5ff41f663 100644 --- a/detection_rules/misc.py +++ b/detection_rules/misc.py @@ -489,3 +489,16 @@ def _wrapped(*args: Any, **kwargs: Any) -> Any: # noqa: PLR0912 return _wrapped return _wrapper + + +def get_simulated_index_template_mappings(elastic_client: Elasticsearch, name: str) -> dict[str, Any]: + """ + Return the mappings from the index configuration that would be applied + to the specified index from an existing index template + + https://elasticsearch-py.readthedocs.io/en/stable/api/indices.html#elasticsearch.client.IndicesClient.simulate_index_template + """ + template = elastic_client.indices.simulate_index_template(name=name) + if not template: + return {} + return template["template"]["mappings"]["properties"] diff --git a/detection_rules/rule_validators.py b/detection_rules/rule_validators.py index 3afbf2eb121..057c228f5b8 100644 --- a/detection_rules/rule_validators.py +++ b/detection_rules/rule_validators.py @@ -24,7 +24,7 @@ from marshmallow import ValidationError from semver import Version -from . import ecs, endgame, integrations, utils +from . import ecs, endgame, integrations, misc, utils from .config import CUSTOM_RULES_DIR, load_current_package_version, parse_rules_config from .custom_schemas import update_auto_generated_schema from .integrations import get_integration_schema_data, load_integrations_manifests, load_integrations_schemas @@ -641,312 +641,219 @@ def validate_integration( # Disabling self.validate(data, meta) pass + def validate_columns_index_mapping(self, query_columns: list[dict[str, str]], combined_mappings: dict[str, Any]): + """Validate that the columns in the ESQL query match the provided mappings.""" + mismatched_columns: list[str] = [] + + for column in query_columns: + column_name = column["name"] + if column_name.startswith("Esql.") or column_name.startswith("Esql_priv."): + continue + column_type = column["type"] + + # Check if the column exists in combined_mappings or a valid field generated from a function or operator + keys = column_name.split(".") + schema_type = utils.get_column_from_index_mapping_schema(keys, combined_mappings) + + # Validate the type + if not schema_type or column_type != schema_type: + mismatched_columns.append( + f"Dynamic field `{column_name}` is not correctly mapped. " + f"If not dynamic: expected `{schema_type}`, got `{column_type}`." 
+ ) -def convert_to_nested_schema(flat_schemas: dict[str, str]) -> dict[str, Any]: - """Convert a flat schema to a nested schema with 'properties' for each sub-key.""" - nested_schema = {} - - for key, value in flat_schemas.items(): - parts = key.split(".") - current_level = nested_schema - - for part in parts[:-1]: - current_level = current_level.setdefault(part, {}).setdefault("properties", {}) - - current_level[parts[-1]] = {"type": value} - - return nested_schema - - -def extract_error_field(source: str, exc: eql.EqlParseError | kql.KqlParseError) -> str | None: - """Extract the field name from an EQL or KQL parse error.""" - lines = source.splitlines() - mod = -1 if exc.line == len(lines) else 0 # type: ignore[reportUnknownMemberType] - line = lines[exc.line + mod] # type: ignore[reportUnknownMemberType] - start = exc.column # type: ignore[reportUnknownMemberType] - stop = start + len(exc.caret.strip()) # type: ignore[reportUnknownVariableType] - return re.sub(r"^\W+|\W+$", "", line[start:stop]) # type: ignore[reportUnknownArgumentType] - - -def traverse_schema(keys: list[str], current_schema: dict[str, Any] | None) -> str | None: - """Recursively traverse the schema to find the type of the column.""" - key = keys[0] - if not current_schema: - return None - column = current_schema.get(key) or {} - column_type = column.get("type") if column else None - if not column_type and len(keys) > 1: - return traverse_schema(keys[1:], current_schema=column.get("properties")) - return column_type - - -def validate_columns_input_mapping(query_columns: list[dict[str, str]], combined_mappings: dict[str, Any]): - """Validate that the columns in the ESQL query match the provided mappings.""" - mismatched_columns: list[str] = [] - - for column in query_columns: - column_name = column["name"] - if column_name.startswith("Esql.") or column_name.startswith("Esql_priv."): - continue - column_type = column["type"] - - # Check if the column exists in combined_mappings or a valid field generated from a function or operator - keys = column_name.split(".") - schema_type = traverse_schema(keys, combined_mappings) - - # Validate the type - if not schema_type or column_type != schema_type: - mismatched_columns.append( - f"Dynamic field `{column_name}` is not correctly mapped. " - f"If not dynamic: expected `{schema_type}`, got `{column_type}`." 
- ) - - # Raise an error if there are mismatches - if mismatched_columns: - raise ValueError("Column validation errors:\n" + "\n".join(mismatched_columns)) + # Raise an error if there are mismatches + if mismatched_columns: + raise ValueError("Column validation errors:\n" + "\n".join(mismatched_columns)) - return True + return True + def remote_validate_rule( + self, kibana_client: Kibana, elastic_client: Elasticsearch, contents: TOMLRuleContents + ) -> None: + """Uses remote validation from an Elastic Stack to validate ES|QL a given rule""" + rule_id = contents.data.rule_id -def validate_esql_rule(kibana_client: Kibana, elastic_client: Elasticsearch, contents: TOMLRuleContents) -> None: - rule_id = contents.data.rule_id + # FIXME perhaps move this to utils + def log(val: str) -> None: + print(f"{rule_id}:", val) - # FIXME perhaps move this to utils - def log(val: str) -> None: - print(f"{rule_id}:", val) + kibana_details = kibana_client.get("/api/status") + stack_version = kibana_details["version"]["number"] - kibana_details = kibana_client.get("/api/status") - stack_version = kibana_details["version"]["number"] + log(f"Validating against {stack_version} stack") - log(f"Validating against {stack_version} stack") + indices_str, indices = utils.get_esql_query_indices(contents.data.query) + log(f"Extracted indices from query: {', '.join(indices)}") - indices_str, indices = utils.get_esql_query_indices(contents.data.query) - log(f"Extracted indices from query: {', '.join(indices)}") + # Get mappings for all matching existing index templates - # Get mappings for all matching existing index templates + existing_mappings: dict[str, Any] = {} - existing_mappings: dict[str, Any] = {} + # TODO do we need an index mapping for each index in the query? This is accomplished via index_lookup: dict[str, Any] = {} + # Do we also need separate indexes for each integration? Probably, at least it is dynamic for each rule (do we really want to load these per rule?) + index_lookup: dict[str, Any] = {} + # NOTE do we need to cache what integration indexes have been loaded to prevent pushing tons into the evaluation? - # TODO do we need an index mapping for each index in the query? This is accomplished via index_lookup: dict[str, Any] = {} - # Do we also need separate indexes for each integration? Probably, at least it is dynamic for each rule (do we really want to load these per rule?) - index_lookup: dict[str, Any] = {} - # NOTE do we need to cache what integration indexes have been loaded to prevent pushing tons into the evaluation? 
+ for index in indices: + index_tmpl_mappings = misc.get_simulated_index_template_mappings(elastic_client, index) + index_lookup[index] = index_tmpl_mappings + utils.combine_dicts(existing_mappings, index_tmpl_mappings) - for index in indices: - index_tmpl_mappings = get_simulated_template_mappings(elastic_client, index) - index_lookup[index] = index_tmpl_mappings - combine_dicts(existing_mappings, index_tmpl_mappings) + log(f"Collected mappings: {len(existing_mappings)}") - log(f"Collected mappings: {len(existing_mappings)}") + # Collect mappings for the integrations - # Collect mappings for the integrations + rule_integrations = [] + if contents.metadata.integration: + if isinstance(contents.metadata.integration, list): + rule_integrations = contents.metadata.integration + else: + rule_integrations = [contents.metadata.integration] - rule_integrations = [] - if contents.metadata.integration: - if isinstance(contents.metadata.integration, list): - rule_integrations = contents.metadata.integration + if len(rule_integrations) > 0: + log(f"Working with rule integrations: {', '.join(rule_integrations)}") else: - rule_integrations = [contents.metadata.integration] + log("No integrations found in the rule") - if len(rule_integrations) > 0: - log(f"Working with rule integrations: {', '.join(rule_integrations)}") - else: - log("No integrations found in the rule") + package_manifests = load_integrations_manifests() + integration_schemas = load_integrations_schemas() - package_manifests = load_integrations_manifests() - integration_schemas = load_integrations_schemas() + integration_mappings = {} - integration_mappings = {} + for integration in rule_integrations: + # Assume the integration value is a package name + package = integration - for integration in rule_integrations: - # Assume the integration value is a package name - package = integration + package_version, _ = integrations.find_latest_compatible_version( + package, + "", + Version.parse(stack_version), + package_manifests, + ) - package_version, _ = integrations.find_latest_compatible_version( - package, - "", - Version.parse(stack_version), - package_manifests, - ) + package_schema = integration_schemas[package][package_version] + + # Add schemas for all streams in the package + for stream in package_schema: + flat_schema = package_schema[stream] + stream_mappings = utils.flat_schema_to_index_mapping(flat_schema) + # NOTE perhaps we need to actually create many test indexes for this to work properly + # TODO update this for double defined cases like integration_mappings["aws"]["properties"]["inspector"]["properties"]["remediation"] + # FIXED VIA NESTED FIELDS + # which is both a keyword, and has fields + # "aws.properties.inspector.properties.remediation.type": "keyword", + # "aws.properties.inspector.properties.remediation.fields.recommendation.properties.text.type": "keyword", + utils.combine_dicts(integration_mappings, stream_mappings) + index_lookup[f"{integration}-{stream}"] = stream_mappings + + log(f"Integration mappings prepared: {len(integration_mappings)}") + + combined_mappings = {} + utils.combine_dicts(combined_mappings, existing_mappings) + utils.combine_dicts(combined_mappings, integration_mappings) + # NOTE non-ecs schema needs to have formatting updates prior to merge + # NOTE non-ecs and ecs schema can conflict e.g. 
'authentication_details': {'type': 'flattened'} + # FIXED VIA NESTED FIELDS + # "azure.signinlogs.properties.authentication_details.authentication_method": "keyword" + # FAILURE: BadRequestError(400, 'illegal_argument_exception', "can't merge a non object mapping [azure.signinlogs.properties.authentication_details] with an object mapping") + non_ecs_mapping = {} + non_ecs = ecs.get_non_ecs_schema() + for index in indices: + non_ecs_mapping.update(non_ecs.get(index, {})) + non_ecs_mapping = ecs.flatten(non_ecs_mapping) + non_ecs_mapping = utils.convert_to_nested_schema(non_ecs_mapping) + if not combined_mappings and not non_ecs_mapping: + log("ERROR: no mappings found for the rule") + raise ValueError("No mappings found") + + # Creating a test index with the test name + suffix = str(int(time.time() * 1000)) + test_index = f"rule-test-index-{suffix}" + test_non_ecs_index = f"rule-test-non-ecs-index-{suffix}" + # TODO if works, switch to non-ecs index only + # NOTE we will always have to have a base test index + # This test index could have the index_tmpl_mappings for example + full_index_str = test_index + if non_ecs_mapping: + full_index_str = test_non_ecs_index - package_schema = integration_schemas[package][package_version] - - # Add schemas for all streams in the package - for stream in package_schema: - flat_schema = package_schema[stream] - stream_mappings = flat_schema_to_mapping(flat_schema) - # NOTE perhaps we need to actually create many test indexes for this to work properly - # TODO update this for double defined cases like integration_mappings["aws"]["properties"]["inspector"]["properties"]["remediation"] - # FIXED VIA NESTED FIELDS - # which is both a keyword, and has fields - # "aws.properties.inspector.properties.remediation.type": "keyword", - # "aws.properties.inspector.properties.remediation.fields.recommendation.properties.text.type": "keyword", - combine_dicts(integration_mappings, stream_mappings) - index_lookup[f"{integration}-{stream}"] = stream_mappings - - log(f"Integration mappings prepared: {len(integration_mappings)}") - - combined_mappings = {} - combine_dicts(combined_mappings, existing_mappings) - combine_dicts(combined_mappings, integration_mappings) - # NOTE non-ecs schema needs to have formatting updates prior to merge - # NOTE non-ecs and ecs schema can conflict e.g. 
'authentication_details': {'type': 'flattened'} - # FIXED VIA NESTED FIELDS - # "azure.signinlogs.properties.authentication_details.authentication_method": "keyword" - # FAILURE: BadRequestError(400, 'illegal_argument_exception', "can't merge a non object mapping [azure.signinlogs.properties.authentication_details] with an object mapping") - non_ecs_mapping = {} - non_ecs = ecs.get_non_ecs_schema() - for index in indices: - non_ecs_mapping.update(non_ecs.get(index, {})) - non_ecs_mapping = ecs.flatten(non_ecs_mapping) - non_ecs_mapping = convert_to_nested_schema(non_ecs_mapping) - if not combined_mappings and not non_ecs_mapping: - log("ERROR: no mappings found for the rule") - raise ValueError("No mappings found") - - # Creating a test index with the test name - suffix = str(int(time.time() * 1000)) - test_index = f"rule-test-index-{suffix}" - test_non_ecs_index = f"rule-test-non-ecs-index-{suffix}" - # TODO if works, switch to non-ecs index only - # NOTE we will always have to have a base test index - # This test index could have the index_tmpl_mappings for example - full_index_str = test_index - if non_ecs_mapping: - full_index_str = test_non_ecs_index - - for index in index_lookup: - # log(f"Mappings for `{index}`: {index_lookup[index]}") - ind_index_str = f"test-{index.rstrip('*')}{suffix}" - response = elastic_client.indices.create( - index=ind_index_str, - mappings={"properties": index_lookup[index]}, - settings={ - "index.mapping.total_fields.limit": 10000, - "index.mapping.nested_fields.limit": 500, - "index.mapping.nested_objects.limit": 10000, - }, - ) - log(f"Index `{test_non_ecs_index}` created: {response}") - full_index_str = f"{full_index_str}, {ind_index_str}" - - # create indexes - response = elastic_client.indices.create( - index=test_index, - mappings={"properties": combined_mappings}, - settings={ - "index.mapping.total_fields.limit": 10000, - "index.mapping.nested_fields.limit": 500, - "index.mapping.nested_objects.limit": 10000, - }, - ) - log(f"Index `{test_index}` created: {response}") - test_index_str = test_index - if non_ecs_mapping: + for index in index_lookup: + # log(f"Mappings for `{index}`: {index_lookup[index]}") + ind_index_str = f"test-{index.rstrip('*')}{suffix}" + response = elastic_client.indices.create( + index=ind_index_str, + mappings={"properties": index_lookup[index]}, + settings={ + "index.mapping.total_fields.limit": 10000, + "index.mapping.nested_fields.limit": 500, + "index.mapping.nested_objects.limit": 10000, + }, + ) + log(f"Index `{test_non_ecs_index}` created: {response}") + full_index_str = f"{full_index_str}, {ind_index_str}" + + # create indexes response = elastic_client.indices.create( - index=test_non_ecs_index, - mappings={"properties": non_ecs_mapping}, + index=test_index, + mappings={"properties": combined_mappings}, settings={ "index.mapping.total_fields.limit": 10000, "index.mapping.nested_fields.limit": 500, "index.mapping.nested_objects.limit": 10000, }, ) - log(f"Index `{test_non_ecs_index}` created: {response}") - test_index_str = f"{test_index}, {test_non_ecs_index}" - - # Replace all sources with the test index - query = contents.data.query - query = query.replace(indices_str, full_index_str) - - try: - log(f"Executing a query against `{test_index_str}`") - response = elastic_client.esql.query(query=query) - log(f"Got query response: {response}") - query_columns = response.get("columns", []) - finally: - response = elastic_client.indices.delete(index=test_index) - log(f"Test index `{test_index}` deleted: {response}") + 
log(f"Index `{test_index}` created: {response}") + test_index_str = test_index if non_ecs_mapping: - response = elastic_client.indices.delete(index=test_non_ecs_index) - log(f"Test index `{test_non_ecs_index}` deleted: {response}") - for index in index_lookup: - ind_index_str = f"test-{index.rstrip('*')}{suffix}" - response = elastic_client.indices.delete(index=ind_index_str) - log(f"Test index `{ind_index_str}` deleted: {response}") - - query_column_names = [c["name"] for c in query_columns] - log(f"Got query columns: {', '.join(query_column_names)}") - - # FIXME Perhaps update rule_validator's get_required_fields as well - # to everything needs to either be directly mapped to schema or be annotated as dynamic field - if validate_columns_input_mapping(query_columns, combined_mappings): - log("All dynamic columns have proper formatting.") - else: - log("Dynamic column(s) have improper formatting.") - - -def get_simulated_template_mappings(elastic_client: Elasticsearch, name: str) -> dict[str, Any]: - """ - Return the mappings from the index configuration that would be applied - to the specified index from an existing index template - - https://elasticsearch-py.readthedocs.io/en/stable/api/indices.html#elasticsearch.client.IndicesClient.simulate_index_template - """ - template = elastic_client.indices.simulate_index_template(name=name) - if not template: - return {} - return template["template"]["mappings"]["properties"] - - -def get_indices(elastic_client: Kibana, index: str) -> list[str]: - """Fetch indices that match the provided name from Elasticsearch""" - # `index` arg here supports wildcards - return [i["index"] for i in elastic_client.cat.indices(index=index, format="json")] + response = elastic_client.indices.create( + index=test_non_ecs_index, + mappings={"properties": non_ecs_mapping}, + settings={ + "index.mapping.total_fields.limit": 10000, + "index.mapping.nested_fields.limit": 500, + "index.mapping.nested_objects.limit": 10000, + }, + ) + log(f"Index `{test_non_ecs_index}` created: {response}") + test_index_str = f"{test_index}, {test_non_ecs_index}" + # Replace all sources with the test index + query = contents.data.query + query = query.replace(indices_str, full_index_str) -def combine_dicts(dest: dict[Any, Any], src: dict[Any, Any]) -> None: - """Combine two dictionaries recursively.""" - for k, v in src.items(): - if k in dest and isinstance(dest[k], dict) and isinstance(v, dict): - combine_dicts(dest[k], v) + try: + log(f"Executing a query against `{test_index_str}`") + response = elastic_client.esql.query(query=query) + log(f"Got query response: {response}") + query_columns = response.get("columns", []) + finally: + response = elastic_client.indices.delete(index=test_index) + log(f"Test index `{test_index}` deleted: {response}") + if non_ecs_mapping: + response = elastic_client.indices.delete(index=test_non_ecs_index) + log(f"Test index `{test_non_ecs_index}` deleted: {response}") + for index in index_lookup: + ind_index_str = f"test-{index.rstrip('*')}{suffix}" + response = elastic_client.indices.delete(index=ind_index_str) + log(f"Test index `{ind_index_str}` deleted: {response}") + + query_column_names = [c["name"] for c in query_columns] + log(f"Got query columns: {', '.join(query_column_names)}") + + # FIXME Perhaps update rule_validator's get_required_fields as well + # to everything needs to either be directly mapped to schema or be annotated as dynamic field + if self.validate_columns_index_mapping(query_columns, combined_mappings): + log("All dynamic columns 
have proper formatting.") else: - dest[k] = v - - -def flat_schema_to_mapping(flat_schema: dict[str, str]) -> dict[str, Any]: - """ - Convert dicts with flat JSON paths and values into a nested mapping with - intermediary `properties`, `fields` and `type` fields. - """ + log("Dynamic column(s) have improper formatting.") - # Sorting here ensures that 'a.b' processed before 'a.b.c', allowing us to correctly - # detect and handle multi-fields. - sorted_items = sorted(flat_schema.items()) - result = {} - for field_path, field_type in sorted_items: - parts = field_path.split(".") - current_level = result - - for part in parts[:-1]: - node = current_level.setdefault(part, {}) - - if "type" in node and node["type"] not in ("nested", "object"): - current_level = node.setdefault("fields", {}) - else: - current_level = node.setdefault("properties", {}) - - leaf_key = parts[-1] - current_level[leaf_key] = {"type": field_type} - - # add `scaling_factor` field missing in the schema - # https://www.elastic.co/docs/reference/elasticsearch/mapping-reference/number#scaled-float-params - if field_type == "scaled_float": - current_level[leaf_key]["scaling_factor"] = 1000 - - # add `path` field for `alias` fields, set to a dummy value - if field_type == "alias": - current_level[leaf_key]["path"] = "@timestamp" - - return result +def extract_error_field(source: str, exc: eql.EqlParseError | kql.KqlParseError) -> str | None: + """Extract the field name from an EQL or KQL parse error.""" + lines = source.splitlines() + mod = -1 if exc.line == len(lines) else 0 # type: ignore[reportUnknownMemberType] + line = lines[exc.line + mod] # type: ignore[reportUnknownMemberType] + start = exc.column # type: ignore[reportUnknownMemberType] + stop = start + len(exc.caret.strip()) # type: ignore[reportUnknownVariableType] + return re.sub(r"^\W+|\W+$", "", line[start:stop]) # type: ignore[reportUnknownArgumentType] diff --git a/detection_rules/utils.py b/detection_rules/utils.py index c060fba910e..bb780f24d3a 100644 --- a/detection_rules/utils.py +++ b/detection_rules/utils.py @@ -534,6 +534,7 @@ def get_identifiers(self) -> list[str]: def get_esql_query_indices(query: str) -> tuple[str, list[str]]: + """Extract indices from an ES|QL query.""" match = FROM_SOURCES_REGEX.search(query) if not match: @@ -541,3 +542,78 @@ def get_esql_query_indices(query: str) -> tuple[str, list[str]]: sources_str = match.group("sources") return sources_str, [source.strip() for source in sources_str.split(",")] + + +def convert_to_nested_schema(flat_schemas: dict[str, str]) -> dict[str, Any]: + """Convert a flat schema to a nested schema with 'properties' for each sub-key.""" + nested_schema = {} + + for key, value in flat_schemas.items(): + parts = key.split(".") + current_level = nested_schema + + for part in parts[:-1]: + current_level = current_level.setdefault(part, {}).setdefault("properties", {}) + + current_level[parts[-1]] = {"type": value} + + return nested_schema + + +def combine_dicts(dest: dict[Any, Any], src: dict[Any, Any]) -> None: + """Combine two dictionaries recursively.""" + for k, v in src.items(): + if k in dest and isinstance(dest[k], dict) and isinstance(v, dict): + combine_dicts(dest[k], v) + else: + dest[k] = v + + +def flat_schema_to_index_mapping(flat_schema: dict[str, str]) -> dict[str, Any]: + """ + Convert dicts with flat JSON paths and values into a nested mapping with + intermediary `properties`, `fields` and `type` fields. 
+ """ + + # Sorting here ensures that 'a.b' processed before 'a.b.c', allowing us to correctly + # detect and handle multi-fields. + sorted_items = sorted(flat_schema.items()) + result = {} + + for field_path, field_type in sorted_items: + parts = field_path.split(".") + current_level = result + + for part in parts[:-1]: + node = current_level.setdefault(part, {}) + + if "type" in node and node["type"] not in ("nested", "object"): + current_level = node.setdefault("fields", {}) + else: + current_level = node.setdefault("properties", {}) + + leaf_key = parts[-1] + current_level[leaf_key] = {"type": field_type} + + # add `scaling_factor` field missing in the schema + # https://www.elastic.co/docs/reference/elasticsearch/mapping-reference/number#scaled-float-params + if field_type == "scaled_float": + current_level[leaf_key]["scaling_factor"] = 1000 + + # add `path` field for `alias` fields, set to a dummy value + if field_type == "alias": + current_level[leaf_key]["path"] = "@timestamp" + + return result + + +def get_column_from_index_mapping_schema(keys: list[str], current_schema: dict[str, Any] | None) -> str | None: + """Recursively traverse the schema to find the type of the column.""" + key = keys[0] + if not current_schema: + return None + column = current_schema.get(key) or {} + column_type = column.get("type") if column else None + if not column_type and len(keys) > 1: + return get_column_from_index_mapping_schema(keys[1:], current_schema=column.get("properties")) + return column_type diff --git a/tests/test_rules_remote.py b/tests/test_rules_remote.py index 415eb0f0714..2b37f95932a 100644 --- a/tests/test_rules_remote.py +++ b/tests/test_rules_remote.py @@ -6,7 +6,7 @@ import unittest from detection_rules.misc import get_default_config, get_elasticsearch_client, get_kibana_client, getdefault -from detection_rules.rule_validators import validate_esql_rule +from detection_rules.rule_validators import ESQLValidator from .base import BaseRuleTest @@ -42,11 +42,11 @@ def test_esql_rules(self): failed_count = 0 fail_list = [] - for r in esql_rules: print() try: - validate_esql_rule(kibana_client, elastic_client, r.contents) + validator = ESQLValidator(r.contents.data.query) + validator.remote_validate_rule(kibana_client, elastic_client, r.contents) except Exception as e: print(f"FAILURE: {e}") fail_list.append(f"FAILURE: {e}") From 1cf93678308971f3f7bfe8794cb6b6ba5f4ce545 Mon Sep 17 00:00:00 2001 From: eric-forte-elastic Date: Fri, 5 Sep 2025 17:39:06 -0400 Subject: [PATCH 12/93] Reduce function complexity --- detection_rules/misc.py | 29 ++- detection_rules/rule_validators.py | 276 ++++++++++++++--------------- tests/test_rules_remote.py | 8 +- 3 files changed, 164 insertions(+), 149 deletions(-) diff --git a/detection_rules/misc.py b/detection_rules/misc.py index ea5ff41f663..45e72e198ce 100644 --- a/detection_rules/misc.py +++ b/detection_rules/misc.py @@ -17,10 +17,11 @@ import click import requests +from elastic_transport import ObjectApiResponse from elasticsearch import AuthenticationException, Elasticsearch from kibana import Kibana # type: ignore[reportMissingTypeStubs] -from .utils import add_params, cached, load_etc_dump +from .utils import add_params, cached, combine_dicts, load_etc_dump LICENSE_HEADER = """ Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. 
under one @@ -502,3 +503,29 @@ def get_simulated_index_template_mappings(elastic_client: Elasticsearch, name: s if not template: return {} return template["template"]["mappings"]["properties"] + + +def create_index_with_index_mapping( + elastic_client: Elasticsearch, index_name: str, mappings: dict[str, Any] +) -> ObjectApiResponse[Any]: + """Create an index with the specified mappings and settings to support large number of fields and nested objects.""" + return elastic_client.indices.create( + index=index_name, + mappings={"properties": mappings}, + settings={ + "index.mapping.total_fields.limit": 10000, + "index.mapping.nested_fields.limit": 500, + "index.mapping.nested_objects.limit": 10000, + }, + ) + + +def get_existing_mappings(elastic_client: Elasticsearch, indices: list[str]) -> tuple[dict[str, Any], dict[str, Any]]: + """Retrieve mappings for all matching existing index templates.""" + existing_mappings: dict[str, Any] = {} + index_lookup: dict[str, Any] = {} + for index in indices: + index_tmpl_mappings = get_simulated_index_template_mappings(elastic_client, index) + index_lookup[index] = index_tmpl_mappings + combine_dicts(existing_mappings, index_tmpl_mappings) + return existing_mappings, index_lookup diff --git a/detection_rules/rule_validators.py b/detection_rules/rule_validators.py index 057c228f5b8..efc23e186d6 100644 --- a/detection_rules/rule_validators.py +++ b/detection_rules/rule_validators.py @@ -641,13 +641,49 @@ def validate_integration( # Disabling self.validate(data, meta) pass - def validate_columns_index_mapping(self, query_columns: list[dict[str, str]], combined_mappings: dict[str, Any]): + def get_rule_integrations(self, contents: TOMLRuleContents) -> list[str]: + """Retrieve rule integrations from metadata.""" + rule_integrations: list[str] = [] + if contents.metadata.integration: + if isinstance(contents.metadata.integration, list): + rule_integrations = contents.metadata.integration + else: + rule_integrations = [contents.metadata.integration] + return rule_integrations + + def prepare_integration_mappings( + self, rule_integrations: list[str], stack_version: str, package_manifests: Any, integration_schemas: Any + ) -> tuple[dict[str, Any], dict[str, Any]]: + """Prepare integration mappings for the given rule integrations.""" + integration_mappings: dict[str, Any] = {} + index_lookup: dict[str, Any] = {} + for integration in rule_integrations: + package = integration + package_version, _ = integrations.find_latest_compatible_version( + package, + "", + Version.parse(stack_version), + package_manifests, + ) + package_schema = integration_schemas[package][package_version] + + for stream in package_schema: + flat_schema = package_schema[stream] + stream_mappings = utils.flat_schema_to_index_mapping(flat_schema) + utils.combine_dicts(integration_mappings, stream_mappings) + index_lookup[f"{integration}-{stream}"] = stream_mappings + + return integration_mappings, index_lookup + + def validate_columns_index_mapping( + self, query_columns: list[dict[str, str]], combined_mappings: dict[str, Any] + ) -> bool: """Validate that the columns in the ESQL query match the provided mappings.""" mismatched_columns: list[str] = [] for column in query_columns: column_name = column["name"] - if column_name.startswith("Esql.") or column_name.startswith("Esql_priv."): + if column_name.startswith(("Esql.", "Esql_priv.")): continue column_type = column["type"] @@ -668,181 +704,129 @@ def validate_columns_index_mapping(self, query_columns: list[dict[str, str]], co return True - 
def remote_validate_rule( - self, kibana_client: Kibana, elastic_client: Elasticsearch, contents: TOMLRuleContents - ) -> None: - """Uses remote validation from an Elastic Stack to validate ES|QL a given rule""" - rule_id = contents.data.rule_id - - # FIXME perhaps move this to utils - def log(val: str) -> None: - print(f"{rule_id}:", val) - - kibana_details = kibana_client.get("/api/status") - stack_version = kibana_details["version"]["number"] - - log(f"Validating against {stack_version} stack") - - indices_str, indices = utils.get_esql_query_indices(contents.data.query) - log(f"Extracted indices from query: {', '.join(indices)}") + def create_remote_indices( + self, + elastic_client: Elasticsearch, + existing_mappings: dict[str, Any], + index_lookup: dict[str, Any], + log: Callable[[str], None], + ) -> str: + """Create remote indices for validation and return the index string.""" + suffix = str(int(time.time() * 1000)) + test_index = f"rule-test-index-{suffix}" + response = misc.create_index_with_index_mapping(elastic_client, test_index, existing_mappings) + log(f"Index `{test_index}` created: {response}") + full_index_str = test_index - # Get mappings for all matching existing index templates + # create all integration indices + for index, properties in index_lookup.items(): + ind_index_str = f"test-{index.rstrip('*')}{suffix}" + response = misc.create_index_with_index_mapping(elastic_client, ind_index_str, properties) + log(f"Index `{ind_index_str}` created: {response}") + full_index_str = f"{full_index_str}, {ind_index_str}" - existing_mappings: dict[str, Any] = {} + return full_index_str - # TODO do we need an index mapping for each index in the query? This is accomplished via index_lookup: dict[str, Any] = {} - # Do we also need separate indexes for each integration? Probably, at least it is dynamic for each rule (do we really want to load these per rule?) - index_lookup: dict[str, Any] = {} - # NOTE do we need to cache what integration indexes have been loaded to prevent pushing tons into the evaluation? 
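For orientation, the comma-joined string returned by `create_remote_indices` above is what later replaces the original FROM sources in the rule query, so the validation run only touches the disposable, timestamp-suffixed test indices. A rough sketch of that substitution, with made-up index names (the real values come from `utils.get_esql_query_indices` and the suffix logic above):

# Hypothetical query; indices_str is the raw sources string from the FROM clause
query = "from logs-endpoint.events.*, logs-windows.* | stats cnt = count(*) by host.name"
indices_str = "logs-endpoint.events.*, logs-windows.*"
full_index_str = "rule-test-index-1757000000000, test-logs-endpoint.events.1757000000000"
test_query = query.replace(indices_str, full_index_str)
# test_query now reads only from the freshly created test indices and can be
# executed via the ES|QL /_query endpoint without touching real data streams.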
+ def execute_query_against_indices( + self, + elastic_client: Elasticsearch, + query: str, + test_index_str: str, + log: Callable[[str], None], + delete_indices: bool = True, + ) -> list[Any]: + """Execute the ESQL query against the test indices on a remote Stack and return the columns.""" + try: + log(f"Executing a query against `{test_index_str}`") + response = elastic_client.esql.query(query=query) + log(f"Got query response: {response}") + query_columns = response.get("columns", []) + finally: + if delete_indices: + for index_str in test_index_str.split(","): + response = elastic_client.indices.delete(index=index_str.strip()) + log(f"Test index `{index_str}` deleted: {response}") - for index in indices: - index_tmpl_mappings = misc.get_simulated_index_template_mappings(elastic_client, index) - index_lookup[index] = index_tmpl_mappings - utils.combine_dicts(existing_mappings, index_tmpl_mappings) + query_column_names = [c["name"] for c in query_columns] + log(f"Got query columns: {', '.join(query_column_names)}") + return query_columns - log(f"Collected mappings: {len(existing_mappings)}") + def prepare_mappings( + self, elastic_client: Elasticsearch, indices: list[str], stack_version: str, contents: TOMLRuleContents + ) -> tuple[dict[str, Any], dict[str, Any], dict[str, Any]]: + """Prepare index mappings for the given indices and rule integrations.""" + existing_mappings, index_lookup = misc.get_existing_mappings(elastic_client, indices) # Collect mappings for the integrations + rule_integrations = self.get_rule_integrations(contents) - rule_integrations = [] - if contents.metadata.integration: - if isinstance(contents.metadata.integration, list): - rule_integrations = contents.metadata.integration - else: - rule_integrations = [contents.metadata.integration] - - if len(rule_integrations) > 0: - log(f"Working with rule integrations: {', '.join(rule_integrations)}") - else: - log("No integrations found in the rule") - + # Collect mappings for all relevant integrations for the given stack version package_manifests = load_integrations_manifests() integration_schemas = load_integrations_schemas() - integration_mappings = {} - - for integration in rule_integrations: - # Assume the integration value is a package name - package = integration - - package_version, _ = integrations.find_latest_compatible_version( - package, - "", - Version.parse(stack_version), - package_manifests, - ) - - package_schema = integration_schemas[package][package_version] - - # Add schemas for all streams in the package - for stream in package_schema: - flat_schema = package_schema[stream] - stream_mappings = utils.flat_schema_to_index_mapping(flat_schema) - # NOTE perhaps we need to actually create many test indexes for this to work properly - # TODO update this for double defined cases like integration_mappings["aws"]["properties"]["inspector"]["properties"]["remediation"] - # FIXED VIA NESTED FIELDS - # which is both a keyword, and has fields - # "aws.properties.inspector.properties.remediation.type": "keyword", - # "aws.properties.inspector.properties.remediation.fields.recommendation.properties.text.type": "keyword", - utils.combine_dicts(integration_mappings, stream_mappings) - index_lookup[f"{integration}-{stream}"] = stream_mappings + integration_mappings, integration_index_lookup = self.prepare_integration_mappings( + rule_integrations, stack_version, package_manifests, integration_schemas + ) - log(f"Integration mappings prepared: {len(integration_mappings)}") + 
index_lookup.update(integration_index_lookup) - combined_mappings = {} + # Combine existing and integration mappings into a single mapping dict + combined_mappings: dict[str, Any] = {} utils.combine_dicts(combined_mappings, existing_mappings) utils.combine_dicts(combined_mappings, integration_mappings) - # NOTE non-ecs schema needs to have formatting updates prior to merge - # NOTE non-ecs and ecs schema can conflict e.g. 'authentication_details': {'type': 'flattened'} - # FIXED VIA NESTED FIELDS - # "azure.signinlogs.properties.authentication_details.authentication_method": "keyword" - # FAILURE: BadRequestError(400, 'illegal_argument_exception', "can't merge a non object mapping [azure.signinlogs.properties.authentication_details] with an object mapping") - non_ecs_mapping = {} + + # Load non-ecs schema and convert to index mapping format (nested schema) + non_ecs_mapping: dict[str, Any] = {} non_ecs = ecs.get_non_ecs_schema() for index in indices: non_ecs_mapping.update(non_ecs.get(index, {})) non_ecs_mapping = ecs.flatten(non_ecs_mapping) non_ecs_mapping = utils.convert_to_nested_schema(non_ecs_mapping) if not combined_mappings and not non_ecs_mapping: - log("ERROR: no mappings found for the rule") raise ValueError("No mappings found") + index_lookup.update({"rule-non-ecs-index": non_ecs_mapping}) - # Creating a test index with the test name - suffix = str(int(time.time() * 1000)) - test_index = f"rule-test-index-{suffix}" - test_non_ecs_index = f"rule-test-non-ecs-index-{suffix}" - # TODO if works, switch to non-ecs index only - # NOTE we will always have to have a base test index - # This test index could have the index_tmpl_mappings for example - full_index_str = test_index - if non_ecs_mapping: - full_index_str = test_non_ecs_index + return existing_mappings, index_lookup, combined_mappings - for index in index_lookup: - # log(f"Mappings for `{index}`: {index_lookup[index]}") - ind_index_str = f"test-{index.rstrip('*')}{suffix}" - response = elastic_client.indices.create( - index=ind_index_str, - mappings={"properties": index_lookup[index]}, - settings={ - "index.mapping.total_fields.limit": 10000, - "index.mapping.nested_fields.limit": 500, - "index.mapping.nested_objects.limit": 10000, - }, - ) - log(f"Index `{test_non_ecs_index}` created: {response}") - full_index_str = f"{full_index_str}, {ind_index_str}" + def remote_validate_rule( + self, kibana_client: Kibana, elastic_client: Elasticsearch, contents: TOMLRuleContents, verbosity: int = 0 + ) -> None: + """Uses remote validation from an Elastic Stack to validate ES|QL a given rule""" + rule_id = contents.data.rule_id + + def log(val: str) -> None: + """Log if verbosity is 1 or greater (1 corresponds to `-v` in pytest)""" + unit_test_verbose_level = 1 + if verbosity >= unit_test_verbose_level: + print(f"{rule_id}:", val) + + stack_version = "" + kibana_details = kibana_client.get("/api/status") + stack_version = str(kibana_details["version"]["number"]) + + log(f"Validating against {stack_version} stack") + + indices_str, indices = utils.get_esql_query_indices(contents.data.query) + log(f"Extracted indices from query: {', '.join(indices)}") - # create indexes - response = elastic_client.indices.create( - index=test_index, - mappings={"properties": combined_mappings}, - settings={ - "index.mapping.total_fields.limit": 10000, - "index.mapping.nested_fields.limit": 500, - "index.mapping.nested_objects.limit": 10000, - }, + # Get mappings for all matching existing index templates + existing_mappings, index_lookup, combined_mappings 
= self.prepare_mappings( + elastic_client, indices, stack_version, contents ) - log(f"Index `{test_index}` created: {response}") - test_index_str = test_index - if non_ecs_mapping: - response = elastic_client.indices.create( - index=test_non_ecs_index, - mappings={"properties": non_ecs_mapping}, - settings={ - "index.mapping.total_fields.limit": 10000, - "index.mapping.nested_fields.limit": 500, - "index.mapping.nested_objects.limit": 10000, - }, - ) - log(f"Index `{test_non_ecs_index}` created: {response}") - test_index_str = f"{test_index}, {test_non_ecs_index}" + log(f"Collected mappings: {len(existing_mappings)}") + log(f"Combined mappings prepared: {len(combined_mappings)}") - # Replace all sources with the test index + # Create remote indices + full_index_str = self.create_remote_indices(elastic_client, existing_mappings, index_lookup, log) + + # Replace all sources with the test indices query = contents.data.query query = query.replace(indices_str, full_index_str) - try: - log(f"Executing a query against `{test_index_str}`") - response = elastic_client.esql.query(query=query) - log(f"Got query response: {response}") - query_columns = response.get("columns", []) - finally: - response = elastic_client.indices.delete(index=test_index) - log(f"Test index `{test_index}` deleted: {response}") - if non_ecs_mapping: - response = elastic_client.indices.delete(index=test_non_ecs_index) - log(f"Test index `{test_non_ecs_index}` deleted: {response}") - for index in index_lookup: - ind_index_str = f"test-{index.rstrip('*')}{suffix}" - response = elastic_client.indices.delete(index=ind_index_str) - log(f"Test index `{ind_index_str}` deleted: {response}") - - query_column_names = [c["name"] for c in query_columns] - log(f"Got query columns: {', '.join(query_column_names)}") + query_columns = self.execute_query_against_indices(elastic_client, query, full_index_str, log) - # FIXME Perhaps update rule_validator's get_required_fields as well - # to everything needs to either be directly mapped to schema or be annotated as dynamic field + # Validation all fields (columns) are either dynamic fields or correctly mapped + # against the combined mapping of all the indices if self.validate_columns_index_mapping(query_columns, combined_mappings): log("All dynamic columns have proper formatting.") else: diff --git a/tests/test_rules_remote.py b/tests/test_rules_remote.py index 2b37f95932a..a42dc91a225 100644 --- a/tests/test_rules_remote.py +++ b/tests/test_rules_remote.py @@ -7,6 +7,7 @@ from detection_rules.misc import get_default_config, get_elasticsearch_client, get_kibana_client, getdefault from detection_rules.rule_validators import ESQLValidator +from elasticsearch import BadRequestError from .base import BaseRuleTest @@ -40,14 +41,17 @@ def test_esql_rules(self): ignore_ssl_errors=getdefault("ignore_ssl_errors")(), ) + # Retrieve verbosity level from pytest + verbosity = self._outcome.result.config.get_verbosity() + failed_count = 0 fail_list = [] for r in esql_rules: print() try: validator = ESQLValidator(r.contents.data.query) - validator.remote_validate_rule(kibana_client, elastic_client, r.contents) - except Exception as e: + validator.remote_validate_rule(kibana_client, elastic_client, r.contents, verbosity) + except (ValueError, BadRequestError) as e: print(f"FAILURE: {e}") fail_list.append(f"FAILURE: {e}") failed_count += 1 From d17d377b1ad6fde174f64bc0dc8b1097ca28fa9a Mon Sep 17 00:00:00 2001 From: eric-forte-elastic Date: Fri, 5 Sep 2025 17:44:33 -0400 Subject: [PATCH 13/93] Minor Version 
Bump --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index d17608bdd65..85c1850f165 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "detection_rules" -version = "1.3.30" +version = "1.4.0" description = "Detection Rules is the home for rules used by Elastic Security. This repository is used for the development, maintenance, testing, validation, and release of rules for Elastic Security’s Detection Engine." readme = "README.md" requires-python = ">=3.12" From e84e56330c578b5d61e7608cca8360b6bc0d426b Mon Sep 17 00:00:00 2001 From: eric-forte-elastic Date: Fri, 5 Sep 2025 17:57:46 -0400 Subject: [PATCH 14/93] Linting --- detection_rules/rule.py | 2 +- detection_rules/rule_validators.py | 13 +++++++------ detection_rules/utils.py | 22 +++++++++++----------- tests/test_rules_remote.py | 3 ++- 4 files changed, 21 insertions(+), 19 deletions(-) diff --git a/detection_rules/rule.py b/detection_rules/rule.py index 26ce72bed3b..1b9717bb246 100644 --- a/detection_rules/rule.py +++ b/detection_rules/rule.py @@ -1470,7 +1470,7 @@ def get_packaged_integrations( # if both exist, rule tags are only used if defined in definitions for non-dataset packages # of machine learning analytic packages - rule_integrations = meta.get("integration") or [] + rule_integrations: str | list[str] = meta.get("integration") or [] for integration in rule_integrations: ineligible_integrations = [ *definitions.NON_DATASET_PACKAGES, diff --git a/detection_rules/rule_validators.py b/detection_rules/rule_validators.py index efc23e186d6..1f17c01c4cc 100644 --- a/detection_rules/rule_validators.py +++ b/detection_rules/rule_validators.py @@ -801,12 +801,13 @@ def log(val: str) -> None: print(f"{rule_id}:", val) stack_version = "" - kibana_details = kibana_client.get("/api/status") + kibana_details: dict[str, Any] = kibana_client.get("/api/status", {}) # type: ignore[reportUnknownVariableType] + if "version" not in kibana_details: + raise ValueError("Failed to retrieve Kibana details.") stack_version = str(kibana_details["version"]["number"]) - log(f"Validating against {stack_version} stack") - indices_str, indices = utils.get_esql_query_indices(contents.data.query) + indices_str, indices = utils.get_esql_query_indices(contents.data.query) # type: ignore[reportUnknownVariableType] log(f"Extracted indices from query: {', '.join(indices)}") # Get mappings for all matching existing index templates @@ -820,10 +821,10 @@ def log(val: str) -> None: full_index_str = self.create_remote_indices(elastic_client, existing_mappings, index_lookup, log) # Replace all sources with the test indices - query = contents.data.query - query = query.replace(indices_str, full_index_str) + query = contents.data.query # type: ignore[reportUnknownVariableType] + query = query.replace(indices_str, full_index_str) # type: ignore[reportUnknownVariableType] - query_columns = self.execute_query_against_indices(elastic_client, query, full_index_str, log) + query_columns = self.execute_query_against_indices(elastic_client, query, full_index_str, log) # type: ignore[reportUnknownVariableType] # Validation all fields (columns) are either dynamic fields or correctly mapped # against the combined mapping of all the indices diff --git a/detection_rules/utils.py b/detection_rules/utils.py index bb780f24d3a..990e8fc1623 100644 --- a/detection_rules/utils.py +++ b/detection_rules/utils.py @@ -553,18 +553,18 @@ def convert_to_nested_schema(flat_schemas: dict[str, str]) -> 
dict[str, Any]: current_level = nested_schema for part in parts[:-1]: - current_level = current_level.setdefault(part, {}).setdefault("properties", {}) + current_level = current_level.setdefault(part, {}).setdefault("properties", {}) # type: ignore[reportUnknownVariableType] current_level[parts[-1]] = {"type": value} - return nested_schema + return nested_schema # type: ignore[reportUnknownVariableType] def combine_dicts(dest: dict[Any, Any], src: dict[Any, Any]) -> None: """Combine two dictionaries recursively.""" for k, v in src.items(): if k in dest and isinstance(dest[k], dict) and isinstance(v, dict): - combine_dicts(dest[k], v) + combine_dicts(dest[k], v) # type: ignore[reportUnknownVariableType] else: dest[k] = v @@ -585,12 +585,12 @@ def flat_schema_to_index_mapping(flat_schema: dict[str, str]) -> dict[str, Any]: current_level = result for part in parts[:-1]: - node = current_level.setdefault(part, {}) + node = current_level.setdefault(part, {}) # type: ignore[reportUnknownVariableType] if "type" in node and node["type"] not in ("nested", "object"): - current_level = node.setdefault("fields", {}) + current_level = node.setdefault("fields", {}) # type: ignore[reportUnknownVariableType] else: - current_level = node.setdefault("properties", {}) + current_level = node.setdefault("properties", {}) # type: ignore[reportUnknownVariableType] leaf_key = parts[-1] current_level[leaf_key] = {"type": field_type} @@ -604,7 +604,7 @@ def flat_schema_to_index_mapping(flat_schema: dict[str, str]) -> dict[str, Any]: if field_type == "alias": current_level[leaf_key]["path"] = "@timestamp" - return result + return result # type: ignore[reportUnknownVariableType] def get_column_from_index_mapping_schema(keys: list[str], current_schema: dict[str, Any] | None) -> str | None: @@ -612,8 +612,8 @@ def get_column_from_index_mapping_schema(keys: list[str], current_schema: dict[s key = keys[0] if not current_schema: return None - column = current_schema.get(key) or {} - column_type = column.get("type") if column else None + column = current_schema.get(key) or {} # type: ignore[reportUnknownVariableType] + column_type = column.get("type") if column else None # type: ignore[reportUnknownVariableType] if not column_type and len(keys) > 1: - return get_column_from_index_mapping_schema(keys[1:], current_schema=column.get("properties")) - return column_type + return get_column_from_index_mapping_schema(keys[1:], current_schema=column.get("properties")) # type: ignore[reportUnknownVariableType] + return column_type # type: ignore[reportUnknownVariableType] diff --git a/tests/test_rules_remote.py b/tests/test_rules_remote.py index a42dc91a225..11372df2a1c 100644 --- a/tests/test_rules_remote.py +++ b/tests/test_rules_remote.py @@ -5,9 +5,10 @@ import unittest +from elasticsearch import BadRequestError + from detection_rules.misc import get_default_config, get_elasticsearch_client, get_kibana_client, getdefault from detection_rules.rule_validators import ESQLValidator -from elasticsearch import BadRequestError from .base import BaseRuleTest From f827f91345df5425dc5e33df6eccd9c280726989 Mon Sep 17 00:00:00 2001 From: eric-forte-elastic Date: Mon, 8 Sep 2025 10:24:22 -0400 Subject: [PATCH 15/93] Add connection retry handling --- tests/test_rules_remote.py | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/tests/test_rules_remote.py b/tests/test_rules_remote.py index 11372df2a1c..568da6f66a2 100644 --- a/tests/test_rules_remote.py +++ b/tests/test_rules_remote.py @@ -3,9 
+3,11 @@ # 2.0; you may not use this file except in compliance with the Elastic License # 2.0. +import time import unittest from elasticsearch import BadRequestError +from elasticsearch import ConnectionError as ESConnectionError from detection_rules.misc import get_default_config, get_elasticsearch_client, get_kibana_client, getdefault from detection_rules.rule_validators import ESQLValidator @@ -47,15 +49,28 @@ def test_esql_rules(self): failed_count = 0 fail_list = [] + max_retries = 3 for r in esql_rules: print() - try: - validator = ESQLValidator(r.contents.data.query) - validator.remote_validate_rule(kibana_client, elastic_client, r.contents, verbosity) - except (ValueError, BadRequestError) as e: - print(f"FAILURE: {e}") - fail_list.append(f"FAILURE: {e}") - failed_count += 1 + retry_count = 0 + while retry_count < max_retries: + try: + validator = ESQLValidator(r.contents.data.query) + validator.remote_validate_rule(kibana_client, elastic_client, r.contents, verbosity) + break + except (ValueError, BadRequestError) as e: + print(f"FAILURE: {e}") + fail_list.append(f"FAILURE: {e}") + failed_count += 1 + break + except ESConnectionError as e: + retry_count += 1 + print(f"Connection error: {e}. Retrying {retry_count}/{max_retries}...") + time.sleep(30) + if retry_count == max_retries: + print(f"FAILURE: {e} after {max_retries} retries") + fail_list.append(f"FAILURE: {e} after {max_retries} retries") + failed_count += 1 print(f"Total rules: {len(esql_rules)}") print(f"Failed rules: {failed_count}") From 34cfb335e3f097c94dc8071de29e16cf8d3e2f0d Mon Sep 17 00:00:00 2001 From: eric-forte-elastic Date: Mon, 8 Sep 2025 10:31:53 -0400 Subject: [PATCH 16/93] Switch to encoded var --- .github/workflows/esql-validation.yml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/.github/workflows/esql-validation.yml b/.github/workflows/esql-validation.yml index baa27055964..a3494a4fb42 100644 --- a/.github/workflows/esql-validation.yml +++ b/.github/workflows/esql-validation.yml @@ -39,7 +39,6 @@ jobs: - name: Get API Key and setup auth env: - DR_KIBANA_URL: "https://localhost:5601" DR_ELASTICSEARCH_URL: "https://localhost:9200" ES_USER: "elastic" ES_PASSWORD: ${{ env.GENERATED_PASSWORD }} @@ -48,9 +47,9 @@ jobs: response=$(curl -k -X POST -u "$ES_USER:$ES_PASSWORD" -H "Content-Type: application/json" -d '{ "name": "tmp-api-key", "expiration": "1d" - }' "$ELASTICSEARCH_URL/_security/api_key") + }' "$DR_ELASTICSEARCH_URL/_security/api_key") - DR_API_KEY=$(echo "$response" | jq -r '.api_key') + DR_API_KEY=$(echo "$response" | jq -r '.encoded') echo "DR_API_KEY=$DR_API_KEY" >> $GITHUB_ENV - name: Install dependencies From bf3955d31fc3f67ab4fc794b3eb400b6a7763f0f Mon Sep 17 00:00:00 2001 From: eric-forte-elastic Date: Mon, 8 Sep 2025 10:38:56 -0400 Subject: [PATCH 17/93] comment cleanup --- detection_rules/rule_validators.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/detection_rules/rule_validators.py b/detection_rules/rule_validators.py index 1f17c01c4cc..0639a5695a6 100644 --- a/detection_rules/rule_validators.py +++ b/detection_rules/rule_validators.py @@ -698,7 +698,6 @@ def validate_columns_index_mapping( f"If not dynamic: expected `{schema_type}`, got `{column_type}`." 
) - # Raise an error if there are mismatches if mismatched_columns: raise ValueError("Column validation errors:\n" + "\n".join(mismatched_columns)) @@ -826,7 +825,7 @@ def log(val: str) -> None: query_columns = self.execute_query_against_indices(elastic_client, query, full_index_str, log) # type: ignore[reportUnknownVariableType] - # Validation all fields (columns) are either dynamic fields or correctly mapped + # Validate that all fields (columns) are either dynamic fields or correctly mapped # against the combined mapping of all the indices if self.validate_columns_index_mapping(query_columns, combined_mappings): log("All dynamic columns have proper formatting.") From a38b195c0f61f8fc451cf14e0260860d8b362107 Mon Sep 17 00:00:00 2001 From: eric-forte-elastic Date: Mon, 8 Sep 2025 10:47:50 -0400 Subject: [PATCH 18/93] Skip internal fields on validation --- detection_rules/rule_validators.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/detection_rules/rule_validators.py b/detection_rules/rule_validators.py index 0639a5695a6..0b614303d75 100644 --- a/detection_rules/rule_validators.py +++ b/detection_rules/rule_validators.py @@ -683,8 +683,12 @@ def validate_columns_index_mapping( for column in query_columns: column_name = column["name"] + # Skip Dynamic fields if column_name.startswith(("Esql.", "Esql_priv.")): continue + # Skip internal fields + if column_name in ("_id", "_index", "_type"): + continue column_type = column["type"] # Check if the column exists in combined_mappings or a valid field generated from a function or operator From 0b61ca7de243c603f08d8bcc604cc5f0c430e0ab Mon Sep 17 00:00:00 2001 From: eric-forte-elastic Date: Mon, 8 Sep 2025 10:51:26 -0400 Subject: [PATCH 19/93] Fix typo in internal fields --- detection_rules/rule_validators.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/detection_rules/rule_validators.py b/detection_rules/rule_validators.py index 0b614303d75..67d7ed070b8 100644 --- a/detection_rules/rule_validators.py +++ b/detection_rules/rule_validators.py @@ -687,7 +687,7 @@ def validate_columns_index_mapping( if column_name.startswith(("Esql.", "Esql_priv.")): continue # Skip internal fields - if column_name in ("_id", "_index", "_type"): + if column_name in ("_id", "_version", "_index"): continue column_type = column["type"] From b0d3fb8b0c51f20e0ab5de0e1cd48925e5445546 Mon Sep 17 00:00:00 2001 From: eric-forte-elastic Date: Tue, 9 Sep 2025 11:10:28 -0400 Subject: [PATCH 20/93] Handle ECS mappings like fleet --- detection_rules/rule_validators.py | 86 ++++++++++++++++++++++++++++-- detection_rules/utils.py | 11 ++++ 2 files changed, 93 insertions(+), 4 deletions(-) diff --git a/detection_rules/rule_validators.py b/detection_rules/rule_validators.py index 67d7ed070b8..f705f5b3cba 100644 --- a/detection_rules/rule_validators.py +++ b/detection_rules/rule_validators.py @@ -630,8 +630,14 @@ def unique_fields(self) -> list[str]: # type: ignore[reportIncompatibleMethodOv def validate(self, _: "QueryRuleData", __: RuleMeta) -> None: # type: ignore[reportIncompatibleMethodOverride] """Validate an ESQL query while checking TOMLRule.""" + # TODO # temporarily override to NOP until ES|QL query parsing is supported + # if ENV VAR : + # self.remote_validate_rule + # else: + # ESQLRuleData validation + # NOTE will go away def validate_integration( self, _: QueryRuleData, @@ -652,7 +658,12 @@ def get_rule_integrations(self, contents: TOMLRuleContents) -> list[str]: return rule_integrations def prepare_integration_mappings( - self, 
rule_integrations: list[str], stack_version: str, package_manifests: Any, integration_schemas: Any + self, + rule_integrations: list[str], + stack_version: str, + package_manifests: Any, + integration_schemas: Any, + log: Callable[[str], None], ) -> tuple[dict[str, Any], dict[str, Any]]: """Prepare integration mappings for the given rule integrations.""" integration_mappings: dict[str, Any] = {} @@ -670,6 +681,14 @@ def prepare_integration_mappings( for stream in package_schema: flat_schema = package_schema[stream] stream_mappings = utils.flat_schema_to_index_mapping(flat_schema) + nested_multifields = self.find_nested_multifields(stream_mappings) + for field in nested_multifields: + field_name = str(field).split(".fields.")[0].replace(".", ".properties.") + ".fields" + log( + f"Warning: Nested multi-field `{field}` found in `{integration}-{stream}`. " + f"Removing parent field from schema for ES|QL validation." + ) + utils.delete_nested_key_from_dict(stream_mappings, field_name) utils.combine_dicts(integration_mappings, stream_mappings) index_lookup[f"{integration}-{stream}"] = stream_mappings @@ -754,8 +773,58 @@ def execute_query_against_indices( log(f"Got query columns: {', '.join(query_column_names)}") return query_columns + def find_nested_multifields(self, mapping: dict[str, Any], path: str = "") -> list[Any]: + """Recursively search for nested multi-fields in Elasticsearch mappings.""" + nested_multifields = [] + + for field, properties in mapping.items(): + current_path = f"{path}.{field}" if path else field + + if isinstance(properties, dict): + # Check if the field has a `fields` key + if "fields" in properties: + # Check if any subfield in `fields` also has a `fields` key + for subfield, subproperties in properties["fields"].items(): # type: ignore[reportUnknownVariableType] + if isinstance(subproperties, dict) and "fields" in subproperties: + nested_multifields.append(f"{current_path}.fields.{subfield}") # type: ignore[reportUnknownVariableType] + + # Recurse into subfields + if "properties" in properties: + nested_multifields.extend( # type: ignore[reportUnknownVariableType] + self.find_nested_multifields(properties["properties"], current_path) # type: ignore[reportUnknownVariableType] + ) + + return nested_multifields # type: ignore[reportUnknownVariableType] + + def get_ecs_schema_mappings(self, current_version: Version) -> dict[str, Any]: + """Get the ECS schema in an index mapping format (nested schema) handling scaled floats.""" + ecs_version = get_stack_schemas()[str(current_version)]["ecs"] + ecs_schemas = ecs.get_schemas() + ecs_schema_flattened: dict[str, Any] = {} + ecs_schema_scaled_floats: dict[str, Any] = {} + for index, info in ecs_schemas[ecs_version]["ecs_flat"].items(): + if info["type"] == "scaled_float": + ecs_schema_scaled_floats.update({index: info["scaling_factor"]}) + ecs_schema_flattened.update({index: info["type"]}) + ecs_schema = utils.convert_to_nested_schema(ecs_schema_flattened) + for index, info in ecs_schema_scaled_floats.items(): + parts = index.split(".") + current = ecs_schema + + # Traverse the ecs_schema to the correct nested dictionary + for part in parts[:-1]: # Traverse all parts except the last one + current = current.setdefault(part, {}).setdefault("properties", {}) + + current[parts[-1]].update({"scaling_factor": info}) + return ecs_schema + def prepare_mappings( - self, elastic_client: Elasticsearch, indices: list[str], stack_version: str, contents: TOMLRuleContents + self, + elastic_client: Elasticsearch, + indices: list[str], + 
stack_version: str, + contents: TOMLRuleContents, + log: Callable[[str], None], ) -> tuple[dict[str, Any], dict[str, Any], dict[str, Any]]: """Prepare index mappings for the given indices and rule integrations.""" existing_mappings, index_lookup = misc.get_existing_mappings(elastic_client, indices) @@ -768,7 +837,7 @@ def prepare_mappings( integration_schemas = load_integrations_schemas() integration_mappings, integration_index_lookup = self.prepare_integration_mappings( - rule_integrations, stack_version, package_manifests, integration_schemas + rule_integrations, stack_version, package_manifests, integration_schemas, log ) index_lookup.update(integration_index_lookup) @@ -789,6 +858,12 @@ def prepare_mappings( raise ValueError("No mappings found") index_lookup.update({"rule-non-ecs-index": non_ecs_mapping}) + # Load ECS in an index mapping format (nested schema) + current_version = Version.parse(load_current_package_version(), optional_minor_and_patch=True) + ecs_schema = self.get_ecs_schema_mappings(current_version) + + index_lookup.update({"rule-ecs-index": ecs_schema}) + return existing_mappings, index_lookup, combined_mappings def remote_validate_rule( @@ -815,18 +890,21 @@ def log(val: str) -> None: # Get mappings for all matching existing index templates existing_mappings, index_lookup, combined_mappings = self.prepare_mappings( - elastic_client, indices, stack_version, contents + elastic_client, indices, stack_version, contents, log ) log(f"Collected mappings: {len(existing_mappings)}") log(f"Combined mappings prepared: {len(combined_mappings)}") # Create remote indices full_index_str = self.create_remote_indices(elastic_client, existing_mappings, index_lookup, log) + utils.combine_dicts(combined_mappings, index_lookup["rule-non-ecs-index"]) + utils.combine_dicts(combined_mappings, index_lookup["rule-ecs-index"]) # Replace all sources with the test indices query = contents.data.query # type: ignore[reportUnknownVariableType] query = query.replace(indices_str, full_index_str) # type: ignore[reportUnknownVariableType] + # TODO these query_columns are the unique fields query_columns = self.execute_query_against_indices(elastic_client, query, full_index_str, log) # type: ignore[reportUnknownVariableType] # Validate that all fields (columns) are either dynamic fields or correctly mapped diff --git a/detection_rules/utils.py b/detection_rules/utils.py index 990e8fc1623..5bf667727bb 100644 --- a/detection_rules/utils.py +++ b/detection_rules/utils.py @@ -617,3 +617,14 @@ def get_column_from_index_mapping_schema(keys: list[str], current_schema: dict[s if not column_type and len(keys) > 1: return get_column_from_index_mapping_schema(keys[1:], current_schema=column.get("properties")) # type: ignore[reportUnknownVariableType] return column_type # type: ignore[reportUnknownVariableType] + + +def delete_nested_key_from_dict(d: dict[str, Any], compound_key: str) -> None: + """Delete a nested key from a dictionary.""" + keys = compound_key.split(".") + for key in keys[:-1]: + if key in d and isinstance(d[key], dict): + d = d[key] # type: ignore[reportUnknownVariableType] + else: + return + d.pop(keys[-1], None) From 8f58df650714d1db45b8a611e7e2a5279b878d50 Mon Sep 17 00:00:00 2001 From: eric-forte-elastic Date: Tue, 9 Sep 2025 13:01:40 -0400 Subject: [PATCH 21/93] Add validate support via env var --- detection_rules/rule.py | 2 - detection_rules/rule_validators.py | 85 +++++++++++++++++++++--------- tests/test_rules_remote.py | 8 +-- 3 files changed, 64 insertions(+), 31 deletions(-) diff --git 
a/detection_rules/rule.py b/detection_rules/rule.py index 1b9717bb246..0136da90dd9 100644 --- a/detection_rules/rule.py +++ b/detection_rules/rule.py @@ -650,8 +650,6 @@ def validate(self, _: "QueryRuleData", __: RuleMeta) -> None: @cached def get_required_fields(self, index: str) -> list[dict[str, Any]]: """Retrieves fields needed for the query along with type information from the schema.""" - if isinstance(self, ESQLValidator): - return [] current_version = Version.parse(load_current_package_version(), optional_minor_and_patch=True) ecs_version = get_stack_schemas()[str(current_version)]["ecs"] diff --git a/detection_rules/rule_validators.py b/detection_rules/rule_validators.py index f705f5b3cba..f3e35aa6f30 100644 --- a/detection_rules/rule_validators.py +++ b/detection_rules/rule_validators.py @@ -618,26 +618,44 @@ def validate_rule_type_configurations(self, data: EQLRuleData, meta: RuleMeta) - class ESQLValidator(QueryValidator): """Validate specific fields for ESQL query event types.""" + esql_unique_fields: list[str] + @cached_property def ast(self) -> None: # type: ignore[reportIncompatibleMethodOverride] return None @cached_property def unique_fields(self) -> list[str]: # type: ignore[reportIncompatibleMethodOverride] - """Return a list of unique fields in the query.""" - # return empty list for ES|QL rules until ast is available (friendlier than raising error) - return [] + """Return a list of unique fields in the query. Requires remote validation to have occurred.""" + if not self.esql_unique_fields: + return [] + return self.esql_unique_fields - def validate(self, _: "QueryRuleData", __: RuleMeta) -> None: # type: ignore[reportIncompatibleMethodOverride] + def validate(self, rule_data: "QueryRuleData", rule_meta: RuleMeta) -> None: # type: ignore[reportIncompatibleMethodOverride] """Validate an ESQL query while checking TOMLRule.""" - # TODO - # temporarily override to NOP until ES|QL query parsing is supported - # if ENV VAR : - # self.remote_validate_rule - # else: - # ESQLRuleData validation - - # NOTE will go away + if misc.getdefault("remote_esql_validation")(): + kibana_client = misc.get_kibana_client( + api_key=misc.getdefault("api_key")(), + cloud_id=misc.getdefault("cloud_id")(), + kibana_url=misc.getdefault("kibana_url")(), + space=misc.getdefault("space")(), + ignore_ssl_errors=misc.getdefault("ignore_ssl_errors")(), + ) + + elastic_client = misc.get_elasticsearch_client( + api_key=misc.getdefault("api_key")(), + cloud_id=misc.getdefault("cloud_id")(), + elasticsearch_url=misc.getdefault("elasticsearch_url")(), + ignore_ssl_errors=misc.getdefault("ignore_ssl_errors")(), + ) + self.remote_validate_rule( + kibana_client, + elastic_client, + rule_data.query, + rule_meta, + rule_data.rule_id, + ) + def validate_integration( self, _: QueryRuleData, @@ -647,14 +665,14 @@ def validate_integration( # Disabling self.validate(data, meta) pass - def get_rule_integrations(self, contents: TOMLRuleContents) -> list[str]: + def get_rule_integrations(self, metadata: RuleMeta) -> list[str]: """Retrieve rule integrations from metadata.""" rule_integrations: list[str] = [] - if contents.metadata.integration: - if isinstance(contents.metadata.integration, list): - rule_integrations = contents.metadata.integration + if metadata.integration: + if isinstance(metadata.integration, list): + rule_integrations = metadata.integration else: - rule_integrations = [contents.metadata.integration] + rule_integrations = [metadata.integration] return rule_integrations def prepare_integration_mappings( 
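The gate at the top of `validate()` keys off `misc.getdefault("remote_esql_validation")`, which follows the same config/environment lookup convention as the other settings used in this series (the workflow in PATCH 16 exports `DR_API_KEY` and `DR_ELASTICSEARCH_URL` for `getdefault("api_key")` and friends). A hedged sketch of enabling the new path for a local run, assuming the `DR_`-prefixed variable names hold here as well:

import os

# Assumed variable names, mirroring DR_API_KEY / DR_ELASTICSEARCH_URL above
os.environ["DR_REMOTE_ESQL_VALIDATION"] = "true"
os.environ["DR_KIBANA_URL"] = "https://localhost:5601"
os.environ["DR_ELASTICSEARCH_URL"] = "https://localhost:9200"
os.environ["DR_API_KEY"] = "<encoded-api-key>"
# With these set, ESQLValidator.validate() builds the Kibana and Elasticsearch
# clients and calls remote_validate_rule() instead of skipping ES|QL validation.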
@@ -823,14 +841,14 @@ def prepare_mappings(
         elastic_client: Elasticsearch,
         indices: list[str],
         stack_version: str,
-        contents: TOMLRuleContents,
+        metadata: RuleMeta,
         log: Callable[[str], None],
     ) -> tuple[dict[str, Any], dict[str, Any], dict[str, Any]]:
         """Prepare index mappings for the given indices and rule integrations."""
         existing_mappings, index_lookup = misc.get_existing_mappings(elastic_client, indices)
 
         # Collect mappings for the integrations
-        rule_integrations = self.get_rule_integrations(contents)
+        rule_integrations = self.get_rule_integrations(metadata)
 
         # Collect mappings for all relevant integrations for the given stack version
         package_manifests = load_integrations_manifests()
@@ -866,11 +884,29 @@ def prepare_mappings(
 
         return existing_mappings, index_lookup, combined_mappings
 
-    def remote_validate_rule(
+    def remote_validate_rule_contents(
         self, kibana_client: Kibana, elastic_client: Elasticsearch, contents: TOMLRuleContents, verbosity: int = 0
+    ) -> None:
+        """Remote validate a rule's ES|QL query using an Elastic Stack."""
+        self.remote_validate_rule(
+            kibana_client=kibana_client,
+            elastic_client=elastic_client,
+            query=contents.data.query,  # type: ignore[reportUnknownVariableType]
+            metadata=contents.metadata,
+            rule_id=contents.data.rule_id,
+            verbosity=verbosity,
+        )
+
+    def remote_validate_rule(  # noqa: PLR0913
+        self,
+        kibana_client: Kibana,
+        elastic_client: Elasticsearch,
+        query: str,
+        metadata: RuleMeta,
+        rule_id: str = "",
+        verbosity: int = 0,
     ) -> None:
         """Uses remote validation from an Elastic Stack to validate a given ES|QL rule"""
-        rule_id = contents.data.rule_id
 
         def log(val: str) -> None:
             """Log if verbosity is 1 or greater (1 corresponds to `-v` in pytest)"""
@@ -885,12 +921,12 @@ def log(val: str) -> None:
         stack_version = str(kibana_details["version"]["number"])
         log(f"Validating against {stack_version} stack")
 
-        indices_str, indices = utils.get_esql_query_indices(contents.data.query)  # type: ignore[reportUnknownVariableType]
+        indices_str, indices = utils.get_esql_query_indices(query)  # type: ignore[reportUnknownVariableType]
         log(f"Extracted indices from query: {', '.join(indices)}")
 
         # Get mappings for all matching existing index templates
         existing_mappings, index_lookup, combined_mappings = self.prepare_mappings(
-            elastic_client, indices, stack_version, contents, log
+            elastic_client, indices, stack_version, metadata, log
         )
         log(f"Collected mappings: {len(existing_mappings)}")
         log(f"Combined mappings prepared: {len(combined_mappings)}")
@@ -901,11 +937,10 @@ def log(val: str) -> None:
         utils.combine_dicts(combined_mappings, index_lookup["rule-ecs-index"])
 
         # Replace all sources with the test indices
-        query = contents.data.query  # type: ignore[reportUnknownVariableType]
         query = query.replace(indices_str, full_index_str)  # type: ignore[reportUnknownVariableType]
 
-        # TODO these query_columns are the unique fields
         query_columns = self.execute_query_against_indices(elastic_client, query, full_index_str, log)  # type: ignore[reportUnknownVariableType]
+        self.esql_unique_fields = query_columns
 
         # Validate that all fields (columns) are either dynamic fields or correctly mapped
         # against the combined mapping of all the indices
diff --git a/tests/test_rules_remote.py b/tests/test_rules_remote.py
index 568da6f66a2..477e59e59d4 100644
--- a/tests/test_rules_remote.py
+++ b/tests/test_rules_remote.py
@@ -45,18 +45,18 @@ def test_esql_rules(self):
         )
 
         # Retrieve verbosity level from pytest
-        verbosity = self._outcome.result.config.get_verbosity()
+        verbosity: int = int(self._outcome.result.config.get_verbosity())  # type: ignore[reportIncompatibleMethodOverride]
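
The source replacement above is plain string substitution on the query's FROM clause. A minimal sketch of the idea, where the test index name is hypothetical:

    # Sketch of the source swap performed by remote_validate_rule.
    query = 'from logs-aws.cloudtrail-* metadata _id | where event.action == "foo"'
    indices_str = "logs-aws.cloudtrail-*"  # as extracted by utils.get_esql_query_indices
    full_index_str = "rule-test-index"     # hypothetical name for the temporary test index
    rewritten = query.replace(indices_str, full_index_str)
    # 'from rule-test-index metadata _id | where event.action == "foo"'
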

         failed_count = 0
-        fail_list = []
+        fail_list: list[str] = []
         max_retries = 3
         for r in esql_rules:
             print()
             retry_count = 0
             while retry_count < max_retries:
                 try:
-                    validator = ESQLValidator(r.contents.data.query)
-                    validator.remote_validate_rule(kibana_client, elastic_client, r.contents, verbosity)
+                    validator = ESQLValidator(r.contents.data.query)  # type: ignore[reportIncompatibleMethodOverride]
+                    validator.remote_validate_rule_contents(kibana_client, elastic_client, r.contents, verbosity)
                     break
                 except (ValueError, BadRequestError) as e:
                     print(f"FAILURE: {e}")

From 90793e46826df4287c22c19405f56bcf2335ac2b Mon Sep 17 00:00:00 2001
From: eric-forte-elastic
Date: Tue, 9 Sep 2025 14:00:06 -0400
Subject: [PATCH 22/93] Add unique field support

---
 detection_rules/remote_validation.py | 15 +++++-------
 detection_rules/rule.py | 11 ++++++++-
 detection_rules/rule_validators.py | 36 +++++++++++++++++++---------
 3 files changed, 41 insertions(+), 21 deletions(-)

diff --git a/detection_rules/remote_validation.py b/detection_rules/remote_validation.py
index 90c8d1a24f2..7bb27858f35 100644
--- a/detection_rules/remote_validation.py
+++ b/detection_rules/remote_validation.py
@@ -19,6 +19,7 @@
 from .config import load_current_package_version
 from .misc import ClientError, get_elasticsearch_client, get_kibana_client, getdefault
 from .rule import TOMLRule, TOMLRuleContents
+from .rule_validators import ESQLValidator
 from .schemas import definitions
 
@@ -180,18 +181,14 @@ def request(c: TOMLRuleContents) -> None:
     def validate_esql(self, contents: TOMLRuleContents) -> dict[str, Any]:
         query = contents.data.query  # type: ignore[reportAttributeAccessIssue]
         rule_id = contents.data.rule_id
-        headers = {"accept": "application/json", "content-type": "application/json"}
-        body = {"query": f"{query} | LIMIT 0"}
         if not self.es_client:
             raise ValueError("No ES client found")
+
+        if not self.kibana_client:
+            raise ValueError("No Kibana client found")
         try:
-            response = self.es_client.perform_request(
-                "POST",
-                "/_query",
-                headers=headers,
-                params={"pretty": True},
-                body=body,
-            )
+            validator = ESQLValidator(contents.data.query)  # type: ignore[reportIncompatibleMethodOverride]
+            response = validator.remote_validate_rule_contents(self.kibana_client, self.es_client, contents)
         except Exception as exc:
             if isinstance(exc, elasticsearch.BadRequestError):
                 raise ValidationError(f"ES|QL query failed: {exc} for rule: {rule_id}, query: \n{query}") from exc
diff --git a/detection_rules/rule.py b/detection_rules/rule.py
index 0136da90dd9..58308579c0b 100644
--- a/detection_rules/rule.py
+++ b/detection_rules/rule.py
@@ -647,6 +647,10 @@ def unique_fields(self) -> Any:
     def validate(self, _: "QueryRuleData", __: RuleMeta) -> None:
         raise NotImplementedError
 
+    def get_unique_field_type(self, __: str) -> None:
+        """Used to get unique field types when schema is not used"""
+        raise NotImplementedError
+
     @cached
     def get_required_fields(self, index: str) -> list[dict[str, Any]]:
         """Retrieves fields needed for the query along with type information from the schema."""
@@ -663,7 +667,9 @@ def get_required_fields(self, index: str) -> list[dict[str, Any]]:
         # construct integration schemas
         packages_manifest = load_integrations_manifests()
         integrations_schemas = load_integrations_schemas()
-        datasets, _ = beats.get_datasets_and_modules(self.ast)
+        datasets: set[str] = set()
+        if self.ast:
+            datasets, _ = beats.get_datasets_and_modules(self.ast)
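
With esql_unique_fields populated after a remote run, the validator can answer per-field type lookups that the static schemas cannot. A small sketch of the lookup behavior added in this patch, with an illustrative field name:

    # Sketch: esql_unique_fields caches the {"name", "type"} column dicts returned by
    # the remote query; get_unique_field_type is a linear scan over that cache.
    validator = ESQLValidator("from logs-* | keep aws.cloudtrail.user_identity.type")
    validator.esql_unique_fields = [{"name": "aws.cloudtrail.user_identity.type", "type": "keyword"}]
    validator.get_unique_field_type("aws.cloudtrail.user_identity.type")  # -> "keyword"
    validator.get_unique_field_type("missing.field")                      # -> None
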
package_integrations = parse_datasets(list(datasets), packages_manifest) int_schema: dict[str, Any] = {} data = {"notify": False} @@ -691,6 +697,9 @@ def get_required_fields(self, index: str) -> list[dict[str, Any]]: elif endgame_schema: field_type = endgame_schema.endgame_schema.get(fld, None) + if not field_type and isinstance(self, ESQLValidator): + field_type = self.get_unique_field_type(fld) + required.append({"name": fld, "type": field_type or "unknown", "ecs": is_ecs}) return sorted(required, key=lambda f: f["name"]) diff --git a/detection_rules/rule_validators.py b/detection_rules/rule_validators.py index f3e35aa6f30..f6f76aa0bf6 100644 --- a/detection_rules/rule_validators.py +++ b/detection_rules/rule_validators.py @@ -16,6 +16,7 @@ import click import eql # type: ignore[reportMissingTypeStubs] import kql # type: ignore[reportMissingTypeStubs] +from elastic_transport import ObjectApiResponse from elasticsearch import Elasticsearch # type: ignore[reportMissingTypeStubs] from eql import ast # type: ignore[reportMissingTypeStubs] from eql.parser import KvTree, LarkToEQL, NodeInfo, TypeHint # type: ignore[reportMissingTypeStubs] @@ -618,18 +619,29 @@ def validate_rule_type_configurations(self, data: EQLRuleData, meta: RuleMeta) - class ESQLValidator(QueryValidator): """Validate specific fields for ESQL query event types.""" - esql_unique_fields: list[str] + def __init__(self, query: str) -> None: + """Initialize the ESQLValidator with the given query.""" + super().__init__(query) + self.esql_unique_fields: list[dict[str, str]] = [] @cached_property def ast(self) -> None: # type: ignore[reportIncompatibleMethodOverride] + """There is no AST for ESQL until we have an ESQL parser.""" return None @cached_property def unique_fields(self) -> list[str]: # type: ignore[reportIncompatibleMethodOverride] """Return a list of unique fields in the query. Requires remote validation to have occurred.""" - if not self.esql_unique_fields: - return [] - return self.esql_unique_fields + if self.esql_unique_fields: + return [field["name"] for field in self.esql_unique_fields] + return [] + + def get_unique_field_type(self, field_name: str) -> str | None: # type: ignore[reportIncompatibleMethodOverride] + """Get the type of the unique field. 
Requires remote validation to have occurred."""
+        for field in self.esql_unique_fields:
+            if field["name"] == field_name:
+                return field["type"]
+        return None
 
     def validate(self, rule_data: "QueryRuleData", rule_meta: RuleMeta) -> None:  # type: ignore[reportIncompatibleMethodOverride]
         """Validate an ESQL query while checking TOMLRule."""
@@ -648,7 +660,7 @@ def validate(self, rule_data: "QueryRuleData", rule_meta: RuleMeta) -> None:  #
             elasticsearch_url=misc.getdefault("elasticsearch_url")(),
             ignore_ssl_errors=misc.getdefault("ignore_ssl_errors")(),
         )
-        self.remote_validate_rule(
+        _ = self.remote_validate_rule(
             kibana_client,
             elastic_client,
             rule_data.query,
@@ -774,7 +786,7 @@ def execute_query_against_indices(
         test_index_str: str,
         log: Callable[[str], None],
         delete_indices: bool = True,
-    ) -> list[Any]:
+    ) -> tuple[list[Any], ObjectApiResponse[Any]]:
         """Execute the ESQL query against the test indices on a remote Stack and return the columns."""
         try:
             log(f"Executing a query against `{test_index_str}`")
@@ -789,7 +801,7 @@ def execute_query_against_indices(
         query_column_names = [c["name"] for c in query_columns]
         log(f"Got query columns: {', '.join(query_column_names)}")
 
-        return query_columns
+        return query_columns, response
 
     def find_nested_multifields(self, mapping: dict[str, Any], path: str = "") -> list[Any]:
         """Recursively search for nested multi-fields in Elasticsearch mappings."""
@@ -886,9 +898,9 @@ def prepare_mappings(
 
     def remote_validate_rule_contents(
         self, kibana_client: Kibana, elastic_client: Elasticsearch, contents: TOMLRuleContents, verbosity: int = 0
-    ) -> None:
+    ) -> ObjectApiResponse[Any]:
         """Remote validate a rule's ES|QL query using an Elastic Stack."""
-        self.remote_validate_rule(
+        return self.remote_validate_rule(
             kibana_client=kibana_client,
             elastic_client=elastic_client,
             query=contents.data.query,  # type: ignore[reportUnknownVariableType]
@@ -905,7 +917,7 @@ def remote_validate_rule(  # noqa: PLR0913
         metadata: RuleMeta,
         rule_id: str = "",
         verbosity: int = 0,
-    ) -> None:
+    ) -> ObjectApiResponse[Any]:
         """Uses remote validation from an Elastic Stack to validate a given ES|QL rule"""
 
         def log(val: str) -> None:
@@ -939,7 +951,7 @@ def log(val: str) -> None:
 
         # Replace all sources with the test indices
         query = query.replace(indices_str, full_index_str)  # type: ignore[reportUnknownVariableType]
 
-        query_columns = self.execute_query_against_indices(elastic_client, query, full_index_str, log)  # type: ignore[reportUnknownVariableType]
+        query_columns, response = self.execute_query_against_indices(elastic_client, query, full_index_str, log)  # type: ignore[reportUnknownVariableType]
         self.esql_unique_fields = query_columns
 
         # Validate that all fields (columns) are either dynamic fields or correctly mapped
@@ -949,6 +961,8 @@ def log(val: str) -> None:
         else:
             log("Dynamic column(s) have improper formatting.")
 
+        return response
+
 
 def extract_error_field(source: str, exc: eql.EqlParseError | kql.KqlParseError) -> str | None:
     """Extract the field name from an EQL or KQL parse error."""

From f7c14769cc0a567d04d7f2f242409a197ff30cf0 Mon Sep 17 00:00:00 2001
From: eric-forte-elastic
Date: Tue, 9 Sep 2025 14:04:12 -0400
Subject: [PATCH 23/93] Update with DR_REMOTE_ESQL_VALIDATION

---
 CLI.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/CLI.md b/CLI.md
index d59e025cca5..5015fe22f18 100644
--- a/CLI.md
+++ b/CLI.md
@@ -49,6 +49,8 @@ Using the environment variable `DR_BYPASS_TIMELINE_TEMPLATE_VALIDATION` will byp
 Using the environment variable `DR_CLI_MAX_WIDTH` will set a custom
max width for the click CLI. For instance, some users may want to increase the default value in cases where help messages are cut off.
 
+Using the environment variable `DR_REMOTE_ESQL_VALIDATION` will enable remote ES|QL validation for rules that use ES|QL queries. This validation is performed whenever a rule is loaded, including, for example, by the `view-rule` command. It requires the appropriate `kibana_url` or `cloud_id`, `api_key`, and `elasticsearch_url` values to be set in the config file or as environment variables.
+
 ## Importing rules into the repo
 
 You can import rules into the repo using the `create-rule` or `import-rules-to-repo` commands. Both of these commands will

From d18b4939371864941a2d9232a89623dfa1f242e7 Mon Sep 17 00:00:00 2001
From: eric-forte-elastic
Date: Tue, 9 Sep 2025 14:56:11 -0400
Subject: [PATCH 24/93] Update to use remote validation logic

---
 tests/test_rules_remote.py | 24 ++++++++----------------
 1 file changed, 8 insertions(+), 16 deletions(-)

diff --git a/tests/test_rules_remote.py b/tests/test_rules_remote.py
index 477e59e59d4..03405b4d527 100644
--- a/tests/test_rules_remote.py
+++ b/tests/test_rules_remote.py
@@ -9,7 +9,8 @@
 from elasticsearch import BadRequestError
 from elasticsearch import ConnectionError as ESConnectionError
 
-from detection_rules.misc import get_default_config, get_elasticsearch_client, get_kibana_client, getdefault
+from detection_rules.misc import get_default_config
+from detection_rules.remote_validation import RemoteConnector
 from detection_rules.rule_validators import ESQLValidator
 
 from .base import BaseRuleTest
@@ -29,20 +30,9 @@ def test_esql_rules(self):
         if not esql_rules:
             return
 
-        kibana_client = get_kibana_client(
-            api_key=getdefault("api_key")(),
-            cloud_id=getdefault("cloud_id")(),
-            kibana_url=getdefault("kibana_url")(),
-            space=getdefault("space")(),
-            ignore_ssl_errors=getdefault("ignore_ssl_errors")(),
-        )
-
-        elastic_client = get_elasticsearch_client(
-            api_key=getdefault("api_key")(),
-            cloud_id=getdefault("cloud_id")(),
-            elasticsearch_url=getdefault("elasticsearch_url")(),
-            ignore_ssl_errors=getdefault("ignore_ssl_errors")(),
-        )
+        remote_connector = RemoteConnector()
+        if not remote_connector.es_client or not remote_connector.kibana_client:
+            self.skipTest("Skipping remote validation due to missing client")
 
         # Retrieve verbosity level from pytest
         verbosity: int = int(self._outcome.result.config.get_verbosity())  # type: ignore[reportIncompatibleMethodOverride]
@@ -56,7 +46,9 @@ def test_esql_rules(self):
             while retry_count < max_retries:
                 try:
                     validator = ESQLValidator(r.contents.data.query)  # type: ignore[reportIncompatibleMethodOverride]
-                    validator.remote_validate_rule_contents(kibana_client, elastic_client, r.contents, verbosity)
+                    _ = validator.remote_validate_rule_contents(
+                        remote_connector.kibana_client, remote_connector.es_client, r.contents, verbosity
+                    )
                     break
                 except (ValueError, BadRequestError) as e:
                     print(f"FAILURE: {e}")

From 6456cbc9f66e2a8267916ee9236c4e3637296f51 Mon Sep 17 00:00:00 2001
From: eric-forte-elastic
Date: Tue, 9 Sep 2025 15:36:45 -0400
Subject: [PATCH 25/93] Add index_replacement option

---
 detection_rules/remote_validation.py | 34 +++++++++++++++++++++-------
 1 file changed, 26 insertions(+), 8 deletions(-)

diff --git a/detection_rules/remote_validation.py b/detection_rules/remote_validation.py
index 7bb27858f35..d5ddbd84dc6 100644
--- a/detection_rules/remote_validation.py
+++ b/detection_rules/remote_validation.py
@@ -178,7 +178,7 @@ def request(c: TOMLRuleContents) -> None:
 
         return responses  # type: ignore[reportUnknownVariableType]
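
A minimal sketch of driving this path from code, assuming a stack is configured; the rule file path here is hypothetical:

    # Sketch: RemoteConnector picks up api_key/cloud_id/URLs from the config, and the
    # index_replacement option below routes through ESQLValidator's test-index flow.
    from pathlib import Path

    from detection_rules.remote_validation import RemoteConnector
    from detection_rules.rule_loader import RuleCollection

    rule = RuleCollection().load_file(Path("rules/example_rule.toml"))  # hypothetical path
    connector = RemoteConnector()
    response_body = connector.validate_esql(rule.contents, index_replacement=True)
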

-    def validate_esql(self, contents: TOMLRuleContents) -> dict[str, Any]:
+    def validate_esql(self, contents: TOMLRuleContents, index_replacement: bool = False) -> dict[str, Any]:
         query = contents.data.query  # type: ignore[reportAttributeAccessIssue]
         rule_id = contents.data.rule_id
         if not self.es_client:
@@ -186,13 +186,31 @@ def validate_esql(self, contents: TOMLRuleContents) -> dict[str, Any]:
             raise ValueError("No ES client found")
         if not self.kibana_client:
             raise ValueError("No Kibana client found")
-        try:
-            validator = ESQLValidator(contents.data.query)  # type: ignore[reportIncompatibleMethodOverride]
-            response = validator.remote_validate_rule_contents(self.kibana_client, self.es_client, contents)
-        except Exception as exc:
-            if isinstance(exc, elasticsearch.BadRequestError):
-                raise ValidationError(f"ES|QL query failed: {exc} for rule: {rule_id}, query: \n{query}") from exc
-            raise Exception(f"ES|QL query failed for rule: {rule_id}, query: \n{query}") from exc  # noqa: TRY002
+        if index_replacement:
+            try:
+                validator = ESQLValidator(contents.data.query)  # type: ignore[reportIncompatibleMethodOverride]
+                response = validator.remote_validate_rule_contents(self.kibana_client, self.es_client, contents)
+            except Exception as exc:
+                if isinstance(exc, elasticsearch.BadRequestError):
+                    raise ValidationError(f"ES|QL query failed: {exc} for rule: {rule_id}, query: \n{query}") from exc
+                raise Exception(f"ES|QL query failed for rule: {rule_id}, query: \n{query}") from exc  # noqa: TRY002
+        else:
+            headers = {"accept": "application/json", "content-type": "application/json"}
+            body = {"query": f"{query} | LIMIT 0"}
+            if not self.es_client:
+                raise ValueError("No ES client found")
+            try:
+                response = self.es_client.perform_request(
+                    "POST",
+                    "/_query",
+                    headers=headers,
+                    params={"pretty": True},
+                    body=body,
+                )
+            except Exception as exc:
+                if isinstance(exc, elasticsearch.BadRequestError):
+                    raise ValidationError(f"ES|QL query failed: {exc} for rule: {rule_id}, query: \n{query}") from exc
+                raise Exception(f"ES|QL query failed for rule: {rule_id}, query: \n{query}") from exc  # noqa: TRY002
 
         return response.body

From a0798aa041d81cd954a701f5b4bc3524223bd42a Mon Sep 17 00:00:00 2001
From: eric-forte-elastic
Date: Tue, 9 Sep 2025 15:38:03 -0400
Subject: [PATCH 26/93] Add docstring

---
 detection_rules/remote_validation.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/detection_rules/remote_validation.py b/detection_rules/remote_validation.py
index d5ddbd84dc6..36fc78acffa 100644
--- a/detection_rules/remote_validation.py
+++ b/detection_rules/remote_validation.py
@@ -179,13 +179,14 @@ def request(c: TOMLRuleContents) -> None:
         return responses  # type: ignore[reportUnknownVariableType]
 
     def validate_esql(self, contents: TOMLRuleContents, index_replacement: bool = False) -> dict[str, Any]:
+        """Validate query for "esql" rule types.
Optionally replace indices and use ESQLValidator.""" query = contents.data.query # type: ignore[reportAttributeAccessIssue] rule_id = contents.data.rule_id if not self.es_client: raise ValueError("No ES client found") - if not self.kibana_client: raise ValueError("No Kibana client found") + if index_replacement: try: validator = ESQLValidator(contents.data.query) # type: ignore[reportIncompatibleMethodOverride] From 252aafab967ce44ead5e6cdb539fe883aab8a603 Mon Sep 17 00:00:00 2001 From: eric-forte-elastic Date: Tue, 9 Sep 2025 15:46:55 -0400 Subject: [PATCH 27/93] minor bump --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 6f00b6310ff..85c1850f165 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "detection_rules" -version = "1.5.0" +version = "1.4.0" description = "Detection Rules is the home for rules used by Elastic Security. This repository is used for the development, maintenance, testing, validation, and release of rules for Elastic Security’s Detection Engine." readme = "README.md" requires-python = ">=3.12" From edd18ee0d6773d8abd6ffdbe1930c85263aa6a3d Mon Sep 17 00:00:00 2001 From: eric-forte-elastic Date: Wed, 10 Sep 2025 15:33:00 -0400 Subject: [PATCH 28/93] Remove excess function --- detection_rules/rule_validators.py | 17 ++++++----------- pyproject.toml | 2 +- 2 files changed, 7 insertions(+), 12 deletions(-) diff --git a/detection_rules/rule_validators.py b/detection_rules/rule_validators.py index f72727d7191..403611b315c 100644 --- a/detection_rules/rule_validators.py +++ b/detection_rules/rule_validators.py @@ -27,14 +27,18 @@ ) from eql.parser import _parse as base_parse # type: ignore[reportMissingTypeStubs] from kibana import Kibana # type: ignore[reportMissingTypeStubs] -from marshmallow import ValidationError from semver import Version from . import ecs, endgame, integrations, misc, utils from .beats import get_datasets_and_modules, parse_beats_from_index from .config import CUSTOM_RULES_DIR, load_current_package_version, parse_rules_config from .custom_schemas import update_auto_generated_schema -from .integrations import get_integration_schema_data, load_integrations_manifests, load_integrations_schemas, parse_datasets +from .integrations import ( + get_integration_schema_data, + load_integrations_manifests, + load_integrations_schemas, + parse_datasets, +) from .rule import EQLRuleData, QueryRuleData, QueryValidator, RuleMeta, TOMLRuleContents, set_eql_config from .schemas import get_stack_schemas @@ -766,15 +770,6 @@ def validate(self, rule_data: "QueryRuleData", rule_meta: RuleMeta) -> None: # rule_data.rule_id, ) - def validate_integration( - self, - _: QueryRuleData, - __: RuleMeta, - ___: list[dict[str, Any]], - ) -> ValidationError | None | ValueError: - # Disabling self.validate(data, meta) - pass - def get_rule_integrations(self, metadata: RuleMeta) -> list[str]: """Retrieve rule integrations from metadata.""" rule_integrations: list[str] = [] diff --git a/pyproject.toml b/pyproject.toml index 85c1850f165..6f00b6310ff 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "detection_rules" -version = "1.4.0" +version = "1.5.0" description = "Detection Rules is the home for rules used by Elastic Security. This repository is used for the development, maintenance, testing, validation, and release of rules for Elastic Security’s Detection Engine." 
readme = "README.md" requires-python = ">=3.12" From f21442dde755da07cb2c3454ccb8b44ab95a5301 Mon Sep 17 00:00:00 2001 From: eric-forte-elastic Date: Tue, 16 Sep 2025 00:56:20 -0400 Subject: [PATCH 29/93] Add support for event.dataset --- detection_rules/rule.py | 13 ++++++++++ detection_rules/rule_validators.py | 30 +++++++++++++++++++++-- detection_rules/utils.py | 38 +++++++++++++++++++++++++++++- 3 files changed, 78 insertions(+), 3 deletions(-) diff --git a/detection_rules/rule.py b/detection_rules/rule.py index 0b7f8276150..29da3c64095 100644 --- a/detection_rules/rule.py +++ b/detection_rules/rule.py @@ -430,6 +430,8 @@ def save_schema(cls) -> None: def validate_query(self, _: RuleMeta) -> None: pass + # TODO do we need a get_restricted fields for ESQL? I expect no + @cached_property def get_restricted_fields(self) -> dict[str, tuple[Version | None, Version | None]] | None: """Get stack version restricted fields.""" @@ -1366,6 +1368,13 @@ def _convert_add_related_integrations(self, obj: dict[str, Any]) -> None: field_name = "related_integrations" package_integrations = obj.get(field_name, []) + # TODO self.metadata.integration is blank for ESQL, perhaps populate this from remote validation + # type(self.data) + # + # self.data.validator (grab related integrations from validator) + # ESQLValidator + if self.type == "esql": + return if not package_integrations and self.metadata.integration: packages_manifest = load_integrations_manifests() current_stack_version = load_current_package_version() @@ -1393,6 +1402,7 @@ def _convert_add_related_integrations(self, obj: dict[str, Any]) -> None: packages_manifest=packages_manifest, ) + # TODO do we use policy templates in ESQL? # if integration is not a policy template remove if package["version"]: version_data = packages_manifest.get(package["package"], {}).get( @@ -1507,6 +1517,9 @@ def get_packaged_integrations( ) -> list[dict[str, Any]] | None: packaged_integrations: list[dict[str, Any]] = [] datasets, _ = beats.get_datasets_and_modules(data.get("ast") or []) # type: ignore[reportArgumentType] + # TODO above for esql + # Use function from utils? Or have it set? 
+        # Combine with package_manifeest
 
         # integration is None to remove duplicate references upstream in Kibana
         # chronologically, event.dataset, data_stream.dataset is checked for package:integration, then rule tags
         # if both exist, rule tags are only used if defined in definitions for non-dataset packages
diff --git a/detection_rules/rule_validators.py b/detection_rules/rule_validators.py
index 403611b315c..47606d969fa 100644
--- a/detection_rules/rule_validators.py
+++ b/detection_rules/rule_validators.py
@@ -402,6 +402,7 @@ def build_validation_plan(self, data: "QueryRuleData", meta: RuleMeta) -> list[V
 
         # Helper for union-by-stack integration targets
         def add_accumulated_integration_targets(query_text: str, packaged: list[dict[str, Any]], context: str) -> None:
+            """Add integration-based validation targets by accumulating schemas per stack version."""
             combined_by_stack: dict[str, dict[str, Any]] = {}
             ecs_by_stack: dict[str, str] = {}
             packages_by_stack: dict[str, set[str]] = {}
@@ -725,6 +726,7 @@ def __init__(self, query: str) -> None:
         """Initialize the ESQLValidator with the given query."""
         super().__init__(query)
         self.esql_unique_fields: list[dict[str, str]] = []
+        self.related_integrations: list[dict[str, str]] = []
 
     @cached_property
     def ast(self) -> None:  # type: ignore[reportIncompatibleMethodOverride]
@@ -783,6 +785,7 @@ def get_rule_integrations(self, metadata: RuleMeta) -> list[str]:
     def prepare_integration_mappings(
         self,
         rule_integrations: list[str],
+        event_dataset_integrations: list[utils.EventDataset],
         stack_version: str,
         package_manifests: Any,
         integration_schemas: Any,
@@ -791,6 +794,20 @@ def prepare_integration_mappings(
         """Prepare integration mappings for the given rule integrations."""
         integration_mappings: dict[str, Any] = {}
         index_lookup: dict[str, Any] = {}
+        dataset_restriction: dict[str, list[str]] = {}
+
+        # Process restrictions; note these two for-loops must remain separate
+        for event_dataset in event_dataset_integrations:
+            # Ensure the integration is in rule_integrations
+            if event_dataset.integration not in rule_integrations:
+                dataset_restriction.setdefault(event_dataset.integration, []).append(event_dataset.datastream)
+        for event_dataset in event_dataset_integrations:
+            if event_dataset.integration not in rule_integrations:
+                rule_integrations.append(event_dataset.integration)
+
+        # TODO add self setting for list of integrations
+        # perhaps self.related_integrations
+
         for integration in rule_integrations:
             package = integration
             package_version, _ = integrations.find_latest_compatible_version(
                 package,
                 "",
                 Version.parse(stack_version),
                 package_manifests,
             )
             package_schema = integration_schemas[package][package_version]
 
+            # Apply dataset restrictions if any
+            if integration in dataset_restriction:
+                allowed_keys = dataset_restriction[integration]
+                package_schema = {key: value for key, value in package_schema.items() if key in allowed_keys}
+
             for stream in package_schema:
                 flat_schema = package_schema[stream]
                 stream_mappings = utils.flat_schema_to_index_mapping(flat_schema)
@@ -945,6 +967,7 @@ def prepare_mappings(
         self,
         elastic_client: Elasticsearch,
         indices: list[str],
+        event_dataset_integrations: list[utils.EventDataset],
         stack_version: str,
         metadata: RuleMeta,
         log: Callable[[str], None],
     ) -> tuple[dict[str, Any], dict[str, Any], dict[str, Any]]:
         """Prepare index mappings for the given indices and rule integrations."""
 
         integration_schemas = load_integrations_schemas()
 
         integration_mappings, integration_index_lookup = self.prepare_integration_mappings(
-            rule_integrations, stack_version, package_manifests, integration_schemas, log
+            rule_integrations, event_dataset_integrations, stack_version, 
package_manifests, integration_schemas, log
         )
 
         index_lookup.update(integration_index_lookup)
@@ -1029,9 +1052,12 @@ def log(val: str) -> None:
         indices_str, indices = utils.get_esql_query_indices(query)  # type: ignore[reportUnknownVariableType]
         log(f"Extracted indices from query: {', '.join(indices)}")
 
+        event_dataset_integrations = utils.get_esql_query_event_dataset_integrations(query)
+        log(f"Extracted Event Dataset integrations from query: {', '.join(map(str, event_dataset_integrations))}")
+
         # Get mappings for all matching existing index templates
         existing_mappings, index_lookup, combined_mappings = self.prepare_mappings(
-            elastic_client, indices, stack_version, metadata, log
+            elastic_client, indices, event_dataset_integrations, stack_version, metadata, log
         )
         log(f"Collected mappings: {len(existing_mappings)}")
         log(f"Combined mappings prepared: {len(combined_mappings)}")
diff --git a/detection_rules/utils.py b/detection_rules/utils.py
index 5bf667727bb..12548bf5686 100644
--- a/detection_rules/utils.py
+++ b/detection_rules/utils.py
@@ -18,7 +18,7 @@
 import subprocess
 import zipfile
 from collections.abc import Callable, Iterator
-from dataclasses import astuple, is_dataclass
+from dataclasses import astuple, is_dataclass, dataclass
 from datetime import UTC, date, datetime
 from pathlib import Path
 from string import Template
@@ -544,6 +544,42 @@ def get_esql_query_indices(query: str) -> tuple[str, list[str]]:
     return sources_str, [source.strip() for source in sources_str.split(",")]
 
 
+# NOTE: this is done with a dataclass but could also be done with a dict, etc.
+# Other places model the same idea differently,
+# e.g. parse_datasets in integrations.py
+@dataclass
+class EventDataset:
+    integration: str
+    datastream: str
+
+
+def get_esql_query_event_dataset_integrations(query: str) -> list[EventDataset]:
+    """Extract event.dataset integrations from an ES|QL query and return as EventDataset objects."""
+
+    # Regex to match event.dataset in ("value1", "value2") or event.dataset == "value"
+    dataset_in_regex = re.compile(r"event\.dataset\s+in\s*\(\s*([^)]+)\s*\)")
+    dataset_eq_regex = re.compile(r'event\.dataset\s*==\s*"([^"]+)"')
+
+    # Extract datasets from `event.dataset in (...)`
+    datasets: list[str] = []
+    in_match = dataset_in_regex.search(query)
+    if in_match:
+        datasets.extend([ds.strip().strip('"') for ds in in_match.group(1).split(",")])
+
+    # Extract datasets from `event.dataset == "..."`
+    eq_match = dataset_eq_regex.search(query)
+    if eq_match:
+        datasets.append(eq_match.group(1))
+
+    event_datasets: list[EventDataset] = []
+    for dataset in datasets:
+        parts = dataset.split(".")
+        if len(parts) == 2:  # Ensure there are exactly two parts
+            event_datasets.append(EventDataset(integration=parts[0], datastream=parts[1]))
+
+    return event_datasets
+
+
 def convert_to_nested_schema(flat_schemas: dict[str, str]) -> dict[str, Any]:
     """Convert a flat schema to a nested schema with 'properties' for each sub-key."""
     nested_schema = {}

From e17ab0e4c64f9020f58816b75637ba46d179f3c3 Mon Sep 17 00:00:00 2001
From: eric-forte-elastic
Date: Tue, 16 Sep 2025 15:02:43 -0400
Subject: [PATCH 30/93] Update related integrations

---
 detection_rules/rule.py | 8 ++++----
 detection_rules/utils.py | 11 ++++++++---
 2 files changed, 12 insertions(+), 7 deletions(-)

diff --git a/detection_rules/rule.py b/detection_rules/rule.py
index 29da3c64095..86573e72f90 100644
--- a/detection_rules/rule.py
+++ b/detection_rules/rule.py
@@ -1368,13 +1368,11 @@ def _convert_add_related_integrations(self, obj: dict[str, Any]) -> None:
         field_name = "related_integrations"
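
To make the extraction concrete, a hedged example of what get_esql_query_event_dataset_integrations, added above, yields for a typical query (expected output shown as comments):

    # Example run of the event.dataset extraction shown above.
    from detection_rules.utils import get_esql_query_event_dataset_integrations

    q = 'from logs-aws.cloudtrail* | where event.dataset in ("aws.cloudtrail", "aws.billing")'
    for ed in get_esql_query_event_dataset_integrations(q):
        print(ed.integration, ed.datastream)
    # aws cloudtrail
    # aws billing
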
"related_integrations" package_integrations = obj.get(field_name, []) - # TODO self.metadata.integration is blank for ESQL, perhaps populate this from remote validation + # TODO self.metadata.integration is blank for ESQL, perhaps populate this from remote validation? # type(self.data) # # self.data.validator (grab related integrations from validator) # ESQLValidator - if self.type == "esql": - return if not package_integrations and self.metadata.integration: packages_manifest = load_integrations_manifests() current_stack_version = load_current_package_version() @@ -1520,7 +1518,9 @@ def get_packaged_integrations( # TODO above for esql # Use function from utils? Or have it set? # Combine with package_manifeest - + if isinstance(data, ESQLRuleData): + dataset_objs = utils.get_esql_query_event_dataset_integrations(data.query) + datasets.update(str(obj) for obj in dataset_objs) # integration is None to remove duplicate references upstream in Kibana # chronologically, event.dataset, data_stream.dataset is checked for package:integration, then rule tags # if both exist, rule tags are only used if defined in definitions for non-dataset packages diff --git a/detection_rules/utils.py b/detection_rules/utils.py index 12548bf5686..504b839ccf5 100644 --- a/detection_rules/utils.py +++ b/detection_rules/utils.py @@ -18,7 +18,7 @@ import subprocess import zipfile from collections.abc import Callable, Iterator -from dataclasses import astuple, is_dataclass, dataclass +from dataclasses import astuple, dataclass, is_dataclass from datetime import UTC, date, datetime from pathlib import Path from string import Template @@ -549,13 +549,18 @@ def get_esql_query_indices(query: str) -> tuple[str, list[str]]: # such as in integrations.py def parse_datasets @dataclass class EventDataset: + """Dataclass for event.dataset with integration and datastream parts.""" + integration: str datastream: str + def __str__(self) -> str: + return f"{self.integration}.{self.datastream}" + def get_esql_query_event_dataset_integrations(query: str) -> list[EventDataset]: """Extract event.dataset integrations from an ES|QL query and return as EventDataset objects.""" - + number_of_parts = 2 # Regex to match event.dataset in ("value1", "value2") or event.dataset == "value" dataset_in_regex = re.compile(r"event\.dataset\s+in\s*\(\s*([^)]+)\s*\)") dataset_eq_regex = re.compile(r'event\.dataset\s*==\s*"([^"]+)"') @@ -574,7 +579,7 @@ def get_esql_query_event_dataset_integrations(query: str) -> list[EventDataset]: event_datasets: list[EventDataset] = [] for dataset in datasets: parts = dataset.split(".") - if len(parts) == 2: # Ensure there are exactly two parts + if len(parts) == number_of_parts: # Ensure there are exactly two parts event_datasets.append(EventDataset(integration=parts[0], datastream=parts[1])) return event_datasets From 676503ee76095d3a0a7642ac1f518043d484b1e7 Mon Sep 17 00:00:00 2001 From: eric-forte-elastic Date: Tue, 16 Sep 2025 18:49:20 -0400 Subject: [PATCH 31/93] Cleanup --- detection_rules/rule.py | 11 -------- detection_rules/rule_validators.py | 17 +++++------- detection_rules/utils.py | 43 ++++++++++++++++++++---------- 3 files changed, 35 insertions(+), 36 deletions(-) diff --git a/detection_rules/rule.py b/detection_rules/rule.py index 86573e72f90..a5d37259d55 100644 --- a/detection_rules/rule.py +++ b/detection_rules/rule.py @@ -430,8 +430,6 @@ def save_schema(cls) -> None: def validate_query(self, _: RuleMeta) -> None: pass - # TODO do we need a get_restricted fields for ESQL? 
I expect no - @cached_property def get_restricted_fields(self) -> dict[str, tuple[Version | None, Version | None]] | None: """Get stack version restricted fields.""" @@ -1368,11 +1366,6 @@ def _convert_add_related_integrations(self, obj: dict[str, Any]) -> None: field_name = "related_integrations" package_integrations = obj.get(field_name, []) - # TODO self.metadata.integration is blank for ESQL, perhaps populate this from remote validation? - # type(self.data) - # - # self.data.validator (grab related integrations from validator) - # ESQLValidator if not package_integrations and self.metadata.integration: packages_manifest = load_integrations_manifests() current_stack_version = load_current_package_version() @@ -1400,7 +1393,6 @@ def _convert_add_related_integrations(self, obj: dict[str, Any]) -> None: packages_manifest=packages_manifest, ) - # TODO do we use policy templates in ESQL? # if integration is not a policy template remove if package["version"]: version_data = packages_manifest.get(package["package"], {}).get( @@ -1515,9 +1507,6 @@ def get_packaged_integrations( ) -> list[dict[str, Any]] | None: packaged_integrations: list[dict[str, Any]] = [] datasets, _ = beats.get_datasets_and_modules(data.get("ast") or []) # type: ignore[reportArgumentType] - # TODO above for esql - # Use function from utils? Or have it set? - # Combine with package_manifeest if isinstance(data, ESQLRuleData): dataset_objs = utils.get_esql_query_event_dataset_integrations(data.query) datasets.update(str(obj) for obj in dataset_objs) diff --git a/detection_rules/rule_validators.py b/detection_rules/rule_validators.py index 47606d969fa..68b24bd05e2 100644 --- a/detection_rules/rule_validators.py +++ b/detection_rules/rule_validators.py @@ -727,6 +727,7 @@ def __init__(self, query: str) -> None: super().__init__(query) self.esql_unique_fields: list[dict[str, str]] = [] self.related_integrations: list[dict[str, str]] = [] + self.stack_version: str = "" @cached_property def ast(self) -> None: # type: ignore[reportIncompatibleMethodOverride] @@ -786,7 +787,6 @@ def prepare_integration_mappings( self, rule_integrations: list[str], event_dataset_integrations: list[utils.EventDataset], - stack_version: str, package_manifests: Any, integration_schemas: Any, log: Callable[[str], None], @@ -805,15 +805,12 @@ def prepare_integration_mappings( if event_dataset.integration not in rule_integrations: rule_integrations.append(event_dataset.integration) - # TODO add self setting for list of integrations - # perhaps self.related_integrations - for integration in rule_integrations: package = integration package_version, _ = integrations.find_latest_compatible_version( package, "", - Version.parse(stack_version), + Version.parse(self.stack_version), package_manifests, ) package_schema = integration_schemas[package][package_version] @@ -968,7 +965,6 @@ def prepare_mappings( elastic_client: Elasticsearch, indices: list[str], event_dataset_integrations: list[utils.EventDataset], - stack_version: str, metadata: RuleMeta, log: Callable[[str], None], ) -> tuple[dict[str, Any], dict[str, Any], dict[str, Any]]: @@ -983,7 +979,7 @@ def prepare_mappings( integration_schemas = load_integrations_schemas() integration_mappings, integration_index_lookup = self.prepare_integration_mappings( - rule_integrations, event_dataset_integrations, stack_version, package_manifests, integration_schemas, log + rule_integrations, event_dataset_integrations, package_manifests, integration_schemas, log ) index_lookup.update(integration_index_lookup) @@ 
-1042,12 +1038,11 @@ def log(val: str) -> None: if verbosity >= unit_test_verbose_level: print(f"{rule_id}:", val) - stack_version = "" kibana_details: dict[str, Any] = kibana_client.get("/api/status", {}) # type: ignore[reportUnknownVariableType] if "version" not in kibana_details: raise ValueError("Failed to retrieve Kibana details.") - stack_version = str(kibana_details["version"]["number"]) - log(f"Validating against {stack_version} stack") + self.stack_version = str(kibana_details["version"]["number"]) + log(f"Validating against {self.stack_version} stack") indices_str, indices = utils.get_esql_query_indices(query) # type: ignore[reportUnknownVariableType] log(f"Extracted indices from query: {', '.join(indices)}") @@ -1057,7 +1052,7 @@ def log(val: str) -> None: # Get mappings for all matching existing index templates existing_mappings, index_lookup, combined_mappings = self.prepare_mappings( - elastic_client, indices, event_dataset_integrations, stack_version, metadata, log + elastic_client, indices, event_dataset_integrations, metadata, log ) log(f"Collected mappings: {len(existing_mappings)}") log(f"Combined mappings prepared: {len(combined_mappings)}") diff --git a/detection_rules/utils.py b/detection_rules/utils.py index 504b839ccf5..671b89b6d60 100644 --- a/detection_rules/utils.py +++ b/detection_rules/utils.py @@ -559,22 +559,37 @@ def __str__(self) -> str: def get_esql_query_event_dataset_integrations(query: str) -> list[EventDataset]: - """Extract event.dataset integrations from an ES|QL query and return as EventDataset objects.""" + """Extract event.dataset, event.module, and data_stream.dataset integrations from an ES|QL query.""" number_of_parts = 2 - # Regex to match event.dataset in ("value1", "value2") or event.dataset == "value" - dataset_in_regex = re.compile(r"event\.dataset\s+in\s*\(\s*([^)]+)\s*\)") - dataset_eq_regex = re.compile(r'event\.dataset\s*==\s*"([^"]+)"') - - # Extract datasets from `event.dataset in (...)` + # Regex patterns for event.dataset, event.module, and data_stream.dataset + # This mimics the logic in get_datasets_and_modules but for ES|QL as we do not have an ast + + regex_patterns = { + "in": [ + re.compile(r"event\.dataset\s+in\s*\(\s*([^)]+)\s*\)"), + re.compile(r"event\.module\s+in\s*\(\s*([^)]+)\s*\)"), + re.compile(r"data_stream\.dataset\s+in\s*\(\s*([^)]+)\s*\)"), + ], + "eq": [ + re.compile(r'event\.dataset\s*==\s*"([^"]+)"'), + re.compile(r'event\.module\s*==\s*"([^"]+)"'), + re.compile(r'data_stream\.dataset\s*==\s*"([^"]+)"'), + ], + } + + # Extract datasets datasets: list[str] = [] - in_match = dataset_in_regex.search(query) - if in_match: - datasets.extend([ds.strip().strip('"') for ds in in_match.group(1).split(",")]) - - # Extract datasets from `event.dataset == "..."` - eq_match = dataset_eq_regex.search(query) - if eq_match: - datasets.append(eq_match.group(1)) + for regex_list in regex_patterns.values(): + for regex in regex_list: + matches = regex.findall(query) + if matches: + for match in matches: + if "," in match: + # Handle `in` case with multiple values + datasets.extend([ds.strip().strip('"') for ds in match.split(",")]) + else: + # Handle `==` case + datasets.append(match.strip()) event_datasets: list[EventDataset] = [] for dataset in datasets: From bf21646cfa2e3115a10ac5a9f39e725c9211a326 Mon Sep 17 00:00:00 2001 From: eric-forte-elastic Date: Tue, 16 Sep 2025 19:37:30 -0400 Subject: [PATCH 32/93] Add view rule flag --- detection_rules/main.py | 20 +++++++++++++++++--- detection_rules/misc.py | 21 
+++++++++++++++++++++
 detection_rules/rule_validators.py | 20 ++++----------------
 tests/test_rules_remote.py | 5 ++++-
 4 files changed, 46 insertions(+), 20 deletions(-)

diff --git a/detection_rules/main.py b/detection_rules/main.py
index 7de190520d2..24809ba3a59 100644
--- a/detection_rules/main.py
+++ b/detection_rules/main.py
@@ -30,10 +30,16 @@
 from .config import load_current_package_version, parse_rules_config
 from .exception import TOMLExceptionContents, build_exception_objects, parse_exceptions_results_from_api
 from .generic_loader import GenericCollection
-from .misc import add_client, nested_set, parse_user_config, raise_client_error
-from .rule import DeprecatedRule, QueryRuleData, TOMLRule, TOMLRuleContents
+from .misc import (
+    add_client,
+    nested_set,
+    parse_user_config,
+    raise_client_error,
+)
+from .rule import DeprecatedRule, ESQLRuleData, QueryRuleData, RuleMeta, TOMLRule, TOMLRuleContents
 from .rule_formatter import toml_write
 from .rule_loader import RawRuleCollection, RuleCollection, update_metadata_from_file
+from .rule_validators import ESQLValidator
 from .schemas import all_versions, definitions, get_incompatible_fields, get_schema_file
 from .utils import (
     Ndjson,
@@ -446,10 +452,18 @@ def mass_update(
 @root.command("view-rule")
 @click.argument("rule-file", type=Path)
 @click.option("--api-format/--rule-format", default=True, help="Print the rule in final api or rule format")
+@click.option("--esql-remote-validation", is_flag=True, default=False, help="Enable remote validation for the rule")
 @click.pass_context
-def view_rule(_: click.Context, rule_file: Path, api_format: str) -> TOMLRule | DeprecatedRule:
+def view_rule(_: click.Context, rule_file: Path, api_format: str, esql_remote_validation: bool) -> TOMLRule | DeprecatedRule:
     """View an internal rule or specified rule file."""
     rule = RuleCollection().load_file(rule_file)
+    if (
+        esql_remote_validation
+        and isinstance(rule.contents.data, ESQLRuleData)
+        and isinstance(rule.contents.data.validator, ESQLValidator)
+        and isinstance(rule.contents.metadata, RuleMeta)
+    ):
+        rule.contents.data.validator.validate(rule.contents.data, rule.contents.metadata, force_remote_validation=True)
 
     if api_format:
         click.echo(json.dumps(rule.contents.to_api_format(), indent=2, sort_keys=True))
diff --git a/detection_rules/misc.py b/detection_rules/misc.py
index 45e72e198ce..aa77dbfdc36 100644
--- a/detection_rules/misc.py
+++ b/detection_rules/misc.py
@@ -386,6 +386,16 @@ def get_elasticsearch_client(  # noqa: PLR0913
     return client
 
 
+def get_default_elasticsearch_client() -> Elasticsearch:
+    """Get a default authenticated Elasticsearch client."""
+    return get_elasticsearch_client(
+        api_key=getdefault("api_key")(),
+        cloud_id=getdefault("cloud_id")(),
+        elasticsearch_url=getdefault("elasticsearch_url")(),
+        ignore_ssl_errors=getdefault("ignore_ssl_errors")(),
+    )
+
+
 def get_kibana_client(
     *,
     api_key: str,
@@ -403,6 +413,17 @@ def get_kibana_client(
     return Kibana(cloud_id=cloud_id, kibana_url=kibana_url, space=space, verify=verify, api_key=api_key, **kwargs)
 
 
+def get_default_kibana_client() -> Kibana:
+    """Get a default authenticated Kibana client."""
+    return get_kibana_client(
+        api_key=getdefault("api_key")(),
+        cloud_id=getdefault("cloud_id")(),
+        kibana_url=getdefault("kibana_url")(),
+        space=getdefault("space")(),
+        ignore_ssl_errors=getdefault("ignore_ssl_errors")(),
+    )
+
+
 client_options = {
     "kibana": {
         "kibana_url": click.Option(["--kibana-url"], default=getdefault("kibana_url")),
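
The two helpers added to misc.py make the remote-validation call sites considerably shorter; a sketch of typical use, assuming credentials are available via the config file or DR_* environment variables:

    # Sketch: both helpers resolve api_key/cloud_id/URLs through getdefault, so they
    # work the same whether values come from a config file or the environment.
    from detection_rules import misc

    kibana_client = misc.get_default_kibana_client()
    elastic_client = misc.get_default_elasticsearch_client()
    status = kibana_client.get("/api/status")  # e.g. to read the stack version number
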
diff --git a/detection_rules/rule_validators.py b/detection_rules/rule_validators.py
index 68b24bd05e2..7e08e090553 100644
--- a/detection_rules/rule_validators.py
+++ b/detection_rules/rule_validators.py
@@ -748,23 +748,11 @@
-    def validate(self, rule_data: "QueryRuleData", rule_meta: RuleMeta) -> None:  # type: ignore[reportIncompatibleMethodOverride]
+    def validate(self, rule_data: "QueryRuleData", rule_meta: RuleMeta, force_remote_validation: bool = False) -> None:  # type: ignore[reportIncompatibleMethodOverride]
         """Validate an ESQL query while checking TOMLRule."""
-        if misc.getdefault("remote_esql_validation")():
-            kibana_client = misc.get_kibana_client(
-                api_key=misc.getdefault("api_key")(),
-                cloud_id=misc.getdefault("cloud_id")(),
-                kibana_url=misc.getdefault("kibana_url")(),
-                space=misc.getdefault("space")(),
-                ignore_ssl_errors=misc.getdefault("ignore_ssl_errors")(),
-            )
-
-            elastic_client = misc.get_elasticsearch_client(
-                api_key=misc.getdefault("api_key")(),
-                cloud_id=misc.getdefault("cloud_id")(),
-                elasticsearch_url=misc.getdefault("elasticsearch_url")(),
-                ignore_ssl_errors=misc.getdefault("ignore_ssl_errors")(),
-            )
+        if misc.getdefault("remote_esql_validation")() or force_remote_validation:
+            kibana_client = misc.get_default_kibana_client()
+            elastic_client = misc.get_default_elasticsearch_client()
             _ = self.remote_validate_rule(
                 kibana_client,
                 elastic_client,
diff --git a/tests/test_rules_remote.py b/tests/test_rules_remote.py
index 03405b4d527..f7fb53857ad 100644
--- a/tests/test_rules_remote.py
+++ b/tests/test_rules_remote.py
@@ -9,7 +9,7 @@
 from elasticsearch import BadRequestError
 from elasticsearch import ConnectionError as ESConnectionError
 
-from detection_rules.misc import get_default_config
+from detection_rules.misc import get_default_config, getdefault
 from detection_rules.remote_validation import RemoteConnector
 from detection_rules.rule_validators import ESQLValidator
 
@@ -17,6 +17,9 @@
 
 @unittest.skipIf(get_default_config() is None, "Skipping remote validation due to missing config")
+@unittest.skipIf(
+    not getdefault("remote_esql_validation")(), "Skipping remote validation because remote_esql_validation is False"
+)
 class TestRemoteRules(BaseRuleTest):
     """Test rules against a remote Elastic stack instance."""

From 2a6b0efe8427a6424b6429523e809a88420aae64 Mon Sep 17 00:00:00 2001
From: eric-forte-elastic
Date: Tue, 16 Sep 2025 19:43:43 -0400
Subject: [PATCH 33/93] Linting

---
 detection_rules/main.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/detection_rules/main.py b/detection_rules/main.py
index 24809ba3a59..c9c3ef61e70 100644
--- a/detection_rules/main.py
+++ b/detection_rules/main.py
@@ -454,7 +454,9 @@ def mass_update(
 @click.option("--api-format/--rule-format", default=True, help="Print the rule in final api or rule format")
 @click.option("--esql-remote-validation", is_flag=True, default=False, help="Enable remote validation for the rule")
 @click.pass_context
-def view_rule(_: click.Context, rule_file: Path, api_format: str, esql_remote_validation: bool) -> TOMLRule | DeprecatedRule:
+def view_rule(
+    _: click.Context, rule_file: Path, api_format: str, esql_remote_validation: bool
+) -> TOMLRule | DeprecatedRule:
     """View an internal rule or specified rule file."""
     rule = RuleCollection().load_file(rule_file)

From 9246c162c6a8f3550646f268976198196fa6bb66 Mon Sep 17 00:00:00 2001
From: eric-forte-elastic
Date: Wed, 17 Sep 
2025 10:58:46 -0400 Subject: [PATCH 34/93] Fix unit test bug --- detection_rules/utils.py | 2 +- ...collection_cloudtrail_logging_created.toml | 63 ++++++++++++++++++ ...on_cloudtrail_logging_created_correct.toml | 64 +++++++++++++++++++ tests/test_rules_remote.py | 17 +++++ 4 files changed, 145 insertions(+), 1 deletion(-) create mode 100644 tests/data/collection_cloudtrail_logging_created.toml create mode 100644 tests/data/collection_cloudtrail_logging_created_correct.toml diff --git a/detection_rules/utils.py b/detection_rules/utils.py index 671b89b6d60..dd3a19e258a 100644 --- a/detection_rules/utils.py +++ b/detection_rules/utils.py @@ -589,7 +589,7 @@ def get_esql_query_event_dataset_integrations(query: str) -> list[EventDataset]: datasets.extend([ds.strip().strip('"') for ds in match.split(",")]) else: # Handle `==` case - datasets.append(match.strip()) + datasets.append(match.strip().strip('"')) event_datasets: list[EventDataset] = [] for dataset in datasets: diff --git a/tests/data/collection_cloudtrail_logging_created.toml b/tests/data/collection_cloudtrail_logging_created.toml new file mode 100644 index 00000000000..ab123a45b5f --- /dev/null +++ b/tests/data/collection_cloudtrail_logging_created.toml @@ -0,0 +1,63 @@ +[metadata] +creation_date = "2020/06/10" +maturity = "production" +updated_date = "2025/01/15" + +[rule] +author = ["Elastic"] +description = "Rule used for testing." +false_positives = [ + """ + Trail creations may be made by a system or network administrator. Verify whether the user identity, user agent, + and/or hostname should be making changes in your environment. Trail creations by unfamiliar users or hosts should be + investigated. If known behavior is causing false positives, it can be exempted from the rule. + """, +] +from = "now-32m" +interval = "5m" +language = "esql" +license = "Elastic License v2" +name = "AWS CloudTrail Testing Rule" +note = """Rule used for testing.""" +references = [ + "https://docs.aws.amazon.com/awscloudtrail/latest/APIReference/API_CreateTrail.html", + "https://awscli.amazonaws.com/v2/documentation/api/latest/reference/cloudtrail/create-trail.html", +] +risk_score = 21 +rule_id = "cd088b8b-7bca-40d5-b71c-16ffb5309e66" +severity = "low" +tags = [ + "Domain: Cloud", + "Data Source: AWS", + "Data Source: Amazon Web Services", + "Use Case: Log Auditing", + "Tactic: Collection", + "Resources: Investigation Guide", +] +timestamp_override = "event.ingested" +type = "esql" + +query = ''' +from logs-aws.billing* metadata _id, _version, _index +| where @timestamp > now() - 30 minutes + and event.dataset in ("aws.billing") + and aws.cloudtrail.user_identity.arn is not null + and aws.cloudtrail.user_identity.type == "IAMUser" +| keep + aws.cloudtrail.user_identity.type +''' + + +[[rule.threat]] +framework = "MITRE ATT&CK" +[[rule.threat.technique]] +id = "T1530" +name = "Data from Cloud Storage" +reference = "https://attack.mitre.org/techniques/T1530/" + + +[rule.threat.tactic] +id = "TA0009" +name = "Collection" +reference = "https://attack.mitre.org/tactics/TA0009/" + diff --git a/tests/data/collection_cloudtrail_logging_created_correct.toml b/tests/data/collection_cloudtrail_logging_created_correct.toml new file mode 100644 index 00000000000..65b1a2a0590 --- /dev/null +++ b/tests/data/collection_cloudtrail_logging_created_correct.toml @@ -0,0 +1,64 @@ +[metadata] +creation_date = "2020/06/10" +maturity = "production" +integration = ["aws"] +updated_date = "2025/01/15" + +[rule] +author = ["Elastic"] +description = "Rule used for testing." 
+false_positives = [
+    """
+    Trail creations may be made by a system or network administrator. Verify whether the user identity, user agent,
+    and/or hostname should be making changes in your environment. Trail creations by unfamiliar users or hosts should be
+    investigated. If known behavior is causing false positives, it can be exempted from the rule.
+    """,
+]
+from = "now-32m"
+interval = "5m"
+language = "esql"
+license = "Elastic License v2"
+name = "AWS CloudTrail Testing Rule"
+note = """Rule used for testing."""
+references = [
+    "https://docs.aws.amazon.com/awscloudtrail/latest/APIReference/API_CreateTrail.html",
+    "https://awscli.amazonaws.com/v2/documentation/api/latest/reference/cloudtrail/create-trail.html",
+]
+risk_score = 21
+rule_id = "1d46d30f-1c66-4d0f-8a53-afeeb455196b"
+severity = "low"
+tags = [
+    "Domain: Cloud",
+    "Data Source: AWS",
+    "Data Source: Amazon Web Services",
+    "Use Case: Log Auditing",
+    "Tactic: Collection",
+    "Resources: Investigation Guide",
+]
+timestamp_override = "event.ingested"
+type = "esql"
+
+query = '''
+from logs-aws.cloudtrail* metadata _id, _version, _index
+| where @timestamp > now() - 30 minutes
+    and event.dataset in ("aws.cloudtrail", "aws.billing")
+    and aws.cloudtrail.user_identity.arn is not null
+    and aws.cloudtrail.user_identity.type == "IAMUser"
+| keep
+    aws.cloudtrail.user_identity.type
+'''
+
+
+[[rule.threat]]
+framework = "MITRE ATT&CK"
+[[rule.threat.technique]]
+id = "T1530"
+name = "Data from Cloud Storage"
+reference = "https://attack.mitre.org/techniques/T1530/"
+
+
+[rule.threat.tactic]
+id = "TA0009"
+name = "Collection"
+reference = "https://attack.mitre.org/tactics/TA0009/"
+
diff --git a/tests/test_rules_remote.py b/tests/test_rules_remote.py
index f7fb53857ad..9ecf0082d2c 100644
--- a/tests/test_rules_remote.py
+++ b/tests/test_rules_remote.py
@@ -6,12 +6,15 @@
 import time
 import unittest
 
+import pytest
 from elasticsearch import BadRequestError
 from elasticsearch import ConnectionError as ESConnectionError
 
 from detection_rules.misc import get_default_config, getdefault
 from detection_rules.remote_validation import RemoteConnector
+from detection_rules.rule_loader import RuleCollection
 from detection_rules.rule_validators import ESQLValidator
+from detection_rules.utils import get_path
 
 from .base import BaseRuleTest
 
@@ -72,3 +75,17 @@ def test_esql_rules(self):
 
         if failed_count > 0:
             self.fail(f"Found {failed_count} invalid rules")
+
+    def test_esql_related_integrations(self):
+        """Test that an ES|QL rule has its related integrations built correctly."""
+        file_path = get_path(["tests", "data", "collection_cloudtrail_logging_created_correct.toml"])
+        rule = RuleCollection().load_file(file_path)
+        related_integrations = rule.contents.to_api_format()["related_integrations"]
+        for integration in related_integrations:
+            assert integration["package"] == "aws", f"Expected 'aws', but got {integration['package']}"
+
+    def test_esql_event_dataset(self):
+        """Test that an ES|QL rule that uses the event.dataset field in the query validates the fields correctly."""
+        file_path = get_path(["tests", "data", "collection_cloudtrail_logging_created.toml"])
+        with pytest.raises(BadRequestError, match="Unknown column .*"):
+            _ = RuleCollection().load_file(file_path)

From 86cb0a4ab3facaa0aeb41412182199aafdf1a227 Mon Sep 17 00:00:00 2001
From: eric-forte-elastic
Date: Wed, 17 Sep 2025 11:02:52 -0400
Subject: [PATCH 35/93] explicit raw string

---
 tests/test_rules_remote.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
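
One detail behind the raw-string change below: pytest applies the match argument with re.search, so the pattern is a regular expression, and a raw string avoids escape-sequence pitfalls. A tiny self-contained illustration:

    # Minimal illustration of pytest.raises(match=...) regex semantics.
    import pytest

    def boom() -> None:
        raise ValueError("Unknown column [foo.bar]")

    with pytest.raises(ValueError, match=r"Unknown column .*"):
        boom()
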
diff --git a/tests/test_rules_remote.py b/tests/test_rules_remote.py
index 9ecf0082d2c..8340a9338df 100644
--- a/tests/test_rules_remote.py
+++ b/tests/test_rules_remote.py
@@ -87,5 +87,5 @@ def test_esql_related_integrations(self):
     def test_esql_event_dataset(self):
         """Test that an ES|QL rule that uses the event.dataset field in the query validates the fields correctly."""
         file_path = get_path(["tests", "data", "collection_cloudtrail_logging_created.toml"])
-        with pytest.raises(BadRequestError, match="Unknown column .*"):
+        with pytest.raises(BadRequestError, match=r"Unknown column .*"):
             _ = RuleCollection().load_file(file_path)

From c25d18d00457b8283484f2715e70c7bec7df238f Mon Sep 17 00:00:00 2001
From: eric-forte-elastic
Date: Mon, 22 Sep 2025 21:39:41 -0400
Subject: [PATCH 36/93] CI updates

---
 .github/workflows/esql-validation.yml | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/.github/workflows/esql-validation.yml b/.github/workflows/esql-validation.yml
index a3494a4fb42..bf77d610a18 100644
--- a/.github/workflows/esql-validation.yml
+++ b/.github/workflows/esql-validation.yml
@@ -1,7 +1,7 @@
 name: ES|QL Validation
 on:
   push:
-    branches: [ "main", "7.*", "8.*", "9.*" ]
+    branches: [ "main", "8.*", "9.*" ]
   pull_request:
     branches: [ "*" ]
     paths:
@@ -12,12 +12,14 @@ jobs:
     steps:
 
       - name: Check out repository
-        uses: actions/checkout@v4
+        if: ${{ !env.cloud_id && !env.api_key }}
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5
         with:
           path: elastic-container
           repository: eric-forte-elastic/elastic-container
 
       - name: Build and run containers
+        if: ${{ !env.cloud_id && !env.api_key }}
         run: |
           cd elastic-container
           GENERATED_PASSWORD=$(openssl rand -base64 16)
@@ -28,16 +30,17 @@ jobs:
 
       - name: Setup Detection Rules
-        uses: actions/checkout@v4
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5
         with:
           fetch-depth: 0
 
       - name: Set up Python 3.13
-        uses: actions/setup-python@v5
+        uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6
         with:
           python-version: '3.13'
 
       - name: Get API Key and setup auth
+        if: ${{ !env.cloud_id && !env.api_key }}
         env:
           DR_ELASTICSEARCH_URL: "https://localhost:9200"
           ES_USER: "elastic"
@@ -61,9 +64,10 @@ jobs:
 
       - name: Validate Test ESQL Rule
         env:
-          DR_KIBANA_URL: "https://localhost:5601"
-          DR_ES_USER: "elastic"
-          DR_API_KEY: ${{ env.DR_API_KEY }}
+          DR_CLOUD_ID: ${{ env.cloud_id }}
+          DR_KIBANA_URL: ${{ env.cloud_id == '' && 'https://localhost:5601' || '' }}
+          DR_ES_USER: ${{ env.cloud_id == '' && 'elastic' || '' }}
+          DR_API_KEY: ${{ env.api_key || env.DR_API_KEY }}
         run: |
           cd detection-rules
           python -m pytest tests/test_rules_remote.py::TestRemoteRules::test_esql_rules

From b6e83bde532911991d0edcf288ae39eef04741cd Mon Sep 17 00:00:00 2001
From: eric-forte-elastic
Date: Mon, 22 Sep 2025 21:41:12 -0400
Subject: [PATCH 37/93] fix typo

---
 .github/workflows/esql-validation.yml | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/.github/workflows/esql-validation.yml b/.github/workflows/esql-validation.yml
index bf77d610a18..1fb66ae0623 100644
--- a/.github/workflows/esql-validation.yml
+++ b/.github/workflows/esql-validation.yml
@@ -12,14 +12,14 @@ jobs:
     steps:
 
       - name: Check out repository
-        if: ${{ !env.cloud_id && 
!env.api_key }} + if: ${{ !vars.cloud_id && !vars.api_key }} run: | cd elastic-container GENERATED_PASSWORD=$(openssl rand -base64 16) @@ -40,7 +40,7 @@ jobs: python-version: '3.13' - name: Get API Key and setup auth - if: ${{ !env.cloud_id && !env.api_key }} + if: ${{ !vars.cloud_id && !vars.api_key }} env: DR_ELASTICSEARCH_URL: "https://localhost:9200" ES_USER: "elastic" @@ -64,10 +64,10 @@ jobs: - name: Validate Test ESQL Rule env: - DR_CLOUD_ID: ${{ env.cloud_id }} - DR_KIBANA_URL: ${{ env.cloud_id == '' && 'https://localhost:5601' || '' }} - DR_ES_USER: ${{ env.cloud_id == '' && 'elastic' || '' }} - DR_API_KEY: ${{ env.api_key || env.DR_API_KEY }} + DR_CLOUD_ID: ${{ vars.cloud_id }} + DR_KIBANA_URL: ${{ vars.cloud_id == '' && 'https://localhost:5601' || '' }} + DR_ES_USER: ${{ vars.cloud_id == '' && 'elastic' || '' }} + DR_API_KEY: ${{ vars.api_key || vars.DR_API_KEY }} run: | cd detection-rules python -m pytest tests/test_rules_remote.py::TestRemoteRules::test_esql_rules From f23d8396558a633cea683c32a966285c8136ef29 Mon Sep 17 00:00:00 2001 From: eric-forte-elastic Date: Mon, 22 Sep 2025 21:44:31 -0400 Subject: [PATCH 38/93] Initial Error Classes --- detection_rules/cli_utils.py | 102 ++++++++- detection_rules/esql_errors.py | 30 +++ detection_rules/index_mappings.py | 232 +++++++++++++++++++++ detection_rules/integrations.py | 2 +- detection_rules/misc.py | 101 --------- detection_rules/rule_validators.py | 320 ++++++----------------------- tests/test_rules_remote.py | 17 +- 7 files changed, 442 insertions(+), 362 deletions(-) create mode 100644 detection_rules/esql_errors.py create mode 100644 detection_rules/index_mappings.py diff --git a/detection_rules/cli_utils.py b/detection_rules/cli_utils.py index 49fd98e3486..2d6809b7b0c 100644 --- a/detection_rules/cli_utils.py +++ b/detection_rules/cli_utils.py @@ -7,7 +7,10 @@ import datetime import functools import os +import re +import time import typing +import uuid from collections.abc import Callable from pathlib import Path from typing import Any @@ -26,6 +29,104 @@ RULES_CONFIG = parse_rules_config() +def schema_prompt(name: str, value: Any | None = None, is_required: bool = False, **options: Any) -> Any: # noqa: PLR0911, PLR0912, PLR0915 + """Interactively prompt based on schema requirements.""" + field_type = options.get("type") + pattern: str | None = options.get("pattern") + enum = options.get("enum", []) + minimum = int(options["minimum"]) if "minimum" in options else None + maximum = int(options["maximum"]) if "maximum" in options else None + min_item = int(options.get("min_items", 0)) + max_items = int(options.get("max_items", 9999)) + + default = options.get("default") + if default is not None and str(default).lower() in ("true", "false"): + default = str(default).lower() + + if "date" in name: + default = time.strftime("%Y/%m/%d") + + if name == "rule_id": + default = str(uuid.uuid4()) + + if len(enum) == 1 and is_required and field_type not in ("array", ["array"]): + return enum[0] + + def _check_type(_val: Any) -> bool: # noqa: PLR0911 + if field_type in ("number", "integer") and not str(_val).isdigit(): + print(f"Number expected but got: {_val}") + return False + if pattern: + match = re.match(pattern, _val) + if not match or len(match.group(0)) != len(_val): + print(f"{_val} did not match pattern: {pattern}!") + return False + if enum and _val not in enum: + print("{} not in valid options: {}".format(_val, ", ".join(enum))) + return False + if minimum and (type(_val) is int and int(_val) < minimum): + print(f"{_val!s} 
is less than the minimum: {minimum!s}") + return False + if maximum and (type(_val) is int and int(_val) > maximum): + print(f"{_val!s} is greater than the maximum: {maximum!s}") + return False + if type(_val) is str and field_type == "boolean" and _val.lower() not in ("true", "false"): + print(f"Boolean expected but got: {_val!s}") + return False + return True + + def _convert_type(_val: Any) -> Any: + if field_type == "boolean" and type(_val) is not bool: + _val = _val.lower() == "true" + return int(_val) if field_type in ("number", "integer") else _val + + prompt = ( + "{name}{default}{required}{multi}".format( + name=name, + default=f' [{default}] ("n/a" to leave blank) ' if default else "", + required=" (required) " if is_required else "", + multi=(" (multi, comma separated) " if field_type in ("array", ["array"]) else ""), + ).strip() + + ": " + ) + + while True: + result = value or input(prompt) or default + if result == "n/a": + result = None + + if not result: + if is_required: + value = None + continue + return None + + if field_type in ("array", ["array"]): + result_list = result.split(",") + + if not (min_item < len(result_list) < max_items): + if is_required: + value = None + break + return [] + + for value in result_list: + if not _check_type(value): + if is_required: + value = None # noqa: PLW2901 + break + return [] + if is_required and value is None: + continue + return [_convert_type(r) for r in result_list] + if _check_type(result): + return _convert_type(result) + if is_required: + value = None + continue + return None + + def single_collection(f: Callable[..., Any]) -> Callable[..., Any]: """Add arguments to get a RuleCollection by file, directory or a list of IDs""" from .misc import raise_client_error @@ -144,7 +245,6 @@ def rule_prompt( # noqa: PLR0912, PLR0913, PLR0915 **kwargs: Any, ) -> TOMLRule | str: """Prompt loop to build a rule.""" - from .misc import schema_prompt additional_required = additional_required or [] creation_date = datetime.date.today().strftime("%Y/%m/%d") # noqa: DTZ011 diff --git a/detection_rules/esql_errors.py b/detection_rules/esql_errors.py new file mode 100644 index 00000000000..a689a89ff0e --- /dev/null +++ b/detection_rules/esql_errors.py @@ -0,0 +1,30 @@ +"""ESQL exceptions.""" + +from marshmallow.exceptions import ValidationError + +__all__ = ( + "EsqlSchemaError", + "EsqlSemanticError", + "EsqlSyntaxError", + "EsqlTypeMismatchError", +) + + +class EsqlSchemaError(ValidationError): + """Error for missing fields in ESQL.""" + + +class EsqlSyntaxError(ValidationError): + """Error with ESQL syntax.""" + + # TODO: Update this to a Kibana Error extension? Perhaps? + + +class EsqlSemanticError(ValidationError): + """Error with ESQL semantics.""" + + # TODO: Update this to a Kibana Error extension? Perhaps? + + +class EsqlTypeMismatchError(ValidationError): + """Error when validating types in ESQL.""" diff --git a/detection_rules/index_mappings.py b/detection_rules/index_mappings.py new file mode 100644 index 00000000000..653e0e6d9f6 --- /dev/null +++ b/detection_rules/index_mappings.py @@ -0,0 +1,232 @@ +# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +# or more contributor license agreements. Licensed under the Elastic License +# 2.0; you may not use this file except in compliance with the Elastic License +# 2.0. 
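+#
+# NOTE: The helpers below build synthetic index mappings (from existing index
+# templates, integration schemas, ECS, and non-ECS schemas) so that ES|QL rules
+# can be validated remotely against a live stack.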
+ +"""Validation logic for rules containing queries.""" + +import time +from collections.abc import Callable +from typing import Any + +from elastic_transport import ObjectApiResponse +from elasticsearch import Elasticsearch # type: ignore[reportMissingTypeStubs] +from semver import Version + +from . import ecs, integrations, misc, utils +from .config import load_current_package_version +from .integrations import ( + load_integrations_manifests, + load_integrations_schemas, +) +from .rule import RuleMeta +from .schemas import get_stack_schemas + + +def get_rule_integrations(metadata: RuleMeta) -> list[str]: + """Retrieve rule integrations from metadata.""" + rule_integrations: list[str] = [] + if metadata.integration: + if isinstance(metadata.integration, list): + rule_integrations = metadata.integration + else: + rule_integrations = [metadata.integration] + return rule_integrations + + +def prepare_integration_mappings( + rule_integrations: list[str], + event_dataset_integrations: list[utils.EventDataset], + package_manifests: Any, + integration_schemas: Any, + stack_version: str, + log: Callable[[str], None], +) -> tuple[dict[str, Any], dict[str, Any]]: + """Prepare integration mappings for the given rule integrations.""" + integration_mappings: dict[str, Any] = {} + index_lookup: dict[str, Any] = {} + dataset_restriction: dict[str, str] = {} + + # Process restrictions, note we need this for loops to be separate + for event_dataset in event_dataset_integrations: + # Ensure the integration is in rule_integrations + if event_dataset.integration not in rule_integrations: + dataset_restriction.setdefault(event_dataset.integration, []).append(event_dataset.datastream) # type: ignore[reportIncompatibleMethodOverride] + for event_dataset in event_dataset_integrations: + if event_dataset.integration not in rule_integrations: + rule_integrations.append(event_dataset.integration) + + for integration in rule_integrations: + package = integration + # TODO check should be latest or least? + package_version, _ = integrations.find_latest_compatible_version( + package, + "", + Version.parse(stack_version), + package_manifests, + ) + package_schema = integration_schemas[package][package_version] + + # Apply dataset restrictions if any + if integration in dataset_restriction: + allowed_keys = dataset_restriction[integration] + package_schema = {key: value for key, value in package_schema.items() if key in allowed_keys} + + for stream in package_schema: + flat_schema = package_schema[stream] + stream_mappings = utils.flat_schema_to_index_mapping(flat_schema) + nested_multifields = find_nested_multifields(stream_mappings) + for field in nested_multifields: + field_name = str(field).split(".fields.")[0].replace(".", ".properties.") + ".fields" + log( + f"Warning: Nested multi-field `{field}` found in `{integration}-{stream}`. " + f"Removing parent field from schema for ES|QL validation." 
+ ) + utils.delete_nested_key_from_dict(stream_mappings, field_name) + utils.combine_dicts(integration_mappings, stream_mappings) + index_lookup[f"{integration}-{stream}"] = stream_mappings + + return integration_mappings, index_lookup + + +def create_remote_indices( + elastic_client: Elasticsearch, + existing_mappings: dict[str, Any], + index_lookup: dict[str, Any], + log: Callable[[str], None], +) -> str: + """Create remote indices for validation and return the index string.""" + suffix = str(int(time.time() * 1000)) + test_index = f"rule-test-index-{suffix}" + response = misc.create_index_with_index_mapping(elastic_client, test_index, existing_mappings) + log(f"Index `{test_index}` created: {response}") + full_index_str = test_index + + # create all integration indices + for index, properties in index_lookup.items(): + ind_index_str = f"test-{index.rstrip('*')}{suffix}" + response = misc.create_index_with_index_mapping(elastic_client, ind_index_str, properties) + log(f"Index `{ind_index_str}` created: {response}") + full_index_str = f"{full_index_str}, {ind_index_str}" + + return full_index_str + + +def execute_query_against_indices( + elastic_client: Elasticsearch, + query: str, + test_index_str: str, + log: Callable[[str], None], + delete_indices: bool = True, +) -> tuple[list[Any], ObjectApiResponse[Any]]: + """Execute the ESQL query against the test indices on a remote Stack and return the columns.""" + try: + log(f"Executing a query against `{test_index_str}`") + response = elastic_client.esql.query(query=query) + log(f"Got query response: {response}") + query_columns = response.get("columns", []) + finally: + if delete_indices: + for index_str in test_index_str.split(","): + response = elastic_client.indices.delete(index=index_str.strip()) + log(f"Test index `{index_str}` deleted: {response}") + + query_column_names = [c["name"] for c in query_columns] + log(f"Got query columns: {', '.join(query_column_names)}") + return query_columns, response + + +def find_nested_multifields(mapping: dict[str, Any], path: str = "") -> list[Any]: + """Recursively search for nested multi-fields in Elasticsearch mappings.""" + nested_multifields = [] + + for field, properties in mapping.items(): + current_path = f"{path}.{field}" if path else field + + if isinstance(properties, dict): + # Check if the field has a `fields` key + if "fields" in properties: + # Check if any subfield in `fields` also has a `fields` key + for subfield, subproperties in properties["fields"].items(): # type: ignore[reportUnknownVariableType] + if isinstance(subproperties, dict) and "fields" in subproperties: + nested_multifields.append(f"{current_path}.fields.{subfield}") # type: ignore[reportUnknownVariableType] + + # Recurse into subfields + if "properties" in properties: + nested_multifields.extend( # type: ignore[reportUnknownVariableType] + find_nested_multifields(properties["properties"], current_path) # type: ignore[reportUnknownVariableType] + ) + + return nested_multifields # type: ignore[reportUnknownVariableType] + + +def get_ecs_schema_mappings(current_version: Version) -> dict[str, Any]: + """Get the ECS schema in an index mapping format (nested schema) handling scaled floats.""" + ecs_version = get_stack_schemas()[str(current_version)]["ecs"] + ecs_schemas = ecs.get_schemas() + ecs_schema_flattened: dict[str, Any] = {} + ecs_schema_scaled_floats: dict[str, Any] = {} + for index, info in ecs_schemas[ecs_version]["ecs_flat"].items(): + if info["type"] == "scaled_float": + 
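+            # record the scaling_factor so it can be re-attached after the
+            # flat schema is converted to a nested index mapping below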
ecs_schema_scaled_floats.update({index: info["scaling_factor"]}) + ecs_schema_flattened.update({index: info["type"]}) + ecs_schema = utils.convert_to_nested_schema(ecs_schema_flattened) + for index, info in ecs_schema_scaled_floats.items(): + parts = index.split(".") + current = ecs_schema + + # Traverse the ecs_schema to the correct nested dictionary + for part in parts[:-1]: # Traverse all parts except the last one + current = current.setdefault(part, {}).setdefault("properties", {}) + + current[parts[-1]].update({"scaling_factor": info}) + return ecs_schema + + +def prepare_mappings( + elastic_client: Elasticsearch, + indices: list[str], + event_dataset_integrations: list[utils.EventDataset], + metadata: RuleMeta, + stack_version: str, + log: Callable[[str], None], +) -> tuple[dict[str, Any], dict[str, Any], dict[str, Any]]: + """Prepare index mappings for the given indices and rule integrations.""" + existing_mappings, index_lookup = misc.get_existing_mappings(elastic_client, indices) + + # Collect mappings for the integrations + rule_integrations = get_rule_integrations(metadata) + + # Collect mappings for all relevant integrations for the given stack version + package_manifests = load_integrations_manifests() + integration_schemas = load_integrations_schemas() + + integration_mappings, integration_index_lookup = prepare_integration_mappings( + rule_integrations, event_dataset_integrations, package_manifests, integration_schemas, stack_version, log + ) + + index_lookup.update(integration_index_lookup) + + # Combine existing and integration mappings into a single mapping dict + combined_mappings: dict[str, Any] = {} + utils.combine_dicts(combined_mappings, existing_mappings) + utils.combine_dicts(combined_mappings, integration_mappings) + + # Load non-ecs schema and convert to index mapping format (nested schema) + non_ecs_mapping: dict[str, Any] = {} + non_ecs = ecs.get_non_ecs_schema() + for index in indices: + non_ecs_mapping.update(non_ecs.get(index, {})) + non_ecs_mapping = ecs.flatten(non_ecs_mapping) + non_ecs_mapping = utils.convert_to_nested_schema(non_ecs_mapping) + if not combined_mappings and not non_ecs_mapping: + raise ValueError("No mappings found") + index_lookup.update({"rule-non-ecs-index": non_ecs_mapping}) + + # Load ECS in an index mapping format (nested schema) + current_version = Version.parse(load_current_package_version(), optional_minor_and_patch=True) + ecs_schema = get_ecs_schema_mappings(current_version) + + index_lookup.update({"rule-ecs-index": ecs_schema}) + + return existing_mappings, index_lookup, combined_mappings diff --git a/detection_rules/integrations.py b/detection_rules/integrations.py index 5c96667296e..02c92fe0474 100644 --- a/detection_rules/integrations.py +++ b/detection_rules/integrations.py @@ -225,7 +225,7 @@ def find_latest_compatible_version( rule_stack_version: Version, packages_manifest: dict[str, Any], ) -> tuple[str, list[str]]: - """Finds least compatible version for specified integration based on stack version supplied.""" + """Finds latest compatible version for specified integration based on stack version supplied.""" if not package: raise ValueError("Package must be specified") diff --git a/detection_rules/misc.py b/detection_rules/misc.py index aa77dbfdc36..fd0a48aad3e 100644 --- a/detection_rules/misc.py +++ b/detection_rules/misc.py @@ -6,10 +6,7 @@ """Misc support.""" import os -import re -import time import unittest -import uuid from collections.abc import Callable from functools import wraps from pathlib import Path @@ 
-108,104 +105,6 @@ def nest_from_dot(dots: str, value: Any) -> Any: return nested -def schema_prompt(name: str, value: Any | None = None, is_required: bool = False, **options: Any) -> Any: # noqa: PLR0911, PLR0912, PLR0915 - """Interactively prompt based on schema requirements.""" - field_type = options.get("type") - pattern: str | None = options.get("pattern") - enum = options.get("enum", []) - minimum = int(options["minimum"]) if "minimum" in options else None - maximum = int(options["maximum"]) if "maximum" in options else None - min_item = int(options.get("min_items", 0)) - max_items = int(options.get("max_items", 9999)) - - default = options.get("default") - if default is not None and str(default).lower() in ("true", "false"): - default = str(default).lower() - - if "date" in name: - default = time.strftime("%Y/%m/%d") - - if name == "rule_id": - default = str(uuid.uuid4()) - - if len(enum) == 1 and is_required and field_type not in ("array", ["array"]): - return enum[0] - - def _check_type(_val: Any) -> bool: # noqa: PLR0911 - if field_type in ("number", "integer") and not str(_val).isdigit(): - print(f"Number expected but got: {_val}") - return False - if pattern: - match = re.match(pattern, _val) - if not match or len(match.group(0)) != len(_val): - print(f"{_val} did not match pattern: {pattern}!") - return False - if enum and _val not in enum: - print("{} not in valid options: {}".format(_val, ", ".join(enum))) - return False - if minimum and (type(_val) is int and int(_val) < minimum): - print(f"{_val!s} is less than the minimum: {minimum!s}") - return False - if maximum and (type(_val) is int and int(_val) > maximum): - print(f"{_val!s} is greater than the maximum: {maximum!s}") - return False - if type(_val) is str and field_type == "boolean" and _val.lower() not in ("true", "false"): - print(f"Boolean expected but got: {_val!s}") - return False - return True - - def _convert_type(_val: Any) -> Any: - if field_type == "boolean" and type(_val) is not bool: - _val = _val.lower() == "true" - return int(_val) if field_type in ("number", "integer") else _val - - prompt = ( - "{name}{default}{required}{multi}".format( - name=name, - default=f' [{default}] ("n/a" to leave blank) ' if default else "", - required=" (required) " if is_required else "", - multi=(" (multi, comma separated) " if field_type in ("array", ["array"]) else ""), - ).strip() - + ": " - ) - - while True: - result = value or input(prompt) or default - if result == "n/a": - result = None - - if not result: - if is_required: - value = None - continue - return None - - if field_type in ("array", ["array"]): - result_list = result.split(",") - - if not (min_item < len(result_list) < max_items): - if is_required: - value = None - break - return [] - - for value in result_list: - if not _check_type(value): - if is_required: - value = None # noqa: PLW2901 - break - return [] - if is_required and value is None: - continue - return [_convert_type(r) for r in result_list] - if _check_type(result): - return _convert_type(result) - if is_required: - value = None - continue - return None - - def get_kibana_rules_map(repo: str = "elastic/kibana", branch: str = "master") -> dict[str, Any]: """Get list of available rules from the Kibana repo and return a list of URLs.""" diff --git a/detection_rules/rule_validators.py b/detection_rules/rule_validators.py index 7e08e090553..3f0da251b64 100644 --- a/detection_rules/rule_validators.py +++ b/detection_rules/rule_validators.py @@ -6,7 +6,6 @@ """Validation logic for rules containing 
queries.""" import re -import time import typing from collections.abc import Callable from dataclasses import dataclass @@ -29,14 +28,19 @@ from kibana import Kibana # type: ignore[reportMissingTypeStubs] from semver import Version -from . import ecs, endgame, integrations, misc, utils +from . import ecs, endgame, misc, utils from .beats import get_datasets_and_modules, parse_beats_from_index from .config import CUSTOM_RULES_DIR, load_current_package_version, parse_rules_config from .custom_schemas import update_auto_generated_schema +from .index_mappings import ( + create_remote_indices, + execute_query_against_indices, + get_rule_integrations, + prepare_mappings, +) from .integrations import ( get_integration_schema_data, load_integrations_manifests, - load_integrations_schemas, parse_datasets, ) from .rule import EQLRuleData, QueryRuleData, QueryValidator, RuleMeta, TOMLRuleContents, set_eql_config @@ -53,6 +57,15 @@ ) KQL_ERROR_TYPES = kql.KqlCompileError | kql.KqlParseError RULES_CONFIG = parse_rules_config() +# TODO ESQL specific error message to catch Kibana Bad Request Errors +# TODO ESQL.py file to hold ESQL specific logic for Errors, subclass exceptions +# Expect to support the following as ESQL (middle 2 from Kibana) +""" + EsqlSchemaError + EsqlSemanticError + EsqlSyntaxError + EsqlTypeMismatchError +""" @dataclass(frozen=True) @@ -536,11 +549,7 @@ def add_stack_targets(query_text: str, include_endgame: bool) -> None: add_stack_targets(synthetic_sequence, include_endgame=False) else: # Datasetless subquery: try metadata integrations first, else add per-subquery stack targets - meta_integrations = meta.integration - if isinstance(meta_integrations, str): - meta_integrations = [meta_integrations] - elif meta_integrations is None: - meta_integrations = [] + meta_integrations = get_rule_integrations(meta) if meta_integrations: meta_pkg_ints = [ @@ -722,17 +731,18 @@ def validate_rule_type_configurations(self, data: EQLRuleData, meta: RuleMeta) - class ESQLValidator(QueryValidator): """Validate specific fields for ESQL query event types.""" - def __init__(self, query: str) -> None: - """Initialize the ESQLValidator with the given query.""" - super().__init__(query) - self.esql_unique_fields: list[dict[str, str]] = [] - self.related_integrations: list[dict[str, str]] = [] - self.stack_version: str = "" + kibana_client: Kibana + elastic_client: Elasticsearch + metadata: RuleMeta + rule_id: str + verbosity: int + esql_unique_fields: list[dict[str, str]] - @cached_property - def ast(self) -> None: # type: ignore[reportIncompatibleMethodOverride] - """There is no AST for ESQL until we have an ESQL parser.""" - return None + def log(self, val: str) -> None: + """Log if verbosity is 1 or greater (1 corresponds to `-v` in pytest)""" + unit_test_verbose_level = 1 + if self.verbosity >= unit_test_verbose_level: + print(f"{self.rule_id}:", val) @cached_property def unique_fields(self) -> list[str]: # type: ignore[reportIncompatibleMethodOverride] @@ -748,82 +758,6 @@ def get_unique_field_type(self, field_name: str) -> str | None: # type: ignore[ return field["type"] return None - def validate(self, rule_data: "QueryRuleData", rule_meta: RuleMeta, force_remote_validation: bool = False) -> None: # type: ignore[reportIncompatibleMethodOverride] - """Validate an ESQL query while checking TOMLRule.""" - if misc.getdefault("remote_esql_validation")() or force_remote_validation: - kibana_client = misc.get_default_kibana_client() - elastic_client = misc.get_default_elasticsearch_client() - _ = 
self.remote_validate_rule( - kibana_client, - elastic_client, - rule_data.query, - rule_meta, - rule_data.rule_id, - ) - - def get_rule_integrations(self, metadata: RuleMeta) -> list[str]: - """Retrieve rule integrations from metadata.""" - rule_integrations: list[str] = [] - if metadata.integration: - if isinstance(metadata.integration, list): - rule_integrations = metadata.integration - else: - rule_integrations = [metadata.integration] - return rule_integrations - - def prepare_integration_mappings( - self, - rule_integrations: list[str], - event_dataset_integrations: list[utils.EventDataset], - package_manifests: Any, - integration_schemas: Any, - log: Callable[[str], None], - ) -> tuple[dict[str, Any], dict[str, Any]]: - """Prepare integration mappings for the given rule integrations.""" - integration_mappings: dict[str, Any] = {} - index_lookup: dict[str, Any] = {} - dataset_restriction: dict[str, str] = {} - - # Process restrictions, note we need this for loops to be separate - for event_dataset in event_dataset_integrations: - # Ensure the integration is in rule_integrations - if event_dataset.integration not in rule_integrations: - dataset_restriction.setdefault(event_dataset.integration, []).append(event_dataset.datastream) # type: ignore[reportIncompatibleMethodOverride] - for event_dataset in event_dataset_integrations: - if event_dataset.integration not in rule_integrations: - rule_integrations.append(event_dataset.integration) - - for integration in rule_integrations: - package = integration - package_version, _ = integrations.find_latest_compatible_version( - package, - "", - Version.parse(self.stack_version), - package_manifests, - ) - package_schema = integration_schemas[package][package_version] - - # Apply dataset restrictions if any - if integration in dataset_restriction: - allowed_keys = dataset_restriction[integration] - package_schema = {key: value for key, value in package_schema.items() if key in allowed_keys} - - for stream in package_schema: - flat_schema = package_schema[stream] - stream_mappings = utils.flat_schema_to_index_mapping(flat_schema) - nested_multifields = self.find_nested_multifields(stream_mappings) - for field in nested_multifields: - field_name = str(field).split(".fields.")[0].replace(".", ".properties.") + ".fields" - log( - f"Warning: Nested multi-field `{field}` found in `{integration}-{stream}`. " - f"Removing parent field from schema for ES|QL validation." 
- ) - utils.delete_nested_key_from_dict(stream_mappings, field_name) - utils.combine_dicts(integration_mappings, stream_mappings) - index_lookup[f"{integration}-{stream}"] = stream_mappings - - return integration_mappings, index_lookup - def validate_columns_index_mapping( self, query_columns: list[dict[str, str]], combined_mappings: dict[str, Any] ) -> bool: @@ -852,149 +786,34 @@ def validate_columns_index_mapping( ) if mismatched_columns: + # TODO this should be an ESQL type Error (check to match EQL error structure) raise ValueError("Column validation errors:\n" + "\n".join(mismatched_columns)) return True - def create_remote_indices( - self, - elastic_client: Elasticsearch, - existing_mappings: dict[str, Any], - index_lookup: dict[str, Any], - log: Callable[[str], None], - ) -> str: - """Create remote indices for validation and return the index string.""" - suffix = str(int(time.time() * 1000)) - test_index = f"rule-test-index-{suffix}" - response = misc.create_index_with_index_mapping(elastic_client, test_index, existing_mappings) - log(f"Index `{test_index}` created: {response}") - full_index_str = test_index - - # create all integration indices - for index, properties in index_lookup.items(): - ind_index_str = f"test-{index.rstrip('*')}{suffix}" - response = misc.create_index_with_index_mapping(elastic_client, ind_index_str, properties) - log(f"Index `{ind_index_str}` created: {response}") - full_index_str = f"{full_index_str}, {ind_index_str}" - - return full_index_str - - def execute_query_against_indices( - self, - elastic_client: Elasticsearch, - query: str, - test_index_str: str, - log: Callable[[str], None], - delete_indices: bool = True, - ) -> tuple[list[Any], ObjectApiResponse[Any]]: - """Execute the ESQL query against the test indices on a remote Stack and return the columns.""" - try: - log(f"Executing a query against `{test_index_str}`") - response = elastic_client.esql.query(query=query) - log(f"Got query response: {response}") - query_columns = response.get("columns", []) - finally: - if delete_indices: - for index_str in test_index_str.split(","): - response = elastic_client.indices.delete(index=index_str.strip()) - log(f"Test index `{index_str}` deleted: {response}") - - query_column_names = [c["name"] for c in query_columns] - log(f"Got query columns: {', '.join(query_column_names)}") - return query_columns, response - - def find_nested_multifields(self, mapping: dict[str, Any], path: str = "") -> list[Any]: - """Recursively search for nested multi-fields in Elasticsearch mappings.""" - nested_multifields = [] - - for field, properties in mapping.items(): - current_path = f"{path}.{field}" if path else field - - if isinstance(properties, dict): - # Check if the field has a `fields` key - if "fields" in properties: - # Check if any subfield in `fields` also has a `fields` key - for subfield, subproperties in properties["fields"].items(): # type: ignore[reportUnknownVariableType] - if isinstance(subproperties, dict) and "fields" in subproperties: - nested_multifields.append(f"{current_path}.fields.{subfield}") # type: ignore[reportUnknownVariableType] - - # Recurse into subfields - if "properties" in properties: - nested_multifields.extend( # type: ignore[reportUnknownVariableType] - self.find_nested_multifields(properties["properties"], current_path) # type: ignore[reportUnknownVariableType] - ) - - return nested_multifields # type: ignore[reportUnknownVariableType] - - def get_ecs_schema_mappings(self, current_version: Version) -> dict[str, Any]: - """Get the ECS 
schema in an index mapping format (nested schema) handling scaled floats.""" - ecs_version = get_stack_schemas()[str(current_version)]["ecs"] - ecs_schemas = ecs.get_schemas() - ecs_schema_flattened: dict[str, Any] = {} - ecs_schema_scaled_floats: dict[str, Any] = {} - for index, info in ecs_schemas[ecs_version]["ecs_flat"].items(): - if info["type"] == "scaled_float": - ecs_schema_scaled_floats.update({index: info["scaling_factor"]}) - ecs_schema_flattened.update({index: info["type"]}) - ecs_schema = utils.convert_to_nested_schema(ecs_schema_flattened) - for index, info in ecs_schema_scaled_floats.items(): - parts = index.split(".") - current = ecs_schema - - # Traverse the ecs_schema to the correct nested dictionary - for part in parts[:-1]: # Traverse all parts except the last one - current = current.setdefault(part, {}).setdefault("properties", {}) - - current[parts[-1]].update({"scaling_factor": info}) - return ecs_schema - - def prepare_mappings( - self, - elastic_client: Elasticsearch, - indices: list[str], - event_dataset_integrations: list[utils.EventDataset], - metadata: RuleMeta, - log: Callable[[str], None], - ) -> tuple[dict[str, Any], dict[str, Any], dict[str, Any]]: - """Prepare index mappings for the given indices and rule integrations.""" - existing_mappings, index_lookup = misc.get_existing_mappings(elastic_client, indices) - - # Collect mappings for the integrations - rule_integrations = self.get_rule_integrations(metadata) - - # Collect mappings for all relevant integrations for the given stack version - package_manifests = load_integrations_manifests() - integration_schemas = load_integrations_schemas() - - integration_mappings, integration_index_lookup = self.prepare_integration_mappings( - rule_integrations, event_dataset_integrations, package_manifests, integration_schemas, log - ) - - index_lookup.update(integration_index_lookup) - - # Combine existing and integration mappings into a single mapping dict - combined_mappings: dict[str, Any] = {} - utils.combine_dicts(combined_mappings, existing_mappings) - utils.combine_dicts(combined_mappings, integration_mappings) - - # Load non-ecs schema and convert to index mapping format (nested schema) - non_ecs_mapping: dict[str, Any] = {} - non_ecs = ecs.get_non_ecs_schema() - for index in indices: - non_ecs_mapping.update(non_ecs.get(index, {})) - non_ecs_mapping = ecs.flatten(non_ecs_mapping) - non_ecs_mapping = utils.convert_to_nested_schema(non_ecs_mapping) - if not combined_mappings and not non_ecs_mapping: - raise ValueError("No mappings found") - index_lookup.update({"rule-non-ecs-index": non_ecs_mapping}) - - # Load ECS in an index mapping format (nested schema) - current_version = Version.parse(load_current_package_version(), optional_minor_and_patch=True) - ecs_schema = self.get_ecs_schema_mappings(current_version) - - index_lookup.update({"rule-ecs-index": ecs_schema}) - - return existing_mappings, index_lookup, combined_mappings + def validate(self, data: "QueryRuleData", rule_meta: RuleMeta, force_remote_validation: bool = False) -> None: # type: ignore[reportIncompatibleMethodOverride] + """Validate an ESQL query while checking TOMLRule.""" + if misc.getdefault("remote_esql_validation")() or force_remote_validation: + resolved_kibana_options = { + str(option.name): option.default() if callable(option.default) else option.default + for option in misc.kibana_options + if option.name is not None + } + + kibana_client = misc.get_kibana_client(**resolved_kibana_options) + resolved_elastic_options = { + option.name: 
option.default() if callable(option.default) else option.default
+                for option in misc.elasticsearch_options
+                if option.name is not None
+            }
+            elastic_client = misc.get_elasticsearch_client(**resolved_elastic_options)
+            _ = self.remote_validate_rule(
+                kibana_client,
+                elastic_client,
+                data.query,
+                rule_meta,
+                data.rule_id,
+            )
 
     def remote_validate_rule_contents(
         self, kibana_client: Kibana, elastic_client: Elasticsearch, contents: TOMLRuleContents, verbosity: int = 0
@@ -1020,48 +839,45 @@ def remote_validate_rule(  # noqa: PLR0913
     ) -> ObjectApiResponse[Any]:
         """Uses remote validation from an Elastic Stack to validate a given ES|QL rule"""
 
-        def log(val: str) -> None:
-            """Log if verbosity is 1 or greater (1 corresponds to `-v` in pytest)"""
-            unit_test_verbose_level = 1
-            if verbosity >= unit_test_verbose_level:
-                print(f"{rule_id}:", val)
+        self.rule_id = rule_id
+        self.verbosity = verbosity
 
+        # Validate that all fields (columns) are either dynamic fields or correctly mapped
+        # against the combined mapping of all the
indices
         kibana_details: dict[str, Any] = kibana_client.get("/api/status", {})  # type: ignore[reportUnknownVariableType]
         if "version" not in kibana_details:
             raise ValueError("Failed to retrieve Kibana details.")
-        self.stack_version = str(kibana_details["version"]["number"])
-        log(f"Validating against {self.stack_version} stack")
+        stack_version = str(kibana_details["version"]["number"])
+        self.log(f"Validating against {stack_version} stack")
 
         indices_str, indices = utils.get_esql_query_indices(query)  # type: ignore[reportUnknownVariableType]
-        log(f"Extracted indices from query: {', '.join(indices)}")
+        self.log(f"Extracted indices from query: {', '.join(indices)}")
 
         event_dataset_integrations = utils.get_esql_query_event_dataset_integrations(query)
-        log(f"Extracted Event Dataset integrations from query: {', '.join(indices)}")
+        self.log(f"Extracted Event Dataset integrations from query: {', '.join(e.integration for e in event_dataset_integrations)}")
 
         # Get mappings for all matching existing index templates
-        existing_mappings, index_lookup, combined_mappings = self.prepare_mappings(
-            elastic_client, indices, event_dataset_integrations, metadata, log
+        existing_mappings, index_lookup, combined_mappings = prepare_mappings(
+            elastic_client, indices, event_dataset_integrations, metadata, stack_version, self.log
         )
-        log(f"Collected mappings: {len(existing_mappings)}")
-        log(f"Combined mappings prepared: {len(combined_mappings)}")
+        self.log(f"Collected mappings: {len(existing_mappings)}")
+        self.log(f"Combined mappings prepared: {len(combined_mappings)}")
 
         # Create remote indices
-        full_index_str = self.create_remote_indices(elastic_client, existing_mappings, index_lookup, log)
+        full_index_str = create_remote_indices(elastic_client, existing_mappings, index_lookup, self.log)
 
         utils.combine_dicts(combined_mappings, index_lookup["rule-non-ecs-index"])
         utils.combine_dicts(combined_mappings, index_lookup["rule-ecs-index"])
 
         # Replace all sources with the test indices
         query = query.replace(indices_str, full_index_str)  # type: ignore[reportUnknownVariableType]
 
-        query_columns, response = self.execute_query_against_indices(elastic_client, query, full_index_str, log)  # type: ignore[reportUnknownVariableType]
+        query_columns, response = execute_query_against_indices(elastic_client, query, full_index_str, self.log)  # type: ignore[reportUnknownVariableType]
         self.esql_unique_fields = query_columns
 
-        # Validate that all fields (columns) are either dynamic fields or correctly mapped
-        # against the combined mapping of all the indices
         if self.validate_columns_index_mapping(query_columns, combined_mappings):
-            log("All dynamic columns have proper formatting.")
+            self.log("All dynamic columns have proper formatting.")
         else:
-            log("Dynamic column(s) have improper formatting.")
+            self.log("Dynamic column(s) have improper formatting.")
 
         return response

diff --git a/tests/test_rules_remote.py b/tests/test_rules_remote.py
index 8340a9338df..396a81d1a90 100644
--- a/tests/test_rules_remote.py
+++ b/tests/test_rules_remote.py
@@ -10,8 +10,12 @@
 from elasticsearch import BadRequestError
 from elasticsearch import ConnectionError as ESConnectionError
 
-from detection_rules.misc import get_default_config, getdefault
-from detection_rules.remote_validation import RemoteConnector
+from detection_rules.misc import (
+    get_default_config,
+    get_default_elasticsearch_client,
+    get_default_kibana_client,
+    getdefault,
+)
 from detection_rules.rule_loader import RuleCollection
 from detection_rules.rule_validators import ESQLValidator
 from detection_rules.utils import get_path
@@ -36,8 +40,9 @@ def test_esql_rules(self):
         if not esql_rules:
             return
 
-        remote_connector = RemoteConnector()
-        if not remote_connector.es_client or not remote_connector.kibana_client:
+        kibana_client = get_default_kibana_client()
+        elastic_client = get_default_elasticsearch_client()
+        if not kibana_client or not elastic_client:
             self.skipTest("Skipping remote validation due to missing client")
 
         # Retrieve verbosity level from pytest
@@ -52,9 +57,7 @@ def test_esql_rules(self):
             while retry_count < max_retries:
                 try:
                     validator = ESQLValidator(r.contents.data.query)  # type: ignore[reportIncompatibleMethodOverride]
-                    _ = validator.remote_validate_rule_contents(
-                        remote_connector.kibana_client, remote_connector.es_client, r.contents, verbosity
-                    )
+                    _ = validator.remote_validate_rule_contents(kibana_client, elastic_client, r.contents, verbosity)
                     break
                 except (ValueError, BadRequestError) as e:
                     print(f"FAILURE: {e}")

From 013ad5f87441dae7ffbb7138509fc86b3e6d5bb3 Mon Sep 17 00:00:00 2001
From: eric-forte-elastic
Date: Tue, 23 Sep 2025 11:59:27 -0400
Subject: [PATCH 39/93] Update Error Types

---
 detection_rules/esql_errors.py     | 28 +++++++++++++++++-----------
 detection_rules/index_mappings.py  |  1 -
 detection_rules/misc.py            | 26 ++++++++++++++++----------
 detection_rules/rule_validators.py | 13 ++-----------
 4 files changed, 35 insertions(+), 33 deletions(-)

diff --git a/detection_rules/esql_errors.py b/detection_rules/esql_errors.py
index a689a89ff0e..dd0c9833edb 100644
--- a/detection_rules/esql_errors.py
+++ b/detection_rules/esql_errors.py
@@ -1,7 +1,5 @@
 """ESQL exceptions."""
 
-from marshmallow.exceptions import ValidationError
-
 __all__ = (
     "EsqlSchemaError",
     "EsqlSemanticError",
@@ -10,21 +8,29 @@
 )
 
 
-class EsqlSchemaError(ValidationError):
-    """Error for missing fields in ESQL."""
+class EsqlSchemaError(Exception):
+    """Error in ESQL schema. Validated via Kibana until AST is available."""
+
+    def __init__(self, message: str):
+        super().__init__(message)
 
 
-class EsqlSyntaxError(ValidationError):
-    """Error with ESQL syntax."""
+class EsqlSyntaxError(Exception):
+    """Error with ESQL syntax. Validated via Kibana until AST is available."""
 
-    # TODO: Update this to a Kibana Error extension? Perhaps?
+    def __init__(self, message: str):
+        super().__init__(message)
 
 
-class EsqlSemanticError(ValidationError):
-    """Error with ESQL semantics."""
+class EsqlSemanticError(Exception):
+    """Error with ESQL semantics.
Validated via Kibana until AST is available.""" - # TODO: Update this to a Kibana Error extension? Perhaps? + def __init__(self, message: str): + super().__init__(message) -class EsqlTypeMismatchError(ValidationError): +class EsqlTypeMismatchError(Exception): """Error when validating types in ESQL.""" + + def __init__(self, message: str): + super().__init__(message) diff --git a/detection_rules/index_mappings.py b/detection_rules/index_mappings.py index 653e0e6d9f6..dab3d29a30e 100644 --- a/detection_rules/index_mappings.py +++ b/detection_rules/index_mappings.py @@ -58,7 +58,6 @@ def prepare_integration_mappings( for integration in rule_integrations: package = integration - # TODO check should be latest or least? package_version, _ = integrations.find_latest_compatible_version( package, "", diff --git a/detection_rules/misc.py b/detection_rules/misc.py index fd0a48aad3e..4c84583d686 100644 --- a/detection_rules/misc.py +++ b/detection_rules/misc.py @@ -16,8 +16,10 @@ import requests from elastic_transport import ObjectApiResponse from elasticsearch import AuthenticationException, Elasticsearch +from elasticsearch.exceptions import BadRequestError from kibana import Kibana # type: ignore[reportMissingTypeStubs] +from .esql_errors import EsqlSchemaError from .utils import add_params, cached, combine_dicts, load_etc_dump LICENSE_HEADER = """ @@ -427,17 +429,21 @@ def get_simulated_index_template_mappings(elastic_client: Elasticsearch, name: s def create_index_with_index_mapping( elastic_client: Elasticsearch, index_name: str, mappings: dict[str, Any] -) -> ObjectApiResponse[Any]: +) -> ObjectApiResponse[Any] | None: """Create an index with the specified mappings and settings to support large number of fields and nested objects.""" - return elastic_client.indices.create( - index=index_name, - mappings={"properties": mappings}, - settings={ - "index.mapping.total_fields.limit": 10000, - "index.mapping.nested_fields.limit": 500, - "index.mapping.nested_objects.limit": 10000, - }, - ) + try: + return elastic_client.indices.create( + index=index_name, + mappings={"properties": mappings}, + settings={ + "index.mapping.total_fields.limit": 10000, + "index.mapping.nested_fields.limit": 500, + "index.mapping.nested_objects.limit": 10000, + }, + ) + except BadRequestError as e: + if e.status_code == 400 and "validation_exception" in str(e): + raise EsqlSchemaError(str(e)) from e def get_existing_mappings(elastic_client: Elasticsearch, indices: list[str]) -> tuple[dict[str, Any], dict[str, Any]]: diff --git a/detection_rules/rule_validators.py b/detection_rules/rule_validators.py index 3f0da251b64..cd99533f873 100644 --- a/detection_rules/rule_validators.py +++ b/detection_rules/rule_validators.py @@ -32,6 +32,7 @@ from .beats import get_datasets_and_modules, parse_beats_from_index from .config import CUSTOM_RULES_DIR, load_current_package_version, parse_rules_config from .custom_schemas import update_auto_generated_schema +from .esql_errors import EsqlTypeMismatchError from .index_mappings import ( create_remote_indices, execute_query_against_indices, @@ -57,15 +58,6 @@ ) KQL_ERROR_TYPES = kql.KqlCompileError | kql.KqlParseError RULES_CONFIG = parse_rules_config() -# TODO ESQL specific error message to catch Kibana Bad Request Errors -# TODO ESQL.py file to hold ESQL specific logic for Errors, subclass exceptions -# Expect to support the following as ESQL (middle 2 from Kibana) -""" - EsqlSchemaError - EsqlSemanticError - EsqlSyntaxError - EsqlTypeMismatchError -""" @dataclass(frozen=True) @@ -786,8 
+778,7 @@ def validate_columns_index_mapping( ) if mismatched_columns: - # TODO this should be an ESQL type Error (check to match EQL error structure) - raise ValueError("Column validation errors:\n" + "\n".join(mismatched_columns)) + raise EsqlTypeMismatchError("Column validation errors:\n" + "\n".join(mismatched_columns)) return True From f9d4dba7795f3ecb4bd647b81e37d3cec30e116c Mon Sep 17 00:00:00 2001 From: eric-forte-elastic Date: Tue, 23 Sep 2025 15:29:30 -0400 Subject: [PATCH 40/93] Update Validation Errors for Index handling --- CLI.md | 2 + detection_rules/esql_errors.py | 28 ++++++-- detection_rules/index_mappings.py | 92 +++++++++++++++++++++++--- detection_rules/misc.py | 48 +------------- detection_rules/rule_validators.py | 27 +++++--- detection_rules/schemas/definitions.py | 1 + tests/test_rules_remote.py | 61 +++++++++-------- 7 files changed, 157 insertions(+), 102 deletions(-) diff --git a/CLI.md b/CLI.md index 5015fe22f18..6fe7485a9f6 100644 --- a/CLI.md +++ b/CLI.md @@ -51,6 +51,8 @@ For instance, some users may want to increase the default value in cases where h Using the environment variable `DR_REMOTE_ESQL_VALIDATION` will enable remote ESQL validation for rules that use ESQL queries. This validation will be performed whenever the rule is loaded including for example the view-rule command. This requires the appropriate kibana_url or cloud_id, api_key, and es_url to be set in the config file or as environment variables. +Using the environment variable `DR_SKIP_EMPTY_INDEX_CLEANUP` will disable the cleanup of remote testing indexes that are created as part of the remote ESQL validation. By default, these indexes are deleted after the validation is complete, or upon validation error. + ## Importing rules into the repo You can import rules into the repo using the `create-rule` or `import-rules-to-repo` commands. Both of these commands will diff --git a/detection_rules/esql_errors.py b/detection_rules/esql_errors.py index dd0c9833edb..8167131c695 100644 --- a/detection_rules/esql_errors.py +++ b/detection_rules/esql_errors.py @@ -1,5 +1,9 @@ """ESQL exceptions.""" +from elasticsearch import Elasticsearch # type: ignore[reportMissingTypeStubs] + +from .misc import getdefault + __all__ = ( "EsqlSchemaError", "EsqlSemanticError", @@ -8,29 +12,45 @@ ) +def cleanup_empty_indices( + elastic_client: Elasticsearch, index_patterns: tuple[str, ...] = ("rule-test-*", "test-*") +) -> None: + """Delete empty indices matching the given patterns.""" + if getdefault("skip_empty_index_cleanup")(): + return + for pattern in index_patterns: + indices = elastic_client.cat.indices(index=pattern, format="json") + empty_indices = [index["index"] for index in indices if index["docs.count"] == "0"] # type: ignore[reportMissingTypeStubs] + for empty_index in empty_indices: + _ = elastic_client.indices.delete(index=empty_index) + + class EsqlSchemaError(Exception): """Error in ESQL schema. Validated via Kibana until AST is available.""" - def __init__(self, message: str): + def __init__(self, message: str, elastic_client: Elasticsearch) -> None: + cleanup_empty_indices(elastic_client) super().__init__(message) class EsqlSyntaxError(Exception): """Error with ESQL syntax. Validated via Kibana until AST is available.""" - def __init__(self, message: str): + def __init__(self, message: str, elastic_client: Elasticsearch) -> None: + cleanup_empty_indices(elastic_client) super().__init__(message) class EsqlSemanticError(Exception): """Error with ESQL semantics. 
Validated via Kibana until AST is available.""" - def __init__(self, message: str): + def __init__(self, message: str, elastic_client: Elasticsearch) -> None: + cleanup_empty_indices(elastic_client) super().__init__(message) class EsqlTypeMismatchError(Exception): """Error when validating types in ESQL.""" - def __init__(self, message: str): + def __init__(self, message: str) -> None: super().__init__(message) diff --git a/detection_rules/index_mappings.py b/detection_rules/index_mappings.py index dab3d29a30e..9e8bdd5466d 100644 --- a/detection_rules/index_mappings.py +++ b/detection_rules/index_mappings.py @@ -11,30 +11,95 @@ from elastic_transport import ObjectApiResponse from elasticsearch import Elasticsearch # type: ignore[reportMissingTypeStubs] +from elasticsearch.exceptions import BadRequestError from semver import Version from . import ecs, integrations, misc, utils from .config import load_current_package_version +from .esql_errors import EsqlSchemaError, EsqlSemanticError, EsqlSyntaxError, cleanup_empty_indices from .integrations import ( load_integrations_manifests, load_integrations_schemas, ) from .rule import RuleMeta from .schemas import get_stack_schemas +from .schemas.definitions import HTTP_STATUS_BAD_REQUEST +from .utils import combine_dicts def get_rule_integrations(metadata: RuleMeta) -> list[str]: """Retrieve rule integrations from metadata.""" - rule_integrations: list[str] = [] if metadata.integration: - if isinstance(metadata.integration, list): - rule_integrations = metadata.integration - else: - rule_integrations = [metadata.integration] + rule_integrations: list[str] = ( + metadata.integration if isinstance(metadata.integration, list) else [metadata.integration] + ) + else: + rule_integrations: list[str] = [] return rule_integrations -def prepare_integration_mappings( +def create_index_with_index_mapping( + elastic_client: Elasticsearch, index_name: str, mappings: dict[str, Any] +) -> ObjectApiResponse[Any] | None: + """Create an index with the specified mappings and settings to support large number of fields and nested objects.""" + try: + return elastic_client.indices.create( + index=index_name, + mappings={"properties": mappings}, + settings={ + "index.mapping.total_fields.limit": 10000, + "index.mapping.nested_fields.limit": 500, + "index.mapping.nested_objects.limit": 10000, + }, + ) + except BadRequestError as e: + error_message = str(e) + if ( + e.status_code == HTTP_STATUS_BAD_REQUEST + and "validation_exception" in error_message + and "Validation Failed: 1: this action would add [2] shards" in error_message + ): + cleanup_empty_indices(elastic_client) + try: + return elastic_client.indices.create( + index=index_name, + mappings={"properties": mappings}, + settings={ + "index.mapping.total_fields.limit": 10000, + "index.mapping.nested_fields.limit": 500, + "index.mapping.nested_objects.limit": 10000, + }, + ) + except BadRequestError as retry_error: + raise EsqlSchemaError(str(retry_error), elastic_client) from retry_error + raise EsqlSchemaError(error_message, elastic_client) from e + + +def get_existing_mappings(elastic_client: Elasticsearch, indices: list[str]) -> tuple[dict[str, Any], dict[str, Any]]: + """Retrieve mappings for all matching existing index templates.""" + existing_mappings: dict[str, Any] = {} + index_lookup: dict[str, Any] = {} + for index in indices: + index_tmpl_mappings = get_simulated_index_template_mappings(elastic_client, index) + index_lookup[index] = index_tmpl_mappings + combine_dicts(existing_mappings, index_tmpl_mappings) 
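+    # existing_mappings accumulates the merged view across all matched templates,
+    # while index_lookup keeps the per-index mappings for creating test indices later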
+ return existing_mappings, index_lookup + + +def get_simulated_index_template_mappings(elastic_client: Elasticsearch, name: str) -> dict[str, Any]: + """ + Return the mappings from the index configuration that would be applied + to the specified index from an existing index template + + https://elasticsearch-py.readthedocs.io/en/stable/api/indices.html#elasticsearch.client.IndicesClient.simulate_index_template + """ + template = elastic_client.indices.simulate_index_template(name=name) + if not template: + return {} + return template["template"]["mappings"]["properties"] + + +def prepare_integration_mappings( # noqa: PLR0913 rule_integrations: list[str], event_dataset_integrations: list[utils.EventDataset], package_manifests: Any, @@ -97,14 +162,14 @@ def create_remote_indices( """Create remote indices for validation and return the index string.""" suffix = str(int(time.time() * 1000)) test_index = f"rule-test-index-{suffix}" - response = misc.create_index_with_index_mapping(elastic_client, test_index, existing_mappings) + response = create_index_with_index_mapping(elastic_client, test_index, existing_mappings) log(f"Index `{test_index}` created: {response}") full_index_str = test_index # create all integration indices for index, properties in index_lookup.items(): ind_index_str = f"test-{index.rstrip('*')}{suffix}" - response = misc.create_index_with_index_mapping(elastic_client, ind_index_str, properties) + response = create_index_with_index_mapping(elastic_client, ind_index_str, properties) log(f"Index `{ind_index_str}` created: {response}") full_index_str = f"{full_index_str}, {ind_index_str}" @@ -124,8 +189,13 @@ def execute_query_against_indices( response = elastic_client.esql.query(query=query) log(f"Got query response: {response}") query_columns = response.get("columns", []) + except BadRequestError as e: + error_msg = str(e) + if "parsing_exception" in error_msg: + raise EsqlSyntaxError(str(e), elastic_client) from e + raise EsqlSemanticError(str(e), elastic_client) from e finally: - if delete_indices: + if delete_indices or misc.getdefault("skip_empty_index_cleanup")(): for index_str in test_index_str.split(","): response = elastic_client.indices.delete(index=index_str.strip()) log(f"Test index `{index_str}` deleted: {response}") @@ -182,7 +252,7 @@ def get_ecs_schema_mappings(current_version: Version) -> dict[str, Any]: return ecs_schema -def prepare_mappings( +def prepare_mappings( # noqa: PLR0913 elastic_client: Elasticsearch, indices: list[str], event_dataset_integrations: list[utils.EventDataset], @@ -191,7 +261,7 @@ def prepare_mappings( log: Callable[[str], None], ) -> tuple[dict[str, Any], dict[str, Any], dict[str, Any]]: """Prepare index mappings for the given indices and rule integrations.""" - existing_mappings, index_lookup = misc.get_existing_mappings(elastic_client, indices) + existing_mappings, index_lookup = get_existing_mappings(elastic_client, indices) # Collect mappings for the integrations rule_integrations = get_rule_integrations(metadata) diff --git a/detection_rules/misc.py b/detection_rules/misc.py index 4c84583d686..f8285c35e13 100644 --- a/detection_rules/misc.py +++ b/detection_rules/misc.py @@ -14,13 +14,10 @@ import click import requests -from elastic_transport import ObjectApiResponse from elasticsearch import AuthenticationException, Elasticsearch -from elasticsearch.exceptions import BadRequestError from kibana import Kibana # type: ignore[reportMissingTypeStubs] -from .esql_errors import EsqlSchemaError -from .utils import add_params, cached, 
combine_dicts, load_etc_dump
+from .utils import add_params, cached, load_etc_dump
 
 LICENSE_HEADER = """
 Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
@@ -412,46 +409,3 @@ def _wrapped(*args: Any, **kwargs: Any) -> Any:  # noqa: PLR0912
 
         return _wrapped
 
     return _wrapper
-
-
-def get_simulated_index_template_mappings(elastic_client: Elasticsearch, name: str) -> dict[str, Any]:
-    """
-    Return the mappings from the index configuration that would be applied
-    to the specified index from an existing index template
-
-    https://elasticsearch-py.readthedocs.io/en/stable/api/indices.html#elasticsearch.client.IndicesClient.simulate_index_template
-    """
-    template = elastic_client.indices.simulate_index_template(name=name)
-    if not template:
-        return {}
-    return template["template"]["mappings"]["properties"]
-
-
-def create_index_with_index_mapping(
-    elastic_client: Elasticsearch, index_name: str, mappings: dict[str, Any]
-) -> ObjectApiResponse[Any] | None:
-    """Create an index with the specified mappings and settings to support large number of fields and nested objects."""
-    try:
-        return elastic_client.indices.create(
-            index=index_name,
-            mappings={"properties": mappings},
-            settings={
-                "index.mapping.total_fields.limit": 10000,
-                "index.mapping.nested_fields.limit": 500,
-                "index.mapping.nested_objects.limit": 10000,
-            },
-        )
-    except BadRequestError as e:
-        if e.status_code == 400 and "validation_exception" in str(e):
-            raise EsqlSchemaError(str(e)) from e
-
-
-def get_existing_mappings(elastic_client: Elasticsearch, indices: list[str]) -> tuple[dict[str, Any], dict[str, Any]]:
-    """Retrieve mappings for all matching existing index templates."""
-    existing_mappings: dict[str, Any] = {}
-    index_lookup: dict[str, Any] = {}
-    for index in indices:
-        index_tmpl_mappings = get_simulated_index_template_mappings(elastic_client, index)
-        index_lookup[index] = index_tmpl_mappings
-        combine_dicts(existing_mappings, index_tmpl_mappings)
-    return existing_mappings, index_lookup

diff --git a/detection_rules/rule_validators.py b/detection_rules/rule_validators.py
index cd99533f873..f2abff9fc23 100644
--- a/detection_rules/rule_validators.py
+++ b/detection_rules/rule_validators.py
@@ -736,6 +736,12 @@ def log(self, val: str) -> None:
         if self.verbosity >= unit_test_verbose_level:
             print(f"{self.rule_id}:", val)
 
+    @property
+    def ast(self) -> Any:
+        """Return the AST of the ESQL query. Dependent on the ESQL parser, which is not yet implemented."""
+        # Needs to return None to avoid a NotImplementedError
+        return None
+
     @cached_property
     def unique_fields(self) -> list[str]:
         """Return a list of unique fields in the query.
Requires remote validation to have occurred.""" @@ -791,20 +797,23 @@ def validate(self, data: "QueryRuleData", rule_meta: RuleMeta, force_remote_vali if option.name is not None } - kibana_client = misc.get_kibana_client(**resolved_kibana_options) resolved_elastic_options = { option.name: option.default() if callable(option.default) else option.default for option in misc.elasticsearch_options if option.name is not None } - elastic_client = misc.get_elasticsearch_client(**resolved_elastic_options) - _ = self.remote_validate_rule( - kibana_client, - elastic_client, - data.query, - rule_meta, - data.rule_id, - ) + + with ( + misc.get_kibana_client(**resolved_kibana_options) as kibana_client, # type: ignore[reportUnknownVariableType] + misc.get_elasticsearch_client(**resolved_elastic_options) as elastic_client, # type: ignore[reportUnknownVariableType] + ): + _ = self.remote_validate_rule( + kibana_client, + elastic_client, + data.query, + rule_meta, + data.rule_id, + ) def remote_validate_rule_contents( self, kibana_client: Kibana, elastic_client: Elasticsearch, contents: TOMLRuleContents, verbosity: int = 0 diff --git a/detection_rules/schemas/definitions.py b/detection_rules/schemas/definitions.py index ac2966e2afd..599d170a6eb 100644 --- a/detection_rules/schemas/definitions.py +++ b/detection_rules/schemas/definitions.py @@ -56,6 +56,7 @@ def validator_wrapper(value: Any) -> Any: return validator_wrapper +HTTP_STATUS_BAD_REQUEST = 400 ASSET_TYPE = "security_rule" SAVED_OBJECT_TYPE = "security-rule" diff --git a/tests/test_rules_remote.py b/tests/test_rules_remote.py index 396a81d1a90..bea28f78d8c 100644 --- a/tests/test_rules_remote.py +++ b/tests/test_rules_remote.py @@ -40,38 +40,37 @@ def test_esql_rules(self): if not esql_rules: return - kibana_client = get_default_kibana_client() - elastic_client = get_default_elasticsearch_client() - if not kibana_client or not elastic_client: - self.skipTest("Skipping remote validation due to missing client") - - # Retrieve verbosity level from pytest - verbosity: int = int(self._outcome.result.config.get_verbosity()) # type: ignore[reportIncompatibleMethodOverride] - - failed_count = 0 - fail_list: list[str] = [] - max_retries = 3 - for r in esql_rules: - print() - retry_count = 0 - while retry_count < max_retries: - try: - validator = ESQLValidator(r.contents.data.query) # type: ignore[reportIncompatibleMethodOverride] - _ = validator.remote_validate_rule_contents(kibana_client, elastic_client, r.contents, verbosity) - break - except (ValueError, BadRequestError) as e: - print(f"FAILURE: {e}") - fail_list.append(f"FAILURE: {e}") - failed_count += 1 - break - except ESConnectionError as e: - retry_count += 1 - print(f"Connection error: {e}. 
Retrying {retry_count}/{max_retries}...") - time.sleep(30) - if retry_count == max_retries: - print(f"FAILURE: {e} after {max_retries} retries") - fail_list.append(f"FAILURE: {e} after {max_retries} retries") + with get_default_kibana_client() as kibana_client, get_default_elasticsearch_client() as elastic_client: + if not kibana_client or not elastic_client: + self.skipTest("Skipping remote validation due to missing client") + + # Retrieve verbosity level from pytest + verbosity: int = int(self._outcome.result.config.get_verbosity()) # type: ignore[reportIncompatibleMethodOverride] + + failed_count = 0 + fail_list: list[str] = [] + max_retries = 3 + for r in esql_rules: + print() + retry_count = 0 + while retry_count < max_retries: + try: + validator = ESQLValidator(r.contents.data.query) # type: ignore[reportIncompatibleMethodOverride] + _ = validator.remote_validate_rule_contents(kibana_client, elastic_client, r.contents, verbosity) + break + except (ValueError, BadRequestError) as e: + print(f"FAILURE: {e}") + fail_list.append(f"FAILURE: {e}") failed_count += 1 + break + except ESConnectionError as e: + retry_count += 1 + print(f"Connection error: {e}. Retrying {retry_count}/{max_retries}...") + time.sleep(30) + if retry_count == max_retries: + print(f"FAILURE: {e} after {max_retries} retries") + fail_list.append(f"FAILURE: {e} after {max_retries} retries") + failed_count += 1 print(f"Total rules: {len(esql_rules)}") print(f"Failed rules: {failed_count}") From bdd7ed4a62f00bff77182e709319531922c1f6e0 Mon Sep 17 00:00:00 2001 From: eric-forte-elastic Date: Tue, 23 Sep 2025 15:31:11 -0400 Subject: [PATCH 41/93] Formatting --- tests/test_rules_remote.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/test_rules_remote.py b/tests/test_rules_remote.py index bea28f78d8c..07e5bf57039 100644 --- a/tests/test_rules_remote.py +++ b/tests/test_rules_remote.py @@ -56,7 +56,9 @@ def test_esql_rules(self): while retry_count < max_retries: try: validator = ESQLValidator(r.contents.data.query) # type: ignore[reportIncompatibleMethodOverride] - _ = validator.remote_validate_rule_contents(kibana_client, elastic_client, r.contents, verbosity) + _ = validator.remote_validate_rule_contents( + kibana_client, elastic_client, r.contents, verbosity + ) break except (ValueError, BadRequestError) as e: print(f"FAILURE: {e}") From 84e36a5f55448b69d2dc899c79447c8343bca0da Mon Sep 17 00:00:00 2001 From: eric-forte-elastic Date: Tue, 23 Sep 2025 15:33:12 -0400 Subject: [PATCH 42/93] Add license --- detection_rules/esql_errors.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/detection_rules/esql_errors.py b/detection_rules/esql_errors.py index 8167131c695..4ed8e897692 100644 --- a/detection_rules/esql_errors.py +++ b/detection_rules/esql_errors.py @@ -1,3 +1,8 @@ +# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +# or more contributor license agreements. Licensed under the Elastic License +# 2.0; you may not use this file except in compliance with the Elastic License +# 2.0. 
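
For orientation, the connection-retry pattern the remote validation test above relies on
can be read in isolation. A minimal sketch, assuming only elasticsearch-py's
ConnectionError; the helper name and the 30-second wait are illustrative, not part of
this series:

    import time
    from collections.abc import Callable
    from typing import Any

    from elasticsearch import ConnectionError as ESConnectionError

    def retry_on_connection_error(func: Callable[[], Any], max_retries: int = 3, wait_seconds: float = 30.0) -> Any:
        """Retry func on connection errors only; validation failures should surface immediately."""
        for attempt in range(1, max_retries + 1):
            try:
                return func()
            except ESConnectionError:
                if attempt == max_retries:
                    raise
                time.sleep(wait_seconds)
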
+ """ESQL exceptions.""" from elasticsearch import Elasticsearch # type: ignore[reportMissingTypeStubs] From 39116a17df2b377c96775002402ca010154bd3e7 Mon Sep 17 00:00:00 2001 From: eric-forte-elastic Date: Tue, 23 Sep 2025 15:33:51 -0400 Subject: [PATCH 43/93] Re order error classes --- detection_rules/esql_errors.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/detection_rules/esql_errors.py b/detection_rules/esql_errors.py index 4ed8e897692..790c55c3b0f 100644 --- a/detection_rules/esql_errors.py +++ b/detection_rules/esql_errors.py @@ -38,16 +38,16 @@ def __init__(self, message: str, elastic_client: Elasticsearch) -> None: super().__init__(message) -class EsqlSyntaxError(Exception): - """Error with ESQL syntax. Validated via Kibana until AST is available.""" +class EsqlSemanticError(Exception): + """Error with ESQL semantics. Validated via Kibana until AST is available.""" def __init__(self, message: str, elastic_client: Elasticsearch) -> None: cleanup_empty_indices(elastic_client) super().__init__(message) -class EsqlSemanticError(Exception): - """Error with ESQL semantics. Validated via Kibana until AST is available.""" +class EsqlSyntaxError(Exception): + """Error with ESQL syntax. Validated via Kibana until AST is available.""" def __init__(self, message: str, elastic_client: Elasticsearch) -> None: cleanup_empty_indices(elastic_client) From 4d3de2ef8f6e18fb3ee6b0673755b2b5f9e499ab Mon Sep 17 00:00:00 2001 From: eric-forte-elastic Date: Tue, 23 Sep 2025 15:44:48 -0400 Subject: [PATCH 44/93] Handle nested flattened fields --- detection_rules/index_mappings.py | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/detection_rules/index_mappings.py b/detection_rules/index_mappings.py index 9e8bdd5466d..169ac43ec73 100644 --- a/detection_rules/index_mappings.py +++ b/detection_rules/index_mappings.py @@ -147,6 +147,14 @@ def prepare_integration_mappings( # noqa: PLR0913 f"Removing parent field from schema for ES|QL validation." ) utils.delete_nested_key_from_dict(stream_mappings, field_name) + nested_flattened_fields = find_flattened_fields_with_subfields(stream_mappings) + for field in nested_flattened_fields: + field_name = str(field).split(".fields.")[0].replace(".", ".properties.") + ".fields" + log( + f"Warning: flattened field `{field}` found in `{integration}-{stream}` with sub fields. " + f"Removing parent field from schema for ES|QL validation." 
+ ) + utils.delete_nested_key_from_dict(stream_mappings, field_name) utils.combine_dicts(integration_mappings, stream_mappings) index_lookup[f"{integration}-{stream}"] = stream_mappings @@ -229,6 +237,27 @@ def find_nested_multifields(mapping: dict[str, Any], path: str = "") -> list[Any return nested_multifields # type: ignore[reportUnknownVariableType] +def find_flattened_fields_with_subfields(mapping: dict[str, Any], path: str = "") -> list[str]: + """Recursively search for fields of type 'flattened' that have a 'fields' key in Elasticsearch mappings.""" + flattened_fields_with_subfields = [] + + for field, properties in mapping.items(): + current_path = f"{path}.{field}" if path else field + + if isinstance(properties, dict): + # Check if the field is of type 'flattened' and has a 'fields' key + if properties.get("type") == "flattened" and "fields" in properties: # type: ignore[reportUnknownVariableType] + flattened_fields_with_subfields.append(current_path) # type: ignore[reportUnknownVariableType] + + # Recurse into subfields + if "properties" in properties: + flattened_fields_with_subfields.extend( # type: ignore[reportUnknownVariableType] + find_flattened_fields_with_subfields(properties["properties"], current_path) # type: ignore[reportUnknownVariableType] + ) + + return flattened_fields_with_subfields # type: ignore[reportUnknownVariableType] + + def get_ecs_schema_mappings(current_version: Version) -> dict[str, Any]: """Get the ECS schema in an index mapping format (nested schema) handling scaled floats.""" ecs_version = get_stack_schemas()[str(current_version)]["ecs"] From ae7e7a37be008507d6e7f3d554e0436f3253358c Mon Sep 17 00:00:00 2001 From: eric-forte-elastic Date: Tue, 23 Sep 2025 15:51:06 -0400 Subject: [PATCH 45/93] Handle cases where ESQL validator is not fully initialized --- detection_rules/rule_validators.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/detection_rules/rule_validators.py b/detection_rules/rule_validators.py index f2abff9fc23..c6bc52cd382 100644 --- a/detection_rules/rule_validators.py +++ b/detection_rules/rule_validators.py @@ -745,13 +745,15 @@ def ast(self) -> Any: @cached_property def unique_fields(self) -> list[str]: # type: ignore[reportIncompatibleMethodOverride] """Return a list of unique fields in the query. Requires remote validation to have occurred.""" - if self.esql_unique_fields: + esql_unique_fields = getattr(self, "esql_unique_fields", None) + if esql_unique_fields: return [field["name"] for field in self.esql_unique_fields] return [] def get_unique_field_type(self, field_name: str) -> str | None: # type: ignore[reportIncompatibleMethodOverride] """Get the type of the unique field. 
Requires remote validation to have occurred."""
-        for field in self.esql_unique_fields:
+        esql_unique_fields = getattr(self, "esql_unique_fields", [])
+        for field in esql_unique_fields:
             if field["name"] == field_name:
                 return field["type"]
         return None

From 1c87dc6e99476aa256c2f3d0d8ed1b7d49382f7d Mon Sep 17 00:00:00 2001
From: eric-forte-elastic
Date: Tue, 23 Sep 2025 16:22:44 -0400
Subject: [PATCH 46/93] Making package/integrations consistent

---
 detection_rules/esql.py                | 65 +++++++++++++++++++++++
 detection_rules/index_mappings.py      | 19 +++----
 detection_rules/rule.py                |  7 +--
 detection_rules/rule_validators.py     | 16 +++++-
 detection_rules/schemas/definitions.py |  1 +
 detection_rules/utils.py               | 72 +-------------------------
 6 files changed, 93 insertions(+), 87 deletions(-)
 create mode 100644 detection_rules/esql.py

diff --git a/detection_rules/esql.py b/detection_rules/esql.py
new file mode 100644
index 00000000000..92a50db8336
--- /dev/null
+++ b/detection_rules/esql.py
@@ -0,0 +1,65 @@
+# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+# or more contributor license agreements. Licensed under the Elastic License
+# 2.0; you may not use this file except in compliance with the Elastic License
+# 2.0.
+
+"""ESQL parsing utilities."""
+
+import re
+from dataclasses import dataclass
+
+
+# NOTE This is done with a dataclass but could also be done with dict, etc.
+# Also other places this is called an instead.
+# such as in integrations.py def parse_datasets
+@dataclass
+class EventDataset:
+    """Dataclass for event.dataset with package and integration parts."""
+
+    package: str
+    integration: str
+
+    def __str__(self) -> str:
+        return f"{self.package}.{self.integration}"
+
+
+def get_esql_query_event_dataset_integrations(query: str) -> list[EventDataset]:
+    """Extract event.dataset, event.module, and data_stream.dataset integrations from an ES|QL query."""
+    number_of_parts = 2
+    # Regex patterns for event.dataset, event.module, and data_stream.dataset
+    # This mimics the logic in get_datasets_and_modules but for ES|QL as we do not have an ast
+
+    regex_patterns = {
+        "in": [
+            re.compile(r"event\.dataset\s+in\s*\(\s*([^)]+)\s*\)"),
+            re.compile(r"event\.module\s+in\s*\(\s*([^)]+)\s*\)"),
+            re.compile(r"data_stream\.dataset\s+in\s*\(\s*([^)]+)\s*\)"),
+        ],
+        "eq": [
+            re.compile(r'event\.dataset\s*==\s*"([^"]+)"'),
+            re.compile(r'event\.module\s*==\s*"([^"]+)"'),
+            re.compile(r'data_stream\.dataset\s*==\s*"([^"]+)"'),
+        ],
+    }
+
+    # Extract datasets
+    datasets: list[str] = []
+    for regex_list in regex_patterns.values():
+        for regex in regex_list:
+            matches = regex.findall(query)
+            if matches:
+                for match in matches:
+                    if "," in match:
+                        # Handle `in` case with multiple values
+                        datasets.extend([ds.strip().strip('"') for ds in match.split(",")])
+                    else:
+                        # Handle `==` case
+                        datasets.append(match.strip().strip('"'))
+
+    event_datasets: list[EventDataset] = []
+    for dataset in datasets:
+        parts = dataset.split(".")
+        if len(parts) == number_of_parts:  # Ensure there are exactly two parts
+            event_datasets.append(EventDataset(package=parts[0], integration=parts[1]))
+
+    return event_datasets
diff --git a/detection_rules/index_mappings.py b/detection_rules/index_mappings.py
index 169ac43ec73..85c14548111 100644
--- a/detection_rules/index_mappings.py
+++ b/detection_rules/index_mappings.py
@@ -16,6 +16,7 @@
 from . 
import ecs, integrations, misc, utils from .config import load_current_package_version +from .esql import EventDataset from .esql_errors import EsqlSchemaError, EsqlSemanticError, EsqlSyntaxError, cleanup_empty_indices from .integrations import ( load_integrations_manifests, @@ -101,7 +102,7 @@ def get_simulated_index_template_mappings(elastic_client: Elasticsearch, name: s def prepare_integration_mappings( # noqa: PLR0913 rule_integrations: list[str], - event_dataset_integrations: list[utils.EventDataset], + event_dataset_integrations: list[EventDataset], package_manifests: Any, integration_schemas: Any, stack_version: str, @@ -115,20 +116,20 @@ def prepare_integration_mappings( # noqa: PLR0913 # Process restrictions, note we need this for loops to be separate for event_dataset in event_dataset_integrations: # Ensure the integration is in rule_integrations - if event_dataset.integration not in rule_integrations: - dataset_restriction.setdefault(event_dataset.integration, []).append(event_dataset.datastream) # type: ignore[reportIncompatibleMethodOverride] + if event_dataset.package not in rule_integrations: + dataset_restriction.setdefault(event_dataset.package, []).append(event_dataset.integration) # type: ignore[reportIncompatibleMethodOverride] for event_dataset in event_dataset_integrations: - if event_dataset.integration not in rule_integrations: - rule_integrations.append(event_dataset.integration) + if event_dataset.package not in rule_integrations: + rule_integrations.append(event_dataset.package) for integration in rule_integrations: package = integration - package_version, _ = integrations.find_latest_compatible_version( + package_version = integrations.find_least_compatible_version( package, "", - Version.parse(stack_version), + stack_version, package_manifests, - ) + ).lstrip("^") package_schema = integration_schemas[package][package_version] # Apply dataset restrictions if any @@ -284,7 +285,7 @@ def get_ecs_schema_mappings(current_version: Version) -> dict[str, Any]: def prepare_mappings( # noqa: PLR0913 elastic_client: Elasticsearch, indices: list[str], - event_dataset_integrations: list[utils.EventDataset], + event_dataset_integrations: list[EventDataset], metadata: RuleMeta, stack_version: str, log: Callable[[str], None], diff --git a/detection_rules/rule.py b/detection_rules/rule.py index a5d37259d55..5f9e0dd36c5 100644 --- a/detection_rules/rule.py +++ b/detection_rules/rule.py @@ -29,6 +29,7 @@ from . 
import beats, ecs, endgame, utils from .config import load_current_package_version, parse_rules_config +from .esql import get_esql_query_event_dataset_integrations from .integrations import ( find_least_compatible_version, get_integration_schema_fields, @@ -647,10 +648,6 @@ def unique_fields(self) -> Any: def validate(self, _: "QueryRuleData", __: RuleMeta) -> None: raise NotImplementedError - def get_unique_field_type(self, __: str) -> None: - """Used to get unique field types when schema is not used""" - raise NotImplementedError - @cached def get_required_fields(self, index: str) -> list[dict[str, Any]]: """Retrieves fields needed for the query along with type information from the schema.""" @@ -1508,7 +1505,7 @@ def get_packaged_integrations( packaged_integrations: list[dict[str, Any]] = [] datasets, _ = beats.get_datasets_and_modules(data.get("ast") or []) # type: ignore[reportArgumentType] if isinstance(data, ESQLRuleData): - dataset_objs = utils.get_esql_query_event_dataset_integrations(data.query) + dataset_objs = get_esql_query_event_dataset_integrations(data.query) datasets.update(str(obj) for obj in dataset_objs) # integration is None to remove duplicate references upstream in Kibana # chronologically, event.dataset, data_stream.dataset is checked for package:integration, then rule tags diff --git a/detection_rules/rule_validators.py b/detection_rules/rule_validators.py index c6bc52cd382..917843213bf 100644 --- a/detection_rules/rule_validators.py +++ b/detection_rules/rule_validators.py @@ -32,6 +32,7 @@ from .beats import get_datasets_and_modules, parse_beats_from_index from .config import CUSTOM_RULES_DIR, load_current_package_version, parse_rules_config from .custom_schemas import update_auto_generated_schema +from .esql import get_esql_query_event_dataset_integrations from .esql_errors import EsqlTypeMismatchError from .index_mappings import ( create_remote_indices, @@ -46,6 +47,7 @@ ) from .rule import EQLRuleData, QueryRuleData, QueryValidator, RuleMeta, TOMLRuleContents, set_eql_config from .schemas import get_stack_schemas +from .schemas.definitions import FROM_SOURCES_REGEX EQL_ERROR_TYPES = ( eql.EqlCompileError @@ -750,6 +752,16 @@ def unique_fields(self) -> list[str]: # type: ignore[reportIncompatibleMethodOv return [field["name"] for field in self.esql_unique_fields] return [] + def get_esql_query_indices(self, query: str) -> tuple[str, list[str]]: + """Extract indices from an ES|QL query.""" + match = FROM_SOURCES_REGEX.search(query) + + if not match: + return "", [] + + sources_str = match.group("sources") + return sources_str, [source.strip() for source in sources_str.split(",")] + def get_unique_field_type(self, field_name: str) -> str | None: # type: ignore[reportIncompatibleMethodOverride] """Get the type of the unique field. 
Requires remote validation to have occurred."""
         esql_unique_fields = getattr(self, "esql_unique_fields", [])
@@ -852,10 +864,10 @@ def remote_validate_rule(  # noqa: PLR0913
         stack_version = str(kibana_details["version"]["number"])
         self.log(f"Validating against {stack_version} stack")
 
-        indices_str, indices = utils.get_esql_query_indices(query)  # type: ignore[reportUnknownVariableType]
+        indices_str, indices = self.get_esql_query_indices(query)  # type: ignore[reportUnknownVariableType]
         self.log(f"Extracted indices from query: {', '.join(indices)}")
 
-        event_dataset_integrations = utils.get_esql_query_event_dataset_integrations(query)
+        event_dataset_integrations = get_esql_query_event_dataset_integrations(query)
         self.log(f"Extracted Event Dataset integrations from query: {', '.join(indices)}")
 
         # Get mappings for all matching existing index templates
diff --git a/detection_rules/schemas/definitions.py b/detection_rules/schemas/definitions.py
index 599d170a6eb..0fd2be2e4ed 100644
--- a/detection_rules/schemas/definitions.py
+++ b/detection_rules/schemas/definitions.py
@@ -76,6 +76,7 @@
 CONDITION_VERSION_PATTERN = re.compile(rf"^\^{_version}$")
 VERSION_PATTERN = f"^{_version}$"
 MINOR_SEMVER = re.compile(r"^\d+\.\d+$")
+FROM_SOURCES_REGEX = re.compile(r"^\s*FROM\s+(?P<sources>.+?)\s*(?:\||\bmetadata\b|//|$)", re.IGNORECASE | re.MULTILINE)
 BRANCH_PATTERN = f"{VERSION_PATTERN}|^master$"
 ELASTICSEARCH_EQL_FEATURES = {
     "allow_negation": (Version.parse("8.9.0"), None),
diff --git a/detection_rules/utils.py b/detection_rules/utils.py
index dd3a19e258a..6262a322098 100644
--- a/detection_rules/utils.py
+++ b/detection_rules/utils.py
@@ -18,7 +18,7 @@
 import subprocess
 import zipfile
 from collections.abc import Callable, Iterator
-from dataclasses import astuple, dataclass, is_dataclass
+from dataclasses import astuple, is_dataclass
 from datetime import UTC, date, datetime
 from pathlib import Path
 from string import Template
@@ -530,76 +530,6 @@ def get_identifiers(self) -> list[str]:
         return ids
 
 
-FROM_SOURCES_REGEX = re.compile(r"^\s*FROM\s+(?P<sources>.+?)\s*(?:\||\bmetadata\b|//|$)", re.IGNORECASE | re.MULTILINE)
-
-
-def get_esql_query_indices(query: str) -> tuple[str, list[str]]:
-    """Extract indices from an ES|QL query."""
-    match = FROM_SOURCES_REGEX.search(query)
-
-    if not match:
-        return "", []
-
-    sources_str = match.group("sources")
-    return sources_str, [source.strip() for source in sources_str.split(",")]
-
-
-# NOTE This is done with a dataclass but could also be done with dict, etc.
-# Also other places this is called an instead. 
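
The relocated FROM_SOURCES_REGEX above is what drives index extraction during remote
ES|QL validation, and it can be exercised standalone. A short sketch; the pattern is
copied from the patch, while the sample query is illustrative:

    import re

    FROM_SOURCES_REGEX = re.compile(
        r"^\s*FROM\s+(?P<sources>.+?)\s*(?:\||\bmetadata\b|//|$)",
        re.IGNORECASE | re.MULTILINE,
    )

    query = "from logs-aws.cloudtrail*, logs-aws.billing* metadata _id | keep event.dataset"
    match = FROM_SOURCES_REGEX.search(query)
    sources = [s.strip() for s in match.group("sources").split(",")] if match else []
    # sources == ["logs-aws.cloudtrail*", "logs-aws.billing*"]
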
-# such as in integrations.py def parse_datasets -@dataclass -class EventDataset: - """Dataclass for event.dataset with integration and datastream parts.""" - - integration: str - datastream: str - - def __str__(self) -> str: - return f"{self.integration}.{self.datastream}" - - -def get_esql_query_event_dataset_integrations(query: str) -> list[EventDataset]: - """Extract event.dataset, event.module, and data_stream.dataset integrations from an ES|QL query.""" - number_of_parts = 2 - # Regex patterns for event.dataset, event.module, and data_stream.dataset - # This mimics the logic in get_datasets_and_modules but for ES|QL as we do not have an ast - - regex_patterns = { - "in": [ - re.compile(r"event\.dataset\s+in\s*\(\s*([^)]+)\s*\)"), - re.compile(r"event\.module\s+in\s*\(\s*([^)]+)\s*\)"), - re.compile(r"data_stream\.dataset\s+in\s*\(\s*([^)]+)\s*\)"), - ], - "eq": [ - re.compile(r'event\.dataset\s*==\s*"([^"]+)"'), - re.compile(r'event\.module\s*==\s*"([^"]+)"'), - re.compile(r'data_stream\.dataset\s*==\s*"([^"]+)"'), - ], - } - - # Extract datasets - datasets: list[str] = [] - for regex_list in regex_patterns.values(): - for regex in regex_list: - matches = regex.findall(query) - if matches: - for match in matches: - if "," in match: - # Handle `in` case with multiple values - datasets.extend([ds.strip().strip('"') for ds in match.split(",")]) - else: - # Handle `==` case - datasets.append(match.strip().strip('"')) - - event_datasets: list[EventDataset] = [] - for dataset in datasets: - parts = dataset.split(".") - if len(parts) == number_of_parts: # Ensure there are exactly two parts - event_datasets.append(EventDataset(integration=parts[0], datastream=parts[1])) - - return event_datasets - - def convert_to_nested_schema(flat_schemas: dict[str, str]) -> dict[str, Any]: """Convert a flat schema to a nested schema with 'properties' for each sub-key.""" nested_schema = {} From afd9cef6f2b8c12897efde9ce0a10b644f09d72a Mon Sep 17 00:00:00 2001 From: eric-forte-elastic Date: Tue, 23 Sep 2025 16:24:08 -0400 Subject: [PATCH 47/93] Add note --- detection_rules/utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/detection_rules/utils.py b/detection_rules/utils.py index 6262a322098..4bdd09c3908 100644 --- a/detection_rules/utils.py +++ b/detection_rules/utils.py @@ -532,6 +532,7 @@ def get_identifiers(self) -> list[str]: def convert_to_nested_schema(flat_schemas: dict[str, str]) -> dict[str, Any]: """Convert a flat schema to a nested schema with 'properties' for each sub-key.""" + # NOTE this is needed to conform to Kibana's index mapping format nested_schema = {} for key, value in flat_schemas.items(): From ace39500881cc493d225b2e725d73e306e5007ed Mon Sep 17 00:00:00 2001 From: eric-forte-elastic Date: Wed, 24 Sep 2025 14:41:19 -0400 Subject: [PATCH 48/93] Add FIXME --- detection_rules/integrations.py | 1 + 1 file changed, 1 insertion(+) diff --git a/detection_rules/integrations.py b/detection_rules/integrations.py index 02c92fe0474..7845619fe55 100644 --- a/detection_rules/integrations.py +++ b/detection_rules/integrations.py @@ -430,6 +430,7 @@ def collect_schema_fields( def parse_datasets(datasets: list[str], package_manifest: dict[str, Any]) -> list[dict[str, Any]]: """Parses datasets into packaged integrations from rule data.""" packaged_integrations: list[dict[str, Any]] = [] + # FIXME evaluate using EventDataset dataclass from esql.py for parsing for _value in sorted(datasets): # cleanup extra quotes pulled from ast field value = _value.strip('"') From 
e82d41240b87ab1a4307c415b3f0a53af5a70a97 Mon Sep 17 00:00:00 2001
From: eric-forte-elastic
Date: Wed, 24 Sep 2025 14:51:39 -0400
Subject: [PATCH 49/93] Remove Note

---
 detection_rules/esql.py         | 3 ---
 detection_rules/integrations.py | 2 +-
 2 files changed, 1 insertion(+), 4 deletions(-)

diff --git a/detection_rules/esql.py b/detection_rules/esql.py
index 92a50db8336..5adb5017079 100644
--- a/detection_rules/esql.py
+++ b/detection_rules/esql.py
@@ -9,9 +9,6 @@
 from dataclasses import dataclass
 
 
-# NOTE This is done with a dataclass but could also be done with dict, etc.
-# Also other places this is called an instead.
-# such as in integrations.py def parse_datasets
 @dataclass
 class EventDataset:
     """Dataclass for event.dataset with package and integration parts."""
diff --git a/detection_rules/integrations.py b/detection_rules/integrations.py
index 7845619fe55..da8c34a24af 100644
--- a/detection_rules/integrations.py
+++ b/detection_rules/integrations.py
@@ -430,7 +430,7 @@ def collect_schema_fields(
 def parse_datasets(datasets: list[str], package_manifest: dict[str, Any]) -> list[dict[str, Any]]:
     """Parses datasets into packaged integrations from rule data."""
     packaged_integrations: list[dict[str, Any]] = []
-    # FIXME evaluate using EventDataset dataclass from esql.py for parsing
+    # FIXME @eric-forte-elastic: evaluate using EventDataset dataclass for parsing  # noqa: FIX001, TD001, TD003
     for _value in sorted(datasets):
         # cleanup extra quotes pulled from ast field
         value = _value.strip('"')

From 2ce790b4c8113b23082204acbc7e2befa59223ec Mon Sep 17 00:00:00 2001
From: eric-forte-elastic
Date: Wed, 24 Sep 2025 14:55:19 -0400
Subject: [PATCH 50/93] Update ESQL class with a base error class

---
 detection_rules/esql_errors.py | 21 +++++++++------------
 1 file changed, 9 insertions(+), 12 deletions(-)

diff --git a/detection_rules/esql_errors.py b/detection_rules/esql_errors.py
index 790c55c3b0f..9e745ac2c82 100644
--- a/detection_rules/esql_errors.py
+++ b/detection_rules/esql_errors.py
@@ -10,6 +10,7 @@
 from .misc import getdefault
 
 __all__ = (
+    "EsqlKibanaBaseError",
     "EsqlSchemaError",
     "EsqlSemanticError",
     "EsqlSyntaxError",
@@ -30,28 +31,24 @@ def cleanup_empty_indices(
         _ = elastic_client.indices.delete(index=empty_index)
 
 
-class EsqlSchemaError(Exception):
-    """Error in ESQL schema. Validated via Kibana until AST is available."""
+class EsqlKibanaBaseError(Exception):
+    """Base class for ESQL exceptions with cleanup logic."""
 
     def __init__(self, message: str, elastic_client: Elasticsearch) -> None:
         cleanup_empty_indices(elastic_client)
         super().__init__(message)
 
 
-class EsqlSemanticError(Exception):
-    """Error with ESQL semantics. Validated via Kibana until AST is available."""
+class EsqlSchemaError(EsqlKibanaBaseError):
+    """Error in ESQL schema. Validated via Kibana until AST is available."""
 
-    def __init__(self, message: str, elastic_client: Elasticsearch) -> None:
-        cleanup_empty_indices(elastic_client)
-        super().__init__(message)
 
+class EsqlSemanticError(EsqlKibanaBaseError):
+    """Error with ESQL semantics. Validated via Kibana until AST is available."""
 
-class EsqlSyntaxError(Exception):
-    """Error with ESQL syntax. 
Validated via Kibana until AST is available.""" class EsqlTypeMismatchError(Exception): From b3df752b94c1663a24d90729a74a72f49fe02ccf Mon Sep 17 00:00:00 2001 From: eric-forte-elastic Date: Wed, 24 Sep 2025 15:34:32 -0400 Subject: [PATCH 51/93] Remove event.module from parsing --- detection_rules/esql.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/detection_rules/esql.py b/detection_rules/esql.py index 5adb5017079..a14cc91004d 100644 --- a/detection_rules/esql.py +++ b/detection_rules/esql.py @@ -21,20 +21,18 @@ def __str__(self) -> str: def get_esql_query_event_dataset_integrations(query: str) -> list[EventDataset]: - """Extract event.dataset, event.module, and data_stream.dataset integrations from an ES|QL query.""" + """Extract event.dataset and data_stream.dataset integrations from an ES|QL query.""" number_of_parts = 2 - # Regex patterns for event.dataset, event.module, and data_stream.dataset + # Regex patterns for event.dataset, and data_stream.dataset # This mimics the logic in get_datasets_and_modules but for ES|QL as we do not have an ast regex_patterns = { "in": [ re.compile(r"event\.dataset\s+in\s*\(\s*([^)]+)\s*\)"), - re.compile(r"event\.module\s+in\s*\(\s*([^)]+)\s*\)"), re.compile(r"data_stream\.dataset\s+in\s*\(\s*([^)]+)\s*\)"), ], "eq": [ re.compile(r'event\.dataset\s*==\s*"([^"]+)"'), - re.compile(r'event\.module\s*==\s*"([^"]+)"'), re.compile(r'data_stream\.dataset\s*==\s*"([^"]+)"'), ], } From 0fa8c0f53919e80344cbe06fb3ed5c674eea7b2b Mon Sep 17 00:00:00 2001 From: eric-forte-elastic Date: Wed, 24 Sep 2025 15:44:51 -0400 Subject: [PATCH 52/93] Prevent double validation on view rule --- detection_rules/main.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/detection_rules/main.py b/detection_rules/main.py index c9c3ef61e70..879b252e413 100644 --- a/detection_rules/main.py +++ b/detection_rules/main.py @@ -32,6 +32,7 @@ from .generic_loader import GenericCollection from .misc import ( add_client, + getdefault, nested_set, parse_user_config, raise_client_error, @@ -464,6 +465,7 @@ def view_rule( and isinstance(rule.contents.data, ESQLRuleData) and isinstance(rule.contents.data.validator, ESQLValidator) and isinstance(rule.contents.metadata, RuleMeta) + and not getdefault("remote_esql_validation")() ): rule.contents.data.validator.validate(rule.contents.data, rule.contents.metadata, force_remote_validation=True) From 289bbefa54ca58be71778b0d79c961ee7f76b730 Mon Sep 17 00:00:00 2001 From: eric-forte-elastic Date: Wed, 24 Sep 2025 15:59:10 -0400 Subject: [PATCH 53/93] Move functions out of utils for clarity --- detection_rules/index_mappings.py | 55 +++++++++++++++++++++++++++++-- detection_rules/utils.py | 49 --------------------------- 2 files changed, 52 insertions(+), 52 deletions(-) diff --git a/detection_rules/index_mappings.py b/detection_rules/index_mappings.py index 85c14548111..47f9647ade3 100644 --- a/detection_rules/index_mappings.py +++ b/detection_rules/index_mappings.py @@ -28,6 +28,55 @@ from .utils import combine_dicts +def delete_nested_key_from_dict(d: dict[str, Any], compound_key: str) -> None: + """Delete a nested key from a dictionary.""" + keys = compound_key.split(".") + for key in keys[:-1]: + if key in d and isinstance(d[key], dict): + d = d[key] # type: ignore[reportUnknownVariableType] + else: + return + d.pop(keys[-1], None) + + +def flat_schema_to_index_mapping(flat_schema: dict[str, str]) -> dict[str, Any]: + """ + Convert dicts with flat JSON paths and values into a nested mapping with + intermediary 
`properties`, `fields` and `type` fields. + """ + + # Sorting here ensures that 'a.b' processed before 'a.b.c', allowing us to correctly + # detect and handle multi-fields. + sorted_items = sorted(flat_schema.items()) + result = {} + + for field_path, field_type in sorted_items: + parts = field_path.split(".") + current_level = result + + for part in parts[:-1]: + node = current_level.setdefault(part, {}) # type: ignore[reportUnknownVariableType] + + if "type" in node and node["type"] not in ("nested", "object"): + current_level = node.setdefault("fields", {}) # type: ignore[reportUnknownVariableType] + else: + current_level = node.setdefault("properties", {}) # type: ignore[reportUnknownVariableType] + + leaf_key = parts[-1] + current_level[leaf_key] = {"type": field_type} + + # add `scaling_factor` field missing in the schema + # https://www.elastic.co/docs/reference/elasticsearch/mapping-reference/number#scaled-float-params + if field_type == "scaled_float": + current_level[leaf_key]["scaling_factor"] = 1000 + + # add `path` field for `alias` fields, set to a dummy value + if field_type == "alias": + current_level[leaf_key]["path"] = "@timestamp" + + return result # type: ignore[reportUnknownVariableType] + + def get_rule_integrations(metadata: RuleMeta) -> list[str]: """Retrieve rule integrations from metadata.""" if metadata.integration: @@ -139,7 +188,7 @@ def prepare_integration_mappings( # noqa: PLR0913 for stream in package_schema: flat_schema = package_schema[stream] - stream_mappings = utils.flat_schema_to_index_mapping(flat_schema) + stream_mappings = flat_schema_to_index_mapping(flat_schema) nested_multifields = find_nested_multifields(stream_mappings) for field in nested_multifields: field_name = str(field).split(".fields.")[0].replace(".", ".properties.") + ".fields" @@ -147,7 +196,7 @@ def prepare_integration_mappings( # noqa: PLR0913 f"Warning: Nested multi-field `{field}` found in `{integration}-{stream}`. " f"Removing parent field from schema for ES|QL validation." ) - utils.delete_nested_key_from_dict(stream_mappings, field_name) + delete_nested_key_from_dict(stream_mappings, field_name) nested_flattened_fields = find_flattened_fields_with_subfields(stream_mappings) for field in nested_flattened_fields: field_name = str(field).split(".fields.")[0].replace(".", ".properties.") + ".fields" @@ -155,7 +204,7 @@ def prepare_integration_mappings( # noqa: PLR0913 f"Warning: flattened field `{field}` found in `{integration}-{stream}` with sub fields. " f"Removing parent field from schema for ES|QL validation." ) - utils.delete_nested_key_from_dict(stream_mappings, field_name) + delete_nested_key_from_dict(stream_mappings, field_name) utils.combine_dicts(integration_mappings, stream_mappings) index_lookup[f"{integration}-{stream}"] = stream_mappings diff --git a/detection_rules/utils.py b/detection_rules/utils.py index 4bdd09c3908..9d9dbad3ccd 100644 --- a/detection_rules/utils.py +++ b/detection_rules/utils.py @@ -556,44 +556,6 @@ def combine_dicts(dest: dict[Any, Any], src: dict[Any, Any]) -> None: dest[k] = v -def flat_schema_to_index_mapping(flat_schema: dict[str, str]) -> dict[str, Any]: - """ - Convert dicts with flat JSON paths and values into a nested mapping with - intermediary `properties`, `fields` and `type` fields. - """ - - # Sorting here ensures that 'a.b' processed before 'a.b.c', allowing us to correctly - # detect and handle multi-fields. 
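
For reference, the moved helper converts flat ECS-style paths into the nested
index-mapping shape Elasticsearch expects, creating `fields` entries for multi-fields
as the sorting comment above describes. A small sketch of the input/output contract,
assuming detection_rules.index_mappings is importable after this patch:

    from detection_rules.index_mappings import flat_schema_to_index_mapping

    flat = {"event.code": "keyword", "event.code.text": "match_only_text"}
    mapping = flat_schema_to_index_mapping(flat)
    # {"event": {"properties": {"code": {
    #     "type": "keyword",
    #     "fields": {"text": {"type": "match_only_text"}},
    # }}}}
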
- sorted_items = sorted(flat_schema.items()) - result = {} - - for field_path, field_type in sorted_items: - parts = field_path.split(".") - current_level = result - - for part in parts[:-1]: - node = current_level.setdefault(part, {}) # type: ignore[reportUnknownVariableType] - - if "type" in node and node["type"] not in ("nested", "object"): - current_level = node.setdefault("fields", {}) # type: ignore[reportUnknownVariableType] - else: - current_level = node.setdefault("properties", {}) # type: ignore[reportUnknownVariableType] - - leaf_key = parts[-1] - current_level[leaf_key] = {"type": field_type} - - # add `scaling_factor` field missing in the schema - # https://www.elastic.co/docs/reference/elasticsearch/mapping-reference/number#scaled-float-params - if field_type == "scaled_float": - current_level[leaf_key]["scaling_factor"] = 1000 - - # add `path` field for `alias` fields, set to a dummy value - if field_type == "alias": - current_level[leaf_key]["path"] = "@timestamp" - - return result # type: ignore[reportUnknownVariableType] - - def get_column_from_index_mapping_schema(keys: list[str], current_schema: dict[str, Any] | None) -> str | None: """Recursively traverse the schema to find the type of the column.""" key = keys[0] @@ -604,14 +566,3 @@ def get_column_from_index_mapping_schema(keys: list[str], current_schema: dict[s if not column_type and len(keys) > 1: return get_column_from_index_mapping_schema(keys[1:], current_schema=column.get("properties")) # type: ignore[reportUnknownVariableType] return column_type # type: ignore[reportUnknownVariableType] - - -def delete_nested_key_from_dict(d: dict[str, Any], compound_key: str) -> None: - """Delete a nested key from a dictionary.""" - keys = compound_key.split(".") - for key in keys[:-1]: - if key in d and isinstance(d[key], dict): - d = d[key] # type: ignore[reportUnknownVariableType] - else: - return - d.pop(keys[-1], None) From 5942aebb9e8beddb5728e0753704cd75bbabe7ec Mon Sep 17 00:00:00 2001 From: eric-forte-elastic Date: Wed, 24 Sep 2025 17:56:37 -0400 Subject: [PATCH 54/93] Cleanup Esql Error Types --- detection_rules/esql_errors.py | 13 +- detection_rules/index_mappings.py | 31 ++-- detection_rules/rule.py | 7 +- detection_rules/rule_validators.py | 3 +- ...collection_cloudtrail_logging_created.toml | 63 -------- ...on_cloudtrail_logging_created_correct.toml | 64 -------- tests/test_rules_remote.py | 137 ++++++++++-------- tests/test_schemas.py | 13 +- 8 files changed, 117 insertions(+), 214 deletions(-) delete mode 100644 tests/data/collection_cloudtrail_logging_created.toml delete mode 100644 tests/data/collection_cloudtrail_logging_created_correct.toml diff --git a/detection_rules/esql_errors.py b/detection_rules/esql_errors.py index 9e745ac2c82..39ea8cb552d 100644 --- a/detection_rules/esql_errors.py +++ b/detection_rules/esql_errors.py @@ -43,10 +43,6 @@ class EsqlSchemaError(EsqlKibanaBaseError): """Error in ESQL schema. Validated via Kibana until AST is available.""" -class EsqlSemanticError(EsqlKibanaBaseError): - """Error with ESQL semantics. Validated via Kibana until AST is available.""" - - class EsqlSyntaxError(EsqlKibanaBaseError): """Error with ESQL syntax. 
Validated via Kibana until AST is available.""" @@ -54,5 +50,14 @@ class EsqlSyntaxError(EsqlKibanaBaseError): class EsqlTypeMismatchError(Exception): """Error when validating types in ESQL.""" + def __init__(self, message: str, elastic_client: Elasticsearch | None = None) -> None: + if elastic_client: + cleanup_empty_indices(elastic_client) + super().__init__(message) + + +class EsqlSemanticError(Exception): + """Error with ESQL semantics. Validated via Kibana until AST is available.""" + def __init__(self, message: str) -> None: super().__init__(message) diff --git a/detection_rules/index_mappings.py b/detection_rules/index_mappings.py index 47f9647ade3..ffb01b6b98c 100644 --- a/detection_rules/index_mappings.py +++ b/detection_rules/index_mappings.py @@ -17,7 +17,13 @@ from . import ecs, integrations, misc, utils from .config import load_current_package_version from .esql import EventDataset -from .esql_errors import EsqlSchemaError, EsqlSemanticError, EsqlSyntaxError, cleanup_empty_indices +from .esql_errors import ( + EsqlKibanaBaseError, + EsqlSchemaError, + EsqlSyntaxError, + EsqlTypeMismatchError, + cleanup_empty_indices, +) from .integrations import ( load_integrations_manifests, load_integrations_schemas, @@ -251,12 +257,15 @@ def execute_query_against_indices( error_msg = str(e) if "parsing_exception" in error_msg: raise EsqlSyntaxError(str(e), elastic_client) from e - raise EsqlSemanticError(str(e), elastic_client) from e - finally: - if delete_indices or misc.getdefault("skip_empty_index_cleanup")(): - for index_str in test_index_str.split(","): - response = elastic_client.indices.delete(index=index_str.strip()) - log(f"Test index `{index_str}` deleted: {response}") + if "Unknown column" in error_msg: + raise EsqlSchemaError(str(e), elastic_client) from e + if "verification_exception" in error_msg: + raise EsqlTypeMismatchError(str(e), elastic_client) from e + raise EsqlKibanaBaseError(str(e), elastic_client) from e + if delete_indices or misc.getdefault("skip_empty_index_cleanup")(): + for index_str in test_index_str.split(","): + response = elastic_client.indices.delete(index=index_str.strip()) + log(f"Test index `{index_str}` deleted: {response}") query_column_names = [c["name"] for c in query_columns] log(f"Got query columns: {', '.join(query_column_names)}") @@ -367,14 +376,16 @@ def prepare_mappings( # noqa: PLR0913 non_ecs_mapping.update(non_ecs.get(index, {})) non_ecs_mapping = ecs.flatten(non_ecs_mapping) non_ecs_mapping = utils.convert_to_nested_schema(non_ecs_mapping) - if not combined_mappings and not non_ecs_mapping: - raise ValueError("No mappings found") - index_lookup.update({"rule-non-ecs-index": non_ecs_mapping}) # Load ECS in an index mapping format (nested schema) current_version = Version.parse(load_current_package_version(), optional_minor_and_patch=True) ecs_schema = get_ecs_schema_mappings(current_version) index_lookup.update({"rule-ecs-index": ecs_schema}) + utils.combine_dicts(combined_mappings, ecs_schema) + + if not combined_mappings and not non_ecs_mapping and not ecs_schema: + raise ValueError("No mappings found") + index_lookup.update({"rule-non-ecs-index": non_ecs_mapping}) return existing_mappings, index_lookup, combined_mappings diff --git a/detection_rules/rule.py b/detection_rules/rule.py index 5f9e0dd36c5..92a65b6882f 100644 --- a/detection_rules/rule.py +++ b/detection_rules/rule.py @@ -30,6 +30,7 @@ from . 
import beats, ecs, endgame, utils from .config import load_current_package_version, parse_rules_config from .esql import get_esql_query_event_dataset_integrations +from .esql_errors import EsqlSemanticError from .integrations import ( find_least_compatible_version, get_integration_schema_fields, @@ -963,7 +964,7 @@ class ESQLRuleData(QueryRuleData): def validates_esql_data(self, data: dict[str, Any], **_: Any) -> None: """Custom validation for query rule type and subclasses.""" if data.get("index"): - raise ValidationError("Index is not a valid field for ES|QL rule type.") + raise EsqlSemanticError("Index is not a valid field for ES|QL rule type.") # Convert the query string to lowercase to handle case insensitivity query_lower = data["query"].lower() @@ -981,7 +982,7 @@ def validates_esql_data(self, data: dict[str, Any], **_: Any) -> None: # Ensure that non-aggregate queries have metadata if not combined_pattern.search(query_lower): - raise ValidationError( + raise EsqlSemanticError( f"Rule: {data['name']} contains a non-aggregate query without" f" metadata fields '_id', '_version', and '_index' ->" f" Add 'metadata _id, _version, _index' to the from command or add an aggregate function." @@ -991,7 +992,7 @@ def validates_esql_data(self, data: dict[str, Any], **_: Any) -> None: # Match | followed by optional whitespace/newlines and then 'keep' keep_pattern = re.compile(r"\|\s*keep\b", re.IGNORECASE | re.DOTALL) if not keep_pattern.search(query_lower): - raise ValidationError( + raise EsqlSemanticError( f"Rule: {data['name']} does not contain a 'keep' command -> Add a 'keep' command to the query." ) diff --git a/detection_rules/rule_validators.py b/detection_rules/rule_validators.py index 917843213bf..17d454bcac1 100644 --- a/detection_rules/rule_validators.py +++ b/detection_rules/rule_validators.py @@ -789,12 +789,13 @@ def validate_columns_index_mapping( # Check if the column exists in combined_mappings or a valid field generated from a function or operator keys = column_name.split(".") schema_type = utils.get_column_from_index_mapping_schema(keys, combined_mappings) + schema_type = kql.parser.elasticsearch_type_family(schema_type) if schema_type else None # Validate the type if not schema_type or column_type != schema_type: mismatched_columns.append( f"Dynamic field `{column_name}` is not correctly mapped. " - f"If not dynamic: expected `{schema_type}`, got `{column_type}`." + f"If not dynamic: expected from schema: `{schema_type}`, got from Kibana: `{column_type}`." ) if mismatched_columns: diff --git a/tests/data/collection_cloudtrail_logging_created.toml b/tests/data/collection_cloudtrail_logging_created.toml deleted file mode 100644 index ab123a45b5f..00000000000 --- a/tests/data/collection_cloudtrail_logging_created.toml +++ /dev/null @@ -1,63 +0,0 @@ -[metadata] -creation_date = "2020/06/10" -maturity = "production" -updated_date = "2025/01/15" - -[rule] -author = ["Elastic"] -description = "Rule used for testing." -false_positives = [ - """ - Trail creations may be made by a system or network administrator. Verify whether the user identity, user agent, - and/or hostname should be making changes in your environment. Trail creations by unfamiliar users or hosts should be - investigated. If known behavior is causing false positives, it can be exempted from the rule. 
- """, -] -from = "now-32m" -interval = "5m" -language = "esql" -license = "Elastic License v2" -name = "AWS CloudTrail Testing Rule" -note = """Rule used for testing.""" -references = [ - "https://docs.aws.amazon.com/awscloudtrail/latest/APIReference/API_CreateTrail.html", - "https://awscli.amazonaws.com/v2/documentation/api/latest/reference/cloudtrail/create-trail.html", -] -risk_score = 21 -rule_id = "cd088b8b-7bca-40d5-b71c-16ffb5309e66" -severity = "low" -tags = [ - "Domain: Cloud", - "Data Source: AWS", - "Data Source: Amazon Web Services", - "Use Case: Log Auditing", - "Tactic: Collection", - "Resources: Investigation Guide", -] -timestamp_override = "event.ingested" -type = "esql" - -query = ''' -from logs-aws.billing* metadata _id, _version, _index -| where @timestamp > now() - 30 minutes - and event.dataset in ("aws.billing") - and aws.cloudtrail.user_identity.arn is not null - and aws.cloudtrail.user_identity.type == "IAMUser" -| keep - aws.cloudtrail.user_identity.type -''' - - -[[rule.threat]] -framework = "MITRE ATT&CK" -[[rule.threat.technique]] -id = "T1530" -name = "Data from Cloud Storage" -reference = "https://attack.mitre.org/techniques/T1530/" - - -[rule.threat.tactic] -id = "TA0009" -name = "Collection" -reference = "https://attack.mitre.org/tactics/TA0009/" - diff --git a/tests/data/collection_cloudtrail_logging_created_correct.toml b/tests/data/collection_cloudtrail_logging_created_correct.toml deleted file mode 100644 index 65b1a2a0590..00000000000 --- a/tests/data/collection_cloudtrail_logging_created_correct.toml +++ /dev/null @@ -1,64 +0,0 @@ -[metadata] -creation_date = "2020/06/10" -maturity = "production" -integration = ["aws"] -updated_date = "2025/01/15" - -[rule] -author = ["Elastic"] -description = "Rule used for testing." -false_positives = [ - """ - Trail creations may be made by a system or network administrator. Verify whether the user identity, user agent, - and/or hostname should be making changes in your environment. Trail creations by unfamiliar users or hosts should be - investigated. If known behavior is causing false positives, it can be exempted from the rule. 
- """, -] -from = "now-32m" -interval = "5m" -language = "esql" -license = "Elastic License v2" -name = "AWS CloudTrail Testing Rule" -note = """Rule used for testing.""" -references = [ - "https://docs.aws.amazon.com/awscloudtrail/latest/APIReference/API_CreateTrail.html", - "https://awscli.amazonaws.com/v2/documentation/api/latest/reference/cloudtrail/create-trail.html", -] -risk_score = 21 -rule_id = "1d46d30f-1c66-4d0f-8a53-afeeb455196b" -severity = "low" -tags = [ - "Domain: Cloud", - "Data Source: AWS", - "Data Source: Amazon Web Services", - "Use Case: Log Auditing", - "Tactic: Collection", - "Resources: Investigation Guide", -] -timestamp_override = "event.ingested" -type = "esql" - -query = ''' -from logs-aws.cloudtrail* metadata _id, _version, _index -| where @timestamp > now() - 30 minutes - and event.dataset in ("aws.cloudtrail", "aws.billing") - and aws.cloudtrail.user_identity.arn is not null - and aws.cloudtrail.user_identity.type == "IAMUser" -| keep - aws.cloudtrail.user_identity.type -''' - - -[[rule.threat]] -framework = "MITRE ATT&CK" -[[rule.threat.technique]] -id = "T1530" -name = "Data from Cloud Storage" -reference = "https://attack.mitre.org/techniques/T1530/" - - -[rule.threat.tactic] -id = "TA0009" -name = "Collection" -reference = "https://attack.mitre.org/tactics/TA0009/" - diff --git a/tests/test_rules_remote.py b/tests/test_rules_remote.py index 07e5bf57039..a505b7ef863 100644 --- a/tests/test_rules_remote.py +++ b/tests/test_rules_remote.py @@ -3,25 +3,23 @@ # 2.0; you may not use this file except in compliance with the Elastic License # 2.0. -import time import unittest +from copy import deepcopy import pytest -from elasticsearch import BadRequestError -from elasticsearch import ConnectionError as ESConnectionError +from detection_rules.esql_errors import EsqlSchemaError, EsqlSyntaxError, EsqlTypeMismatchError from detection_rules.misc import ( get_default_config, - get_default_elasticsearch_client, - get_default_kibana_client, getdefault, ) from detection_rules.rule_loader import RuleCollection -from detection_rules.rule_validators import ESQLValidator -from detection_rules.utils import get_path +from detection_rules.utils import get_path, load_rule_contents from .base import BaseRuleTest +MAX_RETRIES = 3 + @unittest.skipIf(get_default_config() is None, "Skipping remote validation due to missing config") @unittest.skipIf( @@ -30,66 +28,79 @@ class TestRemoteRules(BaseRuleTest): """Test rules against a remote Elastic stack instance.""" - def test_esql_rules(self): - """Test all ES|QL rules against a cluster.""" - - esql_rules = [r for r in self.all_rules if r.contents.data.type == "esql"] - - print("ESQL rules loaded:", len(esql_rules)) - - if not esql_rules: - return - - with get_default_kibana_client() as kibana_client, get_default_elasticsearch_client() as elastic_client: - if not kibana_client or not elastic_client: - self.skipTest("Skipping remote validation due to missing client") - - # Retrieve verbosity level from pytest - verbosity: int = int(self._outcome.result.config.get_verbosity()) # type: ignore[reportIncompatibleMethodOverride] - - failed_count = 0 - fail_list: list[str] = [] - max_retries = 3 - for r in esql_rules: - print() - retry_count = 0 - while retry_count < max_retries: - try: - validator = ESQLValidator(r.contents.data.query) # type: ignore[reportIncompatibleMethodOverride] - _ = validator.remote_validate_rule_contents( - kibana_client, elastic_client, r.contents, verbosity - ) - break - except (ValueError, BadRequestError) as e: - 
print(f"FAILURE: {e}") - fail_list.append(f"FAILURE: {e}") - failed_count += 1 - break - except ESConnectionError as e: - retry_count += 1 - print(f"Connection error: {e}. Retrying {retry_count}/{max_retries}...") - time.sleep(30) - if retry_count == max_retries: - print(f"FAILURE: {e} after {max_retries} retries") - fail_list.append(f"FAILURE: {e} after {max_retries} retries") - failed_count += 1 - - print(f"Total rules: {len(esql_rules)}") - print(f"Failed rules: {failed_count}") - - if failed_count > 0: - self.fail(f"Found {failed_count} invalid rules") - def test_esql_related_integrations(self): """Test an ESQL rule has its related integrations built correctly.""" - file_path = get_path(["tests", "data", "collection_cloudtrail_logging_created_correct.toml"]) - rule = RuleCollection().load_file(file_path) + file_path = get_path(["tests", "data", "command_control_dummy_production_rule.toml"]) + original_production_rule = load_rule_contents(file_path) + production_rule = deepcopy(original_production_rule)[0] + production_rule["metadata"]["integration"] = ["aws"] + production_rule["rule"]["query"] = """ + from logs-aws.cloudtrail* metadata _id, _version, _index + | where @timestamp > now() - 30 minutes + and event.dataset in ("aws.cloudtrail", "aws.billing") + and aws.cloudtrail.user_identity.arn is not null + and aws.cloudtrail.user_identity.type == "IAMUser" + | keep + aws.cloudtrail.user_identity.type + """ + rule = RuleCollection().load_dict(production_rule) related_integrations = rule.contents.to_api_format()["related_integrations"] for integration in related_integrations: assert integration["package"] == "aws", f"Expected 'aws', but got {integration['package']}" - def test_esql_event_dataset(self): + def test_esql_event_dataset_schema_error(self): + """Test an ESQL rules that uses event.dataset field in the query validated the fields correctly.""" + # EsqlSchemaError + file_path = get_path(["tests", "data", "command_control_dummy_production_rule.toml"]) + original_production_rule = load_rule_contents(file_path) + # Test that a ValidationError is raised if the query doesn't match the schema + production_rule = deepcopy(original_production_rule)[0] + del production_rule["metadata"]["integration"] + production_rule["rule"]["query"] = """ + from logs-aws.cloudtrail* metadata _id, _version, _index + | where @timestamp > now() - 30 minutes + and event.dataset in ("aws.billing") + and aws.cloudtrail.user_identity.type == "IAMUser" + | keep + aws.cloudtrail.user_identity.type + """ + with pytest.raises(EsqlSchemaError): + _ = RuleCollection().load_dict(production_rule) + + def test_esql_type_mismatch_error(self): + """Test an ESQL rules that uses event.dataset field in the query validated the fields correctly.""" + # EsqlSchemaError + file_path = get_path(["tests", "data", "command_control_dummy_production_rule.toml"]) + original_production_rule = load_rule_contents(file_path) + # Test that a ValidationError is raised if the query doesn't match the schema + production_rule = deepcopy(original_production_rule)[0] + production_rule["metadata"]["integration"] = ["aws"] + production_rule["rule"]["query"] = """ + from logs-aws.cloudtrail* metadata _id, _version, _index + | where @timestamp > now() - 30 minutes + and event.dataset in ("aws.cloudtrail", "aws.billing") + and aws.cloudtrail.user_identity.type == 5 + | keep + aws.cloudtrail.user_identity.type + """ + with pytest.raises(EsqlTypeMismatchError): + _ = RuleCollection().load_dict(production_rule) + + def test_esql_syntax_error(self): 
"""Test an ESQL rules that uses event.dataset field in the query validated the fields correctly.""" - file_path = get_path(["tests", "data", "collection_cloudtrail_logging_created.toml"]) - with pytest.raises(BadRequestError, match=r"Unknown column .*"): - _ = RuleCollection().load_file(file_path) + # EsqlSchemaError + file_path = get_path(["tests", "data", "command_control_dummy_production_rule.toml"]) + original_production_rule = load_rule_contents(file_path) + # Test that a ValidationError is raised if the query doesn't match the schema + production_rule = deepcopy(original_production_rule)[0] + production_rule["metadata"]["integration"] = ["aws"] + production_rule["rule"]["query"] = """ + from logs-aws.cloudtrail* metadata _id, _version, _index + | where @timestamp > now() - 30 minutes + and event.dataset in ("aws.cloudtrail", "aws.billing") + and aws.cloudtrail.user_identity.type = "IAMUser" + | keep + aws.cloudtrail.user_identity.type + """ + with pytest.raises(EsqlSyntaxError): + _ = RuleCollection().load_dict(production_rule) diff --git a/tests/test_schemas.py b/tests/test_schemas.py index 5148c3d2b39..1215e4b0fbc 100644 --- a/tests/test_schemas.py +++ b/tests/test_schemas.py @@ -18,6 +18,7 @@ from detection_rules import utils from detection_rules.config import load_current_package_version +from detection_rules.esql_errors import EsqlSemanticError from detection_rules.rule import TOMLRuleContents from detection_rules.rule_loader import RuleCollection from detection_rules.schemas import RULES_CONFIG, downgrade @@ -315,7 +316,7 @@ def test_esql_data_validation(self): """Test ESQL rule data validation""" # A random ESQL rule to deliver a test query - rule_path = Path("rules/windows/defense_evasion_posh_obfuscation_index_reversal.toml") + rule_path = Path("tests/data/command_control_dummy_production_rule.toml") rule_body = rule_path.read_text() rule_dict = pytoml.loads(rule_body) @@ -323,7 +324,7 @@ def test_esql_data_validation(self): query = """ FROM logs-windows.powershell_operational* METADATA _id, _version, _index | WHERE event.code == "4104" - | KEEP event.count + | KEEP event.code """ rule_dict["rule"]["query"] = query _ = RuleCollection().load_dict(rule_dict, path=rule_path) @@ -333,23 +334,23 @@ def test_esql_data_validation(self): query = """ FROM logs-windows.powershell_operational* METADATA _id, _index, _version | WHERE event.code == "4104" - | KEEP event.count + | KEEP event.code """ rule_dict["rule"]["query"] = query _ = RuleCollection().load_dict(rule_dict, path=rule_path) # Different metadata fields - with pytest.raises(ValidationError): + with pytest.raises(EsqlSemanticError): query = """ FROM logs-windows.powershell_operational* METADATA _foo, _index | WHERE event.code == "4104" - | KEEP event.count + | KEEP event.code """ rule_dict["rule"]["query"] = query _ = RuleCollection().load_dict(rule_dict, path=rule_path) # Missing `keep` - with pytest.raises(ValidationError): + with pytest.raises(EsqlSemanticError): query = """ FROM logs-windows.powershell_operational* METADATA _id, _index, _version | WHERE event.code == "4104" From 2aaeae68fd0c9697686c1b6c52db8fb0a40e97ec Mon Sep 17 00:00:00 2001 From: eric-forte-elastic Date: Thu, 25 Sep 2025 12:29:39 -0400 Subject: [PATCH 55/93] Add explicit deepcopy --- detection_rules/index_mappings.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/detection_rules/index_mappings.py b/detection_rules/index_mappings.py index ffb01b6b98c..9f99b0bc616 100644 --- a/detection_rules/index_mappings.py +++ 
b/detection_rules/index_mappings.py
@@ -7,6 +7,7 @@
 
 import time
 from collections.abc import Callable
+from copy import deepcopy
 from typing import Any
 
 from elastic_transport import ObjectApiResponse
@@ -211,7 +212,7 @@ def prepare_integration_mappings(  # noqa: PLR0913
                         f"Removing parent field from schema for ES|QL validation."
                     )
                     delete_nested_key_from_dict(stream_mappings, field_name)
-            utils.combine_dicts(integration_mappings, stream_mappings)
+            utils.combine_dicts(integration_mappings, deepcopy(stream_mappings))
             index_lookup[f"{integration}-{stream}"] = stream_mappings
 
     return integration_mappings, index_lookup
@@ -298,7 +299,7 @@ def find_nested_multifields(mapping: dict[str, Any], path: str = "") -> list[Any
 
 def find_flattened_fields_with_subfields(mapping: dict[str, Any], path: str = "") -> list[str]:
     """Recursively search for fields of type 'flattened' that have a 'fields' key in Elasticsearch mappings."""
-    flattened_fields_with_subfields = []
+    flattened_fields_with_subfields: list[str] = []
 
     for field, properties in mapping.items():
         current_path = f"{path}.{field}" if path else field
@@ -314,7 +315,7 @@
                 find_flattened_fields_with_subfields(properties["properties"], current_path)  # type: ignore[reportUnknownVariableType]
             )
 
-    return flattened_fields_with_subfields  # type: ignore[reportUnknownVariableType]
+    return flattened_fields_with_subfields
 
 
 def get_ecs_schema_mappings(current_version: Version) -> dict[str, Any]:
@@ -366,8 +367,8 @@ def prepare_mappings(  # noqa: PLR0913
 
     # Combine existing and integration mappings into a single mapping dict
     combined_mappings: dict[str, Any] = {}
-    utils.combine_dicts(combined_mappings, existing_mappings)
-    utils.combine_dicts(combined_mappings, integration_mappings)
+    utils.combine_dicts(combined_mappings, deepcopy(existing_mappings))
+    utils.combine_dicts(combined_mappings, deepcopy(integration_mappings))
 
     # Load non-ecs schema and convert to index mapping format (nested schema)
     non_ecs_mapping: dict[str, Any] = {}
@@ -382,7 +383,7 @@
 
     ecs_schema = get_ecs_schema_mappings(current_version)
     index_lookup.update({"rule-ecs-index": ecs_schema})
-    utils.combine_dicts(combined_mappings, ecs_schema)
+    utils.combine_dicts(combined_mappings, deepcopy(ecs_schema))
 
     if not combined_mappings and not non_ecs_mapping and not ecs_schema:
         raise ValueError("No mappings found")

From 2ee3f673d898de3e98faaa2523f743f67d8bd5cd Mon Sep 17 00:00:00 2001
From: eric-forte-elastic
Date: Thu, 25 Sep 2025 13:09:08 -0400
Subject: [PATCH 56/93] Ignore Kibana long vs schema integer mismatch

---
 detection_rules/rule_validators.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/detection_rules/rule_validators.py b/detection_rules/rule_validators.py
index 17d454bcac1..6c1cd6e93cd 100644
--- a/detection_rules/rule_validators.py
+++ b/detection_rules/rule_validators.py
@@ -791,6 +791,11 @@ def validate_columns_index_mapping(
             schema_type = utils.get_column_from_index_mapping_schema(keys, combined_mappings)
             schema_type = kql.parser.elasticsearch_type_family(schema_type) if schema_type else None
 
+            # The mapping between integer and long may be different between Kibana and the schema
+            # both are numeric types with different ranges, but for our purposes they are equivalent
+            if column_type == "long" and schema_type == "integer":
+                continue
+
             # Validate the type
             if not schema_type or column_type != schema_type:
                 mismatched_columns.append(
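The long-versus-integer allowance above is effectively a comparison by type family rather than by exact type name; a later patch in this series generalizes it through kql.parser.elasticsearch_type_family. A minimal sketch of family-based comparison, with a hand-written family table that only approximates the real mapping:

    # The family names below are illustrative stand-ins; the authoritative
    # mapping lives in kql.parser.elasticsearch_type_family.
    TYPE_FAMILIES = {
        "byte": "numeric",
        "short": "numeric",
        "integer": "numeric",
        "long": "numeric",
        "float": "numeric",
        "double": "numeric",
        "scaled_float": "numeric",
        "keyword": "keyword",
        "constant_keyword": "keyword",
        "wildcard": "keyword",
    }


    def families_match(kibana_type: str, schema_type: str) -> bool:
        """Compare two Elasticsearch field types by family instead of exact name."""
        return TYPE_FAMILIES.get(kibana_type, kibana_type) == TYPE_FAMILIES.get(schema_type, schema_type)


    assert families_match("long", "integer")  # the pair special-cased above
    assert not families_match("keyword", "long")  # a real mismatch still fails

Comparing by family keeps the validator focused on operator compatibility instead of storage width.

From 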
a513c0a7cc29c1beafd77a2b774d9ec3c9373441 Mon Sep 17 00:00:00 2001
From: eric-forte-elastic
Date: Thu, 25 Sep 2025 14:18:28 -0400
Subject: [PATCH 57/93] Can now enforce length with proper schemas

---
 detection_rules/utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/detection_rules/utils.py b/detection_rules/utils.py
index 9d9dbad3ccd..1a67fed1e37 100644
--- a/detection_rules/utils.py
+++ b/detection_rules/utils.py
@@ -563,6 +563,6 @@ def get_column_from_index_mapping_schema(keys: list[str], current_schema: dict[s
         return None
     column = current_schema.get(key) or {}  # type: ignore[reportUnknownVariableType]
     column_type = column.get("type") if column else None  # type: ignore[reportUnknownVariableType]
-    if not column_type and len(keys) > 1:
+    if len(keys) > 1:
         return get_column_from_index_mapping_schema(keys[1:], current_schema=column.get("properties"))  # type: ignore[reportUnknownVariableType]
     return column_type  # type: ignore[reportUnknownVariableType]
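Dropping the `not column_type and` guard makes the helper recurse whenever path segments remain, so a dotted path that dead-ends partway now resolves to None instead of returning the type of an intermediate leaf. A self-contained sketch of the same walk over a nested mapping (the real helper is get_column_from_index_mapping_schema above):

    from typing import Any


    def get_type(keys: list[str], schema: dict[str, Any] | None) -> str | None:
        """Walk a nested index mapping by dotted-path segments."""
        if not keys or not schema:
            return None
        column = schema.get(keys[0]) or {}
        if len(keys) > 1:
            # Always recurse while segments remain, so partially resolved paths
            # come back as None rather than as a premature leaf type.
            return get_type(keys[1:], column.get("properties"))
        return column.get("type")


    mapping = {
        "aws": {
            "properties": {
                "cloudtrail": {
                    "properties": {
                        "user_identity": {"properties": {"type": {"type": "keyword"}}},
                    },
                },
            },
        },
    }
    assert get_type("aws.cloudtrail.user_identity.type".split("."), mapping) == "keyword"
    assert get_type("aws.cloudtrail.user_identity.type.extra".split("."), mapping) is None

With the old guard, the second lookup would have returned "keyword" even though the path has an extra trailing segment.

From fdbb483b84bf519ba69b95b7805c8924f60bb260 Mon Sep 17 00:00:00 2001
From: eric-forte-elastic
Date: Sat, 27 Sep 2025 15:28:45 -0400
Subject: [PATCH 58/93] Add remote testing dev command

---
 detection_rules/devtools.py | 76 ++++++++++++++++++++++++++++++++++++-
 1 file changed, 74 insertions(+), 2 deletions(-)

diff --git a/detection_rules/devtools.py b/detection_rules/devtools.py
index 19d059eafe9..97399927906 100644
--- a/detection_rules/devtools.py
+++ b/detection_rules/devtools.py
@@ -25,7 +25,8 @@
 import pytoml  # type: ignore[reportMissingTypeStubs]
 import requests.exceptions
 import yaml
-from elasticsearch import Elasticsearch
+from elasticsearch import BadRequestError, Elasticsearch
+from elasticsearch import ConnectionError as ESConnectionError
 from eql.table import Table  # type: ignore[reportMissingTypeStubs]
 from eql.utils import load_dump  # type: ignore[reportMissingTypeStubs, reportUnknownVariableType]
 from kibana.connector import Kibana  # type: ignore[reportMissingTypeStubs]
@@ -39,6 +40,7 @@
 from .docs import REPO_DOCS_DIR, IntegrationSecurityDocs, IntegrationSecurityDocsMDX
 from .ecs import download_endpoint_schemas, download_schemas
 from .endgame import EndgameSchemaManager
+from .esql_errors import EsqlKibanaBaseError, EsqlSchemaError, EsqlSyntaxError, EsqlTypeMismatchError
 from .eswrap import CollectEvents, add_range_to_dsl
 from .ghwrap import GithubClient, update_gist
 from .integrations import (
@@ -50,7 +52,13 @@
     load_integrations_manifests,
 )
 from .main import root
-from .misc import PYTHON_LICENSE, add_client, raise_client_error
+from .misc import (
+    PYTHON_LICENSE,
+    add_client,
+    get_default_elasticsearch_client,
+    get_default_kibana_client,
+    raise_client_error,
+)
 from .packaging import CURRENT_RELEASE_PATH, PACKAGE_FILE, RELEASE_DIR, Package
 from .rule import (
     AnyRuleData,
@@ -63,6 +71,7 @@
     TOMLRuleContents,
 )
 from .rule_loader import RuleCollection, production_filter
+from .rule_validators import ESQLValidator
 from .schemas import definitions, get_stack_versions
 from .utils import check_version_lock_double_bumps, dict_hash, get_etc_path, get_path
 from .version_lock import VersionLockFile, loaded_version_lock
@@ -1403,6 +1412,69 @@ def rule_event_search(  # noqa: PLR0913
         raise_client_error("Rule is not a query rule!")
 
 
+@test_group.command("esql-remote-validation")
+@click.option(
+    "--verbosity",
+    type=click.IntRange(0, 1),
+    default=0,
+    help="Set verbosity level: 0 for minimal output, 1 for detailed output.",
+)
+def esql_remote_validation(
+    verbosity: int,
+) -> None:
+    """Search using a 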
remote Elasticsearch and Kibana instance to validate all production ES|QL rules."""
+
+    rule_collection: RuleCollection = RuleCollection.default().filter(production_filter)
+    esql_rules = [r for r in rule_collection if r.contents.data.type == "esql"]
+
+    click.echo(f"ESQL rules loaded: {len(esql_rules)}")
+
+    if not esql_rules:
+        return
+    # TODO(eric-forte-elastic): @add_client https://github.com/elastic/detection-rules/issues/5156 # noqa: FIX002
+    with get_default_kibana_client() as kibana_client, get_default_elasticsearch_client() as elastic_client:
+        if not kibana_client or not elastic_client:
+            raise_client_error("Skipping remote validation due to missing client")
+
+        failed_count = 0
+        fail_list: list[str] = []
+        max_retries = 3
+        for r in esql_rules:
+            print()
+            retry_count = 0
+            while retry_count < max_retries:
+                try:
+                    validator = ESQLValidator(r.contents.data.query)  # type: ignore[reportIncompatibleMethodOverride]
+                    _ = validator.remote_validate_rule_contents(kibana_client, elastic_client, r.contents, verbosity)
+                    break
+                except (
+                    ValueError,
+                    BadRequestError,
+                    EsqlSchemaError,
+                    EsqlSyntaxError,
+                    EsqlTypeMismatchError,
+                    EsqlKibanaBaseError,
+                ) as e:
+                    click.echo(f"FAILURE: {e}")
+                    fail_list.append(f"{r.contents.data.rule_id} FAILURE: {type(e)}: {e}")
+                    failed_count += 1
+                    break
+                except ESConnectionError as e:
+                    retry_count += 1
+                    click.echo(f"Connection error: {e}. Retrying {retry_count}/{max_retries}...")
+                    time.sleep(30)
+                    if retry_count == max_retries:
+                        click.echo(f"FAILURE: {e} after {max_retries} retries")
+                        fail_list.append(f"FAILURE: {e} after {max_retries} retries")
+                        failed_count += 1
+
+        click.echo(f"Total rules: {len(esql_rules)}")
+        click.echo(f"Failed rules: {failed_count}")
+
+        _ = Path("failed_rules.log").write_text("\n".join(fail_list), encoding="utf-8")
+        click.echo("Failed rules written to failed_rules.log")
+
+
 @test_group.command("rule-survey")
 @click.argument("query", required=False)
 @click.option("--date-range", "-d", type=(str, str), default=("now-7d", "now"), help="Date range to scope search")

From 5ae993796d2114331806e22eeb80e8cf02334cfa Mon Sep 17 00:00:00 2001
From: eric-forte-elastic
Date: Mon, 29 Sep 2025 17:39:40 -0400
Subject: [PATCH 59/93] Get latest instead of least for this validation

---
 detection_rules/index_mappings.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/detection_rules/index_mappings.py b/detection_rules/index_mappings.py
index 9f99b0bc616..16bc9bbfbe7 100644
--- a/detection_rules/index_mappings.py
+++ b/detection_rules/index_mappings.py
@@ -180,12 +180,12 @@ def prepare_integration_mappings(  # noqa: PLR0913
 
     for integration in rule_integrations:
         package = integration
-        package_version = integrations.find_least_compatible_version(
+        package_version, _ = integrations.find_latest_compatible_version(
             package,
             "",
-            stack_version,
+            Version.parse(stack_version),
             package_manifests,
-        ).lstrip("^")
+        )
         package_schema = integration_schemas[package][package_version]
 
         # Apply dataset restrictions if any
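Validating against the latest compatible integration package rather than the least means fields introduced in newer package releases resolve during validation. A toy sketch of that selection, assuming a made-up manifest that maps package versions to the minimum stack version each supports (real package manifests carry richer constraint metadata):

    from semver import Version

    # Hypothetical manifest: package version -> minimum stack version it supports.
    MANIFEST = {
        "1.5.0": "8.12.0",
        "2.1.0": "8.14.0",
        "2.3.0": "8.14.0",
    }


    def latest_compatible(stack: str) -> str:
        """Pick the highest package version whose stack requirement is satisfied."""
        stack_version = Version.parse(stack)
        compatible = [v for v, floor in MANIFEST.items() if Version.parse(floor) <= stack_version]
        if not compatible:
            raise ValueError(f"no package version compatible with stack {stack}")
        return str(max(Version.parse(v) for v in compatible))


    assert latest_compatible("8.14.3") == "2.3.0"  # latest, where the old behavior picked the least

From 5e584183c3fcf62fba7b4ffb96f01de2a50fc0ca Mon Sep 17 00:00:00 2001
From: eric-forte-elastic
Date: Mon, 29 Sep 2025 17:39:51 -0400
Subject: [PATCH 60/93] TODO items

---
 detection_rules/rule_validators.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/detection_rules/rule_validators.py b/detection_rules/rule_validators.py
index 6c1cd6e93cd..b5028ad1ca2 100644
--- a/detection_rules/rule_validators.py
+++ b/detection_rules/rule_validators.py
@@ -867,6 +867,7 @@ def remote_validate_rule(  # noqa: PLR0913
         kibana_details: dict[str, Any] = 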
kibana_client.get("/api/status", {}) # type: ignore[reportUnknownVariableType] if "version" not in kibana_details: raise ValueError("Failed to retrieve Kibana details.") + # TODO decide if this should always be latest. I think it should be stack_version = str(kibana_details["version"]["number"]) self.log(f"Validating against {stack_version} stack") @@ -894,6 +895,10 @@ def remote_validate_rule( # noqa: PLR0913 query_columns, response = execute_query_against_indices(elastic_client, query, full_index_str, self.log) # type: ignore[reportUnknownVariableType] self.esql_unique_fields = query_columns + # TODO this is the looping location for each stack version + # as we only need to check against the schemas locally for the type + # mismatch error, as the syntax and semantic errors from the stack + # will not be impacted by the difference in schema mapping if self.validate_columns_index_mapping(query_columns, combined_mappings): self.log("All dynamic columns have proper formatting.") else: From a368516b398eca2d9419fe49e92ab27d3a174999 Mon Sep 17 00:00:00 2001 From: eric-forte-elastic Date: Tue, 30 Sep 2025 00:21:14 -0400 Subject: [PATCH 61/93] Add validation for all stacks in schema map --- detection_rules/esql_errors.py | 4 +-- detection_rules/index_mappings.py | 1 + detection_rules/rule_validators.py | 39 ++++++++++++++++++----------- detection_rules/schemas/__init__.py | 6 +++++ 4 files changed, 33 insertions(+), 17 deletions(-) diff --git a/detection_rules/esql_errors.py b/detection_rules/esql_errors.py index 39ea8cb552d..54d4ab5436a 100644 --- a/detection_rules/esql_errors.py +++ b/detection_rules/esql_errors.py @@ -48,7 +48,7 @@ class EsqlSyntaxError(EsqlKibanaBaseError): class EsqlTypeMismatchError(Exception): - """Error when validating types in ESQL.""" + """Error when validating types in ESQL. Can occur in stack or local schema comparison.""" def __init__(self, message: str, elastic_client: Elasticsearch | None = None) -> None: if elastic_client: @@ -57,7 +57,7 @@ def __init__(self, message: str, elastic_client: Elasticsearch | None = None) -> class EsqlSemanticError(Exception): - """Error with ESQL semantics. Validated via Kibana until AST is available.""" + """Error with ESQL semantics. 
Validated through regex enforcement.""" def __init__(self, message: str) -> None: super().__init__(message) diff --git a/detection_rules/index_mappings.py b/detection_rules/index_mappings.py index 16bc9bbfbe7..580f04750cb 100644 --- a/detection_rules/index_mappings.py +++ b/detection_rules/index_mappings.py @@ -388,5 +388,6 @@ def prepare_mappings( # noqa: PLR0913 if not combined_mappings and not non_ecs_mapping and not ecs_schema: raise ValueError("No mappings found") index_lookup.update({"rule-non-ecs-index": non_ecs_mapping}) + utils.combine_dicts(combined_mappings, deepcopy(non_ecs_mapping)) return existing_mappings, index_lookup, combined_mappings diff --git a/detection_rules/rule_validators.py b/detection_rules/rule_validators.py index b5028ad1ca2..2833ae5568b 100644 --- a/detection_rules/rule_validators.py +++ b/detection_rules/rule_validators.py @@ -46,7 +46,7 @@ parse_datasets, ) from .rule import EQLRuleData, QueryRuleData, QueryValidator, RuleMeta, TOMLRuleContents, set_eql_config -from .schemas import get_stack_schemas +from .schemas import get_latest_stack_version, get_stack_schemas, get_stack_versions from .schemas.definitions import FROM_SOURCES_REGEX EQL_ERROR_TYPES = ( @@ -771,7 +771,7 @@ def get_unique_field_type(self, field_name: str) -> str | None: # type: ignore[ return None def validate_columns_index_mapping( - self, query_columns: list[dict[str, str]], combined_mappings: dict[str, Any] + self, query_columns: list[dict[str, str]], combined_mappings: dict[str, Any], version: str = "" ) -> bool: """Validate that the columns in the ESQL query match the provided mappings.""" mismatched_columns: list[str] = [] @@ -804,7 +804,9 @@ def validate_columns_index_mapping( ) if mismatched_columns: - raise EsqlTypeMismatchError("Column validation errors:\n" + "\n".join(mismatched_columns)) + raise EsqlTypeMismatchError( + f"Column validation errors in Stack Version {version}:\n" + "\n".join(mismatched_columns) + ) return True @@ -867,8 +869,7 @@ def remote_validate_rule( # noqa: PLR0913 kibana_details: dict[str, Any] = kibana_client.get("/api/status", {}) # type: ignore[reportUnknownVariableType] if "version" not in kibana_details: raise ValueError("Failed to retrieve Kibana details.") - # TODO decide if this should always be latest. 
I think it should be - stack_version = str(kibana_details["version"]["number"]) + stack_version = get_latest_stack_version() self.log(f"Validating against {stack_version} stack") indices_str, indices = self.get_esql_query_indices(query) # type: ignore[reportUnknownVariableType] @@ -886,8 +887,6 @@ def remote_validate_rule( # noqa: PLR0913 # Create remote indices full_index_str = create_remote_indices(elastic_client, existing_mappings, index_lookup, self.log) - utils.combine_dicts(combined_mappings, index_lookup["rule-non-ecs-index"]) - utils.combine_dicts(combined_mappings, index_lookup["rule-ecs-index"]) # Replace all sources with the test indices query = query.replace(indices_str, full_index_str) # type: ignore[reportUnknownVariableType] @@ -895,14 +894,24 @@ def remote_validate_rule( # noqa: PLR0913 query_columns, response = execute_query_against_indices(elastic_client, query, full_index_str, self.log) # type: ignore[reportUnknownVariableType] self.esql_unique_fields = query_columns - # TODO this is the looping location for each stack version - # as we only need to check against the schemas locally for the type - # mismatch error, as the syntax and semantic errors from the stack - # will not be impacted by the difference in schema mapping - if self.validate_columns_index_mapping(query_columns, combined_mappings): - self.log("All dynamic columns have proper formatting.") - else: - self.log("Dynamic column(s) have improper formatting.") + # Build a mapping lookup for all stack versions to validate against. + # We only need to check against the schemas locally for the type + # mismatch error, as the EsqlSchemaError and EsqlSyntaxError errors from the stack + # will not be impacted by the difference in schema type mapping. + mappings_lookup: dict[str, dict[str, Any]] = {stack_version: combined_mappings} + versions = get_stack_versions() + for version in versions: + if version in mappings_lookup: + continue + _, _, combined_mappings = prepare_mappings( + elastic_client, indices, event_dataset_integrations, metadata, version, self.log + ) + mappings_lookup[version] = combined_mappings + + for version, mapping in mappings_lookup.items(): + self.log(f"Validating {rule_id} against {version} stack") + if not self.validate_columns_index_mapping(query_columns, mapping, version=version): + self.log("Dynamic column(s) have improper formatting.") return response diff --git a/detection_rules/schemas/__init__.py b/detection_rules/schemas/__init__.py index c1fc8b4b7af..62228158a0a 100644 --- a/detection_rules/schemas/__init__.py +++ b/detection_rules/schemas/__init__.py @@ -395,6 +395,12 @@ def get_stack_versions(drop_patch: bool = False) -> list[str]: return versions +def get_latest_stack_version(drop_patch: bool = False) -> str: + """Get the latest defined and supported stack version.""" + parsed_versions = [Version.parse(version) for version in get_stack_versions(drop_patch=drop_patch)] + return str(max(parsed_versions)) + + @cached def get_min_supported_stack_version() -> Version: """Get the minimum defined and supported stack version.""" From dd6452161a986904e87b93288242788000c670bc Mon Sep 17 00:00:00 2001 From: eric-forte-elastic Date: Thu, 9 Oct 2025 09:15:09 -0400 Subject: [PATCH 62/93] Update function name --- .github/workflows/esql-validation.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/esql-validation.yml b/.github/workflows/esql-validation.yml index 1fb66ae0623..2c15ad9fe27 100644 --- a/.github/workflows/esql-validation.yml +++ 
b/.github/workflows/esql-validation.yml @@ -70,4 +70,4 @@ jobs: DR_API_KEY: ${{ vars.api_key || vars.DR_API_KEY }} run: | cd detection-rules - python -m pytest tests/test_rules_remote.py::TestRemoteRules::test_esql_rules + python -m detection_rules dev test esql-remote-validation From df9e285e89219fd99884478eeaca0f8a8ab5f233 Mon Sep 17 00:00:00 2001 From: eric-forte-elastic Date: Thu, 9 Oct 2025 15:26:25 -0400 Subject: [PATCH 63/93] Use env rather than variables --- .github/workflows/esql-validation.yml | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/workflows/esql-validation.yml b/.github/workflows/esql-validation.yml index 2c15ad9fe27..c322e0a2548 100644 --- a/.github/workflows/esql-validation.yml +++ b/.github/workflows/esql-validation.yml @@ -12,14 +12,14 @@ jobs: steps: - name: Check out repository - if: ${{ !vars.cloud_id && !vars.api_key }} + if: ${{ !secrets.cloud_id && !secrets.api_key }} uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5 with: path: elastic-container repository: eric-forte-elastic/elastic-container - name: Build and run containers - if: ${{ !vars.cloud_id && !vars.api_key }} + if: ${{ !secrets.cloud_id && !secrets.api_key }} run: | cd elastic-container GENERATED_PASSWORD=$(openssl rand -base64 16) @@ -40,7 +40,7 @@ jobs: python-version: '3.13' - name: Get API Key and setup auth - if: ${{ !vars.cloud_id && !vars.api_key }} + if: ${{ !secrets.cloud_id && !secrets.api_key }} env: DR_ELASTICSEARCH_URL: "https://localhost:9200" ES_USER: "elastic" @@ -64,10 +64,10 @@ jobs: - name: Validate Test ESQL Rule env: - DR_CLOUD_ID: ${{ vars.cloud_id }} - DR_KIBANA_URL: ${{ vars.cloud_id == '' && 'https://localhost:5601' || '' }} - DR_ES_USER: ${{ vars.cloud_id == '' && 'elastic' || '' }} - DR_API_KEY: ${{ vars.api_key || vars.DR_API_KEY }} + DR_CLOUD_ID: ${{ secrets.cloud_id }} + DR_KIBANA_URL: ${{ secrets.cloud_id == '' && 'https://localhost:5601' || '' }} + DR_ES_USER: ${{ secrets.cloud_id == '' && 'elastic' || '' }} + DR_API_KEY: ${{ secrets.api_key || env.DR_API_KEY }} run: | cd detection-rules python -m detection_rules dev test esql-remote-validation From 269795a75debe426e978b1d76b13a49f5b7ab113 Mon Sep 17 00:00:00 2001 From: eric-forte-elastic Date: Thu, 9 Oct 2025 15:44:06 -0400 Subject: [PATCH 64/93] Switch if logic to env --- .github/workflows/esql-validation.yml | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/.github/workflows/esql-validation.yml b/.github/workflows/esql-validation.yml index c322e0a2548..e7a91e955da 100644 --- a/.github/workflows/esql-validation.yml +++ b/.github/workflows/esql-validation.yml @@ -12,14 +12,20 @@ jobs: steps: - name: Check out repository - if: ${{ !secrets.cloud_id && !secrets.api_key }} + env: + DR_CLOUD_ID: ${{ secrets.cloud_id }} + DR_API_KEY: ${{ secrets.api_key }} + if: ${{ !env.DR_CLOUD_ID && !env.DR_API_KEY }} uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5 with: path: elastic-container repository: eric-forte-elastic/elastic-container - name: Build and run containers - if: ${{ !secrets.cloud_id && !secrets.api_key }} + env: + DR_CLOUD_ID: ${{ secrets.cloud_id }} + DR_API_KEY: ${{ secrets.api_key }} + if: ${{ !env.DR_CLOUD_ID && !env.DR_API_KEY }} run: | cd elastic-container GENERATED_PASSWORD=$(openssl rand -base64 16) @@ -40,11 +46,13 @@ jobs: python-version: '3.13' - name: Get API Key and setup auth - if: ${{ !secrets.cloud_id && !secrets.api_key }} env: + DR_CLOUD_ID: ${{ secrets.cloud_id }} + DR_API_KEY: ${{ 
secrets.api_key }} DR_ELASTICSEARCH_URL: "https://localhost:9200" ES_USER: "elastic" ES_PASSWORD: ${{ env.GENERATED_PASSWORD }} + if: ${{ !env.DR_CLOUD_ID && !env.DR_API_KEY }} run: | cd detection-rules response=$(curl -k -X POST -u "$ES_USER:$ES_PASSWORD" -H "Content-Type: application/json" -d '{ From 2746b005e4a29a76dc5014106079b96b2e24da69 Mon Sep 17 00:00:00 2001 From: eric-forte-elastic Date: Thu, 9 Oct 2025 19:58:28 -0400 Subject: [PATCH 65/93] Handle empty strings as None --- detection_rules/misc.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/detection_rules/misc.py b/detection_rules/misc.py index f8285c35e13..51b6ba0eb86 100644 --- a/detection_rules/misc.py +++ b/detection_rules/misc.py @@ -255,6 +255,8 @@ def get_elasticsearch_client( # noqa: PLR0913 ) -> Elasticsearch: """Get an authenticated elasticsearch client.""" + cloud_id = cloud_id or None + elasticsearch_url = elasticsearch_url or None if not (cloud_id or elasticsearch_url): raise_client_error("Missing required --cloud-id or --elasticsearch-url") From 76b33d747dc0251fa726888a0be857410de2cb12 Mon Sep 17 00:00:00 2001 From: eric-forte-elastic Date: Thu, 9 Oct 2025 20:14:42 -0400 Subject: [PATCH 66/93] Add comment --- detection_rules/misc.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/detection_rules/misc.py b/detection_rules/misc.py index 51b6ba0eb86..064e60c2912 100644 --- a/detection_rules/misc.py +++ b/detection_rules/misc.py @@ -254,9 +254,10 @@ def get_elasticsearch_client( # noqa: PLR0913 **kwargs: Any, ) -> Elasticsearch: """Get an authenticated elasticsearch client.""" - + # Handle empty strings as None cloud_id = cloud_id or None elasticsearch_url = elasticsearch_url or None + if not (cloud_id or elasticsearch_url): raise_client_error("Missing required --cloud-id or --elasticsearch-url") From a07c7f5f3c3290b7948976907c2a8da9ef3e6919 Mon Sep 17 00:00:00 2001 From: eric-forte-elastic Date: Thu, 9 Oct 2025 20:30:09 -0400 Subject: [PATCH 67/93] Add failed rules output for CI --- detection_rules/devtools.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/detection_rules/devtools.py b/detection_rules/devtools.py index 97399927906..617044b7808 100644 --- a/detection_rules/devtools.py +++ b/detection_rules/devtools.py @@ -1473,6 +1473,12 @@ def esql_remote_validation( _ = Path("failed_rules.log").write_text("\n".join(fail_list), encoding="utf-8") click.echo("Failed rules written to failed_rules.log") + if failed_count > 0: + click.echo("Failed rules:") + uuids = {line.split()[0] for line in fail_list} + click.echo("\n".join(uuids)) + ctx = click.get_current_context() + ctx.exit(1) @test_group.command("rule-survey") From e55c5935ec2bd8abd004503fa824826a8ad27466 Mon Sep 17 00:00:00 2001 From: eric-forte-elastic Date: Thu, 9 Oct 2025 20:31:22 -0400 Subject: [PATCH 68/93] Add env masking --- .github/workflows/esql-validation.yml | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/.github/workflows/esql-validation.yml b/.github/workflows/esql-validation.yml index e7a91e955da..e0c107b5d16 100644 --- a/.github/workflows/esql-validation.yml +++ b/.github/workflows/esql-validation.yml @@ -29,7 +29,8 @@ jobs: run: | cd elastic-container GENERATED_PASSWORD=$(openssl rand -base64 16) - sed -i 's/changeme/$GENERATED_PASSWORD/' .env + sed -i "s|changeme|$GENERATED_PASSWORD|" .env + echo "::add-mask::$GENERATED_PASSWORD" echo "GENERATED_PASSWORD=$GENERATED_PASSWORD" >> $GITHUB_ENV set -x bash elastic-container.sh start @@ -38,7 +39,9 @@ jobs: - name: Setup Detection 
Rules uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5 with: - fetch-depth: 0 + fetch-depth: 0 + path: detection-rules + repository: elastic/detection-rules - name: Set up Python 3.13 uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6 @@ -61,6 +64,7 @@ jobs: }' "$DR_ELASTICSEARCH_URL/_security/api_key") DR_API_KEY=$(echo "$response" | jq -r '.encoded') + echo "::add-mask::$DR_API_KEY" echo "DR_API_KEY=$DR_API_KEY" >> $GITHUB_ENV - name: Install dependencies @@ -70,12 +74,13 @@ jobs: pip cache purge pip install .[dev] - - name: Validate Test ESQL Rule + - name: Remote Test ESQL Rules env: - DR_CLOUD_ID: ${{ secrets.cloud_id }} + DR_CLOUD_ID: ${{ secrets.cloud_id || '' }} DR_KIBANA_URL: ${{ secrets.cloud_id == '' && 'https://localhost:5601' || '' }} - DR_ES_USER: ${{ secrets.cloud_id == '' && 'elastic' || '' }} + DR_ELASTICSEARCH_URL: ${{ secrets.cloud_id == '' && 'https://localhost:9200' || '' }} DR_API_KEY: ${{ secrets.api_key || env.DR_API_KEY }} + DR_IGNORE_SSL_ERRORS: ${{ secrets.cloud_id == '' && 'true' || '' }} run: | cd detection-rules python -m detection_rules dev test esql-remote-validation From 827937a89294840084273abbd039bf7fe1011b9e Mon Sep 17 00:00:00 2001 From: eric-forte-elastic Date: Thu, 9 Oct 2025 21:01:10 -0400 Subject: [PATCH 69/93] Only run on modified esql rules or push to main --- .github/workflows/esql-validation.yml | 60 ++++++++++++++++++++------- detection_rules/devtools.py | 3 +- 2 files changed, 45 insertions(+), 18 deletions(-) diff --git a/.github/workflows/esql-validation.yml b/.github/workflows/esql-validation.yml index e0c107b5d16..8cce90755cd 100644 --- a/.github/workflows/esql-validation.yml +++ b/.github/workflows/esql-validation.yml @@ -11,11 +11,44 @@ jobs: runs-on: ubuntu-latest steps: + - name: Setup Detection Rules + uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5 + with: + fetch-depth: 0 + path: detection-rules + + - name: Check if new or modified rule files are ESQL rules + id: check-esql + run: | + cd detection-rules + + # Check if the event is a push + if [ "${{ github.event_name }}" = "push" ]; then + echo "Triggered by a push event. Setting run_esql=true." + echo "run_esql=true" >> $GITHUB_ENV + exit 0 + fi + + MODIFIED_FILES=$(git diff --name-only --diff-filter=AM HEAD~1 | grep '^rules/.*\.toml$' || true) + if [ -z "$MODIFIED_FILES" ]; then + echo "No modified or new .toml files found. Skipping workflow." + echo "run_esql=false" >> $GITHUB_ENV + exit 0 + fi + + if ! grep -q 'type = "esql"' $MODIFIED_FILES; then + echo "No 'type = \"esql\"' found in the modified .toml files. Skipping workflow." 
+ echo "run_esql=false" >> $GITHUB_ENV + exit 0 + fi + + echo "run_esql=true" >> $GITHUB_ENV + - name: Check out repository env: DR_CLOUD_ID: ${{ secrets.cloud_id }} DR_API_KEY: ${{ secrets.api_key }} - if: ${{ !env.DR_CLOUD_ID && !env.DR_API_KEY }} + if: ${{ !env.DR_CLOUD_ID && !env.DR_API_KEY && env.run_esql == 'true' }} uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5 with: path: elastic-container @@ -25,7 +58,7 @@ jobs: env: DR_CLOUD_ID: ${{ secrets.cloud_id }} DR_API_KEY: ${{ secrets.api_key }} - if: ${{ !env.DR_CLOUD_ID && !env.DR_API_KEY }} + if: ${{ !env.DR_CLOUD_ID && !env.DR_API_KEY && env.run_esql == 'true' }} run: | cd elastic-container GENERATED_PASSWORD=$(openssl rand -base64 16) @@ -35,19 +68,6 @@ jobs: set -x bash elastic-container.sh start - - - name: Setup Detection Rules - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5 - with: - fetch-depth: 0 - path: detection-rules - repository: elastic/detection-rules - - - name: Set up Python 3.13 - uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6 - with: - python-version: '3.13' - - name: Get API Key and setup auth env: DR_CLOUD_ID: ${{ secrets.cloud_id }} @@ -55,7 +75,7 @@ jobs: DR_ELASTICSEARCH_URL: "https://localhost:9200" ES_USER: "elastic" ES_PASSWORD: ${{ env.GENERATED_PASSWORD }} - if: ${{ !env.DR_CLOUD_ID && !env.DR_API_KEY }} + if: ${{ !env.DR_CLOUD_ID && !env.DR_API_KEY && env.run_esql == 'true' }} run: | cd detection-rules response=$(curl -k -X POST -u "$ES_USER:$ES_PASSWORD" -H "Content-Type: application/json" -d '{ @@ -67,7 +87,14 @@ jobs: echo "::add-mask::$DR_API_KEY" echo "DR_API_KEY=$DR_API_KEY" >> $GITHUB_ENV + - name: Set up Python 3.13 + if: ${{ env.run_esql == 'true' }} + uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6 + with: + python-version: '3.13' + - name: Install dependencies + if: ${{ env.run_esql == 'true' }} run: | cd detection-rules python -m pip install --upgrade pip @@ -75,6 +102,7 @@ jobs: pip install .[dev] - name: Remote Test ESQL Rules + if: ${{ env.run_esql == 'true' }} env: DR_CLOUD_ID: ${{ secrets.cloud_id || '' }} DR_KIBANA_URL: ${{ secrets.cloud_id == '' && 'https://localhost:5601' || '' }} diff --git a/detection_rules/devtools.py b/detection_rules/devtools.py index 617044b7808..f56b1d4840d 100644 --- a/detection_rules/devtools.py +++ b/detection_rules/devtools.py @@ -1440,7 +1440,6 @@ def esql_remote_validation( fail_list: list[str] = [] max_retries = 3 for r in esql_rules: - print() retry_count = 0 while retry_count < max_retries: try: @@ -1474,7 +1473,7 @@ def esql_remote_validation( _ = Path("failed_rules.log").write_text("\n".join(fail_list), encoding="utf-8") click.echo("Failed rules written to failed_rules.log") if failed_count > 0: - click.echo("Failed rules:") + click.echo("Failed rule IDs:") uuids = {line.split()[0] for line in fail_list} click.echo("\n".join(uuids)) ctx = click.get_current_context() From c077219d44f8388a92088d72609e2ea60c0c1c81 Mon Sep 17 00:00:00 2001 From: eric-forte-elastic Date: Fri, 10 Oct 2025 11:49:55 -0400 Subject: [PATCH 70/93] Update to main elastic-container --- .github/workflows/esql-validation.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/esql-validation.yml b/.github/workflows/esql-validation.yml index 8cce90755cd..7887f630021 100644 --- a/.github/workflows/esql-validation.yml +++ b/.github/workflows/esql-validation.yml @@ -52,7 +52,7 @@ jobs: uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5 with: 
path: elastic-container - repository: eric-forte-elastic/elastic-container + repository: peasead/elastic-container - name: Build and run containers env: From 244226fbaf6415fefe8ff23a607a03a957b299c6 Mon Sep 17 00:00:00 2001 From: eric-forte-elastic Date: Fri, 10 Oct 2025 14:06:20 -0400 Subject: [PATCH 71/93] Add index validation and unsupported type checking --- detection_rules/esql_errors.py | 13 ++++++++++++- detection_rules/index_mappings.py | 31 ++++++++++++++++++++++++++++++ detection_rules/rule_validators.py | 12 ++++++++---- 3 files changed, 51 insertions(+), 5 deletions(-) diff --git a/detection_rules/esql_errors.py b/detection_rules/esql_errors.py index 54d4ab5436a..7d95c7455a3 100644 --- a/detection_rules/esql_errors.py +++ b/detection_rules/esql_errors.py @@ -43,8 +43,12 @@ class EsqlSchemaError(EsqlKibanaBaseError): """Error in ESQL schema. Validated via Kibana until AST is available.""" +class EsqlUnsupportedTypeError(EsqlKibanaBaseError): + """Error in ESQL type validation using unsupported type.""" + + class EsqlSyntaxError(EsqlKibanaBaseError): - """Error with ESQL syntax. Validated via Kibana until AST is available.""" + """Error with ESQL syntax.""" class EsqlTypeMismatchError(Exception): @@ -61,3 +65,10 @@ class EsqlSemanticError(Exception): def __init__(self, message: str) -> None: super().__init__(message) + + +class EsqlUnknownIndexError(Exception): + """Error with ESQL Indices. Validated through regex enforcement.""" + + def __init__(self, message: str) -> None: + super().__init__(message) diff --git a/detection_rules/index_mappings.py b/detection_rules/index_mappings.py index 580f04750cb..836f5c85306 100644 --- a/detection_rules/index_mappings.py +++ b/detection_rules/index_mappings.py @@ -5,6 +5,7 @@ """Validation logic for rules containing queries.""" +import re import time from collections.abc import Callable from copy import deepcopy @@ -23,6 +24,8 @@ EsqlSchemaError, EsqlSyntaxError, EsqlTypeMismatchError, + EsqlUnknownIndexError, + EsqlUnsupportedTypeError, cleanup_empty_indices, ) from .integrations import ( @@ -218,13 +221,39 @@ def prepare_integration_mappings( # noqa: PLR0913 return integration_mappings, index_lookup +def check_known_indices(indices: list[str], index_lookup: dict[str, Any]) -> None: + """Check if the provided indices are known based on the integration format.""" + + _ = index_lookup.pop("rule-ecs-index", None) + _ = index_lookup.pop("rule-non-ecs-index", None) + + # Assumes valid index format is logs-.* or logs-.-* + filtered_keys = {"logs-" + key.replace("-", ".") + "*" for key in index_lookup if key not in indices} + filtered_keys.update({"logs-" + key.replace("-", ".") + "-*" for key in index_lookup if key not in indices}) + matches = [] + + for index in indices: + pattern = re.compile(index.replace(".", r"\.").replace("*", ".*").rstrip("-")) + matches = [key for key in filtered_keys if pattern.fullmatch(key)] + + if not matches: + raise EsqlUnknownIndexError( + f"Unknown index pattern(s): {', '.join(indices)}. 
Known patterns: {', '.join(filtered_keys)}" + ) + + def create_remote_indices( elastic_client: Elasticsearch, existing_mappings: dict[str, Any], index_lookup: dict[str, Any], + indices: list[str], log: Callable[[str], None], ) -> str: """Create remote indices for validation and return the index string.""" + + # Check if the provided indices are known + check_known_indices(indices, index_lookup) + suffix = str(int(time.time() * 1000)) test_index = f"rule-test-index-{suffix}" response = create_index_with_index_mapping(elastic_client, test_index, existing_mappings) @@ -260,6 +289,8 @@ def execute_query_against_indices( raise EsqlSyntaxError(str(e), elastic_client) from e if "Unknown column" in error_msg: raise EsqlSchemaError(str(e), elastic_client) from e + if "verification_exception" in error_msg and "unsupported type" in error_msg: + raise EsqlUnsupportedTypeError(str(e), elastic_client) from e if "verification_exception" in error_msg: raise EsqlTypeMismatchError(str(e), elastic_client) from e raise EsqlKibanaBaseError(str(e), elastic_client) from e diff --git a/detection_rules/rule_validators.py b/detection_rules/rule_validators.py index 2833ae5568b..16955a67cd2 100644 --- a/detection_rules/rule_validators.py +++ b/detection_rules/rule_validators.py @@ -791,13 +791,17 @@ def validate_columns_index_mapping( schema_type = utils.get_column_from_index_mapping_schema(keys, combined_mappings) schema_type = kql.parser.elasticsearch_type_family(schema_type) if schema_type else None - # The mapping between integer and long may be different between Kibana and the schema - # both are numeric types with different ranges, but for our purposes they are equivalent - if column_type == "long" and schema_type == "integer": + # If it is in our schema, but Kibana returns unsupported + if schema_type and column_type == "unsupported": continue # Validate the type if not schema_type or column_type != schema_type: + # Attempt reverse mapping as for our purposes they are equivalent. + # We are generally concerned about the operators for the types not the values themselves. + reverse_col_type = kql.parser.elasticsearch_type_family(column_type) if column_type else None + if reverse_col_type is not None and schema_type is not None and reverse_col_type == schema_type: + continue mismatched_columns.append( f"Dynamic field `{column_name}` is not correctly mapped. " f"If not dynamic: expected from schema: `{schema_type}`, got from Kibana: `{column_type}`." 
@@ -886,7 +890,7 @@ def remote_validate_rule( # noqa: PLR0913 self.log(f"Combined mappings prepared: {len(combined_mappings)}") # Create remote indices - full_index_str = create_remote_indices(elastic_client, existing_mappings, index_lookup, self.log) + full_index_str = create_remote_indices(elastic_client, existing_mappings, index_lookup, indices, self.log) # Replace all sources with the test indices query = query.replace(indices_str, full_index_str) # type: ignore[reportUnknownVariableType] From cc768b243da359c86808e083dac6624cf100bacd Mon Sep 17 00:00:00 2001 From: eric-forte-elastic Date: Fri, 10 Oct 2025 23:04:14 -0400 Subject: [PATCH 72/93] Add index validation --- detection_rules/esql_errors.py | 2 + detection_rules/index_mappings.py | 71 ++++++++++++++++++++++-------- detection_rules/rule_validators.py | 11 +++-- 3 files changed, 61 insertions(+), 23 deletions(-) diff --git a/detection_rules/esql_errors.py b/detection_rules/esql_errors.py index 7d95c7455a3..5a43729cabb 100644 --- a/detection_rules/esql_errors.py +++ b/detection_rules/esql_errors.py @@ -15,6 +15,8 @@ "EsqlSemanticError", "EsqlSyntaxError", "EsqlTypeMismatchError", + "EsqlUnknownIndexError", + "EsqlUnsupportedTypeError", ) diff --git a/detection_rules/index_mappings.py b/detection_rules/index_mappings.py index 836f5c85306..729d224fc8a 100644 --- a/detection_rules/index_mappings.py +++ b/detection_rules/index_mappings.py @@ -221,17 +221,31 @@ def prepare_integration_mappings( # noqa: PLR0913 return integration_mappings, index_lookup -def check_known_indices(indices: list[str], index_lookup: dict[str, Any]) -> None: - """Check if the provided indices are known based on the integration format.""" +def get_filtered_index_schema( + indices: list[str], + index_lookup: dict[str, Any], + ecs_schema: dict[str, Any], + non_ecs_mapping: dict[str, Any], + custom_mapping: dict[str, Any], +) -> dict[str, Any]: + """Check if the provided indices are known based on the integration format. Returns the combined schema.""" - _ = index_lookup.pop("rule-ecs-index", None) - _ = index_lookup.pop("rule-non-ecs-index", None) + non_ecs_indices = ecs.get_non_ecs_schema() + custom_indices = ecs.get_custom_schemas() # Assumes valid index format is logs-.* or logs-.-* filtered_keys = {"logs-" + key.replace("-", ".") + "*" for key in index_lookup if key not in indices} filtered_keys.update({"logs-" + key.replace("-", ".") + "-*" for key in index_lookup if key not in indices}) - matches = [] - + # Replace "logs-endpoint." with "logs-endpoint.events." + filtered_keys = { + key.replace("logs-endpoint.", "logs-endpoint.events.") if "logs-endpoint." in key else key + for key in filtered_keys + } + filtered_keys.update(non_ecs_indices.keys()) + filtered_keys.update(custom_indices.keys()) + filtered_keys.add("logs-endpoint.alerts-*") + + matches: list[str] = [] for index in indices: pattern = re.compile(index.replace(".", r"\.").replace("*", ".*").rstrip("-")) matches = [key for key in filtered_keys if pattern.fullmatch(key)] @@ -241,19 +255,34 @@ def check_known_indices(indices: list[str], index_lookup: dict[str, Any]) -> Non f"Unknown index pattern(s): {', '.join(indices)}. 
Known patterns: {', '.join(filtered_keys)}" ) + filtered_index_lookup = { + "logs-" + key.replace("-", ".") + "*": value for key, value in index_lookup.items() if key not in indices + } + filtered_index_lookup.update( + {"logs-" + key.replace("-", ".") + "-*": value for key, value in index_lookup.items() if key not in indices} + ) + filtered_index_lookup = { + key.replace("logs-endpoint.", "logs-endpoint.events."): value for key, value in filtered_index_lookup.items() + } + filtered_index_lookup.update(non_ecs_mapping) + filtered_index_lookup.update(custom_mapping) + + combined_mappings: dict[str, Any] = {} + for match in matches: + utils.combine_dicts(combined_mappings, deepcopy(filtered_index_lookup.get(match, {}))) + + utils.combine_dicts(combined_mappings, deepcopy(ecs_schema)) + return combined_mappings + def create_remote_indices( elastic_client: Elasticsearch, existing_mappings: dict[str, Any], index_lookup: dict[str, Any], - indices: list[str], log: Callable[[str], None], ) -> str: """Create remote indices for validation and return the index string.""" - # Check if the provided indices are known - check_known_indices(indices, index_lookup) - suffix = str(int(time.time() * 1000)) test_index = f"rule-test-index-{suffix}" response = create_index_with_index_mapping(elastic_client, test_index, existing_mappings) @@ -294,7 +323,7 @@ def execute_query_against_indices( if "verification_exception" in error_msg: raise EsqlTypeMismatchError(str(e), elastic_client) from e raise EsqlKibanaBaseError(str(e), elastic_client) from e - if delete_indices or misc.getdefault("skip_empty_index_cleanup")(): + if delete_indices or not misc.getdefault("skip_empty_index_cleanup")(): for index_str in test_index_str.split(","): response = elastic_client.indices.delete(index=index_str.strip()) log(f"Test index `{index_str}` deleted: {response}") @@ -396,11 +425,6 @@ def prepare_mappings( # noqa: PLR0913 index_lookup.update(integration_index_lookup) - # Combine existing and integration mappings into a single mapping dict - combined_mappings: dict[str, Any] = {} - utils.combine_dicts(combined_mappings, deepcopy(existing_mappings)) - utils.combine_dicts(combined_mappings, deepcopy(integration_mappings)) - # Load non-ecs schema and convert to index mapping format (nested schema) non_ecs_mapping: dict[str, Any] = {} non_ecs = ecs.get_non_ecs_schema() @@ -409,16 +433,25 @@ def prepare_mappings( # noqa: PLR0913 non_ecs_mapping = ecs.flatten(non_ecs_mapping) non_ecs_mapping = utils.convert_to_nested_schema(non_ecs_mapping) + # Load custom schema and convert to index mapping format (nested schema) + custom_mapping: dict[str, Any] = {} + custom_indices = ecs.get_custom_schemas() + for index in indices: + custom_mapping.update(custom_indices.get(index, {})) + custom_mapping = ecs.flatten(custom_mapping) + custom_mapping = utils.convert_to_nested_schema(custom_mapping) + # Load ECS in an index mapping format (nested schema) current_version = Version.parse(load_current_package_version(), optional_minor_and_patch=True) ecs_schema = get_ecs_schema_mappings(current_version) + # Filter combined mappings based on the provided indices + combined_mappings = get_filtered_index_schema(indices, index_lookup, ecs_schema, non_ecs_mapping, custom_mapping) + index_lookup.update({"rule-ecs-index": ecs_schema}) - utils.combine_dicts(combined_mappings, deepcopy(ecs_schema)) - if not combined_mappings and not non_ecs_mapping and not ecs_schema: + if (not integration_mappings or existing_mappings) and not non_ecs_mapping and not ecs_schema: 
raise ValueError("No mappings found") index_lookup.update({"rule-non-ecs-index": non_ecs_mapping}) - utils.combine_dicts(combined_mappings, deepcopy(non_ecs_mapping)) return existing_mappings, index_lookup, combined_mappings diff --git a/detection_rules/rule_validators.py b/detection_rules/rule_validators.py index 16955a67cd2..c23b67592c3 100644 --- a/detection_rules/rule_validators.py +++ b/detection_rules/rule_validators.py @@ -771,7 +771,7 @@ def get_unique_field_type(self, field_name: str) -> str | None: # type: ignore[ return None def validate_columns_index_mapping( - self, query_columns: list[dict[str, str]], combined_mappings: dict[str, Any], version: str = "" + self, query_columns: list[dict[str, str]], combined_mappings: dict[str, Any], version: str = "", query: str = "" ) -> bool: """Validate that the columns in the ESQL query match the provided mappings.""" mismatched_columns: list[str] = [] @@ -784,6 +784,9 @@ def validate_columns_index_mapping( # Skip internal fields if column_name in ("_id", "_version", "_index"): continue + # Skip implicit fields + if column_name not in query: + continue column_type = column["type"] # Check if the column exists in combined_mappings or a valid field generated from a function or operator @@ -791,7 +794,7 @@ def validate_columns_index_mapping( schema_type = utils.get_column_from_index_mapping_schema(keys, combined_mappings) schema_type = kql.parser.elasticsearch_type_family(schema_type) if schema_type else None - # If it is in our schema, but Kibana returns unsupported + # If it is in the schema, but Kibana returns unsupported if schema_type and column_type == "unsupported": continue @@ -890,7 +893,7 @@ def remote_validate_rule( # noqa: PLR0913 self.log(f"Combined mappings prepared: {len(combined_mappings)}") # Create remote indices - full_index_str = create_remote_indices(elastic_client, existing_mappings, index_lookup, indices, self.log) + full_index_str = create_remote_indices(elastic_client, existing_mappings, index_lookup, self.log) # Replace all sources with the test indices query = query.replace(indices_str, full_index_str) # type: ignore[reportUnknownVariableType] @@ -914,7 +917,7 @@ def remote_validate_rule( # noqa: PLR0913 for version, mapping in mappings_lookup.items(): self.log(f"Validating {rule_id} against {version} stack") - if not self.validate_columns_index_mapping(query_columns, mapping, version=version): + if not self.validate_columns_index_mapping(query_columns, mapping, version=version, query=query): self.log("Dynamic column(s) have improper formatting.") return response From 3bf76559dc4aa0f327ea811b43503cabd0c6f205 Mon Sep 17 00:00:00 2001 From: eric-forte-elastic Date: Mon, 13 Oct 2025 11:29:25 -0400 Subject: [PATCH 73/93] Add filtered index unit tests --- tests/test_rules_remote.py | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/tests/test_rules_remote.py b/tests/test_rules_remote.py index a505b7ef863..dfaf884abdb 100644 --- a/tests/test_rules_remote.py +++ b/tests/test_rules_remote.py @@ -104,3 +104,38 @@ def test_esql_syntax_error(self): """ with pytest.raises(EsqlSyntaxError): _ = RuleCollection().load_dict(production_rule) + + def test_esql_filtered_index(self): + """Test an ESQL rule's schema validation to properly reduce it by the index.""" + # EsqlSchemaError + file_path = get_path(["tests", "data", "command_control_dummy_production_rule.toml"]) + original_production_rule = load_rule_contents(file_path) + # Test that a ValidationError is raised if the query doesn't match the 
schema + production_rule = deepcopy(original_production_rule)[0] + production_rule["metadata"]["integration"] = ["aws"] + production_rule["rule"]["query"] = """ + from logs-aws.cloud* metadata _id, _version, _index + | where @timestamp > now() - 30 minutes + and aws.cloudtrail.user_identity.type == "IAMUser" + | keep + aws.cloudtrail.user_identity.type + """ + _ = RuleCollection().load_dict(production_rule) + + def test_esql_filtered_index_error(self): + """Test an ESQL rule's schema validation when reduced by the index and check if the field is present.""" + # EsqlSchemaError + file_path = get_path(["tests", "data", "command_control_dummy_production_rule.toml"]) + original_production_rule = load_rule_contents(file_path) + # Test that a ValidationError is raised if the query doesn't match the schema + production_rule = deepcopy(original_production_rule)[0] + production_rule["metadata"]["integration"] = ["aws"] + production_rule["rule"]["query"] = """ + from logs-aws.billing* metadata _id, _version, _index + | where @timestamp > now() - 30 minutes + and aws.cloudtrail.user_identity.type == "IAMUser" + | keep + aws.cloudtrail.user_identity.type + """ + with pytest.raises(EsqlTypeMismatchError): + _ = RuleCollection().load_dict(production_rule) From 51a431668ac3671242bed92caa8760b87674ab9a Mon Sep 17 00:00:00 2001 From: eric-forte-elastic Date: Mon, 13 Oct 2025 11:31:48 -0400 Subject: [PATCH 74/93] Add implicit fields check --- tests/test_rules_remote.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_rules_remote.py b/tests/test_rules_remote.py index dfaf884abdb..2e25478c7ca 100644 --- a/tests/test_rules_remote.py +++ b/tests/test_rules_remote.py @@ -106,7 +106,7 @@ def test_esql_syntax_error(self): _ = RuleCollection().load_dict(production_rule) def test_esql_filtered_index(self): - """Test an ESQL rule's schema validation to properly reduce it by the index.""" + """Test an ESQL rule's schema validation to properly reduce it by the index and handle implicit fields.""" # EsqlSchemaError file_path = get_path(["tests", "data", "command_control_dummy_production_rule.toml"]) original_production_rule = load_rule_contents(file_path) @@ -118,7 +118,7 @@ def test_esql_filtered_index(self): | where @timestamp > now() - 30 minutes and aws.cloudtrail.user_identity.type == "IAMUser" | keep - aws.cloudtrail.user_identity.type + aws.* """ _ = RuleCollection().load_dict(production_rule) From 2362c2b82ce10d0f020e3569cc95d7512a703d8a Mon Sep 17 00:00:00 2001 From: eric-forte-elastic Date: Tue, 14 Oct 2025 01:22:37 -0400 Subject: [PATCH 75/93] Add messy index filtering --- detection_rules/index_mappings.py | 42 ++++++++++++++++++++--- detection_rules/rule_validators.py | 2 +- tests/test_rules_remote.py | 54 ++++++++++++++++++++++++++++++ 3 files changed, 92 insertions(+), 6 deletions(-) diff --git a/detection_rules/index_mappings.py b/detection_rules/index_mappings.py index 729d224fc8a..710884d86c6 100644 --- a/detection_rules/index_mappings.py +++ b/detection_rules/index_mappings.py @@ -227,12 +227,14 @@ def get_filtered_index_schema( ecs_schema: dict[str, Any], non_ecs_mapping: dict[str, Any], custom_mapping: dict[str, Any], -) -> dict[str, Any]: +) -> tuple[dict[str, Any], dict[str, Any]]: """Check if the provided indices are known based on the integration format. 
Returns the combined schema.""" non_ecs_indices = ecs.get_non_ecs_schema() custom_indices = ecs.get_custom_schemas() + # TODO finish adding unit tests for these cases + # Assumes valid index format is logs-.* or logs-.-* filtered_keys = {"logs-" + key.replace("-", ".") + "*" for key in index_lookup if key not in indices} filtered_keys.update({"logs-" + key.replace("-", ".") + "-*" for key in index_lookup if key not in indices}) @@ -248,13 +250,16 @@ def get_filtered_index_schema( matches: list[str] = [] for index in indices: pattern = re.compile(index.replace(".", r"\.").replace("*", ".*").rstrip("-")) - matches = [key for key in filtered_keys if pattern.fullmatch(key)] + matches.extend([key for key in filtered_keys if pattern.fullmatch(key)]) if not matches: raise EsqlUnknownIndexError( f"Unknown index pattern(s): {', '.join(indices)}. Known patterns: {', '.join(filtered_keys)}" ) + if "logs-endpoint.alerts-*" in matches and "logs-endpoint.events.alerts-*" not in matches: + matches.append("logs-endpoint.events.alerts-*") + filtered_index_lookup = { "logs-" + key.replace("-", ".") + "*": value for key, value in index_lookup.items() if key not in indices } @@ -268,11 +273,35 @@ def get_filtered_index_schema( filtered_index_lookup.update(custom_mapping) combined_mappings: dict[str, Any] = {} + utils.combine_dicts(combined_mappings, deepcopy(ecs_schema)) + for match in matches: utils.combine_dicts(combined_mappings, deepcopy(filtered_index_lookup.get(match, {}))) - utils.combine_dicts(combined_mappings, deepcopy(ecs_schema)) - return combined_mappings + filtered_index_mapping: dict[str, Any] = {} + + index_lookup_indices: dict[str, Any] = {} + + for key in index_lookup: + if key not in indices: + # Add logs-* and logs--* + transformed_key_star = f"logs-{key.replace('-', '.')}*" + transformed_key_dash = f"logs-{key.replace('-', '.')}-*" + if "logs-endpoint." in transformed_key_star or "logs-endpoint." 
in transformed_key_dash: + transformed_key_star = transformed_key_star.replace("logs-endpoint.", "logs-endpoint.events.") + transformed_key_dash = transformed_key_dash.replace("logs-endpoint.", "logs-endpoint.events.") + filtered_keys.update([transformed_key_star, transformed_key_dash]) + index_lookup_indices[transformed_key_star] = key.replace("-", ".") + index_lookup_indices[transformed_key_dash] = key.replace("-", ".") + + for match in matches: + if match in index_lookup_indices: + index_name = index_lookup_indices[match].replace(".", "-") + filtered_index_mapping[index_name] = index_lookup[index_name] + else: + filtered_index_mapping[match] = filtered_index_lookup.get(match, {}) + + return combined_mappings, filtered_index_mapping def create_remote_indices( @@ -380,6 +409,7 @@ def find_flattened_fields_with_subfields(mapping: dict[str, Any], path: str = "" def get_ecs_schema_mappings(current_version: Version) -> dict[str, Any]: """Get the ECS schema in an index mapping format (nested schema) handling scaled floats.""" + # TODO potentially update this to pull from _nested as an option if needed ecs_version = get_stack_schemas()[str(current_version)]["ecs"] ecs_schemas = ecs.get_schemas() ecs_schema_flattened: dict[str, Any] = {} @@ -446,7 +476,9 @@ def prepare_mappings( # noqa: PLR0913 ecs_schema = get_ecs_schema_mappings(current_version) # Filter combined mappings based on the provided indices - combined_mappings = get_filtered_index_schema(indices, index_lookup, ecs_schema, non_ecs_mapping, custom_mapping) + combined_mappings, index_lookup = get_filtered_index_schema( + indices, index_lookup, ecs_schema, non_ecs_mapping, custom_mapping + ) index_lookup.update({"rule-ecs-index": ecs_schema}) diff --git a/detection_rules/rule_validators.py b/detection_rules/rule_validators.py index c23b67592c3..a42827c7a8d 100644 --- a/detection_rules/rule_validators.py +++ b/detection_rules/rule_validators.py @@ -755,7 +755,7 @@ def unique_fields(self) -> list[str]: # type: ignore[reportIncompatibleMethodOv def get_esql_query_indices(self, query: str) -> tuple[str, list[str]]: """Extract indices from an ES|QL query.""" match = FROM_SOURCES_REGEX.search(query) - + # TODO add lookup join and enrich functions too if not match: return "", [] diff --git a/tests/test_rules_remote.py b/tests/test_rules_remote.py index 2e25478c7ca..5a7886e673c 100644 --- a/tests/test_rules_remote.py +++ b/tests/test_rules_remote.py @@ -137,5 +137,59 @@ def test_esql_filtered_index_error(self): | keep aws.cloudtrail.user_identity.type """ + with pytest.raises(EsqlSchemaError): + _ = RuleCollection().load_dict(production_rule) + + def test_esql_endpoint_alerts_index(self): + """Test an ESQL rule's schema validation using ecs fields in the alerts index.""" + # EsqlSchemaError + file_path = get_path(["tests", "data", "command_control_dummy_production_rule.toml"]) + original_production_rule = load_rule_contents(file_path) + production_rule = deepcopy(original_production_rule)[0] + production_rule["rule"]["query"] = """ + from logs-endpoint.alerts-* + | where event.code in ("malicious_file", "memory_signature", "shellcode_thread") and rule.name is not null + | keep host.id, rule.name, event.code + | stats Esql.host_id_count_distinct = count_distinct(host.id) by rule.name, event.code + | where Esql.host_id_count_distinct >= 3 + """ + _ = RuleCollection().load_dict(production_rule) + + def test_esql_endpoint_alerts_index_endpoint_fields(self): + """Test an ESQL rule's schema validation using endpoint integration fields in the alerts 
index.""" + # EsqlSchemaError + file_path = get_path(["tests", "data", "command_control_dummy_production_rule.toml"]) + original_production_rule = load_rule_contents(file_path) + production_rule = deepcopy(original_production_rule)[0] + # production_rule["metadata"]["integration"] = ["endpoint"] + production_rule["rule"]["query"] = """ + from logs-endpoint.alerts-* + | where event.code in ("malicious_file", "memory_signature", "shellcode_thread") and rule.name is not null and file.Ext.entry_modified > 0 + | keep host.id, rule.name, event.code, file.Ext.entry_modified + | stats Esql.host_id_count_distinct = count_distinct(host.id) by rule.name, event.code, file.Ext.entry_modified + | where Esql.host_id_count_distinct >= 3 + """ + # TODO this should Error when endpoint is commented out + # Caused by elastic container project issue with pytest.raises(EsqlTypeMismatchError): _ = RuleCollection().load_dict(production_rule) + + def test_esql_filtered_keep(self): + """Test an ESQL rule's schema validation.""" + # EsqlSchemaError + file_path = get_path(["tests", "data", "command_control_dummy_production_rule.toml"]) + original_production_rule = load_rule_contents(file_path) + # Test that a ValidationError is raised if the query doesn't match the schema + production_rule = deepcopy(original_production_rule)[0] + production_rule["metadata"]["integration"] = ["aws"] + production_rule["rule"]["query"] = """ + from logs-aws.billing* metadata _id, _version, _index + | where @timestamp > now() - 30 minutes and aws.cloudtrail.user_identity.type == "IAMUser" + | keep host.id, rule.name, event.code + | stats Esql.host_id_count_distinct = count_distinct(host.id) by rule.name, event.code + | where Esql.host_id_count_distinct >= 3 + """ + # TODO this should Error + # Caused by not filtering schemas by index when sent to Kibana + with pytest.raises(EsqlSchemaError): + _ = RuleCollection().load_dict(production_rule) From e6394c95bcdbeb5ad49b9ec536bc2fb99dc1bcf5 Mon Sep 17 00:00:00 2001 From: eric-forte-elastic Date: Tue, 14 Oct 2025 10:43:47 -0400 Subject: [PATCH 76/93] Add error note --- tests/test_rules_remote.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/tests/test_rules_remote.py b/tests/test_rules_remote.py index 5a7886e673c..5d2d4704b04 100644 --- a/tests/test_rules_remote.py +++ b/tests/test_rules_remote.py @@ -157,11 +157,10 @@ def test_esql_endpoint_alerts_index(self): def test_esql_endpoint_alerts_index_endpoint_fields(self): """Test an ESQL rule's schema validation using endpoint integration fields in the alerts index.""" - # EsqlSchemaError file_path = get_path(["tests", "data", "command_control_dummy_production_rule.toml"]) original_production_rule = load_rule_contents(file_path) production_rule = deepcopy(original_production_rule)[0] - # production_rule["metadata"]["integration"] = ["endpoint"] + production_rule["metadata"]["integration"] = [] production_rule["rule"]["query"] = """ from logs-endpoint.alerts-* | where event.code in ("malicious_file", "memory_signature", "shellcode_thread") and rule.name is not null and file.Ext.entry_modified > 0 @@ -169,8 +168,8 @@ def test_esql_endpoint_alerts_index_endpoint_fields(self): | stats Esql.host_id_count_distinct = count_distinct(host.id) by rule.name, event.code, file.Ext.entry_modified | where Esql.host_id_count_distinct >= 3 """ - # TODO this should Error when endpoint is commented out - # Caused by elastic container project issue + # This is a type mismatch error due to Elastic Container project including the Endpoint 
integration by default. + # Otherwise one would expect an EsqlSchemaError due to the field not being present in the alerts index. with pytest.raises(EsqlTypeMismatchError): _ = RuleCollection().load_dict(production_rule) @@ -189,7 +188,5 @@ def test_esql_filtered_keep(self): | stats Esql.host_id_count_distinct = count_distinct(host.id) by rule.name, event.code | where Esql.host_id_count_distinct >= 3 """ - # TODO this should Error - # Caused by not filtering schemas by index when sent to Kibana with pytest.raises(EsqlSchemaError): _ = RuleCollection().load_dict(production_rule) From c989677a759a3df732544c608d02bf3269458ea7 Mon Sep 17 00:00:00 2001 From: eric-forte-elastic Date: Tue, 14 Oct 2025 11:20:00 -0400 Subject: [PATCH 77/93] Remove finished TODOs --- detection_rules/index_mappings.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/detection_rules/index_mappings.py b/detection_rules/index_mappings.py index 710884d86c6..e8aa7ad2d30 100644 --- a/detection_rules/index_mappings.py +++ b/detection_rules/index_mappings.py @@ -233,8 +233,6 @@ def get_filtered_index_schema( non_ecs_indices = ecs.get_non_ecs_schema() custom_indices = ecs.get_custom_schemas() - # TODO finish adding unit tests for these cases - # Assumes valid index format is logs-<package>.<dataset>* or logs-<package>.<dataset>-* filtered_keys = {"logs-" + key.replace("-", ".") + "*" for key in index_lookup if key not in indices} filtered_keys.update({"logs-" + key.replace("-", ".") + "-*" for key in index_lookup if key not in indices}) @@ -409,7 +407,6 @@ def find_flattened_fields_with_subfields(mapping: dict[str, Any], path: str = "" def get_ecs_schema_mappings(current_version: Version) -> dict[str, Any]: """Get the ECS schema in an index mapping format (nested schema) handling scaled floats.""" - # TODO potentially update this to pull from _nested as an option if needed ecs_version = get_stack_schemas()[str(current_version)]["ecs"] ecs_schemas = ecs.get_schemas() ecs_schema_flattened: dict[str, Any] = {} From b971823e233fc5441665a1f1b363159cc0b36ad5 Mon Sep 17 00:00:00 2001 From: eric-forte-elastic Date: Tue, 14 Oct 2025 11:52:33 -0400 Subject: [PATCH 78/93] Remove on push --- .github/workflows/esql-validation.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/esql-validation.yml b/.github/workflows/esql-validation.yml index 7887f630021..f0ad550a53c 100644 --- a/.github/workflows/esql-validation.yml +++ b/.github/workflows/esql-validation.yml @@ -1,7 +1,5 @@ name: ES|QL Validation on: - push: - branches: [ "main", "8.*", "9.*" ] pull_request: branches: [ "*" ] paths: From d9e788cc53afa82177419aa637220b447bd70339 Mon Sep 17 00:00:00 2001 From: eric-forte-elastic Date: Tue, 14 Oct 2025 12:22:42 -0400 Subject: [PATCH 79/93] Clean up index validation --- detection_rules/index_mappings.py | 37 +++++++++++++++++------------- detection_rules/rule_validators.py | 1 - tests/test_rules_remote.py | 20 +++++++++++++++- 3 files changed, 40 insertions(+), 18 deletions(-) diff --git a/detection_rules/index_mappings.py b/detection_rules/index_mappings.py index e8aa7ad2d30..7fcd3ea0816 100644 --- a/detection_rules/index_mappings.py +++ b/detection_rules/index_mappings.py @@ -221,6 +221,23 @@ def prepare_integration_mappings( # noqa: PLR0913 return integration_mappings, index_lookup +def get_index_to_package_lookup(indices: list[str], index_lookup: dict[str, Any]) -> dict[str, Any]: + """Get a lookup of index patterns to package names for the provided indices.""" + index_lookup_indices: dict[str, Any] = {} + for key in index_lookup: + if key not in 
indices: + # Add logs-<key>* and logs-<key>-* + transformed_key_star = f"logs-{key.replace('-', '.')}*" + transformed_key_dash = f"logs-{key.replace('-', '.')}-*" + if "logs-endpoint." in transformed_key_star or "logs-endpoint." in transformed_key_dash: + transformed_key_star = transformed_key_star.replace("logs-endpoint.", "logs-endpoint.events.") + transformed_key_dash = transformed_key_dash.replace("logs-endpoint.", "logs-endpoint.events.") + index_lookup_indices[transformed_key_star] = key.replace("-", ".") + index_lookup_indices[transformed_key_dash] = key.replace("-", ".") + + return index_lookup_indices + + def get_filtered_index_schema( indices: list[str], index_lookup: dict[str, Any], @@ -258,6 +275,7 @@ def get_filtered_index_schema( if "logs-endpoint.alerts-*" in matches and "logs-endpoint.events.alerts-*" not in matches: matches.append("logs-endpoint.events.alerts-*") + # Now that we have the matched indices, we need to filter the index lookup to only include those indices filtered_index_lookup = { "logs-" + key.replace("-", ".") + "*": value for key, value in index_lookup.items() if key not in indices } @@ -270,28 +288,15 @@ def get_filtered_index_schema( filtered_index_lookup.update(non_ecs_mapping) filtered_index_lookup.update(custom_mapping) + # Reduce the combined mappings to only the matched indices (local schema validation source of truth) combined_mappings: dict[str, Any] = {} utils.combine_dicts(combined_mappings, deepcopy(ecs_schema)) - for match in matches: utils.combine_dicts(combined_mappings, deepcopy(filtered_index_lookup.get(match, {}))) + # Reduce the index lookup to only the matched indices (remote/Kibana schema validation source of truth) filtered_index_mapping: dict[str, Any] = {} - - index_lookup_indices: dict[str, Any] = {} - - for key in index_lookup: - if key not in indices: - # Add logs-<key>* and logs-<key>-* - transformed_key_star = f"logs-{key.replace('-', '.')}*" - transformed_key_dash = f"logs-{key.replace('-', '.')}-*" - if "logs-endpoint." in transformed_key_star or "logs-endpoint." 
in transformed_key_dash: - transformed_key_star = transformed_key_star.replace("logs-endpoint.", "logs-endpoint.events.") - transformed_key_dash = transformed_key_dash.replace("logs-endpoint.", "logs-endpoint.events.") - filtered_keys.update([transformed_key_star, transformed_key_dash]) - index_lookup_indices[transformed_key_star] = key.replace("-", ".") - index_lookup_indices[transformed_key_dash] = key.replace("-", ".") - + index_lookup_indices = get_index_to_package_lookup(indices, index_lookup) for match in matches: if match in index_lookup_indices: index_name = index_lookup_indices[match].replace(".", "-") diff --git a/detection_rules/rule_validators.py b/detection_rules/rule_validators.py index a42827c7a8d..b87a7b9b9bb 100644 --- a/detection_rules/rule_validators.py +++ b/detection_rules/rule_validators.py @@ -755,7 +755,6 @@ def unique_fields(self) -> list[str]: # type: ignore[reportIncompatibleMethodOv def get_esql_query_indices(self, query: str) -> tuple[str, list[str]]: """Extract indices from an ES|QL query.""" match = FROM_SOURCES_REGEX.search(query) - # TODO add lookup join and enrich functions too if not match: return "", [] diff --git a/tests/test_rules_remote.py b/tests/test_rules_remote.py index 5d2d4704b04..d019665eaf8 100644 --- a/tests/test_rules_remote.py +++ b/tests/test_rules_remote.py @@ -8,7 +8,7 @@ import pytest -from detection_rules.esql_errors import EsqlSchemaError, EsqlSyntaxError, EsqlTypeMismatchError +from detection_rules.esql_errors import EsqlSchemaError, EsqlSyntaxError, EsqlTypeMismatchError, EsqlUnknownIndexError from detection_rules.misc import ( get_default_config, getdefault, @@ -155,6 +155,24 @@ def test_esql_endpoint_alerts_index(self): """ _ = RuleCollection().load_dict(production_rule) + + def test_esql_endpoint_unknown_index(self): + """Test an ESQL rule's index validation. This is expected to error on an unknown index.""" + # EsqlSchemaError + file_path = get_path(["tests", "data", "command_control_dummy_production_rule.toml"]) + original_production_rule = load_rule_contents(file_path) + production_rule = deepcopy(original_production_rule)[0] + production_rule["rule"]["query"] = """ + from logs-endpoint.fake-* + | where event.code in ("malicious_file", "memory_signature", "shellcode_thread") and rule.name is not null + | keep host.id, rule.name, event.code + | stats Esql.host_id_count_distinct = count_distinct(host.id) by rule.name, event.code + | where Esql.host_id_count_distinct >= 3 + """ + with pytest.raises(EsqlUnknownIndexError): + _ = RuleCollection().load_dict(production_rule) + + def test_esql_endpoint_alerts_index_endpoint_fields(self): """Test an ESQL rule's schema validation using endpoint integration fields in the alerts index.""" file_path = get_path(["tests", "data", "command_control_dummy_production_rule.toml"]) From 3eecaaa19a3529f4b98ec3fa3ca2248e0d7dc71c Mon Sep 17 00:00:00 2001 From: eric-forte-elastic Date: Tue, 14 Oct 2025 12:37:16 -0400 Subject: [PATCH 80/93] linting --- tests/test_rules_remote.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/test_rules_remote.py b/tests/test_rules_remote.py index d019665eaf8..fdfb3da789e 100644 --- a/tests/test_rules_remote.py +++ b/tests/test_rules_remote.py @@ -155,7 +155,6 @@ def test_esql_endpoint_alerts_index(self): """ _ = RuleCollection().load_dict(production_rule) - def test_esql_endpoint_unknown_index(self): """Test an ESQL rule's index validation. 
This is expected to error on an unknown index.""" # EsqlSchemaError @@ -172,7 +171,6 @@ def test_esql_endpoint_unknown_index(self): with pytest.raises(EsqlUnknownIndexError): _ = RuleCollection().load_dict(production_rule) - def test_esql_endpoint_alerts_index_endpoint_fields(self): """Test an ESQL rule's schema validation using endpoint integration fields in the alerts index.""" file_path = get_path(["tests", "data", "command_control_dummy_production_rule.toml"]) From 93965df8fd75b3e0310bdbba7563e07fcb48b3f6 Mon Sep 17 00:00:00 2001 From: eric-forte-elastic Date: Tue, 14 Oct 2025 12:55:44 -0400 Subject: [PATCH 81/93] Add remote unit test variable --- .github/workflows/pythonpackage.yml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/.github/workflows/pythonpackage.yml b/.github/workflows/pythonpackage.yml index 314be2fd127..6e6f5dd4b30 100644 --- a/.github/workflows/pythonpackage.yml +++ b/.github/workflows/pythonpackage.yml @@ -37,6 +37,13 @@ jobs: env: # only run the test test_rule_change_has_updated_date on pull request events to main GITHUB_EVENT_NAME: "${{ github.event_name}}" + # only run remote validation if repo is set to do so otherwise defer to .github/workflows/esql-validation.yml + DR_REMOTE_ESQL_VALIDATION: "${{ vars.remote_esql_validation }}" + DR_CLOUD_ID: ${{ secrets.cloud_id }} + DR_KIBANA_URL: ${{ secrets.cloud_id }} + DR_ELASTICSEARCH_URL: ${{ secrets.cloud_id }} + DR_API_KEY: ${{ secrets.api_key }} + DR_IGNORE_SSL_ERRORS: ${{ secrets.cloud_id }} run: | python -m detection_rules test From e6236df1b597a31515c026bb95cac82b33c29df6 Mon Sep 17 00:00:00 2001 From: eric-forte-elastic Date: Tue, 14 Oct 2025 14:15:39 -0400 Subject: [PATCH 82/93] Add ESQL integration tests to test_remote_cli --- detection_rules/etc/test_remote_cli.bash | 3 +++ 1 file changed, 3 insertions(+) diff --git a/detection_rules/etc/test_remote_cli.bash b/detection_rules/etc/test_remote_cli.bash index 386ed327f22..ca220bd2bdf 100755 --- a/detection_rules/etc/test_remote_cli.bash +++ b/detection_rules/etc/test_remote_cli.bash @@ -27,6 +27,9 @@ python -m detection_rules kibana import-rules -o -e -ac echo "Performing a rule export..." python -m detection_rules kibana export-rules -d $CUSTOM_RULES_DIR -ac -e -sv --custom-rules-only +echo "Testing ESQL Rules..." +python -m pytest tests/test_rules_remote.py::TestRemoteRules + echo "Removing generated files..." 
rm -rf $CUSTOM_RULES_DIR set -e CUSTOM_RULES_DIR From 4a41c68c007701ab1f3421df4e248518f26d74e5 Mon Sep 17 00:00:00 2001 From: eric-forte-elastic Date: Tue, 14 Oct 2025 15:19:40 -0400 Subject: [PATCH 83/93] Error output cleanup --- detection_rules/devtools.py | 21 +++++++-------- detection_rules/esql_errors.py | 45 ++++++++++++++++++++++--------- detection_rules/index_mappings.py | 10 +++---- detection_rules/misc.py | 5 +++- 4 files changed, 52 insertions(+), 29 deletions(-) diff --git a/detection_rules/devtools.py b/detection_rules/devtools.py index f56b1d4840d..f3bfeb35653 100644 --- a/detection_rules/devtools.py +++ b/detection_rules/devtools.py @@ -40,7 +40,9 @@ from .docs import REPO_DOCS_DIR, IntegrationSecurityDocs, IntegrationSecurityDocsMDX from .ecs import download_endpoint_schemas, download_schemas from .endgame import EndgameSchemaManager -from .esql_errors import EsqlKibanaBaseError, EsqlSchemaError, EsqlSyntaxError, EsqlTypeMismatchError +from .esql_errors import ( + ESQL_EXCEPTION_TYPES, +) from .eswrap import CollectEvents, add_range_to_dsl from .ghwrap import GithubClient, update_gist from .integrations import ( @@ -1446,16 +1448,13 @@ def esql_remote_validation( validator = ESQLValidator(r.contents.data.query) # type: ignore[reportIncompatibleMethodOverride] _ = validator.remote_validate_rule_contents(kibana_client, elastic_client, r.contents, verbosity) break - except ( - ValueError, - BadRequestError, - EsqlSchemaError, - EsqlSyntaxError, - EsqlTypeMismatchError, - EsqlKibanaBaseError, - ) as e: - click.echo(f"FAILURE: {e}") - fail_list.append(f"{r.contents.data.rule_id} FAILURE: {type(e)}: {e}") + except (ValueError, BadRequestError, *ESQL_EXCEPTION_TYPES) as e: # type: ignore[reportUnknownMemberType] + e_type = type(e) # type: ignore[reportUnknownMemberType] + if e_type in ESQL_EXCEPTION_TYPES: + _ = e.show() # type: ignore[reportUnknownMemberType] + else: + click.echo(f"FAILURE: {e_type}: {e}") # type: ignore[reportUnknownMemberType] + fail_list.append(f"{r.contents.data.rule_id} FAILURE: {e_type}: {e}") # type: ignore[reportUnknownMemberType] failed_count += 1 break except ESConnectionError as e: diff --git a/detection_rules/esql_errors.py b/detection_rules/esql_errors.py index 5a43729cabb..8316888cc8c 100644 --- a/detection_rules/esql_errors.py +++ b/detection_rules/esql_errors.py @@ -5,9 +5,11 @@ """ESQL exceptions.""" +from collections.abc import Sequence + from elasticsearch import Elasticsearch # type: ignore[reportMissingTypeStubs] -from .misc import getdefault +from .misc import ClientError, getdefault __all__ = ( "EsqlKibanaBaseError", @@ -21,7 +23,7 @@ def cleanup_empty_indices( - elastic_client: Elasticsearch, index_patterns: tuple[str, ...] 
= ("rule-test-*", "test-*") + elastic_client: Elasticsearch, index_patterns: Sequence[str] = ("rule-test-*", "test-*") ) -> None: """Delete empty indices matching the given patterns.""" if getdefault("skip_empty_index_cleanup")(): @@ -33,12 +35,16 @@ def cleanup_empty_indices( _ = elastic_client.indices.delete(index=empty_index) -class EsqlKibanaBaseError(Exception): +class EsqlKibanaBaseError(ClientError): """Base class for ESQL exceptions with cleanup logic.""" - def __init__(self, message: str, elastic_client: Elasticsearch) -> None: + def __init__( + self, + message: str, + elastic_client: Elasticsearch, + ) -> None: cleanup_empty_indices(elastic_client) - super().__init__(message) + super().__init__(message, original_error=self) class EsqlSchemaError(EsqlKibanaBaseError): @@ -53,24 +59,39 @@ class EsqlSyntaxError(EsqlKibanaBaseError): """Error with ESQL syntax.""" -class EsqlTypeMismatchError(Exception): +class EsqlTypeMismatchError(ClientError): """Error when validating types in ESQL. Can occur in stack or local schema comparison.""" - def __init__(self, message: str, elastic_client: Elasticsearch | None = None) -> None: + def __init__( + self, + message: str, + elastic_client: Elasticsearch | None = None, + ) -> None: if elastic_client: cleanup_empty_indices(elastic_client) - super().__init__(message) + super().__init__(message, original_error=self) -class EsqlSemanticError(Exception): +class EsqlSemanticError(ClientError): """Error with ESQL semantics. Validated through regex enforcement.""" def __init__(self, message: str) -> None: - super().__init__(message) + super().__init__(message, original_error=self) -class EsqlUnknownIndexError(Exception): +class EsqlUnknownIndexError(ClientError): """Error with ESQL Indices. Validated through regex enforcement.""" def __init__(self, message: str) -> None: - super().__init__(message) + super().__init__(message, original_error=self) + + +ESQL_EXCEPTION_TYPES = ( + EsqlSchemaError, + EsqlSyntaxError, + EsqlUnsupportedTypeError, + EsqlTypeMismatchError, + EsqlKibanaBaseError, + EsqlSemanticError, + EsqlUnknownIndexError, +) diff --git a/detection_rules/index_mappings.py b/detection_rules/index_mappings.py index 7fcd3ea0816..92ba4bd28a1 100644 --- a/detection_rules/index_mappings.py +++ b/detection_rules/index_mappings.py @@ -347,14 +347,14 @@ def execute_query_against_indices( except BadRequestError as e: error_msg = str(e) if "parsing_exception" in error_msg: - raise EsqlSyntaxError(str(e), elastic_client) from e + raise EsqlSyntaxError(str(e), elastic_client) from None if "Unknown column" in error_msg: - raise EsqlSchemaError(str(e), elastic_client) from e + raise EsqlSchemaError(str(e), elastic_client) from None if "verification_exception" in error_msg and "unsupported type" in error_msg: - raise EsqlUnsupportedTypeError(str(e), elastic_client) from e + raise EsqlUnsupportedTypeError(str(e), elastic_client) from None if "verification_exception" in error_msg: - raise EsqlTypeMismatchError(str(e), elastic_client) from e - raise EsqlKibanaBaseError(str(e), elastic_client) from e + raise EsqlTypeMismatchError(str(e), elastic_client) from None + raise EsqlKibanaBaseError(str(e), elastic_client) from None if delete_indices or not misc.getdefault("skip_empty_index_cleanup")(): for index_str in test_index_str.split(","): response = elastic_client.indices.delete(index=index_str.strip()) diff --git a/detection_rules/misc.py b/detection_rules/misc.py index 064e60c2912..a8d93a3b46c 100644 --- a/detection_rules/misc.py +++ b/detection_rules/misc.py @@ 
-48,7 +48,10 @@ def __init__(self, message: str, original_error: Exception | None = None) -> Non def show(self, file: IO[Any] | None = None, err: bool = True) -> None: """Print the error to the console.""" - msg = f"{click.style(f'CLI Error ({self.original_error_type})', fg='red', bold=True)}: {self.format_message()}" + header = f"CLI Error ({self.original_error_type})" + if "Esql" in str(self.original_error_type): + header = f"{self.original_error_type}" + msg = f"{click.style(header, fg='red', bold=True)}: {self.format_message()}" click.echo(msg, err=err, file=file) From 13f81dd853891fcf62f4b4edd449e32b39e39f4d Mon Sep 17 00:00:00 2001 From: eric-forte-elastic Date: Tue, 14 Oct 2025 15:30:31 -0400 Subject: [PATCH 84/93] Remove if else as it is not needed --- detection_rules/misc.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/detection_rules/misc.py b/detection_rules/misc.py index a8d93a3b46c..c8aa0fb53d9 100644 --- a/detection_rules/misc.py +++ b/detection_rules/misc.py @@ -48,9 +48,7 @@ def __init__(self, message: str, original_error: Exception | None = None) -> Non def show(self, file: IO[Any] | None = None, err: bool = True) -> None: """Print the error to the console.""" - header = f"CLI Error ({self.original_error_type})" - if "Esql" in str(self.original_error_type): - header = f"{self.original_error_type}" + header = f"Error ({self.original_error_type})" msg = f"{click.style(header, fg='red', bold=True)}: {self.format_message()}" click.echo(msg, err=err, file=file) From a252f30df7729154cfc6724b5b39ee5708056697 Mon Sep 17 00:00:00 2001 From: eric-forte-elastic Date: Tue, 14 Oct 2025 21:51:54 -0400 Subject: [PATCH 85/93] Add cross cluster search support --- detection_rules/devtools.py | 1 + detection_rules/index_mappings.py | 1 + detection_rules/rule_validators.py | 6 +++++- 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/detection_rules/devtools.py b/detection_rules/devtools.py index f3bfeb35653..8d9ce31098c 100644 --- a/detection_rules/devtools.py +++ b/detection_rules/devtools.py @@ -1451,6 +1451,7 @@ def esql_remote_validation( except (ValueError, BadRequestError, *ESQL_EXCEPTION_TYPES) as e: # type: ignore[reportUnknownMemberType] e_type = type(e) # type: ignore[reportUnknownMemberType] if e_type in ESQL_EXCEPTION_TYPES: + click.echo(click.style(f"{r.contents.data.rule_id} ", fg="red", bold=True), nl=False) _ = e.show() # type: ignore[reportUnknownMemberType] else: click.echo(f"FAILURE: {e_type}: {e}") # type: ignore[reportUnknownMemberType] diff --git a/detection_rules/index_mappings.py b/detection_rules/index_mappings.py index 92ba4bd28a1..ea1150f451b 100644 --- a/detection_rules/index_mappings.py +++ b/detection_rules/index_mappings.py @@ -261,6 +261,7 @@ def get_filtered_index_schema( filtered_keys.update(non_ecs_indices.keys()) filtered_keys.update(custom_indices.keys()) filtered_keys.add("logs-endpoint.alerts-*") + filtered_keys.update(indices) matches: list[str] = [] for index in indices: diff --git a/detection_rules/rule_validators.py b/detection_rules/rule_validators.py index b87a7b9b9bb..aa45a88c41d 100644 --- a/detection_rules/rule_validators.py +++ b/detection_rules/rule_validators.py @@ -759,7 +759,11 @@ def get_esql_query_indices(self, query: str) -> tuple[str, list[str]]: return "", [] sources_str = match.group("sources") - return sources_str, [source.strip() for source in sources_str.split(",")] + # Truncate cross-cluster search indices (cluster:index) to their local index names + sources_list: list[str] = [ + source.split(":", 1)[-1].strip() if ":" in source else source.strip() for source in sources_str.split(",") + ] + return sources_str, sources_list def get_unique_field_type(self, field_name: str) -> str | None: # type: ignore[reportIncompatibleMethodOverride] """Get the type of the unique field. Requires remote validation to have occurred."""
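The list comprehension added in patch 85 is the heart of the cross-cluster support: ES|QL sources may be written as cluster:index, and only the local index name can be checked against schemas. A minimal, self-contained sketch of the behavior (the sample sources string is illustrative, not taken from the repo):

sources_str = "remote1:logs-aws.cloudtrail-*, logs-endpoint.events.process-*"
sources_list = [
    # drop everything before the first ":" (the remote cluster name), then trim whitespace
    source.split(":", 1)[-1].strip() if ":" in source else source.strip()
    for source in sources_str.split(",")
]
print(sources_list)  # ['logs-aws.cloudtrail-*', 'logs-endpoint.events.process-*']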
From 3b0a88b0bdd24b0eaea357af7dc95bdadbcc2769 Mon Sep 17 00:00:00 2001 From: eric-forte-elastic Date: Wed, 15 Oct 2025 12:14:22 -0400 Subject: [PATCH 86/93] Docstring and logging cleanup --- detection_rules/devtools.py | 2 +- detection_rules/esql.py | 2 +- detection_rules/rule_validators.py | 4 +++- tests/test_rules_remote.py | 8 +++----- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/detection_rules/devtools.py b/detection_rules/devtools.py index 8d9ce31098c..26b6fdba4e2 100644 --- a/detection_rules/devtools.py +++ b/detection_rules/devtools.py @@ -1450,7 +1450,7 @@ def esql_remote_validation( break except (ValueError, BadRequestError, *ESQL_EXCEPTION_TYPES) as e: # type: ignore[reportUnknownMemberType] e_type = type(e) # type: ignore[reportUnknownMemberType] - if e_type in ESQL_EXCEPTION_TYPES: + if isinstance(e, ESQL_EXCEPTION_TYPES): click.echo(click.style(f"{r.contents.data.rule_id} ", fg="red", bold=True), nl=False) _ = e.show() # type: ignore[reportUnknownMemberType] else: diff --git a/detection_rules/esql.py b/detection_rules/esql.py index a14cc91004d..12515ff6e85 100644 --- a/detection_rules/esql.py +++ b/detection_rules/esql.py @@ -3,7 +3,7 @@ # 2.0; you may not use this file except in compliance with the Elastic License # 2.0. -"""ESQL exceptions.""" +"""ESQL Query Parsing Classes.""" import re from dataclasses import dataclass diff --git a/detection_rules/rule_validators.py b/detection_rules/rule_validators.py index aa45a88c41d..9f05c94fce2 100644 --- a/detection_rules/rule_validators.py +++ b/detection_rules/rule_validators.py @@ -886,7 +886,9 @@ def remote_validate_rule( # noqa: PLR0913 self.log(f"Extracted indices from query: {', '.join(indices)}") event_dataset_integrations = get_esql_query_event_dataset_integrations(query) - self.log(f"Extracted Event Dataset integrations from query: {', '.join(indices)}") + self.log( + f"Extracted Event Dataset integrations from query: {', '.join(str(integration) for integration in event_dataset_integrations)}" + ) # Get mappings for all matching existing index templates existing_mappings, index_lookup, combined_mappings = prepare_mappings( diff --git a/tests/test_rules_remote.py b/tests/test_rules_remote.py index fdfb3da789e..d31631a474a 100644 --- a/tests/test_rules_remote.py +++ b/tests/test_rules_remote.py @@ -18,8 +18,6 @@ from .base import BaseRuleTest -MAX_RETRIES = 3 - @unittest.skipIf(get_default_config() is None, "Skipping remote validation due to missing config") @unittest.skipIf( @@ -49,7 +47,7 @@ def test_esql_related_integrations(self): assert integration["package"] == "aws", f"Expected 'aws', but got {integration['package']}" def test_esql_event_dataset_schema_error(self): - """Test an ESQL rules that uses event.dataset field in the query validated the fields correctly.""" + """Test an ESQL rule whose event.dataset filter restricts the schema, causing validation to fail.""" # EsqlSchemaError file_path = get_path(["tests", "data", "command_control_dummy_production_rule.toml"]) original_production_rule = load_rule_contents(file_path) # Test that a ValidationError is raised if the query doesn't match the schema @@ -68,7 +66,7 @@ def test_esql_event_dataset_schema_error(self): _ = RuleCollection().load_dict(production_rule) def test_esql_type_mismatch_error(self): - """Test an ESQL rules that uses event.dataset field in the query validated the fields correctly.""" + """Test an ESQL rule that produces a type error comparing a keyword to a number.""" # EsqlSchemaError file_path = get_path(["tests", "data", "command_control_dummy_production_rule.toml"]) original_production_rule = load_rule_contents(file_path) # Test that a ValidationError is raised if the query doesn't match the schema @@ -87,7 +85,7 @@ def test_esql_type_mismatch_error(self): _ = RuleCollection().load_dict(production_rule) def test_esql_syntax_error(self): - """Test an ESQL rules that uses event.dataset field in the query validated the fields correctly.""" + """Test an ESQL rule that incorrectly uses = for comparison.""" # EsqlSchemaError file_path = get_path(["tests", "data", "command_control_dummy_production_rule.toml"]) original_production_rule = load_rule_contents(file_path) # Test that a ValidationError is raised if the query doesn't match the schema From e84079db0ac27ba5d62cb4fa3ceaff14131e4514 Mon Sep 17 00:00:00 2001 From: eric-forte-elastic Date: Wed, 15 Oct 2025 12:18:35 -0400 Subject: [PATCH 87/93] Linting --- detection_rules/rule_validators.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/detection_rules/rule_validators.py b/detection_rules/rule_validators.py index 9f05c94fce2..bc6bb697ecd 100644 --- a/detection_rules/rule_validators.py +++ b/detection_rules/rule_validators.py @@ -887,7 +887,8 @@ def remote_validate_rule( # noqa: PLR0913 event_dataset_integrations = get_esql_query_event_dataset_integrations(query) self.log( - f"Extracted Event Dataset integrations from query: {', '.join(str(integration) for integration in event_dataset_integrations)}" + "Extracted Event Dataset integrations from query: " + f"{', '.join(str(integration) for integration in event_dataset_integrations)}" ) # Get mappings for all matching existing index templates From 318b6c2738e625e1f77674a34cb1ad33333f2001 Mon Sep 17 00:00:00 2001 From: eric-forte-elastic Date: Wed, 15 Oct 2025 13:01:30 -0400 Subject: [PATCH 88/93] Comment cleanup --- detection_rules/rule_validators.py | 2 +- tests/test_rules_remote.py | 8 -------- 2 files changed, 1 insertion(+), 9 deletions(-) diff --git a/detection_rules/rule_validators.py b/detection_rules/rule_validators.py index bc6bb697ecd..78cc0a348ec 100644 --- a/detection_rules/rule_validators.py +++ b/detection_rules/rule_validators.py @@ -740,7 +740,7 @@ def log(self, val: str) -> None: @property def ast(self) -> Any: - """Return the AST of the ESQL query. Dependant in ESQL parser which is not implemented""" + """Return the AST of the ESQL query. Dependent on an ESQL parser, which is not implemented""" # Needs to return none to prevent not implemented error return None diff --git a/tests/test_rules_remote.py b/tests/test_rules_remote.py index d31631a474a..b64413402bf 100644 --- a/tests/test_rules_remote.py +++ b/tests/test_rules_remote.py @@ -48,7 +48,6 @@ def test_esql_related_integrations(self): def test_esql_event_dataset_schema_error(self): """Test an ESQL rule whose event.dataset filter restricts the schema, causing validation to fail.""" - # EsqlSchemaError file_path = get_path(["tests", "data", "command_control_dummy_production_rule.toml"]) original_production_rule = load_rule_contents(file_path) # Test that a ValidationError is raised if the query doesn't match the schema @@ -67,7 +66,6 @@ def test_esql_event_dataset_schema_error(self): def test_esql_type_mismatch_error(self): """Test an ESQL rule that produces a type error comparing a keyword to a number.""" - # EsqlSchemaError file_path = get_path(["tests", "data", "command_control_dummy_production_rule.toml"]) original_production_rule = load_rule_contents(file_path) # Test that a ValidationError is raised if the query doesn't match the schema @@ -86,7 +84,6 @@ def test_esql_type_mismatch_error(self): def test_esql_syntax_error(self): """Test an ESQL rule that incorrectly uses = for comparison.""" - # EsqlSchemaError file_path = get_path(["tests", "data", "command_control_dummy_production_rule.toml"]) original_production_rule = load_rule_contents(file_path) # Test that a ValidationError is raised if the query doesn't match the schema @@ -105,7 +102,6 @@ def test_esql_syntax_error(self): def test_esql_filtered_index(self): """Test an ESQL rule's schema validation to properly reduce it by the index and handle implicit fields.""" - # EsqlSchemaError file_path = get_path(["tests", "data", "command_control_dummy_production_rule.toml"]) original_production_rule = load_rule_contents(file_path) # Test that a ValidationError is raised if the query doesn't match the schema @@ -122,7 +118,6 @@ def test_esql_filtered_index(self): def test_esql_filtered_index_error(self): """Test an ESQL rule's schema validation when reduced by the index and check if the field is present.""" - # EsqlSchemaError file_path = get_path(["tests", "data", "command_control_dummy_production_rule.toml"]) original_production_rule = load_rule_contents(file_path) # Test that a ValidationError is raised if the query doesn't match the schema @@ -140,7 +135,6 @@ def test_esql_filtered_index_error(self): def test_esql_endpoint_alerts_index(self): """Test an ESQL rule's schema validation using ecs fields in the alerts index.""" - # EsqlSchemaError file_path = get_path(["tests", "data", "command_control_dummy_production_rule.toml"]) original_production_rule = load_rule_contents(file_path) production_rule = deepcopy(original_production_rule)[0] @@ -155,7 +149,6 @@ def test_esql_endpoint_alerts_index(self): def test_esql_endpoint_unknown_index(self): """Test an ESQL rule's index validation. 
This is expected to error on an unknown index.""" - # EsqlSchemaError file_path = get_path(["tests", "data", "command_control_dummy_production_rule.toml"]) original_production_rule = load_rule_contents(file_path) production_rule = deepcopy(original_production_rule)[0] @@ -189,7 +182,6 @@ def test_esql_endpoint_alerts_index_endpoint_fields(self): def test_esql_filtered_keep(self): """Test an ESQL rule's schema validation.""" - # EsqlSchemaError file_path = get_path(["tests", "data", "command_control_dummy_production_rule.toml"]) original_production_rule = load_rule_contents(file_path) # Test that a ValidationError is raised if the query doesn't match the schema From daa80ed4603797f33a98e1d042384aa1fbe50e6f Mon Sep 17 00:00:00 2001 From: eric-forte-elastic Date: Wed, 15 Oct 2025 13:06:31 -0400 Subject: [PATCH 89/93] Typo --- detection_rules/misc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/detection_rules/misc.py b/detection_rules/misc.py index c8aa0fb53d9..6403573fdb0 100644 --- a/detection_rules/misc.py +++ b/detection_rules/misc.py @@ -316,7 +316,7 @@ def get_kibana_client( def get_default_kibana_client() -> Kibana: - """Get an default authenticated Kibana client.""" + """Get a default authenticated Kibana client.""" return get_kibana_client( api_key=getdefault("api_key")(), cloud_id=getdefault("cloud_id")(), From 8f3c7bede3f2b6c08358d57d830d458d083203ff Mon Sep 17 00:00:00 2001 From: eric-forte-elastic Date: Wed, 15 Oct 2025 13:08:23 -0400 Subject: [PATCH 90/93] Add string check --- detection_rules/rule.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/detection_rules/rule.py b/detection_rules/rule.py index 8763acc3570..137fe7c22cc 100644 --- a/detection_rules/rule.py +++ b/detection_rules/rule.py @@ -1521,6 +1521,8 @@ def get_packaged_integrations( # of machine learning analytic packages rule_integrations: str | list[str] = meta.get("integration") or [] + if isinstance(rule_integrations, str): + rule_integrations = [rule_integrations] for integration in rule_integrations: ineligible_integrations = [ *definitions.NON_DATASET_PACKAGES, From cee0b078ee525a7b95c2f9a9417fa128c2ac4020 Mon Sep 17 00:00:00 2001 From: eric-forte-elastic Date: Wed, 15 Oct 2025 13:11:51 -0400 Subject: [PATCH 91/93] Update type annotation --- detection_rules/index_mappings.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/detection_rules/index_mappings.py b/detection_rules/index_mappings.py index ea1150f451b..456e9f6ea61 100644 --- a/detection_rules/index_mappings.py +++ b/detection_rules/index_mappings.py @@ -170,13 +170,13 @@ def prepare_integration_mappings( # noqa: PLR0913 """Prepare integration mappings for the given rule integrations.""" integration_mappings: dict[str, Any] = {} index_lookup: dict[str, Any] = {} - dataset_restriction: dict[str, str] = {} + dataset_restriction: dict[str, list[str]] = {} # Process restrictions, note we need this for loops to be separate for event_dataset in event_dataset_integrations: # Ensure the integration is in rule_integrations if event_dataset.package not in rule_integrations: - dataset_restriction.setdefault(event_dataset.package, []).append(event_dataset.integration) # type: ignore[reportIncompatibleMethodOverride] + dataset_restriction.setdefault(event_dataset.package, []).append(event_dataset.integration) for event_dataset in event_dataset_integrations: if event_dataset.package not in rule_integrations: rule_integrations.append(event_dataset.package) From c6050c7f07b6b52b1f8f9a6e9743fc735f01dbf1 Mon Sep 17 00:00:00 
2001 From: eric-forte-elastic Date: Wed, 15 Oct 2025 14:23:08 -0400 Subject: [PATCH 92/93] update names to dr_ --- .github/workflows/esql-validation.yml | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/.github/workflows/esql-validation.yml b/.github/workflows/esql-validation.yml index f0ad550a53c..5aae215fc32 100644 --- a/.github/workflows/esql-validation.yml +++ b/.github/workflows/esql-validation.yml @@ -44,8 +44,8 @@ jobs: - name: Check out repository env: - DR_CLOUD_ID: ${{ secrets.cloud_id }} - DR_API_KEY: ${{ secrets.api_key }} + DR_CLOUD_ID: ${{ secrets.dr_cloud_id }} + DR_API_KEY: ${{ secrets.dr_api_key }} if: ${{ !env.DR_CLOUD_ID && !env.DR_API_KEY && env.run_esql == 'true' }} uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5 with: @@ -54,8 +54,8 @@ jobs: - name: Build and run containers env: - DR_CLOUD_ID: ${{ secrets.cloud_id }} - DR_API_KEY: ${{ secrets.api_key }} + DR_CLOUD_ID: ${{ secrets.dr_cloud_id }} + DR_API_KEY: ${{ secrets.dr_api_key }} if: ${{ !env.DR_CLOUD_ID && !env.DR_API_KEY && env.run_esql == 'true' }} run: | cd elastic-container @@ -68,8 +68,8 @@ jobs: - name: Get API Key and setup auth env: - DR_CLOUD_ID: ${{ secrets.cloud_id }} - DR_API_KEY: ${{ secrets.api_key }} + DR_CLOUD_ID: ${{ secrets.dr_cloud_id }} + DR_API_KEY: ${{ secrets.dr_api_key }} DR_ELASTICSEARCH_URL: "https://localhost:9200" ES_USER: "elastic" ES_PASSWORD: ${{ env.GENERATED_PASSWORD }} @@ -102,11 +102,11 @@ jobs: - name: Remote Test ESQL Rules if: ${{ env.run_esql == 'true' }} env: - DR_CLOUD_ID: ${{ secrets.cloud_id || '' }} - DR_KIBANA_URL: ${{ secrets.cloud_id == '' && 'https://localhost:5601' || '' }} - DR_ELASTICSEARCH_URL: ${{ secrets.cloud_id == '' && 'https://localhost:9200' || '' }} - DR_API_KEY: ${{ secrets.api_key || env.DR_API_KEY }} - DR_IGNORE_SSL_ERRORS: ${{ secrets.cloud_id == '' && 'true' || '' }} + DR_CLOUD_ID: ${{ secrets.dr_cloud_id || '' }} + DR_KIBANA_URL: ${{ secrets.dr_cloud_id == '' && 'https://localhost:5601' || '' }} + DR_ELASTICSEARCH_URL: ${{ secrets.dr_cloud_id == '' && 'https://localhost:9200' || '' }} + DR_API_KEY: ${{ secrets.dr_api_key || env.DR_API_KEY }} + DR_IGNORE_SSL_ERRORS: ${{ secrets.dr_cloud_id == '' && 'true' || '' }} run: | cd detection-rules python -m detection_rules dev test esql-remote-validation From 30411c87c5213274ba22bcb3265f989270eff5a3 Mon Sep 17 00:00:00 2001 From: eric-forte-elastic Date: Wed, 15 Oct 2025 14:25:54 -0400 Subject: [PATCH 93/93] update to DR naming --- .github/workflows/pythonpackage.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/pythonpackage.yml b/.github/workflows/pythonpackage.yml index 6e6f5dd4b30..4959e05acd0 100644 --- a/.github/workflows/pythonpackage.yml +++ b/.github/workflows/pythonpackage.yml @@ -39,11 +39,11 @@ jobs: GITHUB_EVENT_NAME: "${{ github.event_name}}" # only run remote validation if repo is set to do so otherwise defer to .github/workflows/esql-validation.yml DR_REMOTE_ESQL_VALIDATION: "${{ vars.remote_esql_validation }}" - DR_CLOUD_ID: ${{ secrets.cloud_id }} - DR_KIBANA_URL: ${{ secrets.cloud_id }} - DR_ELASTICSEARCH_URL: ${{ secrets.cloud_id }} - DR_API_KEY: ${{ secrets.api_key }} - DR_IGNORE_SSL_ERRORS: ${{ secrets.cloud_id }} + DR_CLOUD_ID: ${{ secrets.dr_cloud_id }} + DR_KIBANA_URL: ${{ secrets.dr_cloud_id }} + DR_ELASTICSEARCH_URL: ${{ secrets.dr_cloud_id }} + DR_API_KEY: ${{ secrets.dr_api_key }} + DR_IGNORE_SSL_ERRORS: ${{ secrets.dr_cloud_id }} run: | python -m detection_rules test
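Taken together, the error-handling changes in patches 83 and 86 reduce to one pattern: every ES|QL failure is a ClientError subclass collected in ESQL_EXCEPTION_TYPES, caught as a group, and asked to render itself. A condensed, runnable sketch of that pattern follows; the class bodies are simplified stand-ins for the real implementations, not the repo's code:

class ClientError(Exception):
    def show(self) -> None:
        # simplified version of misc.py's show(); the real one styles output via click
        print(f"Error ({type(self).__name__}): {self}")

class EsqlSchemaError(ClientError):
    pass

class EsqlSyntaxError(ClientError):
    pass

ESQL_EXCEPTION_TYPES = (EsqlSchemaError, EsqlSyntaxError)

try:
    raise EsqlSyntaxError("parsing_exception: mismatched input '='")
except (ValueError, *ESQL_EXCEPTION_TYPES) as e:
    if isinstance(e, ESQL_EXCEPTION_TYPES):
        e.show()  # isinstance, not `type(e) in ...`, so subclasses also dispatch here
    else:
        print(f"FAILURE: {type(e)}: {e}")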