From 5608de7875b78eb9e9abf0ada50ccc70b57042b0 Mon Sep 17 00:00:00 2001 From: Carl Flottmann <carl.flottmann@oracle.com> Date: Thu, 27 Mar 2025 15:30:30 +1000 Subject: [PATCH 01/10] fix: resolve skipped heuristic handling in pypi malware checker Signed-off-by: Carl Flottmann <carl.flottmann@oracle.com> --- .../checks/detect_malicious_metadata_check.py | 94 +++++++++++-------- .../test_detect_malicious_metadata_check.py | 31 ++++++ 2 files changed, 85 insertions(+), 40 deletions(-) diff --git a/src/macaron/slsa_analyzer/checks/detect_malicious_metadata_check.py b/src/macaron/slsa_analyzer/checks/detect_malicious_metadata_check.py index 80439bb79..cee65c186 100644 --- a/src/macaron/slsa_analyzer/checks/detect_malicious_metadata_check.py +++ b/src/macaron/slsa_analyzer/checks/detect_malicious_metadata_check.py @@ -128,7 +128,9 @@ def validate_malware(self, pypi_package_json: PyPIPackageJsonAsset) -> tuple[boo is_malware, detail_info = sourcecode_analyzer.analyze() return is_malware, detail_info - def evaluate_heuristic_results(self, heuristic_results: dict[Heuristics, HeuristicResult]) -> float | None: + def evaluate_heuristic_results( + self, heuristic_results: dict[Heuristics, HeuristicResult] + ) -> tuple[float, JsonType]: """Analyse the heuristic results to determine the maliciousness of the package. Parameters @@ -138,18 +140,17 @@ def evaluate_heuristic_results(self, heuristic_results: dict[Heuristics, Heurist Returns ------- - float | None - Returns the confidence associated with the detected malicious combination, otherwise None if no associated - malicious combination was triggered. + tuple[float, JsonType] + Returns the confidence associated with the detected malicious combination, and associated rule IDs detailing + what rules were triggered. 
""" facts_list: list[str] = [] for heuristic, result in heuristic_results.items(): - if result == HeuristicResult.SKIP: - facts_list.append(f"0.0::{heuristic.value}.") - elif result == HeuristicResult.PASS: + if result == HeuristicResult.PASS: facts_list.append(f"{heuristic.value} :- true.") - else: # HeuristicResult.FAIL + elif result == HeuristicResult.FAIL: facts_list.append(f"{heuristic.value} :- false.") + # Do not define for HeuristicResult.SKIP facts = "\n".join(facts_list) problog_code = f"{facts}\n\n{self.malware_rules_problog_model}" @@ -158,10 +159,12 @@ def evaluate_heuristic_results(self, heuristic_results: dict[Heuristics, Heurist problog_model = PrologString(problog_code) problog_results: dict[Term, float] = get_evaluatable().create_from(problog_model).evaluate() - confidence: float | None = problog_results.get(Term(self.problog_result_access)) - if confidence == 0.0: - return None # no rules were triggered - return confidence + confidence = sum(conf for conf in problog_results.values() if conf is not None) + triggered_rules: JsonType = ["No malicious rules triggered"] + if confidence > 0: + triggered_rules = [term.args[0] for term in problog_results] + + return confidence, triggered_rules def run_heuristics( self, pypi_package_json: PyPIPackageJsonAsset @@ -278,9 +281,10 @@ def run_check(self, ctx: AnalyzeContext) -> CheckResultData: except HeuristicAnalyzerValueError: return CheckResultData(result_tables=[], result_type=CheckResultType.UNKNOWN) - confidence = self.evaluate_heuristic_results(result) + confidence, triggered_rules = self.evaluate_heuristic_results(result) + detail_info["triggered_rules"] = triggered_rules result_type = CheckResultType.FAILED - if confidence is None: + if not confidence: confidence = Confidence.HIGH result_type = CheckResultType.PASSED elif ctx.dynamic_data["validate_malware"]: @@ -321,51 +325,61 @@ def run_check(self, ctx: AnalyzeContext) -> CheckResultData: AnomalousVersionAnalyzer, ] - problog_result_access = "result" - malware_rules_problog_model = f""" - % Heuristic groupings + % ----- Wrappers ------ + % These should be used to logically check for a pass or fail on a heuristic for the rest of the model. They exist since, + % when a heuristic is skipped, it is ommitted from being defined in the ProbLog model, and as such these try_call statements + % are needed to handle referencing an undefined fact. + passed(H) :- try_call(H). + failed(H) :- try_call(not H). + + % ----- Heuristic groupings ----- % These are common combinations of heuristics that are used in many of the rules, thus themselves representing % certain behaviors. When changing or adding rules here, if there are frequent combinations of particular - % heuristics, group them together here. + % heuristics, group them together here. Note, these should only be used to check if a grouping statement + % is true. Evaluating 'not quickUndetailed' would be true if empty project link and closer release join + % date passed, or if they were both skipped, which is not desired behaviour. % Maintainer has recently joined, publishing an undetailed page with no links. - quickUndetailed :- not {Heuristics.EMPTY_PROJECT_LINK.value}, not {Heuristics.CLOSER_RELEASE_JOIN_DATE.value}. + quickUndetailed :- failed({Heuristics.EMPTY_PROJECT_LINK.value}), failed({Heuristics.CLOSER_RELEASE_JOIN_DATE.value}). % Maintainer releases a suspicious setup.py and forces it to run by omitting a .whl file. - forceSetup :- not {Heuristics.SUSPICIOUS_SETUP.value}, not {Heuristics.WHEEL_ABSENCE.value}. 
+ forceSetup :- failed({Heuristics.SUSPICIOUS_SETUP.value}), failed({Heuristics.WHEEL_ABSENCE.value}). - % Suspicious Combinations + % ----- Suspicious Combinations ----- % Package released recently with little detail, forcing the setup.py to run. - {Confidence.HIGH.value}::high :- quickUndetailed, forceSetup, not {Heuristics.ONE_RELEASE.value}. - {Confidence.HIGH.value}::high :- quickUndetailed, forceSetup, not {Heuristics.HIGH_RELEASE_FREQUENCY.value}. + {Confidence.HIGH.value}::result("high_confidence_1") :- + quickUndetailed, forceSetup, failed({Heuristics.ONE_RELEASE.value}). + {Confidence.HIGH.value}::result("high_confidence_2") :- + quickUndetailed, forceSetup, failed({Heuristics.HIGH_RELEASE_FREQUENCY.value}). % Package released recently with little detail, with some more refined trust markers introduced: project links, % multiple different releases, but there is no source code repository matching it and the setup is suspicious. - {Confidence.HIGH.value}::high :- not {Heuristics.SOURCE_CODE_REPO.value}, - not {Heuristics.HIGH_RELEASE_FREQUENCY.value}, - not {Heuristics.CLOSER_RELEASE_JOIN_DATE.value}, - {Heuristics.UNCHANGED_RELEASE.value}, + {Confidence.HIGH.value}::result("high_confidence_3") :- + failed({Heuristics.SOURCE_CODE_REPO.value}), + failed({Heuristics.HIGH_RELEASE_FREQUENCY.value}), + passed({Heuristics.UNCHANGED_RELEASE.value}), + failed({Heuristics.CLOSER_RELEASE_JOIN_DATE.value}), forceSetup. % Package released recently with little detail, with multiple releases as a trust marker, but frequent and with % the same code. - {Confidence.MEDIUM.value}::medium :- quickUndetailed, - not {Heuristics.HIGH_RELEASE_FREQUENCY.value}, - not {Heuristics.UNCHANGED_RELEASE.value}, - {Heuristics.SUSPICIOUS_SETUP.value}. + {Confidence.MEDIUM.value}::result("medium_confidence_1") :- + quickUndetailed, + failed({Heuristics.HIGH_RELEASE_FREQUENCY.value}), + failed({Heuristics.UNCHANGED_RELEASE.value}), + passed({Heuristics.SUSPICIOUS_SETUP.value}). % Package released recently with little detail and an anomalous version number for a single-release package. - {Confidence.MEDIUM.value}::medium :- quickUndetailed, - not {Heuristics.ONE_RELEASE.value}, - {Heuristics.WHEEL_ABSENCE.value}, - not {Heuristics.ANOMALOUS_VERSION.value}. - - {problog_result_access} :- high. - {problog_result_access} :- medium. - - query({problog_result_access}). + {Confidence.MEDIUM.value}::result("medium_confidence_2") :- + quickUndetailed, + failed({Heuristics.ONE_RELEASE.value}), + passed({Heuristics.WHEEL_ABSENCE.value}), + failed({Heuristics.ANOMALOUS_VERSION.value}). + + % ----- Evaluation ----- + query(result(_)). 
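+    % query(result(_)) grounds to every result/1 term in the model; ProbLog returns each with its confidence when the
+    % rule body holds (and 0.0 otherwise), which evaluate_heuristic_results sums into the overall confidence and maps
+    % to the triggered rule IDs.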
""" diff --git a/tests/slsa_analyzer/checks/test_detect_malicious_metadata_check.py b/tests/slsa_analyzer/checks/test_detect_malicious_metadata_check.py index c6ecb044d..3a2090cb0 100644 --- a/tests/slsa_analyzer/checks/test_detect_malicious_metadata_check.py +++ b/tests/slsa_analyzer/checks/test_detect_malicious_metadata_check.py @@ -12,6 +12,7 @@ from pytest_httpserver import HTTPServer from macaron.config.defaults import load_defaults +from macaron.malware_analyzer.pypi_heuristics.heuristics import HeuristicResult, Heuristics from macaron.slsa_analyzer.build_tool.base_build_tool import BaseBuildTool from macaron.slsa_analyzer.checks.check_result import CheckResultType from macaron.slsa_analyzer.checks.detect_malicious_metadata_check import DetectMaliciousMetadataCheck @@ -98,3 +99,33 @@ def test_detect_malicious_metadata( ).respond_with_json({}) assert check.run_check(ctx).result_type == expected + + +@pytest.mark.parametrize( + ("combination"), + [ + pytest.param( + { + # similar to rule ID high_confidence_1, but SUSPICIOUS_SETUP is skipped since the file does not exist, + # so the rule should not trigger. + Heuristics.EMPTY_PROJECT_LINK: HeuristicResult.FAIL, + Heuristics.SOURCE_CODE_REPO: HeuristicResult.SKIP, + Heuristics.ONE_RELEASE: HeuristicResult.FAIL, + Heuristics.HIGH_RELEASE_FREQUENCY: HeuristicResult.SKIP, + Heuristics.UNCHANGED_RELEASE: HeuristicResult.SKIP, + Heuristics.CLOSER_RELEASE_JOIN_DATE: HeuristicResult.FAIL, + Heuristics.SUSPICIOUS_SETUP: HeuristicResult.SKIP, + Heuristics.WHEEL_ABSENCE: HeuristicResult.FAIL, + Heuristics.ANOMALOUS_VERSION: HeuristicResult.PASS, + }, + id="test_skipped_evaluation", + ) + ], +) +def test_evaluations(combination: dict[Heuristics, HeuristicResult]) -> None: + """Test heuristic combinations to ensure they evaluate as expected.""" + check = DetectMaliciousMetadataCheck() + + confidence, _ = check.evaluate_heuristic_results(combination) + + assert confidence == 0 From b79e9dfbe9ab2c1d99a67a0117e3114430716d2a Mon Sep 17 00:00:00 2001 From: Carl Flottmann <carl.flottmann@oracle.com> Date: Thu, 27 Mar 2025 15:38:04 +1000 Subject: [PATCH 02/10] docs: updated README for contributing to the problog model appropriately Signed-off-by: Carl Flottmann <carl.flottmann@oracle.com> --- src/macaron/malware_analyzer/README.md | 5 +++-- .../slsa_analyzer/checks/detect_malicious_metadata_check.py | 4 +--- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/src/macaron/malware_analyzer/README.md b/src/macaron/malware_analyzer/README.md index 7617e4156..592a864c9 100644 --- a/src/macaron/malware_analyzer/README.md +++ b/src/macaron/malware_analyzer/README.md @@ -61,8 +61,9 @@ When contributing an analyzer, it must meet the following requirements: - The analyzer name must be added to [heuristics.py](./pypi_heuristics/heuristics.py) file so it can be used for rule combinations in [detect_malicious_metadata_check.py](../slsa_analyzer/checks/detect_malicious_metadata_check.py) - Update the `malware_rules_problog_model` in [detect_malicious_metadata_check.py](../slsa_analyzer/checks/detect_malicious_metadata_check.py) with logical statements where the heuristic should be included. When adding new rules, please follow the following guidelines: - Provide a [confidence value](../slsa_analyzer/checks/check_result.py) using the `Confidence` enum. - - Provide a name based on this confidence value (i.e. 
`high`, `medium`, or `low`) - - If it does not already exist, make sure to assign this to the result variable (`problog_result_access`) + - Ensure it is assigned to the "result" string name, otherwise it will not be queried and evaluated. + - Assign a string rule ID to the rule. This will be used to backtrack to determine if it was triggered. + - Make sure to wrap pass/fail statements in `passed()` and `failed()`. Not doing so may result in undesirable behaviour, see the comments in the model for more details. - If there are commonly used combinations introduced by adding the heuristic, combine and justify them at the top of the static model (see `quickUndetailed` and `forceSetup` as current examples). ### Confidence Score Motivation diff --git a/src/macaron/slsa_analyzer/checks/detect_malicious_metadata_check.py b/src/macaron/slsa_analyzer/checks/detect_malicious_metadata_check.py index cee65c186..74b4278c5 100644 --- a/src/macaron/slsa_analyzer/checks/detect_malicious_metadata_check.py +++ b/src/macaron/slsa_analyzer/checks/detect_malicious_metadata_check.py @@ -336,9 +336,7 @@ def run_check(self, ctx: AnalyzeContext) -> CheckResultData: % ----- Heuristic groupings ----- % These are common combinations of heuristics that are used in many of the rules, thus themselves representing % certain behaviors. When changing or adding rules here, if there are frequent combinations of particular - % heuristics, group them together here. Note, these should only be used to check if a grouping statement - % is true. Evaluating 'not quickUndetailed' would be true if empty project link and closer release join - % date passed, or if they were both skipped, which is not desired behaviour. + % heuristics, group them together here. % Maintainer has recently joined, publishing an undetailed page with no links. quickUndetailed :- failed({Heuristics.EMPTY_PROJECT_LINK.value}), failed({Heuristics.CLOSER_RELEASE_JOIN_DATE.value}). From de5a63e309a893f4825d8e78b6a6a50988943abc Mon Sep 17 00:00:00 2001 From: Carl Flottmann <carl.flottmann@oracle.com> Date: Thu, 27 Mar 2025 16:31:57 +1000 Subject: [PATCH 03/10] chore: improved rule ID names Signed-off-by: Carl Flottmann <carl.flottmann@oracle.com> --- .../checks/detect_malicious_metadata_check.py | 10 +++++----- .../checks/test_detect_malicious_metadata_check.py | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/macaron/slsa_analyzer/checks/detect_malicious_metadata_check.py b/src/macaron/slsa_analyzer/checks/detect_malicious_metadata_check.py index 74b4278c5..a34b378a4 100644 --- a/src/macaron/slsa_analyzer/checks/detect_malicious_metadata_check.py +++ b/src/macaron/slsa_analyzer/checks/detect_malicious_metadata_check.py @@ -347,14 +347,14 @@ def run_check(self, ctx: AnalyzeContext) -> CheckResultData: % ----- Suspicious Combinations ----- % Package released recently with little detail, forcing the setup.py to run. - {Confidence.HIGH.value}::result("high_confidence_1") :- + {Confidence.HIGH.value}::result("malware_high_confidence_1") :- quickUndetailed, forceSetup, failed({Heuristics.ONE_RELEASE.value}). - {Confidence.HIGH.value}::result("high_confidence_2") :- + {Confidence.HIGH.value}::result("malware_high_confidence_2") :- quickUndetailed, forceSetup, failed({Heuristics.HIGH_RELEASE_FREQUENCY.value}). % Package released recently with little detail, with some more refined trust markers introduced: project links, % multiple different releases, but there is no source code repository matching it and the setup is suspicious. 
- {Confidence.HIGH.value}::result("high_confidence_3") :- + {Confidence.HIGH.value}::result("malware_high_confidence_3") :- failed({Heuristics.SOURCE_CODE_REPO.value}), failed({Heuristics.HIGH_RELEASE_FREQUENCY.value}), passed({Heuristics.UNCHANGED_RELEASE.value}), @@ -363,14 +363,14 @@ def run_check(self, ctx: AnalyzeContext) -> CheckResultData: % Package released recently with little detail, with multiple releases as a trust marker, but frequent and with % the same code. - {Confidence.MEDIUM.value}::result("medium_confidence_1") :- + {Confidence.MEDIUM.value}::result("malware_medium_confidence_1") :- quickUndetailed, failed({Heuristics.HIGH_RELEASE_FREQUENCY.value}), failed({Heuristics.UNCHANGED_RELEASE.value}), passed({Heuristics.SUSPICIOUS_SETUP.value}). % Package released recently with little detail and an anomalous version number for a single-release package. - {Confidence.MEDIUM.value}::result("medium_confidence_2") :- + {Confidence.MEDIUM.value}::result("malware_medium_confidence_2") :- quickUndetailed, failed({Heuristics.ONE_RELEASE.value}), passed({Heuristics.WHEEL_ABSENCE.value}), diff --git a/tests/slsa_analyzer/checks/test_detect_malicious_metadata_check.py b/tests/slsa_analyzer/checks/test_detect_malicious_metadata_check.py index 3a2090cb0..fcc7cccc1 100644 --- a/tests/slsa_analyzer/checks/test_detect_malicious_metadata_check.py +++ b/tests/slsa_analyzer/checks/test_detect_malicious_metadata_check.py @@ -106,7 +106,7 @@ def test_detect_malicious_metadata( [ pytest.param( { - # similar to rule ID high_confidence_1, but SUSPICIOUS_SETUP is skipped since the file does not exist, + # similar to rule ID malware_high_confidence_1, but SUSPICIOUS_SETUP is skipped since the file does not exist, # so the rule should not trigger. Heuristics.EMPTY_PROJECT_LINK: HeuristicResult.FAIL, Heuristics.SOURCE_CODE_REPO: HeuristicResult.SKIP, From 3118f3841eca6dc78d9bb77160ce25f152aa99c6 Mon Sep 17 00:00:00 2001 From: Carl Flottmann <carl.flottmann@oracle.com> Date: Mon, 31 Mar 2025 10:33:16 +1000 Subject: [PATCH 04/10] fix: confidence is now accumulated appropriately, such that several rules triggers increase the confidence Signed-off-by: Carl Flottmann <carl.flottmann@oracle.com> --- .../checks/detect_malicious_metadata_check.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/src/macaron/slsa_analyzer/checks/detect_malicious_metadata_check.py b/src/macaron/slsa_analyzer/checks/detect_malicious_metadata_check.py index a34b378a4..b04f8a290 100644 --- a/src/macaron/slsa_analyzer/checks/detect_malicious_metadata_check.py +++ b/src/macaron/slsa_analyzer/checks/detect_malicious_metadata_check.py @@ -140,11 +140,18 @@ def evaluate_heuristic_results( Returns ------- - tuple[float, JsonType] + tuple[float, list[str]] Returns the confidence associated with the detected malicious combination, and associated rule IDs detailing what rules were triggered. """ facts_list: list[str] = [] + triggered_rules = [] + # confidence is calculated using the probability of the package being benign, so the negation of the confidence values + # in the problog model. Multiplying these probabilities together on several triggers will further decrease the probability + # of the package being benign. This is then negated after calculation to get the probability of the package being malicious. + # If no rules are triggered, this will simply result in 1.0 - 1.0 = 0.0. 
+ confidence: float = 1.0 + for heuristic, result in heuristic_results.items(): if result == HeuristicResult.PASS: facts_list.append(f"{heuristic.value} :- true.") @@ -159,10 +166,11 @@ def evaluate_heuristic_results( problog_model = PrologString(problog_code) problog_results: dict[Term, float] = get_evaluatable().create_from(problog_model).evaluate() - confidence = sum(conf for conf in problog_results.values() if conf is not None) - triggered_rules: JsonType = ["No malicious rules triggered"] - if confidence > 0: - triggered_rules = [term.args[0] for term in problog_results] + for term, conf in problog_results.items(): + if conf is not None and conf > 0: + confidence *= 1.0 - conf # decrease the probability of the package being benign + triggered_rules.append(term.args[0]) + confidence = round(1.0 - confidence, 2) # 2 decimal places return confidence, triggered_rules From 69c6f1c1215ef12c9a738c0d3b2a166d4416367a Mon Sep 17 00:00:00 2001 From: Carl Flottmann <carl.flottmann@oracle.com> Date: Mon, 31 Mar 2025 10:44:46 +1000 Subject: [PATCH 05/10] chore: added comment example for confidence calculation Signed-off-by: Carl Flottmann <carl.flottmann@oracle.com> --- .../slsa_analyzer/checks/detect_malicious_metadata_check.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/macaron/slsa_analyzer/checks/detect_malicious_metadata_check.py b/src/macaron/slsa_analyzer/checks/detect_malicious_metadata_check.py index b04f8a290..6cb012ed1 100644 --- a/src/macaron/slsa_analyzer/checks/detect_malicious_metadata_check.py +++ b/src/macaron/slsa_analyzer/checks/detect_malicious_metadata_check.py @@ -150,6 +150,9 @@ def evaluate_heuristic_results( # in the problog model. Multiplying these probabilities together on several triggers will further decrease the probability # of the package being benign. This is then negated after calculation to get the probability of the package being malicious. # If no rules are triggered, this will simply result in 1.0 - 1.0 = 0.0. + # For example, if a LOW rule and MEDIUM rule are triggered, with confidences 0.4 and 0.7 respectively, this would result in + # the following calculation for confidence in package maliciousness: + # 1 - (1.0 * (1 - 0.4) * (1 - 0.7)) = 0.82 confidence: float = 1.0 for heuristic, result in heuristic_results.items(): From fe3037999f91c84ca5fbf0336d781bf09764f6e5 Mon Sep 17 00:00:00 2001 From: Carl Flottmann <carl.flottmann@oracle.com> Date: Mon, 31 Mar 2025 16:22:13 +1000 Subject: [PATCH 06/10] fix: problog performs more accurate aggregation. Wrappers explanation improved. Signed-off-by: Carl Flottmann <carl.flottmann@oracle.com> --- .../checks/detect_malicious_metadata_check.py | 58 +++++++++++-------- 1 file changed, 35 insertions(+), 23 deletions(-) diff --git a/src/macaron/slsa_analyzer/checks/detect_malicious_metadata_check.py b/src/macaron/slsa_analyzer/checks/detect_malicious_metadata_check.py index 6cb012ed1..780d6152f 100644 --- a/src/macaron/slsa_analyzer/checks/detect_malicious_metadata_check.py +++ b/src/macaron/slsa_analyzer/checks/detect_malicious_metadata_check.py @@ -145,15 +145,7 @@ def evaluate_heuristic_results( what rules were triggered. """ facts_list: list[str] = [] - triggered_rules = [] - # confidence is calculated using the probability of the package being benign, so the negation of the confidence values - # in the problog model. Multiplying these probabilities together on several triggers will further decrease the probability - # of the package being benign. 
This is then negated after calculation to get the probability of the package being malicious. - # If no rules are triggered, this will simply result in 1.0 - 1.0 = 0.0. - # For example, if a LOW rule and MEDIUM rule are triggered, with confidences 0.4 and 0.7 respectively, this would result in - # the following calculation for confidence in package maliciousness: - # 1 - (1.0 * (1 - 0.4) * (1 - 0.7)) = 0.82 - confidence: float = 1.0 + triggered_rules: dict[str, JsonType] = {} for heuristic, result in heuristic_results.items(): if result == HeuristicResult.PASS: @@ -169,11 +161,11 @@ def evaluate_heuristic_results( problog_model = PrologString(problog_code) problog_results: dict[Term, float] = get_evaluatable().create_from(problog_model).evaluate() - for term, conf in problog_results.items(): - if conf is not None and conf > 0: - confidence *= 1.0 - conf # decrease the probability of the package being benign - triggered_rules.append(term.args[0]) - confidence = round(1.0 - confidence, 2) # 2 decimal places + confidence = problog_results.pop(Term(self.problog_result_access), 0.0) + if confidence > 0: # a rule was triggered + for term, conf in problog_results.items(): + if term.args: + triggered_rules[str(term.args[0])] = conf return confidence, triggered_rules @@ -336,11 +328,21 @@ def run_check(self, ctx: AnalyzeContext) -> CheckResultData: AnomalousVersionAnalyzer, ] + problog_result_access = "result" + malware_rules_problog_model = f""" % ----- Wrappers ------ - % These should be used to logically check for a pass or fail on a heuristic for the rest of the model. They exist since, - % when a heuristic is skipped, it is ommitted from being defined in the ProbLog model, and as such these try_call statements - % are needed to handle referencing an undefined fact. + % When a heuristic is skipped, it is ommitted from the problog model facts definition. This means that references in this + % static model must account for when they are not existent. These wrappers perform this function using the inbuilt try_call + % problog function. It will try to evaluate the provided logic, and return false if it encounters an error, such as the fact + % not being defined. For example, you are expecting A to pass, so we do: + % + % passed(A) + % + % If A was 'true', then this will return true, as A did pass. If A was 'false', then this will return false, as A did not pass. + % If A was not defined, then this will return false, as A did not pass. + % Please use these wrappers throughout the problog model for logic definitions. + passed(H) :- try_call(H). failed(H) :- try_call(not H). @@ -358,14 +360,14 @@ def run_check(self, ctx: AnalyzeContext) -> CheckResultData: % ----- Suspicious Combinations ----- % Package released recently with little detail, forcing the setup.py to run. - {Confidence.HIGH.value}::result("malware_high_confidence_1") :- + {Confidence.HIGH.value}::trigger(malware_high_confidence_1) :- quickUndetailed, forceSetup, failed({Heuristics.ONE_RELEASE.value}). - {Confidence.HIGH.value}::result("malware_high_confidence_2") :- + {Confidence.HIGH.value}::trigger(malware_high_confidence_2) :- quickUndetailed, forceSetup, failed({Heuristics.HIGH_RELEASE_FREQUENCY.value}). % Package released recently with little detail, with some more refined trust markers introduced: project links, % multiple different releases, but there is no source code repository matching it and the setup is suspicious. 
- {Confidence.HIGH.value}::result("malware_high_confidence_3") :- + {Confidence.HIGH.value}::trigger(malware_high_confidence_3) :- failed({Heuristics.SOURCE_CODE_REPO.value}), failed({Heuristics.HIGH_RELEASE_FREQUENCY.value}), passed({Heuristics.UNCHANGED_RELEASE.value}), @@ -374,21 +376,31 @@ def run_check(self, ctx: AnalyzeContext) -> CheckResultData: % Package released recently with little detail, with multiple releases as a trust marker, but frequent and with % the same code. - {Confidence.MEDIUM.value}::result("malware_medium_confidence_1") :- + {Confidence.MEDIUM.value}::trigger(malware_medium_confidence_1) :- quickUndetailed, failed({Heuristics.HIGH_RELEASE_FREQUENCY.value}), failed({Heuristics.UNCHANGED_RELEASE.value}), passed({Heuristics.SUSPICIOUS_SETUP.value}). % Package released recently with little detail and an anomalous version number for a single-release package. - {Confidence.MEDIUM.value}::result("malware_medium_confidence_2") :- + {Confidence.MEDIUM.value}::trigger(malware_medium_confidence_2) :- quickUndetailed, failed({Heuristics.ONE_RELEASE.value}), passed({Heuristics.WHEEL_ABSENCE.value}), failed({Heuristics.ANOMALOUS_VERSION.value}). % ----- Evaluation ----- - query(result(_)). + + % Aggregate result + {problog_result_access} :- trigger(malware_high_confidence_1). + {problog_result_access} :- trigger(malware_high_confidence_2). + {problog_result_access} :- trigger(malware_high_confidence_3). + {problog_result_access} :- trigger(malware_medium_confidence_2). + {problog_result_access} :- trigger(malware_medium_confidence_1). + query({problog_result_access}). + + % Explainability + query(trigger(_)). """ From 6d50aaa0bb4ba95dae9a2b4c82e424be32e80f8b Mon Sep 17 00:00:00 2001 From: Carl Flottmann <carl.flottmann@oracle.com> Date: Mon, 31 Mar 2025 16:29:19 +1000 Subject: [PATCH 07/10] docs: updated readme for contributing to problog model. Signed-off-by: Carl Flottmann <carl.flottmann@oracle.com> --- src/macaron/malware_analyzer/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/macaron/malware_analyzer/README.md b/src/macaron/malware_analyzer/README.md index 592a864c9..d5d30a670 100644 --- a/src/macaron/malware_analyzer/README.md +++ b/src/macaron/malware_analyzer/README.md @@ -61,8 +61,8 @@ When contributing an analyzer, it must meet the following requirements: - The analyzer name must be added to [heuristics.py](./pypi_heuristics/heuristics.py) file so it can be used for rule combinations in [detect_malicious_metadata_check.py](../slsa_analyzer/checks/detect_malicious_metadata_check.py) - Update the `malware_rules_problog_model` in [detect_malicious_metadata_check.py](../slsa_analyzer/checks/detect_malicious_metadata_check.py) with logical statements where the heuristic should be included. When adding new rules, please follow the following guidelines: - Provide a [confidence value](../slsa_analyzer/checks/check_result.py) using the `Confidence` enum. - - Ensure it is assigned to the "result" string name, otherwise it will not be queried and evaluated. - - Assign a string rule ID to the rule. This will be used to backtrack to determine if it was triggered. + - Ensure it is assigned to the `problog_result_access` string variable, otherwise it will not be queried and evaluated. + - Assign a rule ID to the rule. This will be used to backtrack to determine if it was triggered. - Make sure to wrap pass/fail statements in `passed()` and `failed()`. 
Not doing so may result in undesirable behaviour, see the comments in the model for more details. - If there are commonly used combinations introduced by adding the heuristic, combine and justify them at the top of the static model (see `quickUndetailed` and `forceSetup` as current examples). From f1b6b4e43939fd2441534768b18fe84c765b9c63 Mon Sep 17 00:00:00 2001 From: Carl Flottmann <carl.flottmann@oracle.com> Date: Wed, 9 Apr 2025 11:34:58 +1000 Subject: [PATCH 08/10] fix: removed explicit pass on malware_medium_confidence_w to make the rule ignorant of wheel absence result Signed-off-by: Carl Flottmann <carl.flottmann@oracle.com> --- .../slsa_analyzer/checks/detect_malicious_metadata_check.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/macaron/slsa_analyzer/checks/detect_malicious_metadata_check.py b/src/macaron/slsa_analyzer/checks/detect_malicious_metadata_check.py index 780d6152f..dd60c5ac1 100644 --- a/src/macaron/slsa_analyzer/checks/detect_malicious_metadata_check.py +++ b/src/macaron/slsa_analyzer/checks/detect_malicious_metadata_check.py @@ -386,7 +386,6 @@ def run_check(self, ctx: AnalyzeContext) -> CheckResultData: {Confidence.MEDIUM.value}::trigger(malware_medium_confidence_2) :- quickUndetailed, failed({Heuristics.ONE_RELEASE.value}), - passed({Heuristics.WHEEL_ABSENCE.value}), failed({Heuristics.ANOMALOUS_VERSION.value}). % ----- Evaluation ----- From 03035cf9972eda128d9211182e4e5daceba273fe Mon Sep 17 00:00:00 2001 From: Carl Flottmann <carl.flottmann@oracle.com> Date: Wed, 9 Apr 2025 14:04:03 +1000 Subject: [PATCH 09/10] chore: updated docstring for evaluate heuristic results return type Signed-off-by: Carl Flottmann <carl.flottmann@oracle.com> --- .../slsa_analyzer/checks/detect_malicious_metadata_check.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/macaron/slsa_analyzer/checks/detect_malicious_metadata_check.py b/src/macaron/slsa_analyzer/checks/detect_malicious_metadata_check.py index dd60c5ac1..9d20e12bc 100644 --- a/src/macaron/slsa_analyzer/checks/detect_malicious_metadata_check.py +++ b/src/macaron/slsa_analyzer/checks/detect_malicious_metadata_check.py @@ -140,9 +140,9 @@ def evaluate_heuristic_results( Returns ------- - tuple[float, list[str]] + tuple[float, JsonType] Returns the confidence associated with the detected malicious combination, and associated rule IDs detailing - what rules were triggered. + what rules were triggered and their confidence as a dict[str, float] type. """ facts_list: list[str] = [] triggered_rules: dict[str, JsonType] = {} @@ -328,6 +328,7 @@ def run_check(self, ctx: AnalyzeContext) -> CheckResultData: AnomalousVersionAnalyzer, ] + # name used to query the result of all problog rules, so it can be accessed outside the model. 
problog_result_access = "result" malware_rules_problog_model = f""" From 8d01aa89f1840f1586e831aec9677b4b1ecce049 Mon Sep 17 00:00:00 2001 From: Carl Flottmann <carl.flottmann@oracle.com> Date: Wed, 9 Apr 2025 14:51:40 +1000 Subject: [PATCH 10/10] test: updated test to check the explainability also Signed-off-by: Carl Flottmann <carl.flottmann@oracle.com> --- .../checks/test_detect_malicious_metadata_check.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/slsa_analyzer/checks/test_detect_malicious_metadata_check.py b/tests/slsa_analyzer/checks/test_detect_malicious_metadata_check.py index fcc7cccc1..ca4f17ddf 100644 --- a/tests/slsa_analyzer/checks/test_detect_malicious_metadata_check.py +++ b/tests/slsa_analyzer/checks/test_detect_malicious_metadata_check.py @@ -126,6 +126,7 @@ def test_evaluations(combination: dict[Heuristics, HeuristicResult]) -> None: """Test heuristic combinations to ensure they evaluate as expected.""" check = DetectMaliciousMetadataCheck() - confidence, _ = check.evaluate_heuristic_results(combination) - + confidence, triggered_rules = check.evaluate_heuristic_results(combination) assert confidence == 0 + # Expecting this to be a dictionary, so we can ignore the type problems + assert len(dict(triggered_rules)) == 0 # type: ignore[arg-type]
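The aggregate `result` introduced in PATCH 06 is what lets several triggered rules raise the overall confidence: ProbLog treats the independent `trigger/1` probabilities as a noisy-or. The standalone sketch below (not part of the patch series) illustrates that calculation using the same `PrologString` and `get_evaluatable` calls as `detect_malicious_metadata_check.py`; the rule names here are made up for illustration, and the 0.7/0.4 values are the MEDIUM/LOW confidences quoted in the comment added by PATCH 05.

from problog import get_evaluatable
from problog.logic import Term
from problog.program import PrologString

# Two illustrative rules fire with MEDIUM (0.7) and LOW (0.4) confidence.
# The rule IDs are hypothetical; only malware_medium_confidence_* rules exist in the real model.
model = """
0.7::trigger(malware_medium_confidence_1).
0.4::trigger(malware_low_confidence_1).

% Aggregate result, mirroring the structure of malware_rules_problog_model.
result :- trigger(malware_medium_confidence_1).
result :- trigger(malware_low_confidence_1).
query(result).

% Explainability: also query each trigger term individually.
query(trigger(_)).
"""

results = get_evaluatable().create_from(PrologString(model)).evaluate()

# ProbLog treats the two rules as independent causes of `result`, so
# P(result) = 1 - (1 - 0.7) * (1 - 0.4) = 0.82.
print(round(results[Term("result")], 2))  # 0.82

Running this prints 0.82, matching the worked example 1 - (1.0 * (1 - 0.4) * (1 - 0.7)) = 0.82 in the comment added by PATCH 05.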