From 5608de7875b78eb9e9abf0ada50ccc70b57042b0 Mon Sep 17 00:00:00 2001
From: Carl Flottmann <carl.flottmann@oracle.com>
Date: Thu, 27 Mar 2025 15:30:30 +1000
Subject: [PATCH 01/10] fix: resolve skipped heuristic handling in pypi malware
 checker

Signed-off-by: Carl Flottmann <carl.flottmann@oracle.com>
---
 .../checks/detect_malicious_metadata_check.py | 94 +++++++++++--------
 .../test_detect_malicious_metadata_check.py   | 31 ++++++
 2 files changed, 85 insertions(+), 40 deletions(-)

diff --git a/src/macaron/slsa_analyzer/checks/detect_malicious_metadata_check.py b/src/macaron/slsa_analyzer/checks/detect_malicious_metadata_check.py
index 80439bb79..cee65c186 100644
--- a/src/macaron/slsa_analyzer/checks/detect_malicious_metadata_check.py
+++ b/src/macaron/slsa_analyzer/checks/detect_malicious_metadata_check.py
@@ -128,7 +128,9 @@ def validate_malware(self, pypi_package_json: PyPIPackageJsonAsset) -> tuple[boo
         is_malware, detail_info = sourcecode_analyzer.analyze()
         return is_malware, detail_info
 
-    def evaluate_heuristic_results(self, heuristic_results: dict[Heuristics, HeuristicResult]) -> float | None:
+    def evaluate_heuristic_results(
+        self, heuristic_results: dict[Heuristics, HeuristicResult]
+    ) -> tuple[float, JsonType]:
         """Analyse the heuristic results to determine the maliciousness of the package.
 
         Parameters
@@ -138,18 +140,17 @@ def evaluate_heuristic_results(self, heuristic_results: dict[Heuristics, Heurist
 
         Returns
         -------
-        float | None
-            Returns the confidence associated with the detected malicious combination, otherwise None if no associated
-            malicious combination was triggered.
+        tuple[float, JsonType]
+            Returns the confidence associated with the detected malicious combination, and associated rule IDs detailing
+            what rules were triggered.
         """
         facts_list: list[str] = []
         for heuristic, result in heuristic_results.items():
-            if result == HeuristicResult.SKIP:
-                facts_list.append(f"0.0::{heuristic.value}.")
-            elif result == HeuristicResult.PASS:
+            if result == HeuristicResult.PASS:
                 facts_list.append(f"{heuristic.value} :- true.")
-            else:  # HeuristicResult.FAIL
+            elif result == HeuristicResult.FAIL:
                 facts_list.append(f"{heuristic.value} :- false.")
+            # Do not define for HeuristicResult.SKIP
 
         facts = "\n".join(facts_list)
         problog_code = f"{facts}\n\n{self.malware_rules_problog_model}"
@@ -158,10 +159,12 @@ def evaluate_heuristic_results(self, heuristic_results: dict[Heuristics, Heurist
         problog_model = PrologString(problog_code)
         problog_results: dict[Term, float] = get_evaluatable().create_from(problog_model).evaluate()
 
-        confidence: float | None = problog_results.get(Term(self.problog_result_access))
-        if confidence == 0.0:
-            return None  # no rules were triggered
-        return confidence
+        confidence = sum(conf for conf in problog_results.values() if conf is not None)
+        triggered_rules: JsonType = ["No malicious rules triggered"]
+        if confidence > 0:
+            triggered_rules = [term.args[0] for term in problog_results]
+
+        return confidence, triggered_rules
 
     def run_heuristics(
         self, pypi_package_json: PyPIPackageJsonAsset
@@ -278,9 +281,10 @@ def run_check(self, ctx: AnalyzeContext) -> CheckResultData:
                         except HeuristicAnalyzerValueError:
                             return CheckResultData(result_tables=[], result_type=CheckResultType.UNKNOWN)
 
-                        confidence = self.evaluate_heuristic_results(result)
+                        confidence, triggered_rules = self.evaluate_heuristic_results(result)
+                        detail_info["triggered_rules"] = triggered_rules
                         result_type = CheckResultType.FAILED
-                        if confidence is None:
+                        if not confidence:
                             confidence = Confidence.HIGH
                             result_type = CheckResultType.PASSED
                         elif ctx.dynamic_data["validate_malware"]:
@@ -321,51 +325,61 @@ def run_check(self, ctx: AnalyzeContext) -> CheckResultData:
         AnomalousVersionAnalyzer,
     ]
 
-    problog_result_access = "result"
-
     malware_rules_problog_model = f"""
-    % Heuristic groupings
+    % ----- Wrappers ------
+    % These should be used to logically check for a pass or fail on a heuristic for the rest of the model. They exist since,
+    % when a heuristic is skipped, it is ommitted from being defined in the ProbLog model, and as such these try_call statements
+    % are needed to handle referencing an undefined fact.
+    passed(H) :- try_call(H).
+    failed(H) :- try_call(not H).
+
+    % ----- Heuristic groupings -----
     % These are common combinations of heuristics that are used in many of the rules, thus themselves representing
     % certain behaviors. When changing or adding rules here, if there are frequent combinations of particular
-    % heuristics, group them together here.
+    % heuristics, group them together here. Note, these should only be used to check if a grouping statement
+    % is true. Evaluating 'not quickUndetailed' would be true if empty project link and closer release join
+    % date passed, or if they were both skipped, which is not desired behaviour.
 
     % Maintainer has recently joined, publishing an undetailed page with no links.
-    quickUndetailed :- not {Heuristics.EMPTY_PROJECT_LINK.value}, not {Heuristics.CLOSER_RELEASE_JOIN_DATE.value}.
+    quickUndetailed :- failed({Heuristics.EMPTY_PROJECT_LINK.value}), failed({Heuristics.CLOSER_RELEASE_JOIN_DATE.value}).
 
     % Maintainer releases a suspicious setup.py and forces it to run by omitting a .whl file.
-    forceSetup :- not {Heuristics.SUSPICIOUS_SETUP.value}, not {Heuristics.WHEEL_ABSENCE.value}.
+    forceSetup :- failed({Heuristics.SUSPICIOUS_SETUP.value}), failed({Heuristics.WHEEL_ABSENCE.value}).
 
-    % Suspicious Combinations
+    % ----- Suspicious Combinations -----
 
     % Package released recently with little detail, forcing the setup.py to run.
-    {Confidence.HIGH.value}::high :- quickUndetailed, forceSetup, not {Heuristics.ONE_RELEASE.value}.
-    {Confidence.HIGH.value}::high :- quickUndetailed, forceSetup, not {Heuristics.HIGH_RELEASE_FREQUENCY.value}.
+    {Confidence.HIGH.value}::result("high_confidence_1") :-
+        quickUndetailed, forceSetup, failed({Heuristics.ONE_RELEASE.value}).
+    {Confidence.HIGH.value}::result("high_confidence_2") :-
+        quickUndetailed, forceSetup, failed({Heuristics.HIGH_RELEASE_FREQUENCY.value}).
 
     % Package released recently with little detail, with some more refined trust markers introduced: project links,
     % multiple different releases, but there is no source code repository matching it and the setup is suspicious.
-    {Confidence.HIGH.value}::high :- not {Heuristics.SOURCE_CODE_REPO.value},
-        not {Heuristics.HIGH_RELEASE_FREQUENCY.value},
-        not {Heuristics.CLOSER_RELEASE_JOIN_DATE.value},
-        {Heuristics.UNCHANGED_RELEASE.value},
+    {Confidence.HIGH.value}::result("high_confidence_3") :-
+        failed({Heuristics.SOURCE_CODE_REPO.value}),
+        failed({Heuristics.HIGH_RELEASE_FREQUENCY.value}),
+        passed({Heuristics.UNCHANGED_RELEASE.value}),
+        failed({Heuristics.CLOSER_RELEASE_JOIN_DATE.value}),
         forceSetup.
 
     % Package released recently with little detail, with multiple releases as a trust marker, but frequent and with
     % the same code.
-    {Confidence.MEDIUM.value}::medium :- quickUndetailed,
-        not {Heuristics.HIGH_RELEASE_FREQUENCY.value},
-        not {Heuristics.UNCHANGED_RELEASE.value},
-        {Heuristics.SUSPICIOUS_SETUP.value}.
+    {Confidence.MEDIUM.value}::result("medium_confidence_1") :-
+        quickUndetailed,
+        failed({Heuristics.HIGH_RELEASE_FREQUENCY.value}),
+        failed({Heuristics.UNCHANGED_RELEASE.value}),
+        passed({Heuristics.SUSPICIOUS_SETUP.value}).
 
     % Package released recently with little detail and an anomalous version number for a single-release package.
-    {Confidence.MEDIUM.value}::medium :- quickUndetailed,
-        not {Heuristics.ONE_RELEASE.value},
-        {Heuristics.WHEEL_ABSENCE.value},
-        not {Heuristics.ANOMALOUS_VERSION.value}.
-
-    {problog_result_access} :- high.
-    {problog_result_access} :- medium.
-
-    query({problog_result_access}).
+    {Confidence.MEDIUM.value}::result("medium_confidence_2") :-
+        quickUndetailed,
+        failed({Heuristics.ONE_RELEASE.value}),
+        passed({Heuristics.WHEEL_ABSENCE.value}),
+        failed({Heuristics.ANOMALOUS_VERSION.value}).
+
+    % ----- Evaluation -----
+    query(result(_)).
     """
 
 
diff --git a/tests/slsa_analyzer/checks/test_detect_malicious_metadata_check.py b/tests/slsa_analyzer/checks/test_detect_malicious_metadata_check.py
index c6ecb044d..3a2090cb0 100644
--- a/tests/slsa_analyzer/checks/test_detect_malicious_metadata_check.py
+++ b/tests/slsa_analyzer/checks/test_detect_malicious_metadata_check.py
@@ -12,6 +12,7 @@
 from pytest_httpserver import HTTPServer
 
 from macaron.config.defaults import load_defaults
+from macaron.malware_analyzer.pypi_heuristics.heuristics import HeuristicResult, Heuristics
 from macaron.slsa_analyzer.build_tool.base_build_tool import BaseBuildTool
 from macaron.slsa_analyzer.checks.check_result import CheckResultType
 from macaron.slsa_analyzer.checks.detect_malicious_metadata_check import DetectMaliciousMetadataCheck
@@ -98,3 +99,33 @@ def test_detect_malicious_metadata(
     ).respond_with_json({})
 
     assert check.run_check(ctx).result_type == expected
+
+
+@pytest.mark.parametrize(
+    ("combination"),
+    [
+        pytest.param(
+            {
+                # similar to rule ID high_confidence_1, but SUSPICIOUS_SETUP is skipped since the file does not exist,
+                # so the rule should not trigger.
+                Heuristics.EMPTY_PROJECT_LINK: HeuristicResult.FAIL,
+                Heuristics.SOURCE_CODE_REPO: HeuristicResult.SKIP,
+                Heuristics.ONE_RELEASE: HeuristicResult.FAIL,
+                Heuristics.HIGH_RELEASE_FREQUENCY: HeuristicResult.SKIP,
+                Heuristics.UNCHANGED_RELEASE: HeuristicResult.SKIP,
+                Heuristics.CLOSER_RELEASE_JOIN_DATE: HeuristicResult.FAIL,
+                Heuristics.SUSPICIOUS_SETUP: HeuristicResult.SKIP,
+                Heuristics.WHEEL_ABSENCE: HeuristicResult.FAIL,
+                Heuristics.ANOMALOUS_VERSION: HeuristicResult.PASS,
+            },
+            id="test_skipped_evaluation",
+        )
+    ],
+)
+def test_evaluations(combination: dict[Heuristics, HeuristicResult]) -> None:
+    """Test heuristic combinations to ensure they evaluate as expected."""
+    check = DetectMaliciousMetadataCheck()
+
+    confidence, _ = check.evaluate_heuristic_results(combination)
+
+    assert confidence == 0

From b79e9dfbe9ab2c1d99a67a0117e3114430716d2a Mon Sep 17 00:00:00 2001
From: Carl Flottmann <carl.flottmann@oracle.com>
Date: Thu, 27 Mar 2025 15:38:04 +1000
Subject: [PATCH 02/10] docs: updated README for contributing to the problog
 model appropriately

Signed-off-by: Carl Flottmann <carl.flottmann@oracle.com>
---
 src/macaron/malware_analyzer/README.md                       | 5 +++--
 .../slsa_analyzer/checks/detect_malicious_metadata_check.py  | 4 +---
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/src/macaron/malware_analyzer/README.md b/src/macaron/malware_analyzer/README.md
index 7617e4156..592a864c9 100644
--- a/src/macaron/malware_analyzer/README.md
+++ b/src/macaron/malware_analyzer/README.md
@@ -61,8 +61,9 @@ When contributing an analyzer, it must meet the following requirements:
 - The analyzer name must be added to [heuristics.py](./pypi_heuristics/heuristics.py) file so it can be used for rule combinations in [detect_malicious_metadata_check.py](../slsa_analyzer/checks/detect_malicious_metadata_check.py)
 - Update the `malware_rules_problog_model` in [detect_malicious_metadata_check.py](../slsa_analyzer/checks/detect_malicious_metadata_check.py) with logical statements where the heuristic should be included. When adding new rules, please follow the following guidelines:
    - Provide a [confidence value](../slsa_analyzer/checks/check_result.py) using the `Confidence` enum.
-   - Provide a name based on this confidence value (i.e. `high`, `medium`, or `low`)
-   - If it does not already exist, make sure to assign this to the result variable (`problog_result_access`)
+   - Ensure it is assigned to the "result" string name, otherwise it will not be queried and evaluated.
+   - Assign a string rule ID to the rule. This will be used to backtrack to determine if it was triggered.
+   - Make sure to wrap pass/fail statements in `passed()` and `failed()`. Not doing so may result in undesirable behaviour, see the comments in the model for more details.
    - If there are commonly used combinations introduced by adding the heuristic, combine and justify them at the top of the static model (see `quickUndetailed` and `forceSetup` as current examples).  
 
 ### Confidence Score Motivation
diff --git a/src/macaron/slsa_analyzer/checks/detect_malicious_metadata_check.py b/src/macaron/slsa_analyzer/checks/detect_malicious_metadata_check.py
index cee65c186..74b4278c5 100644
--- a/src/macaron/slsa_analyzer/checks/detect_malicious_metadata_check.py
+++ b/src/macaron/slsa_analyzer/checks/detect_malicious_metadata_check.py
@@ -336,9 +336,7 @@ def run_check(self, ctx: AnalyzeContext) -> CheckResultData:
     % ----- Heuristic groupings -----
     % These are common combinations of heuristics that are used in many of the rules, thus themselves representing
     % certain behaviors. When changing or adding rules here, if there are frequent combinations of particular
-    % heuristics, group them together here. Note, these should only be used to check if a grouping statement
-    % is true. Evaluating 'not quickUndetailed' would be true if empty project link and closer release join
-    % date passed, or if they were both skipped, which is not desired behaviour.
+    % heuristics, group them together here.
 
     % Maintainer has recently joined, publishing an undetailed page with no links.
     quickUndetailed :- failed({Heuristics.EMPTY_PROJECT_LINK.value}), failed({Heuristics.CLOSER_RELEASE_JOIN_DATE.value}).

From de5a63e309a893f4825d8e78b6a6a50988943abc Mon Sep 17 00:00:00 2001
From: Carl Flottmann <carl.flottmann@oracle.com>
Date: Thu, 27 Mar 2025 16:31:57 +1000
Subject: [PATCH 03/10] chore: improved rule ID names

Signed-off-by: Carl Flottmann <carl.flottmann@oracle.com>
---
 .../checks/detect_malicious_metadata_check.py          | 10 +++++-----
 .../checks/test_detect_malicious_metadata_check.py     |  2 +-
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/macaron/slsa_analyzer/checks/detect_malicious_metadata_check.py b/src/macaron/slsa_analyzer/checks/detect_malicious_metadata_check.py
index 74b4278c5..a34b378a4 100644
--- a/src/macaron/slsa_analyzer/checks/detect_malicious_metadata_check.py
+++ b/src/macaron/slsa_analyzer/checks/detect_malicious_metadata_check.py
@@ -347,14 +347,14 @@ def run_check(self, ctx: AnalyzeContext) -> CheckResultData:
     % ----- Suspicious Combinations -----
 
     % Package released recently with little detail, forcing the setup.py to run.
-    {Confidence.HIGH.value}::result("high_confidence_1") :-
+    {Confidence.HIGH.value}::result("malware_high_confidence_1") :-
         quickUndetailed, forceSetup, failed({Heuristics.ONE_RELEASE.value}).
-    {Confidence.HIGH.value}::result("high_confidence_2") :-
+    {Confidence.HIGH.value}::result("malware_high_confidence_2") :-
         quickUndetailed, forceSetup, failed({Heuristics.HIGH_RELEASE_FREQUENCY.value}).
 
     % Package released recently with little detail, with some more refined trust markers introduced: project links,
     % multiple different releases, but there is no source code repository matching it and the setup is suspicious.
-    {Confidence.HIGH.value}::result("high_confidence_3") :-
+    {Confidence.HIGH.value}::result("malware_high_confidence_3") :-
         failed({Heuristics.SOURCE_CODE_REPO.value}),
         failed({Heuristics.HIGH_RELEASE_FREQUENCY.value}),
         passed({Heuristics.UNCHANGED_RELEASE.value}),
@@ -363,14 +363,14 @@ def run_check(self, ctx: AnalyzeContext) -> CheckResultData:
 
     % Package released recently with little detail, with multiple releases as a trust marker, but frequent and with
     % the same code.
-    {Confidence.MEDIUM.value}::result("medium_confidence_1") :-
+    {Confidence.MEDIUM.value}::result("malware_medium_confidence_1") :-
         quickUndetailed,
         failed({Heuristics.HIGH_RELEASE_FREQUENCY.value}),
         failed({Heuristics.UNCHANGED_RELEASE.value}),
         passed({Heuristics.SUSPICIOUS_SETUP.value}).
 
     % Package released recently with little detail and an anomalous version number for a single-release package.
-    {Confidence.MEDIUM.value}::result("medium_confidence_2") :-
+    {Confidence.MEDIUM.value}::result("malware_medium_confidence_2") :-
         quickUndetailed,
         failed({Heuristics.ONE_RELEASE.value}),
         passed({Heuristics.WHEEL_ABSENCE.value}),
diff --git a/tests/slsa_analyzer/checks/test_detect_malicious_metadata_check.py b/tests/slsa_analyzer/checks/test_detect_malicious_metadata_check.py
index 3a2090cb0..fcc7cccc1 100644
--- a/tests/slsa_analyzer/checks/test_detect_malicious_metadata_check.py
+++ b/tests/slsa_analyzer/checks/test_detect_malicious_metadata_check.py
@@ -106,7 +106,7 @@ def test_detect_malicious_metadata(
     [
         pytest.param(
             {
-                # similar to rule ID high_confidence_1, but SUSPICIOUS_SETUP is skipped since the file does not exist,
+                # similar to rule ID malware_high_confidence_1, but SUSPICIOUS_SETUP is skipped since the file does not exist,
                 # so the rule should not trigger.
                 Heuristics.EMPTY_PROJECT_LINK: HeuristicResult.FAIL,
                 Heuristics.SOURCE_CODE_REPO: HeuristicResult.SKIP,

From 3118f3841eca6dc78d9bb77160ce25f152aa99c6 Mon Sep 17 00:00:00 2001
From: Carl Flottmann <carl.flottmann@oracle.com>
Date: Mon, 31 Mar 2025 10:33:16 +1000
Subject: [PATCH 04/10] fix: confidence is now accumulated appropriately, such
 that several rules triggers increase the confidence

Signed-off-by: Carl Flottmann <carl.flottmann@oracle.com>
---
 .../checks/detect_malicious_metadata_check.py  | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/src/macaron/slsa_analyzer/checks/detect_malicious_metadata_check.py b/src/macaron/slsa_analyzer/checks/detect_malicious_metadata_check.py
index a34b378a4..b04f8a290 100644
--- a/src/macaron/slsa_analyzer/checks/detect_malicious_metadata_check.py
+++ b/src/macaron/slsa_analyzer/checks/detect_malicious_metadata_check.py
@@ -140,11 +140,18 @@ def evaluate_heuristic_results(
 
         Returns
         -------
-        tuple[float, JsonType]
+        tuple[float, list[str]]
             Returns the confidence associated with the detected malicious combination, and associated rule IDs detailing
             what rules were triggered.
         """
         facts_list: list[str] = []
+        triggered_rules = []
+        # confidence is calculated using the probability of the package being benign, so the negation of the confidence values
+        # in the problog model. Multiplying these probabilities together on several triggers will further decrease the probability
+        # of the package being benign. This is then negated after calculation to get the probability of the package being malicious.
+        # If no rules are triggered, this will simply result in 1.0 - 1.0 = 0.0.
+        confidence: float = 1.0
+
         for heuristic, result in heuristic_results.items():
             if result == HeuristicResult.PASS:
                 facts_list.append(f"{heuristic.value} :- true.")
@@ -159,10 +166,11 @@ def evaluate_heuristic_results(
         problog_model = PrologString(problog_code)
         problog_results: dict[Term, float] = get_evaluatable().create_from(problog_model).evaluate()
 
-        confidence = sum(conf for conf in problog_results.values() if conf is not None)
-        triggered_rules: JsonType = ["No malicious rules triggered"]
-        if confidence > 0:
-            triggered_rules = [term.args[0] for term in problog_results]
+        for term, conf in problog_results.items():
+            if conf is not None and conf > 0:
+                confidence *= 1.0 - conf  # decrease the probability of the package being benign
+                triggered_rules.append(term.args[0])
+        confidence = round(1.0 - confidence, 2)  # 2 decimal places
 
         return confidence, triggered_rules
 

From 69c6f1c1215ef12c9a738c0d3b2a166d4416367a Mon Sep 17 00:00:00 2001
From: Carl Flottmann <carl.flottmann@oracle.com>
Date: Mon, 31 Mar 2025 10:44:46 +1000
Subject: [PATCH 05/10] chore: added comment example for confidence calculation

Signed-off-by: Carl Flottmann <carl.flottmann@oracle.com>
---
 .../slsa_analyzer/checks/detect_malicious_metadata_check.py    | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/macaron/slsa_analyzer/checks/detect_malicious_metadata_check.py b/src/macaron/slsa_analyzer/checks/detect_malicious_metadata_check.py
index b04f8a290..6cb012ed1 100644
--- a/src/macaron/slsa_analyzer/checks/detect_malicious_metadata_check.py
+++ b/src/macaron/slsa_analyzer/checks/detect_malicious_metadata_check.py
@@ -150,6 +150,9 @@ def evaluate_heuristic_results(
         # in the problog model. Multiplying these probabilities together on several triggers will further decrease the probability
         # of the package being benign. This is then negated after calculation to get the probability of the package being malicious.
         # If no rules are triggered, this will simply result in 1.0 - 1.0 = 0.0.
+        # For example, if a LOW rule and MEDIUM rule are triggered, with confidences 0.4 and 0.7 respectively, this would result in
+        # the following calculation for confidence in package maliciousness:
+        # 1 - (1.0 * (1 - 0.4) * (1 - 0.7)) = 0.82
         confidence: float = 1.0
 
         for heuristic, result in heuristic_results.items():

From fe3037999f91c84ca5fbf0336d781bf09764f6e5 Mon Sep 17 00:00:00 2001
From: Carl Flottmann <carl.flottmann@oracle.com>
Date: Mon, 31 Mar 2025 16:22:13 +1000
Subject: [PATCH 06/10] fix: problog performs more accurate aggregation.
 Wrappers explanation improved.

Signed-off-by: Carl Flottmann <carl.flottmann@oracle.com>
---
 .../checks/detect_malicious_metadata_check.py | 58 +++++++++++--------
 1 file changed, 35 insertions(+), 23 deletions(-)

diff --git a/src/macaron/slsa_analyzer/checks/detect_malicious_metadata_check.py b/src/macaron/slsa_analyzer/checks/detect_malicious_metadata_check.py
index 6cb012ed1..780d6152f 100644
--- a/src/macaron/slsa_analyzer/checks/detect_malicious_metadata_check.py
+++ b/src/macaron/slsa_analyzer/checks/detect_malicious_metadata_check.py
@@ -145,15 +145,7 @@ def evaluate_heuristic_results(
             what rules were triggered.
         """
         facts_list: list[str] = []
-        triggered_rules = []
-        # confidence is calculated using the probability of the package being benign, so the negation of the confidence values
-        # in the problog model. Multiplying these probabilities together on several triggers will further decrease the probability
-        # of the package being benign. This is then negated after calculation to get the probability of the package being malicious.
-        # If no rules are triggered, this will simply result in 1.0 - 1.0 = 0.0.
-        # For example, if a LOW rule and MEDIUM rule are triggered, with confidences 0.4 and 0.7 respectively, this would result in
-        # the following calculation for confidence in package maliciousness:
-        # 1 - (1.0 * (1 - 0.4) * (1 - 0.7)) = 0.82
-        confidence: float = 1.0
+        triggered_rules: dict[str, JsonType] = {}
 
         for heuristic, result in heuristic_results.items():
             if result == HeuristicResult.PASS:
@@ -169,11 +161,11 @@ def evaluate_heuristic_results(
         problog_model = PrologString(problog_code)
         problog_results: dict[Term, float] = get_evaluatable().create_from(problog_model).evaluate()
 
-        for term, conf in problog_results.items():
-            if conf is not None and conf > 0:
-                confidence *= 1.0 - conf  # decrease the probability of the package being benign
-                triggered_rules.append(term.args[0])
-        confidence = round(1.0 - confidence, 2)  # 2 decimal places
+        confidence = problog_results.pop(Term(self.problog_result_access), 0.0)
+        if confidence > 0:  # a rule was triggered
+            for term, conf in problog_results.items():
+                if term.args:
+                    triggered_rules[str(term.args[0])] = conf
 
         return confidence, triggered_rules
 
@@ -336,11 +328,21 @@ def run_check(self, ctx: AnalyzeContext) -> CheckResultData:
         AnomalousVersionAnalyzer,
     ]
 
+    problog_result_access = "result"
+
     malware_rules_problog_model = f"""
     % ----- Wrappers ------
-    % These should be used to logically check for a pass or fail on a heuristic for the rest of the model. They exist since,
-    % when a heuristic is skipped, it is ommitted from being defined in the ProbLog model, and as such these try_call statements
-    % are needed to handle referencing an undefined fact.
+    % When a heuristic is skipped, it is omitted from the problog model facts definition. This means that references in this
+    % static model must account for when they are not existent. These wrappers perform this function using the inbuilt try_call
+    % problog function. It will try to evaluate the provided logic, and return false if it encounters an error, such as the fact
+    % not being defined. For example, you are expecting A to pass, so we do:
+    %
+    % passed(A)
+    %
+    % If A was 'true', then this will return true, as A did pass. If A was 'false', then this will return false, as A did not pass.
+    % If A was not defined, then this will return false, as A did not pass.
+    % Please use these wrappers throughout the problog model for logic definitions.
+
     passed(H) :- try_call(H).
     failed(H) :- try_call(not H).
 
@@ -358,14 +360,14 @@ def run_check(self, ctx: AnalyzeContext) -> CheckResultData:
     % ----- Suspicious Combinations -----
 
     % Package released recently with little detail, forcing the setup.py to run.
-    {Confidence.HIGH.value}::result("malware_high_confidence_1") :-
+    {Confidence.HIGH.value}::trigger(malware_high_confidence_1) :-
         quickUndetailed, forceSetup, failed({Heuristics.ONE_RELEASE.value}).
-    {Confidence.HIGH.value}::result("malware_high_confidence_2") :-
+    {Confidence.HIGH.value}::trigger(malware_high_confidence_2) :-
         quickUndetailed, forceSetup, failed({Heuristics.HIGH_RELEASE_FREQUENCY.value}).
 
     % Package released recently with little detail, with some more refined trust markers introduced: project links,
     % multiple different releases, but there is no source code repository matching it and the setup is suspicious.
-    {Confidence.HIGH.value}::result("malware_high_confidence_3") :-
+    {Confidence.HIGH.value}::trigger(malware_high_confidence_3) :-
         failed({Heuristics.SOURCE_CODE_REPO.value}),
         failed({Heuristics.HIGH_RELEASE_FREQUENCY.value}),
         passed({Heuristics.UNCHANGED_RELEASE.value}),
@@ -374,21 +376,31 @@ def run_check(self, ctx: AnalyzeContext) -> CheckResultData:
 
     % Package released recently with little detail, with multiple releases as a trust marker, but frequent and with
     % the same code.
-    {Confidence.MEDIUM.value}::result("malware_medium_confidence_1") :-
+    {Confidence.MEDIUM.value}::trigger(malware_medium_confidence_1) :-
         quickUndetailed,
         failed({Heuristics.HIGH_RELEASE_FREQUENCY.value}),
         failed({Heuristics.UNCHANGED_RELEASE.value}),
         passed({Heuristics.SUSPICIOUS_SETUP.value}).
 
     % Package released recently with little detail and an anomalous version number for a single-release package.
-    {Confidence.MEDIUM.value}::result("malware_medium_confidence_2") :-
+    {Confidence.MEDIUM.value}::trigger(malware_medium_confidence_2) :-
         quickUndetailed,
         failed({Heuristics.ONE_RELEASE.value}),
         passed({Heuristics.WHEEL_ABSENCE.value}),
         failed({Heuristics.ANOMALOUS_VERSION.value}).
 
     % ----- Evaluation -----
-    query(result(_)).
+
+    % Aggregate result
+    {problog_result_access} :- trigger(malware_high_confidence_1).
+    {problog_result_access} :- trigger(malware_high_confidence_2).
+    {problog_result_access} :- trigger(malware_high_confidence_3).
+    {problog_result_access} :- trigger(malware_medium_confidence_2).
+    {problog_result_access} :- trigger(malware_medium_confidence_1).
+    query({problog_result_access}).
+
+    % Explainability
+    query(trigger(_)).
     """
 
 

From 6d50aaa0bb4ba95dae9a2b4c82e424be32e80f8b Mon Sep 17 00:00:00 2001
From: Carl Flottmann <carl.flottmann@oracle.com>
Date: Mon, 31 Mar 2025 16:29:19 +1000
Subject: [PATCH 07/10] docs: updated readme for contributing to problog model.

Signed-off-by: Carl Flottmann <carl.flottmann@oracle.com>
---
 src/macaron/malware_analyzer/README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/macaron/malware_analyzer/README.md b/src/macaron/malware_analyzer/README.md
index 592a864c9..d5d30a670 100644
--- a/src/macaron/malware_analyzer/README.md
+++ b/src/macaron/malware_analyzer/README.md
@@ -61,8 +61,8 @@ When contributing an analyzer, it must meet the following requirements:
 - The analyzer name must be added to [heuristics.py](./pypi_heuristics/heuristics.py) file so it can be used for rule combinations in [detect_malicious_metadata_check.py](../slsa_analyzer/checks/detect_malicious_metadata_check.py)
 - Update the `malware_rules_problog_model` in [detect_malicious_metadata_check.py](../slsa_analyzer/checks/detect_malicious_metadata_check.py) with logical statements where the heuristic should be included. When adding new rules, please follow the following guidelines:
    - Provide a [confidence value](../slsa_analyzer/checks/check_result.py) using the `Confidence` enum.
-   - Ensure it is assigned to the "result" string name, otherwise it will not be queried and evaluated.
-   - Assign a string rule ID to the rule. This will be used to backtrack to determine if it was triggered.
+   - Ensure it is assigned to the `problog_result_access` string variable, otherwise it will not be queried and evaluated.
+   - Assign a rule ID to the rule. This will be used to backtrack to determine if it was triggered.
    - Make sure to wrap pass/fail statements in `passed()` and `failed()`. Not doing so may result in undesirable behaviour, see the comments in the model for more details.
    - If there are commonly used combinations introduced by adding the heuristic, combine and justify them at the top of the static model (see `quickUndetailed` and `forceSetup` as current examples).  
 

From f1b6b4e43939fd2441534768b18fe84c765b9c63 Mon Sep 17 00:00:00 2001
From: Carl Flottmann <carl.flottmann@oracle.com>
Date: Wed, 9 Apr 2025 11:34:58 +1000
Subject: [PATCH 08/10] fix: removed explicit pass on
 malware_medium_confidence_2 to make the rule ignorant of wheel absence result

Signed-off-by: Carl Flottmann <carl.flottmann@oracle.com>
---
 .../slsa_analyzer/checks/detect_malicious_metadata_check.py      | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/macaron/slsa_analyzer/checks/detect_malicious_metadata_check.py b/src/macaron/slsa_analyzer/checks/detect_malicious_metadata_check.py
index 780d6152f..dd60c5ac1 100644
--- a/src/macaron/slsa_analyzer/checks/detect_malicious_metadata_check.py
+++ b/src/macaron/slsa_analyzer/checks/detect_malicious_metadata_check.py
@@ -386,7 +386,6 @@ def run_check(self, ctx: AnalyzeContext) -> CheckResultData:
     {Confidence.MEDIUM.value}::trigger(malware_medium_confidence_2) :-
         quickUndetailed,
         failed({Heuristics.ONE_RELEASE.value}),
-        passed({Heuristics.WHEEL_ABSENCE.value}),
         failed({Heuristics.ANOMALOUS_VERSION.value}).
 
     % ----- Evaluation -----

From 03035cf9972eda128d9211182e4e5daceba273fe Mon Sep 17 00:00:00 2001
From: Carl Flottmann <carl.flottmann@oracle.com>
Date: Wed, 9 Apr 2025 14:04:03 +1000
Subject: [PATCH 09/10] chore: updated docstring for evaluate heuristic results
 return type

Signed-off-by: Carl Flottmann <carl.flottmann@oracle.com>
---
 .../slsa_analyzer/checks/detect_malicious_metadata_check.py  | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/macaron/slsa_analyzer/checks/detect_malicious_metadata_check.py b/src/macaron/slsa_analyzer/checks/detect_malicious_metadata_check.py
index dd60c5ac1..9d20e12bc 100644
--- a/src/macaron/slsa_analyzer/checks/detect_malicious_metadata_check.py
+++ b/src/macaron/slsa_analyzer/checks/detect_malicious_metadata_check.py
@@ -140,9 +140,9 @@ def evaluate_heuristic_results(
 
         Returns
         -------
-        tuple[float, list[str]]
+        tuple[float, JsonType]
             Returns the confidence associated with the detected malicious combination, and associated rule IDs detailing
-            what rules were triggered.
+            what rules were triggered and their confidence as a dict[str, float] type.
         """
         facts_list: list[str] = []
         triggered_rules: dict[str, JsonType] = {}
@@ -328,6 +328,7 @@ def run_check(self, ctx: AnalyzeContext) -> CheckResultData:
         AnomalousVersionAnalyzer,
     ]
 
+    # name used to query the result of all problog rules, so it can be accessed outside the model.
     problog_result_access = "result"
 
     malware_rules_problog_model = f"""

From 8d01aa89f1840f1586e831aec9677b4b1ecce049 Mon Sep 17 00:00:00 2001
From: Carl Flottmann <carl.flottmann@oracle.com>
Date: Wed, 9 Apr 2025 14:51:40 +1000
Subject: [PATCH 10/10] test: updated test to check the explainability also

Signed-off-by: Carl Flottmann <carl.flottmann@oracle.com>
---
 .../checks/test_detect_malicious_metadata_check.py           | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/tests/slsa_analyzer/checks/test_detect_malicious_metadata_check.py b/tests/slsa_analyzer/checks/test_detect_malicious_metadata_check.py
index fcc7cccc1..ca4f17ddf 100644
--- a/tests/slsa_analyzer/checks/test_detect_malicious_metadata_check.py
+++ b/tests/slsa_analyzer/checks/test_detect_malicious_metadata_check.py
@@ -126,6 +126,7 @@ def test_evaluations(combination: dict[Heuristics, HeuristicResult]) -> None:
     """Test heuristic combinations to ensure they evaluate as expected."""
     check = DetectMaliciousMetadataCheck()
 
-    confidence, _ = check.evaluate_heuristic_results(combination)
-
+    confidence, triggered_rules = check.evaluate_heuristic_results(combination)
     assert confidence == 0
+    # Expecting this to be a dictionary, so we can ignore the type problems
+    assert len(dict(triggered_rules)) == 0  # type: ignore[arg-type]