Skip to content

fix: fix incorrect skip result evaluation causing false positives in PyPI malware reporting #1031

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Apr 9, 2025
Merged
5 changes: 3 additions & 2 deletions src/macaron/malware_analyzer/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -61,8 +61,9 @@ When contributing an analyzer, it must meet the following requirements:
- The analyzer name must be added to [heuristics.py](./pypi_heuristics/heuristics.py) file so it can be used for rule combinations in [detect_malicious_metadata_check.py](../slsa_analyzer/checks/detect_malicious_metadata_check.py)
- Update the `malware_rules_problog_model` in [detect_malicious_metadata_check.py](../slsa_analyzer/checks/detect_malicious_metadata_check.py) with logical statements where the heuristic should be included. When adding new rules, please follow these guidelines:
- Provide a [confidence value](../slsa_analyzer/checks/check_result.py) using the `Confidence` enum.
- Provide a name based on this confidence value (i.e. `high`, `medium`, or `low`)
- If it does not already exist, make sure to assign this to the result variable (`problog_result_access`)
- Ensure it is assigned to the `problog_result_access` string variable, otherwise it will not be queried and evaluated.
- Assign a rule ID to the rule. This will be used to backtrack to determine if it was triggered.
- Make sure to wrap pass/fail statements in `passed()` and `failed()`. Not doing so may result in undesirable behaviour, see the comments in the model for more details.
- If there are commonly used combinations introduced by adding the heuristic, combine and justify them at the top of the static model (see `quickUndetailed` and `forceSetup` as current examples).

### Confidence Score Motivation
Expand Down
107 changes: 71 additions & 36 deletions src/macaron/slsa_analyzer/checks/detect_malicious_metadata_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,9 @@ def validate_malware(self, pypi_package_json: PyPIPackageJsonAsset) -> tuple[boo
is_malware, detail_info = sourcecode_analyzer.analyze()
return is_malware, detail_info

def evaluate_heuristic_results(self, heuristic_results: dict[Heuristics, HeuristicResult]) -> float | None:
def evaluate_heuristic_results(
self, heuristic_results: dict[Heuristics, HeuristicResult]
) -> tuple[float, JsonType]:
"""Analyse the heuristic results to determine the maliciousness of the package.

Parameters
Expand All @@ -138,18 +140,19 @@ def evaluate_heuristic_results(self, heuristic_results: dict[Heuristics, Heurist

Returns
-------
float | None
Returns the confidence associated with the detected malicious combination, otherwise None if no associated
malicious combination was triggered.
tuple[float, JsonType]
Returns the confidence associated with the detected malicious combination, and associated rule IDs detailing
what rules were triggered and their confidence as a dict[str, float] type.
"""
facts_list: list[str] = []
triggered_rules: dict[str, JsonType] = {}

for heuristic, result in heuristic_results.items():
if result == HeuristicResult.SKIP:
facts_list.append(f"0.0::{heuristic.value}.")
elif result == HeuristicResult.PASS:
if result == HeuristicResult.PASS:
facts_list.append(f"{heuristic.value} :- true.")
else: # HeuristicResult.FAIL
elif result == HeuristicResult.FAIL:
facts_list.append(f"{heuristic.value} :- false.")
# Do not define for HeuristicResult.SKIP

facts = "\n".join(facts_list)
problog_code = f"{facts}\n\n{self.malware_rules_problog_model}"
Expand All @@ -158,10 +161,13 @@ def evaluate_heuristic_results(self, heuristic_results: dict[Heuristics, Heurist
problog_model = PrologString(problog_code)
problog_results: dict[Term, float] = get_evaluatable().create_from(problog_model).evaluate()

confidence: float | None = problog_results.get(Term(self.problog_result_access))
if confidence == 0.0:
return None # no rules were triggered
return confidence
confidence = problog_results.pop(Term(self.problog_result_access), 0.0)
if confidence > 0: # a rule was triggered
for term, conf in problog_results.items():
if term.args:
triggered_rules[str(term.args[0])] = conf

return confidence, triggered_rules

def run_heuristics(
self, pypi_package_json: PyPIPackageJsonAsset
Expand Down Expand Up @@ -278,9 +284,10 @@ def run_check(self, ctx: AnalyzeContext) -> CheckResultData:
except HeuristicAnalyzerValueError:
return CheckResultData(result_tables=[], result_type=CheckResultType.UNKNOWN)

confidence = self.evaluate_heuristic_results(result)
confidence, triggered_rules = self.evaluate_heuristic_results(result)
detail_info["triggered_rules"] = triggered_rules
result_type = CheckResultType.FAILED
if confidence is None:
if not confidence:
confidence = Confidence.HIGH
result_type = CheckResultType.PASSED
elif ctx.dynamic_data["validate_malware"]:
Expand Down Expand Up @@ -321,51 +328,79 @@ def run_check(self, ctx: AnalyzeContext) -> CheckResultData:
AnomalousVersionAnalyzer,
]

# name used to query the result of all problog rules, so it can be accessed outside the model.
problog_result_access = "result"

malware_rules_problog_model = f"""
% Heuristic groupings
% ----- Wrappers ------
% When a heuristic is skipped, it is omitted from the problog model facts definition. This means that references in this
% static model must account for facts that do not exist. These wrappers handle this using the inbuilt try_call
% problog function. It will try to evaluate the provided logic, and return false if it encounters an error, such as the fact
% not being defined. For example, if you are expecting A to pass, write:
%
% passed(A)
%
% If A was 'true', then this will return true, as A did pass. If A was 'false', then this will return false, as A did not pass.
% If A was not defined, then this will return false, as A did not pass.
% Please use these wrappers throughout the problog model for logic definitions.

passed(H) :- try_call(H).
failed(H) :- try_call(not H).

% ----- Heuristic groupings -----
% These are common combinations of heuristics that are used in many of the rules, thus themselves representing
% certain behaviors. When changing or adding rules here, if there are frequent combinations of particular
% heuristics, group them together here.

% Maintainer has recently joined, publishing an undetailed page with no links.
quickUndetailed :- not {Heuristics.EMPTY_PROJECT_LINK.value}, not {Heuristics.CLOSER_RELEASE_JOIN_DATE.value}.
quickUndetailed :- failed({Heuristics.EMPTY_PROJECT_LINK.value}), failed({Heuristics.CLOSER_RELEASE_JOIN_DATE.value}).

% Maintainer releases a suspicious setup.py and forces it to run by omitting a .whl file.
forceSetup :- not {Heuristics.SUSPICIOUS_SETUP.value}, not {Heuristics.WHEEL_ABSENCE.value}.
forceSetup :- failed({Heuristics.SUSPICIOUS_SETUP.value}), failed({Heuristics.WHEEL_ABSENCE.value}).

% Suspicious Combinations
% ----- Suspicious Combinations -----

% Package released recently with little detail, forcing the setup.py to run.
{Confidence.HIGH.value}::high :- quickUndetailed, forceSetup, not {Heuristics.ONE_RELEASE.value}.
{Confidence.HIGH.value}::high :- quickUndetailed, forceSetup, not {Heuristics.HIGH_RELEASE_FREQUENCY.value}.
{Confidence.HIGH.value}::trigger(malware_high_confidence_1) :-
quickUndetailed, forceSetup, failed({Heuristics.ONE_RELEASE.value}).
{Confidence.HIGH.value}::trigger(malware_high_confidence_2) :-
quickUndetailed, forceSetup, failed({Heuristics.HIGH_RELEASE_FREQUENCY.value}).

% Package released recently with little detail, with some more refined trust markers introduced: project links,
% multiple different releases, but there is no source code repository matching it and the setup is suspicious.
{Confidence.HIGH.value}::high :- not {Heuristics.SOURCE_CODE_REPO.value},
not {Heuristics.HIGH_RELEASE_FREQUENCY.value},
not {Heuristics.CLOSER_RELEASE_JOIN_DATE.value},
{Heuristics.UNCHANGED_RELEASE.value},
{Confidence.HIGH.value}::trigger(malware_high_confidence_3) :-
failed({Heuristics.SOURCE_CODE_REPO.value}),
failed({Heuristics.HIGH_RELEASE_FREQUENCY.value}),
passed({Heuristics.UNCHANGED_RELEASE.value}),
failed({Heuristics.CLOSER_RELEASE_JOIN_DATE.value}),
forceSetup.

% Package released recently with little detail, with multiple releases as a trust marker, but frequent and with
% the same code.
{Confidence.MEDIUM.value}::medium :- quickUndetailed,
not {Heuristics.HIGH_RELEASE_FREQUENCY.value},
not {Heuristics.UNCHANGED_RELEASE.value},
{Heuristics.SUSPICIOUS_SETUP.value}.
{Confidence.MEDIUM.value}::trigger(malware_medium_confidence_1) :-
quickUndetailed,
failed({Heuristics.HIGH_RELEASE_FREQUENCY.value}),
failed({Heuristics.UNCHANGED_RELEASE.value}),
passed({Heuristics.SUSPICIOUS_SETUP.value}).

% Package released recently with little detail and an anomalous version number for a single-release package.
{Confidence.MEDIUM.value}::medium :- quickUndetailed,
not {Heuristics.ONE_RELEASE.value},
{Heuristics.WHEEL_ABSENCE.value},
not {Heuristics.ANOMALOUS_VERSION.value}.

{problog_result_access} :- high.
{problog_result_access} :- medium.

{Confidence.MEDIUM.value}::trigger(malware_medium_confidence_2) :-
quickUndetailed,
failed({Heuristics.ONE_RELEASE.value}),
failed({Heuristics.ANOMALOUS_VERSION.value}).

% ----- Evaluation -----

% Aggregate result
{problog_result_access} :- trigger(malware_high_confidence_1).
{problog_result_access} :- trigger(malware_high_confidence_2).
{problog_result_access} :- trigger(malware_high_confidence_3).
{problog_result_access} :- trigger(malware_medium_confidence_2).
{problog_result_access} :- trigger(malware_medium_confidence_1).
query({problog_result_access}).

% Explainability
query(trigger(_)).
"""


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from pytest_httpserver import HTTPServer

from macaron.config.defaults import load_defaults
from macaron.malware_analyzer.pypi_heuristics.heuristics import HeuristicResult, Heuristics
from macaron.slsa_analyzer.build_tool.base_build_tool import BaseBuildTool
from macaron.slsa_analyzer.checks.check_result import CheckResultType
from macaron.slsa_analyzer.checks.detect_malicious_metadata_check import DetectMaliciousMetadataCheck
Expand Down Expand Up @@ -98,3 +99,34 @@ def test_detect_malicious_metadata(
).respond_with_json({})

assert check.run_check(ctx).result_type == expected


@pytest.mark.parametrize(
    "combination",
    [
        pytest.param(
            {
                # Resembles rule ID malware_high_confidence_1, except that SUSPICIOUS_SETUP is skipped
                # since the file does not exist; the rule must therefore not trigger.
                Heuristics.EMPTY_PROJECT_LINK: HeuristicResult.FAIL,
                Heuristics.SOURCE_CODE_REPO: HeuristicResult.SKIP,
                Heuristics.ONE_RELEASE: HeuristicResult.FAIL,
                Heuristics.HIGH_RELEASE_FREQUENCY: HeuristicResult.SKIP,
                Heuristics.UNCHANGED_RELEASE: HeuristicResult.SKIP,
                Heuristics.CLOSER_RELEASE_JOIN_DATE: HeuristicResult.FAIL,
                Heuristics.SUSPICIOUS_SETUP: HeuristicResult.SKIP,
                Heuristics.WHEEL_ABSENCE: HeuristicResult.FAIL,
                Heuristics.ANOMALOUS_VERSION: HeuristicResult.PASS,
            },
            id="test_skipped_evaluation",
        )
    ],
)
def test_evaluations(combination: dict[Heuristics, HeuristicResult]) -> None:
    """Verify that the given heuristic combination evaluates to no detection.

    The combination is fed straight into ``evaluate_heuristic_results``; the
    returned confidence must be zero and no rule IDs may be reported.
    """
    detector = DetectMaliciousMetadataCheck()
    outcome = detector.evaluate_heuristic_results(combination)
    score, fired_rules = outcome

    assert score == 0
    # The rules payload is expected to be a dictionary, so the type complaint can be ignored.
    assert dict(fired_rules) == {}  # type: ignore[arg-type]
Loading