@@ -128,7 +128,9 @@ def validate_malware(self, pypi_package_json: PyPIPackageJsonAsset) -> tuple[boo
128
128
is_malware , detail_info = sourcecode_analyzer .analyze ()
129
129
return is_malware , detail_info
130
130
131
- def evaluate_heuristic_results (self , heuristic_results : dict [Heuristics , HeuristicResult ]) -> float | None :
131
+ def evaluate_heuristic_results (
132
+ self , heuristic_results : dict [Heuristics , HeuristicResult ]
133
+ ) -> tuple [float , JsonType ]:
132
134
"""Analyse the heuristic results to determine the maliciousness of the package.
133
135
134
136
Parameters
@@ -138,18 +140,17 @@ def evaluate_heuristic_results(self, heuristic_results: dict[Heuristics, Heurist
138
140
139
141
Returns
140
142
-------
141
- float | None
142
- Returns the confidence associated with the detected malicious combination, otherwise None if no associated
143
- malicious combination was triggered.
143
+ tuple[ float, JsonType]
144
+ Returns the confidence associated with the detected malicious combination(s), together with the IDs of the
145
+ rules that were triggered. When no rule is triggered, the confidence is 0 and the list holds a placeholder message.
144
146
"""
145
147
facts_list : list [str ] = []
146
148
for heuristic , result in heuristic_results .items ():
147
- if result == HeuristicResult .SKIP :
148
- facts_list .append (f"0.0::{ heuristic .value } ." )
149
- elif result == HeuristicResult .PASS :
149
+ if result == HeuristicResult .PASS :
150
150
facts_list .append (f"{ heuristic .value } :- true." )
151
- else : # HeuristicResult.FAIL
151
+ elif result == HeuristicResult .FAIL :
152
152
facts_list .append (f"{ heuristic .value } :- false." )
153
+ # Do not define for HeuristicResult.SKIP
153
154
154
155
facts = "\n " .join (facts_list )
155
156
problog_code = f"{ facts } \n \n { self .malware_rules_problog_model } "
@@ -158,10 +159,12 @@ def evaluate_heuristic_results(self, heuristic_results: dict[Heuristics, Heurist
158
159
problog_model = PrologString (problog_code )
159
160
problog_results : dict [Term , float ] = get_evaluatable ().create_from (problog_model ).evaluate ()
160
161
161
- confidence : float | None = problog_results .get (Term (self .problog_result_access ))
162
- if confidence == 0.0 :
163
- return None # no rules were triggered
164
- return confidence
162
+ confidence = sum (conf for conf in problog_results .values () if conf is not None )
163
+ triggered_rules : JsonType = ["No malicious rules triggered" ]
164
+ if confidence > 0 :
165
+ triggered_rules = [term .args [0 ] for term in problog_results ]
166
+
167
+ return confidence , triggered_rules
165
168
166
169
def run_heuristics (
167
170
self , pypi_package_json : PyPIPackageJsonAsset
@@ -278,9 +281,10 @@ def run_check(self, ctx: AnalyzeContext) -> CheckResultData:
278
281
except HeuristicAnalyzerValueError :
279
282
return CheckResultData (result_tables = [], result_type = CheckResultType .UNKNOWN )
280
283
281
- confidence = self .evaluate_heuristic_results (result )
284
+ confidence , triggered_rules = self .evaluate_heuristic_results (result )
285
+ detail_info ["triggered_rules" ] = triggered_rules
282
286
result_type = CheckResultType .FAILED
283
- if confidence is None :
287
+ if not confidence :
284
288
confidence = Confidence .HIGH
285
289
result_type = CheckResultType .PASSED
286
290
elif ctx .dynamic_data ["validate_malware" ]:
@@ -321,51 +325,61 @@ def run_check(self, ctx: AnalyzeContext) -> CheckResultData:
321
325
AnomalousVersionAnalyzer ,
322
326
]
323
327
324
- problog_result_access = "result"
325
-
326
328
malware_rules_problog_model = f"""
327
- % Heuristic groupings
329
+ % ----- Wrappers ------
330
+ % These should be used to logically check for a pass or fail on a heuristic for the rest of the model. They exist since,
331
+ % when a heuristic is skipped, it is omitted from being defined in the ProbLog model, and as such these try_call statements
332
+ % are needed to handle referencing an undefined fact.
333
+ passed(H) :- try_call(H).
334
+ failed(H) :- try_call(not H).
335
+
336
+ % ----- Heuristic groupings -----
328
337
% These are common combinations of heuristics that are used in many of the rules, thus themselves representing
329
338
% certain behaviors. When changing or adding rules here, if there are frequent combinations of particular
330
- % heuristics, group them together here.
339
+ % heuristics, group them together here. Note, these should only be used to check if a grouping statement
340
+ % is true. Evaluating 'not quickUndetailed' would be true if empty project link and closer release join
341
+ % date passed, or if they were both skipped, which is not desired behaviour.
331
342
332
343
% Maintainer has recently joined, publishing an undetailed page with no links.
333
- quickUndetailed :- not { Heuristics .EMPTY_PROJECT_LINK .value } , not { Heuristics .CLOSER_RELEASE_JOIN_DATE .value } .
344
+ quickUndetailed :- failed( { Heuristics .EMPTY_PROJECT_LINK .value } ), failed( { Heuristics .CLOSER_RELEASE_JOIN_DATE .value } ) .
334
345
335
346
% Maintainer releases a suspicious setup.py and forces it to run by omitting a .whl file.
336
- forceSetup :- not { Heuristics .SUSPICIOUS_SETUP .value } , not { Heuristics .WHEEL_ABSENCE .value } .
347
+ forceSetup :- failed( { Heuristics .SUSPICIOUS_SETUP .value } ), failed( { Heuristics .WHEEL_ABSENCE .value } ) .
337
348
338
- % Suspicious Combinations
349
+ % ----- Suspicious Combinations -----
339
350
340
351
% Package released recently with little detail, forcing the setup.py to run.
341
- { Confidence .HIGH .value } ::high :- quickUndetailed, forceSetup, not { Heuristics .ONE_RELEASE .value } .
342
- { Confidence .HIGH .value } ::high :- quickUndetailed, forceSetup, not { Heuristics .HIGH_RELEASE_FREQUENCY .value } .
352
+ { Confidence .HIGH .value } ::result("high_confidence_1") :-
353
+ quickUndetailed, forceSetup, failed({ Heuristics .ONE_RELEASE .value } ).
354
+ { Confidence .HIGH .value } ::result("high_confidence_2") :-
355
+ quickUndetailed, forceSetup, failed({ Heuristics .HIGH_RELEASE_FREQUENCY .value } ).
343
356
344
357
% Package released recently with little detail, with some more refined trust markers introduced: project links,
345
358
% multiple different releases, but there is no source code repository matching it and the setup is suspicious.
346
- { Confidence .HIGH .value } ::high :- not { Heuristics .SOURCE_CODE_REPO .value } ,
347
- not { Heuristics .HIGH_RELEASE_FREQUENCY .value } ,
348
- not { Heuristics .CLOSER_RELEASE_JOIN_DATE .value } ,
349
- { Heuristics .UNCHANGED_RELEASE .value } ,
359
+ { Confidence .HIGH .value } ::result("high_confidence_3") :-
360
+ failed({ Heuristics .SOURCE_CODE_REPO .value } ),
361
+ failed({ Heuristics .HIGH_RELEASE_FREQUENCY .value } ),
362
+ passed({ Heuristics .UNCHANGED_RELEASE .value } ),
363
+ failed({ Heuristics .CLOSER_RELEASE_JOIN_DATE .value } ),
350
364
forceSetup.
351
365
352
366
% Package released recently with little detail, with multiple releases as a trust marker, but frequent and with
353
367
% the same code.
354
- { Confidence .MEDIUM .value } ::medium :- quickUndetailed,
355
- not { Heuristics .HIGH_RELEASE_FREQUENCY .value } ,
356
- not { Heuristics .UNCHANGED_RELEASE .value } ,
357
- { Heuristics .SUSPICIOUS_SETUP .value } .
368
+ { Confidence .MEDIUM .value } ::result("medium_confidence_1") :-
369
+ quickUndetailed,
370
+ failed({ Heuristics .HIGH_RELEASE_FREQUENCY .value } ),
371
+ failed({ Heuristics .UNCHANGED_RELEASE .value } ),
372
+ passed({ Heuristics .SUSPICIOUS_SETUP .value } ).
358
373
359
374
% Package released recently with little detail and an anomalous version number for a single-release package.
360
- { Confidence .MEDIUM .value } ::medium :- quickUndetailed,
361
- not { Heuristics .ONE_RELEASE .value } ,
362
- { Heuristics .WHEEL_ABSENCE .value } ,
363
- not { Heuristics .ANOMALOUS_VERSION .value } .
364
-
365
- { problog_result_access } :- high.
366
- { problog_result_access } :- medium.
367
-
368
- query({ problog_result_access } ).
375
+ { Confidence .MEDIUM .value } ::result("medium_confidence_2") :-
376
+ quickUndetailed,
377
+ failed({ Heuristics .ONE_RELEASE .value } ),
378
+ passed({ Heuristics .WHEEL_ABSENCE .value } ),
379
+ failed({ Heuristics .ANOMALOUS_VERSION .value } ).
380
+
381
+ % ----- Evaluation -----
382
+ query(result(_)).
369
383
"""
370
384
371
385
0 commit comments