Skip to content

Commit b516bf9

Browse files
committed
feat(tarball): implement new EntryFilesAnalyser API
1 parent 5c62ea3 commit b516bf9

File tree

3 files changed

+223
-84
lines changed

3 files changed

+223
-84
lines changed

workspaces/scanner/src/index.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ export async function verify(
9797
...NPM_TOKEN, registry: getLocalRegistryURL(), cache: `${os.homedir()}/.npm`
9898
});
9999

100-
const scanResult = await tarball.scanPackage(dest, packageName);
100+
const scanResult = await tarball.scanPackage(dest);
101101

102102
return scanResult;
103103
}
Lines changed: 164 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,164 @@
1+
// Import Node.js Dependencies
2+
import os from "node:os";
3+
import path from "node:path";
4+
5+
// Import Third-party Dependencies
6+
import pacote from "pacote";
7+
import * as conformance from "@nodesecure/conformance";
8+
import { ManifestManager } from "@nodesecure/mama";
9+
import {
10+
EntryFilesAnalyser,
11+
AstAnalyser,
12+
type Warning,
13+
type Dependency
14+
} from "@nodesecure/js-x-ray";
15+
16+
// Import Internal Dependencies
17+
import {
18+
getTarballComposition
19+
} from "../utils/index.js";
20+
21+
// CONSTANTS
22+
// Extra option object spread into every `pacote.extract` call of this file:
// holds the NODE_SECURE_TOKEN environment variable under `token` when that
// variable is a string, and is an empty object otherwise.
const kNpmToken = typeof process.env.NODE_SECURE_TOKEN === "string" ?
23+
{ token: process.env.NODE_SECURE_TOKEN } :
24+
{};
25+
26+
/**
 * Options accepted by `TarballExtractor.fromGit` and `TarballExtractor.fromNpm`.
 */
export interface NpmTarballExtractOptions {
27+
/** Value forwarded as the `registry` option of `pacote.extract`. */
registry?: string;
28+
}
29+
30+
/**
 * Couples an extracted package directory with its parsed manifest and exposes
 * the analyses that are run against that directory.
 *
 * Instances are usually obtained through one of the static factories
 * (`fromNpm`, `fromGit`, `fromFileSystem`).
 */
export class TarballExtractor {
  /** Extensions of the files considered by the JavaScript SAST step. */
  static JS_EXTENSIONS = new Set([".js", ".mjs", ".cjs"]);

  /** Manifest loaded from the package.json of `archiveLocation`. */
  public manifest: ManifestManager;
  /** Directory that contains the extracted package. */
  public archiveLocation: string;

  /**
   * @param archiveLocation directory that contains the extracted package
   * @param mama manifest associated with that directory
   */
  constructor(
    archiveLocation: string,
    mama: ManifestManager
  ) {
    this.archiveLocation = archiveLocation;
    this.manifest = mama;
  }

  /**
   * Computes the tarball composition and extracts the licenses of the
   * archive directory. Both operations are started concurrently.
   *
   * @returns the `composition` and `spdx` results
   */
  async scan() {
    const [
      composition,
      spdx
    ] = await Promise.all([
      getTarballComposition(this.archiveLocation),
      conformance.extractLicenses(this.archiveLocation)
    ]);

    return {
      spdx,
      composition
    };
  }

  /**
   * Runs the JS-X-Ray analysis on the package.
   *
   * When the manifest exposes at least one JavaScript entry file, the analysis
   * is driven by `EntryFilesAnalyser` starting from those entries. Otherwise
   * every file of `JSFiles` is analysed one by one with `AstAnalyser`.
   *
   * In both cases the keys of `dependencies`, the items of `minified` and the
   * `file` property of each warning are paths relative to `archiveLocation`.
   *
   * @param JSFiles JavaScript files (relative to `archiveLocation`) analysed
   * when the manifest has no JavaScript entry file
   * @returns the collected dependencies (per file), warnings and minified files
   */
  async runJavaScriptSast(
    JSFiles: string[]
  ) {
    const astAnalyzer = new AstAnalyser();

    const dependencies: Record<string, Record<string, Dependency>> = Object.create(null);
    const minified: string[] = [];
    const warnings: Warning[] = [];

    // TODO: make sure all files exist
    const entries = [...this.manifest.getEntryFiles()]
      .filter((entryFile) => TarballExtractor.JS_EXTENSIONS.has(path.extname(entryFile)))
      .map((relPath) => path.join(this.archiveLocation, relPath));

    if (entries.length > 0) {
      const efa = new EntryFilesAnalyser({
        astAnalyzer
      });

      for await (const fileReport of efa.analyse(entries)) {
        // Entries are handed over as paths located under archiveLocation.
        // Bring the reported path back to an archive-relative one so that this
        // branch produces the same kind of keys as the fallback branch below.
        const file = path.relative(this.archiveLocation, fileReport.file);

        warnings.push(
          ...fileReport.warnings.map((warning) => {
            return { ...warning, file };
          })
        );

        if (fileReport.ok) {
          dependencies[file] = Object.fromEntries(
            fileReport.dependencies
          );
          if (fileReport.isMinified) {
            minified.push(file);
          }
        }
      }
    }
    else {
      const { name, type = "script" } = this.manifest.document;

      // Files are analysed sequentially, which keeps the warnings in the
      // same order as JSFiles.
      for (const file of JSFiles) {
        const result = await astAnalyzer.analyseFile(
          path.join(this.archiveLocation, file),
          {
            packageName: name,
            module: type === "module"
          }
        );

        warnings.push(
          ...result.warnings.map((curr) => Object.assign({}, curr, { file }))
        );
        if (result.ok) {
          dependencies[file] = Object.fromEntries(result.dependencies);
          if (result.isMinified) {
            minified.push(file);
          }
        }
      }
    }

    return {
      dependencies,
      warnings,
      minified
    };
  }

  /**
   * Extracts `spec` into `location` with pacote, using the shared token,
   * the provided registry and the `.npm` cache of the user home directory.
   */
  private static async extractWithPacote(
    location: string,
    spec: string,
    options: NpmTarballExtractOptions
  ): Promise<void> {
    const { registry } = options;

    await pacote.extract(spec, location, {
      ...kNpmToken,
      registry,
      cache: `${os.homedir()}/.npm`
    });
  }

  /**
   * Extracts the package designated by a git URL into `location`, then loads
   * it as a `TarballExtractor`.
   *
   * @param location destination directory of the extraction
   * @param url git URL handed over to pacote
   * @param options extraction options
   */
  static async fromGit(
    location: string,
    url: string,
    options: NpmTarballExtractOptions = {}
  ) {
    await TarballExtractor.extractWithPacote(location, url, options);

    return this.fromFileSystem(location);
  }

  /**
   * Extracts the package designated by an npm spec into `location`, then loads
   * it as a `TarballExtractor`.
   *
   * @param location destination directory of the extraction
   * @param spec package spec handed over to pacote
   * @param options extraction options
   */
  static async fromNpm(
    location: string,
    spec: string,
    options: NpmTarballExtractOptions = {}
  ) {
    await TarballExtractor.extractWithPacote(location, spec, options);

    return this.fromFileSystem(location);
  }

  /**
   * Builds a `TarballExtractor` for a package that is already present on disk.
   *
   * @param location directory whose package.json is loaded as the manifest
   */
  static async fromFileSystem(
    location: string
  ): Promise<TarballExtractor> {
    const mama = await ManifestManager.fromPackageJSON(location);

    return new TarballExtractor(location, mama);
  }
}

workspaces/tarball/src/tarball.ts

Lines changed: 58 additions & 83 deletions
Original file line numberDiff line numberDiff line change
@@ -1,26 +1,22 @@
11
// Import Node.js Dependencies
22
import path from "node:path";
3-
import os from "node:os";
43

54
// Import Third-party Dependencies
65
import {
7-
AstAnalyser,
86
type Warning,
97
type Dependency
108
} from "@nodesecure/js-x-ray";
11-
import pacote from "pacote";
129
import * as conformance from "@nodesecure/conformance";
13-
import { ManifestManager } from "@nodesecure/mama";
1410

1511
// Import Internal Dependencies
1612
import {
17-
getTarballComposition,
1813
isSensitiveFile,
1914
analyzeDependencies,
15+
filterDependencyKind,
2016
booleanToFlags
2117
} from "./utils/index.js";
18+
import { TarballExtractor } from "./class/TarballExtractor.class.js";
2219
import * as warnings from "./warnings.js";
23-
import * as sast from "./sast/index.js";
2420

2521
export interface DependencyRef {
2622
id: number;
@@ -53,12 +49,7 @@ export interface DependencyRef {
5349
}
5450

5551
// CONSTANTS
56-
const NPM_TOKEN = typeof process.env.NODE_SECURE_TOKEN === "string" ?
57-
{ token: process.env.NODE_SECURE_TOKEN } :
58-
{};
59-
6052
const kNativeCodeExtensions = new Set([".gyp", ".c", ".cpp", ".node", ".so", ".h"]);
61-
const kJsExtname = new Set([".js", ".mjs", ".cjs"]);
6253

6354
export interface scanDirOrArchiveOptions {
6455
ref: DependencyRef;
@@ -73,33 +64,22 @@ export async function scanDirOrArchive(
7364
options: scanDirOrArchiveOptions
7465
) {
7566
const { ref, location = process.cwd(), tmpLocation = null, registry } = options;
67+
const spec = `${name}@${version}`;
7668

77-
const isNpmTarball = !(tmpLocation === null);
78-
const dest = isNpmTarball ? path.join(tmpLocation, `${name}@${version}`) : location;
79-
80-
// If this is an NPM tarball then we extract it on the disk with pacote.
81-
if (isNpmTarball) {
82-
await pacote.extract(
83-
ref.flags.includes("isGit") ? ref.gitUrl! : `${name}@${version}`,
84-
dest,
85-
{
86-
...NPM_TOKEN,
87-
registry,
88-
cache: `${os.homedir()}/.npm`
89-
}
90-
);
69+
let tarex: TarballExtractor;
70+
if (typeof tmpLocation === "string") {
71+
const location = path.join(tmpLocation, spec);
72+
73+
tarex = ref.flags.includes("isGit") ?
74+
await TarballExtractor.fromGit(location, ref.gitUrl!, { registry }) :
75+
await TarballExtractor.fromNpm(location, spec, { registry });
9176
}
77+
else {
78+
tarex = await TarballExtractor.fromFileSystem(location);
79+
}
80+
const mama = tarex.manifest;
9281

93-
// Read the package.json at the root of the directory or archive.
94-
const [
95-
mama,
96-
composition,
97-
spdx
98-
] = await Promise.all([
99-
ManifestManager.fromPackageJSON(dest),
100-
getTarballComposition(dest),
101-
conformance.extractLicenses(dest)
102-
]);
82+
const { composition, spdx } = await tarex.scan();
10383

10484
{
10585
const { description, engines, repository, scripts } = mama.document;
@@ -119,17 +99,34 @@ export async function scanDirOrArchive(
11999

120100
// Search for minified and runtime dependencies
121101
// Run a JS-X-Ray analysis on each JavaScript files of the project!
122-
const scannedFiles = await sast.scanManyFiles(composition.files, dest, name);
102+
const scannedFiles = await tarex.runJavaScriptSast(
103+
composition.files.filter(
104+
(name) => TarballExtractor.JS_EXTENSIONS.has(path.extname(name))
105+
)
106+
);
123107

124-
ref.warnings.push(...scannedFiles.flatMap((row) => row.warnings));
125-
if (/^0(\.\d+)*$/.test(version)) {
108+
ref.warnings.push(...scannedFiles.warnings);
109+
if (mama.hasZeroSemver) {
126110
ref.warnings.push(warnings.getSemVerWarning(version));
127111
}
128112

129-
const dependencies = [...new Set(scannedFiles.flatMap((row) => row.dependencies))];
130-
const filesDependencies = [...new Set(scannedFiles.flatMap((row) => row.filesDependencies))];
131-
const tryDependencies = new Set(scannedFiles.flatMap((row) => row.tryDependencies));
132-
const minifiedFiles = scannedFiles.filter((row) => row.isMinified).flatMap((row) => row.file);
113+
const files = new Set<string>();
114+
const dependencies = new Set<string>();
115+
const dependenciesInTryBlock = new Set<string>();
116+
117+
for (const [file, fileDeps] of Object.entries(scannedFiles.dependencies)) {
118+
const filtered = filterDependencyKind(
119+
[...Object.keys(fileDeps)],
120+
path.dirname(file)
121+
);
122+
123+
[...Object.entries(fileDeps)]
124+
.flatMap(([name, dependency]) => (dependency.inTry ? [name] : []))
125+
.forEach((name) => dependenciesInTryBlock.add(name));
126+
127+
filtered.packages.forEach((name) => dependencies.add(name));
128+
filtered.files.forEach((file) => files.add(file));
129+
}
133130

134131
const {
135132
nodeDependencies,
@@ -139,8 +136,8 @@ export async function scanDirOrArchive(
139136
unusedDependencies,
140137
flags
141138
} = analyzeDependencies(
142-
dependencies,
143-
{ mama, tryDependencies }
139+
[...dependencies],
140+
{ mama, tryDependencies: dependenciesInTryBlock }
144141
);
145142

146143
ref.size = composition.size;
@@ -150,15 +147,15 @@ export async function scanDirOrArchive(
150147
ref.composition.required_subpath = subpathImportsDependencies;
151148
ref.composition.unused.push(...unusedDependencies);
152149
ref.composition.missing.push(...missingDependencies);
153-
ref.composition.required_files = filesDependencies;
150+
ref.composition.required_files = [...files];
154151
ref.composition.required_nodejs = nodeDependencies;
155-
ref.composition.minified = minifiedFiles;
152+
ref.composition.minified = scannedFiles.minified;
156153

157154
ref.flags.push(...booleanToFlags({
158155
...flags,
159156
hasNoLicense: spdx.uniqueLicenseIds.length === 0,
160157
hasMultipleLicenses: spdx.uniqueLicenseIds.length > 1,
161-
hasMinifiedCode: minifiedFiles.length > 0,
158+
hasMinifiedCode: scannedFiles.minified.length > 0,
162159
hasWarnings: ref.warnings.length > 0 && !ref.flags.includes("hasWarnings"),
163160
hasBannedFile: composition.files.some((path) => isSensitiveFile(path)),
164161
hasNativeCode: mama.flags.isNative ||
@@ -189,46 +186,24 @@ export interface ScannedPackageResult {
189186
}
190187

191188
export async function scanPackage(
192-
dest: string,
193-
packageName?: string
189+
dest: string
194190
): Promise<ScannedPackageResult> {
195-
const [
196-
mama,
191+
const extractor = await TarballExtractor.fromFileSystem(dest);
192+
193+
const {
197194
composition,
198195
spdx
199-
] = await Promise.all([
200-
ManifestManager.fromPackageJSON(dest),
201-
getTarballComposition(dest),
202-
conformance.extractLicenses(dest)
203-
]);
204-
const { type = "script" } = mama.document;
205-
206-
// Search for runtime dependencies
207-
const dependencies: Record<string, Record<string, Dependency>> = Object.create(null);
208-
const minified: string[] = [];
209-
const warnings: Warning[] = [];
210-
211-
const JSFiles = composition.files
212-
.filter((name) => kJsExtname.has(path.extname(name)));
213-
for (const file of JSFiles) {
214-
const result = await new AstAnalyser().analyseFile(
215-
path.join(dest, file),
216-
{
217-
packageName: packageName ?? mama.document.name,
218-
module: type === "module"
219-
}
220-
);
196+
} = await extractor.scan();
221197

222-
warnings.push(
223-
...result.warnings.map((curr) => Object.assign({}, curr, { file }))
224-
);
225-
if (result.ok) {
226-
dependencies[file] = Object.fromEntries(result.dependencies);
227-
if (result.isMinified) {
228-
minified.push(file);
229-
}
230-
}
231-
}
198+
const {
199+
dependencies,
200+
warnings,
201+
minified
202+
} = await extractor.runJavaScriptSast(
203+
composition.files.filter(
204+
(name) => TarballExtractor.JS_EXTENSIONS.has(path.extname(name))
205+
)
206+
);
232207

233208
return {
234209
files: {

0 commit comments

Comments
 (0)