diff --git a/lib/bundle.ts b/lib/bundle.ts index 911dcf67..fa1a2d6e 100644 --- a/lib/bundle.ts +++ b/lib/bundle.ts @@ -1,5 +1,3 @@ -import isEqual from "lodash/isEqual"; - import $Ref from "./ref.js"; import type { ParserOptions } from "./options.js"; import Pointer from "./pointer.js"; @@ -8,6 +6,15 @@ import type $Refs from "./refs.js"; import type { $RefParser } from "./index"; import type { JSONSchema } from "./types/index.js"; +const DEBUG_PERFORMANCE = process.env.DEBUG === "true" || (typeof globalThis !== 'undefined' && (globalThis as any).DEBUG_BUNDLE_PERFORMANCE === true); + +const perf = { + mark: (name: string) => DEBUG_PERFORMANCE && performance.mark(name), + measure: (name: string, start: string, end: string) => DEBUG_PERFORMANCE && performance.measure(name, start, end), + log: (message: string, ...args: any[]) => DEBUG_PERFORMANCE && console.log("[PERF] " + message, ...args), + warn: (message: string, ...args: any[]) => DEBUG_PERFORMANCE && console.warn("[PERF] " + message, ...args) +}; + export interface InventoryEntry { $ref: any; circular: any; @@ -24,19 +31,51 @@ export interface InventoryEntry { } /** - * TODO + * Fast lookup using Map instead of linear search with deep equality */ -const findInInventory = (inventory: Array, $refParent: any, $refKey: any) => { - for (const entry of inventory) { - if (entry) { - if (isEqual(entry.parent, $refParent)) { - if (entry.key === $refKey) { - return entry; - } - } +const createInventoryLookup = () => { + const lookup = new Map(); + const objectIds = new WeakMap(); // Use WeakMap to avoid polluting objects + let idCounter = 0; + let lookupCount = 0; + let addCount = 0; + + const getObjectId = (obj: any) => { + if (!objectIds.has(obj)) { + objectIds.set(obj, `obj_${++idCounter}`); } - } - return undefined; + return objectIds.get(obj)!; + }; + + const createInventoryKey = ($refParent: any, $refKey: any) => { + // Use WeakMap-based lookup to avoid polluting the actual schema objects + return `${getObjectId($refParent)}_${$refKey}`; + }; + + return { + add: (entry: InventoryEntry) => { + addCount++; + const key = createInventoryKey(entry.parent, entry.key); + lookup.set(key, entry); + if (addCount % 100 === 0) { + perf.log(`Inventory lookup: Added ${addCount} entries, map size: ${lookup.size}`); + } + }, + find: ($refParent: any, $refKey: any) => { + lookupCount++; + const key = createInventoryKey($refParent, $refKey); + const result = lookup.get(key); + if (lookupCount % 100 === 0) { + perf.log(`Inventory lookup: ${lookupCount} lookups performed`); + } + return result; + }, + remove: (entry: InventoryEntry) => { + const key = createInventoryKey(entry.parent, entry.key); + lookup.delete(key); + }, + getStats: () => ({ lookupCount, addCount, mapSize: lookup.size }) + }; }; /** @@ -49,9 +88,12 @@ const inventory$Ref = ({ $refs, indirections, inventory, + inventoryLookup, options, path, pathFromRoot, + visitedObjects = new WeakSet(), + resolvedRefs = new Map(), }: { /** * The key in `$refParent` that is a JSON Reference @@ -70,6 +112,10 @@ const inventory$Ref = ({ * An array of already-inventoried $ref pointers */ inventory: Array; + /** + * Fast lookup for inventory entries + */ + inventoryLookup: ReturnType; options: ParserOptions; /** * The full path of the JSON Reference at `$refKey`, possibly with a JSON Pointer in the hash @@ -79,13 +125,39 @@ const inventory$Ref = ({ * The path of the JSON Reference at `$refKey`, from the schema root */ pathFromRoot: string; + /** + * Set of already visited objects to avoid infinite loops and redundant processing + */ + visitedObjects?: WeakSet; + /** + * Cache for resolved $ref targets to avoid redundant resolution + */ + resolvedRefs?: Map; }) => { + perf.mark('inventory-ref-start'); const $ref = $refKey === null ? $refParent : $refParent[$refKey]; const $refPath = url.resolve(path, $ref.$ref); - const pointer = $refs._resolve($refPath, pathFromRoot, options); + + // Check cache first to avoid redundant resolution + let pointer = resolvedRefs.get($refPath); + if (!pointer) { + perf.mark('resolve-start'); + pointer = $refs._resolve($refPath, pathFromRoot, options); + perf.mark('resolve-end'); + perf.measure('resolve-time', 'resolve-start', 'resolve-end'); + + if (pointer) { + resolvedRefs.set($refPath, pointer); + perf.log(`Cached resolved $ref: ${$refPath}`); + } + } + if (pointer === null) { + perf.mark('inventory-ref-end'); + perf.measure('inventory-ref-time', 'inventory-ref-start', 'inventory-ref-end'); return; } + const parsed = Pointer.parse(pathFromRoot); const depth = parsed.length; const file = url.stripHash(pointer.path); @@ -95,17 +167,24 @@ const inventory$Ref = ({ indirections += pointer.indirections; // Check if this exact location (parent + key + pathFromRoot) has already been inventoried - const existingEntry = findInInventory(inventory, $refParent, $refKey); + perf.mark('lookup-start'); + const existingEntry = inventoryLookup.find($refParent, $refKey); + perf.mark('lookup-end'); + perf.measure('lookup-time', 'lookup-start', 'lookup-end'); + if (existingEntry && existingEntry.pathFromRoot === pathFromRoot) { // This exact location has already been inventoried, so we don't need to process it again if (depth < existingEntry.depth || indirections < existingEntry.indirections) { removeFromInventory(inventory, existingEntry); + inventoryLookup.remove(existingEntry); } else { + perf.mark('inventory-ref-end'); + perf.measure('inventory-ref-time', 'inventory-ref-start', 'inventory-ref-end'); return; } } - inventory.push({ + const newEntry: InventoryEntry = { $ref, // The JSON Reference (e.g. {$ref: string}) circular: pointer.circular, // Is this $ref pointer DIRECTLY circular? (i.e. it references itself) depth, // How far from the JSON Schema root is this $ref pointer? @@ -118,10 +197,16 @@ const inventory$Ref = ({ parent: $refParent, // The object that contains this $ref pointer pathFromRoot, // The path to the $ref pointer, from the JSON Schema root value: pointer.value, // The resolved value of the $ref pointer - }); + }; + + inventory.push(newEntry); + inventoryLookup.add(newEntry); + + perf.log(`Inventoried $ref: ${$ref.$ref} -> ${file}${hash} (external: ${external}, depth: ${depth})`); // Recursively crawl the resolved value if (!existingEntry || external) { + perf.mark('crawl-recursive-start'); crawl({ parent: pointer.value, key: null, @@ -129,10 +214,18 @@ const inventory$Ref = ({ pathFromRoot, indirections: indirections + 1, inventory, + inventoryLookup, $refs, options, + visitedObjects, + resolvedRefs, }); + perf.mark('crawl-recursive-end'); + perf.measure('crawl-recursive-time', 'crawl-recursive-start', 'crawl-recursive-end'); } + + perf.mark('inventory-ref-end'); + perf.measure('inventory-ref-time', 'inventory-ref-start', 'inventory-ref-end'); }; /** @@ -142,11 +235,14 @@ const crawl = ({ $refs, indirections, inventory, + inventoryLookup, key, options, parent, path, pathFromRoot, + visitedObjects = new WeakSet(), + resolvedRefs = new Map(), }: { $refs: $Refs; indirections: number; @@ -154,6 +250,10 @@ const crawl = ({ * An array of already-inventoried $ref pointers */ inventory: Array; + /** + * Fast lookup for inventory entries + */ + inventoryLookup: ReturnType; /** * The property key of `parent` to be crawled */ @@ -171,11 +271,26 @@ const crawl = ({ * The path of the property being crawled, from the schema root */ pathFromRoot: string; + /** + * Set of already visited objects to avoid infinite loops and redundant processing + */ + visitedObjects?: WeakSet; + /** + * Cache for resolved $ref targets to avoid redundant resolution + */ + resolvedRefs?: Map; }) => { const obj = key === null ? parent : parent[key as keyof typeof parent]; if (obj && typeof obj === "object" && !ArrayBuffer.isView(obj)) { + // Early exit if we've already processed this exact object + if (visitedObjects.has(obj)) { + perf.log(`Skipping already visited object at ${pathFromRoot}`); + return; + } + if ($Ref.isAllowed$Ref(obj)) { + perf.log(`Found $ref at ${pathFromRoot}: ${(obj as any).$ref}`); inventory$Ref({ $refParent: parent, $refKey: key, @@ -183,10 +298,16 @@ const crawl = ({ pathFromRoot, indirections, inventory, + inventoryLookup, $refs, options, + visitedObjects, + resolvedRefs, }); } else { + // Mark this object as visited BEFORE processing its children + visitedObjects.add(obj); + // Crawl the object in a specific order that's optimized for bundling. // This is important because it determines how `pathFromRoot` gets built, // which later determines which keys get dereferenced and which ones get remapped @@ -217,8 +338,11 @@ const crawl = ({ pathFromRoot: keyPathFromRoot, indirections, inventory, + inventoryLookup, $refs, options, + visitedObjects, + resolvedRefs, }); } else { crawl({ @@ -228,8 +352,11 @@ const crawl = ({ pathFromRoot: keyPathFromRoot, indirections, inventory, + inventoryLookup, $refs, options, + visitedObjects, + resolvedRefs, }); } } @@ -260,7 +387,11 @@ const crawl = ({ * @param inventory */ function remap(inventory: InventoryEntry[]) { + perf.log(`Starting remap with ${inventory.length} inventory entries`); + perf.mark('remap-start'); + // Group & sort all the $ref pointers, so they're in the order that we need to dereference/remap them + perf.mark('sort-inventory-start'); inventory.sort((a: InventoryEntry, b: InventoryEntry) => { if (a.file !== b.file) { // Group all the $refs that point to the same file @@ -295,11 +426,22 @@ function remap(inventory: InventoryEntry[]) { } } }); + perf.mark('sort-inventory-end'); + perf.measure('sort-inventory-time', 'sort-inventory-start', 'sort-inventory-end'); + + perf.log(`Sorted ${inventory.length} inventory entries`); let file, hash, pathFromRoot; - for (const entry of inventory) { - // console.log('Re-mapping $ref pointer "%s" at %s', entry.$ref.$ref, entry.pathFromRoot); + perf.mark('remap-loop-start'); + for (const entry of inventory) { + + // Safety check: ensure entry and entry.$ref are valid objects + if (!entry || !entry.$ref || typeof entry.$ref !== 'object') { + perf.warn(`Skipping invalid inventory entry:`, entry); + continue; + } + if (!entry.external && !entry.hash?.startsWith("#/paths/")) { // This $ref already resolves to the main JSON Schema file entry.$ref.$ref = entry.hash; @@ -317,14 +459,45 @@ function remap(inventory: InventoryEntry[]) { // This is the first $ref to point to this value, so dereference the value. // Any other $refs that point to the same value will point to this $ref instead - entry.$ref = entry.parent[entry.key] = $Ref.dereference(entry.$ref, entry.value); + + // Safety check: ensure parent and key are valid before assignment + if (entry.parent && entry.key !== null && entry.key !== undefined) { + try { + perf.mark('dereference-start'); + // Use the original bundling logic: dereference and assign + entry.$ref = entry.parent[entry.key] = $Ref.dereference(entry.$ref, entry.value); + perf.mark('dereference-end'); + perf.measure('dereference-time', 'dereference-start', 'dereference-end'); + } catch (error) { + perf.warn(`Error during dereference:`, error); + perf.warn(`Entry details:`, { + $ref: entry.$ref, + value: entry.value, + pathFromRoot: entry.pathFromRoot + }); + // Skip this entry on error + continue; + } + } else { + perf.warn(`Skipping dereference for invalid parent/key:`, { parent: !!entry.parent, key: entry.key }); + continue; + } if (entry.circular) { // This $ref points to itself - entry.$ref.$ref = entry.pathFromRoot; + if (typeof entry.$ref === 'object' && entry.$ref && '$ref' in entry.$ref) { + entry.$ref.$ref = entry.pathFromRoot; + } } } } + perf.mark('remap-loop-end'); + perf.measure('remap-loop-time', 'remap-loop-start', 'remap-loop-end'); + + perf.mark('remap-end'); + perf.measure('remap-total-time', 'remap-start', 'remap-end'); + + perf.log(`Completed remap of ${inventory.length} entries`); // we want to ensure that any $refs that point to another $ref are remapped to point to the final value // let hadChange = true; @@ -362,8 +535,18 @@ function removeFromInventory(inventory: InventoryEntry[], entry: any) { */ export const bundle = (parser: $RefParser, options: ParserOptions) => { // console.log('Bundling $ref pointers in %s', parser.$refs._root$Ref.path); + perf.mark('bundle-start'); + // Build an inventory of all $ref pointers in the JSON Schema const inventory: InventoryEntry[] = []; + const inventoryLookup = createInventoryLookup(); + + perf.log('Starting crawl phase'); + perf.mark('crawl-phase-start'); + + const visitedObjects = new WeakSet(); + const resolvedRefs = new Map(); // Cache for resolved $ref targets + crawl({ parent: parser, key: "schema", @@ -371,10 +554,47 @@ export const bundle = (parser: $RefParser, options: ParserOptions) => { pathFromRoot: "#", indirections: 0, inventory, + inventoryLookup, $refs: parser.$refs, options, + visitedObjects, + resolvedRefs, }); + perf.mark('crawl-phase-end'); + perf.measure('crawl-phase-time', 'crawl-phase-start', 'crawl-phase-end'); + + const stats = inventoryLookup.getStats(); + perf.log(`Crawl phase complete. Found ${inventory.length} $refs. Lookup stats:`, stats); + // Remap all $ref pointers + perf.log('Starting remap phase'); + perf.mark('remap-phase-start'); remap(inventory); + perf.mark('remap-phase-end'); + perf.measure('remap-phase-time', 'remap-phase-start', 'remap-phase-end'); + + perf.mark('bundle-end'); + perf.measure('bundle-total-time', 'bundle-start', 'bundle-end'); + + perf.log('Bundle complete. Performance summary:'); + + // Log final stats + const finalStats = inventoryLookup.getStats(); + perf.log(`Final inventory stats:`, finalStats); + perf.log(`Resolved refs cache size: ${resolvedRefs.size}`); + + if (DEBUG_PERFORMANCE) { + // Log all performance measures + const measures = performance.getEntriesByType('measure'); + measures.forEach(measure => { + if (measure.name.includes('time')) { + console.log(`${measure.name}: ${measure.duration.toFixed(2)}ms`); + } + }); + + // Clear performance marks and measures for next run + performance.clearMarks(); + performance.clearMeasures(); + } };