Skip to content

Commit 3a8218a

Browse files
committed
Optimize relativize performance.
1 parent f3b0e11 commit 3a8218a

File tree

2 files changed

+125
-191
lines changed

2 files changed

+125
-191
lines changed

src/BaseIRI.js

Lines changed: 64 additions & 101 deletions
Original file line numberDiff line numberDiff line change
@@ -4,136 +4,99 @@ import { escapeRegex } from './Util';
44
// - file: IRIs (which could also use backslashes)
55
// - IRIs containing /. or /.. or //
66
// Tested against a base IRI by BaseIRI.supports; a match means the base is never relativized
const INVALID_OR_UNSUPPORTED = /^:?[^:?#]*(?:[?#]|$)|^file:|^[^:]*:\/*[^?#]+?\/(?:\.\.?(?:\/|$)|\/)/i;
7+
// Relative prefix stored for the last path segment boundary of the base IRI
const CURRENT = './';
8+
// Relative prefix for one level up; repeated once per path segment to climb
const PARENT = '../';
9+
// Query delimiter; also stored as the replacement at the position just after the base's `?`
const QUERY = '?';
10+
// Fragment delimiter; the base IRI is truncated at its first occurrence
const FRAGMENT = '#';
711

812
/**
 * Relativizes absolute IRIs against a fixed base IRI.
 *
 * The base IRI is compiled lazily into a regular expression with one nested
 * optional group per path segment, so a single `exec` finds the longest
 * prefix an IRI shares with the base. The length of that prefix is then
 * looked up in a precalculated table of relative prefixes (`./`, `../`, …).
 */
export default class BaseIRI {
  /**
   * @param {string} base - The base IRI against which IRIs are relativized
   */
  constructor(base) {
    this.base = base;
    // Length of the base IRI without its fragment; set by _getBaseMatcher
    this._baseLength = 0;
    // Lazily created regex that matches the prefix shared with the base IRI
    this._baseMatcher = null;
    // Indexed by match length: the relative prefix that replaces the matched part
    this._pathReplacements = new Array(base.length + 1);
  }

  /**
   * Indicates whether IRIs can be relativized against the given base IRI.
   * @param {string} base - The candidate base IRI
   * @returns {boolean} `false` if the base matches INVALID_OR_UNSUPPORTED
   */
  static supports(base) {
    return !INVALID_OR_UNSUPPORTED.test(base);
  }

  /**
   * Returns the regex that matches the prefix an IRI shares with the base,
   * creating it (and filling the replacement table) on first use.
   * @returns {RegExp} The cached matcher; never matches for unsupported bases
   */
  _getBaseMatcher() {
    if (this._baseMatcher)
      return this._baseMatcher;
    if (!BaseIRI.supports(this.base)) {
      // `/.^/` cannot match any input, so toRelative returns IRIs unchanged
      this._baseMatcher = /.^/;
      return this._baseMatcher;
    }

    // Extract the scheme
    const scheme = /^[^:]*:\/*/.exec(this.base)[0];
    const regexHead = ['^', escapeRegex(scheme)];
    const regexTail = [];

    // Generate a regex for every path segment
    const segments = [];
    const segmenter = /[^/?#]*([/?#])/y;
    segmenter.lastIndex = scheme.length;
    let last = scheme.length;
    let query = 0;
    let fragment = 0;
    let segment;
    while (!query && !fragment && (segment = segmenter.exec(this.base))) {
      // Truncate base resolution path at fragment start
      if (segment[1] === FRAGMENT) {
        fragment = segmenter.lastIndex - 1;
      }
      else {
        // Create regex that matches the segment
        regexHead.push(escapeRegex(segment[0]), '(?:');
        regexTail.push(')?');

        // Create dedicated query string replacement
        if (segment[1] !== QUERY) {
          last = segmenter.lastIndex;
          segments.push(last);
        }
        else {
          query = last = segmenter.lastIndex;
          // -1 when there is no fragment, which also ends the loop
          fragment = this.base.indexOf(FRAGMENT, query);
          this._pathReplacements[query] = QUERY;
        }
      }
    }

    // Precalculate parent path substitutions
    for (let i = 0; i < segments.length; i++)
      this._pathReplacements[segments[i]] = PARENT.repeat(segments.length - i - 1);
    // The deepest segment boundary refers to the current directory;
    // skip when the base has no path segments to avoid a stray `undefined` key
    if (segments.length > 0)
      this._pathReplacements[segments[segments.length - 1]] = CURRENT;

    // Add the remainder of the base IRI (without fragment) to the regex
    this._baseLength = fragment > 0 ? fragment : this.base.length;
    regexHead.push(
      escapeRegex(this.base.substring(last, this._baseLength)),
      query ? '(?:#|$)' : '(?:[?#]|$)',
    );
    this._baseMatcher = new RegExp([...regexHead, ...regexTail].join(''));
    return this._baseMatcher;
  }

  /**
   * Converts an absolute IRI into an IRI relative to the base, if possible.
   * @param {string} iri - The absolute IRI to relativize
   * @returns {string} The relative IRI, or `iri` itself when it does not share
   *   the base's prefix or contains `//`, `/./` or `/../` in its remaining path
   */
  toRelative(iri) {
    // Unsupported or non-matching base IRI
    const match = this._getBaseMatcher().exec(iri);
    if (!match)
      return iri;

    // Exact base IRI match
    const length = match[0].length;
    if (length === this._baseLength && length === iri.length)
      return '';

    // Parent path match
    const parentPath = this._pathReplacements[length];
    if (parentPath) {
      const suffix = iri.substring(length);
      // Don't abbreviate unsupported path
      if (parentPath !== QUERY &&
          /(?:^|\/)(?:\/|..?(?:[/#?]|$))/.test(suffix) && // fast test
          /^(?:[^#?]*?\/)?(?:\/|\.\.?(?:[/#?]|$))/.test(suffix)) // rigorous test
        return iri;
      // Omit ./ with fragment or query string
      if (parentPath === CURRENT && /^[^?#]/.test(suffix))
        return suffix;
      // Append suffix to relative parent path
      return parentPath + suffix;
    }

    // Fragment or query string, so include delimiter
    return iri.substring(length - 1);
  }
}

test/BaseIRI-test.js

Lines changed: 61 additions & 90 deletions
Original file line numberDiff line numberDiff line change
@@ -57,121 +57,92 @@ describe('BaseIRI', () => {
5757
});
5858

5959
describe('A BaseIRI instance', () => {
  // Each case lists: description, base IRI, absolute IRI, and the expected
  // relative IRI. Cases without a fourth argument must be left unchanged.

  // Supported and unsupported base IRIs
  relativizes('an HTTP URL', 'http://example.org/foo/',
    'http://example.org/foo/baz', 'baz');

  relativizes('an HTTPS URL', 'https://example.org/foo/',
    'https://example.org/foo/baz', 'baz');

  relativizes('a file URL', 'file:///tmp/foo/bar',
    'file:///tmp/foo/bar/baz');

  relativizes('a base IRI without scheme', '/tmp/foo/bar',
    '/tmp/foo/bar/baz');

  relativizes('a base IRI containing //', 'http://example.org/foo//bar',
    'http://example.org/foo//bar/baz');

  relativizes('a base IRI containing ./', 'http://example.org/foo/./bar',
    'http://example.org/foo/./bar/baz');

  relativizes('a base IRI containing ../', 'http://example.org/foo/../bar',
    'http://example.org/foo/../bar/baz');

  relativizes('a base IRI ending in //', 'http://example.org/foo//',
    'http://example.org/foo//baz');

  relativizes('a base IRI ending in /.', 'http://example.org/foo/.',
    'http://example.org/foo/./baz');

  relativizes('a base IRI ending in /..', 'http://example.org/foo/..',
    'http://example.org/foo/../baz');

  // IRIs whose remaining path contains unsupported segments
  relativizes('an IRI ending in //', 'http://example.org/foo/',
    'http://example.org/foo//');

  relativizes('an IRI ending in /.', 'http://example.org/foo/',
    'http://example.org/foo/.');

  relativizes('an IRI ending in /..', 'http://example.org/foo/',
    'http://example.org/foo/..');

  relativizes('an IRI ending in /./', 'http://example.org/foo/',
    'http://example.org/foo/./');

  relativizes('an IRI ending in /../', 'http://example.org/foo/',
    'http://example.org/foo/../');

  relativizes('an IRI containing // at the matching position', 'http://example.org/foo/',
    'http://example.org/foo//baz');

  relativizes('an IRI containing ./ at the matching position', 'http://example.org/foo/',
    'http://example.org/foo/./baz');

  relativizes('an IRI containing ../ at the matching position', 'http://example.org/foo/',
    'http://example.org/foo/../baz');

  relativizes('an IRI containing //', 'http://example.org/foo/',
    'http://example.org/foo/bar//baz');

  relativizes('an IRI containing ./', 'http://example.org/foo/',
    'http://example.org/foo/bar/./baz');

  relativizes('an IRI containing ../', 'http://example.org/foo/',
    'http://example.org/foo/bar/../baz');

  // The same sequences are harmless inside a query string or fragment
  relativizes('an IRI containing // in its query string', 'http://example.org/foo/',
    'http://example.org/foo/baz?bar//baz', 'baz?bar//baz');

  relativizes('an IRI containing ./ in its query string', 'http://example.org/foo/',
    'http://example.org/foo/baz?bar/./baz', 'baz?bar/./baz');

  relativizes('an IRI containing ../ in its query string', 'http://example.org/foo/',
    'http://example.org/foo/baz?bar/../baz', 'baz?bar/../baz');

  relativizes('an IRI containing // in its fragment', 'http://example.org/foo/',
    'http://example.org/foo/baz#bar//baz', 'baz#bar//baz');

  relativizes('an IRI containing ./ in its fragment', 'http://example.org/foo/',
    'http://example.org/foo/baz#bar/./baz', 'baz#bar/./baz');

  relativizes('an IRI containing ../ in its fragment', 'http://example.org/foo/',
    'http://example.org/foo/baz#bar/../baz', 'baz#bar/../baz');
});
177141
});
142+
143+
/**
 * Registers a test case that relativizes `absolute` against `base`.
 * @param {string} description - Human-readable subject of the test case
 * @param {string} base - The base IRI passed to the BaseIRI constructor
 * @param {string} absolute - The absolute IRI passed to `toRelative`
 * @param {string} [relative] - The expected relative IRI; when omitted,
 *   the IRI is expected to be returned unchanged
 */
function relativizes(description, base, absolute, relative) {
  // Only an omitted expectation means "not relativized":
  // the empty string is a valid relative IRI (exact base match)
  const isRelativized = relative !== undefined && relative !== null;
  const expected = isRelativized ? relative : absolute;
  it(`${isRelativized ? 'relativizes' : 'does not relativize'} ${description}`, () => {
    const baseIri = new BaseIRI(base);
    expect(baseIri.toRelative(absolute)).toBe(expected);
  });
}

0 commit comments

Comments
 (0)