-
Notifications
You must be signed in to change notification settings - Fork 11
/
lunr-tokenizer.js
27 lines (21 loc) · 978 Bytes
/
lunr-tokenizer.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
// The tokenization used by lunr by default doesn't work well with code.
// We extend the set of characters considered separators to include
// parentheses and commas.
const path = require('path');
const fs = require('fs');
/**
 * Adjusts lunr tokenization so that code-like text is split on parentheses
 * and commas, both for the lunr instance loaded in this process and for the
 * search UI script shipped by `@antora/lunr-extension`.
 *
 * Side effects:
 *  - mutates `lunr.tokenizer.separator` and `lunr.QueryLexer.termSeparator`
 *    on the `lunr` module resolved from here;
 *  - appends a small self-invoking snippet to `data/js/search-ui.js` inside
 *    the resolved `@antora/lunr-extension` package directory, unless that
 *    exact snippet is already present in the file.
 *
 * NOTE(review): presumably invoked by Antora as an extension entry point —
 * confirm against the playbook configuration.
 *
 * @throws {Error} if `lunr` or `@antora/lunr-extension` cannot be resolved,
 *   or if the search UI file cannot be read or written.
 */
module.exports.register = () => {
  const lunr = require('lunr');

  // Whitespace, hyphens, parentheses and commas all separate tokens.
  const separator = /[\s\-(),]+/;
  lunr.tokenizer.separator = separator;
  lunr.QueryLexer.termSeparator = separator;

  // The lunr source code is vendored into the UI, and tokenization for search results
  // is done client side, so we have to patch this file to fix tokenization too.
  const patch = `(function () { globalThis.lunr.tokenizer.separator = ${separator.toString()}; })();`;
  const searchUiPath = path.join(
    path.dirname(require.resolve('@antora/lunr-extension/package.json')),
    'data/js/search-ui.js',
  );

  const searchUi = fs.readFileSync(searchUiPath, 'utf8');
  if (searchUi.includes(patch)) {
    // Already patched (possibly by an earlier run); keep the file untouched.
    return;
  }

  // Start the patch on its own line: if the vendored file ends with a `//`
  // line comment and no trailing newline, text appended directly after it
  // would become part of that comment and never execute.
  fs.writeFileSync(searchUiPath, `${searchUi}\n${patch}\n`);
};