Skip to content

Commit

Permalink
fix(trie): fixes object properties access to avoid searching through …
Browse files Browse the repository at this point in the history
…inherited props (#146)

fix #137
  • Loading branch information
micheleriva authored Oct 6, 2022
1 parent 7ccfb19 commit d02c1d9
Show file tree
Hide file tree
Showing 6 changed files with 102 additions and 16 deletions.
9 changes: 7 additions & 2 deletions src/prefix-tree/trie.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import { create as createNode, removeDocument, updateParent, Node } from "./node";
import { boundedLevenshtein } from "../levenshtein";
import { getOwnProperty } from "../utils";

export type Nodes = Record<string, Node>;

Expand All @@ -19,7 +20,9 @@ function findAllWords(nodes: Nodes, node: Node, output: FindResult, term: string
return;
}

if (!(word in output)) {
// always check in own property to prevent access to inherited properties
// fix https://github.com/LyraSearch/lyra/issues/137
if (!Object.hasOwn(output, word)) {
if (tolerance) {
// computing the absolute difference of letters between the term and the word
const difference = Math.abs(term.length - word.length);
Expand All @@ -36,7 +39,9 @@ function findAllWords(nodes: Nodes, node: Node, output: FindResult, term: string
}

// check if _output[word] exists and then add the doc to it
if (output[word] && docIDs.length) {
// always check in own property to prevent access to inherited properties
// fix https://github.com/LyraSearch/lyra/issues/137
if (getOwnProperty(output, word) && docIDs.length) {
const docs = new Set(output[word]);

for (const doc of docIDs) {
Expand Down
71 changes: 63 additions & 8 deletions src/tokenizer/diacritics.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,20 +2,75 @@ const DIACRITICS_CHARCODE_START = 192;
const DIACRITICS_CHARCODE_END = 252;

/**
 * Replacement table for the Latin-1 code units 192 (À) through 252 (ü).
 *
 * Entry `i` holds the ASCII char code that replaces code unit `192 + i`, or
 * `null` when the character has no replacement and must be kept as-is.
 * The table has exactly 61 entries (252 - 192 + 1).
 *
 * 215 (× MULTIPLICATION SIGN) and 247 (÷ DIVISION SIGN) sit in the middle of
 * the O/o ranges but are not letters, so they are mapped to `null` instead of
 * being rewritten as "O" / "o".
 */
const CHARCODE_REPLACE_MAPPING = [
  // 192-198: À Á Â Ã Ä Å Æ -> A
  65, 65, 65, 65, 65, 65, 65,
  // 199: Ç -> C
  67,
  // 200-203: È É Ê Ë -> E
  69, 69, 69, 69,
  // 204-207: Ì Í Î Ï -> I
  73, 73, 73, 73,
  // 208: Ð (kept)
  null,
  // 209: Ñ -> N
  78,
  // 210-214: Ò Ó Ô Õ Ö -> O
  79, 79, 79, 79, 79,
  // 215: × (kept, it is a math sign and not a letter)
  null,
  // 216: Ø -> O
  79,
  // 217-220: Ù Ú Û Ü -> U
  85, 85, 85, 85,
  // 221-223: Ý Þ ß (kept)
  null, null, null,
  // 224-230: à á â ã ä å æ -> a
  97, 97, 97, 97, 97, 97, 97,
  // 231: ç -> c
  99,
  // 232-235: è é ê ë -> e
  101, 101, 101, 101,
  // 236-239: ì í î ï -> i
  105, 105, 105, 105,
  // 240: ð (kept)
  null,
  // 241: ñ -> n
  110,
  // 242-246: ò ó ô õ ö -> o
  111, 111, 111, 111, 111,
  // 247: ÷ (kept, it is a math sign and not a letter)
  null,
  // 248: ø -> o
  111,
  // 249-252: ù ú û ü -> u
  117, 117, 117, 117,
];

function replaceChar(charCode: number) : number {
/**
 * Translates one UTF-16 code unit through CHARCODE_REPLACE_MAPPING.
 *
 * Code units outside [DIACRITICS_CHARCODE_START, DIACRITICS_CHARCODE_END]
 * are returned untouched, as are code units whose table entry is falsy
 * (for example `null`).
 *
 * @param charCode - the code unit to translate
 * @returns the replacement char code, or `charCode` itself when no replacement applies
 */
function replaceChar(charCode: number): number {
  const insideTable = charCode >= DIACRITICS_CHARCODE_START && charCode <= DIACRITICS_CHARCODE_END;
  if (!insideTable) {
    return charCode;
  }

  const replacement = CHARCODE_REPLACE_MAPPING[charCode - DIACRITICS_CHARCODE_START];
  return replacement || charCode;
}

export function replaceDiacritics(str: string) : string {
export function replaceDiacritics(str: string): string {
const stringCharCode = [];
for (let idx = 0; idx < str.length; idx++) {
stringCharCode[idx] = replaceChar(str.charCodeAt(idx));
Expand Down
4 changes: 4 additions & 0 deletions src/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -52,3 +52,7 @@ export function uniqueId(): string {
}

export const reservedPropertyNames = ["id"];

/**
 * Reads `property` from `object` only if it is an OWN property of that object.
 *
 * Properties inherited through the prototype chain (e.g. `constructor`,
 * `toString` on a plain object) are never returned, which keeps dictionary
 * style objects safe to index with arbitrary user-provided keys.
 *
 * Uses `Object.prototype.hasOwnProperty.call` rather than `Object.hasOwn`
 * because the latter is an ES2022 addition that is missing on older runtimes.
 *
 * @param object - the object to read from; `null`/`undefined` yield `undefined`
 * @param property - the property name to look up
 * @returns the own property value, or `undefined` when the property is
 *          missing, inherited, or `object` is nullish
 */
export function getOwnProperty<T = unknown>(object: any, property: string): T | undefined {
  // hasOwnProperty.call throws on a nullish receiver, so short-circuit first.
  if (object === null || object === undefined) {
    return undefined;
  }

  return Object.prototype.hasOwnProperty.call(object, property) ? object[property] : undefined;
}
3 changes: 2 additions & 1 deletion tests/config/c8-local.json
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
{
"reporter": ["text", "html"]
"reporter": ["text", "html"],
"exclude": ["stemmer"]
}
8 changes: 4 additions & 4 deletions tests/diacritics.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,15 @@ t.test("Diacritics Replacer", t => {
// Checks replaceDiacritics against three inputs and their expected ASCII output.
t.test("Should replace diacritics", t => {
t.plan(3);

// I1: every character is accented; I2: accented and plain letters mixed;
// I3: plain ASCII only.
const I1 = 'áàâãéèêíïóôõöúçñÁÀÂÃÉÈÍÏÓÔÕÖÚÇÑ';
const I2 = 'áaauioèaíïóiuubnÁoiÃotytÓhygÚnÑ';
const I3 = 'aaaaeeeiiooooucnAAAAEEIIOOOOUCN';
const I1 = "áàâãéèêíïóôõöúçñÁÀÂÃÉÈÍÏÓÔÕÖÚÇÑ";
const I2 = "áaauioèaíïóiuubnÁoiÃotytÓhygÚnÑ";
const I3 = "aaaaeeeiiooooucnAAAAEEIIOOOOUCN";

const O1 = replaceDiacritics(I1);
const O2 = replaceDiacritics(I2);
const O3 = replaceDiacritics(I3);

t.equal(O1, 'aaaaeeeiiooooucnAAAAEEIIOOOOUCN');
t.equal(O1, "aaaaeeeiiooooucnAAAAEEIIOOOOUCN");
t.equal(O2, `aaauioeaiioiuubnAoiAotytOhygUnN`);
// The ASCII-only input is expected to come back unchanged.
t.equal(O3, `aaaaeeeiiooooucnAAAAEEIIOOOOUCN`);
});
Expand Down
23 changes: 22 additions & 1 deletion tests/lyra.test.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import t from "tap";
import { create, insert, remove, search, insertBatch, insertWithHooks } from "../src/lyra";
import { create, insert, remove, search, insertBatch, insertWithHooks, Lyra } from "../src/lyra";

t.test("defaultLanguage", t => {
t.plan(3);
Expand Down Expand Up @@ -708,3 +708,24 @@ t.test("custom tokenizer configuration", t => {
t.same(searchResult2.count, 0);
});
});

// Regression test for https://github.com/LyraSearch/lyra/issues/137.
// The indexed word "constructor" is also the name of a property every plain
// object inherits from Object.prototype, so it is used here to check that a
// search for it completes without throwing.
t.test("should access own properties exclusively", t => {
  t.plan(1);

  const db = create({
    schema: {
      txt: "string",
    },
  });

  insert(db, {
    txt: "constructor",
  });

  // Assert the actual intent (no exception) instead of a tautology.
  t.doesNotThrow(() => {
    search(db, {
      term: "constructor",
      tolerance: 1,
    });
  });
});

0 comments on commit d02c1d9

Please sign in to comment.