Skip to content

Commit

Permalink
perf: reduce regex usage in getDomainFromOriginOrURL (#241)
Browse files Browse the repository at this point in the history
  • Loading branch information
connorjclark authored Feb 6, 2025
1 parent 26bbd54 commit 021299a
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 2 deletions.
6 changes: 4 additions & 2 deletions lib/create-entity-finder-api.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,10 @@ const ROOT_DOMAIN_REGEX = /[^.]+\.([^.]+|(gov|com|co|ne)\.\w{2})$/i
/**
 * Extracts the domain portion from an origin or URL string.
 *
 * @param {*} originOrURL - Candidate origin/URL; anything that is not a string yields null.
 * @returns {string|null} The first capture group of DOMAIN_IN_URL_REGEX when the input
 *   matches it; otherwise the full DOMAIN_CHARACTERS match; otherwise null. Also null for
 *   strings longer than 10000 characters or starting with `data:`.
 */
function getDomainFromOriginOrURL(originOrURL) {
  if (typeof originOrURL !== 'string') return null
  // Skip oversized inputs and data: URIs before doing any regex work.
  if (originOrURL.length > 10000 || originOrURL.startsWith('data:')) return null

  // Run each regex exactly once: `match` already tells us whether it matched,
  // so a preceding `test` call would only repeat the same work.
  const urlMatch = originOrURL.match(DOMAIN_IN_URL_REGEX)
  if (urlMatch) return urlMatch[1]

  const domainMatch = originOrURL.match(DOMAIN_CHARACTERS)
  if (domainMatch) return domainMatch[0]

  return null
}

Expand Down
14 changes: 14 additions & 0 deletions lib/create-entity-finder-api.test.js
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
const fs = require('fs')
const path = require('path')
const {createAPIFromDataset} = require('./create-entity-finder-api.js')

describe('getEntity', () => {
Expand Down Expand Up @@ -41,4 +43,16 @@ describe('getEntity', () => {
expect(api.getEntity('https://bar.example.co.uk/path').name).toEqual('Domain')
expect(api.getEntity('https://baz.bar.example.co.uk/path').name).toEqual('Domain')
})

// Manual benchmark, disabled via `it.skip`: it times one million `getEntity` calls
// and makes no assertions.
it.skip('stress test', () => {
// Load the URL corpus from ../data/random-urls.txt (relative to this file),
// one entry per line, dropping empty lines.
const urls = fs
.readFileSync(path.join(__dirname, '../data/random-urls.txt'), 'utf8')
.split('\n')
.filter(Boolean)
// console.time/timeEnd print the elapsed time under the 'getEntity' label.
console.time('getEntity')
for (let i = 0; i < 1_000_000; i++) {
// Cycle through the corpus so the loop works for any non-empty file size.
api.getEntity(urls[i % urls.length])
}
console.timeEnd('getEntity')
})
})

0 comments on commit 021299a

Please sign in to comment.