From 94bf5004689dd227aba704df011bfcf5f2a0e987 Mon Sep 17 00:00:00 2001 From: Paul Irish Date: Thu, 6 Feb 2025 19:42:18 -0800 Subject: [PATCH] perf: avoid calling getDomainFromOriginOrURL twice per getEntity (#243) --- lib/create-entity-finder-api.js | 40 ++++++++++++++++++++------------- 1 file changed, 25 insertions(+), 15 deletions(-) diff --git a/lib/create-entity-finder-api.js b/lib/create-entity-finder-api.js index 78ae7c8..aa42497 100644 --- a/lib/create-entity-finder-api.js +++ b/lib/create-entity-finder-api.js @@ -1,24 +1,35 @@ const DOMAIN_IN_URL_REGEX = /:\/\/(\S*?)(:\d+)?(\/|$)/ -const DOMAIN_CHARACTERS = /([a-z0-9.-]+\.[a-z0-9]+|localhost)/i +const DOMAIN_CHARACTERS = /(?:[a-z0-9.-]+\.[a-z0-9]+|localhost)/i const IP_REGEX = /^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$/ const ROOT_DOMAIN_REGEX = /[^.]+\.([^.]+|(gov|com|co|ne)\.\w{2})$/i -function getDomainFromOriginOrURL(originOrURL) { - if (typeof originOrURL !== 'string') return null - if (originOrURL.length > 10000 || originOrURL.startsWith('data:')) return null +/** + * @param {string} originOrURL + * @return {[string|null, string|null]} - The first item is the root domain, the second item is the domain. + */ +function parseDomains(originOrURL) { + if (typeof originOrURL !== 'string') return [null, null] + if (originOrURL.length > 10000 || originOrURL.startsWith('data:')) return [null, null] let m = originOrURL.match(DOMAIN_IN_URL_REGEX) - if (m) return m[1] + let domain; + if (m) { + domain = m[1] + } m = originOrURL.match(DOMAIN_CHARACTERS) - if (m) return m[0] - return null + if (m) { + domain = m[0] + } + + if (!domain) return [null, null] + if (IP_REGEX.test(domain)) return [domain, domain] + m = domain.match(ROOT_DOMAIN_REGEX) + const rootDomain = m && m[0] || domain; + + return [rootDomain, domain] } -function getRootDomain(originOrURL) { - const domain = getDomainFromOriginOrURL(originOrURL) - if (!domain) return null - if (IP_REGEX.test(domain)) return domain - const match = domain.match(ROOT_DOMAIN_REGEX) - return (match && match[0]) || domain +function getRootDomain(originOrURL,) { + return parseDomains(originOrURL)[0]; } function sliceSubdomainFromDomain(domain, rootDomain) { @@ -30,8 +41,7 @@ function sliceSubdomainFromDomain(domain, rootDomain) { } function getEntityInDataset(entityByDomain, entityBySubDomain, entityByRootDomain, originOrURL) { - const domain = getDomainFromOriginOrURL(originOrURL) - const rootDomain = getRootDomain(domain) + const [rootDomain, domain] = parseDomains(originOrURL); if (!domain || !rootDomain) return undefined if (entityByDomain.has(domain)) return entityByDomain.get(domain)