Skip to content

Commit

Permalink
fix: handle data URLs gracefully
Browse files Browse the repository at this point in the history
  • Loading branch information
patrickhulce committed Oct 16, 2019
1 parent 40a04ad commit 4171f49
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 0 deletions.
4 changes: 4 additions & 0 deletions lib/create-entity-finder-api.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,16 @@ const IP_REGEX = /^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$/
const ROOT_DOMAIN_REGEX = /[^.]+\.([^.]+|(gov|com|co|ne)\.\w{2})$/i

function getDomainFromOriginOrURL(originOrURL) {
if (typeof originOrURL !== 'string') return null
if (originOrURL.length > 10000 || originOrURL.startsWith('data:')) return null
if (DOMAIN_IN_URL_REGEX.test(originOrURL)) return originOrURL.match(DOMAIN_IN_URL_REGEX)[1]
if (DOMAIN_CHARACTERS.test(originOrURL)) return originOrURL.match(DOMAIN_CHARACTERS)[0]
throw new Error(`Unable to find domain in "${originOrURL}"`)
}

function getRootDomain(originOrURL) {
const domain = getDomainFromOriginOrURL(originOrURL)
if (!domain) return null
if (IP_REGEX.test(domain)) return domain
const match = domain.match(ROOT_DOMAIN_REGEX)
return (match && match[0]) || domain
Expand All @@ -19,6 +22,7 @@ function getRootDomain(originOrURL) {
function getEntityInDataset(entityByDomain, entityByRootDomain, originOrURL) {
const domain = getDomainFromOriginOrURL(originOrURL)
const rootDomain = getRootDomain(domain)
if (!domain || !rootDomain) return undefined
if (entityByDomain.has(domain)) return entityByDomain.get(domain)
if (entityByRootDomain.has(rootDomain)) return entityByRootDomain.get(rootDomain)
return undefined
Expand Down
10 changes: 10 additions & 0 deletions lib/index.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,11 @@ describe('getRootDomain', () => {
expect(getRootDomain('*.hulce.photography')).toEqual('hulce.photography')
})

it('runs on *massive* inputs', () => {

This comment has been minimized.

Copy link
@Krinkle

Krinkle Jun 9, 2020

Contributor

Would it be useful to also assert that a (short) data URI results in the same behaviour?

E.g. something like

  • data:image/gif;base64,R0lGODlhAQABAIAAAP8AADAAACwAAAAAAQABAAACAkQBADs=
    or
  • data:image/svg+xml,%3Csvg xmlns=%22http://www.w3.org/2000/svg%22 width=%228%22 height=%228%22 viewBox=%220 0 8 8%22%3E %3Ccircle cx=%224%22 cy=%224%22 r=%222%22/%3E %3Ca xmlns:xlink=%22http://www.w3.org/1999/xlink%22 xlink:title=%22%3F%3E%22%3Etest%3C/a%3E %3C/svg%3E

These, I think, should not be considered third-party requests (GoogleChrome/lighthouse#9834).

The code now handles that correctly, but just wondering about test coverage. I'd be interested in submitting a patch if so :)

This comment has been minimized.

Copy link
@patrickhulce

patrickhulce Jun 9, 2020

Author Owner

Thanks @Krinkle! Yeah those are good ideas for additional tests 👍

This comment has been minimized.

Copy link
@Krinkle

Krinkle Jun 9, 2020

Contributor

Alrighty! #97

const massiveInput = '123456789'.repeat(100e3)
expect(getRootDomain(massiveInput)).toEqual(null)
})

it('throws on invalid inputs', () => {
expect(() => getRootDomain('this is not a domain')).toThrow()
expect(() => getRootDomain('neither-is-this')).toThrow()
Expand Down Expand Up @@ -112,6 +117,11 @@ Object {
expect(getEntity('http://fbcdn-photos-e-a.akamaihd.net/1234.jpg').name).toEqual('Facebook')
expect(getEntity('http://unknown.akamaihd.net/1234.jpg').name).toEqual('Akamai')
})

it('runs on *massive* inputs', () => {
const massiveInput = '123456789'.repeat(100e3)
expect(getEntity(massiveInput)).toEqual(undefined)
})
})

describe('build state', () => {
Expand Down

0 comments on commit 4171f49

Please sign in to comment.