Skip to content

Commit

Permalink
fix: misc build-time fixes for better attribution
Browse files Browse the repository at this point in the history
  • Loading branch information
patrickhulce committed Jan 17, 2020
1 parent af5bfdb commit d690a99
Show file tree
Hide file tree
Showing 5 changed files with 68 additions and 52 deletions.
2 changes: 1 addition & 1 deletion bin/build-entity-json-files.js
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ const entitiesInHTTPArchive = _(httpArchiveData)
// Find all the unique entities for our domains found in HTTPArchive
.map(({domain}) => getEntity(domain))
.filter(Boolean)
.uniq()
.uniqBy(e => e.name)
// Use the original entity which has the minimal form
.map(e => sourceEntities.find(candidate => candidate.name === e.name))
.value()
Expand Down
12 changes: 12 additions & 0 deletions bin/fix-line-delimited-json.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
/**
 * Rewrites a line-delimited JSON file (one JSON value per line) in place as a
 * single JSON array.
 *
 * Usage: node bin/fix-line-delimited-json.js <path-to-file>
 *
 * The path is resolved against the current working directory. The script
 * throws, leaving the file untouched, when no path is given, when the file has
 * no entries, or when any entry is not valid JSON. The last case also stops a
 * file that was already converted from being wrapped a second time, because
 * its first line is a bare `[`.
 */
const fs = require('fs')
const path = require('path')

if (!process.argv[2]) {
  throw new Error('Usage: fix-line-delimited-json.js <path-to-file>')
}

const fileToFix = path.resolve(process.cwd(), process.argv[2])
console.log('Fixing', fileToFix, '...')
const lines = fs
  .readFileSync(fileToFix, 'utf8')
  .split('\n')
  // Drop blank lines, including whitespace-only ones such as the `\r` left
  // behind by CRLF line endings, so they do not become empty array slots.
  .filter(line => line.trim() !== '')

if (lines.length === 0) {
  throw new Error(`No JSON entries found in ${fileToFix}`)
}

// Validate every entry before touching the file: the write below replaces the
// input, so a single corrupt line would otherwise destroy the original data
// and leave behind an array that cannot be parsed.
lines.forEach((line, index) => {
  try {
    JSON.parse(line)
  } catch (err) {
    throw new Error(`Entry ${index + 1} of ${fileToFix} is not valid JSON: ${err.message}`)
  }
})

fs.writeFileSync(fileToFix, `[\n${lines.join(',')}\n]`)
2 changes: 1 addition & 1 deletion bin/generate-canonical-domain-csv.js
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ const entries = Array.from(observedDomains)
.map(domain => {
const entity = getEntity(domain)
if (!entity) {
return undefined
return [domain, domain, 'unknown']
}

return [domain, entity.domains[0], entity.categories[0] || 'other']
Expand Down
6 changes: 5 additions & 1 deletion lib/create-entity-finder-api.js
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,11 @@ function createAPIFromDataset(entities_) {
entity.averageExecutionTime = entity.totalExecutionTime / entity.totalOccurrences

for (const domain of entity.domains) {
if (entityByDomain.has(domain)) throw new Error(`Duplicate domain ${domain}`)
if (entityByDomain.has(domain)) {
const duplicate = entityByDomain.get(domain)
throw new Error(`Duplicate domain ${domain} (${entity.name} and ${duplicate.name})`)
}

entityByDomain.set(domain, entity)

const rootDomain = getRootDomain(domain)
Expand Down
98 changes: 49 additions & 49 deletions lib/index.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -54,59 +54,59 @@ describe('getRootDomain', () => {
describe('getEntity', () => {
it('works for direct domain usage', () => {
expect(getEntity('https://js.connect.facebook.net/lib.js')).toMatchInlineSnapshot(`
Object {
"averageExecutionTime": 161.0291502603836,
"categories": Array [
"social",
],
"company": "Facebook",
"domains": Array [
"*.atlassbx.com",
"*.facebook.com",
"*.fbsbx.com",
"fbcdn-photos-e-a.akamaihd.net",
"*.facebook.net",
"*.fbcdn.net",
],
"examples": Array [
"www.facebook.com",
"connect.facebook.net",
"staticxx.facebook.com",
"static.xx.fbcdn.net",
"m.facebook.com",
"an.facebook.com",
"platform-lookaside.fbsbx.com",
],
"homepage": "https://www.facebook.com",
"name": "Facebook",
"totalExecutionTime": 578108820,
"totalOccurrences": 3590088,
}
`)
Object {
"averageExecutionTime": 222.95578518974813,
"categories": Array [
"social",
],
"company": "Facebook",
"domains": Array [
"*.atlassbx.com",
"*.facebook.com",
"*.fbsbx.com",
"fbcdn-photos-e-a.akamaihd.net",
"*.facebook.net",
"*.fbcdn.net",
],
"examples": Array [
"www.facebook.com",
"connect.facebook.net",
"staticxx.facebook.com",
"static.xx.fbcdn.net",
"m.facebook.com",
"an.facebook.com",
"platform-lookaside.fbsbx.com",
],
"homepage": "https://www.facebook.com",
"name": "Facebook",
"totalExecutionTime": 322128748,
"totalOccurrences": 1444810,
}
`)
})

it('works for inferred domain usage', () => {
expect(getEntity('https://unknown.typekit.net/fonts.css')).toMatchInlineSnapshot(`
Object {
"averageExecutionTime": 93.6581576026637,
"categories": Array [
"cdn",
],
"company": "Adobe",
"domains": Array [
"*.typekit.com",
"*.typekit.net",
],
"examples": Array [
"use.typekit.net",
"p.typekit.net",
],
"homepage": "https://fonts.adobe.com/",
"name": "Adobe TypeKit",
"totalExecutionTime": 1940878,
"totalOccurrences": 20723,
}
`)
Object {
"averageExecutionTime": 105.38858905165768,
"categories": Array [
"cdn",
],
"company": "Adobe",
"domains": Array [
"*.typekit.com",
"*.typekit.net",
],
"examples": Array [
"use.typekit.net",
"p.typekit.net",
],
"homepage": "https://fonts.adobe.com/",
"name": "Adobe TypeKit",
"totalExecutionTime": 1230201,
"totalOccurrences": 11673,
}
`)
})

it('does not over-infer', () => {
Expand Down

0 comments on commit d690a99

Please sign in to comment.