Skip to content

Commit

Permalink
✨ omit grapher explorers from explorer-views-and-charts index
Browse files Browse the repository at this point in the history
  • Loading branch information
ikesau committed Nov 6, 2024
1 parent 3823a65 commit 2a5f22d
Show file tree
Hide file tree
Showing 3 changed files with 104 additions and 88 deletions.
91 changes: 17 additions & 74 deletions baker/algolia/indexExplorerViewsAndChartsToAlgolia.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,78 +6,15 @@ import {
BUGSNAG_NODE_API_KEY,
} from "../../settings/serverSettings.js"
import { getAlgoliaClient } from "./configureAlgolia.js"
import { ExplorerViewFinalRecord } from "./utils/types.js"
import { getExplorerViewRecords } from "./utils/explorerViews.js"
import {
explorerViewRecordToChartRecord,
getExplorerViewRecords,
scaleExplorerScores,
} from "./utils/explorerViews.js"
import { getChartsRecords } from "./utils/charts.js"
import { getIndexName } from "../../site/search/searchClient.js"
import {
ChartRecord,
ChartRecordType,
SearchIndexName,
} from "../../site/search/searchTypes.js"

/**
 * Convert a final explorer view record into the `ChartRecord` shape (plus
 * `viewTitleIndexWithinExplorer`), tagged as `ChartRecordType.ExplorerView`.
 *
 * Fields that have no explorer-view counterpart are filled with placeholders:
 * - `chartId` is a random integer in [0, 1000000); it is not stable across calls
 * - `variantName` is empty, `keyChartForTags` is empty, `numRelatedArticles` is 0
 * - `publishedAt` and `updatedAt` are both set to the time of conversion
 *
 * @param e the explorer view record to convert
 * @returns a new chart-shaped record; `e` is not mutated
 */
function explorerViewRecordToChartRecord(
    e: ExplorerViewFinalRecord
): ChartRecord & {
    viewTitleIndexWithinExplorer: number
} {
    // Take the timestamp once so publishedAt and updatedAt can never differ
    // (two separate `new Date()` calls may straddle a millisecond boundary).
    const convertedAt = new Date().toISOString()
    return {
        type: ChartRecordType.ExplorerView,
        objectID: e.objectID,
        // NOTE(review): random placeholder id — presumably only there to satisfy
        // the ChartRecord shape; confirm nothing relies on it being unique.
        chartId: Math.floor(Math.random() * 1000000),
        slug: e.explorerSlug,
        queryParams: e.viewQueryParams,
        title: e.viewTitle,
        subtitle: e.explorerSubtitle,
        variantName: "",
        keyChartForTags: [],
        tags: e.tags,
        availableEntities: e.availableEntities,
        publishedAt: convertedAt,
        updatedAt: convertedAt,
        // The number of non-default settings is reported as `numDimensions`
        numDimensions: e.numNonDefaultSettings,
        titleLength: e.titleLength,
        numRelatedArticles: 0,
        views_7d: e.explorerViews_7d,
        viewTitleIndexWithinExplorer: e.viewTitleIndexWithinExplorer,
        score: e.score,
    }
}

/**
 * Scale explorer scores to the range of grapher scores
 * e.g. if the highest explorer score is 100 and the grapher scores span 0 to 1000,
 * we want to scale the explorer scores to be between 0 and 1000
 *
 * Records with a negative score are returned untouched; every other record is
 * replaced by a copy whose score is
 * `round(score / maxExplorerScore * (grapherMax - grapherMin) + grapherMin)`.
 *
 * Edge cases:
 * - if either list is empty there is nothing to scale (or no range to scale
 *   into), so the explorer records are returned as-is in a new array
 * - if the highest explorer score is 0, zero scores map to the grapher minimum
 *   instead of becoming NaN (0 / 0)
 *
 * @param explorerRecords records whose scores should be rescaled
 * @param grapherRecords records whose score range is the target range
 * @returns a new array in the same order as `explorerRecords`; inputs are not mutated
 */
function scaleExplorerScores(
    explorerRecords: ChartRecord[],
    grapherRecords: ChartRecord[]
): ChartRecord[] {
    if (explorerRecords.length === 0 || grapherRecords.length === 0) {
        return explorerRecords.slice()
    }

    // Plain loops instead of Math.max(...scores): spreading a very large
    // array into call arguments can throw a RangeError.
    let explorerScoreMax = -Infinity
    for (const { score } of explorerRecords) {
        if (score > explorerScoreMax) explorerScoreMax = score
    }

    let grapherScoreMax = -Infinity
    let grapherScoreMin = Infinity
    for (const { score } of grapherRecords) {
        if (score > grapherScoreMax) grapherScoreMax = score
        if (score < grapherScoreMin) grapherScoreMin = score
    }
    const grapherRange = grapherScoreMax - grapherScoreMin

    // scale non-negative explorer scores to the range of grapher scores
    // We want to keep negative scores because they're intentionally downranked as near-duplicates of existing views
    return explorerRecords.map((e): ChartRecord => {
        if (e.score < 0) return e
        // A value between 0 and 1 (guarding against 0 / 0 when the max is 0)
        const normalized =
            explorerScoreMax > 0 ? e.score / explorerScoreMax : 0
        const scaled = Math.round(normalized * grapherRange + grapherScoreMin)
        return {
            ...e,
            score: scaled,
        }
    })
}
import { SearchIndexName } from "../../site/search/searchTypes.js"
import { ConvertedExplorerChartHit } from "./utils/types.js"

// We get 200k operations with Algolia's Open Source plan. We've hit 140k in the past so this might push us over.
// If we standardize the record shape, we could have this be the only index and have a `type` field
Expand Down Expand Up @@ -113,11 +50,17 @@ const indexExplorerViewsAndChartsToAlgolia = async () => {
}
}, db.TransactionCloseMode.Close)

const convertedExplorerViews = explorerViews.map(
explorerViewRecordToChartRecord
)
const convertedNonGrapherExplorerViews: ConvertedExplorerChartHit[] = []
for (const view of explorerViews) {
if (!view.viewGrapherId) {
convertedNonGrapherExplorerViews.push(
explorerViewRecordToChartRecord(view)
)
}
}

const scaledExplorerViews = scaleExplorerScores(
convertedExplorerViews,
convertedNonGrapherExplorerViews,
grapherViews
)
const records = [...scaledExplorerViews, ...grapherViews]
Expand Down
66 changes: 66 additions & 0 deletions baker/algolia/utils/explorerViews.ts
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,74 @@ import {
IndicatorEnrichedExplorerViewRecord,
IndicatorUnenrichedExplorerViewRecord,
CsvEnrichedExplorerViewRecord,
ConvertedExplorerChartHit,
} from "./types.js"
import { processAvailableEntities as processRecordAvailableEntities } from "./shared.js"
import {
ChartRecord,
ChartRecordType,
} from "../../../site/search/searchTypes.js"

/**
 * Convert a final explorer view record into a `ConvertedExplorerChartHit`,
 * tagged as `ChartRecordType.ExplorerView`.
 *
 * Fields that have no explorer-view counterpart are filled with placeholders:
 * - `chartId` is a random integer in [0, 1000000); it is not stable across calls
 * - `variantName` is empty, `keyChartForTags` is empty, `numRelatedArticles` is 0
 * - `publishedAt` and `updatedAt` are both set to the time of conversion
 *
 * @param e the explorer view record to convert
 * @returns a new chart-shaped record; `e` is not mutated
 */
export function explorerViewRecordToChartRecord(
    e: ExplorerViewFinalRecord
): ConvertedExplorerChartHit {
    // Take the timestamp once so publishedAt and updatedAt can never differ
    // (two separate `new Date()` calls may straddle a millisecond boundary).
    const convertedAt = new Date().toISOString()
    return {
        type: ChartRecordType.ExplorerView,
        objectID: e.objectID,
        // NOTE(review): random placeholder id — presumably only there to satisfy
        // the ChartRecord shape; confirm nothing relies on it being unique.
        chartId: Math.floor(Math.random() * 1000000),
        slug: e.explorerSlug,
        queryParams: e.viewQueryParams,
        title: e.viewTitle,
        subtitle: e.explorerSubtitle,
        variantName: "",
        keyChartForTags: [],
        tags: e.tags,
        availableEntities: e.availableEntities,
        publishedAt: convertedAt,
        updatedAt: convertedAt,
        // The number of non-default settings is reported as `numDimensions`
        numDimensions: e.numNonDefaultSettings,
        titleLength: e.titleLength,
        numRelatedArticles: 0,
        views_7d: e.explorerViews_7d,
        viewTitleIndexWithinExplorer: e.viewTitleIndexWithinExplorer,
        score: e.score,
    }
}

/**
 * Scale explorer scores to the range of grapher scores
 * e.g. if the highest explorer score is 100 and the grapher scores span 0 to 1000,
 * we want to scale the explorer scores to be between 0 and 1000
 *
 * Records with a negative score are returned untouched; every other record is
 * replaced by a copy whose score is
 * `round(score / maxExplorerScore * (grapherMax - grapherMin) + grapherMin)`.
 *
 * Edge cases:
 * - if either list is empty there is nothing to scale (or no range to scale
 *   into), so the explorer records are returned as-is in a new array
 * - if the highest explorer score is 0, zero scores map to the grapher minimum
 *   instead of becoming NaN (0 / 0)
 *
 * @param explorerRecords records whose scores should be rescaled
 * @param grapherRecords records whose score range is the target range
 * @returns a new array in the same order as `explorerRecords`; inputs are not mutated
 */
export function scaleExplorerScores(
    explorerRecords: ChartRecord[],
    grapherRecords: ChartRecord[]
): ChartRecord[] {
    if (explorerRecords.length === 0 || grapherRecords.length === 0) {
        return explorerRecords.slice()
    }

    // Plain loops instead of Math.max(...scores): spreading a very large
    // array into call arguments can throw a RangeError.
    let explorerScoreMax = -Infinity
    for (const { score } of explorerRecords) {
        if (score > explorerScoreMax) explorerScoreMax = score
    }

    let grapherScoreMax = -Infinity
    let grapherScoreMin = Infinity
    for (const { score } of grapherRecords) {
        if (score > grapherScoreMax) grapherScoreMax = score
        if (score < grapherScoreMin) grapherScoreMin = score
    }
    const grapherRange = grapherScoreMax - grapherScoreMin

    // scale non-negative explorer scores to the range of grapher scores
    // We want to keep negative scores because they're intentionally downranked as near-duplicates of existing views
    return explorerRecords.map((e): ChartRecord => {
        if (e.score < 0) return e
        // A value between 0 and 1 (guarding against 0 / 0 when the max is 0)
        const normalized =
            explorerScoreMax > 0 ? e.score / explorerScoreMax : 0
        const scaled = Math.round(normalized * grapherRange + grapherScoreMin)
        return {
            ...e,
            score: scaled,
        }
    })
}

// Creates a search-ready string from a choice.
// Special handling is pretty much only necessary for checkboxes: If they are not ticked, then their name is not included.
Expand Down
35 changes: 21 additions & 14 deletions baker/algolia/utils/types.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { DbEnrichedVariable } from "@ourworldindata/types"
import { PageType } from "../../../site/search/searchTypes.js"
import { ChartRecord, PageType } from "../../../site/search/searchTypes.js"

/** Pages */
export interface TypeAndImportance {
Expand Down Expand Up @@ -37,6 +37,20 @@ export interface ParsedChartRecordRow {
}

/** Explorers */
/**
 * Descriptive metadata for a single indicator.
 * NOTE(review): presumably used when enriching indicator-based explorer view
 * records — confirm against the code that builds these objects.
 */
export interface IndicatorMetadata {
// Names of the entities associated with the indicator
entityNames: string[]
// Optional public-facing title (presumably preferred over `name` for display — TODO confirm)
titlePublic?: string
// Optional display settings; only the display name is modelled here
display?: { name: string }
// The indicator's name (always present)
name: string
// Optional short description
descriptionShort?: string
}

/**
 * Identifying and descriptive fields of a grapher.
 * NOTE(review): presumably the grapher chart that an explorer view is backed
 * by (cf. `viewGrapherId` on explorer view records) — confirm against callers.
 */
export interface ExplorerViewGrapherInfo {
// Numeric id of the grapher
id: number
// Grapher title
title: string
// Grapher subtitle
subtitle: string
}

export type EntitiesByColumnDictionary = Record<
string,
Record<string, string[]>
Expand Down Expand Up @@ -118,7 +132,7 @@ export type EnrichedExplorerRecord =
| IndicatorEnrichedExplorerViewRecord
| CsvEnrichedExplorerViewRecord

/** This is the final record we index to Algolia */
/** This is the final record we index to Algolia for the `explorer-views` index */
export interface ExplorerViewFinalRecord {
objectID: string
explorerTitle: string
Expand All @@ -144,18 +158,11 @@ export interface ExplorerViewFinalRecord {
// These 2 aren't currently used in the explorer-views index (used in /search), but we need them in the data catalog
tags: string[]
availableEntities: string[]
// Only used to filter out these views from the data catalog (because we already index graphers)
viewGrapherId?: number
}

/**
 * Descriptive metadata for a single indicator.
 * NOTE(review): presumably used when enriching indicator-based explorer view
 * records — confirm against the code that builds these objects.
 */
export interface IndicatorMetadata {
// Names of the entities associated with the indicator
entityNames: string[]
// Optional public-facing title (presumably preferred over `name` for display — TODO confirm)
titlePublic?: string
// Optional display settings; only the display name is modelled here
display?: { name: string }
// The indicator's name (always present)
name: string
// Optional short description
descriptionShort?: string
}

export interface ExplorerViewGrapherInfo {
id: number
title: string
subtitle: string
// This is the final record we index to Algolia for the `explorer-views-and-charts` index
export type ConvertedExplorerChartHit = ChartRecord & {
viewTitleIndexWithinExplorer: number
}

0 comments on commit 2a5f22d

Please sign in to comment.