diff --git a/baker/algolia/indexChartsToAlgolia.ts b/baker/algolia/indexChartsToAlgolia.ts
index 92b0572f2cf..30086cdca04 100644
--- a/baker/algolia/indexChartsToAlgolia.ts
+++ b/baker/algolia/indexChartsToAlgolia.ts
@@ -1,12 +1,23 @@
 import * as db from "../../db/db.js"
-import { ALGOLIA_INDEXING } from "../../settings/serverSettings.js"
+import {
+    ALGOLIA_INDEXING,
+    BUGSNAG_NODE_API_KEY,
+} from "../../settings/serverSettings.js"
 import { getAlgoliaClient } from "./configureAlgolia.js"
 import { SearchIndexName } from "../../site/search/searchTypes.js"
 import { getIndexName } from "../../site/search/searchClient.js"
 import { getChartsRecords } from "./utils/charts.js"
+import Bugsnag from "@bugsnag/js"
 
 const indexChartsToAlgolia = async () => {
     if (!ALGOLIA_INDEXING) return
+    if (BUGSNAG_NODE_API_KEY) {
+        Bugsnag.start({
+            apiKey: BUGSNAG_NODE_API_KEY,
+            context: "index-charts-to-algolia",
+            autoTrackSessions: false,
+        })
+    }
 
     const client = getAlgoliaClient()
     if (!client) {
diff --git a/baker/algolia/utils/explorerViews.ts b/baker/algolia/utils/explorerViews.ts
index aca2397b227..21e51208351 100644
--- a/baker/algolia/utils/explorerViews.ts
+++ b/baker/algolia/utils/explorerViews.ts
@@ -8,7 +8,7 @@ import {
 import { at, get, groupBy, mapValues, orderBy, partition, uniq } from "lodash"
 import { MarkdownTextWrap } from "@ourworldindata/components"
 import { logErrorAndMaybeSendToBugsnag } from "../../../serverUtils/errorLog.js"
-import { obtainAvailableEntitiesForAllGraphers } from "../../updateChartEntities.js"
+import { obtainAvailableEntitiesForGraphers } from "../../updateChartEntities.js"
 import { fetchS3MetadataByPath } from "../../../db/model/Variable.js"
 import { getVariableMetadataRoute } from "@ourworldindata/grapher"
 import pMap from "p-map"
@@ -53,7 +53,7 @@ export function explorerViewRecordToChartRecord(
     return {
         type: ChartRecordType.ExplorerView,
         objectID: e.objectID!,
-        chartId: Math.floor(Math.random() * 1000000),
+        chartId: -1,
         slug: e.explorerSlug,
         queryParams: e.viewQueryParams,
         title: e.viewTitle,
@@ -74,35 +74,20 @@ export function explorerViewRecordToChartRecord(
 }
 
 /**
- * Scale explorer scores to the range of grapher scores
- * e.g. if the highest explorer score is 100 and the highest grapher score is 1000,
- * we want to scale the explorer scores to be between 0 and 1000
+ * Scale records' positive scores to be between 0 and 10000 (relative to the max); scores <= 0 are kept as-is.
  */
-export function scaleExplorerScores(
-    explorerRecords: ChartRecord[],
-    grapherRecords: ChartRecord[]
-): ChartRecord[] {
-    const explorerScores = explorerRecords.map((e) => e.score)
-    const explorerScoreMax = Math.max(...explorerScores)
-
-    const grapherScores = grapherRecords.map((e) => e.score)
-    const grapherScoreBounds = {
-        max: Math.max(...grapherScores),
-        min: Math.min(...grapherScores),
-    }
-
-    // scale positive explorer scores to the range of grapher scores
-    // We want to keep negative scores because they're intentionally downranked as near-duplicates of existing views
-    return explorerRecords.map((e): ChartRecord => {
-        if (e.score < 0) return e
+export function scaleRecordScores(records: ChartRecord[]): ChartRecord[] {
+    const scores = records.map((r) => r.score)
+    const maxScore = scores.reduce((a, b) => Math.max(a, b), -Infinity)
+    return records.map((record): ChartRecord => {
+        // Keep non-positive scores untouched: negative ExplorerView scores are intentional
+        // downranks of near-duplicate views, and 0 must not turn into 0 / 0 = NaN
+        if (record.score <= 0) return record
         // A value between 0 and 1
-        const normalized = e.score / explorerScoreMax
-        const grapherRange = grapherScoreBounds.max - grapherScoreBounds.min
-        const scaled = Math.round(
-            normalized * grapherRange + grapherScoreBounds.min
-        )
+        const normalized = record.score / maxScore
+        const scaled = Math.round(normalized * 10000)
         return {
-            ...e,
+            ...record,
             score: scaled,
         }
     })
@@ -180,25 +165,23 @@ async function fetchIndicatorMetadata(
         ...keyBy(metadataFromDB, "catalogPath"),
     } as ExplorerIndicatorMetadataDictionary
 
-    async function fetchEntitiesForId(id?: number) {
-        if (id) {
-            const metadata = await fetchS3MetadataByPath(
-                getVariableMetadataRoute(DATA_API_URL, id)
-            )
-            const entityNames = get(metadata, "dimensions.entities.values", [])
-                .map((value) => value.name)
-                .filter((name): name is string => !!name)
+    async function fetchEntitiesForId(id: number) {
+        const metadata = await fetchS3MetadataByPath(
+            getVariableMetadataRoute(DATA_API_URL, id)
+        )
+        const entityNames = get(metadata, "dimensions.entities.values", [])
+            .map((value) => value.name)
+            .filter((name): name is string => !!name)
 
-            const idEntry = indicatorMetadataByIdAndPath[id]
-            if (idEntry) {
-                idEntry.entityNames = entityNames
-            }
-            const path = metadata.catalogPath
-            if (path) {
-                const pathEntry = indicatorMetadataByIdAndPath[path]
-                if (pathEntry) {
-                    pathEntry.entityNames = entityNames
-                }
+        const idEntry = indicatorMetadataByIdAndPath[id]
+        if (idEntry) {
+            idEntry.entityNames = entityNames
+        }
+        const path = metadata.catalogPath
+        if (path) {
+            const pathEntry = indicatorMetadataByIdAndPath[path]
+            if (pathEntry) {
+                pathEntry.entityNames = entityNames
             }
         }
     }
@@ -421,7 +404,7 @@ const enrichWithGrapherData = async (
         `Fetching grapher configs from ${grapherIds.length} graphers for explorer ${explorerInfo.slug}`
     )
     const grapherInfo = await fetchGrapherInfo(trx, grapherIds)
-    const availableEntities = await obtainAvailableEntitiesForAllGraphers(
+    const availableEntities = await obtainAvailableEntitiesForGraphers(
         trx,
         grapherIds
     )
@@ -452,9 +435,9 @@ async function enrichRecordWithTableData(
         return
     }
 
-    const availableEntities = ySlugs
-        .flatMap((ySlug) => entitiesPerColumnPerTable[tableSlug][ySlug])
-        .filter((name, i, array) => !!name && array.indexOf(name) === i)
+    const availableEntities = uniq(
+        ySlugs.flatMap((ySlug) => entitiesPerColumnPerTable[tableSlug][ySlug])
+    ).filter((name) => !!name)
 
     return {
         ...record,
@@ -491,8 +474,8 @@ function enrichRecordWithIndicatorData(
     ).flatMap((meta) => meta.entityNames)
 
     const uniqueNonEmptyEntityNames = uniq(allEntityNames).filter(
-        Boolean
-    ) as string[]
+        (name): name is string => !!name
+    )
 
     const firstYIndicator = record.yVariableIds[0]
 
@@ -630,22 +613,7 @@ export const getExplorerViewRecordsForExplorer = async (
     explorerAdminServer: ExplorerAdminServer
 ): Promise => {
     const { slug } = explorerInfo
-    // Get explorer program and table definitions
     const explorerProgram = await explorerAdminServer.getExplorerFromSlug(slug)
-    // TODO: why doesn't us-covid-data-explorer have tableSlugs or tableDefs?
-    const tableDefs = explorerProgram.tableSlugs
-        .map((tableSlug) => explorerProgram.getTableDef(tableSlug))
-        .filter((x) => x && x.url && x.slug) as TableDef[]
-
-    // Fetch and process CSV table data
-    console.log(
-        `Fetching CSV table data for ${slug} and aggregating entities by column`
-    )
-    const entitiesPerColumnPerTable =
-        await getEntitiesPerColumnPerTable(tableDefs)
-    console.log(
-        "Finished fetching CSV table data and aggregating entities by column"
-    )
 
     console.log(
         `Creating ${explorerProgram.decisionMatrix.numRows} base records for explorer ${slug}`
@@ -687,6 +655,20 @@ export const getExplorerViewRecordsForExplorer = async (
         indicatorMetadataDictionary
     )
 
+    const tableDefs = explorerProgram.tableSlugs
+        .map((tableSlug) => explorerProgram.getTableDef(tableSlug))
+        .filter((x) => x && x.url && x.slug) as TableDef[]
+
+    // Fetch and process CSV table data
+    console.log(
+        `Fetching CSV table data for ${slug} and aggregating entities by column`
+    )
+    const entitiesPerColumnPerTable =
+        await getEntitiesPerColumnPerTable(tableDefs)
+    console.log(
+        "Finished fetching CSV table data and aggregating entities by column"
+    )
+
     const enrichedCsvRecords = await enrichWithTableData(
         csvBaseRecords,
         entitiesPerColumnPerTable
diff --git a/baker/algolia/utils/types.ts b/baker/algolia/utils/types.ts
index f8ebd9d505e..6c6346a2213 100644
--- a/baker/algolia/utils/types.ts
+++ b/baker/algolia/utils/types.ts
@@ -108,7 +108,7 @@ export type IndicatorUnenrichedExplorerViewRecord = ExplorerViewBaseRecord & {
 export type IndicatorEnrichedExplorerViewRecord = ExplorerViewBaseRecord & {
     viewGrapherId: never
     ySlugs: string[]
-    tableSlug: string
+    tableSlug: never
     availableEntities: string[]
     titleLength: number
 }
@@ -116,8 +116,7 @@ export type IndicatorEnrichedExplorerViewRecord = ExplorerViewBaseRecord & {
 export type CsvUnenrichedExplorerViewRecord = ExplorerViewBaseRecord & {
     viewGrapherId: never
     ySlugs: string[]
-    // TODO: why are there nulls here?
-    tableSlug: string | null
+    tableSlug: string
 }
 
 export type CsvEnrichedExplorerViewRecord = ExplorerViewBaseRecord & {
diff --git a/baker/updateChartEntities.ts b/baker/updateChartEntities.ts
index 9e9736c19dd..10264e28155 100644
--- a/baker/updateChartEntities.ts
+++ b/baker/updateChartEntities.ts
@@ -120,7 +120,7 @@ const obtainAvailableEntitiesForGrapherConfig = async (
     } else return []
 }
 
-export const obtainAvailableEntitiesForAllGraphers = async (
+export const obtainAvailableEntitiesForGraphers = async (
     trx: db.KnexReadonlyTransaction,
     // Optional subset of IDs to restrict data fetching to
     chartIds?: number[]
@@ -196,7 +196,7 @@ const updateAvailableEntitiesForAllGraphers = async (
         "--- Obtaining available entity ids for all published graphers ---"
     )
     const availableEntitiesByChartId =
-        await obtainAvailableEntitiesForAllGraphers(trx)
+        await obtainAvailableEntitiesForGraphers(trx)
 
     console.log("--- Fetch stats ---")
     console.log(