Skip to content

Commit

Permalink
✨ Add posts_gdocs_components table that stores flat components from all gdocs
Browse files Browse the repository at this point in the history
This PR adds a new table that contains an unfolded, flat list of all gdocs components in all gdocs.

For each gdoc, the tree in $.content.body is traversed: for every component, a copy of its content is made without any children, and the children are then recursed into. Span arrays are converted to plain text.

A new script, reconstructPostsGdocsComponents, populates the table initially. Whenever a gdoc is saved, the components for that gdoc are updated in the new posts_gdocs_components table.
  • Loading branch information
danyx23 authored Dec 13, 2024
1 parent 09bc3c0 commit bea23fc
Show file tree
Hide file tree
Showing 10 changed files with 584 additions and 4 deletions.
5 changes: 5 additions & 0 deletions adminSiteServer/apiRouter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,7 @@ import {
DbPlainChartView,
ChartViewsTableName,
DbInsertChartView,
PostsGdocsComponentsTableName,
CHART_VIEW_PROPS_TO_PERSIST,
CHART_VIEW_PROPS_TO_OMIT,
DbEnrichedImage,
Expand Down Expand Up @@ -3081,6 +3082,10 @@ deleteRouteWithRWTransaction(apiRouter, "/gdocs/:id", async (req, res, trx) => {
await trx.table(PostsGdocsLinksTableName).where({ sourceId: id }).delete()
await trx.table(PostsGdocsXImagesTableName).where({ gdocId: id }).delete()
await trx.table(PostsGdocsTableName).where({ id }).delete()
await trx
.table(PostsGdocsComponentsTableName)
.where({ gdocId: id })
.delete()
if (gdoc.published && checkIsGdocPostExcludingFragments(gdoc)) {
await removeIndividualGdocPostFromIndex(gdoc)
}
Expand Down
23 changes: 23 additions & 0 deletions db/migration/1732626230267-addPostsGdocsComponentsTable.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
import { MigrationInterface, QueryRunner } from "typeorm"

/**
 * Migration that introduces the `posts_gdocs_components` table.
 *
 * Each row carries an auto-increment `id`, the `gdocId` it belongs to, a JSON
 * `config` column and two string columns, `parent` and `path`. `gdocId` is a
 * foreign key into `posts_gdocs(id)` with ON DELETE / ON UPDATE CASCADE, so
 * rows follow the referenced gdoc row when it is deleted or re-keyed.
 */
export class AddPostsGdocsComponentsTable1732626230267
    implements MigrationInterface
{
    /**
     * Creates the table, including the foreign key and the index on `gdocId`.
     *
     * @param queryRunner - runner used to execute the raw SQL statement
     */
    public async up(queryRunner: QueryRunner): Promise<void> {
        // The statement text is kept verbatim (including the leading `-- sql`
        // comment line), so the SQL sent to the database is unchanged.
        const createTableSql = `-- sql
CREATE TABLE posts_gdocs_components (
id BIGINT UNSIGNED AUTO_INCREMENT PRIMARY KEY,
gdocId VARCHAR(255),
config JSON,
parent VARCHAR(1024),
path VARCHAR(1024),
FOREIGN KEY (gdocId) REFERENCES posts_gdocs(id) ON DELETE CASCADE ON UPDATE CASCADE,
INDEX idx_gdocId (gdocId)
) ENGINE=InnoDB;
`
        await queryRunner.query(createTableSql)
    }

    /**
     * Reverts the migration by dropping the table again.
     *
     * @param queryRunner - runner used to execute the raw SQL statement
     */
    public async down(queryRunner: QueryRunner): Promise<void> {
        const dropTableSql = `DROP TABLE posts_gdocs_components;`
        await queryRunner.query(dropTableSql)
    }
}
28 changes: 25 additions & 3 deletions db/model/Gdoc/GdocFactory.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,15 @@ import {
GdocsContentSource,
ImageMetadata,
LatestDataInsight,
OwidEnrichedGdocBlock,
OwidGdoc,
OwidGdocBaseInterface,
OwidGdocDataInsightContent,
OwidGdocIndexItem,
OwidGdocMinimalPostInterface,
OwidGdocPublicationContext,
OwidGdocType,
PostsGdocsComponentsTableName,
PostsGdocsLinksTableName,
PostsGdocsTableName,
PostsGdocsXImagesTableName,
Expand Down Expand Up @@ -47,6 +49,7 @@ import { enrichedBlocksToMarkdown } from "./enrichedToMarkdown.js"
import { GdocAbout } from "./GdocAbout.js"
import { GdocAuthor } from "./GdocAuthor.js"
import { extractFilenamesFromBlock } from "./gdocUtils.js"
import { getGdocComponentsWithoutChildren } from "./extractGdocComponentInfo.js"

export function gdocFromJSON(
json: Record<string, any>
Expand Down Expand Up @@ -129,7 +132,7 @@ export async function createGdocAndInsertIntoDb(
}

export async function updateGdocContentOnly(
knex: KnexReadonlyTransaction,
knex: KnexReadWriteTransaction,
id: string,
gdoc: GdocPost | GdocDataInsight | GdocHomepage | GdocAbout | GdocAuthor
): Promise<void> {
Expand All @@ -140,7 +143,7 @@ export async function updateGdocContentOnly(
} catch (e) {
console.error("Error when converting content to markdown", e)
}
return knex
await knex
.table(PostsGdocsTableName)
.where({ id })
.andWhere("revisionId", "<>", gdoc.revisionId)
Expand All @@ -149,6 +152,23 @@ export async function updateGdocContentOnly(
revisionId: gdoc.revisionId,
markdown,
})
await updateDerivedGdocPostsComponents(knex, id, gdoc.content.body)
}

/**
 * Rebuilds the rows stored for one gdoc in the table named by
 * `PostsGdocsComponentsTableName`.
 *
 * All existing rows for `gdocId` are deleted first. If a body is given, it is
 * passed to `getGdocComponentsWithoutChildren` and the resulting components
 * are inserted; when that result is empty no insert is issued.
 *
 * @param knex - read/write transaction the delete and insert run on
 * @param gdocId - id of the gdoc whose component rows are replaced
 * @param body - enriched blocks of the gdoc; when undefined, the existing
 *   rows are only deleted and nothing is inserted
 */
export async function updateDerivedGdocPostsComponents(
    knex: KnexReadWriteTransaction,
    gdocId: string,
    body: OwidEnrichedGdocBlock[] | undefined
): Promise<void> {
    // Always start from a clean slate for this gdoc.
    await knex.table(PostsGdocsComponentsTableName).where({ gdocId }).delete()

    // No body means there is nothing to derive.
    if (!body) return

    const flatComponents = getGdocComponentsWithoutChildren(gdocId, body)

    // Skip the insert entirely when there are no rows to write.
    if (!flatComponents.length) return

    await knex(PostsGdocsComponentsTableName).insert(flatComponents)
}

export async function getGdocBaseObjectById(
Expand Down Expand Up @@ -583,7 +603,9 @@ export async function upsertGdoc(
.onConflict("id")
.merge()
sql = query.toSQL()
return query
const indices = await query
await updateDerivedGdocPostsComponents(knex, gdoc.id, gdoc.content.body)
return indices
} catch (e) {
console.error(`Error occured in sql: ${sql}`, e)
throw e
Expand Down
Loading

0 comments on commit bea23fc

Please sign in to comment.