Skip to content

Commit

Permalink
✨ provisional cloudflare image functionality in site, removing old gd…
Browse files Browse the repository at this point in the history
…rive code
  • Loading branch information
ikesau committed Nov 18, 2024
1 parent b57f2c6 commit ce3ef62
Show file tree
Hide file tree
Showing 12 changed files with 25 additions and 383 deletions.
5 changes: 1 addition & 4 deletions adminSiteServer/apiRouter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,6 @@ import {
saveGrapherConfigToR2,
saveGrapherConfigToR2ByUUID,
} from "./chartConfigR2Helpers.js"
import { fetchImagesFromDriveAndSyncToS3 } from "../db/model/Image.js"
import { createMultiDimConfig } from "./multiDim.js"
import { isMultiDimDataPagePublished } from "../db/model/MultiDimDataPage.js"

Expand Down Expand Up @@ -3052,9 +3051,7 @@ deleteRouteWithRWTransaction(apiRouter, "/gdocs/:id", async (req, res, trx) => {
await validateTombstoneRelatedLinkUrl(trx, tombstone.relatedLinkUrl)
const slug = gdocSlug.replace("/", "")
const { relatedLinkThumbnail } = tombstone

Check warning on line 3053 in adminSiteServer/apiRouter.ts

View workflow job for this annotation

GitHub Actions / eslint

'relatedLinkThumbnail' is assigned a value but never used
if (relatedLinkThumbnail) {
await fetchImagesFromDriveAndSyncToS3(trx, [relatedLinkThumbnail])
}
// TODO: validate relatedLinkThumbnail?
await trx
.table("posts_gdocs_tombstones")
.insert({ ...tombstone, gdocId: id, slug })
Expand Down
4 changes: 2 additions & 2 deletions db/db.ts
Original file line number Diff line number Diff line change
Expand Up @@ -356,12 +356,12 @@ export const getImageMetadataByFilenames = async (
`-- sql
SELECT
id,
googleId,
filename,
defaultAlt,
updatedAt,
originalWidth,
originalHeight
originalHeight,
cloudflareId
FROM
images
WHERE filename IN (?)`,
Expand Down
4 changes: 0 additions & 4 deletions db/model/Gdoc/GdocFactory.ts
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,6 @@ import {
import { enrichedBlocksToMarkdown } from "./enrichedToMarkdown.js"
import { GdocAuthor } from "./GdocAuthor.js"
import { extractFilenamesFromBlock } from "./gdocUtils.js"
import { fetchImagesFromDriveAndSyncToS3 } from "../Image.js"

export function gdocFromJSON(
json: Record<string, any>
Expand Down Expand Up @@ -351,9 +350,6 @@ export async function loadGdocFromGdocBase(
if (contentSource === GdocsContentSource.Gdocs) {
// TODO: if we get here via fromJSON then we have already done this - optimize that?
await gdoc.fetchAndEnrichGdoc()
// If we're loading from Gdocs, now's also the time to fetch images from gdrive and sync them to S3
// In any other case, the images should already be in the DB and S3
await fetchImagesFromDriveAndSyncToS3(knex, gdoc.filenames)
}

await gdoc.loadState(knex)
Expand Down
215 changes: 1 addition & 214 deletions db/model/Image.ts
Original file line number Diff line number Diff line change
Expand Up @@ -31,113 +31,6 @@ import {
} from "../../settings/serverSettings.js"
import { KnexReadWriteTransaction, KnexReadonlyTransaction } from "../db.js"

/**
 * Looks up image metadata in the shared Google Drive and hands images over to
 * `Image.syncImage` for upload.
 */
class ImageStore {
    /**
     * Lists image files in the shared drive and returns their metadata keyed
     * by filename.
     *
     * @param filenames - Names to restrict the Drive query to. When empty, no
     * name filter is applied to the query.
     * @returns A record from filename to metadata for every non-trashed image
     * returned by Drive that has an id, name and modified time.
     * @throws Error if two returned images share a filename, or if any of the
     * requested `filenames` is absent from the result.
     */
    async fetchImageMetadata(
        filenames: string[]
    ): Promise<Record<string, ImageMetadata | undefined>> {
        console.log(
            `Fetching image metadata from Google Drive ${
                filenames.length ? `for ${filenames.join(", ")}` : ""
            }`
        )
        const driveClient = google.drive({
            version: "v3",
            auth: OwidGoogleAuth.getGoogleReadonlyAuth(),
        })

        // String values in a Drive query are wrapped in single quotes, so a
        // filename such as `owid's-chart.png` would otherwise terminate the
        // literal early and corrupt the query. Backslashes are escaped first so
        // that a trailing backslash cannot swallow the closing quote.
        // NOTE(review): the quote escape is documented by Drive; confirm the
        // backslash escape against the Drive query reference.
        const escapeForDriveQuery = (value: string): string =>
            value.replace(/\\/g, "\\\\").replace(/'/g, "\\'")

        // e.g. `and (name='example.png' or name='image.svg')`
        // https://developers.google.com/drive/api/guides/search-files#examples
        const filenamesFilter = filenames.length
            ? `and (${filenames
                  .map(
                      (filename) =>
                          `name='${escapeForDriveQuery(filename)}'`
                  )
                  .join(" or ")})`
            : ""

        const listParams: drive_v3.Params$Resource$Files$List = {
            fields: "nextPageToken, files(id, name, description, modifiedTime, imageMediaMetadata, trashed)",
            q: `'${GDOCS_CLIENT_EMAIL}' in readers and mimeType contains 'image/' ${filenamesFilter}`,
            driveId: GDOCS_SHARED_DRIVE_ID,
            corpora: "drive",
            supportsAllDrives: true,
            includeItemsFromAllDrives: true,
            pageSize: 1000,
        }

        const files: drive_v3.Schema$File[] = []
        let nextPageToken: drive_v3.Schema$FileList["nextPageToken"] = undefined
        let isInitialQuery = true

        // Keep requesting pages until Drive stops returning a continuation token.
        while (nextPageToken || isInitialQuery) {
            await driveClient.files
                .list({
                    ...listParams,
                    pageToken: nextPageToken,
                })
                // chaining this so that reassigning nextPageToken doesn't trip up TypeScript
                .then((res) => {
                    const nextFiles = res.data.files ?? []
                    nextPageToken = res.data.nextPageToken
                    // Append in place instead of re-copying every page fetched so far.
                    files.push(...nextFiles)
                })
            isInitialQuery = false
        }

        // Only files with the fields we rely on, and not in the trash, are usable.
        function validateImage(
            image: drive_v3.Schema$File
        ): image is GDriveImageMetadata {
            return Boolean(
                image.id && image.name && image.modifiedTime && !image.trashed
            )
        }

        const images: ImageMetadata[] = files
            .filter(validateImage)
            .map((google: GDriveImageMetadata) => ({
                googleId: google.id,
                filename: google.name,
                defaultAlt: google.description ?? "",
                updatedAt: new Date(google.modifiedTime).getTime(),
                originalWidth: google.imageMediaMetadata?.width ?? null,
                originalHeight: google.imageMediaMetadata?.height ?? null,
            }))

        // Results are keyed by filename below, so duplicate names are rejected.
        const duplicateFilenames = findDuplicates(
            images.map((image) => image.filename)
        )

        if (duplicateFilenames.length) {
            throw new Error(
                `Multiple images are named ${duplicateFilenames.join(", ")}`
            )
        }

        console.log(
            `Fetched ${images.length} images' metadata from Google Drive`
        )
        const imageMetadata = keyBy(images, "filename")
        // Only applies when we're fetching specific images i.e. `filenames` is not empty
        for (const filename of filenames) {
            if (!imageMetadata[filename]) {
                throw Error(`Image ${filename} not found in Google Drive`)
            }
        }
        return imageMetadata
    }

    /**
     * Runs `Image.syncImage` for every entry of `images` in parallel.
     *
     * @param knex - Read/write transaction passed through to `Image.syncImage`.
     * @param images - Metadata keyed by filename.
     * @returns The synced images, or an empty array when `images` is falsy.
     */
    async syncImagesToS3(
        knex: KnexReadWriteTransaction,
        images: Record<string, ImageMetadata>
    ): Promise<(Image | undefined)[]> {
        if (!images) return []
        return Promise.all(
            Object.keys(images).map((filename) =>
                Image.syncImage(knex, images[filename])
            )
        )
    }
}

export const imageStore = new ImageStore()

export const s3Client = new S3Client({
endpoint: R2_ENDPOINT,
forcePathStyle: false,
Expand All @@ -150,7 +43,7 @@ export const s3Client = new S3Client({

export class Image implements ImageMetadata {
id!: number
googleId!: string
cloudflareId!: string
filename!: string
defaultAlt!: string
updatedAt!: number | null
Expand All @@ -177,94 +70,6 @@ export class Image implements ImageMetadata {
constructor(metadata: ImageMetadata) {
Object.assign(this, metadata)
}

/**
 * Reconciles one Drive image record with what we have stored.
 *
 * - Unknown filename: upload the file, insert a row, return the new image.
 * - Known filename whose `updatedAt`, `defaultAlt`, `originalWidth` or
 *   `originalHeight` differs from Drive's: re-upload, then update the row.
 * - Known filename with identical values: return the stored image untouched.
 *
 * Any failure along the way is rethrown with the filename in the message.
 */
static async syncImage(
    knex: KnexReadWriteTransaction,
    metadata: ImageMetadata
): Promise<Image> {
    const fresh = new Image(metadata)
    const stored = await getImageByFilename(knex, metadata.filename)

    try {
        // First time we see this filename: upload and record it.
        if (!stored) {
            await fresh.fetchFromDriveAndUploadToS3()
            fresh.id = await insertImageClass(knex, fresh)
            return fresh
        }

        const isUpToDate =
            stored.updatedAt === fresh.updatedAt &&
            stored.defaultAlt === fresh.defaultAlt &&
            stored.originalWidth === fresh.originalWidth &&
            stored.originalHeight === fresh.originalHeight
        if (isUpToDate) return stored

        // Drive's copy differs from ours: re-upload first, then persist the
        // new values both on the in-memory record and in the database.
        await fresh.fetchFromDriveAndUploadToS3()
        const changes = {
            updatedAt: fresh.updatedAt,
            defaultAlt: fresh.defaultAlt,
            originalWidth: fresh.originalWidth,
            originalHeight: fresh.originalHeight,
        }
        Object.assign(stored, changes)
        await updateImage(knex, stored.id, changes)
        return stored
    } catch (e) {
        throw new Error(`Error syncing image ${metadata.filename}: ${e}`)
    }
}

/**
 * Downloads this image's bytes from Google Drive (looked up by `googleId`)
 * and uploads them with `s3Client` under `<directory>/<filename>` in the
 * bucket named by `IMAGE_HOSTING_R2_BUCKET_PATH`.
 *
 * Local preconditions are checked before any network call so that a bad
 * filename or setting fails fast instead of after a full download.
 *
 * @throws Error if the filename's extension has no MIME type according to
 * `getFilenameMIMEType`, or if `IMAGE_HOSTING_R2_BUCKET_PATH` is not of the
 * form `<bucket>/<directory>`.
 */
async fetchFromDriveAndUploadToS3(): Promise<void> {
    const MIMEType = getFilenameMIMEType(this.filename)

    if (!MIMEType) {
        throw new Error(
            `Error uploading image "${this.filename}": unsupported file extension`
        )
    }

    // The setting is split at its first slash into bucket and directory.
    // Without a slash, indexOf returns -1 and the slices below would drop the
    // bucket's last character and reuse the whole value as the directory.
    const indexOfFirstSlash = IMAGE_HOSTING_R2_BUCKET_PATH.indexOf("/")
    if (indexOfFirstSlash <= 0) {
        throw new Error(
            `Error uploading image "${this.filename}": IMAGE_HOSTING_R2_BUCKET_PATH must look like "<bucket>/<directory>"`
        )
    }
    const bucket = IMAGE_HOSTING_R2_BUCKET_PATH.slice(0, indexOfFirstSlash)
    const directory = IMAGE_HOSTING_R2_BUCKET_PATH.slice(
        indexOfFirstSlash + 1
    )

    const driveClient = google.drive({
        version: "v3",
        auth: OwidGoogleAuth.getGoogleReadonlyAuth(),
    })

    // `alt: "media"` requests the file contents rather than its metadata.
    const file = await driveClient.files.get(
        {
            fileId: this.googleId,
            alt: "media",
        },
        {
            responseType: "arraybuffer",
        }
    )

    const imageArrayBuffer = file.data as Buffer

    const params: PutObjectCommandInput = {
        Bucket: bucket,
        Key: `${directory}/${this.filename}`,
        Body: imageArrayBuffer,
        ACL: "public-read",
        ContentType: MIMEType,
    }
    await s3Client.send(new PutObjectCommand(params))
    console.log(
        `Successfully uploaded object: ${params.Bucket}/${params.Key}`
    )
}
}

export async function getImageByFilename(
Expand Down Expand Up @@ -309,21 +114,3 @@ export async function insertImageObject(
const [id] = await knex.table("images").insert(image)
return id
}

/**
 * Fetches Drive metadata for the given filenames and syncs each image via
 * `Image.syncImage`.
 *
 * @param knex - Read/write transaction passed through to `Image.syncImage`.
 * @param filenames - Image filenames to sync. An empty list is a no-op.
 * @returns The synced images, or an empty array when no filenames are given.
 * @throws Error prefixed with "Error fetching images from Drive" if the
 * metadata lookup or any individual sync fails.
 */
export async function fetchImagesFromDriveAndSyncToS3(
    knex: KnexReadWriteTransaction,
    filenames: string[] = []
): Promise<Image[]> {
    if (!filenames.length) return []

    try {
        const metadataObject = await imageStore.fetchImageMetadata(filenames)
        const metadataArray = Object.values(metadataObject) as ImageMetadata[]

        // Await inside the try block: returning the bare promise would let a
        // rejection from any syncImage call skip the catch below entirely.
        return await Promise.all(
            metadataArray.map((metadata) => Image.syncImage(knex, metadata))
        )
    } catch (e) {
        throw new Error(`Error fetching images from Drive: ${e}`)
    }
}
8 changes: 0 additions & 8 deletions devTools/updateImageHeights/tsconfig.json

This file was deleted.

69 changes: 0 additions & 69 deletions devTools/updateImageHeights/update-image-heights.ts

This file was deleted.

1 change: 0 additions & 1 deletion packages/@ourworldindata/types/src/dbTypes/Images.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ export const ImagesTableName = "images"
export interface DbInsertImage {
defaultAlt: string
filename: string
googleId: string
id?: number
originalWidth?: number | null
originalHeight?: number | null
Expand Down
2 changes: 1 addition & 1 deletion packages/@ourworldindata/types/src/gdocTypes/Image.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ export type ImageMetadata = Pick<
DbEnrichedImage,
| "defaultAlt"
| "filename"
| "googleId"
| "cloudflareId"
| "originalHeight"
| "originalWidth"
| "updatedAt"
Expand Down
Loading

0 comments on commit ce3ef62

Please sign in to comment.