diff --git a/adminSiteServer/apiRouter.ts b/adminSiteServer/apiRouter.ts index a6f366d051e..86e7e2ca30f 100644 --- a/adminSiteServer/apiRouter.ts +++ b/adminSiteServer/apiRouter.ts @@ -188,7 +188,6 @@ import { saveGrapherConfigToR2, saveGrapherConfigToR2ByUUID, } from "./chartConfigR2Helpers.js" -import { fetchImagesFromDriveAndSyncToS3 } from "../db/model/Image.js" import { createMultiDimConfig } from "./multiDim.js" import { isMultiDimDataPagePublished } from "../db/model/MultiDimDataPage.js" @@ -3052,9 +3051,7 @@ deleteRouteWithRWTransaction(apiRouter, "/gdocs/:id", async (req, res, trx) => { await validateTombstoneRelatedLinkUrl(trx, tombstone.relatedLinkUrl) const slug = gdocSlug.replace("/", "") const { relatedLinkThumbnail } = tombstone - if (relatedLinkThumbnail) { - await fetchImagesFromDriveAndSyncToS3(trx, [relatedLinkThumbnail]) - } + // TODO: validate relatedLinkThumbnail? await trx .table("posts_gdocs_tombstones") .insert({ ...tombstone, gdocId: id, slug }) diff --git a/db/db.ts b/db/db.ts index 7fc2d8096c0..54d905c73c3 100644 --- a/db/db.ts +++ b/db/db.ts @@ -356,12 +356,12 @@ export const getImageMetadataByFilenames = async ( `-- sql SELECT id, - googleId, filename, defaultAlt, updatedAt, originalWidth, - originalHeight + originalHeight, + cloudflareId FROM images WHERE filename IN (?)`, diff --git a/db/model/Gdoc/GdocFactory.ts b/db/model/Gdoc/GdocFactory.ts index 4e2ebdfb8c6..9625dc6a16b 100644 --- a/db/model/Gdoc/GdocFactory.ts +++ b/db/model/Gdoc/GdocFactory.ts @@ -46,7 +46,6 @@ import { import { enrichedBlocksToMarkdown } from "./enrichedToMarkdown.js" import { GdocAuthor } from "./GdocAuthor.js" import { extractFilenamesFromBlock } from "./gdocUtils.js" -import { fetchImagesFromDriveAndSyncToS3 } from "../Image.js" export function gdocFromJSON( json: Record @@ -351,9 +350,6 @@ export async function loadGdocFromGdocBase( if (contentSource === GdocsContentSource.Gdocs) { // TODO: if we get here via fromJSON then we have already done this - 
optimize that? await gdoc.fetchAndEnrichGdoc() - // If we're loading from Gdocs, now's also the time to fetch images from gdrive and sync them to S3 - // In any other case, the images should already be in the DB and S3 - await fetchImagesFromDriveAndSyncToS3(knex, gdoc.filenames) } await gdoc.loadState(knex) diff --git a/db/model/Image.ts b/db/model/Image.ts index 7042f529eca..15296fd9126 100644 --- a/db/model/Image.ts +++ b/db/model/Image.ts @@ -31,113 +31,6 @@ import { } from "../../settings/serverSettings.js" import { KnexReadWriteTransaction, KnexReadonlyTransaction } from "../db.js" -class ImageStore { - async fetchImageMetadata( - filenames: string[] - ): Promise> { - console.log( - `Fetching image metadata from Google Drive ${ - filenames.length ? `for ${filenames.join(", ")}` : "" - }` - ) - const driveClient = google.drive({ - version: "v3", - auth: OwidGoogleAuth.getGoogleReadonlyAuth(), - }) - // e.g. `and (name="example.png" or name="image.svg")` - // https://developers.google.com/drive/api/guides/search-files#examples - const filenamesFilter = filenames.length - ? `and (${filenames - .map((filename) => `name='${filename}'`) - .join(" or ")})` - : "" - - const listParams: drive_v3.Params$Resource$Files$List = { - fields: "nextPageToken, files(id, name, description, modifiedTime, imageMediaMetadata, trashed)", - q: `'${GDOCS_CLIENT_EMAIL}' in readers and mimeType contains 'image/' ${filenamesFilter}`, - driveId: GDOCS_SHARED_DRIVE_ID, - corpora: "drive", - supportsAllDrives: true, - includeItemsFromAllDrives: true, - pageSize: 1000, - } - - let files: drive_v3.Schema$File[] = [] - let nextPageToken: drive_v3.Schema$FileList["nextPageToken"] = undefined - let isInitialQuery = true - - while (nextPageToken || isInitialQuery) { - await driveClient.files - .list({ - ...listParams, - pageToken: nextPageToken, - }) - // chaining this so that reassigning nextPageToken doesn't trip up TypeScript - .then((res) => { - const nextFiles = res.data.files ??
[] - nextPageToken = res.data.nextPageToken - files = [...files, ...nextFiles] - }) - isInitialQuery = false - } - - function validateImage( - image: drive_v3.Schema$File - ): image is GDriveImageMetadata { - return Boolean( - image.id && image.name && image.modifiedTime && !image.trashed - ) - } - - const images: ImageMetadata[] = files - .filter(validateImage) - .map((google: GDriveImageMetadata) => ({ - googleId: google.id, - filename: google.name, - defaultAlt: google.description ?? "", - updatedAt: new Date(google.modifiedTime).getTime(), - originalWidth: google.imageMediaMetadata?.width ?? null, - originalHeight: google.imageMediaMetadata?.height ?? null, - })) - - const duplicateFilenames = findDuplicates( - images.map((image) => image.filename) - ) - - if (duplicateFilenames.length) { - throw new Error( - `Multiple images are named ${duplicateFilenames.join(", ")}` - ) - } - - console.log( - `Fetched ${images.length} images' metadata from Google Drive` - ) - const imageMetadata = keyBy(images, "filename") - // Only applies when we're fetching specific images i.e. 
`filenames` is not empty - for (const filename of filenames) { - if (!imageMetadata[filename]) { - throw Error(`Image ${filename} not found in Google Drive`) - } - } - return imageMetadata - } - - async syncImagesToS3( - knex: KnexReadWriteTransaction, - images: Record - ): Promise<(Image | undefined)[]> { - if (!images) return [] - return Promise.all( - Object.keys(images).map((filename) => - Image.syncImage(knex, images[filename]) - ) - ) - } -} - -export const imageStore = new ImageStore() - export const s3Client = new S3Client({ endpoint: R2_ENDPOINT, forcePathStyle: false, @@ -150,7 +43,7 @@ export const s3Client = new S3Client({ export class Image implements ImageMetadata { id!: number - googleId!: string + cloudflareId!: string filename!: string defaultAlt!: string updatedAt!: number | null @@ -177,94 +70,6 @@ export class Image implements ImageMetadata { constructor(metadata: ImageMetadata) { Object.assign(this, metadata) } - - // Given a record from Drive, see if we're already aware of it - // If we are, see if Drive's version is different from the one we have stored - // If it is, upload it and update our record - // If we're not aware of it, upload and record it - static async syncImage( - knex: KnexReadWriteTransaction, - metadata: ImageMetadata - ): Promise { - const fresh = new Image(metadata) - const stored = await getImageByFilename(knex, metadata.filename) - - try { - if (stored) { - if ( - stored.updatedAt !== fresh.updatedAt || - stored.defaultAlt !== fresh.defaultAlt || - stored.originalWidth !== fresh.originalWidth || - stored.originalHeight !== fresh.originalHeight - ) { - await fresh.fetchFromDriveAndUploadToS3() - stored.updatedAt = fresh.updatedAt - stored.defaultAlt = fresh.defaultAlt - stored.originalWidth = fresh.originalWidth - stored.originalHeight = fresh.originalHeight - await updateImage(knex, stored.id, { - updatedAt: fresh.updatedAt, - defaultAlt: fresh.defaultAlt, - originalWidth: fresh.originalWidth, - originalHeight:
fresh.originalHeight, - }) - } - return stored - } else { - await fresh.fetchFromDriveAndUploadToS3() - const id = await insertImageClass(knex, fresh) - fresh.id = id - return fresh - } - } catch (e) { - throw new Error(`Error syncing image ${metadata.filename}: ${e}`) - } - } - - async fetchFromDriveAndUploadToS3(): Promise { - const driveClient = google.drive({ - version: "v3", - auth: OwidGoogleAuth.getGoogleReadonlyAuth(), - }) - - const file = await driveClient.files.get( - { - fileId: this.googleId, - alt: "media", - }, - { - responseType: "arraybuffer", - } - ) - - const imageArrayBuffer = file.data as Buffer - - const indexOfFirstSlash = IMAGE_HOSTING_R2_BUCKET_PATH.indexOf("/") - const bucket = IMAGE_HOSTING_R2_BUCKET_PATH.slice(0, indexOfFirstSlash) - const directory = IMAGE_HOSTING_R2_BUCKET_PATH.slice( - indexOfFirstSlash + 1 - ) - - const MIMEType = getFilenameMIMEType(this.filename) - - if (!MIMEType) { - throw new Error( - `Error uploading image "${this.filename}": unsupported file extension` - ) - } - - const params: PutObjectCommandInput = { - Bucket: bucket, - Key: `${directory}/${this.filename}`, - Body: imageArrayBuffer, - ACL: "public-read", - ContentType: MIMEType, - } - await s3Client.send(new PutObjectCommand(params)) - console.log( - `Successfully uploaded object: ${params.Bucket}/${params.Key}` - ) - } } export async function getImageByFilename( @@ -309,21 +114,3 @@ export async function insertImageObject( const [id] = await knex.table("images").insert(image) return id } - -export async function fetchImagesFromDriveAndSyncToS3( - knex: KnexReadWriteTransaction, - filenames: string[] = [] -): Promise { - if (!filenames.length) return [] - - try { - const metadataObject = await imageStore.fetchImageMetadata(filenames) - const metadataArray = Object.values(metadataObject) as ImageMetadata[] - - return Promise.all( - metadataArray.map((metadata) => Image.syncImage(knex, metadata)) - ) - } catch (e) { - throw new Error(`Error fetching images 
from Drive: ${e}`) - } -} diff --git a/devTools/updateImageHeights/tsconfig.json b/devTools/updateImageHeights/tsconfig.json deleted file mode 100644 index abbd4697531..00000000000 --- a/devTools/updateImageHeights/tsconfig.json +++ /dev/null @@ -1,8 +0,0 @@ -{ - "extends": "../tsconfigs/tsconfig.base.json", - "compilerOptions": { - "outDir": "../../itsJustJavascript/devTools/updateImageHeights", - "rootDir": "." - }, - "references": [{ "path": "../../settings" }, { "path": "../../db" }] -} diff --git a/devTools/updateImageHeights/update-image-heights.ts b/devTools/updateImageHeights/update-image-heights.ts deleted file mode 100644 index d58f89770e2..00000000000 --- a/devTools/updateImageHeights/update-image-heights.ts +++ /dev/null @@ -1,69 +0,0 @@ -import { imageStore } from "../../db/model/Image.js" -import * as db from "../../db/db.js" -import * as lodash from "lodash" -import { exit } from "../../db/cleanup.js" - -async function updateImageHeights() { - const transaction = await db.knexInstance().transaction() - const filenames = await db - .knexRaw<{ filename: string }>( - transaction, - `SELECT DISTINCT filename - FROM posts_gdocs_x_images pgxi - LEFT JOIN images i ON pgxi.imageId = i.id` - ) - .then((rows) => rows.map((row) => row.filename)) - - console.log("Fetching image metadata...") - const images = await imageStore.fetchImageMetadata([]) - console.log("Fetching image metadata...done") - - if (!images) { - throw new Error("No images found") - } - - let imagesWithoutOriginalHeight = [] - try { - let index = 0 - for (const batch of lodash.chunk(filenames, 20)) { - const promises = [] - for (const filename of batch) { - const image = images[filename] - if (image && image.originalHeight) { - promises.push( - db.knexRaw( - transaction, - ` - UPDATE images - SET originalHeight = ? - WHERE filename = ? 
- `, - [image.originalHeight, filename] - ) - ) - } else { - console.error(`No original height found for ${filename}`) - imagesWithoutOriginalHeight.push(filename) - } - } - console.log(`Updating image heights for batch ${index}...`) - await Promise.all(promises) - console.log(`Updating image heights for batch ${index}...done`) - index++ - } - await transaction.commit() - console.log("All image heights updated successfully!") - // Most likely due to the original file being deleted but the DB not being updated, each of these will need to be manually checked - console.log( - "Images without original height:", - imagesWithoutOriginalHeight - ) - await exit() - } catch (error) { - console.error(error) - await transaction.rollback() - await exit() - } -} - -updateImageHeights() diff --git a/packages/@ourworldindata/types/src/dbTypes/Images.ts b/packages/@ourworldindata/types/src/dbTypes/Images.ts index 430931cf3ce..15154d100a1 100644 --- a/packages/@ourworldindata/types/src/dbTypes/Images.ts +++ b/packages/@ourworldindata/types/src/dbTypes/Images.ts @@ -2,7 +2,6 @@ export const ImagesTableName = "images" export interface DbInsertImage { defaultAlt: string filename: string - googleId: string id?: number originalWidth?: number | null originalHeight?: number | null diff --git a/packages/@ourworldindata/types/src/gdocTypes/Image.ts b/packages/@ourworldindata/types/src/gdocTypes/Image.ts index f9e162ac31c..67296474477 100644 --- a/packages/@ourworldindata/types/src/gdocTypes/Image.ts +++ b/packages/@ourworldindata/types/src/gdocTypes/Image.ts @@ -18,7 +18,7 @@ export type ImageMetadata = Pick< DbEnrichedImage, | "defaultAlt" | "filename" - | "googleId" + | "cloudflareId" | "originalHeight" | "originalWidth" | "updatedAt" diff --git a/packages/@ourworldindata/utils/src/image.ts b/packages/@ourworldindata/utils/src/image.ts index 080a7ffd9c5..c03a518dd01 100644 --- a/packages/@ourworldindata/utils/src/image.ts +++ b/packages/@ourworldindata/utils/src/image.ts @@ -29,13 +29,12
@@ export function getSizes( export function generateSrcSet( sizes: number[], - filename: ImageMetadata["filename"] + id: ImageMetadata["cloudflareId"], + absoluteUrl: string = "" ): string { return sizes .map((size) => { - const path = `/images/published/${getFilenameWithoutExtension( - encodeURIComponent(filename) - )}_${size}.png` + const path = `${absoluteUrl}/${id}/w=${size}` return `${path} ${size}w` }) .join(", ") @@ -93,7 +92,8 @@ export type SourceProps = { */ export function generateSourceProps( smallImage: ImageMetadata | undefined, - regularImage: ImageMetadata + regularImage: ImageMetadata, + absoluteUrl: string = "" ): SourceProps[] { const props: SourceProps[] = [] if (smallImage) { @@ -106,7 +106,11 @@ export function generateSourceProps( const regularSizes = getSizes(regularImage.originalWidth) props.push({ media: undefined, - srcSet: generateSrcSet(regularSizes, regularImage.filename), + srcSet: generateSrcSet( + regularSizes, + regularImage.filename, + absoluteUrl + ), }) return props } diff --git a/settings/clientSettings.ts b/settings/clientSettings.ts index d0f23ada019..318caa166ee 100644 --- a/settings/clientSettings.ts +++ b/settings/clientSettings.ts @@ -59,6 +59,8 @@ export const ALGOLIA_SEARCH_KEY: string = process.env.ALGOLIA_SEARCH_KEY ?? "" export const ALGOLIA_INDEX_PREFIX: string = process.env.ALGOLIA_INDEX_PREFIX ?? "" +export const CLOUDFLARE_IMAGES_URL = process.env.CLOUDFLARE_IMAGES_URL ?? "" + export const DONATE_API_URL: string = process.env.DONATE_API_URL ?? 
"http://localhost:8788/donation/donate" diff --git a/site/gdocs/components/Image.tsx b/site/gdocs/components/Image.tsx index 0a57d2d5cb9..28e6a8425d4 100644 --- a/site/gdocs/components/Image.tsx +++ b/site/gdocs/components/Image.tsx @@ -8,10 +8,7 @@ import { } from "@ourworldindata/utils" import cx from "classnames" import { LIGHTBOX_IMAGE_CLASS } from "../../Lightbox.js" -import { - IMAGE_HOSTING_R2_BUCKET_SUBFOLDER_PATH, - IMAGE_HOSTING_R2_CDN_URL, -} from "../../../settings/clientSettings.js" +import { CLOUDFLARE_IMAGES_URL } from "../../../settings/clientSettings.js" import { DocumentContext } from "../OwidGdoc.js" import { Container } from "./ArticleBlock.js" import { useImage } from "../utils.js" @@ -83,7 +80,6 @@ export default function Image(props: { "image--has-outline": hasOutline, }) - const { isPreviewing } = useContext(DocumentContext) const image = useImage(filename) const smallImage = useImage(smallFilename) const renderImageError = (name: string) => ( @@ -97,16 +93,10 @@ export default function Image(props: { ) if (!image) { - if (isPreviewing) { - return renderImageError(filename) - } // Don't render anything if we're not previewing (i.e. a bake) and the image is not found return null } // Here we can fall back to the regular image filename, so don't return null if not found - if (isPreviewing && smallFilename && !smallImage) { - return renderImageError(smallFilename) - } const alt = props.alt ?? image.defaultAlt const maybeLightboxClassName = @@ -114,67 +104,14 @@ export default function Image(props: { ? 
"" : LIGHTBOX_IMAGE_CLASS - if (isPreviewing) { - const makePreviewUrl = (f: string) => - `${IMAGE_HOSTING_R2_CDN_URL}/${IMAGE_HOSTING_R2_BUCKET_SUBFOLDER_PATH}/${encodeURIComponent(f)}` + // TODO: SVG - const PreviewSource = (props: { i?: ImageMetadata; sm?: boolean }) => { - const { i, sm } = props - if (!i) return null - - return ( - - ) - } - return ( - - - - {alt} - - ) - } - - if (filename.endsWith(".svg")) { - const pngFilename = `${getFilenameWithoutExtension(filename)}.png` - const imgSrc = `${IMAGES_DIRECTORY}${encodeURIComponent(filename)}` - return ( -
- {alt} - {containerType !== "thumbnail" ? ( - - Download image - - ) : null} -
- ) - } - - const imageSrc = `${IMAGES_DIRECTORY}${encodeURIComponent(filename)}` - const sourceProps = generateSourceProps(smallImage, image) + const imageSrc = `${CLOUDFLARE_IMAGES_URL}/${encodeURIComponent(filename)}/small` + const sourceProps = generateSourceProps( + smallImage, + image, + CLOUDFLARE_IMAGES_URL + ) return ( diff --git a/tsconfig.json b/tsconfig.json index 429704421c9..ed757b80026 100644 --- a/tsconfig.json +++ b/tsconfig.json @@ -25,9 +25,6 @@ { "path": "./adminSiteServer" }, - { - "path": "./devTools/updateImageHeights" - }, { "path": "./devTools/svgTester" },