-
-
Notifications
You must be signed in to change notification settings - Fork 229
/
GDriveImagesBaker.tsx
180 lines (164 loc) · 6.72 KB
/
GDriveImagesBaker.tsx
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
import fs from "fs-extra"
import path from "path"
import * as db from "../db/db.js"
import {
IMAGE_HOSTING_R2_CDN_URL,
IMAGE_HOSTING_R2_BUCKET_SUBFOLDER_PATH,
} from "../settings/serverSettings.js"
import {
DbRawImage,
getFilenameAsPng,
parseImageRow,
retryPromise,
} from "@ourworldindata/utils"
import { Image } from "../db/model/Image.js"
import sharp from "sharp"
import pMap from "p-map"
import { BAKED_BASE_URL } from "../settings/clientSettings.js"
/**
 * Downloads every image that is referenced by a GDoc (via
 * `posts_gdocs_x_images`) or used as a tombstone `relatedLinkThumbnail`,
 * and writes it into `<bakedSiteDir>/images/published`.
 *
 * For raster images the original is saved together with one resized PNG per
 * entry in `image.sizes`. For SVGs a 2000px wide PNG alternative is saved and
 * the SVG itself is written with an `@import` of the site's webfonts injected.
 * An `.etag` sidecar file is written next to each original.
 *
 * @param knex - Read-only transaction used to query the `images` table.
 * @param bakedSiteDir - Root directory of the baked site.
 * @throws If an image cannot be fetched after all retries, if processing or
 *   writing fails, or if the response carries no `etag` header.
 */
export const bakeDriveImages = async (
    knex: db.KnexReadonlyTransaction,
    bakedSiteDir: string
) => {
    // Get all GDocs images, download locally and resize them
    const rows = await db.knexRaw<DbRawImage>(
        knex,
        `SELECT DISTINCT *
            FROM images
            WHERE id IN (SELECT DISTINCT imageId FROM posts_gdocs_x_images)
            OR filename IN (SELECT DISTINCT relatedLinkThumbnail FROM posts_gdocs_tombstones)`
    )
    const images: Image[] = rows.map((row) => new Image(parseImageRow(row)))

    const imagesDirectory = path.join(bakedSiteDir, "images", "published")
    // Make sure the target directory exists so that the writes below cannot
    // fail with ENOENT on a fresh bake directory.
    await fs.ensureDir(imagesDirectory)

    // TODO 2024-02-29: In the retrospective about a recent resized image bug in prod we
    //                  discussed a few improvements to make to this code:
    //  - [ ] Add etags for all the resizes so that we are checking if all
    //        the sizes are up to date, not just the original image.
    //  - [ ] Clarify the filenames of the paths involved so that it is clear
    //        what refers to the original image, the local version, ...
    //  - [ ] Break this function into smaller functions to make it easier to
    //        understand and maintain.

    // If this causes timeout errors, try decreasing concurrency (2 should be safe)
    await pMap(
        images,
        async (image) => {
            // Build the URL with plain "/" joins: path.join would collapse
            // "https://" into "https:/" and use OS-specific separators.
            const remoteFilePath = joinUrlSegments(
                IMAGE_HOSTING_R2_CDN_URL,
                IMAGE_HOSTING_R2_BUCKET_SUBFOLDER_PATH,
                image.filename
            )
            const localImagePath = path.join(imagesDirectory, image.filename)
            const localImageEtagPath = localImagePath + ".etag"

            // If the image already exists locally, try to use its etag
            const existingEtag = await readEtagFromFile(
                localImagePath,
                localImageEtagPath
            )

            const response = await fetchImageWithRetry(
                remoteFilePath,
                image.filename,
                existingEtag
            )

            // Image has not been modified, skip
            // XXX hotfix: force png rebuild every time, to work around missing png size variants on prod
            // if (response.status === 304) {
            //     return
            // }

            const originalBuffer = Buffer.from(await response.arrayBuffer())

            if (!image.isSvg) {
                // Save the original image
                await fs.writeFile(localImagePath, originalBuffer)
                // Save resized versions
                // NOTE(review): `sizes` is assumed to be defined for every
                // non-SVG image — confirm against the Image model.
                await Promise.all(
                    image.sizes!.map((width) => {
                        const localResizedFilepath = path.join(
                            imagesDirectory,
                            `${image.filenameWithoutExtension}_${width}.png`
                        )
                        return sharp(originalBuffer)
                            .resize(width)
                            .png()
                            .toFile(localResizedFilepath)
                    })
                )
            } else {
                // A PNG alternative to the SVG for the "Download image" link
                const pngFilename = getFilenameAsPng(image.filename)
                await sharp(originalBuffer)
                    .resize(2000)
                    .png()
                    .toFile(path.join(imagesDirectory, pngFilename))

                // Import the site's webfonts, then save the svg
                const svgWithFonts = injectWebfontImport(
                    originalBuffer.toString(),
                    BAKED_BASE_URL
                )
                await fs.writeFile(localImagePath, Buffer.from(svgWithFonts))
            }

            // Save the etag to a sidecar
            await fs.writeFile(
                localImageEtagPath,
                readEtagFromHeader(response),
                "utf8"
            )
        },
        { concurrency: 5 }
    )
}

/**
 * Joins a base URL and path segments with exactly one "/" between parts.
 * Empty segments are skipped and the scheme's "//" is left untouched.
 */
const joinUrlSegments = (baseUrl: string, ...segments: string[]): string =>
    [
        baseUrl.replace(/\/+$/, ""),
        ...segments.map((segment) => segment.replace(/^\/+|\/+$/g, "")),
    ]
        .filter((part) => part !== "")
        .join("/")

/**
 * Inserts a `<defs><style>@import ...</style></defs>` element right after the
 * first opening `<svg ...>` tag. `[^>]*` (rather than `.*?`) is used so that
 * opening tags spanning several lines are matched too. A replacer function is
 * used so that "$" sequences in the base URL are never interpreted.
 */
const injectWebfontImport = (svg: string, bakedBaseUrl: string): string =>
    svg.replace(
        /<svg[^>]*>/,
        (openingTag) =>
            `${openingTag}<defs><style>@import url(${bakedBaseUrl}/fonts.css)</style></defs>`
    )

/**
 * Fetches an image, retrying up to 5 times with exponential backoff whenever
 * the response status is neither 304 nor in the 2xx range.
 */
const fetchImageWithRetry = (
    remoteFileUrl: string,
    filename: string,
    existingEtag: string
) =>
    retryPromise(
        async () => {
            const response = await fetch(remoteFileUrl, {
                headers: {
                    // XXX hotfix: force png rebuild every time, to work around missing png size variants on prod
                    // "If-None-Match": existingEtag,
                },
            })
            if (response.status === 304) {
                // Image has not been modified, skip without logging
                return response
            }
            if (response.ok) {
                // Log fetched images if it was success but wasn't 304
                console.log(
                    `Fetching image ${filename} from ${remoteFileUrl} using etag ${existingEtag}...`
                )
                return response
            }
            // Any other status: throw an error to trigger a retry
            const msg = `Fetching image failed: ${response.status} ${response.statusText} ${response.url}`
            console.log(msg)
            throw new Error(msg)
        },
        { maxRetries: 5, exponentialBackoff: true, initialDelay: 1000 }
    )
/**
 * Extracts the opaque etag value from a response's `etag` header.
 *
 * The surrounding double quotes are removed, as is the `W/` prefix of a weak
 * validator, so `"abc"` and `W/"abc"` both yield `abc`. Previously a weak etag
 * lost only its trailing quote and ended up as the malformed `W/"abc`.
 *
 * @param response - The fetch response to read the header from.
 * @returns The etag without quotes or weak-validator prefix.
 * @throws Error if the response has no (or an empty) `etag` header.
 */
const readEtagFromHeader = (response: Response) => {
    const etag = response.headers.get("etag")
    if (!etag) {
        throw new Error("No etag header found")
    }
    // strip the optional weak prefix and the extra quotes from the etag
    return etag.replace(/^(?:W\/)?"|"$/g, "")
}
/**
 * Reads the etag stored in the sidecar file of a locally cached image.
 *
 * The stored value is only used when both the image and its sidecar exist;
 * otherwise the etag is treated as empty. A value that contains no double
 * quote is wrapped in double quotes before it is returned, so a missing etag
 * comes back as `""` (two quote characters).
 *
 * @param localImagePath - Path of the locally cached image.
 * @param localImageEtagPath - Path of the image's `.etag` sidecar file.
 * @returns The etag, wrapped in double quotes.
 */
const readEtagFromFile = async (
    localImagePath: string,
    localImageEtagPath: string
) => {
    const [imageIsCached, sidecarIsPresent] = await Promise.all([
        fs.exists(localImagePath),
        fs.exists(localImageEtagPath),
    ])
    const storedEtag =
        imageIsCached && sidecarIsPresent
            ? await fs.readFile(localImageEtagPath, "utf8")
            : ""
    // DigitalOcean wraps etag in double quotes, so do the same for bare values
    return storedEtag.includes('"') ? storedEtag : '"' + storedEtag + '"'
}