diff --git a/Changelog b/Changelog index 042f69bf..7a593971 100644 --- a/Changelog +++ b/Changelog @@ -1,6 +1,10 @@ Unreleased: * FIX: Ensure AWS SDK has access to object size when issuing an upload (@benoit #2117) * FIX: Change log level of S3 missing keys message (@benoit #2144) +* FIX: Logic to set .webp path prefix on reencoded images is skewed (@benoit74 #2140) +* FIX: S3 cached images are missing (@benoit74 #2136) +* FIX: Do not rely on URL filename extension to detect images (@benoit74 #2088) +* FIX: S3 cached image are never used (@benoit74 #2138) 1.14.0: * FIX: Remove S3 upload concurrency to avoid 'RequestTimeTooSkewed' errors (@benoi74 #2118) diff --git a/package-lock.json b/package-lock.json index 4d12faf3..675107ac 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "mwoffliner", - "version": "1.14.0", + "version": "1.14.1-dev0", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "mwoffliner", - "version": "1.14.0", + "version": "1.14.1-dev0", "license": "GPL-3.0", "dependencies": { "@aws-sdk/client-s3": "^3.374.0", @@ -45,7 +45,6 @@ "imagemin-webp": "^7.0.0", "md5": "^2.3.0", "merge": "^2.1.1", - "mime-type": "^4.0.0", "mkdirp": "^2.1.6", "mocha": "^10.2.0", "p-map": "^5.5.0", @@ -10446,11 +10445,6 @@ "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz", "integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==" }, - "node_modules/inherits-ex": { - "version": "1.6.0", - "resolved": "https://registry.npmjs.org/inherits-ex/-/inherits-ex-1.6.0.tgz", - "integrity": "sha512-67sANrSoIvMmYDy0qyjmM/PvFdgBmWZVQoPBsRpDuP4tmlylEX1KdGN1bHvReG3eHBdaHY7WlZsrqys4y/cLVA==" - }, "node_modules/ini": { "version": "1.3.8", "resolved": "https://registry.npmjs.org/ini/-/ini-1.3.8.tgz", @@ -13534,19 +13528,6 @@ "node": ">= 0.6" } }, - "node_modules/mime-type": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/mime-type/-/mime-type-4.0.0.tgz", - "integrity": "sha512-1FCc9fsTg44pd7koB486WEepve+sc4847F0USUf08j4+bAU6/9ckIq4kHVEhCxbxHCyUZy++dxx/PtSR/m4XBQ==", - "dependencies": { - "micromatch": "^4.0.2", - "path.js": "^1.0.7", - "util-ex": "^0.3.15" - }, - "engines": { - "node": ">= 8.6" - } - }, "node_modules/mime-types": { "version": "2.1.35", "resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.35.tgz", @@ -15042,24 +15023,6 @@ "node": ">=8" } }, - "node_modules/path.js": { - "version": "1.0.7", - "resolved": "https://registry.npmjs.org/path.js/-/path.js-1.0.7.tgz", - "integrity": "sha512-DPX1vNSmckC3figW8xT/fEuF+XBg/96RUpXfW0yT6UGHgQI2mtTSADflz45bXKREbz+5GJa0qDQGNIpNr1skRQ==", - "dependencies": { - "escape-string-regexp": "^1.0.3", - "inherits-ex": "^1.1.2", - "util-ex": "^0.3.10" - } - }, - "node_modules/path.js/node_modules/escape-string-regexp": { - "version": "1.0.5", - "resolved": "https://registry.npmjs.org/escape-string-regexp/-/escape-string-regexp-1.0.5.tgz", - "integrity": "sha512-vbRorB5FUQWvla16U8R/qgaFIya2qGzwDrNmCZuYKrbdSUMG6I1ZCGQRefkRVhuOkIGVne7BQ35DSfo1qvJqFg==", - "engines": { - "node": ">=0.8.0" - } - }, "node_modules/peek-readable": { "version": "5.0.0", "resolved": "https://registry.npmjs.org/peek-readable/-/peek-readable-5.0.0.tgz", @@ -18996,15 +18959,6 @@ "resolved": "https://registry.npmjs.org/util-deprecate/-/util-deprecate-1.0.2.tgz", "integrity": "sha512-EPD5q1uXyFxJpCrLnCc1nHnq3gOa6DZBocAIiI2TaSCA7VCJ1UJDMagCzIkXNsUYfD1daK//LTEQ8xiIbrHtcw==" }, - "node_modules/util-ex": { - "version": "0.3.18", - "resolved": "https://registry.npmjs.org/util-ex/-/util-ex-0.3.18.tgz", - "integrity": "sha512-GPVjD257DtgCDMHYqbdWvZ+RY3HaXZ7Dps/44de5WscOjFNL2Qr+6dTIKGlyfA4A5BXyeFKWy8mb19OATWhh8Q==", - "dependencies": { - "inherits-ex": "^1.5.2", - "xtend": "^4.0.2" - } - }, "node_modules/uuid": { "version": "8.3.2", "resolved": "https://registry.npmjs.org/uuid/-/uuid-8.3.2.tgz", diff --git a/package.json b/package.json index 3001f19f..1808e3b3 100644 --- a/package.json +++ b/package.json @@ -94,7 +94,6 @@ "imagemin-webp": "^7.0.0", "md5": "^2.3.0", "merge": "^2.1.1", - "mime-type": "^4.0.0", "mkdirp": "^2.1.6", "mocha": "^10.2.0", "p-map": "^5.5.0", diff --git a/src/Downloader.ts b/src/Downloader.ts index ca646146..a7e65a3b 100644 --- a/src/Downloader.ts +++ b/src/Downloader.ts @@ -15,8 +15,9 @@ import imageminWebp from 'imagemin-webp' import sharp from 'sharp' import http from 'http' import https from 'https' +import { fileTypeFromBuffer } from 'file-type' -import { normalizeMwResponse, DB_ERROR, WEAK_ETAG_REGEX, stripHttpFromUrl, isBitmapImageMimeType, isImageUrl, getMimeType, isWebpCandidateImageMimeType } from './util/index.js' +import { normalizeMwResponse, DB_ERROR, WEAK_ETAG_REGEX, stripHttpFromUrl, isBitmapImageMimeType, isWebpCandidateImageMimeType } from './util/index.js' import S3 from './S3.js' import * as logger from './Logger.js' import MediaWiki, { QueryOpts } from './MediaWiki.js' @@ -68,6 +69,10 @@ interface BackoffOptions { backoffHandler: (number: number, delay: number, error?: any) => void } +interface CompressionData { + data: any +} + export const defaultStreamRequestOptions: AxiosRequestConfig = { headers: { accept: 'application/octet-stream', @@ -353,7 +358,7 @@ class Downloader { const url = urlHelper.deserializeUrl(_url) await this.claimRequest() return new Promise((resolve, reject) => { - this.backoffCall(this.getJSONCb, url, (err: any, val: any) => { + this.backoffCall(this.getJSONCb, url, 'json', (err: any, val: any) => { this.releaseRequest() if (err) { const httpStatus = err.response && err.response.status @@ -366,7 +371,7 @@ class Downloader { }) } - public async downloadContent(_url: string, retry = true): Promise<{ content: Buffer | string; responseHeaders: any }> { + public async downloadContent(_url: string, kind: string, retry = true): Promise<{ content: Buffer | string; contentType: string; setCookie: string | null }> { if (!_url) { throw new Error(`Parameter [${_url}] is not a valid url`) } @@ -384,9 +389,9 @@ class Downloader { } } if (retry) { - this.backoffCall(this.getContentCb, url, cb) + this.backoffCall(this.getContentCb, url, kind, cb) } else { - this.getContentCb(url, cb) + this.getContentCb(url, kind, cb) } }) } catch (err) { @@ -454,7 +459,7 @@ class Downloader { return null } - private getJSONCb = (url: string, handler: (...args: any[]) => any): void => { + private getJSONCb = (url: string, kind: string, handler: (...args: any[]) => any): void => { logger.info(`Getting JSON from [${url}]`) axios .get(url, this.jsonRequestOptions) @@ -466,7 +471,7 @@ class Downloader { const newMaxActiveRequests: number = Math.max(this.maxActiveRequests - 1, 1) logger.log(`Setting maxActiveRequests from [${this.maxActiveRequests}] to [${newMaxActiveRequests}]`) this.maxActiveRequests = newMaxActiveRequests - return this.getJSONCb(url, handler) + return this.getJSONCb(url, kind, handler) } else if (err.response && err.response.status === 404) { handler(err) } @@ -477,56 +482,68 @@ class Downloader { }) } - private async getCompressedBody(resp: any): Promise { - if (isBitmapImageMimeType(resp.headers['content-type'])) { - if (isWebpCandidateImageMimeType(this.webp, resp.headers['content-type']) && !this.cssDependenceUrls.hasOwnProperty(resp.config.url)) { - resp.data = await (imagemin as any) - .buffer(resp.data, imageminOptions.get('webp').get(resp.headers['content-type'])) - .catch(async (err) => { - if (/Unsupported color conversion request/.test(err.stderr)) { - return (imagemin as any) - .buffer(await sharp(resp.data).toColorspace('srgb').toBuffer(), imageminOptions.get('webp').get(resp.headers['content-type'])) - .catch(() => { - return resp.data - }) - .then((data) => { - resp.headers['content-type'] = 'image/webp' - return data + private async getImageMimeType(data: any): Promise { + const fileType = await fileTypeFromBuffer(data) + return fileType ? fileType.mime : null + } + + private async getCompressedBody(input: CompressionData): Promise { + const contentType = await this.getImageMimeType(input.data) + if (isBitmapImageMimeType(contentType)) { + if (this.webp && isWebpCandidateImageMimeType(contentType)) { + return { + data: await (imagemin as any) + .buffer(input.data, imageminOptions.get('webp').get(contentType)) + .catch(async (err) => { + if (/Unsupported color conversion request/.test(err.stderr)) { + return (imagemin as any) + .buffer(await sharp(input.data).toColorspace('srgb').toBuffer(), imageminOptions.get('webp').get(contentType)) + .catch(() => { + return input.data + }) + .then((data) => { + return data + }) + } else { + return (imagemin as any).buffer(input.data, imageminOptions.get('default').get(contentType)).catch(() => { + return input.data }) - } else { - return (imagemin as any).buffer(resp.data, imageminOptions.get('default').get(resp.headers['content-type'])).catch(() => { - return resp.data - }) - } - }) - .then((data) => { - resp.headers['content-type'] = 'image/webp' - return data - }) - resp.headers.path_postfix = '.webp' + } + }) + .then((data) => { + return data + }), + } } else { - resp.data = await (imagemin as any).buffer(resp.data, imageminOptions.get('default').get(resp.headers['content-type'])).catch(() => { - return resp.data - }) + return { + data: await (imagemin as any).buffer(input.data, imageminOptions.get('default').get(contentType)).catch(() => { + return input.data + }), + } } - return true } - return false + return { + data: input.data, + } } - private getContentCb = async (url: string, handler: any): Promise => { + private getContentCb = async (url: string, kind: string, handler: any): Promise => { logger.info(`Downloading [${url}]`) try { - if (this.optimisationCacheUrl && isImageUrl(url)) { + if (this.optimisationCacheUrl && kind === 'image') { this.downloadImage(url, handler) } else { // Use the base domain of the wiki being scraped as the Referer header, so that we can // successfully scrap WMF map tiles. - const resp = await axios(url, { ...this.arrayBufferRequestOptions, headers: { Referer: MediaWiki.baseUrl.href } }) - await this.getCompressedBody(resp) + const resp = await axios(url, { ...this.arrayBufferRequestOptions, headers: { ...this.arrayBufferRequestOptions.headers, Referer: MediaWiki.baseUrl.href } }) + // If content is an image, we might benefit from compressing it + const content = kind === 'image' ? (await this.getCompressedBody({ data: resp.data })).data : resp.data + // compute content-type from content, since getCompressedBody might have modified it + const contentType = kind === 'image' ? (await this.getImageMimeType(content)) || resp.headers['content-type'] : resp.headers['content-type'] handler(null, { - responseHeaders: resp.headers, - content: resp.data, + contentType, + content, + setCookie: resp.headers['set-cookie'] ? resp.headers['set-cookie'].join(';') : null, }) } } catch (err) { @@ -555,46 +572,44 @@ class Downloader { } // Use the base domain of the wiki being scraped as the Referer header, so that we can // successfully scrap WMF map tiles. - const mwResp = await axios(url, { ...this.arrayBufferRequestOptions, headers: { Referer: MediaWiki.baseUrl.href } }) - - // HTTP response content-type can not really be trusted (at least if 304) - mwResp.headers['content-type'] = getMimeType(url, s3Resp?.Metadata?.contenttype || mwResp.headers['content-type']) + const mwResp = await axios(url, { ...this.arrayBufferRequestOptions, headers: { ...this.arrayBufferRequestOptions.headers, Referer: MediaWiki.baseUrl.href } }) // Most of the images, after having been uploaded once to the // cache, will always have 304 status, until modified. If cache // is up to date, return cached image. if (mwResp.status === 304) { - // eslint-disable-next-line @typescript-eslint/no-unused-vars - const headers = (({ Body, ...o }) => o)(s3Resp) - - // If image is a webp conversion candidate - if (isWebpCandidateImageMimeType(this.webp, mwResp.headers['content-type']) && !this.cssDependenceUrls.hasOwnProperty(mwResp.config.url)) { - headers.path_postfix = '.webp' - headers['content-type'] = 'image/webp' - } - // Proceed with image + const data = (await this.streamToBuffer(s3Resp.Body as Readable)) as any + const contentType = await this.getImageMimeType(data) + logger.info(`Using S3-cached image for ${url} (contentType: ${contentType})`) handler(null, { - responseHeaders: headers, - content: (await this.streamToBuffer(s3Resp.Body as Readable)) as any, + contentType, + content: data, }) - return } // Compress content because image blob comes from upstream MediaWiki - await this.getCompressedBody(mwResp) + const compressedData = (await this.getCompressedBody({ data: mwResp.data })).data // Check for the ETag and upload to cache const etag = this.removeEtagWeakPrefix(mwResp.headers.etag) if (etag) { - await this.s3.uploadBlob(stripHttpFromUrl(url), mwResp.data, etag, mwResp.headers['content-type'], this.webp ? 'webp' : '1') + await this.s3.uploadBlob(stripHttpFromUrl(url), compressedData, etag, this.webp ? 'webp' : '1') + } + + // get contentType from image, with fallback to response headers should the image be unsupported at all (e.g. SVG) + const contentType = (await this.getImageMimeType(compressedData)) || mwResp.headers['content-type'] + if (s3Resp) { + logger.info(`Using image downloaded from upstream for ${url} (S3-cached image is outdated, contentType: ${contentType})`) + } else { + logger.info(`Using image downloaded from upstream for ${url} (no S3-cached image found, contentType: ${contentType})`) } // Proceed with image handler(null, { - responseHeaders: mwResp.headers, - content: mwResp.data, + contentType, + content: compressedData, }) }) .catch((err) => { @@ -630,8 +645,8 @@ class Downloader { } } - private backoffCall(handler: (...args: any[]) => void, url: string, callback: (...args: any[]) => void | Promise): void { - const call = backoff.call(handler, url, callback) + private backoffCall(handler: (...args: any[]) => void, url: string, kind: string, callback: (...args: any[]) => void | Promise): void { + const call = backoff.call(handler, url, kind, callback) call.setStrategy(this.backoffOptions.strategy) call.retryIf(this.backoffOptions.retryIf) call.failAfter(this.backoffOptions.failAfter) diff --git a/src/Dump.ts b/src/Dump.ts index f467bbea..3b160427 100644 --- a/src/Dump.ts +++ b/src/Dump.ts @@ -202,7 +202,7 @@ export class Dump { const sheetUrls: Array = [] /* Load main page to see which CSS files are needed */ - const { content } = await downloader.downloadContent(this.mwMetaData.webUrl) + const { content } = await downloader.downloadContent(this.mwMetaData.webUrl, 'data') const html = content.toString() const doc = domino.createDocument(html) const links = Array.from(doc.getElementsByTagName('link')) diff --git a/src/MediaWiki.ts b/src/MediaWiki.ts index 0efcc958..e4045203 100644 --- a/src/MediaWiki.ts +++ b/src/MediaWiki.ts @@ -270,7 +270,7 @@ class MediaWiki { } // Getting token to login. - const { content, responseHeaders } = await downloader.downloadContent(url + 'action=query&meta=tokens&type=login&format=json&formatversion=2') + const { content, setCookie } = await downloader.downloadContent(url + 'action=query&meta=tokens&type=login&format=json&formatversion=2', 'data') // Logging in await axios(this.actionApiUrl.href, { @@ -282,7 +282,7 @@ class MediaWiki { lgtoken: JSON.parse(content.toString()).query.tokens.logintoken, }), headers: { - Cookie: responseHeaders['set-cookie'].join(';'), + Cookie: setCookie, 'Content-Type': 'application/x-www-form-urlencoded', }, method: 'POST', @@ -388,7 +388,7 @@ class MediaWiki { public async getTextDirection(downloader: Downloader): Promise { logger.log('Getting text direction...') - const { content } = await downloader.downloadContent(this.webUrl.href) + const { content } = await downloader.downloadContent(this.webUrl.href, 'data') const body = content.toString() const doc = domino.createDocument(body) const contentNode = doc.getElementById('mw-content-text') @@ -458,7 +458,7 @@ class MediaWiki { public async getSubTitle(downloader: Downloader) { logger.log('Getting sub-title...') - const { content } = await downloader.downloadContent(this.webUrl.href) + const { content } = await downloader.downloadContent(this.webUrl.href, 'data') const html = content.toString() const doc = domino.createDocument(html) const subTitleNode = doc.getElementById('siteSub') diff --git a/src/S3.ts b/src/S3.ts index f5adc318..d510ac2b 100644 --- a/src/S3.ts +++ b/src/S3.ts @@ -65,11 +65,11 @@ class S3 { }) } - public uploadBlob(key: string, data: any, eTag: string, contentType: string, version: string): Promise { + public uploadBlob(key: string, data: any, eTag: string, version: string): Promise { const command = new PutObjectCommand({ Bucket: this.bucketName, Key: key, - Metadata: { etag: eTag, contenttype: contentType, version }, + Metadata: { etag: eTag, version }, Body: data, }) diff --git a/src/mwoffliner.lib.ts b/src/mwoffliner.lib.ts index 43b9ac45..13b361db 100644 --- a/src/mwoffliner.lib.ts +++ b/src/mwoffliner.lib.ts @@ -414,7 +414,7 @@ async function execute(argv: any) { await getThumbnailsData() logger.log('Getting Main Page') - await getMainPage(dump, zimCreator, downloader) + await getMainPage(dump, zimCreator) logger.log('Getting articles') stime = Date.now() @@ -527,7 +527,7 @@ async function execute(argv: any) { const parsedUrl = urlParser.parse(entries.logo) const logoUrl = parsedUrl.protocol ? entries.logo : MediaWiki.baseUrl.protocol + entries.logo - const { content } = await downloader.downloadContent(logoUrl) + const { content } = await downloader.downloadContent(logoUrl, 'image') return sharp(content).resize(48, 48, { fit: sharp.fit.inside, withoutEnlargement: true }).png().toBuffer() } @@ -541,7 +541,7 @@ async function execute(argv: any) { } } - function getMainPage(dump: Dump, zimCreator: ZimCreator, downloader: Downloader) { + function getMainPage(dump: Dump, zimCreator: ZimCreator) { async function createMainPage() { logger.log('Creating main page...') const doc = domino.createDocument( @@ -579,7 +579,7 @@ async function execute(argv: any) { } if (articlesWithImages.length > MIN_IMAGE_THRESHOLD_ARTICLELIST_PAGE) { - const articlesWithImagesEl = articlesWithImages.map((article) => makeArticleImageTile(dump, article, downloader.webp)).join('\n') + const articlesWithImagesEl = articlesWithImages.map((article) => makeArticleImageTile(dump, article)).join('\n') doc.body.innerHTML = `
${articlesWithImagesEl}
` } else { const articlesWithoutImagesEl = allArticles.map((article) => makeArticleListItem(dump, article)).join('\n') @@ -623,7 +623,10 @@ async function execute(argv: any) { articleDetail.internalThumbnailUrl = getRelativeFilePath('Main_Page', getMediaBase(suitableResUrl, true), 'I') - await Promise.all([filesToDownloadXPath.set(path, { url: urlHelper.serializeUrl(suitableResUrl), mult, width } as FileDetail), articleDetailXId.set(articleId, articleDetail)]) + await Promise.all([ + filesToDownloadXPath.set(path, { url: urlHelper.serializeUrl(suitableResUrl), mult, width, kind: 'image' } as FileDetail), + articleDetailXId.set(articleId, articleDetail), + ]) } async function getThumbnailsData(): Promise { diff --git a/src/renderers/abstract.renderer.ts b/src/renderers/abstract.renderer.ts index 039a7e93..e49a7576 100644 --- a/src/renderers/abstract.renderer.ts +++ b/src/renderers/abstract.renderer.ts @@ -10,16 +10,7 @@ import { config } from '../config.js' import { Dump } from '../Dump.js' import { rewriteUrlsOfDoc } from '../util/rewriteUrls.js' import { footerTemplate } from '../Templates.js' -import { - getFullUrl, - getMediaBase, - getMimeType, - getRelativeFilePath, - isWebpCandidateImageMimeType, - interpolateTranslationString, - encodeArticleIdForZimHtmlUrl, - getStaticFiles, -} from '../util/misc.js' +import { getFullUrl, getMediaBase, getRelativeFilePath, interpolateTranslationString, encodeArticleIdForZimHtmlUrl, getStaticFiles } from '../util/misc.js' type renderType = 'auto' | 'desktop' | 'mobile' | 'specific' type renderName = 'VisualEditor' | 'WikimediaDesktop' | 'WikimediaMobile' | 'RestApi' @@ -42,7 +33,6 @@ export type RendererBuilderOptions = RendererBuilderOptionsCommon | RendererBuil export interface RenderOpts { data?: any - webp: boolean _moduleDependencies: any articleId?: string articleDetailXId?: RKVS @@ -55,6 +45,8 @@ export interface RenderSingleOutput { articleId: string displayTitle: string html: string + imageDependencies: any + videoDependencies: any mediaDependencies: any moduleDependencies: any staticFiles: string[] @@ -76,14 +68,14 @@ export abstract class Renderer { srcCache: KVS, articleId: string, videoEl: DominoElement, - webp: boolean, - ): Promise<{ mediaDependencies: string[]; subtitles: string[] }> { - const mediaDependencies: string[] = [] + ): Promise<{ imageDependencies: string[]; videoDependencies: string[]; subtitles: string[] }> { + const imageDependencies: string[] = [] + const videoDependencies: string[] = [] const subtitles: string[] = [] if (dump.nopic || dump.novid || dump.nodet) { DOMUtils.deleteNode(videoEl) - return { mediaDependencies, subtitles } + return { imageDependencies, videoDependencies, subtitles } } this.adjustVideoElementAttributes(videoEl) @@ -93,18 +85,18 @@ export abstract class Renderer { if (!chosenVideoSourceEl) { logger.warn(`Unable to find an appropriate video/audio source for an media element in article '${articleId}'`) DOMUtils.deleteNode(videoEl) - return { mediaDependencies, subtitles } + return { imageDependencies, videoDependencies, subtitles } } - this.handleVideoPoster(videoEl, articleId, webp, mediaDependencies, srcCache) - this.updateVideoSrc(chosenVideoSourceEl, articleId, srcCache, mediaDependencies) + this.handleVideoPoster(videoEl, articleId, imageDependencies, srcCache) + this.updateVideoSrc(chosenVideoSourceEl, articleId, srcCache, videoDependencies) const trackElements = Array.from(videoEl.querySelectorAll('track')) for (const track of trackElements) { subtitles.push(await this.treatSubtitle(track, articleId)) } - return { mediaDependencies, subtitles } + return { imageDependencies, videoDependencies, subtitles } } private adjustVideoElementAttributes(videoEl: DominoElement): void { @@ -187,30 +179,30 @@ export abstract class Renderer { return chosenVideoSourceEl } - private handleVideoPoster(videoEl: DominoElement, articleId: string, webp: boolean, mediaDependencies: string[], srcCache: KVS): void { + private handleVideoPoster(videoEl: DominoElement, articleId: string, imageDependencies: string[], srcCache: KVS): void { const posterUrl = videoEl.getAttribute('poster') if (posterUrl) { const videoPosterUrl = getFullUrl(posterUrl, MediaWiki.baseUrl) const newVideoPosterUrl = getRelativeFilePath(articleId, getMediaBase(videoPosterUrl, true), 'I') if (posterUrl) { - videoEl.setAttribute('poster', isWebpCandidateImageMimeType(webp, getMimeType(newVideoPosterUrl)) ? newVideoPosterUrl + '.webp' : newVideoPosterUrl) + videoEl.setAttribute('poster', newVideoPosterUrl) } videoEl.removeAttribute('resource') if (!srcCache.hasOwnProperty(videoPosterUrl)) { srcCache[videoPosterUrl] = true - mediaDependencies.push(videoPosterUrl) + imageDependencies.push(videoPosterUrl) } } } - private updateVideoSrc(chosenVideoSourceEl: DominoElement, articleId: string, srcCache: KVS, mediaDependencies: string[]): void { + private updateVideoSrc(chosenVideoSourceEl: DominoElement, articleId: string, srcCache: KVS, videoDependencies: string[]): void { /* Download content, but avoid duplicate calls */ const sourceUrl = getFullUrl(chosenVideoSourceEl.getAttribute('src'), MediaWiki.baseUrl) if (!srcCache.hasOwnProperty(sourceUrl)) { srcCache[sourceUrl] = true - mediaDependencies.push(sourceUrl) + videoDependencies.push(sourceUrl) } /* Set new URL for the video element */ @@ -261,12 +253,12 @@ export abstract class Renderer { imageNode.parentNode.replaceChild(thumbDiv, imageNode) } - private async treatImage(dump: Dump, srcCache: KVS, articleId: string, img: DominoElement, webp: boolean): Promise<{ mediaDependencies: string[] }> { - const mediaDependencies: string[] = [] + private async treatImage(dump: Dump, srcCache: KVS, articleId: string, img: DominoElement): Promise<{ imageDependencies: string[] }> { + const imageDependencies: string[] = [] if (!this.shouldKeepImage(dump, img)) { DOMUtils.deleteNode(img) - return { mediaDependencies } + return { imageDependencies } } /* Remove image link */ @@ -287,7 +279,7 @@ export abstract class Renderer { linkNode.parentNode.replaceChild(img, linkNode) } else { DOMUtils.deleteNode(img) - return { mediaDependencies } + return { imageDependencies } } } } @@ -303,11 +295,11 @@ export abstract class Renderer { /* Download image, but avoid duplicate calls */ if (!srcCache.hasOwnProperty(src)) { srcCache[src] = true - mediaDependencies.push(src) + imageDependencies.push(src) } /* Change image source attribute to point to the local image */ - img.setAttribute('src', isWebpCandidateImageMimeType(webp, getMimeType(src)) ? newSrc + '.webp' : newSrc) + img.setAttribute('src', newSrc) /* Remove useless 'resource' attribute */ img.removeAttribute('resource') @@ -321,7 +313,7 @@ export abstract class Renderer { /* Add lazy loading */ img.setAttribute('loading', 'lazy') - return { mediaDependencies } + return { imageDependencies } } private shouldKeepImage(dump: Dump, img: DominoElement) { @@ -334,8 +326,9 @@ export abstract class Renderer { ) } - protected async treatMedias(parsoidDoc: DominoElement, dump: Dump, articleId: string, webp: boolean) { - let mediaDependencies: string[] = [] + protected async treatMedias(parsoidDoc: DominoElement, dump: Dump, articleId: string) { + let imageDependencies: string[] = [] + let videoDependencies: string[] = [] let subtitles: string[] = [] /* Clean/rewrite image tags */ const imgs = Array.from(parsoidDoc.getElementsByTagName('img')) @@ -344,14 +337,15 @@ export abstract class Renderer { for (const videoEl of videos) { //