From 2bac4c0620046b2dd472fc8f80efc2d8c8727c8d Mon Sep 17 00:00:00 2001 From: awd Date: Fri, 15 Aug 2025 09:49:05 -0700 Subject: [PATCH 1/2] api/pinterest: fixed cobalt sometimes downloading low resolution images --- api/src/processing/services/pinterest.js | 51 +++++++++++++++++++++--- 1 file changed, 45 insertions(+), 6 deletions(-) diff --git a/api/src/processing/services/pinterest.js b/api/src/processing/services/pinterest.js index c21400e1a..4e86b8a75 100644 --- a/api/src/processing/services/pinterest.js +++ b/api/src/processing/services/pinterest.js @@ -36,14 +36,53 @@ export default async function(o) { audioFilename: `pinterest_${id}_audio` } - const imageLink = [...html.matchAll(imageRegex)] - .map(([, link]) => link) - .find(a => a.endsWith('.jpg') || a.endsWith('.gif')); + const allImageMatches = [...html.matchAll(imageRegex)]; + + if (allImageMatches.length === 0) { + return { error: "fetch.empty" }; + } - const imageType = imageLink.endsWith(".gif") ? "gif" : "jpg" + // Step 1: Get the first image (always main content) + const firstImageUrl = allImageMatches[0][1]; + + // Step 2: Extract the image hash/identifier + const hashMatch = firstImageUrl.match(/\/([0-9a-f]{2}\/[0-9a-f]{2}\/[0-9a-f]{2}\/[0-9a-f]{32})\.(jpg|gif)/); + + if (!hashMatch) { + // Fallback to first image if we can't parse the hash + const imageType = firstImageUrl.endsWith(".gif") ? "gif" : "jpg"; + return { + urls: firstImageUrl, + isPhoto: true, + filename: `pinterest_${id}.${imageType}` + }; + } + + const imageHash = hashMatch[1]; // e.g., "7c/0a/1c/7c0a1c5f1c999a4a67f3c5b847da093c" + const extension = hashMatch[2]; + + // Step 3: Find all variations of this specific image + const sameImageUrls = allImageMatches + .map(([, url]) => url) + .filter(url => url.includes(imageHash)) + .filter(url => url.endsWith(`.${extension}`)); + + // Step 4: Sort by quality and take the best + const bestQualityUrl = sameImageUrls.sort((a, b) => { + const getQualityScore = (url) => { + if (url.includes('/originals/')) return 4; + if (url.includes('/736x/')) return 3; + if (url.includes('/474x/')) return 2; + if (url.includes('/236x/')) return 1; + return 0; + }; + return getQualityScore(b) - getQualityScore(a); + })[0]; + + const imageType = extension; - if (imageLink) return { - urls: imageLink, + if (bestQualityUrl) return { + urls: bestQualityUrl, isPhoto: true, filename: `pinterest_${id}.${imageType}` } From b6f35e14c6aba1a7b1cda1a55bd598d5d57c14dc Mon Sep 17 00:00:00 2001 From: potatolover68 Date: Fri, 15 Aug 2025 10:04:23 -0700 Subject: [PATCH 2/2] api/pinterest: removed hard-coded resolutions, improved regex, and added safety net fallback in case regex doesn't match --- api/src/processing/services/pinterest.js | 32 +++++++++++++++++++----- 1 file changed, 26 insertions(+), 6 deletions(-) diff --git a/api/src/processing/services/pinterest.js b/api/src/processing/services/pinterest.js index 4e86b8a75..94c31de0c 100644 --- a/api/src/processing/services/pinterest.js +++ b/api/src/processing/services/pinterest.js @@ -2,7 +2,7 @@ import { genericUserAgent } from "../../config.js"; import { resolveRedirectingURL } from "../url.js"; const videoRegex = /"url":"(https:\/\/v1\.pinimg\.com\/videos\/.*?)"/g; -const imageRegex = /src="(https:\/\/i\.pinimg\.com\/.*\.(jpg|gif))"/g; +const imageRegex = /src="(https:\/\/i\.pinimg\.com\/(?:\d+x|orig)\/[0-9a-f/]{41}\.(jpg|gif))"/g; const notFoundRegex = /"__typename"\s*:\s*"PinNotFound"/; export default async function(o) { @@ -39,6 +39,21 @@ export default async function(o) { const allImageMatches = [...html.matchAll(imageRegex)]; if (allImageMatches.length === 0) { + // Fallback to broader regex if precise one finds nothing + const fallbackRegex = /src="(https:\/\/i\.pinimg\.com\/.*\.(jpg|gif))"/g; + const fallbackMatches = [...html.matchAll(fallbackRegex)]; + + if (fallbackMatches.length > 0) { + // Use first fallback image + const fallbackUrl = fallbackMatches[0][1]; + const imageType = fallbackUrl.endsWith(".gif") ? "gif" : "jpg"; + return { + urls: fallbackUrl, + isPhoto: true, + filename: `pinterest_${id}.${imageType}` + }; + } + return { error: "fetch.empty" }; } @@ -46,7 +61,7 @@ export default async function(o) { const firstImageUrl = allImageMatches[0][1]; // Step 2: Extract the image hash/identifier - const hashMatch = firstImageUrl.match(/\/([0-9a-f]{2}\/[0-9a-f]{2}\/[0-9a-f]{2}\/[0-9a-f]{32})\.(jpg|gif)/); + const hashMatch = firstImageUrl.match(/\/(?:\d+x|orig)\/([0-9a-f]{2}\/[0-9a-f]{2}\/[0-9a-f]{2}\/[0-9a-f]{32})\.(jpg|gif)/); if (!hashMatch) { // Fallback to first image if we can't parse the hash @@ -70,10 +85,15 @@ export default async function(o) { // Step 4: Sort by quality and take the best const bestQualityUrl = sameImageUrls.sort((a, b) => { const getQualityScore = (url) => { - if (url.includes('/originals/')) return 4; - if (url.includes('/736x/')) return 3; - if (url.includes('/474x/')) return 2; - if (url.includes('/236x/')) return 1; + // Check for originals (highest quality) + if (url.includes('/orig/')) return Infinity; + + // Extract resolution number (e.g., "736" from "/736x/") + const resolutionMatch = url.match(/\/(\d+)x\//); + if (resolutionMatch) { + return parseInt(resolutionMatch[1], 10); + } + return 0; }; return getQualityScore(b) - getQualityScore(a);