Skip to content

Commit 711ad69

Browse files
authored
fix: [broken-internal-links] Prompt completed but no output was found (#1603)
https://jira.corp.adobe.com/browse/SITES-36310
1 parent b7da257 commit 711ad69

File tree

8 files changed

+810
-135
lines changed

8 files changed

+810
-135
lines changed

src/internal-links/suggestions-generator.js

Lines changed: 16 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
import { getPrompt, isNonEmptyArray } from '@adobe/spacecat-shared-utils';
1414
import { AzureOpenAIClient } from '@adobe/spacecat-shared-gpt-client';
1515
import { Audit, Suggestion as SuggestionDataAccess } from '@adobe/spacecat-shared-data-access';
16-
import { getScrapedDataForSiteId } from '../support/utils.js';
16+
import { getScrapedDataForSiteId, limitConcurrency } from '../support/utils.js';
1717
import { syncSuggestions } from '../utils/data-access.js';
1818
import { filterByAuditScope, extractPathPrefix } from './subpath-filter.js';
1919

@@ -25,6 +25,7 @@ export const generateSuggestionData = async (finalUrl, brokenInternalLinks, cont
2525
const azureOpenAIClient = AzureOpenAIClient.createFrom(context);
2626
const azureOpenAIOptions = { responseFormat: 'json_object' };
2727
const BATCH_SIZE = 300;
28+
const MAX_CONCURRENT_AI_CALLS = 5;
2829

2930
// Ensure brokenInternalLinks is an array
3031
if (!Array.isArray(brokenInternalLinks)) {
@@ -205,19 +206,20 @@ export const generateSuggestionData = async (finalUrl, brokenInternalLinks, cont
205206
}
206207
}
207208

208-
const updatedInternalLinks = [];
209-
for (let index = 0; index < brokenLinksWithFilteredData.length; index += 1) {
210-
const link = brokenLinksWithFilteredData[index];
211-
const headerSuggestions = headerSuggestionsResults[index];
212-
// eslint-disable-next-line no-await-in-loop
213-
const updatedLink = await processLink(link, headerSuggestions);
214-
// Remove filtered data before returning (not needed in final result)
215-
const cleanLink = { ...updatedLink };
216-
delete cleanLink.filteredSiteData;
217-
delete cleanLink.filteredHeaderLinks;
218-
updatedInternalLinks.push(cleanLink);
219-
}
220-
return updatedInternalLinks;
209+
return limitConcurrency(
210+
brokenLinksWithFilteredData.map(
211+
(link, index) => async () => {
212+
const headerSuggestions = headerSuggestionsResults[index];
213+
const updatedLink = await processLink(link, headerSuggestions);
214+
// Remove filtered data before returning (not needed in final result)
215+
const cleanLink = { ...updatedLink };
216+
delete cleanLink.filteredSiteData;
217+
delete cleanLink.filteredHeaderLinks;
218+
return cleanLink;
219+
},
220+
),
221+
MAX_CONCURRENT_AI_CALLS,
222+
);
221223
};
222224

223225
export async function syncBrokenInternalLinksSuggestions({

src/paid-traffic-analysis/cache-warmer.js

Lines changed: 1 addition & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ import {
1919
} from '@adobe/spacecat-shared-athena-client';
2020
import crypto from 'crypto';
2121
import { fileExists, addResultJsonToCache } from './caching-helper.js';
22+
import { limitConcurrency } from '../support/utils.js';
2223

2324
const QUERIES = [
2425
{ dimensions: ['utm_campaign', 'path', 'device'], mapper: TrafficDataWithCWVDto },
@@ -80,28 +81,6 @@ function getConfig(env) {
8081
};
8182
}
8283

83-
async function limitConcurrency(tasks, maxConcurrent) {
84-
const results = [];
85-
const executing = [];
86-
87-
for (const task of tasks) {
88-
const promise = task().then((result) => {
89-
executing.splice(executing.indexOf(promise), 1);
90-
return result;
91-
});
92-
93-
results.push(promise);
94-
executing.push(promise);
95-
96-
if (executing.length >= maxConcurrent) {
97-
// eslint-disable-next-line no-await-in-loop
98-
await Promise.race(executing);
99-
}
100-
}
101-
102-
return Promise.all(results);
103-
}
104-
10584
async function checkCacheExists(
10685
context,
10786
log,

src/preflight/links.js

Lines changed: 32 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,31 @@ import { generateSuggestionData } from '../internal-links/suggestions-generator.
1717

1818
export const PREFLIGHT_LINKS = 'links';
1919

20+
/**
 * Create an issue object for a broken internal link with AI suggestions.
 *
 * The origin of `urlTo`, and of every usable suggested URL, is swapped for
 * `baseURLOrigin` and the result is passed through `stripTrailingSlash`.
 *
 * @param {string} urlTo - The absolute URL that is broken
 * @param {number} status - HTTP status code
 * @param {string} baseURLOrigin - Base URL origin to replace preview origin
 * @param {Array<string>} [urlsSuggested] - Optional array of suggested alternative URLs from AI;
 *   entries that are not strings or cannot be parsed as absolute URLs are skipped
 * @param {string} [aiRationale] - Optional AI rationale for suggestions
 * @returns {Object} Issue object with `url`, `issue`, `seoImpact`, `seoRecommendation`,
 *   `aiSuggestion` (first usable suggestion, or `undefined` when there is none) and `aiRationale`
 * @throws {TypeError} If `urlTo` cannot be parsed as an absolute URL
 */
export function createBrokenLinkIssue(urlTo, status, baseURLOrigin, urlsSuggested, aiRationale) {
  // Swap the URL's own origin for the base origin, then drop any trailing slash.
  const rebase = (url) => stripTrailingSlash(
    url.replace(new URL(url).origin, baseURLOrigin),
  );

  // Suggestions are optional, AI-generated input: a single malformed entry
  // must not prevent the broken link itself from being reported.
  const isAbsoluteUrl = (candidate) => {
    if (typeof candidate !== 'string') {
      return false;
    }
    try {
      // eslint-disable-next-line no-new
      new URL(candidate);
      return true;
    } catch {
      return false;
    }
  };

  const aiUrls = Array.isArray(urlsSuggested)
    ? urlsSuggested.filter(isAbsoluteUrl).map(rebase)
    : [];

  return {
    url: rebase(urlTo),
    issue: `Status ${status}`,
    seoImpact: 'High',
    seoRecommendation: 'Fix or remove broken links to improve user experience and SEO',
    // Only the first usable suggestion is surfaced; the key stays present (undefined) otherwise.
    aiSuggestion: aiUrls[0],
    aiRationale,
  };
}
44+
2045
export default async function links(context, auditContext) {
2146
const {
2247
site, job, log,
@@ -83,17 +108,14 @@ export default async function links(context, auditContext) {
83108
if (!brokenInternalLinksByPage.has(href)) {
84109
brokenInternalLinksByPage.set(href, []);
85110
}
86-
const aiUrls = urlsSuggested?.map((url) => stripTrailingSlash(
87-
url.replace(new URL(url).origin, baseURLOrigin),
88-
));
89-
brokenInternalLinksByPage.get(href).push({
90-
url: stripTrailingSlash(urlTo.replace(new URL(urlTo).origin, baseURLOrigin)),
91-
issue: `Status ${status}`,
92-
seoImpact: 'High',
93-
seoRecommendation: 'Fix or remove broken links to improve user experience and SEO',
94-
aiSuggestion: aiUrls[0],
111+
const issue = createBrokenLinkIssue(
112+
urlTo,
113+
status,
114+
baseURLOrigin,
115+
urlsSuggested,
95116
aiRationale,
96-
});
117+
);
118+
brokenInternalLinksByPage.get(href).push(issue);
97119
});
98120
} else {
99121
auditResult.brokenInternalLinks.forEach(({ urlTo, href, status }) => {

src/support/utils.js

Lines changed: 48 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -196,6 +196,34 @@ export function getUrlWithoutPath(url) {
196196
return `${urlObj.protocol}//${urlObj.host}`;
197197
}
198198

199+
/**
 * Limits the concurrency of async tasks.
 *
 * Tasks are started in array order. Once `maxConcurrent` tasks are in flight,
 * the next one is not started until one of them settles. The resolved array
 * follows the order of `tasks`, not the order of completion.
 *
 * If a task rejects or throws, the returned promise rejects with that error.
 * Tasks that were already started are not cancelled.
 *
 * @param {Array<Function>} tasks - Array of zero-argument functions to execute; each may
 *   return a promise or a plain value
 * @param {number} maxConcurrent - Maximum number of concurrent tasks
 * @returns {Promise<Array>} - Array of results from all tasks, in input order
 */
export async function limitConcurrency(tasks, maxConcurrent) {
  const results = [];
  const inFlight = new Set();

  for (const task of tasks) {
    // The async wrapper still invokes the task immediately, but turns a
    // synchronous throw into a rejection and accepts non-promise returns.
    const tracked = (async () => task())().then((result) => {
      inFlight.delete(tracked);
      return result;
    });

    results.push(tracked);
    inFlight.add(tracked);

    if (inFlight.size >= maxConcurrent) {
      // Wait for a free slot before starting the next task.
      // eslint-disable-next-line no-await-in-loop
      await Promise.race(inFlight);
    }
  }

  return Promise.all(results);
}
226+
199227
const extractScrapedMetadataFromJson = (data, log) => {
200228
try {
201229
log.debug(`Extracting data from JSON (${data.finalUrl}:`, JSON.stringify(data.scrapeResult.tags));
@@ -296,6 +324,7 @@ export async function calculateCPCValue(context, siteId) {
296324
export const getScrapedDataForSiteId = async (site, context) => {
297325
const { s3Client, env, log } = context;
298326
const siteId = site.getId();
327+
const MAX_CONCURRENT_S3_READS = 10;
299328

300329
let allFiles = [];
301330
let isTruncated = true;
@@ -323,16 +352,16 @@ export const getScrapedDataForSiteId = async (site, context) => {
323352
}
324353

325354
async function fetchContentOfFiles(files) {
326-
return Promise.all(
327-
files.map(async (file) => {
328-
const fileContent = await getObjectFromKey(
355+
return limitConcurrency(
356+
files.map(
357+
(file) => async () => getObjectFromKey(
329358
s3Client,
330359
env.S3_SCRAPER_BUCKET_NAME,
331360
file.Key,
332361
log,
333-
);
334-
return fileContent;
335-
}),
362+
),
363+
),
364+
MAX_CONCURRENT_S3_READS,
336365
);
337366
}
338367

@@ -347,16 +376,19 @@ export const getScrapedDataForSiteId = async (site, context) => {
347376
};
348377
}
349378

350-
const extractedData = await Promise.all(
351-
allFiles.map(async (file) => {
352-
const fileContent = await getObjectFromKey(
353-
s3Client,
354-
env.S3_SCRAPER_BUCKET_NAME,
355-
file.Key,
356-
log,
357-
);
358-
return extractScrapedMetadataFromJson(fileContent, log);
359-
}),
379+
const extractedData = await limitConcurrency(
380+
allFiles.map(
381+
(file) => async () => {
382+
const fileContent = await getObjectFromKey(
383+
s3Client,
384+
env.S3_SCRAPER_BUCKET_NAME,
385+
file.Key,
386+
log,
387+
);
388+
return extractScrapedMetadataFromJson(fileContent, log);
389+
},
390+
),
391+
MAX_CONCURRENT_S3_READS,
360392
);
361393

362394
const indexFile = allFiles

0 commit comments

Comments
 (0)