Skip to content

Commit c2e4426

Browse files
authored
1 parent 71f1c30 commit c2e4426

File tree

4 files changed

+758
-43
lines changed

4 files changed

+758
-43
lines changed

src/broken-links-guidance/guidance-handler.js

Lines changed: 40 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,20 +48,58 @@ export default async function handler(message, context) {
4848
return badRequest('Site ID mismatch');
4949
}
5050

51+
// Validate brokenLinks array
52+
if (!brokenLinks || !Array.isArray(brokenLinks)) {
53+
log.error(`[${opportunity.getType()} Guidance] Invalid brokenLinks format. Expected array, got: ${typeof brokenLinks}. Message: ${JSON.stringify(message)}`);
54+
return badRequest('Invalid brokenLinks format');
55+
}
56+
57+
if (brokenLinks.length === 0) {
58+
log.info(`[${opportunity.getType()} Guidance] No broken links provided in Mystique response`);
59+
return ok();
60+
}
61+
5162
await Promise.all(brokenLinks.map(async (brokenLink) => {
5263
const suggestion = await Suggestion.findById(brokenLink.suggestionId);
5364
if (!suggestion) {
5465
log.error(`[${opportunity.getType()}] Suggestion not found for ID: ${brokenLink.suggestionId}`);
5566
return {};
5667
}
68+
69+
const suggestedUrls = brokenLink.suggestedUrls || [];
70+
71+
// Validate that suggestedUrls is an array
72+
if (!Array.isArray(suggestedUrls)) {
73+
log.info(
74+
`[${opportunity.getType()}] Invalid suggestedUrls format for suggestion ${brokenLink.suggestionId}. `
75+
+ `Expected array, got: ${typeof suggestedUrls}. Available fields: ${Object.keys(brokenLink).join(', ')}`,
76+
);
77+
}
78+
79+
// Filter and validate suggested URLs
80+
const validSuggestedUrls = Array.isArray(suggestedUrls) ? suggestedUrls : [];
5781
const filteredSuggestedUrls = await filterBrokenSuggestedUrls(
58-
brokenLink.suggestedUrls,
82+
validSuggestedUrls,
5983
site.getBaseURL(),
6084
);
85+
86+
// Handle AI rationale - clear it whenever no usable suggested URLs remain (all were filtered out, or none were provided)
87+
// This prevents showing rationale for URLs that don't exist
88+
let aiRationale = brokenLink.aiRationale || '';
89+
if (filteredSuggestedUrls.length === 0 && validSuggestedUrls.length > 0) {
90+
// All URLs were filtered out (likely invalid/broken), clear rationale
91+
log.info('All the suggested URLs were filtered out');
92+
aiRationale = '';
93+
} else if (filteredSuggestedUrls.length === 0 && validSuggestedUrls.length === 0) {
94+
// No URLs were provided by Mystique, clear rationale
95+
log.info('No suggested URLs provided by Mystique');
96+
aiRationale = '';
97+
}
98+
6199
suggestion.setData({
62100
...suggestion.getData(),
63101
urlsSuggested: filteredSuggestedUrls,
64-
aiRationale: brokenLink.aiRationale,
102+
aiRationale,
65103
});
66104

67105
return suggestion.save();

src/internal-links/handler.js

Lines changed: 77 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -226,48 +226,94 @@ export const opportunityAndSuggestionsStep = async (context) => {
226226
const configuration = await Configuration.findLatest();
227227
const topPages = await SiteTopPage.allBySiteIdAndSourceAndGeo(site.getId(), 'ahrefs', 'global');
228228

229+
log.info(
230+
`[${AUDIT_TYPE}] [Site: ${site.getId()}] Found ${topPages.length} top pages from Ahrefs`,
231+
);
232+
229233
// Filter top pages by audit scope (subpath/locale) if baseURL has a subpath
234+
// This determines what alternatives Mystique will see:
235+
// - If baseURL is "site.com/en-ca" → only /en-ca alternatives
236+
// - If baseURL is "site.com" → ALL locales alternatives
237+
// Mystique will then filter by domain (not locale), so cross-locale suggestions
238+
// are possible if audit scope includes multiple locales
230239
const baseURL = site.getBaseURL();
231240
const filteredTopPages = filterByAuditScope(topPages, baseURL, { urlProperty: 'getUrl' }, log);
232241

242+
log.info(
243+
`[${AUDIT_TYPE}] [Site: ${site.getId()}] After audit scope filtering: ${filteredTopPages.length} top pages available`,
244+
);
245+
233246
if (configuration.isHandlerEnabledForSite('broken-internal-links-auto-suggest', site)) {
234247
const suggestions = await Suggestion.allByOpportunityIdAndStatus(
235248
opportunity.getId(),
236249
SuggestionDataAccess.STATUSES.NEW,
237250
);
238251

239-
// Filter alternatives per broken link by its locale/subpath
240-
const brokenLinksWithFilteredAlternatives = suggestions.map((suggestion) => {
241-
const urlFrom = suggestion?.getData()?.urlFrom;
242-
const urlTo = suggestion?.getData()?.urlTo;
243-
244-
// Extract path prefix from broken link to filter alternatives
245-
const brokenLinkPathPrefix = extractPathPrefix(urlTo) || extractPathPrefix(urlFrom);
246-
247-
// Filter alternatives to same locale/subpath as broken link
248-
let filteredAlternatives = filteredTopPages.map((page) => page.getUrl());
249-
if (brokenLinkPathPrefix) {
250-
filteredAlternatives = filteredAlternatives.filter((url) => {
251-
const urlPathPrefix = extractPathPrefix(url);
252-
return urlPathPrefix === brokenLinkPathPrefix;
253-
});
254-
255-
// Log warning if no alternatives found for this locale
256-
if (filteredAlternatives.length === 0) {
257-
log.warn(
258-
`[${AUDIT_TYPE}] [Site: ${site.getId()}] No alternatives found for broken link `
259-
+ `with prefix ${brokenLinkPathPrefix}. urlTo: ${urlTo}, urlFrom: ${urlFrom}`,
260-
);
261-
}
252+
// Build broken links array without per-link alternatives
253+
// Mystique expects: brokenLinks with only urlFrom, urlTo, suggestionId
254+
const brokenLinks = suggestions
255+
.map((suggestion) => ({
256+
urlFrom: suggestion?.getData()?.urlFrom,
257+
urlTo: suggestion?.getData()?.urlTo,
258+
suggestionId: suggestion?.getId(),
259+
}))
260+
.filter((link) => link.urlFrom && link.urlTo && link.suggestionId); // Filter invalid entries
261+
262+
// Filter alternatives by locales/subpaths present in broken links
263+
// This limits suggestions to relevant locales only
264+
const allTopPageUrls = filteredTopPages.map((page) => page.getUrl());
265+
266+
// Extract unique locales/subpaths from broken links
267+
const brokenLinkLocales = new Set();
268+
brokenLinks.forEach((link) => {
269+
const locale = extractPathPrefix(link.urlTo) || extractPathPrefix(link.urlFrom);
270+
if (locale) {
271+
brokenLinkLocales.add(locale);
262272
}
273+
});
263274

275+
// Filter alternatives to URLs matching one of the broken links' locales (URLs without a locale prefix are always kept)
276+
// If no locales found (no subpath), include all alternatives
277+
// Always ensure alternativeUrls is an array (even if empty)
278+
let alternativeUrls = [];
279+
if (brokenLinkLocales.size > 0) {
280+
alternativeUrls = allTopPageUrls.filter((url) => {
281+
const urlLocale = extractPathPrefix(url);
282+
// Include if URL matches one of the broken links' locales, or has no locale
283+
return !urlLocale || brokenLinkLocales.has(urlLocale);
284+
});
285+
} else {
286+
// No locale prefixes found, include all alternatives
287+
alternativeUrls = allTopPageUrls;
288+
}
289+
290+
// Validate before sending to Mystique
291+
if (brokenLinks.length === 0) {
292+
log.warn(
293+
`[${AUDIT_TYPE}] [Site: ${site.getId()}] No valid broken links to send to Mystique. Skipping message.`,
294+
);
264295
return {
265-
urlFrom,
266-
urlTo,
267-
suggestionId: suggestion?.getId(),
268-
alternativeUrls: filteredAlternatives,
296+
status: 'complete',
269297
};
270-
});
298+
}
299+
300+
if (!opportunity?.getId()) {
301+
log.error(
302+
`[${AUDIT_TYPE}] [Site: ${site.getId()}] Opportunity ID is missing. Cannot send to Mystique.`,
303+
);
304+
return {
305+
status: 'complete',
306+
};
307+
}
308+
309+
if (alternativeUrls.length === 0) {
310+
log.warn(
311+
`[${AUDIT_TYPE}] [Site: ${site.getId()}] No alternative URLs available. Cannot generate suggestions. Skipping message to Mystique.`,
312+
);
313+
return {
314+
status: 'complete',
315+
};
316+
}
271317

272318
const message = {
273319
type: 'guidance:broken-links',
@@ -276,9 +322,9 @@ export const opportunityAndSuggestionsStep = async (context) => {
276322
deliveryType: site.getDeliveryType(),
277323
time: new Date().toISOString(),
278324
data: {
279-
alternativeUrls: filteredTopPages.map((page) => page.getUrl()),
280-
opportunityId: opportunity?.getId(),
281-
brokenLinks: brokenLinksWithFilteredAlternatives,
325+
alternativeUrls,
326+
opportunityId: opportunity.getId(),
327+
brokenLinks,
282328
},
283329
};
284330
await sqs.sendMessage(env.QUEUE_SPACECAT_TO_MYSTIQUE, message);

0 commit comments

Comments
 (0)