Skip to content
42 changes: 40 additions & 2 deletions src/broken-links-guidance/guidance-handler.js
Original file line number Diff line number Diff line change
Expand Up @@ -48,20 +48,58 @@ export default async function handler(message, context) {
return badRequest('Site ID mismatch');
}

// Validate brokenLinks array
if (!brokenLinks || !Array.isArray(brokenLinks)) {
log.error(`[${opportunity.getType()} Guidance] Invalid brokenLinks format. Expected array, got: ${typeof brokenLinks}. Message: ${JSON.stringify(message)}`);
return badRequest('Invalid brokenLinks format');
}

if (brokenLinks.length === 0) {
log.info(`[${opportunity.getType()} Guidance] No broken links provided in Mystique response`);
return ok();
}

await Promise.all(brokenLinks.map(async (brokenLink) => {
const suggestion = await Suggestion.findById(brokenLink.suggestionId);
if (!suggestion) {
log.error(`[${opportunity.getType()}] Suggestion not found for ID: ${brokenLink.suggestionId}`);
return {};
}

const suggestedUrls = brokenLink.suggestedUrls || [];

// Validate that suggestedUrls is an array
if (!Array.isArray(suggestedUrls)) {
log.info(
`[${opportunity.getType()}] Invalid suggestedUrls format for suggestion ${brokenLink.suggestionId}. `
+ `Expected array, got: ${typeof suggestedUrls}. Available fields: ${Object.keys(brokenLink).join(', ')}`,
);
}

// Filter and validate suggested URLs
const validSuggestedUrls = Array.isArray(suggestedUrls) ? suggestedUrls : [];
const filteredSuggestedUrls = await filterBrokenSuggestedUrls(
brokenLink.suggestedUrls,
validSuggestedUrls,
site.getBaseURL(),
);

// Handle AI rationale - clear it if all URLs were filtered out
// This prevents showing rationale for URLs that don't exist
let aiRationale = brokenLink.aiRationale || '';
if (filteredSuggestedUrls.length === 0 && validSuggestedUrls.length > 0) {
// All URLs were filtered out (likely invalid/broken), clear rationale
log.info('All the suggested URLs were filtered out');
aiRationale = '';
} else if (filteredSuggestedUrls.length === 0 && validSuggestedUrls.length === 0) {
// No URLs were provided by Mystique, clear rationale
log.info('No suggested URLs provided by Mystique');
aiRationale = '';
}

suggestion.setData({
...suggestion.getData(),
urlsSuggested: filteredSuggestedUrls,
aiRationale: brokenLink.aiRationale,
aiRationale,
});

return suggestion.save();
Expand Down
108 changes: 77 additions & 31 deletions src/internal-links/handler.js
Original file line number Diff line number Diff line change
Expand Up @@ -226,48 +226,94 @@ export const opportunityAndSuggestionsStep = async (context) => {
const configuration = await Configuration.findLatest();
const topPages = await SiteTopPage.allBySiteIdAndSourceAndGeo(site.getId(), 'ahrefs', 'global');

log.info(
`[${AUDIT_TYPE}] [Site: ${site.getId()}] Found ${topPages.length} top pages from Ahrefs`,
);

// Filter top pages by audit scope (subpath/locale) if baseURL has a subpath
// This determines what alternatives Mystique will see:
// - If baseURL is "site.com/en-ca" → only /en-ca alternatives
// - If baseURL is "site.com" → ALL locales alternatives
// Mystique will then filter by domain (not locale), so cross-locale suggestions
// are possible if audit scope includes multiple locales
const baseURL = site.getBaseURL();
const filteredTopPages = filterByAuditScope(topPages, baseURL, { urlProperty: 'getUrl' }, log);

log.info(
`[${AUDIT_TYPE}] [Site: ${site.getId()}] After audit scope filtering: ${filteredTopPages.length} top pages available`,
);

if (configuration.isHandlerEnabledForSite('broken-internal-links-auto-suggest', site)) {
const suggestions = await Suggestion.allByOpportunityIdAndStatus(
opportunity.getId(),
SuggestionDataAccess.STATUSES.NEW,
);

// Filter alternatives per broken link by its locale/subpath
const brokenLinksWithFilteredAlternatives = suggestions.map((suggestion) => {
const urlFrom = suggestion?.getData()?.urlFrom;
const urlTo = suggestion?.getData()?.urlTo;

// Extract path prefix from broken link to filter alternatives
const brokenLinkPathPrefix = extractPathPrefix(urlTo) || extractPathPrefix(urlFrom);

// Filter alternatives to same locale/subpath as broken link
let filteredAlternatives = filteredTopPages.map((page) => page.getUrl());
if (brokenLinkPathPrefix) {
filteredAlternatives = filteredAlternatives.filter((url) => {
const urlPathPrefix = extractPathPrefix(url);
return urlPathPrefix === brokenLinkPathPrefix;
});

// Log warning if no alternatives found for this locale
if (filteredAlternatives.length === 0) {
log.warn(
`[${AUDIT_TYPE}] [Site: ${site.getId()}] No alternatives found for broken link `
+ `with prefix ${brokenLinkPathPrefix}. urlTo: ${urlTo}, urlFrom: ${urlFrom}`,
);
}
// Build broken links array without per-link alternatives
// Mystique expects: brokenLinks with only urlFrom, urlTo, suggestionId
const brokenLinks = suggestions
.map((suggestion) => ({
urlFrom: suggestion?.getData()?.urlFrom,
urlTo: suggestion?.getData()?.urlTo,
suggestionId: suggestion?.getId(),
}))
.filter((link) => link.urlFrom && link.urlTo && link.suggestionId); // Filter invalid entries

// Filter alternatives by locales/subpaths present in broken links
// This limits suggestions to relevant locales only
const allTopPageUrls = filteredTopPages.map((page) => page.getUrl());

// Extract unique locales/subpaths from broken links
const brokenLinkLocales = new Set();
brokenLinks.forEach((link) => {
const locale = extractPathPrefix(link.urlTo) || extractPathPrefix(link.urlFrom);
if (locale) {
brokenLinkLocales.add(locale);
}
});

// Filter alternatives to only include URLs matching broken links' locales
// If no locales found (no subpath), include all alternatives
// Always ensure alternativeUrls is an array (even if empty)
let alternativeUrls = [];
if (brokenLinkLocales.size > 0) {
alternativeUrls = allTopPageUrls.filter((url) => {
const urlLocale = extractPathPrefix(url);
// Include if URL matches one of the broken links' locales, or has no locale
return !urlLocale || brokenLinkLocales.has(urlLocale);
});
} else {
// No locale prefixes found, include all alternatives
alternativeUrls = allTopPageUrls;
}

// Validate before sending to Mystique
if (brokenLinks.length === 0) {
log.warn(
`[${AUDIT_TYPE}] [Site: ${site.getId()}] No valid broken links to send to Mystique. Skipping message.`,
);
return {
urlFrom,
urlTo,
suggestionId: suggestion?.getId(),
alternativeUrls: filteredAlternatives,
status: 'complete',
};
});
}

if (!opportunity?.getId()) {
log.error(
`[${AUDIT_TYPE}] [Site: ${site.getId()}] Opportunity ID is missing. Cannot send to Mystique.`,
);
return {
status: 'complete',
};
}

if (alternativeUrls.length === 0) {
log.warn(
`[${AUDIT_TYPE}] [Site: ${site.getId()}] No alternative URLs available. Cannot generate suggestions. Skipping message to Mystique.`,
);
return {
status: 'complete',
};
}

const message = {
type: 'guidance:broken-links',
Expand All @@ -276,9 +322,9 @@ export const opportunityAndSuggestionsStep = async (context) => {
deliveryType: site.getDeliveryType(),
time: new Date().toISOString(),
data: {
alternativeUrls: filteredTopPages.map((page) => page.getUrl()),
opportunityId: opportunity?.getId(),
brokenLinks: brokenLinksWithFilteredAlternatives,
alternativeUrls,
opportunityId: opportunity.getId(),
brokenLinks,
},
};
await sqs.sendMessage(env.QUEUE_SPACECAT_TO_MYSTIQUE, message);
Expand Down
Loading