@@ -226,48 +226,94 @@ export const opportunityAndSuggestionsStep = async (context) => {
226226 const configuration = await Configuration . findLatest ( ) ;
227227 const topPages = await SiteTopPage . allBySiteIdAndSourceAndGeo ( site . getId ( ) , 'ahrefs' , 'global' ) ;
228228
229+ log . info (
230+ `[${ AUDIT_TYPE } ] [Site: ${ site . getId ( ) } ] Found ${ topPages . length } top pages from Ahrefs` ,
231+ ) ;
232+
229233 // Filter top pages by audit scope (subpath/locale) if baseURL has a subpath
234+ // This determines what alternatives Mystique will see:
235+ // - If baseURL is "site.com/en-ca" → only /en-ca alternatives
236+ // - If baseURL is "site.com" → alternatives from ALL locales
237+ // Mystique will then filter by domain (not locale), so cross-locale suggestions
238+ // are possible if audit scope includes multiple locales
230239 const baseURL = site . getBaseURL ( ) ;
231240 const filteredTopPages = filterByAuditScope ( topPages , baseURL , { urlProperty : 'getUrl' } , log ) ;
232241
242+ log . info (
243+ `[${ AUDIT_TYPE } ] [Site: ${ site . getId ( ) } ] After audit scope filtering: ${ filteredTopPages . length } top pages available` ,
244+ ) ;
245+
233246 if ( configuration . isHandlerEnabledForSite ( 'broken-internal-links-auto-suggest' , site ) ) {
234247 const suggestions = await Suggestion . allByOpportunityIdAndStatus (
235248 opportunity . getId ( ) ,
236249 SuggestionDataAccess . STATUSES . NEW ,
237250 ) ;
238251
239- // Filter alternatives per broken link by its locale/subpath
240- const brokenLinksWithFilteredAlternatives = suggestions . map ( ( suggestion ) => {
241- const urlFrom = suggestion ?. getData ( ) ?. urlFrom ;
242- const urlTo = suggestion ?. getData ( ) ?. urlTo ;
243-
244- // Extract path prefix from broken link to filter alternatives
245- const brokenLinkPathPrefix = extractPathPrefix ( urlTo ) || extractPathPrefix ( urlFrom ) ;
246-
247- // Filter alternatives to same locale/subpath as broken link
248- let filteredAlternatives = filteredTopPages . map ( ( page ) => page . getUrl ( ) ) ;
249- if ( brokenLinkPathPrefix ) {
250- filteredAlternatives = filteredAlternatives . filter ( ( url ) => {
251- const urlPathPrefix = extractPathPrefix ( url ) ;
252- return urlPathPrefix === brokenLinkPathPrefix ;
253- } ) ;
254-
255- // Log warning if no alternatives found for this locale
256- if ( filteredAlternatives . length === 0 ) {
257- log . warn (
258- `[${ AUDIT_TYPE } ] [Site: ${ site . getId ( ) } ] No alternatives found for broken link `
259- + `with prefix ${ brokenLinkPathPrefix } . urlTo: ${ urlTo } , urlFrom: ${ urlFrom } ` ,
260- ) ;
261- }
252+ // Build broken links array without per-link alternatives
253+ // Mystique expects: brokenLinks with only urlFrom, urlTo, suggestionId
254+ const brokenLinks = suggestions
255+ . map ( ( suggestion ) => ( {
256+ urlFrom : suggestion ?. getData ( ) ?. urlFrom ,
257+ urlTo : suggestion ?. getData ( ) ?. urlTo ,
258+ suggestionId : suggestion ?. getId ( ) ,
259+ } ) )
260+ . filter ( ( link ) => link . urlFrom && link . urlTo && link . suggestionId ) ; // Filter invalid entries
261+
262+ // Filter alternatives by locales/subpaths present in broken links
263+ // This limits suggestions to relevant locales only
264+ const allTopPageUrls = filteredTopPages . map ( ( page ) => page . getUrl ( ) ) ;
265+
266+ // Extract unique locales/subpaths from broken links
267+ const brokenLinkLocales = new Set ( ) ;
268+ brokenLinks . forEach ( ( link ) => {
269+ const locale = extractPathPrefix ( link . urlTo ) || extractPathPrefix ( link . urlFrom ) ;
270+ if ( locale ) {
271+ brokenLinkLocales . add ( locale ) ;
262272 }
273+ } ) ;
263274
275+ // Keep alternatives whose locale matches one of the broken links' locales; URLs with no locale prefix are kept too
276+ // If no locales found (no subpath), include all alternatives
277+ // Always ensure alternativeUrls is an array (even if empty)
278+ let alternativeUrls = [ ] ;
279+ if ( brokenLinkLocales . size > 0 ) {
280+ alternativeUrls = allTopPageUrls . filter ( ( url ) => {
281+ const urlLocale = extractPathPrefix ( url ) ;
282+ // Include if URL matches one of the broken links' locales, or has no locale
283+ return ! urlLocale || brokenLinkLocales . has ( urlLocale ) ;
284+ } ) ;
285+ } else {
286+ // No locale prefixes found, include all alternatives
287+ alternativeUrls = allTopPageUrls ;
288+ }
289+
290+ // Validate before sending to Mystique
291+ if ( brokenLinks . length === 0 ) {
292+ log . warn (
293+ `[${ AUDIT_TYPE } ] [Site: ${ site . getId ( ) } ] No valid broken links to send to Mystique. Skipping message.` ,
294+ ) ;
264295 return {
265- urlFrom,
266- urlTo,
267- suggestionId : suggestion ?. getId ( ) ,
268- alternativeUrls : filteredAlternatives ,
296+ status : 'complete' ,
269297 } ;
270- } ) ;
298+ }
299+
300+ if ( ! opportunity ?. getId ( ) ) {
301+ log . error (
302+ `[${ AUDIT_TYPE } ] [Site: ${ site . getId ( ) } ] Opportunity ID is missing. Cannot send to Mystique.` ,
303+ ) ;
304+ return {
305+ status : 'complete' ,
306+ } ;
307+ }
308+
309+ if ( alternativeUrls . length === 0 ) {
310+ log . warn (
311+ `[${ AUDIT_TYPE } ] [Site: ${ site . getId ( ) } ] No alternative URLs available. Cannot generate suggestions. Skipping message to Mystique.` ,
312+ ) ;
313+ return {
314+ status : 'complete' ,
315+ } ;
316+ }
271317
272318 const message = {
273319 type : 'guidance:broken-links' ,
@@ -276,9 +322,9 @@ export const opportunityAndSuggestionsStep = async (context) => {
276322 deliveryType : site . getDeliveryType ( ) ,
277323 time : new Date ( ) . toISOString ( ) ,
278324 data : {
279- alternativeUrls : filteredTopPages . map ( ( page ) => page . getUrl ( ) ) ,
280- opportunityId : opportunity ? .getId ( ) ,
281- brokenLinks : brokenLinksWithFilteredAlternatives ,
325+ alternativeUrls,
326+ opportunityId : opportunity . getId ( ) ,
327+ brokenLinks,
282328 } ,
283329 } ;
284330 await sqs . sendMessage ( env . QUEUE_SPACECAT_TO_MYSTIQUE , message ) ;
0 commit comments