diff --git a/src/broken-links-guidance/guidance-handler.js b/src/broken-links-guidance/guidance-handler.js index 3de889180a..f2b99d7427 100644 --- a/src/broken-links-guidance/guidance-handler.js +++ b/src/broken-links-guidance/guidance-handler.js @@ -48,20 +48,58 @@ export default async function handler(message, context) { return badRequest('Site ID mismatch'); } + // Validate brokenLinks array + if (!brokenLinks || !Array.isArray(brokenLinks)) { + log.error(`[${opportunity.getType()} Guidance] Invalid brokenLinks format. Expected array, got: ${typeof brokenLinks}. Message: ${JSON.stringify(message)}`); + return badRequest('Invalid brokenLinks format'); + } + + if (brokenLinks.length === 0) { + log.info(`[${opportunity.getType()} Guidance] No broken links provided in Mystique response`); + return ok(); + } + await Promise.all(brokenLinks.map(async (brokenLink) => { const suggestion = await Suggestion.findById(brokenLink.suggestionId); if (!suggestion) { log.error(`[${opportunity.getType()}] Suggestion not found for ID: ${brokenLink.suggestionId}`); return {}; } + + const suggestedUrls = brokenLink.suggestedUrls || []; + + // Validate that suggestedUrls is an array + if (!Array.isArray(suggestedUrls)) { + log.info( + `[${opportunity.getType()}] Invalid suggestedUrls format for suggestion ${brokenLink.suggestionId}. ` + + `Expected array, got: ${typeof suggestedUrls}. Available fields: ${Object.keys(brokenLink).join(', ')}`, + ); + } + + // Filter and validate suggested URLs + const validSuggestedUrls = Array.isArray(suggestedUrls) ? suggestedUrls : []; const filteredSuggestedUrls = await filterBrokenSuggestedUrls( - brokenLink.suggestedUrls, + validSuggestedUrls, site.getBaseURL(), ); + + // Handle AI rationale - clear it if all URLs were filtered out + // This prevents showing rationale for URLs that don't exist + let aiRationale = brokenLink.aiRationale || ''; + if (filteredSuggestedUrls.length === 0 && validSuggestedUrls.length > 0) { + // All URLs were filtered out (likely invalid/broken), clear rationale + log.info('All the suggested URLs were filtered out'); + aiRationale = ''; + } else if (filteredSuggestedUrls.length === 0 && validSuggestedUrls.length === 0) { + // No URLs were provided by Mystique, clear rationale + log.info('No suggested URLs provided by Mystique'); + aiRationale = ''; + } + suggestion.setData({ ...suggestion.getData(), urlsSuggested: filteredSuggestedUrls, - aiRationale: brokenLink.aiRationale, + aiRationale, }); return suggestion.save(); diff --git a/src/internal-links/handler.js b/src/internal-links/handler.js index e82f09f604..da0902401a 100644 --- a/src/internal-links/handler.js +++ b/src/internal-links/handler.js @@ -226,48 +226,94 @@ export const opportunityAndSuggestionsStep = async (context) => { const configuration = await Configuration.findLatest(); const topPages = await SiteTopPage.allBySiteIdAndSourceAndGeo(site.getId(), 'ahrefs', 'global'); + log.info( + `[${AUDIT_TYPE}] [Site: ${site.getId()}] Found ${topPages.length} top pages from Ahrefs`, + ); + // Filter top pages by audit scope (subpath/locale) if baseURL has a subpath + // This determines what alternatives Mystique will see: + // - If baseURL is "site.com/en-ca" → only /en-ca alternatives + // - If baseURL is "site.com" → ALL locales alternatives + // Mystique will then filter by domain (not locale), so cross-locale suggestions + // are possible if audit scope includes multiple locales const baseURL = site.getBaseURL(); const filteredTopPages = filterByAuditScope(topPages, baseURL, { urlProperty: 'getUrl' }, log); + log.info( + `[${AUDIT_TYPE}] [Site: ${site.getId()}] After audit scope filtering: ${filteredTopPages.length} top pages available`, + ); + if (configuration.isHandlerEnabledForSite('broken-internal-links-auto-suggest', site)) { const suggestions = await Suggestion.allByOpportunityIdAndStatus( opportunity.getId(), SuggestionDataAccess.STATUSES.NEW, ); - // Filter alternatives per broken link by its locale/subpath - const brokenLinksWithFilteredAlternatives = suggestions.map((suggestion) => { - const urlFrom = suggestion?.getData()?.urlFrom; - const urlTo = suggestion?.getData()?.urlTo; - - // Extract path prefix from broken link to filter alternatives - const brokenLinkPathPrefix = extractPathPrefix(urlTo) || extractPathPrefix(urlFrom); - - // Filter alternatives to same locale/subpath as broken link - let filteredAlternatives = filteredTopPages.map((page) => page.getUrl()); - if (brokenLinkPathPrefix) { - filteredAlternatives = filteredAlternatives.filter((url) => { - const urlPathPrefix = extractPathPrefix(url); - return urlPathPrefix === brokenLinkPathPrefix; - }); - - // Log warning if no alternatives found for this locale - if (filteredAlternatives.length === 0) { - log.warn( - `[${AUDIT_TYPE}] [Site: ${site.getId()}] No alternatives found for broken link ` - + `with prefix ${brokenLinkPathPrefix}. urlTo: ${urlTo}, urlFrom: ${urlFrom}`, - ); - } + // Build broken links array without per-link alternatives + // Mystique expects: brokenLinks with only urlFrom, urlTo, suggestionId + const brokenLinks = suggestions + .map((suggestion) => ({ + urlFrom: suggestion?.getData()?.urlFrom, + urlTo: suggestion?.getData()?.urlTo, + suggestionId: suggestion?.getId(), + })) + .filter((link) => link.urlFrom && link.urlTo && link.suggestionId); // Filter invalid entries + + // Filter alternatives by locales/subpaths present in broken links + // This limits suggestions to relevant locales only + const allTopPageUrls = filteredTopPages.map((page) => page.getUrl()); + + // Extract unique locales/subpaths from broken links + const brokenLinkLocales = new Set(); + brokenLinks.forEach((link) => { + const locale = extractPathPrefix(link.urlTo) || extractPathPrefix(link.urlFrom); + if (locale) { + brokenLinkLocales.add(locale); } + }); + // Filter alternatives to only include URLs matching broken links' locales + // If no locales found (no subpath), include all alternatives + // Always ensure alternativeUrls is an array (even if empty) + let alternativeUrls = []; + if (brokenLinkLocales.size > 0) { + alternativeUrls = allTopPageUrls.filter((url) => { + const urlLocale = extractPathPrefix(url); + // Include if URL matches one of the broken links' locales, or has no locale + return !urlLocale || brokenLinkLocales.has(urlLocale); + }); + } else { + // No locale prefixes found, include all alternatives + alternativeUrls = allTopPageUrls; + } + + // Validate before sending to Mystique + if (brokenLinks.length === 0) { + log.warn( + `[${AUDIT_TYPE}] [Site: ${site.getId()}] No valid broken links to send to Mystique. Skipping message.`, + ); return { - urlFrom, - urlTo, - suggestionId: suggestion?.getId(), - alternativeUrls: filteredAlternatives, + status: 'complete', }; - }); + } + + if (!opportunity?.getId()) { + log.error( + `[${AUDIT_TYPE}] [Site: ${site.getId()}] Opportunity ID is missing. Cannot send to Mystique.`, + ); + return { + status: 'complete', + }; + } + + if (alternativeUrls.length === 0) { + log.warn( + `[${AUDIT_TYPE}] [Site: ${site.getId()}] No alternative URLs available. Cannot generate suggestions. Skipping message to Mystique.`, + ); + return { + status: 'complete', + }; + } const message = { type: 'guidance:broken-links', @@ -276,9 +322,9 @@ export const opportunityAndSuggestionsStep = async (context) => { deliveryType: site.getDeliveryType(), time: new Date().toISOString(), data: { - alternativeUrls: filteredTopPages.map((page) => page.getUrl()), - opportunityId: opportunity?.getId(), - brokenLinks: brokenLinksWithFilteredAlternatives, + alternativeUrls, + opportunityId: opportunity.getId(), + brokenLinks, }, }; await sqs.sendMessage(env.QUEUE_SPACECAT_TO_MYSTIQUE, message); diff --git a/test/audits/internal-links/handler.test.js b/test/audits/internal-links/handler.test.js index 91094023a4..3675de98d6 100755 --- a/test/audits/internal-links/handler.test.js +++ b/test/audits/internal-links/handler.test.js @@ -284,6 +284,11 @@ describe('broken-internal-links audit opportunity and suggestions', () => { allBySiteIdAndSourceAndGeo: sandbox.stub().resolves(topPages), }; + // Initialize Suggestion in dataAccess for Mystique integration tests + if (!context.dataAccess.Suggestion) { + context.dataAccess.Suggestion = {}; + } + opportunity = { getType: () => 'broken-internal-links', getId: () => 'oppty-id-1', @@ -321,8 +326,10 @@ describe('broken-internal-links audit opportunity and suggestions', () => { handler = await esmock('../../../src/internal-links/handler.js', { '../../../src/internal-links/suggestions-generator.js': { generateSuggestionData: () => AUDIT_RESULT_DATA_WITH_SUGGESTIONS, + syncBrokenInternalLinksSuggestions: sandbox.stub().resolves(), }, }); + // Stub is already initialized in beforeEach, just update the method context.dataAccess.Suggestion.allByOpportunityIdAndStatus = sandbox.stub() .resolves(AUDIT_RESULT_DATA_WITH_SUGGESTIONS.map((data) => ( { getData: () => data, getId: () => '1111', save: () => {} }))); @@ -337,6 +344,21 @@ describe('broken-internal-links audit opportunity and suggestions', () => { context.dataAccess.Opportunity.create.resolves(opportunity); context.site.getLatestAuditByAuditType = () => auditData; context.site.getDeliveryType = () => 'aem_edge'; + // Stub Configuration to prevent errors when checking feature flag + context.dataAccess.Configuration = { + findLatest: () => ({ + isHandlerEnabledForSite: () => false, // Feature flag disabled for this test + }), + }; + + // Re-esmock handler without stubbing syncBrokenInternalLinksSuggestions + // so the real function runs and calls opportunity.addSuggestions + handler = await esmock('../../../src/internal-links/handler.js', { + '../../../src/internal-links/suggestions-generator.js': { + generateSuggestionData: () => AUDIT_RESULT_DATA_WITH_SUGGESTIONS, + // Don't stub syncBrokenInternalLinksSuggestions - let it run for this test + }, + }); const result = await handler.opportunityAndSuggestionsStep(context); @@ -385,6 +407,11 @@ describe('broken-internal-links audit opportunity and suggestions', () => { }).timeout(5000); it('handles SQS message sending errors', async () => { + context.dataAccess.Configuration = { + findLatest: () => ({ + isHandlerEnabledForSite: () => true, + }), + }; context.dataAccess.Opportunity.allBySiteIdAndStatus.resolves([]); context.dataAccess.Opportunity.create.resolves(opportunity); context.sqs.sendMessage.rejects(new Error('SQS error')); @@ -392,10 +419,48 @@ describe('broken-internal-links audit opportunity and suggestions', () => { context.site.getLatestAuditByAuditType = () => auditData; context.site.getDeliveryType = () => 'aem_edge'; + // Ensure getBaseURL returns the same domain as the top pages for filterByAuditScope to work + context.site.getBaseURL = () => 'https://example.com'; - await expect( - handler.opportunityAndSuggestionsStep(context), - ).to.be.rejectedWith('SQS error'); + // Ensure opportunity.getSuggestions() returns empty so syncSuggestions creates new ones + opportunity.getSuggestions = sandbox.stub().resolves([]); + opportunity.addSuggestions = sandbox.stub().resolves({ createdItems: [], errorItems: [] }); + + handler = await esmock('../../../src/internal-links/handler.js', { + '../../../src/internal-links/suggestions-generator.js': { + generateSuggestionData: () => AUDIT_RESULT_DATA_WITH_SUGGESTIONS, + syncBrokenInternalLinksSuggestions: sandbox.stub().resolves(), + }, + }); + + // Ensure we have suggestions and alternativeUrls for SQS to be called + // Stub must be set up AFTER handler is created to ensure it uses the correct context + // Use root URLs (pathname === '/') so extractPathPrefix returns empty string + // This ensures brokenLinkLocales.size === 0, so all alternatives are included + const validSuggestions = [ + { + getData: () => ({ + urlFrom: 'https://example.com/', + urlTo: 'https://example.com/', // Root URL - extractPathPrefix returns '' (empty string) + }), + getId: () => 'suggestion-1', + }, + ]; + if (!context.dataAccess.Suggestion) { + context.dataAccess.Suggestion = {}; + } + // Stub must accept any opportunity ID (the code calls it with opportunity.getId()) + context.dataAccess.Suggestion.allByOpportunityIdAndStatus = sandbox.stub() + .callsFake(() => Promise.resolve(validSuggestions)); + context.dataAccess.SiteTopPage.allBySiteIdAndSourceAndGeo = sandbox.stub() + .resolves([{ getUrl: () => 'https://example.com/page1' }]); + + try { + await handler.opportunityAndSuggestionsStep(context); + expect.fail('Expected promise to be rejected'); + } catch (error) { + expect(error.message).to.include('SQS error'); + } expect(context.dataAccess.Opportunity.create).to.have.been.calledOnceWith( expectedOpportunity, @@ -403,12 +468,52 @@ describe('broken-internal-links audit opportunity and suggestions', () => { }).timeout(5000); it('creating a new opportunity object succeeds and sends SQS messages', async () => { + context.dataAccess.Configuration = { + findLatest: () => ({ + isHandlerEnabledForSite: () => true, + }), + }; context.dataAccess.Opportunity.allBySiteIdAndStatus.resolves([]); context.dataAccess.Opportunity.create.resolves(opportunity); sandbox.stub(GoogleClient, 'createFrom').resolves({}); context.site.getLatestAuditByAuditType = () => auditData; context.site.getDeliveryType = () => 'aem_edge'; + // Ensure getBaseURL returns the same domain as the top pages for filterByAuditScope to work + context.site.getBaseURL = () => 'https://example.com'; + + // Ensure opportunity.getSuggestions() returns empty so syncSuggestions creates new ones + opportunity.getSuggestions = sandbox.stub().resolves([]); + opportunity.addSuggestions = sandbox.stub().resolves({ createdItems: [], errorItems: [] }); + + handler = await esmock('../../../src/internal-links/handler.js', { + '../../../src/internal-links/suggestions-generator.js': { + generateSuggestionData: () => AUDIT_RESULT_DATA_WITH_SUGGESTIONS, + syncBrokenInternalLinksSuggestions: sandbox.stub().resolves(), + }, + }); + + // Ensure we have suggestions and alternativeUrls for SQS to be called + // Stub must be set up AFTER handler is created to ensure it uses the correct context + // Use root URLs (pathname === '/') so extractPathPrefix returns empty string + // This ensures brokenLinkLocales.size === 0, so all alternatives are included + const validSuggestions = [ + { + getData: () => ({ + urlFrom: 'https://example.com/', + urlTo: 'https://example.com/', // Root URL - extractPathPrefix returns '' (empty string) + }), + getId: () => 'suggestion-1', + }, + ]; + if (!context.dataAccess.Suggestion) { + context.dataAccess.Suggestion = {}; + } + // Stub must accept any opportunity ID (the code calls it with opportunity.getId()) + context.dataAccess.Suggestion.allByOpportunityIdAndStatus = sandbox.stub() + .callsFake(() => Promise.resolve(validSuggestions)); + context.dataAccess.SiteTopPage.allBySiteIdAndSourceAndGeo = sandbox.stub() + .resolves([{ getUrl: () => 'https://example.com/page1' }]); const result = await handler.opportunityAndSuggestionsStep(context); @@ -628,6 +733,11 @@ describe('broken-internal-links audit opportunity and suggestions', () => { it('should use urlTo prefix when urlTo has prefix', async () => { // Test case where urlTo has path prefix, so extractPathPrefix(urlTo) returns '/uk' // This covers the case where the || operator uses the first operand (urlTo) + context.dataAccess.Configuration = { + findLatest: () => ({ + isHandlerEnabledForSite: () => true, + }), + }; context.dataAccess.Opportunity.allBySiteIdAndStatus.resolves([opportunity]); context.site.getBaseURL = () => 'https://bulk.com/uk'; // Site with subpath context.site.getDeliveryType = () => 'aem_edge'; @@ -661,13 +771,18 @@ describe('broken-internal-links audit opportunity and suggestions', () => { expect(context.sqs.sendMessage).to.have.been.calledOnce; const messageArg = context.sqs.sendMessage.getCall(0).args[1]; - // Verify that brokenLinks array contains filtered alternatives + // Verify that brokenLinks array does NOT contain per-link alternatives expect(messageArg.data.brokenLinks).to.be.an('array').with.lengthOf(1); const brokenLink = messageArg.data.brokenLinks[0]; + expect(brokenLink).to.have.property('urlFrom'); + expect(brokenLink).to.have.property('urlTo'); + expect(brokenLink).to.have.property('suggestionId'); + expect(brokenLink).to.not.have.property('alternativeUrls'); // Per-link alternatives removed + // Verify that main alternativeUrls array is filtered by broken links' locales // Since urlTo has /uk prefix, alternatives should be filtered to only /uk URLs - expect(brokenLink.alternativeUrls).to.be.an('array'); - brokenLink.alternativeUrls.forEach((url) => { + expect(messageArg.data.alternativeUrls).to.be.an('array'); + messageArg.data.alternativeUrls.forEach((url) => { expect(url).to.include('/uk/'); }); }).timeout(5000); @@ -677,6 +792,11 @@ describe('broken-internal-links audit opportunity and suggestions', () => { // This triggers the fallback to extractPathPrefix(urlFrom) when urlTo has no prefix // NOTE: extractPathPrefix returns empty string only for root URLs (no path segments) // URLs like 'https://bulk.com/page1' return '/page1', not empty string! + context.dataAccess.Configuration = { + findLatest: () => ({ + isHandlerEnabledForSite: () => true, + }), + }; context.dataAccess.Opportunity.allBySiteIdAndStatus.resolves([opportunity]); context.site.getBaseURL = () => 'https://bulk.com/uk'; // Site with subpath context.site.getDeliveryType = () => 'aem_edge'; @@ -711,16 +831,202 @@ describe('broken-internal-links audit opportunity and suggestions', () => { expect(context.sqs.sendMessage).to.have.been.calledOnce; const messageArg = context.sqs.sendMessage.getCall(0).args[1]; - // Verify that brokenLinks array contains filtered alternatives + // Verify that brokenLinks array does NOT contain per-link alternatives expect(messageArg.data.brokenLinks).to.be.an('array').with.lengthOf(1); const brokenLink = messageArg.data.brokenLinks[0]; + expect(brokenLink).to.have.property('urlFrom'); + expect(brokenLink).to.have.property('urlTo'); + expect(brokenLink).to.have.property('suggestionId'); + expect(brokenLink).to.not.have.property('alternativeUrls'); // Per-link alternatives removed + // Verify that main alternativeUrls array is filtered by broken links' locales // Since urlTo has no prefix, it should fall back to urlFrom's prefix (/uk) - // So alternatives should be filtered to only /uk URLs (from the already filtered top pages) - expect(brokenLink.alternativeUrls).to.be.an('array'); + // So alternatives should be filtered to only /uk URLs + expect(messageArg.data.alternativeUrls).to.be.an('array'); // All alternatives should have /uk prefix since we're filtering by urlFrom's prefix - brokenLink.alternativeUrls.forEach((url) => { + messageArg.data.alternativeUrls.forEach((url) => { expect(url).to.include('/uk/'); }); }).timeout(5000); + + it('should skip sending to Mystique when all broken links are filtered out', async () => { + context.dataAccess.Configuration = { + findLatest: () => ({ + isHandlerEnabledForSite: () => true, + }), + }; + context.dataAccess.Opportunity.allBySiteIdAndStatus.resolves([opportunity]); + context.site.getBaseURL = () => 'https://bulk.com'; + context.site.getDeliveryType = () => 'aem_edge'; + + // Create suggestions that will all be filtered out (missing required fields) + const invalidSuggestions = [ + { + getData: () => ({ + // Missing urlFrom + urlTo: 'https://bulk.com/broken', + }), + getId: () => 'suggestion-1', + }, + { + getData: () => ({ + urlFrom: 'https://bulk.com/from', + // Missing urlTo + }), + getId: () => 'suggestion-2', + }, + { + getData: () => ({ + urlFrom: 'https://bulk.com/from', + urlTo: 'https://bulk.com/broken', + }), + getId: () => undefined, // Missing ID - will be filtered out + }, + ]; + + context.dataAccess.Suggestion.allByOpportunityIdAndStatus = sandbox.stub() + .resolves(invalidSuggestions); + + context.dataAccess.SiteTopPage.allBySiteIdAndSourceAndGeo = sandbox.stub() + .resolves([{ getUrl: () => 'https://bulk.com/page1' }]); + + const result = await handler.opportunityAndSuggestionsStep(context); + + // Should return complete without sending message + expect(result.status).to.equal('complete'); + expect(context.sqs.sendMessage).to.not.have.been.called; + expect(context.log.warn).to.have.been.calledWith( + sinon.match(/No valid broken links to send to Mystique/), + ); + }).timeout(5000); + + it('should skip sending to Mystique when opportunity ID is missing', async () => { + context.dataAccess.Configuration = { + findLatest: () => ({ + isHandlerEnabledForSite: () => true, + }), + }; + + // Create opportunity with missing getId() + const opportunityWithoutId = { + ...opportunity, + getId: () => undefined, // Missing ID + }; + + context.dataAccess.Opportunity.allBySiteIdAndStatus.resolves([opportunityWithoutId]); + context.site.getBaseURL = () => 'https://bulk.com'; + context.site.getDeliveryType = () => 'aem_edge'; + + const validSuggestions = [ + { + getData: () => ({ + urlFrom: 'https://bulk.com/from', + urlTo: 'https://bulk.com/broken', + }), + getId: () => 'suggestion-1', + }, + ]; + + context.dataAccess.Suggestion.allByOpportunityIdAndStatus = sandbox.stub() + .resolves(validSuggestions); + + context.dataAccess.SiteTopPage.allBySiteIdAndSourceAndGeo = sandbox.stub() + .resolves([{ getUrl: () => 'https://bulk.com/page1' }]); + + const result = await handler.opportunityAndSuggestionsStep(context); + + // Should return complete without sending message + expect(result.status).to.equal('complete'); + expect(context.sqs.sendMessage).to.not.have.been.called; + expect(context.log.error).to.have.been.calledWith( + sinon.match(/Opportunity ID is missing/), + ); + }).timeout(5000); + + it('should include all alternatives when no locale prefixes found in broken links', async () => { + context.dataAccess.Configuration = { + findLatest: () => ({ + isHandlerEnabledForSite: () => true, + }), + }; + context.dataAccess.Opportunity.allBySiteIdAndStatus.resolves([opportunity]); + context.site.getBaseURL = () => 'https://bulk.com'; // Root domain, no subpath + context.site.getDeliveryType = () => 'aem_edge'; + + // Create suggestions with URLs that have pathname '/' (extractPathPrefix returns '') + // This triggers the else branch where brokenLinkLocales.size === 0 + const suggestionsWithoutLocales = [ + { + getData: () => ({ + urlTo: 'https://bulk.com/', // Pathname is '/' - extractPathPrefix returns '' + urlFrom: 'https://bulk.com/', // Pathname is '/' - extractPathPrefix returns '' + }), + getId: () => 'suggestion-1', + }, + ]; + + context.dataAccess.Suggestion.allByOpportunityIdAndStatus = sandbox.stub() + .resolves(suggestionsWithoutLocales); + + // Mock top pages with mixed locales + const topPagesWithMixedLocales = [ + { getUrl: () => 'https://bulk.com/home' }, + { getUrl: () => 'https://bulk.com/uk/home' }, + { getUrl: () => 'https://bulk.com/de/home' }, + { getUrl: () => 'https://bulk.com/about' }, + ]; + context.dataAccess.SiteTopPage.allBySiteIdAndSourceAndGeo = sandbox.stub() + .resolves(topPagesWithMixedLocales); + + await handler.opportunityAndSuggestionsStep(context); + + // Verify SQS message was sent + expect(context.sqs.sendMessage).to.have.been.calledOnce; + const messageArg = context.sqs.sendMessage.getCall(0).args[1]; + + // Verify that all alternatives are included (no locale filtering) + expect(messageArg.data.alternativeUrls).to.be.an('array').with.lengthOf(4); + // All alternatives should be included since no locale filtering was applied + expect(messageArg.data.alternativeUrls).to.include('https://bulk.com/home'); + expect(messageArg.data.alternativeUrls).to.include('https://bulk.com/uk/home'); + expect(messageArg.data.alternativeUrls).to.include('https://bulk.com/de/home'); + expect(messageArg.data.alternativeUrls).to.include('https://bulk.com/about'); + }).timeout(5000); + + it('should skip sending to Mystique when alternativeUrls is empty', async () => { + context.dataAccess.Configuration = { + findLatest: () => ({ + isHandlerEnabledForSite: () => true, + }), + }; + context.dataAccess.Opportunity.allBySiteIdAndStatus.resolves([opportunity]); + context.site.getBaseURL = () => 'https://bulk.com'; + context.site.getDeliveryType = () => 'aem_edge'; + + const validSuggestions = [ + { + getData: () => ({ + urlFrom: 'https://bulk.com/from', + urlTo: 'https://bulk.com/broken', + }), + getId: () => 'suggestion-1', + }, + ]; + + context.dataAccess.Suggestion.allByOpportunityIdAndStatus = sandbox.stub() + .resolves(validSuggestions); + + // Mock empty top pages - no alternatives available + context.dataAccess.SiteTopPage.allBySiteIdAndSourceAndGeo = sandbox.stub() + .resolves([]); + + const result = await handler.opportunityAndSuggestionsStep(context); + + // Should return complete without sending message + expect(result.status).to.equal('complete'); + expect(context.sqs.sendMessage).to.not.have.been.called; + expect(context.log.warn).to.have.been.calledWith( + sinon.match(/No alternative URLs available/), + ); + }).timeout(5000); }); diff --git a/test/guidance-handlers/broken-links.test.js b/test/guidance-handlers/broken-links.test.js index ece96de83e..79fe67ed15 100644 --- a/test/guidance-handlers/broken-links.test.js +++ b/test/guidance-handlers/broken-links.test.js @@ -162,4 +162,329 @@ describe('guidance-broken-links-remediation handler', () => { await brokenLinksGuidanceHandler(mockMessage, mockContext); expect(mockContext.log.error).to.have.been.calledWith('[broken-backlinks] Suggestion not found for ID: test-suggestion-id-1'); }); + + it('should clear AI rationale when all URLs are filtered out', async () => { + const messageWithFilteredUrls = { + ...mockMessage, + data: { + ...mockMessage.data, + brokenLinks: [{ + suggestionId: 'test-suggestion-id-1', + brokenUrl: 'https://foo.com/redirects-throws-error', + suggestedUrls: ['https://external.com/invalid-url'], + aiRationale: 'This rationale should be cleared', + }], + }, + }; + mockContext.dataAccess.Site.findById = sandbox.stub().resolves({ + getId: () => mockMessage.siteId, + getBaseURL: () => 'https://foo.com', + }); + mockContext.dataAccess.Audit.findById = sandbox.stub().resolves({ + getId: () => auditDataMock.id, + getAuditType: () => 'broken-backlinks', + }); + mockContext.dataAccess.Opportunity.findById = sandbox.stub().resolves({ + getSiteId: () => mockMessage.siteId, + getId: () => mockMessage.data.opportunityId, + getType: () => 'broken-backlinks', + }); + const mockSetData = sandbox.stub(); + const mockSave = sandbox.stub().resolves(); + mockContext.dataAccess.Suggestion.findById = sandbox.stub().resolves({ + setData: mockSetData, + getData: sandbox.stub().returns({ + url_to: 'https://foo.com/redirects-throws-error', + url_from: 'https://foo.com/redirects-throws-error', + }), + save: mockSave, + }); + // URL will be filtered out because it's external domain + nock('https://external.com') + .get('/invalid-url') + .reply(200); + + const response = await brokenLinksGuidanceHandler(messageWithFilteredUrls, mockContext); + expect(response.status).to.equal(200); + expect(mockSetData).to.have.been.calledWith({ + url_to: 'https://foo.com/redirects-throws-error', + url_from: 'https://foo.com/redirects-throws-error', + urlsSuggested: [], + aiRationale: '', // Should be cleared + }); + expect(mockContext.log.info).to.have.been.calledWith( + sinon.match(/All .* suggested URLs were filtered out/), + ); + }); + + it('should clear AI rationale when no URLs are provided', async () => { + const messageWithNoUrls = { + ...mockMessage, + data: { + ...mockMessage.data, + brokenLinks: [{ + suggestionId: 'test-suggestion-id-1', + brokenUrl: 'https://foo.com/redirects-throws-error', + suggestedUrls: [], + aiRationale: 'This rationale should be cleared', + }], + }, + }; + mockContext.dataAccess.Site.findById = sandbox.stub().resolves({ + getId: () => mockMessage.siteId, + getBaseURL: () => 'https://foo.com', + }); + mockContext.dataAccess.Audit.findById = sandbox.stub().resolves({ + getId: () => auditDataMock.id, + getAuditType: () => 'broken-backlinks', + }); + mockContext.dataAccess.Opportunity.findById = sandbox.stub().resolves({ + getSiteId: () => mockMessage.siteId, + getId: () => mockMessage.data.opportunityId, + getType: () => 'broken-backlinks', + }); + const mockSetData = sandbox.stub(); + const mockSave = sandbox.stub().resolves(); + mockContext.dataAccess.Suggestion.findById = sandbox.stub().resolves({ + setData: mockSetData, + getData: sandbox.stub().returns({ + url_to: 'https://foo.com/redirects-throws-error', + url_from: 'https://foo.com/redirects-throws-error', + }), + save: mockSave, + }); + + const response = await brokenLinksGuidanceHandler(messageWithNoUrls, mockContext); + expect(response.status).to.equal(200); + expect(mockSetData).to.have.been.calledWith({ + url_to: 'https://foo.com/redirects-throws-error', + url_from: 'https://foo.com/redirects-throws-error', + urlsSuggested: [], + aiRationale: '', // Should be cleared + }); + expect(mockContext.log.info).to.have.been.calledWith( + sinon.match(/No suggested URLs provided by Mystique/), + ); + }); + + it('should return 400 if brokenLinks is not an array', async () => { + const messageWithInvalidBrokenLinks = { + ...mockMessage, + data: { + ...mockMessage.data, + brokenLinks: 'not-an-array', + }, + }; + mockContext.dataAccess.Site.findById = sandbox.stub().resolves({ + getId: () => mockMessage.siteId, + }); + mockContext.dataAccess.Audit.findById = sandbox.stub().resolves({}); + mockContext.dataAccess.Opportunity.findById = sandbox.stub().resolves({ + getSiteId: () => mockMessage.siteId, + getType: () => 'broken-backlinks', + }); + + const response = await brokenLinksGuidanceHandler(messageWithInvalidBrokenLinks, mockContext); + expect(response.status).to.equal(400); + expect(mockContext.log.error).to.have.been.calledWith( + sinon.match(/Invalid brokenLinks format/), + ); + }); + + it('should return 400 if brokenLinks is missing', async () => { + const messageWithMissingBrokenLinks = { + ...mockMessage, + data: { + ...mockMessage.data, + brokenLinks: null, + }, + }; + mockContext.dataAccess.Site.findById = sandbox.stub().resolves({ + getId: () => mockMessage.siteId, + }); + mockContext.dataAccess.Audit.findById = sandbox.stub().resolves({}); + mockContext.dataAccess.Opportunity.findById = sandbox.stub().resolves({ + getSiteId: () => mockMessage.siteId, + getType: () => 'broken-backlinks', + }); + + const response = await brokenLinksGuidanceHandler(messageWithMissingBrokenLinks, mockContext); + expect(response.status).to.equal(400); + expect(mockContext.log.error).to.have.been.calledWith( + sinon.match(/Invalid brokenLinks format/), + ); + }); + + it('should return 200 if brokenLinks is empty array', async () => { + const messageWithEmptyBrokenLinks = { + ...mockMessage, + data: { + ...mockMessage.data, + brokenLinks: [], + }, + }; + mockContext.dataAccess.Site.findById = sandbox.stub().resolves({ + getId: () => mockMessage.siteId, + }); + mockContext.dataAccess.Audit.findById = sandbox.stub().resolves({}); + mockContext.dataAccess.Opportunity.findById = sandbox.stub().resolves({ + getSiteId: () => mockMessage.siteId, + getType: () => 'broken-backlinks', + }); + mockContext.dataAccess.Suggestion.findById = sandbox.stub(); + + const response = await brokenLinksGuidanceHandler(messageWithEmptyBrokenLinks, mockContext); + expect(response.status).to.equal(200); + expect(mockContext.log.info).to.have.been.calledWith( + sinon.match(/No broken links provided in Mystique response/), + ); + expect(mockContext.dataAccess.Suggestion.findById).to.not.have.been.called; + }); + + it('should handle invalid suggestedUrls format (not an array)', async () => { + const messageWithInvalidSuggestedUrls = { + ...mockMessage, + data: { + ...mockMessage.data, + brokenLinks: [{ + suggestionId: 'test-suggestion-id-1', + brokenUrl: 'https://foo.com/redirects-throws-error', + suggestedUrls: 'not-an-array', + aiRationale: 'Test rationale', + }], + }, + }; + mockContext.dataAccess.Site.findById = sandbox.stub().resolves({ + getId: () => mockMessage.siteId, + getBaseURL: () => 'https://foo.com', + }); + mockContext.dataAccess.Audit.findById = sandbox.stub().resolves({ + getId: () => auditDataMock.id, + getAuditType: () => 'broken-backlinks', + }); + mockContext.dataAccess.Opportunity.findById = sandbox.stub().resolves({ + getSiteId: () => mockMessage.siteId, + getId: () => mockMessage.data.opportunityId, + getType: () => 'broken-backlinks', + }); + const mockSetData = sandbox.stub(); + const mockSave = sandbox.stub().resolves(); + mockContext.dataAccess.Suggestion.findById = sandbox.stub().resolves({ + setData: mockSetData, + getData: sandbox.stub().returns({ + url_to: 'https://foo.com/redirects-throws-error', + url_from: 'https://foo.com/redirects-throws-error', + }), + save: mockSave, + }); + + const response = await brokenLinksGuidanceHandler(messageWithInvalidSuggestedUrls, mockContext); + expect(response.status).to.equal(200); + expect(mockContext.log.info).to.have.been.calledWith( + sinon.match(/Invalid suggestedUrls format/), + ); + expect(mockSetData).to.have.been.calledWith({ + url_to: 'https://foo.com/redirects-throws-error', + url_from: 'https://foo.com/redirects-throws-error', + urlsSuggested: [], + aiRationale: '', // Rationale cleared because no valid URLs provided + }); + }); + + it('should handle missing suggestedUrls field', async () => { + const messageWithNoSuggestedUrls = { + ...mockMessage, + data: { + ...mockMessage.data, + brokenLinks: [{ + suggestionId: 'test-suggestion-id-1', + brokenUrl: 'https://foo.com/redirects-throws-error', + // No suggestedUrls field - Mystique consistently returns suggestedUrls (camelCase) + aiRationale: 'Test rationale', + }], + }, + }; + mockContext.dataAccess.Site.findById = sandbox.stub().resolves({ + getId: () => mockMessage.siteId, + getBaseURL: () => 'https://foo.com', + }); + mockContext.dataAccess.Audit.findById = sandbox.stub().resolves({ + getId: () => auditDataMock.id, + getAuditType: () => 'broken-backlinks', + }); + mockContext.dataAccess.Opportunity.findById = sandbox.stub().resolves({ + getSiteId: () => mockMessage.siteId, + getId: () => mockMessage.data.opportunityId, + getType: () => 'broken-backlinks', + }); + const mockSetData = sandbox.stub(); + const mockSave = sandbox.stub().resolves(); + mockContext.dataAccess.Suggestion.findById = sandbox.stub().resolves({ + setData: mockSetData, + getData: sandbox.stub().returns({ + url_to: 'https://foo.com/redirects-throws-error', + url_from: 'https://foo.com/redirects-throws-error', + }), + save: mockSave, + }); + + const response = await brokenLinksGuidanceHandler(messageWithNoSuggestedUrls, mockContext); + expect(response.status).to.equal(200); + expect(mockSetData).to.have.been.calledWith({ + url_to: 'https://foo.com/redirects-throws-error', + url_from: 'https://foo.com/redirects-throws-error', + urlsSuggested: [], + aiRationale: '', // Rationale cleared because no URLs provided + }); + }); + + it('should handle missing aiRationale field', async () => { + const messageWithNoRationale = { + ...mockMessage, + data: { + ...mockMessage.data, + brokenLinks: [{ + suggestionId: 'test-suggestion-id-1', + brokenUrl: 'https://foo.com/redirects-throws-error', + suggestedUrls: ['https://foo.com/redirects-throws-error-1'], + // No aiRationale field - Mystique consistently returns aiRationale (camelCase) + }], + }, + }; + mockContext.dataAccess.Site.findById = sandbox.stub().resolves({ + getId: () => mockMessage.siteId, + getBaseURL: () => 'https://foo.com', + }); + mockContext.dataAccess.Audit.findById = sandbox.stub().resolves({ + getId: () => auditDataMock.id, + getAuditType: () => 'broken-backlinks', + }); + mockContext.dataAccess.Opportunity.findById = sandbox.stub().resolves({ + getSiteId: () => mockMessage.siteId, + getId: () => mockMessage.data.opportunityId, + getType: () => 'broken-backlinks', + }); + const mockSetData = sandbox.stub(); + const mockSave = sandbox.stub().resolves(); + mockContext.dataAccess.Suggestion.findById = sandbox.stub().resolves({ + setData: mockSetData, + getData: sandbox.stub().returns({ + url_to: 'https://foo.com/redirects-throws-error', + url_from: 'https://foo.com/redirects-throws-error', + }), + save: mockSave, + }); + nock('https://foo.com') + .get('/redirects-throws-error-1') + .reply(200); + + const response = await brokenLinksGuidanceHandler(messageWithNoRationale, mockContext); + expect(response.status).to.equal(200); + expect(mockSetData).to.have.been.calledWith({ + url_to: 'https://foo.com/redirects-throws-error', + url_from: 'https://foo.com/redirects-throws-error', + urlsSuggested: ['https://foo.com/redirects-throws-error-1'], + aiRationale: '', // Empty string when neither field exists + }); + }); });