/*
 * Copyright 2025 Adobe. All rights reserved.
 * This file is licensed to you under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License. You may obtain a copy
 * of the License at http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under
 * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
 * OF ANY KIND, either express or implied. See the License for the specific language
 * governing permissions and limitations under the License.
 */

// Source file: src/cdn-content-fragment-404/handler.js
// Collaborators provided by the module's imports: getStaticContent
// ('@adobe/spacecat-shared-utils'), AWSAthenaClient ('@adobe/spacecat-shared-athena-client'),
// AuditBuilder ('../common/audit-builder.js'), wwwUrlResolver ('../common/base-audit.js'),
// getImsOrgId ('../utils/data-access.js'), extractCustomerDomain ('../utils/cdn-utils.js').

const ONE_HOUR_MS = 60 * 60 * 1000;

/**
 * Returns the UTC date/hour components of the hour preceding `now`.
 *
 * @param {number|Date} [now=Date.now()] - Reference instant (epoch milliseconds or a Date).
 * @returns {{year: string, month: string, day: string, hour: string}} Four-digit year and
 *   zero-padded two-digit month, day and hour.
 */
function getHourParts(now = Date.now()) {
  const previousHour = new Date(now - ONE_HOUR_MS);
  const pad = (value) => String(value).padStart(2, '0');

  return {
    year: String(previousHour.getUTCFullYear()),
    // getUTCMonth() is zero-based
    month: pad(previousHour.getUTCMonth() + 1),
    day: pad(previousHour.getUTCDate()),
    hour: pad(previousHour.getUTCHours()),
  };
}

/**
 * Loads a SQL template from this audit's `sql` folder through getStaticContent.
 *
 * @param {string} filename - Template name without the `.sql` extension.
 * @param {object} variables - Values handed to getStaticContent for the template.
 * @returns {Promise<string>} Whatever getStaticContent resolves to for that template.
 */
async function loadSql(filename, variables) {
  return getStaticContent(variables, `./src/cdn-content-fragment-404/sql/${filename}.sql`);
}

/**
 * Ensures the Athena database and raw-logs table exist, then unloads the previous
 * hour's 404 requests under /content/dam/ to the tenant's aggregated-404 folder.
 *
 * @param {object} context - Audit context; reads `site`, `rawBucket`, `dataAccess`, `log`.
 * @returns {Promise<{auditResult: object, fullAuditRef: string}>} Audit result; the
 *   reference is the S3 output location of the unload.
 * @throws {Error} When the raw bucket, the sanitized hostname or the IMS org is missing.
 */
export async function cdnContentFragment404Runner(context) {
  const {
    site, rawBucket, dataAccess, log,
  } = context;

  // Fail fast on configuration problems before deriving names or touching Athena
  if (!rawBucket) {
    throw new Error('Raw bucket is required');
  }

  const sanitizedHostname = extractCustomerDomain(site);
  if (!sanitizedHostname) {
    // Without this guard the database would be named "cdn_logs_undefined"
    throw new Error('Sanitized hostname is required');
  }

  const {
    year, month, day, hour,
  } = getHourParts();

  const imsOrg = await getImsOrgId(site, dataAccess, log);
  if (!imsOrg) {
    throw new Error('Unable to retrieve IMS organization ID');
  }

  const database = `cdn_logs_${sanitizedHostname}`;
  const rawTable = `raw_logs_status_${sanitizedHostname}`;
  const tempLocation = `s3://${rawBucket}/temp/athena-results/`;
  const athenaClient = AWSAthenaClient.fromContext(context, tempLocation);

  // Create database
  const sqlDb = await loadSql('create-database', { database });
  await athenaClient.execute(sqlDb, database, `[Athena Query] Create database ${database}`);

  // Each tenant has its own folder mapped via IMS org within the raw bucket
  const bucket = `${rawBucket}/${imsOrg}`;
  // Subfolder aem-cs-fastly is used for raw logs currently
  const rawLocation = `s3://${bucket}/raw/aem-cs-fastly`;

  // Create table
  const sqlTable = await loadSql('create-raw-table', {
    database,
    rawTable,
    rawLocation,
  });
  await athenaClient.execute(
    sqlTable,
    database,
    `[Athena Query] Create raw logs table ${database}.${rawTable} from ${rawLocation}`,
  );

  // Unload the hour's 404s
  const output = `s3://${bucket}/aggregated-404/${year}/${month}/${day}/${hour}/`;
  const sqlUnload = await loadSql('unload-404-content', {
    database,
    rawTable,
    year,
    month,
    day,
    hour,
    output,
  });
  await athenaClient.execute(sqlUnload, database, `[Athena Query] Unload 404 content data to ${output}`);

  return {
    auditResult: {
      database,
      rawTable,
      completedAt: new Date().toISOString(),
    },
    fullAuditRef: output,
  };
}

export default new AuditBuilder()
  .withUrlResolver(wwwUrlResolver)
  .withRunner(cdnContentFragment404Runner)
  .build();
-- Template: src/cdn-content-fragment-404/sql/create-database.sql
-- Creates the customer database when it does not exist yet.
-- {{database}} is filled in by the caller (handler.js passes `cdn_logs_<sanitized hostname>`).
CREATE DATABASE IF NOT EXISTS {{database}};

-- Template: src/cdn-content-fragment-404/sql/create-raw-table.sql
-- External table over the raw CDN logs at {{rawLocation}}, exposing only the three
-- columns the 404 aggregation needs.
CREATE EXTERNAL TABLE IF NOT EXISTS {{database}}.{{rawTable}} (
  url string,
  request_user_agent string,
  response_status int
)
PARTITIONED BY (
  year string,
  month string,
  day string,
  hour string
)
ROW FORMAT SERDE 'org.apache.hive.hcatalog.data.JsonSerDe'
LOCATION '{{rawLocation}}'
TBLPROPERTIES (
  -- Partition projection: partitions are derived from the ranges below.
  -- NOTE(review): the ${year}/${month}/${day}/${hour} placeholders look like Athena projection
  -- placeholders rather than {{...}} template variables -- confirm the template loader
  -- leaves them untouched.
  'projection.enabled' = 'true',
  'storage.location.template' = '{{rawLocation}}/${year}/${month}/${day}/${hour}/',
  'projection.year.type' = 'integer',
  'projection.year.range' = '2024,2030',
  'projection.month.type' = 'integer',
  'projection.month.range' = '1,12',
  'projection.month.digits' = '2',
  'projection.day.type' = 'integer',
  'projection.day.range' = '1,31',
  'projection.day.digits' = '2',
  'projection.hour.type' = 'integer',
  'projection.hour.range' = '0,23',
  'projection.hour.digits' = '2',
  'has_encrypted_data' = 'false'
);

-- Template: src/cdn-content-fragment-404/sql/unload-404-content.sql
-- Aggregates one hour of 404 responses under /content/dam/ per (url, user agent, tenant)
-- and writes the result as Parquet to {{output}}.
UNLOAD (
  SELECT
    url,
    request_user_agent,
    -- tenant = first path segment after /content/dam/, or 'unknown' when it cannot be extracted
    COALESCE(REGEXP_EXTRACT(url, '/content/dam/([^/]+)', 1), 'unknown') AS tenant,
    count(*) AS request_count
  FROM {{database}}.{{rawTable}}
  -- Partition columns are strings; the caller passes zero-padded values
  WHERE year = '{{year}}'
    AND month = '{{month}}'
    AND day = '{{day}}'
    AND hour = '{{hour}}'

    AND response_status = 404
    -- Only include content fragment requests
    AND url LIKE '/content/dam/%'

  GROUP BY url, request_user_agent, COALESCE(REGEXP_EXTRACT(url, '/content/dam/([^/]+)', 1), 'unknown')
) TO '{{output}}'
WITH (format = 'PARQUET');
// Source file: src/content-fragment-404/analysis/analysis-strategy.js
// Collaborators provided by the module's imports: PublishRule ('../rules/publish-rule.js'),
// LocaleFallbackRule ('../rules/locale-fallback-rule.js'), SimilarPathRule
// ('../rules/similar-path-rule.js'), Suggestion and SuggestionType
// ('../domain/suggestion/suggestion.js').

/**
 * Turns broken content-fragment paths into suggestions by trying a fixed set of rules
 * in ascending getPriority() order, then post-processes the suggestions against the
 * path index.
 */
export class AnalysisStrategy {
  /**
   * @param {object} context - Audit context; `context.log` is used for logging.
   * @param {object} aemClient - Client handed to every rule.
   * @param {object} pathIndex - Index handed to SimilarPathRule and queried in post-processing.
   */
  constructor(context, aemClient, pathIndex) {
    this.context = context;
    this.aemClient = aemClient;
    this.pathIndex = pathIndex;
    this.rules = [
      new PublishRule(context, this.aemClient),
      new LocaleFallbackRule(context, this.aemClient),
      new SimilarPathRule(context, this.aemClient, pathIndex),
    ].sort((a, b) => a.getPriority() - b.getPriority());
  }

  /**
   * Analyzes every broken path sequentially and post-processes the collected suggestions.
   *
   * @param {Iterable} contentFragment404s - Broken paths to analyze.
   * @returns {Promise<Array>} Post-processed suggestions, in input order.
   */
  async analyze(contentFragment404s) {
    const suggestions = [];

    for (const path of contentFragment404s) {
      // eslint-disable-next-line no-await-in-loop
      const suggestion = await this.analyzePath(path);
      if (suggestion) {
        suggestions.push(suggestion);
      }
    }

    return this.processSuggestions(suggestions);
  }

  /**
   * Applies the rules in order and returns the first suggestion produced.
   * A rule that throws is logged and skipped.
   *
   * @param {*} brokenPath - The path that returned 404.
   * @returns {Promise<object>} The first rule's suggestion, or Suggestion.notFound(brokenPath).
   */
  async analyzePath(brokenPath) {
    const { log } = this.context;
    log.info(`Analyzing broken path: ${brokenPath}`);

    for (const rule of this.rules) {
      try {
        // eslint-disable-next-line no-await-in-loop
        const suggestion = await rule.apply(brokenPath);

        if (suggestion) {
          log.info(`Rule ${rule.constructor.name} applied to ${brokenPath}`);
          return suggestion;
        }
      } catch (error) {
        // One failing rule must not prevent the remaining rules from running
        log.error(`Error applying rule ${rule.constructor.name} to ${brokenPath}: ${error.message}`);
      }
    }

    log.warn(`No rules applied to ${brokenPath}`);
    return Suggestion.notFound(brokenPath);
  }

  /**
   * Checks LOCALE and SIMILAR suggestions against the path index and rewrites the
   * `reason` of those whose suggested content is not published. Other suggestion types,
   * and suggestions whose path is not in the index, are passed through unchanged.
   *
   * @param {Array<object>} suggestions - Suggestions to post-process; `reason` may be
   *   mutated in place.
   * @returns {Promise<Array<object>>} A new array holding every input suggestion, in order.
   */
  async processSuggestions(suggestions) {
    const { log } = this.context;
    log.info(`Post-processing ${suggestions.length} suggestions`);

    const processedSuggestions = [];

    for (const suggestion of suggestions) {
      const needsStatusCheck = suggestion.type === SuggestionType.LOCALE
        || suggestion.type === SuggestionType.SIMILAR;

      if (needsStatusCheck) {
        const { suggestedPath } = suggestion;
        log.debug(`Checking content status for suggestion: ${suggestedPath} with type: ${suggestion.type}`);

        // The path is expected to be indexed because it was suggested, but find()
        // returns null for unknown paths; do not let one stale entry abort the batch
        const contentPath = this.pathIndex.find(suggestedPath);

        if (!contentPath) {
          log.warn(`Suggested path ${suggestedPath} not found in path index; keeping suggestion unchanged`);
        } else if (contentPath.isPublished()) {
          log.debug(`Kept original suggestion type for ${suggestedPath} with status: ${contentPath.status}`);
        } else {
          suggestion.reason = `Content is in ${contentPath.status} state. Suggest publishing.`;
        }
      }

      processedSuggestions.push(suggestion);
    }

    return processedSuggestions;
  }
}
// Source file: src/content-fragment-404/cache/cache-strategy.js

/**
 * Base class describing the cache contract. Every method throws until a subclass
 * overrides it.
 */
export class CacheStrategy {
  /**
   * Look up the direct children stored for a parent path.
   * @param {string} parentPath - Path whose children are requested
   * @returns {Array} Child ContentPath objects
   */
  // eslint-disable-next-line no-unused-vars, class-methods-use-this
  findChildren(parentPath) {
    const notImplemented = new Error('findChildren() must be implemented by subclass');
    throw notImplemented;
  }

  /**
   * Store content items in the cache.
   * @param {Array} items - Content items to store
   * @param {Function} statusParser - Converts an item's raw status before storing
   * @returns {void}
   */
  // eslint-disable-next-line no-unused-vars, class-methods-use-this
  cacheItems(items, statusParser) {
    const notImplemented = new Error('cacheItems() must be implemented by subclass');
    throw notImplemented;
  }

  /**
   * Report whether this cache can be used.
   * @returns {boolean}
   */
  // eslint-disable-next-line class-methods-use-this
  isAvailable() {
    const notImplemented = new Error('isAvailable() must be implemented by subclass');
    throw notImplemented;
  }
}
// Source file: src/content-fragment-404/cache/noop-cache.js
// Extends CacheStrategy from './cache-strategy.js'.

/**
 * Cache strategy that stores nothing: lookups come back empty, writes are ignored,
 * and it reports itself as unavailable.
 */
export class NoOpCache extends CacheStrategy {
  /**
   * @returns {Array} Always a new empty array.
   */
  // eslint-disable-next-line class-methods-use-this
  findChildren() {
    const noChildren = [];
    return noChildren;
  }

  /**
   * Intentionally discards the items.
   * @returns {void}
   */
  // eslint-disable-next-line no-unused-vars, class-methods-use-this
  cacheItems(items, statusParser) {}

  /**
   * @returns {boolean} Always false.
   */
  // eslint-disable-next-line class-methods-use-this
  isAvailable() {
    return false;
  }
}
// Source file: src/content-fragment-404/cache/path-index-cache.js
// Collaborators provided by the module's imports: ContentPath
// ('../domain/content/content-path.js'), Locale ('../domain/language/locale.js'),
// CacheStrategy ('./cache-strategy.js').

/**
 * Cache strategy backed by a PathIndex: writes become index insertions and child
 * lookups are delegated to the index.
 */
export class PathIndexCache extends CacheStrategy {
  /**
   * @param {object} pathIndex - Index used for storage and lookup.
   */
  constructor(pathIndex) {
    super();
    this.pathIndex = pathIndex;
  }

  /**
   * @param {string} parentPath - Path whose children are requested.
   * @returns {Array} Whatever the index reports as children of `parentPath`.
   */
  findChildren(parentPath) {
    const { pathIndex } = this;
    return pathIndex.findChildren(parentPath);
  }

  /**
   * Wraps each item in a ContentPath and inserts it into the index.
   *
   * @param {Array<{path: string, status: *}>} items - Items to store; nothing happens
   *   for a missing or empty list.
   * @param {Function} statusParser - Converts `item.status` before it is stored.
   * @returns {void}
   */
  cacheItems(items, statusParser) {
    const nothingToCache = !items || items.length === 0;
    if (nothingToCache) {
      return;
    }

    for (const entry of items) {
      const parsedStatus = statusParser(entry.status);
      const locale = Locale.fromPath(entry.path);
      this.pathIndex.insertContentPath(new ContentPath(entry.path, parsedStatus, locale));
    }
  }

  /**
   * @returns {boolean} Always true.
   */
  // eslint-disable-next-line class-methods-use-this
  isAvailable() {
    return true;
  }
}
// Source file: src/content-fragment-404/clients/aem-client.js
// Collaborators provided by the module's imports: `fetch` (tracingFetch from
// '@adobe/spacecat-shared-utils'), NoOpCache ('../cache/noop-cache.js'),
// PathUtils ('../utils/path-utils.js').

// Status names parseContentStatus passes through; anything else becomes 'UNKNOWN'
const KNOWN_CONTENT_STATUSES = new Set(['PUBLISHED', 'MODIFIED', 'DRAFT', 'ARCHIVED', 'DELETED']);

/**
 * Client for the AEM Author content-fragment listing endpoint, with optional caching
 * of every item it sees.
 */
export class AemClient {
  static API_SITES_BASE = '/adobe/sites';

  static API_SITES_FRAGMENTS = `${AemClient.API_SITES_BASE}/cf/fragments`;

  // Safety limit to prevent too many paginated queries
  static MAX_PAGES = 10;

  // Delay between pagination requests for rate limiting
  static PAGINATION_DELAY_MS = 100;

  /**
   * @param {object} context - Audit context; `context.log` is used for logging.
   * @param {string} authorUrl - Base URL of the AEM Author instance.
   * @param {string} authToken - Bearer token sent with every request.
   * @param {object} [cache] - Cache strategy; defaults to a NoOpCache.
   */
  constructor(context, authorUrl, authToken, cache = new NoOpCache()) {
    this.context = context;
    this.authorUrl = authorUrl;
    this.authToken = authToken;
    this.cache = cache;
  }

  /**
   * Builds a client from the site's delivery config (`authorURL`) and
   * `env.AEM_AUTHOR_TOKEN`.
   *
   * @param {object} context - Audit context; reads `site` and `env`.
   * @param {object} [cache] - Cache strategy; defaults to a NoOpCache.
   * @returns {AemClient}
   * @throws {Error} When the author URL or the token is missing.
   */
  static createFrom(context, cache = new NoOpCache()) {
    const { site, env } = context;
    // A site without delivery config must hit the configuration error below,
    // not a TypeError on property access
    const authorUrl = site.getDeliveryConfig()?.authorURL;
    const authToken = env.AEM_AUTHOR_TOKEN;

    if (!authorUrl || !authToken) {
      throw new Error('AEM Author configuration missing: AEM_AUTHOR_URL and AEM_AUTHOR_TOKEN required');
    }

    return new AemClient(context, authorUrl, authToken, cache);
  }

  /**
   * @param {string} path - Candidate parent path.
   * @returns {boolean} True when the parent walk must stop: empty path or anything
   *   not strictly below /content/dam/.
   */
  static isBreakingPoint(path) {
    return !path || !path.startsWith('/content/dam/') || path === '/content/dam';
  }

  /**
   * Normalizes a raw status to one of the known upper-case names.
   *
   * @param {*} status - Raw status from the API.
   * @returns {string} The upper-cased status when known, otherwise 'UNKNOWN'
   *   (including for missing or non-string values).
   */
  static parseContentStatus(status) {
    if (typeof status !== 'string') {
      return 'UNKNOWN';
    }

    const upperStatus = status.toUpperCase();
    return KNOWN_CONTENT_STATUSES.has(upperStatus) ? upperStatus : 'UNKNOWN';
  }

  /**
   * Simple delay utility for rate limiting
   * @param {number} ms - Milliseconds to delay
   * @returns {Promise<void>}
   */
  static delay(ms) {
    return new Promise((resolve) => {
      setTimeout(resolve, ms);
    });
  }

  /**
   * Checks whether the listing endpoint returns at least one item for `path`, caching
   * whatever items come back.
   *
   * @param {string} path - Fragment path to check.
   * @returns {Promise<boolean>} True when items exist; false for an empty or missing
   *   item list or a non-OK response.
   * @throws {Error} When the request or JSON parsing fails (original error in `cause`).
   */
  async isAvailable(path) {
    const { log } = this.context;

    try {
      const response = await fetch(this.createUrl(path).toString(), {
        headers: this.createAuthHeaders(),
      });

      if (!response.ok) {
        log.error(`AEM Author returned ${response.status} for ${path}: ${response.statusText}`);
        return false;
      }

      const data = await response.json();

      // If there is content, cache it
      if (data?.items) {
        this.cache.cacheItems(data.items, AemClient.parseContentStatus);
      }

      // Sites API returns 200 with empty items array when path doesn't exist.
      // Always answer with a real boolean, also when `items` is absent.
      return Array.isArray(data?.items) && data.items.length > 0;
    } catch (error) {
      throw new Error(
        `Failed to check AEM Author availability for ${path}: ${error.message}`,
        { cause: error },
      );
    }
  }

  /**
   * Fetches every page for `path` (see fetchContentWithPagination).
   *
   * @param {string} path - The path to crawl.
   * @returns {Promise<Array>} All content items found.
   * @throws {Error} When the crawl throws (original error in `cause`).
   */
  async fetchContent(path) {
    try {
      return await this.fetchContentWithPagination(path);
    } catch (error) {
      throw new Error(
        `Failed to fetch AEM Author content for ${path}: ${error.message}`,
        { cause: error },
      );
    }
  }

  /**
   * Crawl all content from a path using cursor-based pagination. Stops after
   * MAX_PAGES pages or at the first failing page, keeping what was collected so far.
   *
   * @param {string} path - The path to crawl
   * @returns {Promise<Array>} - All content items found
   */
  async fetchContentWithPagination(path) {
    const { log } = this.context;

    const allItems = [];
    let cursor = null;
    let pageCount = 0;
    let failed = false;

    log.debug(`Starting crawl for path: ${path}`);

    do {
      try {
        pageCount += 1;

        log.debug(`Fetching page ${pageCount} for path: ${path}${cursor ? ` (cursor: ${cursor})` : ''}`);

        // eslint-disable-next-line no-await-in-loop
        const response = await this.fetchWithPagination(path, cursor);

        if (response.items && response.items.length > 0) {
          allItems.push(...response.items);
          log.debug(`Page ${pageCount}: Found ${response.items.length} items (total: ${allItems.length})`);
        }

        cursor = response.cursor;

        // Add small delay to implement rate limiting
        if (cursor) {
          // eslint-disable-next-line no-await-in-loop
          await AemClient.delay(AemClient.PAGINATION_DELAY_MS);
        }
      } catch (error) {
        log.error(`Error fetching page ${pageCount} for path ${path}: ${error.message}`);
        // Return what we have so far instead of failing completely
        failed = true;
        break;
      }
    } while (cursor && pageCount < AemClient.MAX_PAGES);

    // A remaining cursor means the page limit cut the crawl short; make that visible
    if (!failed && cursor) {
      log.warn(`Crawl for path ${path} stopped at the ${AemClient.MAX_PAGES} page limit; more content may exist`);
    }

    // Cache items
    this.cache.cacheItems(allItems, AemClient.parseContentStatus);

    log.info(`Complete crawl finished for path: ${path}. Found ${allItems.length} total items across ${pageCount} pages`);
    return allItems;
  }

  /**
   * Fetch a single page of content with optional cursor
   * @param {string} path - The path to fetch
   * @param {string|null} cursor - The cursor for pagination
   * @returns {Promise<{items: Array, cursor: string|null}>} - Response with items and next cursor
   * @throws {Error} When the response status is not OK.
   */
  async fetchWithPagination(path, cursor = null) {
    const url = this.createUrlWithPagination(path, cursor);

    const response = await fetch(url.toString(), {
      headers: this.createAuthHeaders(),
    });

    if (!response.ok) {
      throw new Error(`HTTP ${response.status}: ${response.statusText}`);
    }

    const data = await response.json();
    return {
      items: data?.items || [],
      cursor: data?.cursor || null,
    };
  }

  /**
   * Returns the cached children of the nearest ancestor of `parentPath` (itself
   * included) that exists on Author, fetching and caching that ancestor's content
   * when the cache has nothing for it yet.
   *
   * @param {string} parentPath - Path to start from.
   * @returns {Promise<Array>} Cached children, or an empty array when the cache is
   *   unavailable, the walk reaches the breaking point, or the lookup fails.
   */
  async getChildrenFromPath(parentPath) {
    const { log } = this.context;

    log.debug(`Getting children paths from parent: ${parentPath}`);

    if (!this.cache.isAvailable()) {
      log.debug('Cache not available, returning empty list');
      return [];
    }

    if (AemClient.isBreakingPoint(parentPath)) {
      log.debug(`Reached breaking point: ${parentPath}`);
      return [];
    }

    const cachedChildren = this.cache.findChildren(parentPath);
    if (cachedChildren.length > 0) {
      log.debug(`Found ${cachedChildren.length} children in cache for parent: ${parentPath}`);
      return cachedChildren;
    }

    log.debug('No children found in cache');

    let isAvailable = false;
    try {
      isAvailable = await this.isAvailable(parentPath);
    } catch (error) {
      log.error(`Error getting children from path ${parentPath}:`, error);
      return [];
    }

    if (isAvailable) {
      log.info(`Parent path is available on Author: ${parentPath}`);

      // Cache content here since it is available
      try {
        await this.fetchContent(parentPath);
        log.debug(`Fetched all content for parent path: ${parentPath}`);
      } catch (error) {
        log.warn(`Failed to fetch complete content for ${parentPath}: ${error.message}`);
        // Continue with cached data if available
      }

      return this.cache.findChildren(parentPath);
    }

    const nextParent = PathUtils.getParentPath(parentPath);
    if (!nextParent) {
      log.debug(`No next parent found for: ${parentPath}`);
      return [];
    }

    // Try the next parent up the hierarchy
    log.debug(`Parent path not available, trying next parent up: ${nextParent}`);
    return this.getChildrenFromPath(nextParent);
  }

  /**
   * Create URL with pagination parameters
   * @param {string} fragmentPath - The path to fetch
   * @param {string|null} cursor - The cursor for pagination
   * @returns {URL} - Listing URL, with `cursor` set when one is given
   */
  createUrlWithPagination(fragmentPath, cursor = null) {
    const url = this.createUrl(fragmentPath);

    if (cursor) {
      url.searchParams.set('cursor', cursor);
    }

    return url;
  }

  /**
   * @param {string} fragmentPath - The path to list.
   * @returns {URL} Listing URL for `fragmentPath` with the minimal projection.
   */
  createUrl(fragmentPath) {
    const url = new URL(AemClient.API_SITES_FRAGMENTS, this.authorUrl);
    url.searchParams.set('path', fragmentPath);
    url.searchParams.set('projection', 'minimal');
    return url;
  }

  /**
   * @returns {{Authorization: string, Accept: string}} Headers for Author requests.
   */
  createAuthHeaders() {
    return {
      Authorization: `Bearer ${this.authToken}`,
      Accept: 'application/json',
    };
  }
}

// Source file: src/content-fragment-404/collectors/athena-collector.js
// Collaborators provided by the module's imports: getStaticContent
// ('@adobe/spacecat-shared-utils'), AWSAthenaClient ('@adobe/spacecat-shared-athena-client'),
// getImsOrgId ('../../utils/data-access.js'), isAssetUrl ('../../utils/asset-utils.js'),
// extractCustomerDomain ('../../utils/cdn-utils.js').

/**
 * Reads the previous day's aggregated content-fragment 404s from Athena and groups
 * them per URL.
 */
export class AthenaCollector {
  static GRAPHQL_SUFFIX = /\.cfm.*\.json$/;

  /**
   * @param {object} context - Audit context; reads `env` and `log`.
   */
  constructor(context) {
    this.context = context;
  }

  /**
   * Resolves the IMS org and hostname for the site and returns an initialized collector.
   *
   * @param {object} context - Audit context; reads `site`, `dataAccess`, `log`, `env`.
   * @returns {Promise<AthenaCollector>}
   * @throws {Error} When the IMS org cannot be resolved or validation fails.
   */
  static async createFrom(context) {
    const { site, dataAccess, log } = context;

    const imsOrg = await getImsOrgId(site, dataAccess, log);
    if (!imsOrg) {
      throw new Error('Unable to retrieve IMS organization ID');
    }

    const collector = new AthenaCollector(context);
    collector.imsOrg = imsOrg;
    collector.sanitizedHostname = extractCustomerDomain(site);
    collector.initialize();
    return collector;
  }

  /**
   * Strips a trailing `.cfm….json` suffix from a path.
   *
   * @param {string} path - Requested URL path.
   * @returns {string} The path without the suffix; non-string input is returned as is.
   */
  static cleanPath(path) {
    return typeof path === 'string' ? path.replace(AthenaCollector.GRAPHQL_SUFFIX, '') : path;
  }

  /**
   * @returns {{year: string, month: string, day: string}} UTC date parts of yesterday.
   */
  static getPreviousDayParts() {
    const yesterday = new Date();
    yesterday.setUTCDate(yesterday.getUTCDate() - 1);
    return AthenaCollector.getDateParts(yesterday);
  }

  /**
   * @param {Date} [date=new Date()] - Date to split.
   * @returns {{year: string, month: string, day: string}} UTC year plus zero-padded
   *   month and day.
   */
  static getDateParts(date = new Date()) {
    const year = date.getUTCFullYear().toString();
    const month = String(date.getUTCMonth() + 1).padStart(2, '0');
    const day = String(date.getUTCDate()).padStart(2, '0');

    return { year, month, day };
  }

  /**
   * Loads a SQL template from this audit's `sql` folder through getStaticContent.
   *
   * @param {string} filename - Template name without the `.sql` extension.
   * @param {object} variables - Values handed to getStaticContent for the template.
   * @returns {Promise<string>}
   */
  static async loadSql(filename, variables) {
    return getStaticContent(variables, `./src/content-fragment-404/sql/${filename}.sql`);
  }

  /**
   * Validates the collector's inputs, derives the Athena config and creates the client.
   */
  initialize() {
    this.validate();
    this.config = this.getAthenaConfig();
    this.athenaClient = AWSAthenaClient.fromContext(this.context, this.config.tempLocation);
  }

  /**
   * @throws {Error} When `env.S3_BUCKET`, the IMS org or the sanitized hostname is missing.
   */
  validate() {
    const { env } = this.context;

    if (!env.S3_BUCKET) {
      throw new Error('Raw bucket is required');
    }

    if (!this.imsOrg) {
      throw new Error('IMS organization is required');
    }

    if (!this.sanitizedHostname) {
      throw new Error('Sanitized hostname is required');
    }
  }

  /**
   * @returns {{database: string, tableName: string, location: string, tempLocation: string}}
   *   Database/table names and S3 locations derived from the bucket, IMS org and hostname.
   */
  getAthenaConfig() {
    const { env } = this.context;
    const bucket = `${env.S3_BUCKET}/${this.imsOrg}`;
    const database = `cdn_logs_${this.sanitizedHostname}`;
    const tableName = 'content_fragment_404';

    return {
      database,
      tableName,
      location: `s3://${bucket}/aggregated-404`,
      tempLocation: `s3://${env.S3_BUCKET}/temp/athena-results/`,
    };
  }

  /**
   * Runs the create-database template.
   */
  async ensureDatabase() {
    const sqlDb = await AthenaCollector.loadSql('create-database', {
      database: this.config.database,
    });

    const sqlDbDescription = `[Athena Query] Create database ${this.config.database}`;
    await this.athenaClient.execute(sqlDb, this.config.database, sqlDbDescription);
  }

  /**
   * Runs the create-table template for the aggregated 404 table.
   */
  async ensureTable() {
    const sqlTable = await AthenaCollector.loadSql('create-table', {
      database: this.config.database,
      tableName: this.config.tableName,
      location: this.config.location,
    });

    const sqlTableDescription = `[Athena Query] Create table ${this.config.database}.${this.config.tableName}`;
    await this.athenaClient.execute(sqlTable, this.config.database, sqlTableDescription);
  }

  /**
   * Ensures database and table exist, then returns yesterday's 404s grouped per URL.
   *
   * @returns {Promise<Array<{url: string, requestUserAgents: Array, requestCount: number}>>}
   * @throws {Error} When any Athena step fails (original error in `cause`).
   */
  async fetchContentFragment404s() {
    const { log } = this.context;
    const { year, month, day } = AthenaCollector.getPreviousDayParts();

    log.info(`Fetching content fragment 404s for ${year}-${month}-${day} from Athena`);

    try {
      await this.ensureDatabase();
      await this.ensureTable();

      const contentFragment404s = await this.queryContentFragment404s(year, month, day);

      log.info(`Found ${contentFragment404s.length} content fragment 404s from Athena`);
      return contentFragment404s;
    } catch (error) {
      log.error(`Athena query failed: ${error.message}`);
      throw new Error(`Athena query failed: ${error.message}`, { cause: error });
    }
  }

  /**
   * Runs the daily query and groups the rows by cleaned URL, summing request counts
   * per user agent and in total. Rows without a URL and asset URLs are skipped.
   *
   * @param {string} year - Four-digit year.
   * @param {string} month - Zero-padded month.
   * @param {string} day - Zero-padded day.
   * @returns {Promise<Array<{url: string, requestUserAgents: Array<{userAgent: string,
   *   count: number}>, requestCount: number}>>} One entry per URL, in first-seen order.
   */
  async queryContentFragment404s(year, month, day) {
    const sqlQuery = await AthenaCollector.loadSql('daily-query', {
      database: this.config.database,
      tableName: this.config.tableName,
      year,
      month,
      day,
    });

    const sqlQueryDescription = `[Athena Query] Fetch content fragment 404s for ${year}-${month}-${day}`;
    const result = await this.athenaClient.query(
      sqlQuery,
      this.config.database,
      sqlQueryDescription,
    );

    // Group by URL, excluding assets (for now). Maps keep insertion order and make the
    // per-agent lookup constant time instead of a linear scan per row.
    const byUrl = new Map();
    const rows = result.filter((row) => row.url && !isAssetUrl(row.url));

    for (const row of rows) {
      const {
        url,
        request_user_agent: userAgent,
        request_count: count,
      } = row;

      // Clean the URL at the source (remove .cfm.gql.json suffixes, etc.)
      const cleanedUrl = AthenaCollector.cleanPath(url);

      // Athena returns it as string
      const parsedCount = Number.parseInt(count, 10) || 0;

      let entry = byUrl.get(cleanedUrl);
      if (!entry) {
        entry = { countsByAgent: new Map(), totalCount: 0 };
        byUrl.set(cleanedUrl, entry);
      }

      // Rows without a user agent still count towards the URL total
      if (userAgent) {
        entry.countsByAgent.set(userAgent, (entry.countsByAgent.get(userAgent) ?? 0) + parsedCount);
      }

      entry.totalCount += parsedCount;
    }

    return Array.from(byUrl, ([url, { countsByAgent, totalCount }]) => ({
      url,
      requestUserAgents: Array.from(countsByAgent, ([userAgent, count]) => ({ userAgent, count })),
      requestCount: totalCount,
    }));
  }
}
// Source file: src/content-fragment-404/domain/content/content-path.js

/**
 * Content status names. Frozen so that no consumer can add, remove or overwrite
 * a status shared by every other consumer.
 */
export const ContentStatus = Object.freeze({
  PUBLISHED: 'PUBLISHED',
  MODIFIED: 'MODIFIED',
  DRAFT: 'DRAFT',
  ARCHIVED: 'ARCHIVED',
  DELETED: 'DELETED',
  UNKNOWN: 'UNKNOWN',
});

/**
 * A content path together with its status and locale.
 */
export class ContentPath {
  /**
   * @param {string} path - The content path.
   * @param {string} status - Status value; compared against ContentStatus in isPublished().
   * @param {*} locale - Locale associated with the path; may expose `toJSON()`.
   */
  constructor(path, status, locale) {
    this.path = path;
    this.status = status;
    this.locale = locale;
  }

  /**
   * @returns {boolean} True when the path is a string with non-whitespace content.
   */
  isValid() {
    return typeof this.path === 'string' && this.path.trim().length > 0;
  }

  /**
   * @returns {boolean} True when the status is ContentStatus.PUBLISHED.
   */
  isPublished() {
    return this.status === ContentStatus.PUBLISHED;
  }

  /**
   * @returns {{path: string, status: string, locale: *}} Plain representation; the
   *   locale is serialized through its own toJSON() when it has one, otherwise it is
   *   passed through as is.
   */
  toJSON() {
    return {
      path: this.path,
      status: this.status,
      locale: this.locale?.toJSON?.() || this.locale,
    };
  }
}
// Source file: src/content-fragment-404/domain/index/path-index.js
// Collaborators provided by the module's imports: ContentPath
// ('../content/content-path.js'), PathNode ('./path-node.js').

/**
 * Walks the trie from `root` along `path`, one character per level.
 *
 * @param {object} root - Trie root node (has a `children` Map).
 * @param {string} path - Path or prefix to follow.
 * @returns {object|null} The node reached, or null when the path is empty or leaves the trie.
 */
function findNode(root, path) {
  if (!path || path.length === 0) {
    return null;
  }

  let current = root;
  for (const letter of path) {
    current = current.children.get(letter);
    if (!current) {
      return null;
    }
  }

  return current;
}

/**
 * Character trie of ContentPath objects supporting exact lookup, prefix search and
 * direct-children queries.
 */
export class PathIndex {
  /**
   * @param {object} [context] - Audit context; `context.log.debug` is used when present.
   */
  constructor(context) {
    this.context = context;
    this.root = new PathNode();
  }

  /**
   * Convenience wrapper: builds a ContentPath and inserts it.
   *
   * @param {string} path - Content path.
   * @param {string} status - Content status.
   * @param {*} locale - Locale of the path.
   */
  insert(path, status, locale) {
    const contentPath = new ContentPath(path, status, locale);
    this.insertContentPath(contentPath);
  }

  /**
   * Inserts or replaces the entry stored under `contentPath.path`. Invalid content
   * paths (per `isValid()`) are ignored.
   *
   * @param {object} contentPath - ContentPath to store.
   */
  insertContentPath(contentPath) {
    if (!contentPath.isValid()) {
      return;
    }

    let current = this.root;
    for (const letter of contentPath.path) {
      if (!current.children.has(letter)) {
        current.children.set(letter, new PathNode());
      }
      current = current.children.get(letter);
    }

    const existed = current.isEnd && current.path !== null;
    current.isEnd = true;
    current.path = contentPath;

    if (!existed) {
      // Logging is optional: an index built without a context must still accept inserts
      this.context?.log?.debug(`Inserted new path: ${contentPath.path}`);
    }
  }

  /**
   * @param {string} path - Exact path to look for.
   * @returns {boolean} True when an entry is stored under exactly this path.
   */
  contains(path) {
    const node = findNode(this.root, path);
    return node !== null && node.isEnd && node.path !== null;
  }

  /**
   * @param {string} path - Exact path to look for.
   * @returns {object|null} The stored ContentPath, or null when there is none.
   */
  find(path) {
    const node = findNode(this.root, path);
    return node !== null && node.isEnd ? node.path : null;
  }

  /**
   * Removes the entry stored under `path` and prunes trie branches that no longer
   * lead to any entry.
   *
   * @param {string} path - Exact path to remove.
   * @returns {boolean} True when an entry was removed.
   */
  delete(path) {
    if (!path || path.length === 0) {
      return false;
    }

    // Remember the way down so the now-dead tail can be removed afterwards
    const trail = [];
    let current = this.root;
    for (const letter of path) {
      const next = current.children.get(letter);
      if (!next) {
        return false;
      }
      trail.push([current, letter]);
      current = next;
    }

    if (!current.isEnd) {
      return false;
    }

    current.isEnd = false;
    current.path = null;

    // Walk back up, dropping nodes that hold no entry and have no descendants
    for (let i = trail.length - 1; i >= 0; i -= 1) {
      const [parent, letter] = trail[i];
      const child = parent.children.get(letter);
      if (child.isEnd || child.children.size > 0) {
        break;
      }
      parent.children.delete(letter);
    }

    return true;
  }

  /**
   * @param {string} parentPath - Parent path, without trailing slash.
   * @returns {Array} Entries located exactly one path segment below `parentPath`.
   */
  findChildren(parentPath) {
    const descendants = this.findPathsWithPrefix(`${parentPath}/`);

    // A direct child has no further '/' after the parent prefix
    return descendants.filter(
      (child) => !child.path.substring(parentPath.length + 1).includes('/'),
    );
  }

  /**
   * @param {string} prefix - Path prefix; an empty prefix selects everything.
   * @returns {Array} All entries whose path starts with `prefix`.
   */
  findPathsWithPrefix(prefix) {
    if (!prefix || prefix.length === 0) {
      return this.getPaths();
    }

    const paths = [];
    const start = findNode(this.root, prefix);
    if (start !== null) {
      this.getPathsFromNode(start, prefix, paths);
    }
    return paths;
  }

  /**
   * @returns {Array} Every entry in the index.
   */
  getPaths() {
    const paths = [];
    this.getPathsFromNode(this.root, '', paths);
    return paths;
  }

  /**
   * Appends every entry at or below `node` to `paths`, depth first.
   *
   * @param {object} node - Node to start from.
   * @param {string} currentPath - Kept for signature compatibility; entries carry
   *   their own path, so it is not needed to build results.
   * @param {Array} paths - Output array, appended to in place.
   */
  getPathsFromNode(node, currentPath, paths) {
    if (node.isEnd && node.path !== null) {
      paths.push(node.path);
    }

    for (const child of node.children.values()) {
      this.getPathsFromNode(child, currentPath, paths);
    }
  }
}
/**
 * One node of the PathIndex trie.
 */
export class PathNode {
  /** Child nodes keyed by the next character of the path. */
  children = new Map();

  /** True when an indexed path ends at this node. */
  isEnd = false;

  /** Entry stored at this node, or null when no path ends here. */
  path = null;
}
/**
 * Reads `key` from `table` only when it is an own property, so inherited
 * names such as "constructor" never resolve to a value.
 *
 * @param {object} table - Lookup table.
 * @param {string} key - Key to read.
 * @returns {*} The stored value, or undefined.
 */
function lookupOwn(table, key) {
  return Object.hasOwn(table, key) ? table[key] : undefined;
}

/**
 * Static lookup of related country / locale codes, used to propose
 * alternative locale segments for a path.
 */
export class LanguageTree {
  // 2-letter country code language groups
  static COUNTRY_CODE_GROUPS = {
    FR: ['FR', 'MC'],
    DE: ['DE', 'AT', 'LI'],
    US: ['US', 'GB', 'CA', 'AU', 'NZ', 'IE'],
    ES: ['ES', 'MX', 'AR', 'CO', 'PE', 'VE'],
    IT: ['IT', 'SM', 'VA'],
    CN: ['CN', 'TW', 'HK', 'MO', 'SG'],
    RU: ['RU', 'BY', 'KZ', 'KG', 'TJ', 'UZ'],
  };

  // 5-letter locale code language groups
  static LOCALE_CODE_GROUPS = {
    'fr-FR': ['fr-FR', 'ca-FR', 'fr-CA', 'fr-BE', 'fr-CH'],
    'de-DE': ['de-DE', 'de-AT', 'de-CH', 'de-LU'],
    'en-US': ['en-US', 'en-GB', 'en-CA', 'en-AU', 'en-NZ'],
    'es-ES': ['es-ES', 'es-MX', 'es-AR', 'es-CO'],
    'it-IT': ['it-IT', 'it-CH'],
    'zh-CN': ['zh-CN', 'zh-TW', 'zh-HK', 'zh-MO'],
    'ru-RU': ['ru-RU', 'ru-BY', 'ru-KZ'],
  };

  // Reverse mappings (group member -> group root), filled by buildReverseMappings()
  static COUNTRY_TO_ROOT = {};

  static LOCALE_TO_ROOT = {};

  static {
    // Build reverse mappings
    LanguageTree.buildReverseMappings();
  }

  /**
   * Populates COUNTRY_TO_ROOT and LOCALE_TO_ROOT from the group tables.
   */
  static buildReverseMappings() {
    for (const [root, members] of Object.entries(LanguageTree.COUNTRY_CODE_GROUPS)) {
      for (const member of members) {
        LanguageTree.COUNTRY_TO_ROOT[member] = root;
      }
    }

    for (const [root, members] of Object.entries(LanguageTree.LOCALE_CODE_GROUPS)) {
      for (const member of members) {
        LanguageTree.LOCALE_TO_ROOT[member] = root;
      }
    }
  }

  /**
   * Lists alternative codes worth trying in place of `locale`: its case and
   * separator variations, then the English fallbacks, then the members of its
   * language group (when it belongs to one).
   *
   * The result is ordered by first appearance, contains no duplicates and never
   * contains `locale` itself, so callers do not re-probe the original path.
   *
   * @param {string} locale - Locale or country code as found in the path.
   * @returns {string[]} Candidate replacement codes; empty for empty input.
   */
  static findSimilarLanguageRoots(locale) {
    if (!locale || locale.length === 0) {
      return [];
    }

    const candidates = [
      ...LanguageTree.generateCaseVariations(locale),
      // Always add English as default fallback
      ...LanguageTree.findEnglishFallbacks(),
    ];

    const languageRoot = LanguageTree.findRootForLocale(locale);
    if (languageRoot) {
      const siblings = lookupOwn(LanguageTree.LOCALE_CODE_GROUPS, languageRoot)
        || lookupOwn(LanguageTree.COUNTRY_CODE_GROUPS, languageRoot)
        || [];
      candidates.push(...siblings);
    }

    // The three sources overlap (e.g. "en-gb" is both a variation and a fallback),
    // and both fallbacks and siblings can contain the input itself.
    return [...new Set(candidates)].filter((candidate) => candidate !== locale);
  }

  /**
   * Produces upper/lower-case variations of a code. Two-character codes yield
   * their lower- and upper-case forms; five-character `xx-YY` / `xx_YY` codes
   * yield every case combination with both separators. The input itself is
   * excluded.
   *
   * @param {string} locale - Code to vary.
   * @returns {string[]} Variations, without duplicates and without `locale`.
   */
  static generateCaseVariations(locale) {
    if (!locale || locale.length === 0) {
      return [];
    }

    const variations = [];

    if (locale.length === 2) {
      variations.push(locale.toLowerCase(), locale.toUpperCase());
    }

    if (locale.length === 5 && (locale.includes('-') || locale.includes('_'))) {
      const parts = locale.split(/[-_]/);
      if (parts.length === 2) {
        const [language, country] = parts;
        const languageForms = [language.toLowerCase(), language.toUpperCase()];
        const countryForms = [country.toLowerCase(), country.toUpperCase()];

        // Hyphenated forms first, then underscored ones.
        for (const separator of ['-', '_']) {
          for (const languageForm of languageForms) {
            for (const countryForm of countryForms) {
              variations.push(`${languageForm}${separator}${countryForm}`);
            }
          }
        }
      }
    }

    return [...new Set(variations)].filter((variation) => variation !== locale);
  }

  /**
   * Resolves the group root a code belongs to. Country groups are consulted
   * before locale groups. Matching is exact (case-sensitive).
   *
   * @param {string} locale - Locale or country code.
   * @returns {string|null} The group root, or null when the code is unknown.
   */
  static findRootForLocale(locale) {
    if (!locale || locale.length === 0) {
      return null;
    }

    const root = lookupOwn(LanguageTree.COUNTRY_TO_ROOT, locale)
      || lookupOwn(LanguageTree.LOCALE_TO_ROOT, locale);
    if (root) {
      return root;
    }

    // If not found, the locale might be a root itself
    if (Object.hasOwn(LanguageTree.COUNTRY_CODE_GROUPS, locale)
      || Object.hasOwn(LanguageTree.LOCALE_CODE_GROUPS, locale)) {
      return locale;
    }

    return null;
  }

  /**
   * @returns {string[]} US and GB English codes in every spelling that is tried as a fallback.
   */
  static findEnglishFallbacks() {
    return [
      'us', 'US', 'en-us', 'en_us', 'en-US', 'en_US',
      'gb', 'GB', 'en-gb', 'en_gb', 'en-GB', 'en_GB',
    ];
  }
}

// --- domain/language/locale-type.js ---

/**
 * Kinds of locale code recognised in paths.
 */
export const LocaleType = {
  FIVE_LETTER_LOCALE: 'FIVE_LETTER_LOCALE',
  TWO_LETTER_COUNTRY: 'TWO_LETTER_COUNTRY',
};
import { LocaleType } from './locale-type.js';

/**
 * A locale or country code detected in a path segment.
 */
export class Locale {
  static FIVE_LETTER_PATTERN = /^[a-zA-Z]{2}[-_][a-zA-Z]{2}$/;

  static TWO_LETTER_PATTERN = /^[a-zA-Z]{2}$/;

  /**
   * @param {string} code - The code exactly as written in the path.
   * @param {string} type - One of the LocaleType values.
   * @param {string|null} language - Lower-cased language part, if known.
   * @param {string} country - Upper-cased country part.
   */
  constructor(code, type, language, country) {
    Object.assign(this, {
      code, type, language, country,
    });
  }

  /**
   * Parses a single code such as `en-US`, `en_US` or `US`.
   *
   * @param {string} code - Candidate code; surrounding whitespace is ignored.
   * @returns {Locale|null} The parsed locale, or null when the code matches neither pattern.
   */
  static fromCode(code) {
    if (!code || code.trim().length === 0) {
      return null;
    }

    const trimmed = code.trim();

    // 5-letter locale pattern (e.g., en-US, en_US, fr-FR, fr_FR)
    if (Locale.FIVE_LETTER_PATTERN.test(trimmed)) {
      const [language, country] = trimmed.split(/[-_]/);
      return new Locale(
        trimmed,
        LocaleType.FIVE_LETTER_LOCALE,
        language.toLowerCase(),
        country.toUpperCase(),
      );
    }

    // Anything that is not a 2-letter country code (e.g., US, FR) is rejected
    if (!Locale.TWO_LETTER_PATTERN.test(trimmed)) {
      return null;
    }

    return new Locale(
      trimmed,
      LocaleType.TWO_LETTER_COUNTRY,
      null, // TODO: Add language root mapping
      trimmed.toUpperCase(),
    );
  }

  /**
   * Scans the path segments left to right and returns the first one that
   * parses as a locale.
   *
   * @param {string} path - Slash-separated path.
   * @returns {Locale|null} First locale found, or null.
   */
  static fromPath(path) {
    if (!path) {
      return null;
    }

    for (const segment of path.split('/')) {
      const parsed = Locale.fromCode(segment);
      if (parsed) {
        return parsed;
      }
    }

    return null;
  }

  getCode() {
    return this.code;
  }

  getType() {
    return this.type;
  }

  getLanguage() {
    return this.language;
  }

  getCountry() {
    return this.country;
  }

  /**
   * @returns {boolean} true when a non-empty code is set.
   */
  isValid() {
    return Boolean(this.code) && this.code.length > 0;
  }

  /**
   * Swaps the first `/<code>/` segment of `path` for `/<newLocale>/`.
   *
   * @param {string} path - Path containing this locale as a segment.
   * @param {string} newLocale - Replacement code.
   * @returns {string} The rewritten path; `path` unchanged when it is empty,
   *   this locale has no code, or the segment does not occur.
   */
  replaceInPath(path, newLocale) {
    if (path && this.code) {
      return path.replace(`/${this.code}/`, `/${newLocale}/`);
    }
    return path;
  }

  toJSON() {
    const {
      code, type, language, country,
    } = this;
    return {
      code, type, language, country,
    };
  }
}
/**
 * Categories of suggestion a rule can produce.
 */
export const SuggestionType = {
  PUBLISH: 'PUBLISH',
  LOCALE: 'LOCALE',
  SIMILAR: 'SIMILAR',
  NOT_FOUND: 'NOT_FOUND',
};

/**
 * Outcome of analysing one requested path: what was requested, what to use
 * instead (if anything), the category and a human-readable reason.
 */
export class Suggestion {
  /**
   * @param {string} requestedPath - Path that was requested.
   * @param {string|null} suggestedPath - Proposed replacement, if any.
   * @param {string} type - One of the SuggestionType values.
   * @param {string} reason - Explanation of the suggestion.
   */
  constructor(requestedPath, suggestedPath, type, reason) {
    Object.assign(this, {
      requestedPath, suggestedPath, type, reason,
    });
  }

  /** Builds a PUBLISH suggestion. */
  static publish(requestedPath, suggestedPath = null, reason = 'Content exists on Author') {
    const { PUBLISH } = SuggestionType;
    return new Suggestion(requestedPath, suggestedPath, PUBLISH, reason);
  }

  /** Builds a LOCALE suggestion. */
  static locale(requestedPath, suggestedPath, reason = 'Locale fallback detected') {
    const { LOCALE } = SuggestionType;
    return new Suggestion(requestedPath, suggestedPath, LOCALE, reason);
  }

  /** Builds a SIMILAR suggestion. */
  static similar(requestedPath, suggestedPath, reason = 'Similar path found') {
    const { SIMILAR } = SuggestionType;
    return new Suggestion(requestedPath, suggestedPath, SIMILAR, reason);
  }

  /** Builds a NOT_FOUND suggestion; its suggested path is always null. */
  static notFound(requestedPath, reason = 'Not found') {
    const { NOT_FOUND } = SuggestionType;
    return new Suggestion(requestedPath, null, NOT_FOUND, reason);
  }

  /**
   * @returns {{requestedPath: *, suggestedPath: *, type: *, reason: *}} Plain-object copy.
   */
  toJSON() {
    const {
      requestedPath, suggestedPath, type, reason,
    } = this;
    return {
      requestedPath, suggestedPath, type, reason,
    };
  }
}
import { Suggestion as SuggestionModel } from '@adobe/spacecat-shared-data-access';
import { AuditBuilder } from '../common/audit-builder.js';
import { DATA_SOURCES } from '../common/constants.js';
import { wwwUrlResolver } from '../common/index.js';
import { convertToOpportunity } from '../common/opportunity.js';
import { syncSuggestions } from '../utils/data-access.js';
import { AnalysisStrategy } from './analysis/analysis-strategy.js';
import { PathIndexCache } from './cache/path-index-cache.js';
import { AemClient } from './clients/aem-client.js';
import { AthenaCollector } from './collectors/athena-collector.js';
import { PathIndex } from './domain/index/path-index.js';

// --- opportunity-data-mapper.js ---
// The mapper shares this source range with the handler, so it is defined here
// rather than imported from './opportunity-data-mapper.js'.

/**
 * Static opportunity payload for the content-fragment-404 audit.
 * A fresh object is built on every call.
 *
 * @returns {object} Opportunity data.
 */
export function createOpportunityData() {
  const guidance = {
    steps: [
      'Review the requested Content Fragment paths grouped by suggestion type.',
      'Compare each requested path with its suggested path to identify the issue and what changed.',
      'Generate a short, user-friendly description of the difference.',
    ],
  };

  return {
    runbook: 'https://adobe.sharepoint.com/sites/aemsites-engineering/Shared%20Documents/3%20-%20Experience%20Success/SpaceCat/Runbooks/',
    origin: 'AUTOMATION',
    title: 'Content Fragment requests are failing and breaking digital experiences',
    description: 'Fixing broken Content Fragment requests by publishing missing content or setting up proper redirects ensures seamless API responses, prevents application errors and maintains consistent digital experiences across all touchpoints.',
    guidance,
    tags: ['Headless'],
    data: {
      dataSources: [DATA_SOURCES.SITE],
    },
  };
}

// --- handler.js ---

// TODO: Change to Audit.AUDIT_TYPES.CONTENT_FRAGMENT_404
// See https://github.com/adobe/spacecat-shared/pull/1049
export const AUDIT_TYPE = 'content-fragment-404';
export const GUIDANCE_TYPE = `guidance:${AUDIT_TYPE}`;

/**
 * Collects the content fragment 404 records through the Athena collector.
 *
 * @param {object} context - Audit context.
 * @returns {Promise<Array>} Records returned by the collector.
 */
async function fetchContentFragment404s(context) {
  const { log } = context;

  const collector = await AthenaCollector.createFrom(context);
  const records = await collector.fetchContentFragment404s();

  log.info(`Found ${records.length} content fragment 404s from ${collector.constructor.name}`);

  return records;
}

/**
 * Runs the analysis strategy over the 404 URLs and serializes its suggestions.
 *
 * @param {object} context - Audit context.
 * @param {Array} contentFragment404s - Records (objects with `url`) or plain URL strings.
 * @returns {Promise<object[]>} Suggestions in their toJSON() form.
 */
async function analyzeContentFragment404s(context, contentFragment404s) {
  const { log } = context;

  const pathIndex = new PathIndex(context);
  const aemClient = AemClient.createFrom(context, new PathIndexCache(pathIndex));
  const strategy = new AnalysisStrategy(context, aemClient, pathIndex);

  // Only the URLs are analysed; the full records stay with the caller
  const urls = contentFragment404s.map((record) => record.url || record);
  const suggestions = await strategy.analyze(urls);

  log.info(`Found ${suggestions.length} suggestions for content fragment 404s`);

  return suggestions.map((suggestion) => suggestion.toJSON());
}

/**
 * Post-processor: creates/updates the opportunity and syncs one suggestion per
 * audit suggestion, enriched with the request metadata of the matching 404 record.
 * Does nothing when the audit produced no suggestions.
 */
export async function createContentFragmentPathSuggestions(
  auditUrl,
  auditData,
  context,
) {
  const { log } = context;
  const { contentFragment404s, suggestions } = auditData.auditResult;

  const hasSuggestions = Boolean(suggestions) && suggestions.length > 0;
  if (!hasSuggestions) {
    log.info('No suggestions to create');
    return;
  }

  const opportunity = await convertToOpportunity(
    auditUrl,
    auditData,
    context,
    createOpportunityData,
    AUDIT_TYPE,
  );

  // 404 records keyed by URL, to look up the request metadata per suggestion
  const recordsByUrl = new Map(
    contentFragment404s.map((record) => [record.url, record]),
  );

  const enrichedSuggestions = suggestions.map((suggestion) => {
    const record = recordsByUrl.get(suggestion.requestedPath);
    return {
      ...suggestion,
      requestCount: record?.requestCount || 0,
      requestUserAgents: record?.requestUserAgents || [],
    };
  });

  await syncSuggestions({
    context,
    opportunity,
    newData: enrichedSuggestions,
    buildKey: (data) => `${data.requestedPath}|${data.type}`,
    getRank: (data) => data.requestCount,
    mapNewSuggestion: (suggestion) => ({
      opportunityId: opportunity.getId(),
      type: SuggestionModel.TYPES.AI_INSIGHTS,
      rank: suggestion.requestCount,
      data: suggestion,
    }),
  });

  log.info(`Created ${suggestions.length} suggestions for opportunity ${opportunity.getId()}`);
}

/**
 * Shapes one stored suggestion into the entry sent to Mystique.
 *
 * @param {object} suggestion - Stored suggestion entity.
 * @returns {object} Message entry.
 */
function toMystiqueEntry(suggestion) {
  return {
    suggestionId: suggestion.getId(),
    requestedPath: suggestion.getData().requestedPath,
    requestCount: suggestion.getData().requestCount,
    // Array of {userAgent: string, count: number} for detailed breakdown
    requestUserAgents: suggestion.getData().requestUserAgents,
    suggestedPath: suggestion.getData().suggestedPath,
    reason: suggestion.getData().reason,
  };
}

/**
 * Post-processor: sends the NEW suggestions of this audit's opportunity to the
 * Mystique queue. Skips when the handler is disabled for the site, when no
 * opportunity belongs to this audit, or when it has no NEW suggestions.
 */
export async function enrichContentFragmentPathSuggestions(
  auditUrl,
  auditData,
  context,
  site,
) {
  const {
    dataAccess, log, sqs, env,
  } = context;
  const { Configuration, Suggestion, Opportunity } = dataAccess;

  const configuration = await Configuration.findLatest();
  if (!configuration.isHandlerEnabledForSite(AUDIT_TYPE, site)) {
    log.info(`Auto-Suggest is disabled for site ${site.getId()}`);
    return;
  }

  const opportunities = await Opportunity.allBySiteIdAndStatus(site.getId(), 'NEW');
  const opportunity = opportunities.find(
    (candidate) => candidate.getType() === AUDIT_TYPE && candidate.getAuditId() === auditData.id,
  );
  if (!opportunity) {
    log.info('No opportunity found for this audit, skipping Mystique message');
    return;
  }

  const syncedSuggestions = await Suggestion.allByOpportunityIdAndStatus(
    opportunity.getId(),
    SuggestionModel.STATUSES.NEW,
  );
  if (!syncedSuggestions || syncedSuggestions.length === 0) {
    log.info('No suggestions to enrich, skipping Mystique message');
    return;
  }

  const message = {
    type: GUIDANCE_TYPE,
    siteId: site.getId(),
    auditId: auditData.id,
    deliveryType: site.getDeliveryType(),
    time: new Date().toISOString(),
    url: auditUrl,
    data: {
      opportunityId: opportunity.getId(),
      contentFragment404s: syncedSuggestions.map(toMystiqueEntry),
    },
  };
  await sqs.sendMessage(env.QUEUE_SPACECAT_TO_MYSTIQUE, message);

  log.info(`Sent ${syncedSuggestions.length} content fragment path suggestions to Mystique for enrichment`);
}

/**
 * Audit runner: fetches the 404 records, analyses them and returns both in the audit result.
 *
 * @param {string} baseURL - Audited base URL; also used as the full audit reference.
 * @param {object} context - Audit context.
 * @param {object} site - Site under audit; added to the context passed downstream.
 * @returns {Promise<{fullAuditRef: string, auditResult: object}>}
 */
export async function contentFragment404AuditRunner(baseURL, context, site) {
  const auditContext = { ...context, site };

  const contentFragment404s = await fetchContentFragment404s(auditContext);
  const suggestions = await analyzeContentFragment404s(auditContext, contentFragment404s);

  const auditResult = { contentFragment404s, suggestions };
  return { fullAuditRef: baseURL, auditResult };
}

export default new AuditBuilder()
  .withUrlResolver(wwwUrlResolver)
  .withRunner(contentFragment404AuditRunner)
  .withPostProcessors([
    createContentFragmentPathSuggestions,
    enrichContentFragmentPathSuggestions,
  ])
  .build();
/**
 * Base class for the analysis rules. Subclasses implement applyRule();
 * callers go through apply().
 */
export class BaseRule {
  /**
   * @param {object} context - Context object; its `log` is used by getAemClient().
   * @param {number} [priority=42] - Value returned by getPriority().
   * @param {object|null} [aemClient=null] - AEM client used by the rule.
   */
  constructor(context, priority = 42, aemClient = null) {
    Object.assign(this, { context, priority, aemClient });
  }

  /**
   * Entry point; delegates to applyRule().
   *
   * @param {string} brokenPath - Path to analyse.
   * @returns {Promise<*>} Whatever applyRule() resolves to.
   */
  async apply(brokenPath) {
    return this.applyRule(brokenPath);
  }

  getPriority() {
    return this.priority;
  }

  /**
   * @returns {object} The injected AEM client.
   * @throws {Error} When no client was injected (also logged as an error).
   */
  getAemClient() {
    const { log } = this.context;

    if (!this.aemClient) {
      log.error('AemClient not injected');
      throw new Error('AemClient not injected');
    }

    return this.aemClient;
  }

  /**
   * Rule implementation hook; the base version always rejects.
   */
  // eslint-disable-next-line no-unused-vars, class-methods-use-this
  async applyRule(brokenPath) {
    throw new Error('Subclasses must implement applyRule()');
  }
}
import { BaseRule } from './base-rule.js';
import { Suggestion } from '../domain/suggestion/suggestion.js';
import { Locale } from '../domain/language/locale.js';
import { LanguageTree } from '../domain/language/language-tree.js';
import { PathUtils } from '../utils/path-utils.js';

/**
 * Rule that looks for the requested content under a different locale segment.
 */
export class LocaleFallbackRule extends BaseRule {
  constructor(context, aemClient) {
    super(context, 2, aemClient); // Second priority
  }

  /**
   * When the path contains a locale segment, tries the related locale codes in
   * its place and suggests the first resulting path the AEM client reports as
   * available. When no locale is detected but the path contains a double
   * slash, tries inserting an English locale there instead.
   *
   * @param {string} brokenPath - Requested path that returned a 404.
   * @returns {Promise<Suggestion|null>} A locale suggestion, or null when nothing matched.
   */
  async applyRule(brokenPath) {
    const { log } = this.context;
    log.debug(`Applying LocaleFallbackRule to path: ${brokenPath}`);

    const detectedLocale = Locale.fromPath(brokenPath);
    if (!detectedLocale) {
      if (!PathUtils.hasDoubleSlashes(brokenPath)) {
        return null;
      }

      log.info('Double slash detected');

      // A double slash might indicate a missing locale segment
      return this.tryLocaleInsertion(brokenPath);
    }

    const detectedCode = detectedLocale.getCode();
    log.debug(`Detected locale: ${detectedCode} in path: ${brokenPath}`);

    // Paths already ruled out. Seeded with the broken path itself: a replacement
    // that changes nothing must never be probed or suggested as its own fallback.
    const triedPaths = new Set([brokenPath]);

    const similarRoots = LanguageTree.findSimilarLanguageRoots(detectedCode);
    for (const similarRoot of similarRoots) {
      const suggestedPath = detectedLocale.replaceInPath(brokenPath, similarRoot);

      if (!triedPaths.has(suggestedPath)) {
        triedPaths.add(suggestedPath);
        log.debug(`Trying locale fallback: ${detectedCode} -> ${similarRoot}`);

        // eslint-disable-next-line no-await-in-loop
        if (await this.getAemClient().isAvailable(suggestedPath)) {
          log.info(`Found locale fallback for ${brokenPath}: ${detectedCode} -> ${similarRoot}`);
          return Suggestion.locale(brokenPath, suggestedPath);
        }
      }
    }

    return null;
  }

  /**
   * Try to fix double slashes by inserting English fallback locales
   * @param {string} brokenPath - The path with double slashes
   * @returns {Promise<Suggestion|null>} - Locale suggestion if found, null otherwise
   */
  async tryLocaleInsertion(brokenPath) {
    const { log } = this.context;

    // Get all English fallback locales from LanguageTree
    const englishLocales = LanguageTree.findEnglishFallbacks();

    // Try inserting each English locale at the first double slash position
    for (const localeCode of englishLocales) {
      // Replace the first occurrence of // with /locale/
      const localePath = brokenPath.replace('//', `/${localeCode}/`);

      log.debug(`Trying locale insertion: ${brokenPath} -> ${localePath}`);

      // eslint-disable-next-line no-await-in-loop
      if (await this.getAemClient().isAvailable(localePath)) {
        log.info(`Found content with locale insertion: ${brokenPath} -> ${localePath}`);
        return Suggestion.locale(brokenPath, localePath);
      }
    }

    return null;
  }
}
import { BaseRule } from './base-rule.js';
import { Suggestion } from '../domain/suggestion/suggestion.js';

/**
 * Rule that checks whether the requested path itself is available through the AEM client.
 */
export class PublishRule extends BaseRule {
  constructor(context, aemClient) {
    super(context, 1, aemClient); // Highest priority
  }

  /**
   * @param {string} brokenPath - Requested path that returned a 404.
   * @returns {Promise<Suggestion|null>} A publish suggestion when the path is
   *   available, otherwise null.
   */
  async applyRule(brokenPath) {
    const { log } = this.context;
    log.debug(`Applying PublishRule to path: ${brokenPath}`);

    const isOnAuthor = await this.getAemClient().isAvailable(brokenPath);
    if (!isOnAuthor) {
      return null;
    }

    log.info(`Found content on Author for path: ${brokenPath}`);
    return Suggestion.publish(brokenPath);
  }
}
import { BaseRule } from './base-rule.js';
import { Suggestion } from '../domain/suggestion/suggestion.js';
import { LevenshteinDistance } from '../utils/levenshtein-distance.js';
import { PathUtils } from '../utils/path-utils.js';

/**
 * Rule that looks for an existing path that differs from the requested one
 * only by a double slash or a small typo.
 */
export class SimilarPathRule extends BaseRule {
  constructor(context, aemClient, pathIndex) {
    super(context, 3, aemClient); // Third priority
    this.pathIndex = pathIndex;
  }

  /**
   * First tries the path with double slashes collapsed; if that is not
   * available, compares the (collapsed) path against the children of its
   * parent and suggests the closest one within edit distance 1.
   *
   * The suggestion always carries the original `brokenPath` as its requested
   * path, so it can be matched back to the request it came from.
   *
   * @param {string} brokenPath - Requested path that returned a 404.
   * @returns {Promise<Suggestion|null>} A similar-path suggestion, or null.
   */
  async applyRule(brokenPath) {
    const { log } = this.context;
    log.debug(`Applying SimilarPathRule to path: ${brokenPath}`);

    let path = brokenPath;
    const result = await this.checkDoubleSlash(brokenPath);
    if (result) {
      // If we have a suggestion, return it directly
      if (result.suggestion) {
        return result.suggestion;
      }
      // If we have a fixed path but no suggestion, continue with checks using the fixed path
      if (result.fixedPath) {
        path = result.fixedPath;
        log.debug(`Continuing similarity check with fixed path: ${path}`);
      }
    }

    const parentPath = PathUtils.getParentPath(path);
    if (!parentPath) {
      log.debug(`No parent path found for: ${path}`);
      return null;
    }

    // Candidates are the children of the direct parent only
    const childrenPaths = await this.getAemClient().getChildrenFromPath(
      parentPath,
    );
    if (childrenPaths.length === 0) {
      log.debug(`No children paths found for parent: ${parentPath}`);
      return null;
    }

    // Use Levenshtein distance <= 1 for typos
    const similar = SimilarPathRule.findSimilarPath(path, childrenPaths, 1);
    if (similar) {
      log.info(`Found similar path for ${path}: ${similar.path}`);
      // Report against the path as requested, not the slash-normalised working copy
      return Suggestion.similar(brokenPath, similar.path);
    }

    return null;
  }

  /**
   * Check if the broken path can be fixed by removing double slashes
   * @param {string} brokenPath - The path with potential double slashes
   * @returns {Promise<{suggestion: Suggestion|null, fixedPath: string|null}|null>}
   * Object with suggestion and fixedPath fields, null if no double slashes
   */
  async checkDoubleSlash(brokenPath) {
    const { log } = this.context;

    // Check if path contains double slashes
    if (!PathUtils.hasDoubleSlashes(brokenPath)) {
      return null;
    }

    // Remove double slashes by replacing them with single slashes
    const fixedPath = PathUtils.removeDoubleSlashes(brokenPath);

    log.debug(`Checking double slash removal: ${brokenPath} -> ${fixedPath}`);

    // Check if the fixed path exists on Author
    if (await this.getAemClient().isAvailable(fixedPath)) {
      log.info(`Found content for double-slash corrected path: ${brokenPath} -> ${fixedPath}`);
      return { suggestion: Suggestion.similar(brokenPath, fixedPath), fixedPath };
    }

    log.debug(`Fixed path not available on Author, will continue with similarity check: ${fixedPath}`);
    return { suggestion: null, fixedPath };
  }

  /**
   * Picks the candidate whose locale-stripped path is closest to the
   * locale-stripped broken path.
   *
   * @param {string} brokenPath - Path to match.
   * @param {Array<{path: string}>} candidatePaths - Candidates to compare against.
   * @param {number} maxDistance - Largest edit distance still accepted.
   * @returns {{path: string}|null} The closest candidate within `maxDistance`
   *   (the earliest one on ties), or null.
   */
  static findSimilarPath(brokenPath, candidatePaths, maxDistance) {
    // Extract non-locale parts for comparison
    const brokenPathWithoutLocale = PathUtils.removeLocaleFromPath(brokenPath);

    // Find the best match by comparing non-locale parts
    let closestMatch = null;
    let bestDistance = Infinity;

    for (const candidatePath of candidatePaths) {
      const candidateWithoutLocale = PathUtils.removeLocaleFromPath(candidatePath.path);

      const distance = LevenshteinDistance.calculate(
        brokenPathWithoutLocale,
        candidateWithoutLocale,
      );

      if (distance <= maxDistance && distance < bestDistance) {
        bestDistance = distance;
        closestMatch = candidatePath;
      }
    }

    return closestMatch;
  }
}

/*
 * sql/create-database.sql shares this source range with the class above;
 * its single statement is kept here for reference:
 *
 *   CREATE DATABASE IF NOT EXISTS {{database}};
 */
-- create-table.sql
-- Registers an external Parquet table over the configured location, partitioned
-- by year / month / day. Partition projection is enabled, so partition values
-- come from the projection properties below.
CREATE EXTERNAL TABLE IF NOT EXISTS {{database}}.{{tableName}} (
  url string,
  request_count int,
  request_user_agent string,
  tenant string
)
PARTITIONED BY (
  year string,
  month string,
  day string
)
STORED AS PARQUET
LOCATION '{{location}}'
TBLPROPERTIES (
  'projection.enabled' = 'true',
  'projection.year.type' = 'integer',
  -- NOTE(review): the year range is hard-coded to 2024..2030; presumably data
  -- dated outside that range is not projected. Confirm and plan for the upper bound.
  'projection.year.range' = '2024,2030',
  'projection.month.type' = 'integer',
  'projection.month.range' = '1,12',
  -- Two digits so projected values are zero-padded (01..12, 01..31).
  'projection.month.digits' = '2',
  'projection.day.type' = 'integer',
  'projection.day.range' = '1,31',
  'projection.day.digits' = '2',
  -- Partition folders are laid out as <location>/<year>/<month>/<day>/.
  'storage.location.template' = '{{location}}/${year}/${month}/${day}/',
  'has_encrypted_data' = 'false'
);

-- daily-query.sql (separate template file)
-- Reads url, request count and user agent for a single year/month/day partition.
-- The partition columns are strings, hence the quoted values.
SELECT
  url,
  request_count,
  request_user_agent
FROM {{database}}.{{tableName}}
WHERE year = '{{year}}'
  AND month = '{{month}}'
  AND day = '{{day}}'
/**
 * Levenshtein (edit) distance between two strings.
 */
export class LevenshteinDistance {
  /**
   * Computes the minimum number of single-character insertions, deletions and
   * substitutions needed to turn `source` into `target`. Characters are
   * compared with `charAt`, i.e. per UTF-16 code unit.
   *
   * @param {string} source - The string to transform
   * @param {string} target - The string to transform into
   * @returns {number} - The edit distance (0 when the strings are equal)
   * @throws {Error} - If either argument is null or undefined
   */
  static calculate(source, target) {
    // Reject undefined as well as null: otherwise `undefined.length` below would
    // surface as an unrelated TypeError instead of this explicit error.
    if (source === null || source === undefined || target === null || target === undefined) {
      throw new Error('Strings cannot be null');
    }

    const sourceLength = source.length;
    const targetLength = target.length;

    if (sourceLength === 0) return targetLength;
    if (targetLength === 0) return sourceLength;

    // Each cell only depends on the previous row and the current row, so two
    // rows are enough; no need to keep the whole (n+1) x (m+1) matrix.
    // Row 0: turning the empty prefix of source into target[0..j) costs j insertions.
    let previousRow = Array.from({ length: targetLength + 1 }, (_, j) => j);

    for (let i = 1; i <= sourceLength; i += 1) {
      // Column 0: turning source[0..i) into the empty string costs i deletions.
      const currentRow = [i];
      const sourceChar = source.charAt(i - 1);

      for (let j = 1; j <= targetLength; j += 1) {
        if (sourceChar === target.charAt(j - 1)) {
          // Matching characters carry the diagonal cost over unchanged.
          currentRow[j] = previousRow[j - 1];
        } else {
          currentRow[j] = 1 + Math.min(
            previousRow[j - 1], // substitution
            previousRow[j], // deletion
            currentRow[j - 1], // insertion
          );
        }
      }

      previousRow = currentRow;
    }

    return previousRow[targetLength];
  }
}
/**
 * Matches a leading URI scheme followed by "://" (for example "https://").
 * The scheme follows the RFC 3986 grammar: a letter, then letters, digits,
 * "+", "-" or ".". Anchoring on a real scheme keeps a "://" that appears
 * later in a plain path from being mistaken for a protocol separator.
 */
const PROTOCOL_PREFIX = /^[a-z][a-z0-9+.-]*:\/\//i;

/**
 * Helpers for working with content paths.
 */
export class PathUtils {
  /**
   * Removes every path segment that matches one of the Locale patterns.
   * Only paths under /content/dam/ are processed; anything else is returned as is.
   *
   * @param {string} path - The path to process
   * @returns {string} - The path without locale segments. When no segment matches,
   *   the original path is returned untouched; when a segment is removed, a trailing
   *   slash of the input is not preserved.
   */
  static removeLocaleFromPath(path) {
    if (!path || !path.startsWith('/content/dam/')) {
      return path;
    }

    // Work on the path without its trailing slash so it does not yield an empty segment.
    const trimmedPath = path.endsWith('/') ? path.slice(0, -1) : path;

    const segments = trimmedPath.split('/');
    const keptSegments = segments.filter((segment) => !(
      segment.match(Locale.TWO_LETTER_PATTERN) || segment.match(Locale.FIVE_LETTER_PATTERN)
    ));

    // Nothing was removed: hand back the caller's path exactly as given.
    if (keptSegments.length === segments.length) {
      return path;
    }

    return keptSegments.join('/');
  }

  /**
   * Returns the parent of a path under /content/dam/.
   *
   * @param {string} path - The path whose parent is wanted
   * @returns {string|null} - Everything before the last "/" (ignoring one trailing
   *   slash), or null when the path is empty or not under /content/dam/
   */
  static getParentPath(path) {
    if (!path || !path.startsWith('/content/dam/')) {
      return null;
    }

    const trimmedPath = path.endsWith('/') ? path.slice(0, -1) : path;
    return trimmedPath.substring(0, trimmedPath.lastIndexOf('/'));
  }

  /**
   * Check if a path has double slashes (excluding protocol slashes)
   * @param {string} path - The path to check
   * @returns {boolean} - True if path contains double slashes
   */
  static hasDoubleSlashes(path) {
    if (!path) return false;

    // Ignore only the slashes of a genuine leading scheme (http://, https://, ...).
    const withoutProtocol = path.replace(PROTOCOL_PREFIX, '');
    return withoutProtocol.includes('//');
  }

  /**
   * Remove double slashes in a path
   * @param {string} path - The path to fix
   * @returns {string} - The path with every run of slashes collapsed to a single one;
   *   a leading "scheme://" prefix is kept intact. Falsy input is returned unchanged.
   */
  static removeDoubleSlashes(path) {
    if (!path) return path;

    const protocolMatch = path.match(PROTOCOL_PREFIX);
    const protocol = protocolMatch ? protocolMatch[0] : '';

    // Collapse slash runs everywhere except inside the protocol prefix.
    return protocol + path.slice(protocol.length).replace(/\/+/g, '/');
  }
}
'./cdn-content-fragment-404/handler.js'; import vulnerabilities from './vulnerabilities/handler.js'; import prerender from './prerender/handler.js'; import productMetatags from './product-metatags/handler.js'; @@ -154,6 +156,8 @@ const HANDLERS = { 'guidance:summarization': summarizationGuidance, hreflang, headings, + 'cdn-content-fragment-404': cdnContentFragment404, + 'content-fragment-404': contentFragment404, prerender, 'product-metatags': productMetatags, 'security-vulnerabilities': vulnerabilities, diff --git a/src/utils/asset-utils.js b/src/utils/asset-utils.js new file mode 100644 index 000000000..e71728d88 --- /dev/null +++ b/src/utils/asset-utils.js @@ -0,0 +1,82 @@ +/* + * Copyright 2024 Adobe. All rights reserved. + * This file is licensed to you under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. You may obtain a copy + * of the License at http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under + * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS + * OF ANY KIND, either express or implied. See the License for the specific language + * governing permissions and limitations under the License. 
/**
 * Common asset file extensions organized by category
 */
const ASSET_EXTENSIONS = {
  images: ['.jpg', '.jpeg', '.png', '.gif', '.svg', '.webp', '.ico', '.bmp', '.tiff', '.tif'],
  documents: ['.pdf', '.doc', '.docx', '.xls', '.xlsx', '.ppt', '.pptx', '.odt', '.ods', '.odp'],
  media: ['.mp4', '.avi', '.mov', '.wmv', '.flv', '.webm', '.mp3', '.wav', '.ogg', '.m4a'],
  archives: ['.zip', '.rar', '.tar', '.gz', '.7z', '.bz2'],
  fonts: ['.woff', '.woff2', '.ttf', '.eot', '.otf'],
};

/**
 * Flattened list of all asset extensions
 */
const ALL_ASSET_EXTENSIONS = Object.values(ASSET_EXTENSIONS).flat();

/**
 * Checks if a URL points to an asset file based on its extension.
 * The comparison is case-insensitive, and a query string or fragment after the
 * file name (e.g. `/logo.png?width=100`) does not hide the extension.
 * @param {string} url - The URL to check
 * @param {string[]} [extensions] - Optional custom list of extensions to check against
 * @returns {boolean} - True if the URL (with or without its query/fragment) ends with
 *   one of the extensions; false for empty or non-string input
 */
export function isAssetUrl(url, extensions = ALL_ASSET_EXTENSIONS) {
  if (!url || typeof url !== 'string') {
    return false;
  }

  const urlLower = url.toLowerCase();

  // Part of the URL before the first "?" or "#", so "/a.png?x=1" is still seen as ".png".
  const queryStart = urlLower.search(/[?#]/);
  const pathLower = queryStart === -1 ? urlLower : urlLower.slice(0, queryStart);

  return extensions.some((extension) => {
    // Lower-case the extension too, so custom lists such as ['.PNG'] can match.
    const extensionLower = String(extension).toLowerCase();
    // The full-URL check is kept so every URL matched before still matches.
    return urlLower.endsWith(extensionLower) || pathLower.endsWith(extensionLower);
  });
}

/**
 * Checks if a URL points to a specific category of assets
 * @param {string} url - The URL to check
 * @param {string} category - The asset category (images, documents, media, archives, fonts)
 * @returns {boolean} - True if the URL ends with an extension from the specified category
 * @throws {Error} - If the category is not one of the known categories
 */
export function isAssetCategory(url, category) {
  // Own-property check: inherited keys such as "constructor" or "toString" must be
  // reported as unknown categories instead of being used as an extension list.
  if (!Object.hasOwn(ASSET_EXTENSIONS, category)) {
    throw new Error(`Unknown asset category: ${category}. Valid categories are: ${Object.keys(ASSET_EXTENSIONS).join(', ')}`);
  }
  return isAssetUrl(url, ASSET_EXTENSIONS[category]);
}

/**
 * Filters an array of URLs to exclude asset URLs
 * @param {string[]} urls - Array of URLs to filter
 * @param {string[]} [extensions] - Optional custom list of extensions to exclude
 * @returns {string[]} - Array of non-asset URLs
 */
export function filterAssetUrls(urls, extensions = ALL_ASSET_EXTENSIONS) {
  return urls.filter((url) => !isAssetUrl(url, extensions));
}

/**
 * Get all asset extensions (for reference or testing)
 * @returns {string[]} - A new array holding all asset extensions
 */
export function getAllAssetExtensions() {
  return [...ALL_ASSET_EXTENSIONS];
}

/**
 * Get asset extensions by category
 * @returns {Object} - Object containing asset extensions organized by category.
 *   Both the object and its arrays are copies, so callers cannot alter the
 *   module's own lists by mutating the result.
 */
export function getAssetExtensionsByCategory() {
  return Object.fromEntries(
    Object.entries(ASSET_EXTENSIONS).map(([category, extensions]) => [category, [...extensions]]),
  );
}
@@ -188,6 +190,7 @@ export async function syncSuggestions({ newData, buildKey, mapNewSuggestion, + getRank = null, mergeDataFunction = defaultMergeDataFunction, statusToSetForOutdated = SuggestionDataAccess.STATUSES.OUTDATED, }) { @@ -218,11 +221,21 @@ export async function syncSuggestions({ }) .map((existing) => { const newDataItem = newData.find((data) => buildKey(data) === buildKey(existing.getData())); + + if (getRank && newDataItem) { + const rank = getRank(newDataItem); + if (rank !== undefined && rank !== null) { + existing.setRank(rank); + } + } + existing.setData(mergeDataFunction(existing.getData(), newDataItem)); + if ([SuggestionDataAccess.STATUSES.OUTDATED].includes(existing.getStatus())) { log.warn('Resolved suggestion found in audit. Possible regression.'); existing.setStatus(SuggestionDataAccess.STATUSES.NEW); } + existing.setUpdatedBy('system'); return existing.save(); }), diff --git a/test/audits/cdn-content-fragment-404/handler.test.js b/test/audits/cdn-content-fragment-404/handler.test.js new file mode 100644 index 000000000..2145ac8fe --- /dev/null +++ b/test/audits/cdn-content-fragment-404/handler.test.js @@ -0,0 +1,296 @@ +/* + * Copyright 2025 Adobe. All rights reserved. + * This file is licensed to you under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. You may obtain a copy + * of the License at http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under + * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS + * OF ANY KIND, either express or implied. See the License for the specific language + * governing permissions and limitations under the License. 
// Test suite for cdnContentFragment404Runner. The handler module is loaded through
// esmock so that the Athena client and getStaticContent are replaced by stubs; no
// SQL file is read and no query is actually executed.
describe('CDN 404 Analysis Handler', () => {
  let sandbox;
  let context;
  let site;
  let handlerModule;
  let athenaClientStub;
  let getStaticContentStub;

  beforeEach(async () => {
    sandbox = sinon.createSandbox();
    // Pin "now" so the previous-hour partition computed by the handler is predictable.
    sandbox.stub(Date, 'now').returns(TEST_DATE_2025_09_18.getTime());
    site = {
      getBaseURL: sandbox.stub().returns(TEST_BASE_URL_EXAMPLE),
      getOrganizationId: sandbox.stub().returns(TEST_ORG_ID),
    };
    context = new MockContextBuilder()
      .withSandbox(sandbox)
      .withOverrides({
        site,
        log: {
          info: sandbox.spy(),
          debug: sandbox.spy(),
          warn: sandbox.spy(),
          error: sandbox.spy(),
        },
        s3Client: { send: sandbox.stub() },
        rawBucket: TEST_S3_BUCKET,
        dataAccess: {
          Organization: {
            findById: sandbox.stub().resolves({
              getImsOrgId: () => TEST_IMS_ORG_ID,
            }),
          },
        },
      })
      .build();
    athenaClientStub = {
      execute: sandbox.stub().resolves(),
    };
    // Every SQL template "loads" as the same dummy statement; the tests inspect the
    // arguments passed to this stub rather than the SQL text.
    getStaticContentStub = sandbox.stub().resolves('SELECT 1;');
    handlerModule = await esmock('../../../src/cdn-content-fragment-404/handler.js', {
      '@adobe/spacecat-shared-athena-client': { AWSAthenaClient: { fromContext: () => athenaClientStub } },
      '@adobe/spacecat-shared-utils': { getStaticContent: getStaticContentStub },
    });
  });

  afterEach(() => {
    sandbox.restore();
  });

  // Happy path: three templates loaded, three Athena statements executed.
  it('runs the full cdnContentFragment404Runner flow', async () => {
    const result = await handlerModule.cdnContentFragment404Runner(context);

    expect(getStaticContentStub).to.have.been.callCount(EXPECTED_CALL_COUNT_THRICE);
    expect(athenaClientStub.execute).to.have.been.callCount(EXPECTED_CALL_COUNT_THRICE);
    expect(result).to.have.property('auditResult');
    expect(result).to.have.property('fullAuditRef');
    expect(result.auditResult).to.include.keys('database', 'rawTable', 'completedAt');
    expect(result.auditResult.database).to.equal('cdn_logs_example_com');
    expect(result.auditResult.rawTable).to.equal('raw_logs_status_example_com');
    expect(result.fullAuditRef).to.equal(`s3://${TEST_S3_BUCKET}/${TEST_IMS_ORG_ID}/aggregated-404/${TEST_YEAR}/${TEST_MONTH_09}/${TEST_DAY_18}/${TEST_HOUR_13}/`);
  });

  it('correctly extracts and escapes customer domain', async () => {
    site.getBaseURL.returns(TEST_BASE_URL_SITE);

    const result = await handlerModule.cdnContentFragment404Runner(context);

    expect(result.auditResult.database).to.equal('cdn_logs_test_site_com');
    expect(result.auditResult.rawTable).to.equal('raw_logs_status_test_site_com');
  });

  it('generates correct S3 paths with IMS org', async () => {
    const result = await handlerModule.cdnContentFragment404Runner(context);

    // Verify the output path includes the IMS org
    expect(result.fullAuditRef).to.include(`${TEST_S3_BUCKET}/${TEST_IMS_ORG_ID}/aggregated-404/`);

    // Verify SQL calls were made with correct parameters
    // (call order: create database, create raw table, unload)
    expect(getStaticContentStub.firstCall.args[0]).to.have.property('database', 'cdn_logs_example_com');
    expect(getStaticContentStub.secondCall.args[0]).to.have.property('rawLocation', `s3://${TEST_S3_BUCKET}/${TEST_IMS_ORG_ID}/raw/aem-cs-fastly`);
    expect(getStaticContentStub.thirdCall.args[0]).to.have.property('output').that.includes(`${TEST_S3_BUCKET}/${TEST_IMS_ORG_ID}/aggregated-404/`);
  });

  it('uses correct time partitioning for previous hour', async () => {
    // Mock Date.now to return a specific time
    // NOTE: Date.now is already the sandbox stub installed in beforeEach, so
    // originalDateNow captures that stub and the finally block puts the stub back;
    // afterEach's sandbox.restore() is what reinstates the real Date.now.
    const mockTime = TEST_DATE_2025_01_15_14_30.getTime();
    const originalDateNow = Date.now;
    Date.now = sandbox.stub().returns(mockTime);

    try {
      await handlerModule.cdnContentFragment404Runner(context);

      // Should use the previous hour (13:00)
      const unloadCall = getStaticContentStub.thirdCall.args[0];
      expect(unloadCall).to.have.property('year', TEST_YEAR);
      expect(unloadCall).to.have.property('month', TEST_MONTH);
      expect(unloadCall).to.have.property('day', TEST_DAY);
      expect(unloadCall).to.have.property('hour', TEST_HOUR_13);
    } finally {
      Date.now = originalDateNow;
    }
  });

  it('handles hour boundary correctly (previous day)', async () => {
    // Mock Date.now to return midnight
    // (same override/restore pattern as the previous test)
    const mockTime = TEST_DATE_2025_01_15_00_30.getTime();
    const originalDateNow = Date.now;
    Date.now = sandbox.stub().returns(mockTime);

    try {
      await handlerModule.cdnContentFragment404Runner(context);

      // Should use the previous hour (23:00 of previous day)
      const unloadCall = getStaticContentStub.thirdCall.args[0];
      expect(unloadCall).to.have.property('year', TEST_YEAR);
      expect(unloadCall).to.have.property('month', TEST_MONTH);
      expect(unloadCall).to.have.property('day', TEST_DAY_PREVIOUS);
      expect(unloadCall).to.have.property('hour', TEST_HOUR_23);
    } finally {
      Date.now = originalDateNow;
    }
  });

  it('returns completedAt timestamp in ISO format', async () => {
    // Only Date.now is stubbed; `new Date()` here still reads the real clock.
    // NOTE(review): presumably the handler builds completedAt from `new Date()` as
    // well, otherwise the bounds below would not bracket it - confirm in the handler.
    const beforeTime = new Date();
    const result = await handlerModule.cdnContentFragment404Runner(context);
    const afterTime = new Date();

    expect(result.auditResult.completedAt).to.be.a('string');
    const completedAtDate = new Date(result.auditResult.completedAt);
    expect(completedAtDate).to.be.at.least(beforeTime);
    expect(completedAtDate).to.be.at.most(afterTime);
  });

  it('calls athena client with correct descriptions', async () => {
    await handlerModule.cdnContentFragment404Runner(context);

    // The third argument of execute() is the human-readable description.
    // NOTE(review): the second expectation hard-codes the bucket and org id instead of
    // using TEST_S3_BUCKET / TEST_IMS_ORG_ID - presumably the same values; verify
    // against test-constants.js.
    expect(athenaClientStub.execute.firstCall.args[2]).to.equal('[Athena Query] Create database cdn_logs_example_com');
    expect(athenaClientStub.execute.secondCall.args[2]).to.equal('[Athena Query] Create raw logs table cdn_logs_example_com.raw_logs_status_example_com from s3://test-raw-bucket/1234567890/raw/aem-cs-fastly');
    expect(athenaClientStub.execute.thirdCall.args[2]).to.include(`[Athena Query] Unload 404 content data to s3://${TEST_S3_BUCKET}/${TEST_IMS_ORG_ID}/aggregated-404/`);
  });

  it('loads correct SQL files with proper variables', async () => {
    await handlerModule.cdnContentFragment404Runner(context);

    expect(getStaticContentStub.firstCall.args[1]).to.equal('./src/cdn-content-fragment-404/sql/create-database.sql');
    expect(getStaticContentStub.secondCall.args[1]).to.equal('./src/cdn-content-fragment-404/sql/create-raw-table.sql');
    expect(getStaticContentStub.thirdCall.args[1]).to.equal('./src/cdn-content-fragment-404/sql/unload-404-content.sql');
  });

  // Failure propagation: a rejection at any of the three template loads or any of
  // the three Athena executions must reject the runner with the same error.
  it('throws if getStaticContent throws on database creation', async () => {
    getStaticContentStub.onFirstCall().rejects(new Error('SQL load error'));

    await expect(
      handlerModule.cdnContentFragment404Runner(context),
    ).to.be.rejectedWith('SQL load error');
  });

  it('throws if getStaticContent throws on table creation', async () => {
    getStaticContentStub.onSecondCall().rejects(new Error('Table SQL load error'));

    await expect(
      handlerModule.cdnContentFragment404Runner(context),
    ).to.be.rejectedWith('Table SQL load error');
  });

  it('throws if getStaticContent throws on unload query', async () => {
    getStaticContentStub.onThirdCall().rejects(new Error('Unload SQL load error'));

    await expect(
      handlerModule.cdnContentFragment404Runner(context),
    ).to.be.rejectedWith('Unload SQL load error');
  });

  it('throws if athenaClient.execute throws on database creation', async () => {
    athenaClientStub.execute.onFirstCall().rejects(new Error('Database creation error'));

    await expect(
      handlerModule.cdnContentFragment404Runner(context),
    ).to.be.rejectedWith('Database creation error');
  });

  it('throws if athenaClient.execute throws on table creation', async () => {
    athenaClientStub.execute.onSecondCall().rejects(new Error('Table creation error'));

    await expect(
      handlerModule.cdnContentFragment404Runner(context),
    ).to.be.rejectedWith('Table creation error');
  });

  it('throws if athenaClient.execute throws on unload operation', async () => {
    athenaClientStub.execute.onThirdCall().rejects(new Error('Unload operation error'));

    await expect(
      handlerModule.cdnContentFragment404Runner(context),
    ).to.be.rejectedWith('Unload operation error');
  });

  // Precondition checks: the runner must reject when required context is missing.
  it('throws if rawBucket is undefined in context', async () => {
    const contextWithoutRawBucket = new MockContextBuilder()
      .withSandbox(sandbox)
      .withOverrides({
        site,
        log: {
          info: sandbox.spy(),
          debug: sandbox.spy(),
          warn: sandbox.spy(),
          error: sandbox.spy(),
        },
        s3Client: { send: sandbox.stub() },
        rawBucket: undefined,
        dataAccess: {
          Organization: {
            findById: sandbox.stub().resolves({
              getImsOrgId: () => TEST_IMS_ORG_ID,
            }),
          },
        },
      })
      .build();

    await expect(
      handlerModule.cdnContentFragment404Runner(contextWithoutRawBucket),
    ).to.be.rejectedWith('Raw bucket is required');
  });

  it('throws if imsOrg cannot be retrieved', async () => {
    // Organization lookup resolves to null, so no IMS org id can be derived.
    const contextWithoutImsOrg = new MockContextBuilder()
      .withSandbox(sandbox)
      .withOverrides({
        site,
        log: {
          info: sandbox.spy(),
          debug: sandbox.spy(),
          warn: sandbox.spy(),
          error: sandbox.spy(),
        },
        s3Client: { send: sandbox.stub() },
        rawBucket: TEST_S3_BUCKET,
        dataAccess: {
          Organization: {
            findById: sandbox.stub().resolves(null),
          },
        },
      })
      .build();

    await expect(
      handlerModule.cdnContentFragment404Runner(contextWithoutImsOrg),
    ).to.be.rejectedWith('Unable to retrieve IMS organization ID');
  });
});
a/test/audits/content-fragment-404/aem-client.test.js b/test/audits/content-fragment-404/aem-client.test.js new file mode 100644 index 000000000..b8ef4f3a3 --- /dev/null +++ b/test/audits/content-fragment-404/aem-client.test.js @@ -0,0 +1,991 @@ +/* + * Copyright 2025 Adobe. All rights reserved. + * This file is licensed to you under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. You may obtain a copy + * of the License at http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under + * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS + * OF ANY KIND, either express or implied. See the License for the specific language + * governing permissions and limitations under the License. + */ + +/* eslint-env mocha */ +import { expect, use } from 'chai'; +import sinon from 'sinon'; +import sinonChai from 'sinon-chai'; +import chaiAsPromised from 'chai-as-promised'; +import esmock from 'esmock'; +import { MockContextBuilder } from '../../shared.js'; +import { NoOpCache } from '../../../src/content-fragment-404/cache/noop-cache.js'; +import { PathIndexCache } from '../../../src/content-fragment-404/cache/path-index-cache.js'; + +use(sinonChai); +use(chaiAsPromised); + +import { + TEST_AEM_AUTHOR_URL, + TEST_AEM_AUTHOR_TOKEN, + TEST_AEM_AUTHOR_TOKEN_ALT, + TEST_PATH_PARENT, + TEST_PATH_TEST, + TEST_PATH_CONTENT_DAM, + TEST_PATH_CONTENT, + TEST_PATH_OTHER, + TEST_PATH_RELATIVE, + TEST_PATH_EN_US_IMAGES, + TEST_PATH_FOLDER_FILE, + TEST_PATH_IMAGE, + TEST_PATH_IMAGE_1, + TEST_PATH_IMAGE_2, + TEST_PATH_CHILD_1, + TEST_PATH_CHILD, + TEST_PATH_PARENT_CHILD, + STATUS_UNKNOWN, + STATUS_PUBLISHED, + STATUS_DRAFT, + LOCALE_CODE_EN_US, + MAX_PAGES_VALUE, + PAGINATION_DELAY_MS_VALUE, + DELAY_MS_TEST, + DELAY_TOLERANCE_MS, + DELAY_ZERO, + DELAY_THRESHOLD_MS, + TEST_PATH_IMAGE_WITH_SPACES, + 
HTTP_STATUS_NOT_FOUND, + HTTP_STATUS_TEXT_NOT_FOUND, + BEARER_PREFIX, + ACCEPT_JSON, + API_SITES_FRAGMENTS, + PROJECTION_MINIMAL, + TEST_CURSOR, +} from './test-constants.js'; + +const EXPECTED_SUGGESTIONS_COUNT_1 = 1; + +describe('AemClient', () => { + let sandbox; + let context; + let mockFetch; + let mockPathIndex; + let mockCache; + let mockContentPath; + let mockLocale; + let mockPathUtils; + let AemClient; + + beforeEach(async () => { + sandbox = sinon.createSandbox(); + + const mockSite = { + getDeliveryConfig: sandbox.stub().returns({ + authorURL: TEST_AEM_AUTHOR_URL, + }), + }; + + context = new MockContextBuilder() + .withSandbox(sandbox) + .withOverrides({ + log: { + info: sandbox.spy(), + debug: sandbox.spy(), + warn: sandbox.spy(), + error: sandbox.spy(), + }, + env: { + AEM_AUTHOR_TOKEN: TEST_AEM_AUTHOR_TOKEN, + }, + site: mockSite, + }) + .build(); + + mockFetch = sandbox.stub(); + + mockPathIndex = { + insertContentPath: sandbox.stub(), + findChildren: sandbox.stub().returns([]), + }; + + mockCache = { + cacheItems: sandbox.stub(), + findChildren: sandbox.stub().returns([]), + isAvailable: sandbox.stub().returns(true), + }; + + mockContentPath = sandbox.stub(); + mockLocale = { + fromPath: sandbox.stub().returns({ code: LOCALE_CODE_EN_US }), + }; + + mockPathUtils = { + getParentPath: sandbox.stub().returns(TEST_PATH_PARENT), + }; + + const module = await esmock('../../../src/content-fragment-404/clients/aem-client.js', { + '@adobe/spacecat-shared-utils': { + tracingFetch: mockFetch, + }, + '../../../src/content-fragment-404/utils/path-utils.js': { + PathUtils: mockPathUtils, + }, + }); + + AemClient = module.AemClient; + }); + + afterEach(() => { + sandbox.restore(); + }); + + describe('static constants', () => { + it('should have correct API endpoints', () => { + expect(AemClient.API_SITES_BASE).to.equal('/adobe/sites'); + expect(AemClient.API_SITES_FRAGMENTS).to.equal('/adobe/sites/cf/fragments'); + }); + + it('should have pagination constants', 
() => { + expect(AemClient.MAX_PAGES).to.equal(MAX_PAGES_VALUE); + expect(AemClient.PAGINATION_DELAY_MS).to.equal(PAGINATION_DELAY_MS_VALUE); + }); + }); + + describe('constructor', () => { + it('should create client with cache strategy', () => { + const client = new AemClient(context, TEST_AEM_AUTHOR_URL, TEST_AEM_AUTHOR_TOKEN_ALT, mockCache); + + expect(client.authorUrl).to.equal(TEST_AEM_AUTHOR_URL); + expect(client.authToken).to.equal(TEST_AEM_AUTHOR_TOKEN_ALT); + expect(client.context).to.equal(context); + expect(client.cache).to.equal(mockCache); + }); + + it('should create client with NoOpCache by default', () => { + const client = new AemClient(context, TEST_AEM_AUTHOR_URL, TEST_AEM_AUTHOR_TOKEN_ALT); + + expect(client.authorUrl).to.equal(TEST_AEM_AUTHOR_URL); + expect(client.authToken).to.equal(TEST_AEM_AUTHOR_TOKEN_ALT); + expect(client.context).to.equal(context); + expect(client.cache).to.be.instanceOf(NoOpCache); + }); + + it('should create client with PathIndexCache', () => { + const cache = new PathIndexCache(mockPathIndex); + const client = new AemClient(context, TEST_AEM_AUTHOR_URL, TEST_AEM_AUTHOR_TOKEN_ALT, cache); + + expect(client.context).to.equal(context); + expect(client.cache).to.equal(cache); + expect(client.cache).to.be.instanceOf(PathIndexCache); + }); + }); + + describe('createFrom static factory method', () => { + it('should create client from context with cache strategy', () => { + const client = AemClient.createFrom(context, mockCache); + + expect(client.authorUrl).to.equal(TEST_AEM_AUTHOR_URL); + expect(client.authToken).to.equal(TEST_AEM_AUTHOR_TOKEN); + expect(client.context).to.equal(context); + expect(client.cache).to.equal(mockCache); + }); + + it('should create client with NoOpCache by default', () => { + const client = AemClient.createFrom(context); + + expect(client.authorUrl).to.equal(TEST_AEM_AUTHOR_URL); + expect(client.authToken).to.equal(TEST_AEM_AUTHOR_TOKEN); + expect(client.context).to.equal(context); + 
expect(client.cache).to.be.instanceOf(NoOpCache); + }); + + it('should throw error when AEM_AUTHOR_URL is missing', () => { + const mockSiteWithoutUrl = { + getDeliveryConfig: sandbox.stub().returns({ + authorURL: null, + }), + }; + + const contextWithoutUrl = new MockContextBuilder() + .withSandbox(sandbox) + .withOverrides({ + env: { + AEM_AUTHOR_TOKEN: TEST_AEM_AUTHOR_TOKEN, + }, + site: mockSiteWithoutUrl, + }) + .build(); + + expect(() => AemClient.createFrom(contextWithoutUrl)) + .to.throw('AEM Author configuration missing: AEM_AUTHOR_URL and AEM_AUTHOR_TOKEN required'); + }); + + it('should throw error when AEM_AUTHOR_TOKEN is missing', () => { + const mockSiteWithUrl = { + getDeliveryConfig: sandbox.stub().returns({ + authorURL: TEST_AEM_AUTHOR_URL, + }), + }; + + const contextWithoutToken = new MockContextBuilder() + .withSandbox(sandbox) + .withOverrides({ + env: { + AEM_AUTHOR_TOKEN: null, + }, + site: mockSiteWithUrl, + }) + .build(); + + expect(() => AemClient.createFrom(contextWithoutToken)) + .to.throw('AEM Author configuration missing: AEM_AUTHOR_URL and AEM_AUTHOR_TOKEN required'); + }); + + it('should throw error when both environment variables are missing', () => { + const mockSiteWithoutUrl = { + getDeliveryConfig: sandbox.stub().returns({ + authorURL: null, + }), + }; + + const contextWithoutConfig = new MockContextBuilder() + .withSandbox(sandbox) + .withOverrides({ + env: {}, + site: mockSiteWithoutUrl, + }) + .build(); + + expect(() => AemClient.createFrom(contextWithoutConfig)) + .to.throw('AEM Author configuration missing: AEM_AUTHOR_URL and AEM_AUTHOR_TOKEN required'); + }); + + it('should throw error when authorURL is undefined', () => { + const mockSiteWithUndefinedUrl = { + getDeliveryConfig: sandbox.stub().returns({ + authorURL: undefined, + }), + }; + + const contextWithUndefinedUrl = new MockContextBuilder() + .withSandbox(sandbox) + .withOverrides({ + env: { + AEM_AUTHOR_TOKEN: TEST_AEM_AUTHOR_TOKEN, + }, + site: 
mockSiteWithUndefinedUrl, + }) + .build(); + + expect(() => AemClient.createFrom(contextWithUndefinedUrl)) + .to.throw('AEM Author configuration missing: AEM_AUTHOR_URL and AEM_AUTHOR_TOKEN required'); + }); + + it('should throw error when authorURL is empty string', () => { + const mockSiteWithEmptyUrl = { + getDeliveryConfig: sandbox.stub().returns({ + authorURL: '', + }), + }; + + const contextWithEmptyUrl = new MockContextBuilder() + .withSandbox(sandbox) + .withOverrides({ + env: { + AEM_AUTHOR_TOKEN: TEST_AEM_AUTHOR_TOKEN, + }, + site: mockSiteWithEmptyUrl, + }) + .build(); + + expect(() => AemClient.createFrom(contextWithEmptyUrl)) + .to.throw('AEM Author configuration missing: AEM_AUTHOR_URL and AEM_AUTHOR_TOKEN required'); + }); + + it('should throw error when authToken is undefined', () => { + const mockSiteWithUrl = { + getDeliveryConfig: sandbox.stub().returns({ + authorURL: TEST_AEM_AUTHOR_URL, + }), + }; + + const contextWithUndefinedToken = new MockContextBuilder() + .withSandbox(sandbox) + .withOverrides({ + env: { + AEM_AUTHOR_TOKEN: undefined, + }, + site: mockSiteWithUrl, + }) + .build(); + + expect(() => AemClient.createFrom(contextWithUndefinedToken)) + .to.throw('AEM Author configuration missing: AEM_AUTHOR_URL and AEM_AUTHOR_TOKEN required'); + }); + + it('should throw error when authToken is empty string', () => { + const mockSiteWithUrl = { + getDeliveryConfig: sandbox.stub().returns({ + authorURL: TEST_AEM_AUTHOR_URL, + }), + }; + + const contextWithEmptyToken = new MockContextBuilder() + .withSandbox(sandbox) + .withOverrides({ + env: { + AEM_AUTHOR_TOKEN: '', + }, + site: mockSiteWithUrl, + }) + .build(); + + expect(() => AemClient.createFrom(contextWithEmptyToken)) + .to.throw('AEM Author configuration missing: AEM_AUTHOR_URL and AEM_AUTHOR_TOKEN required'); + }); + }); + + describe('isBreakingPoint static method', () => { + it('should return true for null path', () => { + expect(AemClient.isBreakingPoint(null)).to.be.true; + }); + + 
it('should return true for undefined path', () => { + expect(AemClient.isBreakingPoint(undefined)).to.be.true; + }); + + it('should return true for empty string path', () => { + expect(AemClient.isBreakingPoint('')).to.be.true; + }); + + it('should return true for paths not starting with /content/dam/', () => { + expect(AemClient.isBreakingPoint(TEST_PATH_CONTENT)).to.be.true; + expect(AemClient.isBreakingPoint(TEST_PATH_OTHER)).to.be.true; + expect(AemClient.isBreakingPoint(TEST_PATH_RELATIVE)).to.be.true; + }); + + it('should return true for exact /content/dam path', () => { + expect(AemClient.isBreakingPoint(TEST_PATH_CONTENT_DAM)).to.be.true; + }); + + it('should return false for valid content dam paths', () => { + expect(AemClient.isBreakingPoint(TEST_PATH_TEST)).to.be.false; + expect(AemClient.isBreakingPoint(TEST_PATH_EN_US_IMAGES)).to.be.false; + expect(AemClient.isBreakingPoint(TEST_PATH_FOLDER_FILE)).to.be.false; + }); + }); + + describe('parseContentStatus static method', () => { + it('should return UNKNOWN for null status', () => { + expect(AemClient.parseContentStatus(null)).to.equal(STATUS_UNKNOWN); + }); + + it('should return UNKNOWN for undefined status', () => { + expect(AemClient.parseContentStatus(undefined)).to.equal(STATUS_UNKNOWN); + }); + + it('should return UNKNOWN for empty string status', () => { + expect(AemClient.parseContentStatus('')).to.equal(STATUS_UNKNOWN); + }); + + it('should parse valid statuses case-insensitively', () => { + expect(AemClient.parseContentStatus('published')).to.equal(STATUS_PUBLISHED); + expect(AemClient.parseContentStatus('PUBLISHED')).to.equal(STATUS_PUBLISHED); + expect(AemClient.parseContentStatus('Published')).to.equal(STATUS_PUBLISHED); + + expect(AemClient.parseContentStatus('modified')).to.equal('MODIFIED'); + expect(AemClient.parseContentStatus('MODIFIED')).to.equal('MODIFIED'); + + expect(AemClient.parseContentStatus('draft')).to.equal(STATUS_DRAFT); + 
expect(AemClient.parseContentStatus('DRAFT')).to.equal(STATUS_DRAFT); + + expect(AemClient.parseContentStatus('archived')).to.equal('ARCHIVED'); + expect(AemClient.parseContentStatus('ARCHIVED')).to.equal('ARCHIVED'); + + expect(AemClient.parseContentStatus('deleted')).to.equal('DELETED'); + expect(AemClient.parseContentStatus('DELETED')).to.equal('DELETED'); + }); + + it('should return UNKNOWN for invalid statuses', () => { + expect(AemClient.parseContentStatus('invalid')).to.equal(STATUS_UNKNOWN); + expect(AemClient.parseContentStatus('pending')).to.equal(STATUS_UNKNOWN); + expect(AemClient.parseContentStatus('123')).to.equal(STATUS_UNKNOWN); + }); + }); + + describe('delay static method', () => { + it('should delay for specified milliseconds', async () => { + const start = Date.now(); + await AemClient.delay(DELAY_MS_TEST); + const end = Date.now(); + + expect(end - start).to.be.at.least(DELAY_TOLERANCE_MS); // Allow some tolerance + }); + + it('should handle zero delay', async () => { + const start = Date.now(); + await AemClient.delay(DELAY_ZERO); + const end = Date.now(); + + expect(end - start).to.be.lessThan(DELAY_THRESHOLD_MS); + }); + }); + + describe('createUrl method', () => { + it('should create correct URL with path and projection', () => { + const client = new AemClient(context, TEST_AEM_AUTHOR_URL, TEST_AEM_AUTHOR_TOKEN_ALT); + const url = client.createUrl(TEST_PATH_IMAGE); + + expect(url.toString()).to.equal(`${TEST_AEM_AUTHOR_URL}${API_SITES_FRAGMENTS}?path=${encodeURIComponent(TEST_PATH_IMAGE)}&projection=${PROJECTION_MINIMAL}`); + }); + + it('should handle paths with special characters', () => { + const client = new AemClient(context, TEST_AEM_AUTHOR_URL, TEST_AEM_AUTHOR_TOKEN_ALT); + const url = client.createUrl(TEST_PATH_IMAGE_WITH_SPACES); + + expect(url.toString()).to.include('image+with+spaces'); + }); + + it('should handle authorUrl with trailing slash', () => { + const client = new AemClient(context, `${TEST_AEM_AUTHOR_URL}/`, 
TEST_AEM_AUTHOR_TOKEN_ALT); + const url = client.createUrl(TEST_PATH_TEST); + + expect(url.toString()).to.equal(`${TEST_AEM_AUTHOR_URL}${API_SITES_FRAGMENTS}?path=${encodeURIComponent(TEST_PATH_TEST)}&projection=${PROJECTION_MINIMAL}`); + }); + }); + + describe('createUrlWithPagination method', () => { + it('should create URL without cursor when cursor is null', () => { + const client = new AemClient(context, TEST_AEM_AUTHOR_URL, TEST_AEM_AUTHOR_TOKEN_ALT); + const url = client.createUrlWithPagination(TEST_PATH_TEST, null); + + expect(url.toString()).to.equal(`${TEST_AEM_AUTHOR_URL}${API_SITES_FRAGMENTS}?path=${encodeURIComponent(TEST_PATH_TEST)}&projection=${PROJECTION_MINIMAL}`); + }); + + it('should create URL with cursor when cursor is provided', () => { + const client = new AemClient(context, TEST_AEM_AUTHOR_URL, TEST_AEM_AUTHOR_TOKEN_ALT); + const url = client.createUrlWithPagination(TEST_PATH_TEST, TEST_CURSOR); + + expect(url.toString()).to.equal(`${TEST_AEM_AUTHOR_URL}${API_SITES_FRAGMENTS}?path=${encodeURIComponent(TEST_PATH_TEST)}&projection=${PROJECTION_MINIMAL}&cursor=${TEST_CURSOR}`); + }); + + it('should handle empty string cursor', () => { + const client = new AemClient(context, TEST_AEM_AUTHOR_URL, TEST_AEM_AUTHOR_TOKEN_ALT); + const url = client.createUrlWithPagination(TEST_PATH_TEST, ''); + + // Empty string cursor is falsy, so it doesn't get added + expect(url.toString()).to.equal(`${TEST_AEM_AUTHOR_URL}${API_SITES_FRAGMENTS}?path=${encodeURIComponent(TEST_PATH_TEST)}&projection=${PROJECTION_MINIMAL}`); + }); + }); + + describe('createAuthHeaders method', () => { + it('should create correct authorization headers', () => { + const client = new AemClient(context, TEST_AEM_AUTHOR_URL, TEST_AEM_AUTHOR_TOKEN_ALT); + const headers = client.createAuthHeaders(); + + expect(headers).to.deep.equal({ + Authorization: `${BEARER_PREFIX}${TEST_AEM_AUTHOR_TOKEN_ALT}`, + Accept: ACCEPT_JSON, + }); + }); + + it('should handle empty token', () => { + const client 
= new AemClient(context, TEST_AEM_AUTHOR_URL, ''); + const headers = client.createAuthHeaders(); + + expect(headers).to.deep.equal({ + Authorization: BEARER_PREFIX, + Accept: ACCEPT_JSON, + }); + }); + }); + + describe('isAvailable method', () => { + it('should return true when content is available', async () => { + const mockResponse = { + ok: true, + json: sandbox.stub().resolves({ + items: [{ path: TEST_PATH_IMAGE, status: STATUS_PUBLISHED }], + }), + }; + mockFetch.resolves(mockResponse); + + const client = new AemClient(context, TEST_AEM_AUTHOR_URL, TEST_AEM_AUTHOR_TOKEN_ALT, mockCache); + const result = await client.isAvailable(TEST_PATH_IMAGE); + + expect(result).to.be.true; + expect(mockFetch).to.have.been.calledOnce; + }); + + it('should return false when response is not ok', async () => { + const mockResponse = { ok: false }; + mockFetch.resolves(mockResponse); + + const client = new AemClient(context, TEST_AEM_AUTHOR_URL, TEST_AEM_AUTHOR_TOKEN_ALT); + const result = await client.isAvailable(TEST_PATH_IMAGE); + + expect(result).to.be.false; + }); + + it('should return false when no items found', async () => { + const mockResponse = { + ok: true, + json: sandbox.stub().resolves({ items: [] }), + }; + mockFetch.resolves(mockResponse); + + const client = new AemClient(context, TEST_AEM_AUTHOR_URL, TEST_AEM_AUTHOR_TOKEN_ALT); + const result = await client.isAvailable(TEST_PATH_IMAGE); + + expect(result).to.be.false; + }); + + // TODO: Need to investigate the wanted behavior: should we return true or false? 
it('should return true when multiple items found (folder access)', async () => {
  // The stubbed response lists two fragments (one published, one draft).
  const folderItems = [
    { path: TEST_PATH_IMAGE_1, status: STATUS_PUBLISHED },
    { path: TEST_PATH_IMAGE_2, status: STATUS_DRAFT },
  ];
  const jsonStub = sandbox.stub().resolves({ items: folderItems });
  mockFetch.resolves({ ok: true, json: jsonStub });

  const aemClient = new AemClient(context, TEST_AEM_AUTHOR_URL, TEST_AEM_AUTHOR_TOKEN_ALT);
  const available = await aemClient.isAvailable(TEST_PATH_TEST);

  expect(available).to.be.true;
});

it('should cache content when cache strategy is provided', async () => {
  const jsonStub = sandbox.stub().resolves({
    items: [{ path: TEST_PATH_IMAGE, status: STATUS_PUBLISHED }],
  });
  mockFetch.resolves({ ok: true, json: jsonStub });

  // mockCache is handed in as the fourth constructor argument.
  const aemClient = new AemClient(
    context,
    TEST_AEM_AUTHOR_URL,
    TEST_AEM_AUTHOR_TOKEN_ALT,
    mockCache,
  );
  await aemClient.isAvailable(TEST_PATH_IMAGE);

  // A separate literal is used for the expectation so the comparison is by value,
  // not by reference to the array that was fed into the stubbed response.
  const expectedItems = [{ path: TEST_PATH_IMAGE, status: STATUS_PUBLISHED }];
  expect(mockCache.cacheItems).to.have.been.calledOnce;
  expect(mockCache.cacheItems).to.have.been.calledWith(
    expectedItems,
    AemClient.parseContentStatus,
  );
});

it('should use NoOpCache when no cache is provided', async () => {
  const jsonStub = sandbox.stub().resolves({
    items: [{ path: TEST_PATH_IMAGE, status: STATUS_PUBLISHED }],
  });
  mockFetch.resolves({ ok: true, json: jsonStub });

  // No cache argument is passed to the constructor here.
  const aemClient = new AemClient(context, TEST_AEM_AUTHOR_URL, TEST_AEM_AUTHOR_TOKEN_ALT);
  const available = await aemClient.isAvailable(TEST_PATH_IMAGE);

  // The lookup must still resolve (and report availability) without a caller-supplied cache.
  expect(available).to.be.true;
});

it('should throw error when fetch fails', async () => {
  mockFetch.rejects(new Error('Network error'));

  const aemClient = new AemClient(context, TEST_AEM_AUTHOR_URL, TEST_AEM_AUTHOR_TOKEN_ALT);
  const expectedMessage = `Failed to check AEM Author availability for ${TEST_PATH_IMAGE}: Network error`;

  // The rejection is expected to carry the wrapped, path-specific message.
  await expect(aemClient.isAvailable(TEST_PATH_IMAGE))
    .to.be.rejectedWith(expectedMessage);
});

it('should throw error when JSON parsing fails', async () => { + const mockResponse = { + ok: true, + json: sandbox.stub().rejects(new Error('Invalid JSON')), + }; + mockFetch.resolves(mockResponse); + + const client = new AemClient(context, TEST_AEM_AUTHOR_URL, TEST_AEM_AUTHOR_TOKEN_ALT); + + await expect(client.isAvailable(TEST_PATH_IMAGE)) + .to.be.rejectedWith(`Failed to check AEM Author availability for ${TEST_PATH_IMAGE}: Invalid JSON`); + }); + }); + + describe('fetchWithPagination method', () => { + it('should fetch single page successfully', async () => { + const mockResponse = { + ok: true, + json: sandbox.stub().resolves({ + items: [{ path: TEST_PATH_IMAGE, status: STATUS_PUBLISHED }], + cursor: null, + }), + }; + mockFetch.resolves(mockResponse); + + const client = new AemClient(context, TEST_AEM_AUTHOR_URL, TEST_AEM_AUTHOR_TOKEN_ALT); + const result = await client.fetchWithPagination(TEST_PATH_TEST); + + expect(result).to.deep.equal({ + items: [{ path: TEST_PATH_IMAGE, status: STATUS_PUBLISHED }], + cursor: null, + }); + }); + + it('should fetch page with cursor', async () => { + const mockResponse = { + ok: true, + json: sandbox.stub().resolves({ + items: [{ path: TEST_PATH_IMAGE, status: STATUS_PUBLISHED }], + cursor: 'next-cursor', + }), + }; + mockFetch.resolves(mockResponse); + + const client = new AemClient(context, TEST_AEM_AUTHOR_URL, TEST_AEM_AUTHOR_TOKEN_ALT); + const result = await client.fetchWithPagination(TEST_PATH_TEST, 'current-cursor'); + + expect(result).to.deep.equal({ + items: [{ path: TEST_PATH_IMAGE, status: STATUS_PUBLISHED }], + cursor: 'next-cursor', + }); + }); + + it('should handle empty response', async () => { + const mockResponse = { + ok: true, + json: sandbox.stub().resolves({}), + }; + mockFetch.resolves(mockResponse); + + const client = new AemClient(context, TEST_AEM_AUTHOR_URL, TEST_AEM_AUTHOR_TOKEN_ALT); + const result = await client.fetchWithPagination(TEST_PATH_TEST); + + expect(result).to.deep.equal({ + items: 
[], + cursor: null, + }); + }); + + it('should throw error for non-ok response', async () => { + const mockResponse = { + ok: false, + status: HTTP_STATUS_NOT_FOUND, + statusText: HTTP_STATUS_TEXT_NOT_FOUND, + }; + mockFetch.resolves(mockResponse); + + const client = new AemClient(context, TEST_AEM_AUTHOR_URL, TEST_AEM_AUTHOR_TOKEN_ALT); + + await expect(client.fetchWithPagination(TEST_PATH_TEST)) + .to.be.rejectedWith(`HTTP ${HTTP_STATUS_NOT_FOUND}: ${HTTP_STATUS_TEXT_NOT_FOUND}`); + }); + + it('should throw error when fetch fails', async () => { + mockFetch.rejects(new Error('Network error')); + + const client = new AemClient(context, TEST_AEM_AUTHOR_URL, TEST_AEM_AUTHOR_TOKEN_ALT); + + await expect(client.fetchWithPagination(TEST_PATH_TEST)) + .to.be.rejectedWith('Network error'); + }); + }); + + describe('fetchContentWithPagination method', () => { + it('should fetch all pages and return combined results', async () => { + const mockResponses = [ + { + ok: true, + json: sandbox.stub().resolves({ + items: [{ path: TEST_PATH_IMAGE_1, status: STATUS_PUBLISHED }], + cursor: 'cursor-2', + }), + }, + { + ok: true, + json: sandbox.stub().resolves({ + items: [{ path: TEST_PATH_IMAGE_2, status: STATUS_DRAFT }], + cursor: null, + }), + }, + ]; + mockFetch.onCall(0).resolves(mockResponses[0]); + mockFetch.onCall(1).resolves(mockResponses[1]); + + const client = new AemClient(context, TEST_AEM_AUTHOR_URL, TEST_AEM_AUTHOR_TOKEN_ALT, mockCache); + const result = await client.fetchContentWithPagination(TEST_PATH_TEST); + + expect(result).to.have.lengthOf(2); + expect(result[0].path).to.equal(TEST_PATH_IMAGE_1); + expect(result[1].path).to.equal(TEST_PATH_IMAGE_2); + expect(mockFetch).to.have.been.calledTwice; + }); + + it('should stop at maximum page limit', async () => { + const mockResponse = { + ok: true, + json: sandbox.stub().resolves({ + items: [{ path: TEST_PATH_IMAGE, status: STATUS_PUBLISHED }], + cursor: 'always-has-cursor', + }), + }; + 
mockFetch.resolves(mockResponse); + + const client = new AemClient(context, TEST_AEM_AUTHOR_URL, TEST_AEM_AUTHOR_TOKEN_ALT); + const result = await client.fetchContentWithPagination(TEST_PATH_TEST); + + expect(mockFetch.callCount).to.equal(AemClient.MAX_PAGES); + expect(result).to.have.lengthOf(AemClient.MAX_PAGES); + }); + + it('should handle errors gracefully and return partial results', async () => { + const mockResponses = [ + { + ok: true, + json: sandbox.stub().resolves({ + items: [{ path: TEST_PATH_IMAGE_1, status: STATUS_PUBLISHED }], + cursor: 'cursor-2', + }), + }, + ]; + mockFetch.onCall(0).resolves(mockResponses[0]); + mockFetch.onCall(1).rejects(new Error('Network error')); + + const client = new AemClient(context, TEST_AEM_AUTHOR_URL, TEST_AEM_AUTHOR_TOKEN_ALT); + const result = await client.fetchContentWithPagination(TEST_PATH_TEST); + + expect(result).to.have.lengthOf(1); + expect(result[0].path).to.equal(TEST_PATH_IMAGE_1); + }); + + it('should cache all fetched items when cache strategy is provided', async () => { + const mockResponse = { + ok: true, + json: sandbox.stub().resolves({ + items: [ + { path: TEST_PATH_IMAGE_1, status: STATUS_PUBLISHED }, + { path: TEST_PATH_IMAGE_2, status: STATUS_DRAFT }, + ], + cursor: null, + }), + }; + mockFetch.resolves(mockResponse); + + const client = new AemClient(context, TEST_AEM_AUTHOR_URL, TEST_AEM_AUTHOR_TOKEN_ALT, mockCache); + await client.fetchContentWithPagination(TEST_PATH_TEST); + + expect(mockCache.cacheItems).to.have.been.calledOnce; + expect(mockCache.cacheItems).to.have.been.calledWith( + [ + { path: TEST_PATH_IMAGE_1, status: STATUS_PUBLISHED }, + { path: TEST_PATH_IMAGE_2, status: STATUS_DRAFT }, + ], + AemClient.parseContentStatus, + ); + }); + }); + + describe('fetchContent method', () => { + it('should delegate to fetchContentWithPagination', async () => { + const client = new AemClient(context, TEST_AEM_AUTHOR_URL, TEST_AEM_AUTHOR_TOKEN_ALT); + const fetchContentWithPaginationStub = 
sandbox.stub(client, 'fetchContentWithPagination').resolves([]); + + await client.fetchContent(TEST_PATH_TEST); + + expect(fetchContentWithPaginationStub).to.have.been.calledWith(TEST_PATH_TEST); + }); + + it('should wrap errors with descriptive message', async () => { + const client = new AemClient(context, TEST_AEM_AUTHOR_URL, TEST_AEM_AUTHOR_TOKEN_ALT); + sandbox.stub(client, 'fetchContentWithPagination').rejects(new Error('Original error')); + + await expect(client.fetchContent(TEST_PATH_TEST)) + .to.be.rejectedWith(`Failed to fetch AEM Author content for ${TEST_PATH_TEST}: Original error`); + }); + }); + + describe('getChildrenFromPath method', () => { + it('should return empty array when cache is not available', async () => { + const noOpCache = new NoOpCache(); + const client = new AemClient(context, TEST_AEM_AUTHOR_URL, TEST_AEM_AUTHOR_TOKEN_ALT, noOpCache); + const result = await client.getChildrenFromPath(TEST_PATH_TEST); + + expect(result).to.deep.equal([]); + }); + + it('should return empty array for breaking point paths', async () => { + const client = new AemClient(context, TEST_AEM_AUTHOR_URL, TEST_AEM_AUTHOR_TOKEN_ALT, mockCache); + const result = await client.getChildrenFromPath(TEST_PATH_CONTENT_DAM); + + expect(result).to.deep.equal([]); + }); + + it('should return cached children when available', async () => { + const cachedChildren = [{ path: TEST_PATH_CHILD_1 }]; + mockCache.findChildren.returns(cachedChildren); + + const client = new AemClient(context, TEST_AEM_AUTHOR_URL, TEST_AEM_AUTHOR_TOKEN_ALT, mockCache); + const result = await client.getChildrenFromPath(TEST_PATH_TEST); + + expect(result).to.equal(cachedChildren); + expect(mockCache.findChildren).to.have.been.calledWith(TEST_PATH_TEST); + }); + + it('should fetch content when parent is available but not cached', async () => { + mockCache.findChildren.onCall(0).returns([]); // No cached children initially + mockCache.findChildren.onCall(EXPECTED_SUGGESTIONS_COUNT_1).returns([{ path: 
TEST_PATH_CHILD_1 }]); // After fetching + + const mockResponse = { + ok: true, + json: sandbox.stub().resolves({ + items: [{ path: TEST_PATH_CHILD_1, status: STATUS_PUBLISHED }], + }), + }; + mockFetch.resolves(mockResponse); + + const client = new AemClient(context, TEST_AEM_AUTHOR_URL, TEST_AEM_AUTHOR_TOKEN_ALT, mockCache); + const fetchContentStub = sandbox.stub(client, 'fetchContent').resolves(); + + const result = await client.getChildrenFromPath(TEST_PATH_TEST); + + expect(fetchContentStub).to.have.been.calledWith(TEST_PATH_TEST); + expect(result).to.deep.equal([{ path: TEST_PATH_CHILD_1 }]); + }); + + it('should traverse up hierarchy when parent is not available', async () => { + // Setup: first path has no children, second path (parent) has children + mockCache.findChildren.onCall(0).returns([]); // /content/dam/test/child + mockCache.findChildren.onCall(EXPECTED_SUGGESTIONS_COUNT_1).returns([{ path: TEST_PATH_PARENT_CHILD }]); // /content/dam/parent + + mockPathUtils.getParentPath.returns(TEST_PATH_PARENT); + + // First fetch fails (path not available), second succeeds (parent available) + const mockResponse1 = { ok: false }; + const mockResponse2 = { + ok: true, + json: sandbox.stub().resolves({ + items: [{ path: TEST_PATH_PARENT_CHILD, status: STATUS_PUBLISHED }], + }), + }; + mockFetch.onCall(0).resolves(mockResponse1); // First isAvailable call + mockFetch.onCall(1).resolves(mockResponse2); // Second isAvailable call for parent + + const client = new AemClient(context, TEST_AEM_AUTHOR_URL, TEST_AEM_AUTHOR_TOKEN_ALT, mockCache); + const result = await client.getChildrenFromPath(TEST_PATH_CHILD); + + expect(mockPathUtils.getParentPath).to.have.been.calledWith(TEST_PATH_CHILD); + expect(result).to.deep.equal([{ path: TEST_PATH_PARENT_CHILD }]); + }); + + it('should return empty array when no parent path found', async () => { + mockCache.findChildren.returns([]); + mockPathUtils.getParentPath.returns(null); + + const mockResponse = { ok: false }; + 
mockFetch.resolves(mockResponse); + + const client = new AemClient(context, TEST_AEM_AUTHOR_URL, TEST_AEM_AUTHOR_TOKEN_ALT, mockCache); + const result = await client.getChildrenFromPath(TEST_PATH_TEST); + + expect(result).to.deep.equal([]); + }); + + it('should handle errors during availability check', async () => { + mockCache.findChildren.returns([]); + mockFetch.rejects(new Error('Network error')); + + const client = new AemClient(context, TEST_AEM_AUTHOR_URL, TEST_AEM_AUTHOR_TOKEN_ALT, mockCache); + const result = await client.getChildrenFromPath(TEST_PATH_TEST); + + expect(result).to.deep.equal([]); + }); + + it('should continue with cached data when fetchContent fails', async () => { + mockCache.findChildren.onCall(0).returns([]); // No cached children initially + mockCache.findChildren.onCall(EXPECTED_SUGGESTIONS_COUNT_1).returns([{ path: TEST_PATH_CHILD_1 }]); // After failed fetch + + const mockResponse = { + ok: true, + json: sandbox.stub().resolves({ + items: [{ path: TEST_PATH_CHILD_1, status: STATUS_PUBLISHED }], + }), + }; + mockFetch.resolves(mockResponse); + + const client = new AemClient(context, TEST_AEM_AUTHOR_URL, TEST_AEM_AUTHOR_TOKEN_ALT, mockCache); + const fetchContentStub = sandbox.stub(client, 'fetchContent').rejects(new Error('Fetch failed')); + + const result = await client.getChildrenFromPath(TEST_PATH_TEST); + + expect(fetchContentStub).to.have.been.calledWith(TEST_PATH_TEST); + expect(result).to.deep.equal([{ path: TEST_PATH_CHILD_1 }]); + }); + }); + + describe('integration scenarios', () => { + it('should work end-to-end for available content', async () => { + const mockResponse = { + ok: true, + json: sandbox.stub().resolves({ + items: [{ path: TEST_PATH_IMAGE, status: STATUS_PUBLISHED }], + }), + }; + mockFetch.resolves(mockResponse); + + const client = AemClient.createFrom(context, mockCache); + const isAvailable = await client.isAvailable(TEST_PATH_IMAGE); + + expect(isAvailable).to.be.true; + 
expect(mockFetch).to.have.been.calledWith( + `${TEST_AEM_AUTHOR_URL}${API_SITES_FRAGMENTS}?path=${encodeURIComponent(TEST_PATH_IMAGE)}&projection=${PROJECTION_MINIMAL}`, + { + headers: { + Authorization: `${BEARER_PREFIX}${TEST_AEM_AUTHOR_TOKEN}`, + Accept: ACCEPT_JSON, + }, + }, + ); + }); + + it('should handle complete pagination workflow', async () => { + const mockResponses = [ + { + ok: true, + json: sandbox.stub().resolves({ + items: [{ path: TEST_PATH_IMAGE_1, status: STATUS_PUBLISHED }], + cursor: 'cursor-2', + }), + }, + { + ok: true, + json: sandbox.stub().resolves({ + items: [{ path: TEST_PATH_IMAGE_2, status: STATUS_DRAFT }], + cursor: null, + }), + }, + ]; + mockFetch.onCall(0).resolves(mockResponses[0]); + mockFetch.onCall(1).resolves(mockResponses[1]); + + const client = AemClient.createFrom(context, mockCache); + const result = await client.fetchContent(TEST_PATH_TEST); + + expect(result).to.have.lengthOf(2); + expect(mockFetch).to.have.been.calledTwice; + expect(mockCache.cacheItems).to.have.been.calledOnce; + }); + + it('should handle complete getChildrenFromPath workflow', async () => { + const cachedChildren = [{ path: TEST_PATH_CHILD_1 }]; + mockCache.findChildren.returns(cachedChildren); + + const client = AemClient.createFrom(context, mockCache); + const result = await client.getChildrenFromPath(TEST_PATH_TEST); + + expect(result).to.equal(cachedChildren); + }); + }); +}); diff --git a/test/audits/content-fragment-404/analysis-strategy.test.js b/test/audits/content-fragment-404/analysis-strategy.test.js new file mode 100644 index 000000000..3c93bad88 --- /dev/null +++ b/test/audits/content-fragment-404/analysis-strategy.test.js @@ -0,0 +1,520 @@ +/* + * Copyright 2025 Adobe. All rights reserved. + * This file is licensed to you under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
You may obtain a copy + * of the License at http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under + * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS + * OF ANY KIND, either express or implied. See the License for the specific language + * governing permissions and limitations under the License. + */ + +/* eslint-env mocha */ +import { expect, use } from 'chai'; +import sinon from 'sinon'; +import sinonChai from 'sinon-chai'; +import chaiAsPromised from 'chai-as-promised'; +import esmock from 'esmock'; +import { MockContextBuilder } from '../../shared.js'; + +use(sinonChai); +use(chaiAsPromised); + +import { + TEST_PATH_BROKEN, + TEST_PATH_BROKEN_1, + TEST_PATH_BROKEN_2, + TEST_PATH_BROKEN_3, + TEST_PATH_BROKEN_NO_EXT, + TEST_PATH_FIXED_1, + TEST_PATH_SUGGESTED, + TEST_PATH_SUGGESTED_2, + PUBLISH_RULE_PRIORITY, + LOCALE_FALLBACK_RULE_PRIORITY, + SIMILAR_PATH_RULE_PRIORITY, + STATUS_PUBLISHED, + STATUS_DRAFT, + SUGGESTION_TYPE_PUBLISH, + SUGGESTION_TYPE_LOCALE, + SUGGESTION_TYPE_SIMILAR, + SUGGESTION_TYPE_NOT_FOUND, + LOCALE_CODE_EN_US, + EXPECTED_RULES_COUNT, + EXPECTED_EMPTY_COUNT, +} from './test-constants.js'; + +const EXPECTED_SUGGESTIONS_COUNT_3 = 3; +const EXPECTED_SUGGESTIONS_COUNT_1 = 1; + +describe('AnalysisStrategy', () => { + let sandbox; + let context; + let mockAemClient; + let mockPathIndex; + let mockPublishRule; + let mockLocaleFallbackRule; + let mockSimilarPathRule; + let mockSuggestion; + let mockContentPath; + let mockLocale; + let AnalysisStrategy; + + beforeEach(async () => { + sandbox = sinon.createSandbox(); + + context = new MockContextBuilder() + .withSandbox(sandbox) + .withOverrides({ + log: { + info: sandbox.spy(), + debug: sandbox.spy(), + warn: sandbox.spy(), + error: sandbox.spy(), + }, + }) + .build(); + + mockPathIndex = { + find: sandbox.stub().returns(null), + insertContentPath: sandbox.stub(), + 
parseContentStatus: sandbox.stub().returns(STATUS_PUBLISHED), + }; + + // Mock rules with priority methods + mockPublishRule = { + getPriority: sandbox.stub().returns(PUBLISH_RULE_PRIORITY), + apply: sandbox.stub().resolves(null), + constructor: { name: 'PublishRule' }, + }; + + mockLocaleFallbackRule = { + getPriority: sandbox.stub().returns(LOCALE_FALLBACK_RULE_PRIORITY), + apply: sandbox.stub().resolves(null), + constructor: { name: 'LocaleFallbackRule' }, + }; + + mockSimilarPathRule = { + getPriority: sandbox.stub().returns(SIMILAR_PATH_RULE_PRIORITY), + apply: sandbox.stub().resolves(null), + constructor: { name: 'SimilarPathRule' }, + }; + + mockSuggestion = { + notFound: sandbox.stub().returns({ + type: SUGGESTION_TYPE_NOT_FOUND, + requestedPath: TEST_PATH_BROKEN, + suggestedPath: null, + reason: 'Not found', + }), + }; + + mockContentPath = sandbox.stub().returns({ + isPublished: sandbox.stub().returns(true), + status: STATUS_PUBLISHED, + }); + + mockLocale = { + fromPath: sandbox.stub().returns({ code: LOCALE_CODE_EN_US }), + }; + + const module = await esmock('../../../src/content-fragment-404/analysis/analysis-strategy.js', { + '../../../src/content-fragment-404/rules/publish-rule.js': { + PublishRule: function PublishRule() { return mockPublishRule; }, + }, + '../../../src/content-fragment-404/rules/locale-fallback-rule.js': { + LocaleFallbackRule: function LocaleFallbackRule() { return mockLocaleFallbackRule; }, + }, + '../../../src/content-fragment-404/rules/similar-path-rule.js': { + SimilarPathRule: function SimilarPathRule() { return mockSimilarPathRule; }, + }, + '../../../src/content-fragment-404/domain/suggestion/suggestion.js': { + Suggestion: mockSuggestion, + SuggestionType: { + PUBLISH: 'PUBLISH', + LOCALE: 'LOCALE', + SIMILAR: 'SIMILAR', + NOT_FOUND: 'NOT_FOUND', + }, + }, + '../../../src/content-fragment-404/domain/content/content-path.js': { + ContentPath: mockContentPath, + }, + 
'../../../src/content-fragment-404/domain/language/locale.js': { + Locale: mockLocale, + }, + '../../../src/content-fragment-404/domain/index/path-index.js': { + PathIndex: mockPathIndex, + }, + }); + + AnalysisStrategy = module.AnalysisStrategy; + }); + + afterEach(() => { + sandbox.restore(); + }); + + describe('constructor', () => { + it('should initialize with context, AEM client, and path index', () => { + const strategy = new AnalysisStrategy(context, mockAemClient, mockPathIndex); + + expect(strategy.context).to.equal(context); + expect(strategy.aemClient).to.equal(mockAemClient); + expect(strategy.pathIndex).to.equal(mockPathIndex); + expect(strategy.rules).to.have.lengthOf(EXPECTED_RULES_COUNT); + }); + + it('should sort rules by priority', () => { + mockPublishRule.getPriority.returns(SIMILAR_PATH_RULE_PRIORITY); + mockLocaleFallbackRule.getPriority.returns(PUBLISH_RULE_PRIORITY); + mockSimilarPathRule.getPriority.returns(LOCALE_FALLBACK_RULE_PRIORITY); + + const strategy = new AnalysisStrategy(context, mockAemClient, mockPathIndex); + + expect(strategy.rules[0]).to.equal(mockLocaleFallbackRule); + expect(strategy.rules[1]).to.equal(mockSimilarPathRule); + expect(strategy.rules[2]).to.equal(mockPublishRule); + }); + }); + + describe('analyze method', () => { + it('should analyze multiple broken paths', async () => { + const contentFragment404s = [ + TEST_PATH_BROKEN_1, + TEST_PATH_BROKEN_2, + ]; + + const suggestion1 = { type: SUGGESTION_TYPE_PUBLISH, requestedPath: TEST_PATH_BROKEN_1 }; + const suggestion2 = { type: SUGGESTION_TYPE_LOCALE, requestedPath: TEST_PATH_BROKEN_2 }; + + const strategy = new AnalysisStrategy(context, mockAemClient, mockPathIndex); + const analyzePathStub = sandbox.stub(strategy, 'analyzePath'); + analyzePathStub.onCall(0).resolves(suggestion1); + analyzePathStub.onCall(1).resolves(suggestion2); + + const processSuggestionsStub = sandbox.stub(strategy, 'processSuggestions').resolves([suggestion1, suggestion2]); + + const result = 
await strategy.analyze(contentFragment404s); + + expect(analyzePathStub).to.have.been.calledTwice; + expect(analyzePathStub.firstCall).to.have.been.calledWith(TEST_PATH_BROKEN_1); + expect(analyzePathStub.secondCall).to.have.been.calledWith(TEST_PATH_BROKEN_2); + expect(processSuggestionsStub).to.have.been.calledWith([suggestion1, suggestion2]); + expect(result).to.deep.equal([suggestion1, suggestion2]); + }); + + it('should filter out null suggestions', async () => { + const contentFragment404s = [ + TEST_PATH_BROKEN_1, + TEST_PATH_BROKEN_2, + TEST_PATH_BROKEN_3, + ]; + + const suggestion1 = { type: SUGGESTION_TYPE_PUBLISH, requestedPath: TEST_PATH_BROKEN_1 }; + + const strategy = new AnalysisStrategy(context, mockAemClient, mockPathIndex); + const analyzePathStub = sandbox.stub(strategy, 'analyzePath'); + analyzePathStub.onCall(0).resolves(suggestion1); + analyzePathStub.onCall(1).resolves(null); + analyzePathStub.onCall(2).resolves(undefined); + + const processSuggestionsStub = sandbox.stub(strategy, 'processSuggestions').resolves([suggestion1]); + + const result = await strategy.analyze(contentFragment404s); + + expect(analyzePathStub).to.have.been.calledThrice; + expect(processSuggestionsStub).to.have.been.calledWith([suggestion1]); + expect(result).to.deep.equal([suggestion1]); + }); + + it('should handle empty broken paths array', async () => { + const strategy = new AnalysisStrategy(context, mockAemClient, mockPathIndex); + const processSuggestionsStub = sandbox.stub(strategy, 'processSuggestions').resolves([]); + + const result = await strategy.analyze([]); + + expect(processSuggestionsStub).to.have.been.calledWith([]); + expect(result).to.deep.equal([]); + }); + }); + + describe('analyzePath method', () => { + it('should return first successful rule suggestion', async () => { + const brokenPath = TEST_PATH_BROKEN; + const suggestion = { type: SUGGESTION_TYPE_PUBLISH, requestedPath: brokenPath }; + + mockPublishRule.apply.resolves(suggestion); + 
mockLocaleFallbackRule.apply.resolves(null); + mockSimilarPathRule.apply.resolves(null); + + const strategy = new AnalysisStrategy(context, mockAemClient, mockPathIndex); + const result = await strategy.analyzePath(brokenPath); + + expect(mockPublishRule.apply).to.have.been.calledWith(brokenPath); + expect(mockLocaleFallbackRule.apply).to.not.have.been.called; + expect(mockSimilarPathRule.apply).to.not.have.been.called; + expect(result).to.equal(suggestion); + }); + + it('should try all rules until one succeeds', async () => { + const brokenPath = TEST_PATH_BROKEN; + const suggestion = { type: SUGGESTION_TYPE_LOCALE, requestedPath: brokenPath }; + + mockPublishRule.apply.resolves(null); + mockLocaleFallbackRule.apply.resolves(suggestion); + mockSimilarPathRule.apply.resolves(null); + + const strategy = new AnalysisStrategy(context, mockAemClient, mockPathIndex); + const result = await strategy.analyzePath(brokenPath); + + expect(mockPublishRule.apply).to.have.been.calledWith(brokenPath); + expect(mockLocaleFallbackRule.apply).to.have.been.calledWith(brokenPath); + expect(mockSimilarPathRule.apply).to.not.have.been.called; + expect(result).to.equal(suggestion); + }); + + it('should return notFound suggestion when no rules succeed', async () => { + const brokenPath = TEST_PATH_BROKEN; + const notFoundSuggestion = { type: SUGGESTION_TYPE_NOT_FOUND, requestedPath: brokenPath }; + + mockPublishRule.apply.resolves(null); + mockLocaleFallbackRule.apply.resolves(null); + mockSimilarPathRule.apply.resolves(null); + mockSuggestion.notFound.returns(notFoundSuggestion); + + const strategy = new AnalysisStrategy(context, mockAemClient, mockPathIndex); + const result = await strategy.analyzePath(brokenPath); + + expect(mockPublishRule.apply).to.have.been.calledWith(brokenPath); + expect(mockLocaleFallbackRule.apply).to.have.been.calledWith(brokenPath); + expect(mockSimilarPathRule.apply).to.have.been.calledWith(brokenPath); + 
expect(mockSuggestion.notFound).to.have.been.calledWith(brokenPath); + expect(result).to.equal(notFoundSuggestion); + }); + + it('should handle rule errors and continue to next rule', async () => { + const brokenPath = TEST_PATH_BROKEN; + const suggestion = { type: SUGGESTION_TYPE_SIMILAR, requestedPath: brokenPath }; + + mockPublishRule.apply.rejects(new Error('Publish rule failed')); + mockLocaleFallbackRule.apply.rejects(new Error('Locale rule failed')); + mockSimilarPathRule.apply.resolves(suggestion); + + const strategy = new AnalysisStrategy(context, mockAemClient, mockPathIndex); + const result = await strategy.analyzePath(brokenPath); + + expect(mockPublishRule.apply).to.have.been.calledWith(brokenPath); + expect(mockLocaleFallbackRule.apply).to.have.been.calledWith(brokenPath); + expect(mockSimilarPathRule.apply).to.have.been.calledWith(brokenPath); + expect(context.log.error).to.have.been.calledTwice; + expect(result).to.equal(suggestion); + }); + + it('should return notFound when all rules fail with errors', async () => { + const brokenPath = TEST_PATH_BROKEN; + const notFoundSuggestion = { type: SUGGESTION_TYPE_NOT_FOUND, requestedPath: brokenPath }; + + mockPublishRule.apply.rejects(new Error('Publish rule failed')); + mockLocaleFallbackRule.apply.rejects(new Error('Locale rule failed')); + mockSimilarPathRule.apply.rejects(new Error('Similar rule failed')); + mockSuggestion.notFound.returns(notFoundSuggestion); + + const strategy = new AnalysisStrategy(context, mockAemClient, mockPathIndex); + const result = await strategy.analyzePath(brokenPath); + + expect(context.log.error).to.have.been.calledThrice; + expect(result).to.equal(notFoundSuggestion); + }); + + it('should log rule application success', async () => { + const brokenPath = TEST_PATH_BROKEN; + const suggestion = { type: SUGGESTION_TYPE_PUBLISH, requestedPath: brokenPath }; + + mockPublishRule.apply.resolves(suggestion); + + const strategy = new AnalysisStrategy(context, mockAemClient, 
mockPathIndex); + await strategy.analyzePath(brokenPath); + + }); + }); + + describe('processSuggestions method', () => { + it('should pass through PUBLISH and NOT_FOUND suggestions unchanged', async () => { + const suggestions = [ + { type: SUGGESTION_TYPE_PUBLISH, requestedPath: TEST_PATH_BROKEN_1, suggestedPath: TEST_PATH_FIXED_1 }, + { type: SUGGESTION_TYPE_NOT_FOUND, requestedPath: TEST_PATH_BROKEN_2, suggestedPath: null }, + ]; + + const strategy = new AnalysisStrategy(context, mockAemClient, mockPathIndex); + const result = await strategy.processSuggestions(suggestions); + + expect(result).to.deep.equal(suggestions); + expect(mockPathIndex.find).to.not.have.been.called; + }); + + it('should process LOCALE suggestions with published content', async () => { + const suggestions = [ + { type: SUGGESTION_TYPE_LOCALE, requestedPath: TEST_PATH_BROKEN, suggestedPath: TEST_PATH_SUGGESTED }, + ]; + + const contentPath = { + isPublished: sandbox.stub().returns(true), + status: STATUS_PUBLISHED, + }; + mockPathIndex.find.returns(contentPath); + + const strategy = new AnalysisStrategy(context, mockAemClient, mockPathIndex); + const result = await strategy.processSuggestions(suggestions); + + expect(mockPathIndex.find).to.have.been.calledWith(TEST_PATH_SUGGESTED); + expect(result).to.deep.equal(suggestions); + }); + + it('should process SIMILAR suggestions with published content', async () => { + const suggestions = [ + { type: SUGGESTION_TYPE_SIMILAR, requestedPath: TEST_PATH_BROKEN, suggestedPath: TEST_PATH_SUGGESTED }, + ]; + + const contentPath = { + isPublished: sandbox.stub().returns(true), + status: STATUS_PUBLISHED, + }; + mockPathIndex.find.returns(contentPath); + + const strategy = new AnalysisStrategy(context, mockAemClient, mockPathIndex); + const result = await strategy.processSuggestions(suggestions); + + expect(result).to.deep.equal(suggestions); + }); + + it('should update reason for unpublished content', async () => { + const suggestions = [ + { type: 
SUGGESTION_TYPE_LOCALE, requestedPath: TEST_PATH_BROKEN, suggestedPath: TEST_PATH_SUGGESTED }, + ]; + + const contentPath = { + isPublished: sandbox.stub().returns(false), + status: STATUS_DRAFT, + }; + mockPathIndex.find.returns(contentPath); + + const strategy = new AnalysisStrategy(context, mockAemClient, mockPathIndex); + const result = await strategy.processSuggestions(suggestions); + + expect(result).to.have.lengthOf(EXPECTED_SUGGESTIONS_COUNT_1); + expect(result[0].reason).to.equal(`Content is in ${STATUS_DRAFT} state. Suggest publishing.`); + }); + + it('should handle empty suggestions array', async () => { + const strategy = new AnalysisStrategy(context, mockAemClient, mockPathIndex); + const result = await strategy.processSuggestions([]); + + expect(result).to.deep.equal([]); + }); + + it('should handle mixed suggestion types', async () => { + const suggestions = [ + { type: SUGGESTION_TYPE_PUBLISH, requestedPath: TEST_PATH_BROKEN_1, suggestedPath: TEST_PATH_FIXED_1 }, + { type: SUGGESTION_TYPE_LOCALE, requestedPath: TEST_PATH_BROKEN_2, suggestedPath: TEST_PATH_SUGGESTED_2 }, + { type: SUGGESTION_TYPE_NOT_FOUND, requestedPath: TEST_PATH_BROKEN_3, suggestedPath: null }, + ]; + + const contentPath = { + isPublished: sandbox.stub().returns(true), + status: STATUS_PUBLISHED, + }; + mockPathIndex.find.onCall(EXPECTED_EMPTY_COUNT).returns(contentPath); + + const strategy = new AnalysisStrategy(context, mockAemClient, mockPathIndex); + const result = await strategy.processSuggestions(suggestions); + + expect(result).to.have.lengthOf(EXPECTED_SUGGESTIONS_COUNT_3); + expect(result[0]).to.equal(suggestions[0]); // PUBLISH unchanged + expect(result[1]).to.equal(suggestions[1]); // LOCALE processed but unchanged (published) + expect(result[2]).to.equal(suggestions[2]); // NOT_FOUND unchanged + }); + }); + + describe('integration scenarios', () => { + it('should work end-to-end with successful rule application', async () => { + const contentFragment404s = 
[TEST_PATH_BROKEN_NO_EXT]; + const suggestion = { + type: SUGGESTION_TYPE_LOCALE, + requestedPath: TEST_PATH_BROKEN_NO_EXT, + suggestedPath: TEST_PATH_SUGGESTED, + }; + + mockLocaleFallbackRule.apply.resolves(suggestion); + + const contentPath = { + isPublished: sandbox.stub().returns(true), + status: STATUS_PUBLISHED, + }; + mockPathIndex.find.returns(contentPath); + + const strategy = new AnalysisStrategy(context, mockAemClient, mockPathIndex); + const result = await strategy.analyze(contentFragment404s); + + expect(result).to.have.lengthOf(EXPECTED_SUGGESTIONS_COUNT_1); + expect(result[0]).to.equal(suggestion); + }); + + it('should handle no successful rules scenario', async () => { + const contentFragment404s = [TEST_PATH_BROKEN]; + const notFoundSuggestion = { + type: SUGGESTION_TYPE_NOT_FOUND, + requestedPath: TEST_PATH_BROKEN, + suggestedPath: null, + }; + + mockPublishRule.apply.resolves(null); + mockLocaleFallbackRule.apply.resolves(null); + mockSimilarPathRule.apply.resolves(null); + mockSuggestion.notFound.returns(notFoundSuggestion); + + const strategy = new AnalysisStrategy(context, mockAemClient, mockPathIndex); + const result = await strategy.analyze(contentFragment404s); + + expect(result).to.have.lengthOf(EXPECTED_SUGGESTIONS_COUNT_1); + expect(result[0]).to.equal(notFoundSuggestion); + }); + + it('should handle multiple paths with different outcomes', async () => { + const contentFragment404s = [ + TEST_PATH_BROKEN_1, + TEST_PATH_BROKEN_2, + TEST_PATH_BROKEN_3, + ]; + + const suggestion1 = { type: SUGGESTION_TYPE_PUBLISH, requestedPath: TEST_PATH_BROKEN_1 }; + const suggestion2 = { type: SUGGESTION_TYPE_LOCALE, requestedPath: TEST_PATH_BROKEN_2, suggestedPath: TEST_PATH_SUGGESTED_2 }; + const notFoundSuggestion = { type: SUGGESTION_TYPE_NOT_FOUND, requestedPath: TEST_PATH_BROKEN_3 }; + + // Setup rule responses for different paths + mockPublishRule.apply.onCall(EXPECTED_EMPTY_COUNT).resolves(suggestion1); + 
mockPublishRule.apply.onCall(EXPECTED_SUGGESTIONS_COUNT_1).resolves(null); + mockPublishRule.apply.onCall(LOCALE_FALLBACK_RULE_PRIORITY).resolves(null); + + mockLocaleFallbackRule.apply.onCall(EXPECTED_EMPTY_COUNT).resolves(suggestion2); + mockLocaleFallbackRule.apply.onCall(EXPECTED_SUGGESTIONS_COUNT_1).resolves(null); + + mockSimilarPathRule.apply.resolves(null); + mockSuggestion.notFound.returns(notFoundSuggestion); + + const contentPath = { + isPublished: sandbox.stub().returns(true), + status: STATUS_PUBLISHED, + }; + mockPathIndex.find.returns(contentPath); + + const strategy = new AnalysisStrategy(context, mockAemClient, mockPathIndex); + const result = await strategy.analyze(contentFragment404s); + + expect(result).to.have.lengthOf(EXPECTED_SUGGESTIONS_COUNT_3); + expect(result[0]).to.equal(suggestion1); + expect(result[1]).to.equal(suggestion2); + expect(result[2]).to.equal(notFoundSuggestion); + }); + }); +}); diff --git a/test/audits/content-fragment-404/athena-collector.test.js b/test/audits/content-fragment-404/athena-collector.test.js new file mode 100644 index 000000000..6fa61f0aa --- /dev/null +++ b/test/audits/content-fragment-404/athena-collector.test.js @@ -0,0 +1,952 @@ +/* + * Copyright 2025 Adobe. All rights reserved. + * This file is licensed to you under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. You may obtain a copy + * of the License at http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under + * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS + * OF ANY KIND, either express or implied. See the License for the specific language + * governing permissions and limitations under the License. 
+ */ + +/* eslint-env mocha */ +import { expect, use } from 'chai'; +import sinon from 'sinon'; +import sinonChai from 'sinon-chai'; +import chaiAsPromised from 'chai-as-promised'; +import esmock from 'esmock'; +import { MockContextBuilder } from '../../shared.js'; + +use(sinonChai); +use(chaiAsPromised); + +import { + TEST_DATABASE, + TEST_TABLE, + TEST_IMS_ORG, + TEST_HOSTNAME, + TEST_BASE_URL_SITE as TEST_BASE_URL, + TEST_ORG_ID, + TEST_S3_BUCKET, + TEST_PATH_1, + TEST_PATH_2, + TEST_ASSET_PATH, + TEST_YEAR, + TEST_MONTH, + TEST_DAY, + TEST_DAY_PREVIOUS, + TEST_MONTH_MAR, + TEST_DAY_5, + TEST_MONTH_DEC, + TEST_DAY_25, + TEST_DAY_31, + TEST_USER_AGENT_1, + TEST_USER_AGENT_2, + TEST_USER_AGENT_3, + TEST_USER_AGENT_4, + TEST_USER_AGENT_5, + REQUEST_COUNT_SMALL, + REQUEST_COUNT_MEDIUM, + REQUEST_COUNT_TINY, + REQUEST_COUNT_HIGH, + REQUEST_COUNT_LOW_1, + REQUEST_COUNT_LOW_2, + REQUEST_COUNT_LOW_3, + REQUEST_COUNT_LOW_4, + REQUEST_COUNT_LOW_5, + REQUEST_COUNT_MID_1, + REQUEST_COUNT_MID_2, + REQUEST_COUNT_MID_3, + REQUEST_COUNT_HIGH_1, + REQUEST_COUNT_HIGH_2, + REQUEST_COUNT_NONE, + TEST_DATE_2025_01_14, + TEST_DATE_2025_01_15, + TEST_DATE_2025_02_01, + TEST_DATE_2025_03_05, + TEST_DATE_2025_12_25, + DEFAULT_DATABASE_NAME, + DEFAULT_TABLE_NAME, + S3_PATH_AGGREGATED_404, + S3_PATH_TEMP_ATHENA_RESULTS, + TEST_DATABASE_NAME, + TEST_SQL_RESULT, + ATHENA_QUERY_PREFIX, + CUSTOM_BUCKET_NAME, + CUSTOM_IMS_ORG, + TEST_PATH_FRAGMENT, + TEST_PATH_IMAGE_JPG, + TEST_PATH_DOCUMENT_PDF, + TEST_PATH_VIDEO_MP4, + TEST_PATH_FONT_WOFF, + TEST_PATH_ARCHIVE_ZIP, + TEST_PATH_ANOTHER_FRAGMENT, + TEST_PATH_ANOTHER, + TEST_PATH_VALID_FRAGMENT, + TEST_PATH_ANOTHER_FRAGMENT_2, + TEST_PATH_FRAGMENT1, + TEST_PATH_FRAGMENT2, + TEST_PATH_FRAGMENT3, + TEST_PATH_FRAGMENT4, + TEST_PATH_FRAGMENT5, +} from './test-constants.js'; + +describe('AthenaCollector', () => { + let sandbox; + let context; + let athenaClientStub; + let getStaticContentStub; + let AthenaCollector; + + // Helper to set test config 
on collector + const setTestConfig = (collector) => { + collector.config = { + database: TEST_DATABASE, + tableName: TEST_TABLE, + location: collector.config.location, + tempLocation: collector.config.tempLocation, + }; + }; + + beforeEach(async () => { + sandbox = sinon.createSandbox(); + + athenaClientStub = { + execute: sandbox.stub().resolves(), + query: sandbox.stub().resolves([ + { url: TEST_PATH_1, request_user_agent: TEST_USER_AGENT_1, request_count: String(REQUEST_COUNT_SMALL) }, + { url: TEST_ASSET_PATH, request_user_agent: TEST_USER_AGENT_1, request_count: String(REQUEST_COUNT_TINY) }, // Asset URL should be filtered + { url: null }, // Null URL should be filtered + { url: TEST_PATH_2, request_user_agent: TEST_USER_AGENT_2, request_count: String(REQUEST_COUNT_MEDIUM) }, + ]), + }; + + getStaticContentStub = sandbox.stub().resolves('SELECT * FROM test_table;'); + + context = new MockContextBuilder() + .withSandbox(sandbox) + .withOverrides({ + log: { + info: sandbox.spy(), + debug: sandbox.spy(), + warn: sandbox.spy(), + error: sandbox.spy(), + }, + env: { + S3_BUCKET: TEST_S3_BUCKET, + }, + site: { + getBaseURL: () => TEST_BASE_URL, + getOrganizationId: () => TEST_ORG_ID, + }, + dataAccess: { + Organization: { + findById: sandbox.stub().resolves({ + getImsOrgId: () => TEST_IMS_ORG, + }), + }, + }, + }) + .build(); + + const module = await esmock('../../../src/content-fragment-404/collectors/athena-collector.js', { + '@adobe/spacecat-shared-utils': { + getStaticContent: getStaticContentStub, + }, + '@adobe/spacecat-shared-athena-client': { + AWSAthenaClient: { + fromContext: sandbox.stub().returns(athenaClientStub), + }, + }, + '../../../src/utils/cdn-utils.js': { + extractCustomerDomain: sandbox.stub().returns(TEST_HOSTNAME), + }, + }); + + AthenaCollector = module.AthenaCollector; + }); + + afterEach(() => { + sandbox.restore(); + }); + + describe('constructor', () => { + it('should initialize with context', () => { + const collector = new 
AthenaCollector(context); + collector.imsOrg = TEST_IMS_ORG; + collector.sanitizedHostname = TEST_HOSTNAME; + collector.initialize(); + setTestConfig(collector); + + expect(collector.context).to.equal(context); + expect(collector.imsOrg).to.equal(TEST_IMS_ORG); + expect(collector.config).to.exist; + expect(collector.config.location).to.include('s3://'); + expect(collector.config.tempLocation).to.include('s3://'); + }); + + it('should create athena client with correct temp location', () => { + const collector = new AthenaCollector(context); + collector.imsOrg = TEST_IMS_ORG; + collector.sanitizedHostname = TEST_HOSTNAME; + collector.initialize(); + setTestConfig(collector); + + expect(collector.athenaClient).to.equal(athenaClientStub); + }); + }); + + describe('validate', () => { + it('should throw error when S3_BUCKET is missing', () => { + const collector = new AthenaCollector({ + ...context, + env: { + S3_BUCKET: undefined, + }, + }); + collector.imsOrg = TEST_IMS_ORG; + collector.sanitizedHostname = TEST_HOSTNAME; + + expect(() => collector.validate()) + .to.throw('Raw bucket is required'); + }); + + it('should throw error when imsOrg is missing', () => { + const collector = new AthenaCollector(context); + collector.sanitizedHostname = TEST_HOSTNAME; + + expect(() => collector.validate()) + .to.throw('IMS organization is required'); + }); + + it('should throw error when sanitizedHostname is missing', () => { + const collector = new AthenaCollector(context); + collector.imsOrg = TEST_IMS_ORG; + // Don't set sanitizedHostname - testing that it throws when missing + + expect(() => collector.validate()) + .to.throw('Sanitized hostname is required'); + }); + + it('should not throw when all requirements are met', () => { + const collector = new AthenaCollector(context); + collector.imsOrg = TEST_IMS_ORG; + collector.sanitizedHostname = TEST_HOSTNAME; + + expect(() => collector.validate()).to.not.throw(); + }); + }); + + describe('static constants', () => { + 
it('should have GraphQL suffix regex', () => { + expect(AthenaCollector.GRAPHQL_SUFFIX).to.be.a('regexp'); + expect(AthenaCollector.GRAPHQL_SUFFIX.test('/content/dam/test.cfm.json')).to.be.true; + expect(AthenaCollector.GRAPHQL_SUFFIX.test('/content/dam/test.cfm.model.json')).to.be.true; + expect(AthenaCollector.GRAPHQL_SUFFIX.test('/content/dam/test.cfm.variant.json')).to.be.true; + expect(AthenaCollector.GRAPHQL_SUFFIX.test('/content/dam/test.cfm.gql.json')).to.be.true; + expect(AthenaCollector.GRAPHQL_SUFFIX.test('/content/dam/test.jpg')).to.be.false; + expect(AthenaCollector.GRAPHQL_SUFFIX.test('/content/dam/test.json')).to.be.false; + }); + }); + + describe('cleanPath static method', () => { + it('should remove GraphQL suffix from paths', () => { + expect(AthenaCollector.cleanPath('/content/dam/test.cfm.json')).to.equal('/content/dam/test'); + expect(AthenaCollector.cleanPath('/content/dam/test.cfm.model.json')).to.equal('/content/dam/test'); + expect(AthenaCollector.cleanPath('/content/dam/folder/item.cfm.variant.json')).to.equal('/content/dam/folder/item'); + expect(AthenaCollector.cleanPath('/content/dam/test.cfm.gql.json')).to.equal('/content/dam/test'); + }); + + it('should return original path if no GraphQL suffix', () => { + expect(AthenaCollector.cleanPath('/content/dam/test.jpg')).to.equal('/content/dam/test.jpg'); + expect(AthenaCollector.cleanPath('/content/dam/test')).to.equal('/content/dam/test'); + expect(AthenaCollector.cleanPath('/content/dam/test.json')).to.equal('/content/dam/test.json'); + }); + + it('should handle edge cases', () => { + expect(AthenaCollector.cleanPath('')).to.equal(''); + expect(AthenaCollector.cleanPath('/content/dam/.cfm.json')).to.equal('/content/dam/'); + expect(AthenaCollector.cleanPath('/content/dam/test.cfm')).to.equal('/content/dam/test.cfm'); + }); + }); + + describe('createFrom static method', () => { + it('should create new AthenaCollector instance', async () => { + const collector = await 
AthenaCollector.createFrom(context); + + expect(collector).to.be.instanceOf(AthenaCollector); + expect(collector.context).to.equal(context); + expect(collector.imsOrg).to.equal(TEST_IMS_ORG); + }); + + it('should throw error when IMS org cannot be retrieved', async () => { + const invalidContext = { + ...context, + site: { + getBaseURL: () => 'https://test-site.com', + }, + }; + + await expect(AthenaCollector.createFrom(invalidContext)) + .to.be.rejectedWith('Unable to retrieve IMS organization ID'); + }); + }); + + describe('getAthenaConfig', () => { + it('should generate correct configuration from context', () => { + const collector = new AthenaCollector(context); + collector.imsOrg = TEST_IMS_ORG; + collector.sanitizedHostname = TEST_HOSTNAME; + const config = collector.getAthenaConfig(); + + // Verify actual implementation behavior + expect(config.database).to.equal(DEFAULT_DATABASE_NAME); + expect(config.tableName).to.equal(DEFAULT_TABLE_NAME); + expect(config.location).to.equal(`s3://${TEST_S3_BUCKET}/${TEST_IMS_ORG}/${S3_PATH_AGGREGATED_404}`); + expect(config.tempLocation).to.equal(`s3://${TEST_S3_BUCKET}/${S3_PATH_TEMP_ATHENA_RESULTS}`); + }); + + it('should handle different bucket and IMS org values', () => { + const customContext = { + ...context, + env: { + S3_BUCKET: CUSTOM_BUCKET_NAME, + }, + }; + + const collector = new AthenaCollector(customContext); + collector.imsOrg = CUSTOM_IMS_ORG; + const config = collector.getAthenaConfig(); + + expect(config.location).to.equal(`s3://${CUSTOM_BUCKET_NAME}/${CUSTOM_IMS_ORG}/${S3_PATH_AGGREGATED_404}`); + expect(config.tempLocation).to.equal(`s3://${CUSTOM_BUCKET_NAME}/${S3_PATH_TEMP_ATHENA_RESULTS}`); + }); + }); + + describe('getPreviousDayParts static method', () => { + it('should return previous day parts', () => { + const mockDate = TEST_DATE_2025_01_15; + const originalDate = global.Date; + global.Date = function MockDate(...args) { + if (args.length === 0) { + return mockDate; + } + return 
Reflect.construct(originalDate, args); + }; + global.Date.prototype = originalDate.prototype; + global.Date.UTC = originalDate.UTC; + + try { + const parts = AthenaCollector.getPreviousDayParts(); + + expect(parts).to.deep.equal({ + year: TEST_YEAR, + month: TEST_MONTH, + day: TEST_DAY_PREVIOUS, // Previous day + }); + } finally { + global.Date = originalDate; + } + }); + + it('should handle month boundary correctly', () => { + // Mock Date to return first day of month + const mockDate = TEST_DATE_2025_02_01; + const originalDate = global.Date; + global.Date = function MockDate(...args) { + if (args.length === 0) { + return mockDate; + } + return Reflect.construct(originalDate, args); + }; + global.Date.prototype = originalDate.prototype; + global.Date.UTC = originalDate.UTC; + + try { + const parts = AthenaCollector.getPreviousDayParts(); + + expect(parts).to.deep.equal({ + year: TEST_YEAR, + month: TEST_MONTH, + day: TEST_DAY_31, // Last day of previous month (hardcoded as it's relative to TEST_DATE_2025_02_01) + }); + } finally { + global.Date = originalDate; + } + }); + }); + + describe('getDateParts static method', () => { + it('should return correct date parts for given date', () => { + const testDate = new Date('2025-01-15T10:30:00Z'); + const parts = AthenaCollector.getDateParts(testDate); + + expect(parts).to.deep.equal({ + year: TEST_YEAR, + month: TEST_MONTH, + day: TEST_DAY, + }); + }); + + it('should pad single digit months and days with zero', () => { + const testDate = TEST_DATE_2025_03_05; + const parts = AthenaCollector.getDateParts(testDate); + + expect(parts).to.deep.equal({ + year: TEST_YEAR, + month: TEST_MONTH_MAR, + day: TEST_DAY_5, + }); + }); + + it('should use current date when no date provided', () => { + const mockDate = TEST_DATE_2025_12_25; + const originalDate = global.Date; + global.Date = function MockDate(...args) { + if (args.length === 0) { + return mockDate; + } + return Reflect.construct(originalDate, args); + }; + 
global.Date.prototype = originalDate.prototype; + global.Date.UTC = originalDate.UTC; + + try { + const parts = AthenaCollector.getDateParts(); + + expect(parts).to.deep.equal({ + year: TEST_YEAR, + month: TEST_MONTH_DEC, + day: TEST_DAY_25, + }); + } finally { + global.Date = originalDate; + } + }); + }); + + describe('loadSql static method', () => { + it('should load SQL file with variables', async () => { + const variables = { database: TEST_DATABASE_NAME, table: TEST_TABLE }; + const result = await AthenaCollector.loadSql('create-database', variables); + + expect(getStaticContentStub).to.have.been.calledWith( + variables, + './src/content-fragment-404/sql/create-database.sql', + ); + expect(result).to.equal(TEST_SQL_RESULT); + }); + + it('should handle different SQL file names', async () => { + const variables = { database: TEST_DATABASE_NAME }; + await AthenaCollector.loadSql('daily-query', variables); + + expect(getStaticContentStub).to.have.been.calledWith( + variables, + './src/content-fragment-404/sql/daily-query.sql', + ); + }); + + it('should handle getStaticContent errors', async () => { + getStaticContentStub.rejects(new Error('File not found')); + + await expect(AthenaCollector.loadSql('invalid-file', {})) + .to.be.rejectedWith('File not found'); + }); + }); + + describe('ensureDatabase', () => { + it('should create database with correct SQL and description', async () => { + const collector = new AthenaCollector(context); + collector.imsOrg = TEST_IMS_ORG; + collector.sanitizedHostname = TEST_HOSTNAME; + collector.initialize(); + setTestConfig(collector); + setTestConfig(collector); + + await collector.ensureDatabase(); + + expect(getStaticContentStub).to.have.been.calledWith( + { database: TEST_DATABASE }, + './src/content-fragment-404/sql/create-database.sql', + ); + expect(athenaClientStub.execute).to.have.been.calledWith( + TEST_SQL_RESULT, + TEST_DATABASE, + `${ATHENA_QUERY_PREFIX} Create database ${TEST_DATABASE}`, + ); + }); + + it('should 
handle SQL loading errors', async () => { + getStaticContentStub.rejects(new Error('SQL file not found')); + const collector = new AthenaCollector(context); + collector.imsOrg = TEST_IMS_ORG; + collector.sanitizedHostname = TEST_HOSTNAME; + collector.initialize(); + setTestConfig(collector); + + await expect(collector.ensureDatabase()) + .to.be.rejectedWith('SQL file not found'); + }); + + it('should handle athena execution errors', async () => { + athenaClientStub.execute.rejects(new Error('Athena execution failed')); + const collector = new AthenaCollector(context); + collector.imsOrg = TEST_IMS_ORG; + collector.sanitizedHostname = TEST_HOSTNAME; + collector.initialize(); + setTestConfig(collector); + setTestConfig(collector); + + await expect(collector.ensureDatabase()) + .to.be.rejectedWith('Athena execution failed'); + }); + }); + + describe('ensureTable', () => { + it('should create table with correct SQL and description', async () => { + const collector = new AthenaCollector(context); + collector.imsOrg = TEST_IMS_ORG; + collector.sanitizedHostname = TEST_HOSTNAME; + collector.initialize(); + setTestConfig(collector); + setTestConfig(collector); + + await collector.ensureTable(); + + expect(getStaticContentStub).to.have.been.calledWith( + { + database: TEST_DATABASE, + tableName: TEST_TABLE, + location: `s3://${TEST_S3_BUCKET}/${TEST_IMS_ORG}/${S3_PATH_AGGREGATED_404}`, + }, + './src/content-fragment-404/sql/create-table.sql', + ); + expect(athenaClientStub.execute).to.have.been.calledWith( + TEST_SQL_RESULT, + TEST_DATABASE, + ); + }); + + it('should handle SQL loading errors', async () => { + getStaticContentStub.rejects(new Error('Table SQL not found')); + const collector = new AthenaCollector(context); + collector.imsOrg = TEST_IMS_ORG; + collector.sanitizedHostname = TEST_HOSTNAME; + collector.initialize(); + setTestConfig(collector); + setTestConfig(collector); + + await expect(collector.ensureTable()) + .to.be.rejectedWith('Table SQL not found'); + 
}); + + it('should handle athena execution errors', async () => { + athenaClientStub.execute.rejects(new Error('Table creation failed')); + const collector = new AthenaCollector(context); + collector.imsOrg = TEST_IMS_ORG; + collector.sanitizedHostname = TEST_HOSTNAME; + collector.initialize(); + setTestConfig(collector); + setTestConfig(collector); + + await expect(collector.ensureTable()) + .to.be.rejectedWith('Table creation failed'); + }); + }); + + describe('queryContentFragment404s', () => { + it('should query broken paths with correct parameters and filter assets', async () => { + athenaClientStub.query.resolves([ + { url: TEST_PATH_1, request_user_agent: TEST_USER_AGENT_1, request_count: String(REQUEST_COUNT_SMALL) }, + { url: TEST_ASSET_PATH, request_user_agent: TEST_USER_AGENT_1, request_count: String(REQUEST_COUNT_TINY) }, // Asset should be filtered + { url: null }, // Should be filtered + { url: TEST_PATH_2, request_user_agent: TEST_USER_AGENT_2, request_count: String(REQUEST_COUNT_HIGH) }, + ]); + + const collector = new AthenaCollector(context); + collector.imsOrg = TEST_IMS_ORG; + collector.sanitizedHostname = TEST_HOSTNAME; + collector.initialize(); + setTestConfig(collector); + const result = await collector.queryContentFragment404s(TEST_YEAR, TEST_MONTH, TEST_DAY); + + expect(getStaticContentStub).to.have.been.calledWith( + { + database: TEST_DATABASE, + tableName: TEST_TABLE, + year: TEST_YEAR, + month: TEST_MONTH, + day: TEST_DAY, + }, + './src/content-fragment-404/sql/daily-query.sql', + ); + + expect(athenaClientStub.query).to.have.been.calledOnce; + expect(athenaClientStub.query.getCall(0).args[0]).to.equal(TEST_SQL_RESULT); + expect(athenaClientStub.query.getCall(0).args[1]).to.equal(TEST_DATABASE); + + expect(result).to.deep.equal([ + { + url: TEST_PATH_1, + requestUserAgents: [{ userAgent: TEST_USER_AGENT_1, count: REQUEST_COUNT_SMALL }], + requestCount: REQUEST_COUNT_SMALL, + }, + { + url: TEST_PATH_2, + requestUserAgents: [{ userAgent: 
TEST_USER_AGENT_2, count: REQUEST_COUNT_HIGH }], + requestCount: REQUEST_COUNT_HIGH, + }, + ]); + }); + + it('should filter out asset URLs (images, documents, media)', async () => { + athenaClientStub.query.resolves([ + { url: TEST_PATH_FRAGMENT, request_user_agent: TEST_USER_AGENT_1, request_count: String(REQUEST_COUNT_LOW_1) }, + { url: TEST_PATH_IMAGE_JPG, request_user_agent: TEST_USER_AGENT_1, request_count: String(REQUEST_COUNT_MID_2) }, + { url: TEST_PATH_DOCUMENT_PDF, request_user_agent: TEST_USER_AGENT_1, request_count: String(REQUEST_COUNT_LOW_4) }, + { url: TEST_PATH_VIDEO_MP4, request_user_agent: TEST_USER_AGENT_1, request_count: String(REQUEST_COUNT_MID_1) }, + { url: TEST_PATH_FONT_WOFF, request_user_agent: TEST_USER_AGENT_1, request_count: String(REQUEST_COUNT_LOW_5) }, + { url: TEST_PATH_ARCHIVE_ZIP, request_user_agent: TEST_USER_AGENT_1, request_count: String(REQUEST_COUNT_TINY) }, + { url: TEST_PATH_ANOTHER_FRAGMENT, request_user_agent: TEST_USER_AGENT_2, request_count: String(REQUEST_COUNT_LOW_3) }, + ]); + + const collector = new AthenaCollector(context); + collector.imsOrg = TEST_IMS_ORG; + collector.sanitizedHostname = TEST_HOSTNAME; + collector.initialize(); + setTestConfig(collector); + const result = await collector.queryContentFragment404s(TEST_YEAR, TEST_MONTH, TEST_DAY); + + expect(result).to.deep.equal([ + { + url: TEST_PATH_FRAGMENT, + requestUserAgents: [{ userAgent: TEST_USER_AGENT_1, count: REQUEST_COUNT_LOW_1 }], + requestCount: REQUEST_COUNT_LOW_1, + }, + { + url: TEST_PATH_ANOTHER_FRAGMENT, + requestUserAgents: [{ userAgent: TEST_USER_AGENT_2, count: REQUEST_COUNT_LOW_3 }], + requestCount: REQUEST_COUNT_LOW_3, + }, + ]); + }); + + it('should filter out null URLs from results', async () => { + athenaClientStub.query.resolves([ + { url: TEST_PATH_VALID_FRAGMENT, request_user_agent: TEST_USER_AGENT_1, request_count: String(REQUEST_COUNT_LOW_2) }, + { url: null }, + { url: '' }, + { url: TEST_PATH_ANOTHER_FRAGMENT_2, 
request_user_agent: TEST_USER_AGENT_2, request_count: String(REQUEST_COUNT_MID_1) }, + ]); + + const collector = new AthenaCollector(context); + collector.imsOrg = TEST_IMS_ORG; + collector.sanitizedHostname = TEST_HOSTNAME; + collector.initialize(); + setTestConfig(collector); + const result = await collector.queryContentFragment404s(TEST_YEAR, TEST_MONTH, TEST_DAY); + + expect(result).to.deep.equal([ + { + url: TEST_PATH_VALID_FRAGMENT, + requestUserAgents: [{ userAgent: TEST_USER_AGENT_1, count: REQUEST_COUNT_LOW_2 }], + requestCount: REQUEST_COUNT_LOW_2, + }, + { + url: TEST_PATH_ANOTHER_FRAGMENT_2, + requestUserAgents: [{ userAgent: TEST_USER_AGENT_2, count: REQUEST_COUNT_MID_1 }], + requestCount: REQUEST_COUNT_MID_1, + }, + ]); + }); + + it('should group multiple user agents for the same URL', async () => { + athenaClientStub.query.resolves([ + { url: TEST_PATH_FRAGMENT, request_user_agent: TEST_USER_AGENT_1, request_count: String(REQUEST_COUNT_MID_3) }, + { url: TEST_PATH_FRAGMENT, request_user_agent: TEST_USER_AGENT_2, request_count: String(REQUEST_COUNT_SMALL) }, + { url: TEST_PATH_FRAGMENT, request_user_agent: TEST_USER_AGENT_3, request_count: String(REQUEST_COUNT_TINY) }, + { url: TEST_PATH_ANOTHER, request_user_agent: TEST_USER_AGENT_1, request_count: String(REQUEST_COUNT_MEDIUM) }, + ]); + + const collector = new AthenaCollector(context); + collector.imsOrg = TEST_IMS_ORG; + collector.sanitizedHostname = TEST_HOSTNAME; + collector.initialize(); + setTestConfig(collector); + const result = await collector.queryContentFragment404s(TEST_YEAR, TEST_MONTH, TEST_DAY); + + expect(result).to.deep.equal([ + { + url: TEST_PATH_FRAGMENT, + requestUserAgents: [ + { userAgent: TEST_USER_AGENT_1, count: REQUEST_COUNT_MID_3 }, + { userAgent: TEST_USER_AGENT_2, count: REQUEST_COUNT_SMALL }, + { userAgent: TEST_USER_AGENT_3, count: REQUEST_COUNT_TINY }, + ], + requestCount: REQUEST_COUNT_HIGH_2, + }, + { + url: TEST_PATH_ANOTHER, + requestUserAgents: [{ userAgent: 
TEST_USER_AGENT_1, count: REQUEST_COUNT_MEDIUM }], + requestCount: REQUEST_COUNT_MEDIUM, + }, + ]); + }); + + it('should aggregate counts for duplicate user agents on the same URL', async () => { + athenaClientStub.query.resolves([ + { url: TEST_PATH_FRAGMENT, request_user_agent: TEST_USER_AGENT_1, request_count: String(REQUEST_COUNT_MID_2) }, + { url: TEST_PATH_FRAGMENT, request_user_agent: TEST_USER_AGENT_1, request_count: String(REQUEST_COUNT_MEDIUM) }, + { url: TEST_PATH_FRAGMENT, request_user_agent: TEST_USER_AGENT_2, request_count: String(REQUEST_COUNT_TINY) }, + ]); + + const collector = new AthenaCollector(context); + collector.imsOrg = TEST_IMS_ORG; + collector.sanitizedHostname = TEST_HOSTNAME; + collector.initialize(); + setTestConfig(collector); + const result = await collector.queryContentFragment404s(TEST_YEAR, TEST_MONTH, TEST_DAY); + + expect(result).to.deep.equal([ + { + url: TEST_PATH_FRAGMENT, + requestUserAgents: [ + { userAgent: TEST_USER_AGENT_1, count: REQUEST_COUNT_HIGH }, // REQUEST_COUNT_MID_2 + REQUEST_COUNT_MEDIUM aggregated + { userAgent: TEST_USER_AGENT_2, count: REQUEST_COUNT_TINY }, + ], + requestCount: REQUEST_COUNT_HIGH_1, // Total: REQUEST_COUNT_HIGH + REQUEST_COUNT_TINY + }, + ]); + }); + + it('should clean GraphQL suffixes from URLs', async () => { + const TEST_PATH_FRAGMENT_CFM_JSON = '/content/dam/fragment.cfm.json'; + const TEST_PATH_FRAGMENT_CFM_MODEL_JSON = '/content/dam/fragment.cfm.model.json'; + const TEST_PATH_ANOTHER_CFM_GQL_JSON = '/content/dam/another.cfm.gql.json'; + + athenaClientStub.query.resolves([ + { url: TEST_PATH_FRAGMENT_CFM_JSON, request_user_agent: TEST_USER_AGENT_1, request_count: String(REQUEST_COUNT_SMALL) }, + { url: TEST_PATH_FRAGMENT_CFM_MODEL_JSON, request_user_agent: TEST_USER_AGENT_2, request_count: String(REQUEST_COUNT_TINY) }, + { url: TEST_PATH_ANOTHER_CFM_GQL_JSON, request_user_agent: TEST_USER_AGENT_3, request_count: String(REQUEST_COUNT_LOW_4) }, + ]); + + const collector = new 
AthenaCollector(context); + collector.imsOrg = TEST_IMS_ORG; + collector.sanitizedHostname = TEST_HOSTNAME; + collector.initialize(); + setTestConfig(collector); + const result = await collector.queryContentFragment404s(TEST_YEAR, TEST_MONTH, TEST_DAY); + + expect(result).to.deep.equal([ + { + url: TEST_PATH_FRAGMENT, + requestUserAgents: [ + { userAgent: TEST_USER_AGENT_1, count: REQUEST_COUNT_SMALL }, + { userAgent: TEST_USER_AGENT_2, count: REQUEST_COUNT_TINY }, + ], + requestCount: REQUEST_COUNT_MID_3, + }, + { + url: TEST_PATH_ANOTHER, + requestUserAgents: [{ userAgent: TEST_USER_AGENT_3, count: REQUEST_COUNT_LOW_4 }], + requestCount: REQUEST_COUNT_LOW_4, + }, + ]); + }); + + it('should handle empty query results', async () => { + athenaClientStub.query.resolves([]); + + const collector = new AthenaCollector(context); + collector.imsOrg = TEST_IMS_ORG; + collector.sanitizedHostname = TEST_HOSTNAME; + collector.initialize(); + setTestConfig(collector); + const result = await collector.queryContentFragment404s(TEST_YEAR, TEST_MONTH, TEST_DAY); + + expect(result).to.deep.equal([]); + }); + + it('should handle SQL loading errors', async () => { + getStaticContentStub.rejects(new Error('Query SQL not found')); + const collector = new AthenaCollector(context); + collector.imsOrg = TEST_IMS_ORG; + collector.sanitizedHostname = TEST_HOSTNAME; + collector.initialize(); + setTestConfig(collector); + + await expect(collector.queryContentFragment404s(TEST_YEAR, TEST_MONTH, TEST_DAY)) + .to.be.rejectedWith('Query SQL not found'); + }); + + it('should handle athena query errors', async () => { + athenaClientStub.query.rejects(new Error('Query execution failed')); + const collector = new AthenaCollector(context); + collector.imsOrg = TEST_IMS_ORG; + collector.sanitizedHostname = TEST_HOSTNAME; + collector.initialize(); + setTestConfig(collector); + + await expect(collector.queryContentFragment404s(TEST_YEAR, TEST_MONTH, TEST_DAY)) + .to.be.rejectedWith('Query execution 
failed'); + }); + + it('should handle invalid request_count values and default to 0', async () => { + athenaClientStub.query.resolves([ + { url: TEST_PATH_FRAGMENT1, request_user_agent: TEST_USER_AGENT_1, request_count: 'invalid' }, + { url: TEST_PATH_FRAGMENT2, request_user_agent: TEST_USER_AGENT_2, request_count: null }, + { url: TEST_PATH_FRAGMENT3, request_user_agent: TEST_USER_AGENT_3, request_count: undefined }, + { url: TEST_PATH_FRAGMENT4, request_user_agent: TEST_USER_AGENT_4, request_count: '' }, + { url: TEST_PATH_FRAGMENT5, request_user_agent: TEST_USER_AGENT_5, request_count: String(REQUEST_COUNT_SMALL) }, + ]); + + const collector = new AthenaCollector(context); + collector.imsOrg = TEST_IMS_ORG; + collector.sanitizedHostname = TEST_HOSTNAME; + collector.initialize(); + setTestConfig(collector); + const result = await collector.queryContentFragment404s(TEST_YEAR, TEST_MONTH, TEST_DAY); + + expect(result).to.deep.equal([ + { + url: TEST_PATH_FRAGMENT1, + requestUserAgents: [{ userAgent: TEST_USER_AGENT_1, count: REQUEST_COUNT_NONE }], + requestCount: REQUEST_COUNT_NONE, + }, + { + url: TEST_PATH_FRAGMENT2, + requestUserAgents: [{ userAgent: TEST_USER_AGENT_2, count: REQUEST_COUNT_NONE }], + requestCount: REQUEST_COUNT_NONE, + }, + { + url: TEST_PATH_FRAGMENT3, + requestUserAgents: [{ userAgent: TEST_USER_AGENT_3, count: REQUEST_COUNT_NONE }], + requestCount: REQUEST_COUNT_NONE, + }, + { + url: TEST_PATH_FRAGMENT4, + requestUserAgents: [{ userAgent: TEST_USER_AGENT_4, count: REQUEST_COUNT_NONE }], + requestCount: REQUEST_COUNT_NONE, + }, + { + url: TEST_PATH_FRAGMENT5, + requestUserAgents: [{ userAgent: TEST_USER_AGENT_5, count: REQUEST_COUNT_SMALL }], + requestCount: REQUEST_COUNT_SMALL, + }, + ]); + }); + }); + + describe('fetchContentFragment404s', () => { + it('should fetch broken paths successfully and exclude assets', async () => { + // Mock getPreviousDayParts to return specific date + const originalGetPreviousDayParts = 
AthenaCollector.getPreviousDayParts; + AthenaCollector.getPreviousDayParts = sandbox.stub().returns({ + year: TEST_YEAR, + month: TEST_MONTH, + day: TEST_DAY_PREVIOUS, + }); + + try { + const collector = new AthenaCollector(context); + collector.imsOrg = TEST_IMS_ORG; + collector.sanitizedHostname = TEST_HOSTNAME; + collector.initialize(); + setTestConfig(collector); + const result = await collector.fetchContentFragment404s(); + + expect(result).to.deep.equal([ + { + url: TEST_PATH_1, + requestUserAgents: [{ userAgent: TEST_USER_AGENT_1, count: REQUEST_COUNT_SMALL }], + requestCount: REQUEST_COUNT_SMALL, + }, + { + url: TEST_PATH_2, + requestUserAgents: [{ userAgent: TEST_USER_AGENT_2, count: REQUEST_COUNT_MEDIUM }], + requestCount: REQUEST_COUNT_MEDIUM, + }, + ]); + } finally { + AthenaCollector.getPreviousDayParts = originalGetPreviousDayParts; + } + }); + + it('should handle database creation errors', async () => { + athenaClientStub.execute.onFirstCall().rejects(new Error('Database creation failed')); + + const collector = new AthenaCollector(context); + collector.imsOrg = TEST_IMS_ORG; + collector.sanitizedHostname = TEST_HOSTNAME; + collector.initialize(); + setTestConfig(collector); + + await expect(collector.fetchContentFragment404s()) + .to.be.rejectedWith('Athena query failed: Database creation failed'); + + expect(context.log.error).to.have.been.calledWith('Athena query failed: Database creation failed'); + }); + + it('should handle table creation errors', async () => { + athenaClientStub.execute.onSecondCall().rejects(new Error('Table creation failed')); + + const collector = new AthenaCollector(context); + collector.imsOrg = TEST_IMS_ORG; + collector.sanitizedHostname = TEST_HOSTNAME; + collector.initialize(); + setTestConfig(collector); + + await expect(collector.fetchContentFragment404s()) + .to.be.rejectedWith('Athena query failed: Table creation failed'); + + expect(context.log.error).to.have.been.calledWith('Athena query failed: Table creation 
failed'); + }); + + it('should handle query execution errors', async () => { + athenaClientStub.query.rejects(new Error('Query failed')); + + const collector = new AthenaCollector(context); + collector.imsOrg = TEST_IMS_ORG; + collector.sanitizedHostname = TEST_HOSTNAME; + collector.initialize(); + setTestConfig(collector); + + await expect(collector.fetchContentFragment404s()) + .to.be.rejectedWith('Athena query failed: Query failed'); + + expect(context.log.error).to.have.been.calledWith('Athena query failed: Query failed'); + }); + + it('should call ensureDatabase and ensureTable in correct order', async () => { + const collector = new AthenaCollector(context); + collector.imsOrg = TEST_IMS_ORG; + collector.sanitizedHostname = TEST_HOSTNAME; + collector.initialize(); + setTestConfig(collector); + const ensureDatabaseSpy = sandbox.spy(collector, 'ensureDatabase'); + const ensureTableSpy = sandbox.spy(collector, 'ensureTable'); + const queryContentFragment404sSpy = sandbox.spy(collector, 'queryContentFragment404s'); + + await collector.fetchContentFragment404s(); + + expect(ensureDatabaseSpy).to.have.been.calledBefore(ensureTableSpy); + expect(ensureTableSpy).to.have.been.calledBefore(queryContentFragment404sSpy); + }); + + it('should handle empty results gracefully', async () => { + athenaClientStub.query.resolves([]); + + const collector = new AthenaCollector(context); + collector.imsOrg = TEST_IMS_ORG; + collector.sanitizedHostname = TEST_HOSTNAME; + collector.initialize(); + setTestConfig(collector); + const result = await collector.fetchContentFragment404s(); + + expect(result).to.deep.equal([]); + }); + }); + +}); diff --git a/test/audits/content-fragment-404/base-rule.test.js b/test/audits/content-fragment-404/base-rule.test.js new file mode 100644 index 000000000..d39217a83 --- /dev/null +++ b/test/audits/content-fragment-404/base-rule.test.js @@ -0,0 +1,233 @@ +/* + * Copyright 2025 Adobe. All rights reserved. 
+ * This file is licensed to you under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. You may obtain a copy + * of the License at http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under + * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS + * OF ANY KIND, either express or implied. See the License for the specific language + * governing permissions and limitations under the License. + */ + +/* eslint-env mocha */ +import { expect, use } from 'chai'; +import sinon from 'sinon'; +import sinonChai from 'sinon-chai'; +import chaiAsPromised from 'chai-as-promised'; +import { BaseRule } from '../../../src/content-fragment-404/rules/base-rule.js'; +import { MockContextBuilder } from '../../shared.js'; +import { TEST_PATH_BROKEN, BASE_RULE_DEFAULT_PRIORITY } from './test-constants.js'; + +use(sinonChai); +use(chaiAsPromised); + +describe('BaseRule', () => { + let sandbox; + let context; + let mockAemClient; + + beforeEach(() => { + sandbox = sinon.createSandbox(); + + context = new MockContextBuilder() + .withSandbox(sandbox) + .withOverrides({ + log: { + info: sandbox.spy(), + debug: sandbox.spy(), + warn: sandbox.spy(), + error: sandbox.spy(), + }, + }) + .build(); + + mockAemClient = { + isAvailable: sandbox.stub().resolves(true), + getChildrenFromPath: sandbox.stub().resolves([]), + }; + }); + + afterEach(() => { + sandbox.restore(); + }); + + describe('constructor', () => { + it('should initialize with default priority and no AEM client', () => { + const rule = new BaseRule(context); + + expect(rule.context).to.equal(context); + expect(rule.priority).to.equal(BASE_RULE_DEFAULT_PRIORITY); + expect(rule.aemClient).to.be.null; + }); + + it('should initialize with custom priority', () => { + const rule = new BaseRule(context, 10); + + expect(rule.context).to.equal(context); + 
expect(rule.priority).to.equal(10); + expect(rule.aemClient).to.be.null; + }); + + it('should initialize with AEM client', () => { + const rule = new BaseRule(context, BASE_RULE_DEFAULT_PRIORITY, mockAemClient); + + expect(rule.context).to.equal(context); + expect(rule.priority).to.equal(BASE_RULE_DEFAULT_PRIORITY); + expect(rule.aemClient).to.equal(mockAemClient); + }); + + it('should initialize with all parameters', () => { + const rule = new BaseRule(context, 5, mockAemClient); + + expect(rule.context).to.equal(context); + expect(rule.priority).to.equal(5); + expect(rule.aemClient).to.equal(mockAemClient); + }); + }); + + describe('apply', () => { + it('should delegate to applyRule method', async () => { + const rule = new BaseRule(context); + const applyRuleSpy = sandbox.spy(rule, 'applyRule'); + const brokenPath = TEST_PATH_BROKEN; + + await expect(rule.apply(brokenPath)) + .to.be.rejectedWith('Subclasses must implement applyRule()'); + + expect(applyRuleSpy).to.have.been.calledOnceWith(brokenPath); + }); + + it('should pass through return value from applyRule', async () => { + const rule = new BaseRule(context); + const mockSuggestion = { type: 'test', path: '/test' }; + + // Override applyRule to return a mock suggestion + rule.applyRule = sandbox.stub().resolves(mockSuggestion); + + const result = await rule.apply(TEST_PATH_BROKEN); + + expect(result).to.equal(mockSuggestion); + }); + + it('should pass through errors from applyRule', async () => { + const rule = new BaseRule(context); + const testError = new Error('Test error'); + + // Override applyRule to throw an error + rule.applyRule = sandbox.stub().rejects(testError); + + await expect(rule.apply(TEST_PATH_BROKEN)) + .to.be.rejectedWith('Test error'); + }); + }); + + describe('getPriority', () => { + it('should return default priority', () => { + const rule = new BaseRule(context); + + expect(rule.getPriority()).to.equal(BASE_RULE_DEFAULT_PRIORITY); + }); + + it('should return custom priority', () => 
{ + const rule = new BaseRule(context, 15); + + expect(rule.getPriority()).to.equal(15); + }); + + it('should return zero priority', () => { + const rule = new BaseRule(context, 0); + + expect(rule.getPriority()).to.equal(0); + }); + + it('should return negative priority', () => { + const rule = new BaseRule(context, -5); + + expect(rule.getPriority()).to.equal(-5); + }); + }); + + describe('getAemClient', () => { + it('should return injected AEM client when available', () => { + const rule = new BaseRule(context, BASE_RULE_DEFAULT_PRIORITY, mockAemClient); + + const result = rule.getAemClient(); + + expect(result).to.equal(mockAemClient); + expect(context.log.error).not.to.have.been.called; + }); + + it('should throw error when AEM client not injected', () => { + const rule = new BaseRule(context); + + expect(() => rule.getAemClient()) + .to.throw('AemClient not injected'); + + expect(context.log.error).to.have.been.calledOnceWith('AemClient not injected'); + }); + + it('should throw error when AEM client is null', () => { + const rule = new BaseRule(context, BASE_RULE_DEFAULT_PRIORITY, null); + + expect(() => rule.getAemClient()) + .to.throw('AemClient not injected'); + + expect(context.log.error).to.have.been.calledOnceWith('AemClient not injected'); + }); + + it('should throw error when AEM client is undefined', () => { + const rule = new BaseRule(context, BASE_RULE_DEFAULT_PRIORITY, undefined); + + expect(() => rule.getAemClient()) + .to.throw('AemClient not injected'); + + expect(context.log.error).to.have.been.calledOnceWith('AemClient not injected'); + }); + }); + + describe('applyRule', () => { + it('should throw error indicating subclasses must implement', async () => { + const rule = new BaseRule(context); + + await expect(rule.applyRule(TEST_PATH_BROKEN)) + .to.be.rejectedWith('Subclasses must implement applyRule()'); + }); + + it('should throw error with any path input', async () => { + const rule = new BaseRule(context); + + await 
expect(rule.applyRule('/different/path.pdf')) + .to.be.rejectedWith('Subclasses must implement applyRule()'); + }); + + it('should throw error with null path', async () => { + const rule = new BaseRule(context); + + await expect(rule.applyRule(null)) + .to.be.rejectedWith('Subclasses must implement applyRule()'); + }); + + it('should throw error with empty path', async () => { + const rule = new BaseRule(context); + + await expect(rule.applyRule('')) + .to.be.rejectedWith('Subclasses must implement applyRule()'); + }); + }); + + describe('integration scenarios', () => { + it('should work in a typical rule application flow', async () => { + const rule = new BaseRule(context, 10, mockAemClient); + + // Override applyRule to simulate a real implementation + rule.applyRule = sandbox.stub().resolves({ type: 'publish', path: '/test' }); + + expect(rule.getPriority()).to.equal(10); + expect(rule.getAemClient()).to.equal(mockAemClient); + + const result = await rule.apply(TEST_PATH_BROKEN); + expect(result).to.deep.equal({ type: 'publish', path: '/test' }); + }); + }); +}); diff --git a/test/audits/content-fragment-404/cache-strategy.test.js b/test/audits/content-fragment-404/cache-strategy.test.js new file mode 100644 index 000000000..f2b7ddd03 --- /dev/null +++ b/test/audits/content-fragment-404/cache-strategy.test.js @@ -0,0 +1,146 @@ +/* + * Copyright 2025 Adobe. All rights reserved. + * This file is licensed to you under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. You may obtain a copy + * of the License at http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under + * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS + * OF ANY KIND, either express or implied. See the License for the specific language + * governing permissions and limitations under the License. 
+ */ + +/* eslint-env mocha */ +import { expect } from 'chai'; +import sinon from 'sinon'; +import { CacheStrategy } from '../../../src/content-fragment-404/cache/cache-strategy.js'; +import { NoOpCache } from '../../../src/content-fragment-404/cache/noop-cache.js'; +import { PathIndexCache } from '../../../src/content-fragment-404/cache/path-index-cache.js'; +import { TEST_PATH_TEST_IMAGE, TEST_PATH_TEST } from './test-constants.js'; + +describe('Cache Strategy', () => { + describe('CacheStrategy (Base Class)', () => { + it('should throw error when findChildren is not implemented', () => { + const strategy = new CacheStrategy(); + expect(() => strategy.findChildren('/some/path')).to.throw('findChildren() must be implemented by subclass'); + }); + + it('should throw error when cacheItems is not implemented', () => { + const strategy = new CacheStrategy(); + expect(() => strategy.cacheItems([], () => {})).to.throw('cacheItems() must be implemented by subclass'); + }); + + it('should throw error when isAvailable is not implemented', () => { + const strategy = new CacheStrategy(); + expect(() => strategy.isAvailable()).to.throw('isAvailable() must be implemented by subclass'); + }); + }); + + describe('NoOpCache', () => { + let cache; + + beforeEach(() => { + cache = new NoOpCache(); + }); + + it('should return empty array for findChildren', () => { + const result = cache.findChildren(TEST_PATH_TEST); + expect(result).to.deep.equal([]); + }); + + it('should not throw when cacheItems is called', () => { + const items = [{ path: TEST_PATH_TEST_IMAGE, status: 'PUBLISHED' }]; + const statusParser = sinon.stub().returns('PUBLISHED'); + + expect(() => cache.cacheItems(items, statusParser)).to.not.throw(); + }); + + it('should return false for isAvailable', () => { + expect(cache.isAvailable()).to.be.false; + }); + }); + + describe('PathIndexCache', () => { + let mockPathIndex; + let cache; + + beforeEach(() => { + mockPathIndex = { + insertContentPath: sinon.stub(), + 
findChildren: sinon.stub(), + }; + cache = new PathIndexCache(mockPathIndex); + }); + + describe('findChildren', () => { + it('should delegate to pathIndex.findChildren', () => { + const expectedChildren = [{ path: '/content/dam/test/child1.jpg' }]; + mockPathIndex.findChildren.returns(expectedChildren); + + const result = cache.findChildren(TEST_PATH_TEST); + + expect(result).to.equal(expectedChildren); + expect(mockPathIndex.findChildren).to.have.been.calledWith(TEST_PATH_TEST); + }); + + it('should handle empty children array', () => { + mockPathIndex.findChildren.returns([]); + + const result = cache.findChildren(TEST_PATH_TEST); + + expect(result).to.deep.equal([]); + }); + }); + + describe('cacheItems', () => { + it('should cache items by creating ContentPath and inserting into pathIndex', () => { + const items = [ + { path: '/content/dam/en-us/test/image1.jpg', status: 'PUBLISHED' }, + { path: '/content/dam/en-us/test/image2.jpg', status: 'DRAFT' }, + ]; + const statusParser = (status) => status.toUpperCase(); + + cache.cacheItems(items, statusParser); + + expect(mockPathIndex.insertContentPath).to.have.been.calledTwice; + }); + + it('should handle empty items array', () => { + cache.cacheItems([], (status) => status); + + expect(mockPathIndex.insertContentPath).to.not.have.been.called; + }); + + it('should handle null items', () => { + cache.cacheItems(null, (status) => status); + + expect(mockPathIndex.insertContentPath).to.not.have.been.called; + }); + + it('should handle undefined items', () => { + cache.cacheItems(undefined, (status) => status); + + expect(mockPathIndex.insertContentPath).to.not.have.been.called; + }); + + it('should parse status using provided statusParser', () => { + const items = [ + { path: TEST_PATH_TEST_IMAGE, status: 'published' }, + ]; + const statusParser = sinon.stub().returns('PUBLISHED'); + + cache.cacheItems(items, statusParser); + + expect(statusParser).to.have.been.calledWith('published'); + 
expect(mockPathIndex.insertContentPath).to.have.been.calledOnce; + }); + }); + + describe('isAvailable', () => { + it('should return true', () => { + expect(cache.isAvailable()).to.be.true; + }); + }); + }); +}); + diff --git a/test/audits/content-fragment-404/content-path.test.js b/test/audits/content-fragment-404/content-path.test.js new file mode 100644 index 000000000..ee2be9636 --- /dev/null +++ b/test/audits/content-fragment-404/content-path.test.js @@ -0,0 +1,469 @@ +/* + * Copyright 2025 Adobe. All rights reserved. + * This file is licensed to you under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. You may obtain a copy + * of the License at http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under + * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS + * OF ANY KIND, either express or implied. See the License for the specific language + * governing permissions and limitations under the License. 
+ */ + +/* eslint-env mocha */ +import { expect } from 'chai'; +import { ContentPath, ContentStatus } from '../../../src/content-fragment-404/domain/content/content-path.js'; +import { TEST_PATH_TEST_IMAGE } from './test-constants.js'; + +describe('ContentPath', () => { + describe('ContentStatus enum', () => { + it('should have all expected content statuses', () => { + expect(ContentStatus.PUBLISHED).to.equal('PUBLISHED'); + expect(ContentStatus.MODIFIED).to.equal('MODIFIED'); + expect(ContentStatus.DRAFT).to.equal('DRAFT'); + expect(ContentStatus.ARCHIVED).to.equal('ARCHIVED'); + expect(ContentStatus.DELETED).to.equal('DELETED'); + expect(ContentStatus.UNKNOWN).to.equal('UNKNOWN'); + }); + + it('should contain exactly 6 status values', () => { + const statusValues = Object.values(ContentStatus); + expect(statusValues).to.have.lengthOf(6); + expect(statusValues).to.include.members([ + 'PUBLISHED', 'MODIFIED', 'DRAFT', 'ARCHIVED', 'DELETED', 'UNKNOWN', + ]); + }); + }); + + describe('constructor', () => { + it('should create a content path with all parameters', () => { + const path = '/content/dam/test/image.jpg'; + const status = ContentStatus.PUBLISHED; + const locale = { code: 'en-us', toJSON: () => ({ code: 'en-us' }) }; + + const contentPath = new ContentPath(path, status, locale); + + expect(contentPath.path).to.equal(path); + expect(contentPath.status).to.equal(status); + expect(contentPath.locale).to.equal(locale); + }); + + it('should create a content path with null parameters', () => { + const contentPath = new ContentPath(null, null, null); + + expect(contentPath.path).to.be.null; + expect(contentPath.status).to.be.null; + expect(contentPath.locale).to.be.null; + }); + + it('should create a content path with undefined parameters', () => { + const contentPath = new ContentPath(undefined, undefined, undefined); + + expect(contentPath.path).to.be.undefined; + expect(contentPath.status).to.be.undefined; + expect(contentPath.locale).to.be.undefined; + }); + + 
it('should create a content path with empty string path', () => { + const contentPath = new ContentPath('', ContentStatus.DRAFT, null); + + expect(contentPath.path).to.equal(''); + expect(contentPath.status).to.equal(ContentStatus.DRAFT); + expect(contentPath.locale).to.be.null; + }); + + it('should create a content path with simple locale object', () => { + const path = '/content/dam/test/image.jpg'; + const status = ContentStatus.PUBLISHED; + const locale = { code: 'fr-fr' }; + + const contentPath = new ContentPath(path, status, locale); + + expect(contentPath.path).to.equal(path); + expect(contentPath.status).to.equal(status); + expect(contentPath.locale).to.equal(locale); + }); + }); + + describe('isValid', () => { + it('should return true for valid non-empty path', () => { + const contentPath = new ContentPath('/content/dam/test/image.jpg', ContentStatus.PUBLISHED, null); + expect(contentPath.isValid()).to.be.true; + }); + + it('should return true for path with spaces', () => { + const contentPath = new ContentPath('/content/dam/test/image with spaces.jpg', ContentStatus.DRAFT, null); + expect(contentPath.isValid()).to.be.true; + }); + + it('should return true for path with special characters', () => { + const contentPath = new ContentPath('/content/dam/test/image-file_name.jpg', ContentStatus.MODIFIED, null); + expect(contentPath.isValid()).to.be.true; + }); + + it('should return false for null path', () => { + const contentPath = new ContentPath(null, ContentStatus.PUBLISHED, null); + expect(contentPath.isValid()).to.be.false; + }); + + it('should return false for undefined path', () => { + const contentPath = new ContentPath(undefined, ContentStatus.PUBLISHED, null); + expect(contentPath.isValid()).to.be.false; + }); + + it('should return false for empty string path', () => { + const contentPath = new ContentPath('', ContentStatus.PUBLISHED, null); + expect(contentPath.isValid()).to.be.false; + }); + + it('should return false for whitespace-only path', () 
=> { + const contentPath = new ContentPath(' ', ContentStatus.PUBLISHED, null); + expect(contentPath.isValid()).to.be.false; + }); + + it('should return false for tab and newline whitespace', () => { + const contentPath = new ContentPath('\t\n\r ', ContentStatus.PUBLISHED, null); + expect(contentPath.isValid()).to.be.false; + }); + + it('should return true for path with leading/trailing spaces but content', () => { + const contentPath = new ContentPath(' /content/dam/test/image.jpg ', ContentStatus.PUBLISHED, null); + expect(contentPath.isValid()).to.be.true; + }); + + it('should not be affected by status or locale values', () => { + const validPath = '/content/dam/test/image.jpg'; + + const contentPath1 = new ContentPath(validPath, null, null); + const contentPath2 = new ContentPath(validPath, undefined, undefined); + const contentPath3 = new ContentPath(validPath, ContentStatus.DELETED, { invalid: true }); + + expect(contentPath1.isValid()).to.be.true; + expect(contentPath2.isValid()).to.be.true; + expect(contentPath3.isValid()).to.be.true; + }); + + it('should return false for non-string path types', () => { + const numberPath = new ContentPath(123, ContentStatus.PUBLISHED, null); + const booleanPath = new ContentPath(true, ContentStatus.PUBLISHED, null); + const objectPath = new ContentPath({}, ContentStatus.PUBLISHED, null); + const arrayPath = new ContentPath([], ContentStatus.PUBLISHED, null); + + expect(numberPath.isValid()).to.be.false; + expect(booleanPath.isValid()).to.be.false; + expect(objectPath.isValid()).to.be.false; + expect(arrayPath.isValid()).to.be.false; + }); + }); + + describe('isPublished', () => { + it('should return true when status is PUBLISHED', () => { + const contentPath = new ContentPath(TEST_PATH_TEST_IMAGE, ContentStatus.PUBLISHED, null); + expect(contentPath.isPublished()).to.be.true; + }); + + it('should return false when status is MODIFIED', () => { + const contentPath = new ContentPath(TEST_PATH_TEST_IMAGE, 
ContentStatus.MODIFIED, null); + expect(contentPath.isPublished()).to.be.false; + }); + + it('should return false when status is DRAFT', () => { + const contentPath = new ContentPath(TEST_PATH_TEST_IMAGE, ContentStatus.DRAFT, null); + expect(contentPath.isPublished()).to.be.false; + }); + + it('should return false when status is ARCHIVED', () => { + const contentPath = new ContentPath(TEST_PATH_TEST_IMAGE, ContentStatus.ARCHIVED, null); + expect(contentPath.isPublished()).to.be.false; + }); + + it('should return false when status is DELETED', () => { + const contentPath = new ContentPath(TEST_PATH_TEST_IMAGE, ContentStatus.DELETED, null); + expect(contentPath.isPublished()).to.be.false; + }); + + it('should return false when status is UNKNOWN', () => { + const contentPath = new ContentPath(TEST_PATH_TEST_IMAGE, ContentStatus.UNKNOWN, null); + expect(contentPath.isPublished()).to.be.false; + }); + + it('should return false when status is null', () => { + const contentPath = new ContentPath(TEST_PATH_TEST_IMAGE, null, null); + expect(contentPath.isPublished()).to.be.false; + }); + + it('should return false when status is undefined', () => { + const contentPath = new ContentPath(TEST_PATH_TEST_IMAGE, undefined, null); + expect(contentPath.isPublished()).to.be.false; + }); + + it('should return false when status is an invalid string', () => { + const contentPath = new ContentPath(TEST_PATH_TEST_IMAGE, 'INVALID_STATUS', null); + expect(contentPath.isPublished()).to.be.false; + }); + + it('should use strict equality comparison', () => { + const contentPath = new ContentPath(TEST_PATH_TEST_IMAGE, 'published', null); + expect(contentPath.isPublished()).to.be.false; // Case sensitive + }); + + it('should not be affected by path or locale values', () => { + const publishedPath1 = new ContentPath(null, ContentStatus.PUBLISHED, null); + const publishedPath2 = new ContentPath('', ContentStatus.PUBLISHED, undefined); + const publishedPath3 = new ContentPath('/valid/path', 
ContentStatus.PUBLISHED, { invalid: 'locale' }); + + expect(publishedPath1.isPublished()).to.be.true; + expect(publishedPath2.isPublished()).to.be.true; + expect(publishedPath3.isPublished()).to.be.true; + }); + }); + + describe('toJSON', () => { + it('should serialize a complete content path with locale having toJSON method', () => { + const path = TEST_PATH_TEST_IMAGE; + const status = ContentStatus.PUBLISHED; + const locale = { + code: 'en-us', + name: 'English (US)', + toJSON: () => ({ code: 'en-us', name: 'English (US)' }), + }; + + const contentPath = new ContentPath(path, status, locale); + const json = contentPath.toJSON(); + + expect(json).to.deep.equal({ + path, + status, + locale: { code: 'en-us', name: 'English (US)' }, + }); + }); + + it('should serialize a content path with locale without toJSON method', () => { + const path = TEST_PATH_TEST_IMAGE; + const status = ContentStatus.DRAFT; + const locale = { code: 'fr-fr', name: 'French (France)' }; + + const contentPath = new ContentPath(path, status, locale); + const json = contentPath.toJSON(); + + expect(json).to.deep.equal({ + path, + status, + locale: { code: 'fr-fr', name: 'French (France)' }, + }); + }); + + it('should serialize a content path with null locale', () => { + const path = TEST_PATH_TEST_IMAGE; + const status = ContentStatus.MODIFIED; + + const contentPath = new ContentPath(path, status, null); + const json = contentPath.toJSON(); + + expect(json).to.deep.equal({ + path, + status, + locale: null, + }); + }); + + it('should serialize a content path with undefined locale', () => { + const path = TEST_PATH_TEST_IMAGE; + const status = ContentStatus.ARCHIVED; + + const contentPath = new ContentPath(path, status, undefined); + const json = contentPath.toJSON(); + + expect(json).to.deep.equal({ + path, + status, + locale: undefined, + }); + }); + + it('should serialize a content path with all null values', () => { + const contentPath = new ContentPath(null, null, null); + const json = 
contentPath.toJSON(); + + expect(json).to.deep.equal({ + path: null, + status: null, + locale: null, + }); + }); + + it('should serialize a content path with all undefined values', () => { + const contentPath = new ContentPath(undefined, undefined, undefined); + const json = contentPath.toJSON(); + + expect(json).to.deep.equal({ + path: undefined, + status: undefined, + locale: undefined, + }); + }); + + it('should handle locale with toJSON method that returns null', () => { + const path = TEST_PATH_TEST_IMAGE; + const status = ContentStatus.DELETED; + const locale = { + code: 'invalid', + toJSON: () => null, + }; + + const contentPath = new ContentPath(path, status, locale); + const json = contentPath.toJSON(); + + expect(json).to.deep.equal({ + path, + status, + locale: { code: 'invalid', toJSON: locale.toJSON }, + }); + }); + + it('should handle locale with toJSON method that returns undefined', () => { + const path = TEST_PATH_TEST_IMAGE; + const status = ContentStatus.UNKNOWN; + const locale = { + code: 'test', + toJSON: () => undefined, + }; + + const contentPath = new ContentPath(path, status, locale); + const json = contentPath.toJSON(); + + expect(json).to.deep.equal({ + path, + status, + locale: { code: 'test', toJSON: locale.toJSON }, + }); + }); + + it('should handle primitive locale values', () => { + const path = TEST_PATH_TEST_IMAGE; + const status = ContentStatus.PUBLISHED; + + const contentPath1 = new ContentPath(path, status, 'en-us'); + const contentPath2 = new ContentPath(path, status, 123); + const contentPath3 = new ContentPath(path, status, true); + + expect(contentPath1.toJSON().locale).to.equal('en-us'); + expect(contentPath2.toJSON().locale).to.equal(123); + expect(contentPath3.toJSON().locale).to.equal(true); + }); + }); + + describe('integration scenarios', () => { + it('should work with JSON.stringify', () => { + const contentPath = new ContentPath( + TEST_PATH_TEST_IMAGE, + ContentStatus.PUBLISHED, + { code: 'en-us', toJSON: () => ({ 
code: 'en-us' }) }, + ); + + const jsonString = JSON.stringify(contentPath); + const parsed = JSON.parse(jsonString); + + expect(parsed).to.deep.equal({ + path: TEST_PATH_TEST_IMAGE, + status: ContentStatus.PUBLISHED, + locale: { code: 'en-us' }, + }); + }); + + it('should handle all content statuses consistently', () => { + const path = TEST_PATH_TEST_IMAGE; + const locale = { code: 'en-us' }; + + const statuses = [ + ContentStatus.PUBLISHED, + ContentStatus.MODIFIED, + ContentStatus.DRAFT, + ContentStatus.ARCHIVED, + ContentStatus.DELETED, + ContentStatus.UNKNOWN, + ]; + + statuses.forEach((status) => { + const contentPath = new ContentPath(path, status, locale); + + expect(contentPath.isValid()).to.be.true; + expect(contentPath.isPublished()).to.equal(status === ContentStatus.PUBLISHED); + + const json = contentPath.toJSON(); + expect(json.path).to.equal(path); + expect(json.status).to.equal(status); + expect(json.locale).to.equal(locale); + }); + }); + + it('should handle complex locale objects with nested properties', () => { + const complexLocale = { + code: 'en-us', + name: 'English (US)', + region: 'North America', + metadata: { + currency: 'USD', + timezone: 'PST', + }, + toJSON: function toJSON() { + return { + code: this.code, + name: this.name, + region: this.region, + }; + }, + }; + + const contentPath = new ContentPath(TEST_PATH_TEST_IMAGE, ContentStatus.PUBLISHED, complexLocale); + const json = contentPath.toJSON(); + + expect(json.locale).to.deep.equal({ + code: 'en-us', + name: 'English (US)', + region: 'North America', + }); + }); + + it('should be immutable after creation', () => { + const originalPath = TEST_PATH_TEST_IMAGE; + const originalStatus = ContentStatus.PUBLISHED; + const originalLocale = { code: 'en-us' }; + + const contentPath = new ContentPath(originalPath, originalStatus, originalLocale); + + // Modify properties + contentPath.path = '/modified/path'; + contentPath.status = ContentStatus.DRAFT; + contentPath.locale = { code: 
'fr-fr' }; + + // Properties should be modified (JavaScript objects are mutable) + expect(contentPath.path).to.equal('/modified/path'); + expect(contentPath.status).to.equal(ContentStatus.DRAFT); + expect(contentPath.locale.code).to.equal('fr-fr'); + + // But creating a new instance should work with original values + const newContentPath = new ContentPath(originalPath, originalStatus, originalLocale); + expect(newContentPath.path).to.equal(originalPath); + expect(newContentPath.status).to.equal(originalStatus); + expect(newContentPath.locale.code).to.equal('en-us'); + }); + + it('should handle edge cases in path validation', () => { + const edgeCases = [ + { path: '/', expected: true }, + { path: '/content', expected: true }, + { path: '/content/dam', expected: true }, + { path: '/content/dam/', expected: true }, + { path: 'relative/path', expected: true }, + { path: ' /content/dam/test ', expected: true }, // Trimmed to non-empty + { path: '\n\t\r', expected: false }, // Only whitespace + ]; + + edgeCases.forEach(({ path, expected }) => { + const contentPath = new ContentPath(path, ContentStatus.PUBLISHED, null); + expect(contentPath.isValid()).to.equal(expected, `Path "${path}" should be ${expected ? 'valid' : 'invalid'}`); + }); + }); + }); +}); diff --git a/test/audits/content-fragment-404/handler.test.js b/test/audits/content-fragment-404/handler.test.js new file mode 100644 index 000000000..b8a5af6e0 --- /dev/null +++ b/test/audits/content-fragment-404/handler.test.js @@ -0,0 +1,547 @@ +/* + * Copyright 2025 Adobe. All rights reserved. + * This file is licensed to you under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
You may obtain a copy + * of the License at http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under + * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS + * OF ANY KIND, either express or implied. See the License for the specific language + * governing permissions and limitations under the License. + */ + +/* eslint-env mocha */ +import { expect, use } from 'chai'; +import sinon from 'sinon'; +import sinonChai from 'sinon-chai'; +import chaiAsPromised from 'chai-as-promised'; +import esmock from 'esmock'; +import { Suggestion as SuggestionModel } from '@adobe/spacecat-shared-data-access'; +import { AUDIT_TYPE, GUIDANCE_TYPE } from '../../../src/content-fragment-404/handler.js'; +import { MockContextBuilder } from '../../shared.js'; + +use(sinonChai); +use(chaiAsPromised); + +import { + TEST_SITE_ID, + TEST_OPPORTUNITY_ID, + TEST_AUDIT_ID, + TEST_SUGGESTION_ID, + TEST_SUGGESTION_ID_2, + TEST_BASE_URL, + TEST_CUSTOM_URL, + TEST_PATH_1, + TEST_PATH_2, + TEST_SUGGESTED_PATH_1, + TEST_OBJECT_FORMAT_PATH, + TEST_STRING_FORMAT_PATH, + REQUEST_COUNT_1, + REQUEST_COUNT_2, + REQUEST_COUNT_LOW, + REQUEST_COUNT_NONE, + USER_AGENT_COUNT_1, + USER_AGENT_COUNT_2, + TEST_USER_AGENT_1, + TEST_USER_AGENT_2, + EXPECTED_SUGGESTIONS_COUNT, + EXPECTED_SINGLE_SUGGESTION_COUNT, +} from './test-constants.js'; + +describe('Broken Content Fragment Links Handler', () => { + let sandbox; + let context; + let site; + let baseURL; + let handlerModule; + let athenaCollectorStub; + let pathIndexStub; + let aemClientStub; + let analysisStrategyStub; + let convertToOpportunityStub; + let syncSuggestionsStub; + let mockOpportunity; + let mockSuggestion; + let mockConfiguration; + + beforeEach(async () => { + sandbox = sinon.createSandbox(); + + baseURL = TEST_BASE_URL; + site = { + getId: () => TEST_SITE_ID, + getBaseURL: () => baseURL, + getDeliveryType: () => 'aem_edge', + }; + + 
mockOpportunity = { + getId: () => TEST_OPPORTUNITY_ID, + getType: () => AUDIT_TYPE, + getAuditId: () => TEST_AUDIT_ID, + }; + + mockSuggestion = { + getId: () => TEST_SUGGESTION_ID, + getData: () => ({ + requestedPath: TEST_PATH_1, + suggestedPath: TEST_SUGGESTED_PATH_1, + type: 'SIMILAR', + reason: 'Similar path found', + requestCount: REQUEST_COUNT_1, + requestUserAgents: [{ userAgent: TEST_USER_AGENT_1, count: USER_AGENT_COUNT_1 }], + }), + }; + + mockConfiguration = { + isHandlerEnabledForSite: sandbox.stub().returns(true), + }; + + athenaCollectorStub = { + fetchContentFragment404s: sandbox.stub().resolves([ + { url: TEST_PATH_1, requestCount: REQUEST_COUNT_1, requestUserAgents: [{ userAgent: TEST_USER_AGENT_1, count: USER_AGENT_COUNT_1 }] }, + { url: TEST_PATH_2, requestCount: REQUEST_COUNT_2, requestUserAgents: [{ userAgent: TEST_USER_AGENT_2, count: USER_AGENT_COUNT_2 }] }, + ]), + constructor: { name: 'AthenaCollector' }, + }; + + pathIndexStub = sandbox.stub(); + aemClientStub = sandbox.stub(); + analysisStrategyStub = { + analyze: sandbox.stub().resolves([ + { toJSON: () => ({ requestedPath: TEST_PATH_1, suggestedPath: TEST_SUGGESTED_PATH_1, type: 'SIMILAR', reason: 'Similar path found' }) }, + { toJSON: () => ({ requestedPath: TEST_PATH_2, suggestedPath: null, type: 'PUBLISH', reason: 'Content not published' }) }, + ]), + }; + + convertToOpportunityStub = sandbox.stub().resolves(mockOpportunity); + syncSuggestionsStub = sandbox.stub().resolves(); + + context = new MockContextBuilder() + .withSandbox(sandbox) + .withOverrides({ + log: { + info: sandbox.spy(), + debug: sandbox.spy(), + warn: sandbox.spy(), + error: sandbox.spy(), + }, + site, + sqs: { + sendMessage: sandbox.stub().resolves(), + }, + env: { + QUEUE_SPACECAT_TO_MYSTIQUE: 'test-mystique-queue', + }, + dataAccess: { + Configuration: { + findLatest: sandbox.stub().resolves(mockConfiguration), + }, + Suggestion: { + allByOpportunityIdAndStatus: sandbox.stub().resolves([mockSuggestion]), + }, + 
Opportunity: { + allBySiteIdAndStatus: sandbox.stub().resolves([mockOpportunity]), + }, + }, + }) + .build(); + + handlerModule = await esmock('../../../src/content-fragment-404/handler.js', { + '../../../src/content-fragment-404/collectors/athena-collector.js': { + AthenaCollector: { + createFrom: sandbox.stub().resolves(athenaCollectorStub), + }, + }, + '../../../src/content-fragment-404/domain/index/path-index.js': { + PathIndex: function MockPathIndex() { + return pathIndexStub; + }, + }, + '../../../src/content-fragment-404/clients/aem-client.js': { + AemClient: { + createFrom: sandbox.stub().returns(aemClientStub), + }, + }, + '../../../src/content-fragment-404/analysis/analysis-strategy.js': { + AnalysisStrategy: function MockAnalysisStrategy() { + return analysisStrategyStub; + }, + }, + '../../../src/common/opportunity.js': { + convertToOpportunity: convertToOpportunityStub, + }, + '../../../src/utils/data-access.js': { + syncSuggestions: syncSuggestionsStub, + }, + }); + }); + + afterEach(() => { + sandbox.restore(); + }); + + describe('contentFragment404AuditRunner', () => { + it('should successfully fetch and analyze broken content fragment paths', async () => { + const result = await handlerModule.contentFragment404AuditRunner(baseURL, context, site); + + expect(athenaCollectorStub.fetchContentFragment404s).to.have.been.calledOnce; + expect(analysisStrategyStub.analyze).to.have.been.calledWith([ + TEST_PATH_1, + TEST_PATH_2, + ]); + + expect(result).to.deep.equal({ + fullAuditRef: baseURL, + auditResult: { + contentFragment404s: [ + { url: TEST_PATH_1, requestCount: REQUEST_COUNT_1, requestUserAgents: [{ userAgent: TEST_USER_AGENT_1, count: USER_AGENT_COUNT_1 }] }, + { url: TEST_PATH_2, requestCount: REQUEST_COUNT_2, requestUserAgents: [{ userAgent: TEST_USER_AGENT_2, count: USER_AGENT_COUNT_2 }] }, + ], + suggestions: [ + { requestedPath: TEST_PATH_1, suggestedPath: TEST_SUGGESTED_PATH_1, type: 'SIMILAR', reason: 'Similar path found' }, + { 
requestedPath: TEST_PATH_2, suggestedPath: null, type: 'PUBLISH', reason: 'Content not published' }, + ], + }, + }); + }); + + it('should handle empty results from collector', async () => { + athenaCollectorStub.fetchContentFragment404s.resolves([]); + analysisStrategyStub.analyze.resolves([]); + + const result = await handlerModule.contentFragment404AuditRunner(baseURL, context, site); + + expect(result.auditResult.contentFragment404s).to.deep.equal([]); + expect(result.auditResult.suggestions).to.deep.equal([]); + }); + + it('should handle mixed format in contentFragment404s (objects and strings)', async () => { + athenaCollectorStub.fetchContentFragment404s.resolves([ + { url: TEST_OBJECT_FORMAT_PATH, requestCount: REQUEST_COUNT_LOW, requestUserAgents: [{ userAgent: TEST_USER_AGENT_1, count: USER_AGENT_COUNT_1 }] }, + TEST_STRING_FORMAT_PATH, + ]); + analysisStrategyStub.analyze.resolves([]); + + await handlerModule.contentFragment404AuditRunner(baseURL, context, site); + + expect(analysisStrategyStub.analyze).to.have.been.calledWith([ + TEST_OBJECT_FORMAT_PATH, + TEST_STRING_FORMAT_PATH, + ]); + }); + + it('should pass site in auditContext', async () => { + await handlerModule.contentFragment404AuditRunner(baseURL, context, site); + + expect(athenaCollectorStub.fetchContentFragment404s).to.have.been.calledOnce; + expect(analysisStrategyStub.analyze).to.have.been.calledOnce; + }); + + it('should use correct baseURL in response', async () => { + const result = await handlerModule.contentFragment404AuditRunner(TEST_CUSTOM_URL, context, site); + + expect(result.fullAuditRef).to.equal(TEST_CUSTOM_URL); + }); + }); + + describe('createContentFragmentPathSuggestions', () => { + let auditData; + + beforeEach(() => { + auditData = { + id: TEST_AUDIT_ID, + auditResult: { + contentFragment404s: [ + { url: TEST_PATH_1, requestCount: REQUEST_COUNT_1, requestUserAgents: [{ userAgent: TEST_USER_AGENT_1, count: USER_AGENT_COUNT_1 }] }, + { url: TEST_PATH_2, requestCount: 
REQUEST_COUNT_2, requestUserAgents: [{ userAgent: TEST_USER_AGENT_2, count: USER_AGENT_COUNT_2 }] }, + ], + suggestions: [ + { requestedPath: TEST_PATH_1, suggestedPath: TEST_SUGGESTED_PATH_1, type: 'SIMILAR', reason: 'Similar path found' }, + { requestedPath: TEST_PATH_2, suggestedPath: null, type: 'PUBLISH', reason: 'Content not published' }, + ], + }, + }; + }); + + it('should create opportunity and sync suggestions with enriched data', async () => { + await handlerModule.createContentFragmentPathSuggestions(baseURL, auditData, context); + + expect(convertToOpportunityStub).to.have.been.calledOnce; + expect(convertToOpportunityStub.firstCall.args[0]).to.equal(baseURL); + expect(convertToOpportunityStub.firstCall.args[1]).to.equal(auditData); + expect(convertToOpportunityStub.firstCall.args[2]).to.equal(context); + expect(typeof convertToOpportunityStub.firstCall.args[3]).to.equal('function'); + expect(convertToOpportunityStub.firstCall.args[4]).to.equal(AUDIT_TYPE); + + expect(syncSuggestionsStub).to.have.been.calledOnce; + const syncArgs = syncSuggestionsStub.firstCall.args[0]; + expect(syncArgs.context).to.equal(context); + expect(syncArgs.opportunity).to.equal(mockOpportunity); + expect(syncArgs.newData).to.have.lengthOf(EXPECTED_SUGGESTIONS_COUNT); + expect(syncArgs.newData[0]).to.deep.include({ + requestedPath: TEST_PATH_1, + suggestedPath: TEST_SUGGESTED_PATH_1, + type: 'SIMILAR', + reason: 'Similar path found', + requestCount: REQUEST_COUNT_1, + requestUserAgents: [{ userAgent: TEST_USER_AGENT_1, count: USER_AGENT_COUNT_1 }], + }); + + }); + + it('should skip when no suggestions are provided', async () => { + auditData.auditResult.suggestions = []; + + await handlerModule.createContentFragmentPathSuggestions(baseURL, auditData, context); + + expect(convertToOpportunityStub).not.to.have.been.called; + expect(syncSuggestionsStub).not.to.have.been.called; + }); + + it('should skip when suggestions is null', async () => { + auditData.auditResult.suggestions = 
null; + + await handlerModule.createContentFragmentPathSuggestions(baseURL, auditData, context); + + expect(convertToOpportunityStub).not.to.have.been.called; + expect(syncSuggestionsStub).not.to.have.been.called; + }); + + it('should enrich suggestions with requestCount and requestUserAgents from contentFragment404s', async () => { + await handlerModule.createContentFragmentPathSuggestions(baseURL, auditData, context); + + const syncArgs = syncSuggestionsStub.firstCall.args[0]; + expect(syncArgs.newData[0].requestCount).to.equal(REQUEST_COUNT_1); + expect(syncArgs.newData[0].requestUserAgents).to.deep.equal([{ userAgent: TEST_USER_AGENT_1, count: USER_AGENT_COUNT_1 }]); + expect(syncArgs.newData[1].requestCount).to.equal(REQUEST_COUNT_2); + expect(syncArgs.newData[1].requestUserAgents).to.deep.equal([{ userAgent: TEST_USER_AGENT_2, count: USER_AGENT_COUNT_2 }]); + }); + + it('should handle missing requestCount and requestUserAgents in contentFragment404s', async () => { + auditData.auditResult.contentFragment404s = [ + { url: TEST_PATH_1 }, + ]; + auditData.auditResult.suggestions = [ + { requestedPath: TEST_PATH_1, suggestedPath: TEST_SUGGESTED_PATH_1, type: 'SIMILAR', reason: 'Similar' }, + ]; + + await handlerModule.createContentFragmentPathSuggestions(baseURL, auditData, context); + + const syncArgs = syncSuggestionsStub.firstCall.args[0]; + expect(syncArgs.newData[0].requestCount).to.equal(REQUEST_COUNT_NONE); + expect(syncArgs.newData[0].requestUserAgents).to.deep.equal([]); + }); + + it('should use correct buildKey function', async () => { + await handlerModule.createContentFragmentPathSuggestions(baseURL, auditData, context); + + const syncArgs = syncSuggestionsStub.firstCall.args[0]; + const { buildKey } = syncArgs; + + const key1 = buildKey(auditData.auditResult.suggestions[0]); + const key2 = buildKey(auditData.auditResult.suggestions[1]); + + expect(key1).to.equal(`${TEST_PATH_1}|SIMILAR`); + expect(key2).to.equal(`${TEST_PATH_2}|PUBLISH`); + }); + + 
it('should use correct getRank function', async () => { + await handlerModule.createContentFragmentPathSuggestions(baseURL, auditData, context); + + const syncArgs = syncSuggestionsStub.firstCall.args[0]; + const { getRank } = syncArgs; + + const rank1 = getRank({ requestCount: REQUEST_COUNT_1 }); + const rank2 = getRank({ requestCount: REQUEST_COUNT_2 }); + + expect(rank1).to.equal(REQUEST_COUNT_1); + expect(rank2).to.equal(REQUEST_COUNT_2); + }); + + it('should map new suggestions correctly', async () => { + await handlerModule.createContentFragmentPathSuggestions(baseURL, auditData, context); + + const syncArgs = syncSuggestionsStub.firstCall.args[0]; + const { mapNewSuggestion } = syncArgs; + + const enrichedSuggestion = { + requestedPath: TEST_PATH_1, + suggestedPath: TEST_SUGGESTED_PATH_1, + type: 'SIMILAR', + reason: 'Similar path found', + requestCount: REQUEST_COUNT_1, + requestUserAgents: [{ userAgent: TEST_USER_AGENT_1, count: USER_AGENT_COUNT_1 }], + }; + + const mapped = mapNewSuggestion(enrichedSuggestion); + + expect(mapped).to.deep.equal({ + opportunityId: TEST_OPPORTUNITY_ID, + type: SuggestionModel.TYPES.AI_INSIGHTS, + rank: REQUEST_COUNT_1, + data: enrichedSuggestion, + }); + }); + }); + + describe('enrichContentFragmentPathSuggestions', () => { + let auditData; + + beforeEach(() => { + auditData = { + id: TEST_AUDIT_ID, + auditResult: { + contentFragment404s: [ + { url: TEST_PATH_1, requestCount: REQUEST_COUNT_1, requestUserAgents: [{ userAgent: TEST_USER_AGENT_1, count: USER_AGENT_COUNT_1 }] }, + ], + suggestions: [ + { requestedPath: TEST_PATH_1, suggestedPath: TEST_SUGGESTED_PATH_1, type: 'SIMILAR', reason: 'Similar path found' }, + ], + }, + }; + }); + + it('should send suggestions to Mystique when handler is enabled', async () => { + await handlerModule.enrichContentFragmentPathSuggestions(baseURL, auditData, context, site); + + expect(context.dataAccess.Configuration.findLatest).to.have.been.calledOnce; + 
expect(mockConfiguration.isHandlerEnabledForSite).to.have.been.calledWith(AUDIT_TYPE, site); + expect(context.dataAccess.Opportunity.allBySiteIdAndStatus).to.have.been.calledWith(TEST_SITE_ID, 'NEW'); + expect(context.dataAccess.Suggestion.allByOpportunityIdAndStatus).to.have.been.calledWith(TEST_OPPORTUNITY_ID, SuggestionModel.STATUSES.NEW); + expect(context.sqs.sendMessage).to.have.been.calledOnce; + }); + + it('should skip when handler is disabled for site', async () => { + mockConfiguration.isHandlerEnabledForSite.returns(false); + + await handlerModule.enrichContentFragmentPathSuggestions(baseURL, auditData, context, site); + + expect(context.dataAccess.Opportunity.allBySiteIdAndStatus).not.to.have.been.called; + expect(context.sqs.sendMessage).not.to.have.been.called; + }); + + it('should skip when no opportunity found for this audit', async () => { + context.dataAccess.Opportunity.allBySiteIdAndStatus.resolves([]); + + await handlerModule.enrichContentFragmentPathSuggestions(baseURL, auditData, context, site); + + expect(context.dataAccess.Suggestion.allByOpportunityIdAndStatus).not.to.have.been.called; + expect(context.sqs.sendMessage).not.to.have.been.called; + }); + + it('should skip when opportunity type does not match', async () => { + const wrongOpportunity = { + getId: () => 'wrong-opportunity-id', + getType: () => 'different-type', + getAuditId: () => TEST_AUDIT_ID, + }; + context.dataAccess.Opportunity.allBySiteIdAndStatus.resolves([wrongOpportunity]); + + await handlerModule.enrichContentFragmentPathSuggestions(baseURL, auditData, context, site); + + expect(context.sqs.sendMessage).not.to.have.been.called; + }); + + it('should skip when opportunity audit ID does not match', async () => { + const wrongOpportunity = { + getId: () => 'wrong-opportunity-id', + getType: () => AUDIT_TYPE, + getAuditId: () => 'different-audit-id', + }; + context.dataAccess.Opportunity.allBySiteIdAndStatus.resolves([wrongOpportunity]); + + await 
handlerModule.enrichContentFragmentPathSuggestions(baseURL, auditData, context, site); + + expect(context.sqs.sendMessage).not.to.have.been.called; + }); + + it('should skip when no synced suggestions found', async () => { + context.dataAccess.Suggestion.allByOpportunityIdAndStatus.resolves([]); + + await handlerModule.enrichContentFragmentPathSuggestions(baseURL, auditData, context, site); + + expect(context.sqs.sendMessage).not.to.have.been.called; + }); + + it('should skip when synced suggestions is null', async () => { + context.dataAccess.Suggestion.allByOpportunityIdAndStatus.resolves(null); + + await handlerModule.enrichContentFragmentPathSuggestions(baseURL, auditData, context, site); + + expect(context.sqs.sendMessage).not.to.have.been.called; + }); + + it('should send correct message structure to Mystique', async () => { + await handlerModule.enrichContentFragmentPathSuggestions(baseURL, auditData, context, site); + + expect(context.sqs.sendMessage).to.have.been.calledOnce; + const sqsCall = context.sqs.sendMessage.getCall(0); + expect(sqsCall.args[0]).to.equal('test-mystique-queue'); + + const message = sqsCall.args[1]; + expect(message).to.have.property('type', GUIDANCE_TYPE); + expect(message).to.have.property('siteId', TEST_SITE_ID); + expect(message).to.have.property('auditId', TEST_AUDIT_ID); + expect(message).to.have.property('deliveryType', 'aem_edge'); + expect(message).to.have.property('url', baseURL); + expect(message).to.have.property('time'); + expect(new Date(message.time)).to.be.a('date'); + + expect(message.data).to.have.property('opportunityId', TEST_OPPORTUNITY_ID); + expect(message.data.contentFragment404s).to.be.an('array').with.lengthOf(EXPECTED_SINGLE_SUGGESTION_COUNT); + + const brokenPath = message.data.contentFragment404s[0]; + expect(brokenPath).to.have.property('suggestionId', TEST_SUGGESTION_ID); + expect(brokenPath).to.have.property('requestedPath', TEST_PATH_1); + expect(brokenPath).to.have.property('requestCount', 
REQUEST_COUNT_1); + expect(brokenPath).to.have.property('requestUserAgents'); + expect(brokenPath.requestUserAgents).to.deep.equal([{ userAgent: TEST_USER_AGENT_1, count: USER_AGENT_COUNT_1 }]); + expect(brokenPath).to.have.property('suggestedPath', TEST_SUGGESTED_PATH_1); + expect(brokenPath).to.have.property('reason', 'Similar path found'); + }); + + it('should handle multiple suggestions in Mystique message', async () => { + const mockSuggestion2 = { + getId: () => TEST_SUGGESTION_ID_2, + getData: () => ({ + requestedPath: TEST_PATH_2, + suggestedPath: null, + type: 'PUBLISH', + reason: 'Content not published', + requestCount: REQUEST_COUNT_2, + requestUserAgents: [{ userAgent: TEST_USER_AGENT_2, count: USER_AGENT_COUNT_2 }], + }), + }; + context.dataAccess.Suggestion.allByOpportunityIdAndStatus.resolves([mockSuggestion, mockSuggestion2]); + + await handlerModule.enrichContentFragmentPathSuggestions(baseURL, auditData, context, site); + + const message = context.sqs.sendMessage.getCall(0).args[1]; + expect(message.data.contentFragment404s).to.have.lengthOf(EXPECTED_SUGGESTIONS_COUNT); + expect(message.data.contentFragment404s[0].suggestionId).to.equal(TEST_SUGGESTION_ID); + expect(message.data.contentFragment404s[1].suggestionId).to.equal(TEST_SUGGESTION_ID_2); + }); + }); + + describe('audit builder configuration', () => { + it('should export default audit builder', () => { + expect(handlerModule.default).to.exist; + expect(typeof handlerModule.default).to.equal('object'); + }); + + it('should export contentFragment404AuditRunner', () => { + expect(handlerModule.contentFragment404AuditRunner).to.exist; + expect(typeof handlerModule.contentFragment404AuditRunner).to.equal('function'); + }); + + it('should export createContentFragmentPathSuggestions', () => { + expect(handlerModule.createContentFragmentPathSuggestions).to.exist; + expect(typeof handlerModule.createContentFragmentPathSuggestions).to.equal('function'); + }); + + it('should export 
enrichContentFragmentPathSuggestions', () => { + expect(handlerModule.enrichContentFragmentPathSuggestions).to.exist; + expect(typeof handlerModule.enrichContentFragmentPathSuggestions).to.equal('function'); + }); + }); +}); diff --git a/test/audits/content-fragment-404/language-tree.test.js b/test/audits/content-fragment-404/language-tree.test.js new file mode 100644 index 000000000..218e05c15 --- /dev/null +++ b/test/audits/content-fragment-404/language-tree.test.js @@ -0,0 +1,282 @@ +/* + * Copyright 2025 Adobe. All rights reserved. + * This file is licensed to you under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. You may obtain a copy + * of the License at http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under + * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS + * OF ANY KIND, either express or implied. See the License for the specific language + * governing permissions and limitations under the License. 
+ */ + +/* eslint-env mocha */ +import { expect } from 'chai'; +import { LanguageTree } from '../../../src/content-fragment-404/domain/language/language-tree.js'; + +describe('LanguageTree', () => { + describe('COUNTRY_CODE_GROUPS', () => { + it('should contain expected country code groups', () => { + expect(LanguageTree.COUNTRY_CODE_GROUPS).to.have.property('FR'); + expect(LanguageTree.COUNTRY_CODE_GROUPS).to.have.property('DE'); + expect(LanguageTree.COUNTRY_CODE_GROUPS).to.have.property('US'); + expect(LanguageTree.COUNTRY_CODE_GROUPS).to.have.property('ES'); + expect(LanguageTree.COUNTRY_CODE_GROUPS).to.have.property('IT'); + expect(LanguageTree.COUNTRY_CODE_GROUPS).to.have.property('CN'); + expect(LanguageTree.COUNTRY_CODE_GROUPS).to.have.property('RU'); + }); + + it('should have FR group with correct countries', () => { + expect(LanguageTree.COUNTRY_CODE_GROUPS.FR).to.deep.equal(['FR', 'MC']); + }); + + it('should have DE group with correct countries', () => { + expect(LanguageTree.COUNTRY_CODE_GROUPS.DE).to.deep.equal(['DE', 'AT', 'LI']); + }); + + it('should have US group with correct countries', () => { + expect(LanguageTree.COUNTRY_CODE_GROUPS.US).to.deep.equal(['US', 'GB', 'CA', 'AU', 'NZ', 'IE']); + }); + }); + + describe('LOCALE_CODE_GROUPS', () => { + it('should contain expected locale code groups', () => { + expect(LanguageTree.LOCALE_CODE_GROUPS).to.have.property('fr-FR'); + expect(LanguageTree.LOCALE_CODE_GROUPS).to.have.property('de-DE'); + expect(LanguageTree.LOCALE_CODE_GROUPS).to.have.property('en-US'); + expect(LanguageTree.LOCALE_CODE_GROUPS).to.have.property('es-ES'); + expect(LanguageTree.LOCALE_CODE_GROUPS).to.have.property('it-IT'); + expect(LanguageTree.LOCALE_CODE_GROUPS).to.have.property('zh-CN'); + expect(LanguageTree.LOCALE_CODE_GROUPS).to.have.property('ru-RU'); + }); + + it('should have fr-FR group with correct locales', () => { + expect(LanguageTree.LOCALE_CODE_GROUPS['fr-FR']).to.deep.equal(['fr-FR', 'ca-FR', 'fr-CA', 'fr-BE', 
'fr-CH']); + }); + + it('should have en-US group with correct locales', () => { + expect(LanguageTree.LOCALE_CODE_GROUPS['en-US']).to.deep.equal(['en-US', 'en-GB', 'en-CA', 'en-AU', 'en-NZ']); + }); + }); + + describe('COUNTRY_TO_ROOT', () => { + it('should have reverse mappings for country codes', () => { + expect(LanguageTree.COUNTRY_TO_ROOT.FR).to.equal('FR'); + expect(LanguageTree.COUNTRY_TO_ROOT.MC).to.equal('FR'); + expect(LanguageTree.COUNTRY_TO_ROOT.DE).to.equal('DE'); + expect(LanguageTree.COUNTRY_TO_ROOT.AT).to.equal('DE'); + expect(LanguageTree.COUNTRY_TO_ROOT.US).to.equal('US'); + expect(LanguageTree.COUNTRY_TO_ROOT.GB).to.equal('US'); + }); + }); + + describe('LOCALE_TO_ROOT', () => { + it('should have reverse mappings for locale codes', () => { + expect(LanguageTree.LOCALE_TO_ROOT['fr-FR']).to.equal('fr-FR'); + expect(LanguageTree.LOCALE_TO_ROOT['ca-FR']).to.equal('fr-FR'); + expect(LanguageTree.LOCALE_TO_ROOT['en-US']).to.equal('en-US'); + expect(LanguageTree.LOCALE_TO_ROOT['en-GB']).to.equal('en-US'); + }); + }); + + describe('findSimilarLanguageRoots', () => { + it('should return empty array for null locale', () => { + const result = LanguageTree.findSimilarLanguageRoots(null); + expect(result).to.deep.equal([]); + }); + + it('should return empty array for empty locale', () => { + const result = LanguageTree.findSimilarLanguageRoots(''); + expect(result).to.deep.equal([]); + }); + + it('should return case variations for 2-letter locale', () => { + const result = LanguageTree.findSimilarLanguageRoots('FR'); + expect(result).to.include('fr'); + // Note: FR itself is removed from the result since it's the original + }); + + it('should return case variations for 5-letter locale', () => { + const result = LanguageTree.findSimilarLanguageRoots('fr-FR'); + expect(result).to.include('fr-fr'); + expect(result).to.include('FR-fr'); + expect(result).to.include('FR-FR'); + expect(result).to.include('fr_fr'); + expect(result).to.include('fr_FR'); + 
expect(result).to.include('FR_fr'); + expect(result).to.include('FR_FR'); + // Note: fr-FR itself is removed from the result since it's the original + }); + + it('should include English fallbacks', () => { + const result = LanguageTree.findSimilarLanguageRoots('fr-FR'); + expect(result).to.include('us'); + expect(result).to.include('US'); + expect(result).to.include('en-us'); + expect(result).to.include('en-US'); + expect(result).to.include('gb'); + expect(result).to.include('GB'); + expect(result).to.include('en-gb'); + expect(result).to.include('en-GB'); + }); + + it('should include siblings for known locale', () => { + const result = LanguageTree.findSimilarLanguageRoots('fr-FR'); + expect(result).to.include('ca-FR'); + expect(result).to.include('fr-CA'); + expect(result).to.include('fr-BE'); + expect(result).to.include('fr-CH'); + }); + + it('should not include the original locale', () => { + const result = LanguageTree.findSimilarLanguageRoots('fr-FR'); + expect(result).to.not.include('fr-FR'); + }); + + it('should handle unknown locale gracefully', () => { + const result = LanguageTree.findSimilarLanguageRoots('xx-XX'); + expect(result).to.include('xx-xx'); + expect(result).to.include('XX-xx'); + expect(result).to.include('XX-XX'); + expect(result).to.include('xx_xx'); + expect(result).to.include('xx_XX'); + expect(result).to.include('XX_xx'); + expect(result).to.include('XX_XX'); + // Note: xx-XX itself is removed from the result since it's the original + }); + + it('should handle case where no siblings can be found', () => { + // Temporarily modify the mappings to create this scenario + const originalCountryToRoot = { ...LanguageTree.COUNTRY_TO_ROOT }; + const originalCountryGroups = { ...LanguageTree.COUNTRY_CODE_GROUPS }; + + try { + // Make COUNTRY_TO_ROOT return a value that doesn't exist in COUNTRY_CODE_GROUPS + LanguageTree.COUNTRY_TO_ROOT.ZZ = 'NONEXISTENT'; + + // Ensure NONEXISTENT is not in COUNTRY_CODE_GROUPS + delete 
LanguageTree.COUNTRY_CODE_GROUPS.NONEXISTENT; + + const result = LanguageTree.findSimilarLanguageRoots('ZZ'); + + // Should still return case variations and English fallbacks, but no siblings + expect(result).to.include('zz'); // Case variation + expect(result).to.include('us'); // English fallback + expect(result).to.include('en-us'); // English fallback + // Should not crash and should handle the empty siblings array (|| []) + expect(result).to.be.an('array'); + } finally { + // Restore original mappings + LanguageTree.COUNTRY_TO_ROOT = originalCountryToRoot; + LanguageTree.COUNTRY_CODE_GROUPS = originalCountryGroups; + } + }); + }); + + describe('generateCaseVariations', () => { + it('should return empty array for null locale', () => { + const result = LanguageTree.generateCaseVariations(null); + expect(result).to.deep.equal([]); + }); + + it('should return empty array for empty locale', () => { + const result = LanguageTree.generateCaseVariations(''); + expect(result).to.deep.equal([]); + }); + + it('should generate variations for 2-letter locale', () => { + const result = LanguageTree.generateCaseVariations('FR'); + expect(result).to.deep.equal(['fr']); + }); + + it('should generate variations for 5-letter locale with hyphen', () => { + const result = LanguageTree.generateCaseVariations('fr-FR'); + expect(result).to.deep.equal([ + 'fr-fr', 'FR-fr', 'FR-FR', + 'fr_fr', 'fr_FR', 'FR_fr', 'FR_FR', + ]); + }); + + it('should generate variations for 5-letter locale with underscore', () => { + const result = LanguageTree.generateCaseVariations('fr_FR'); + expect(result).to.deep.equal([ + 'fr-fr', 'fr-FR', 'FR-fr', 'FR-FR', + 'fr_fr', 'FR_fr', 'FR_FR', + ]); + }); + + it('should not include the original locale', () => { + const result = LanguageTree.generateCaseVariations('fr-FR'); + expect(result).to.not.include('fr-FR'); + }); + + it('should handle invalid 5-letter locale', () => { + const result = LanguageTree.generateCaseVariations('fr-FR-X'); + 
expect(result).to.deep.equal([]); + }); + }); + + describe('findRootForLocale', () => { + it('should return null for null locale', () => { + const result = LanguageTree.findRootForLocale(null); + expect(result).to.be.null; + }); + + it('should return null for empty locale', () => { + const result = LanguageTree.findRootForLocale(''); + expect(result).to.be.null; + }); + + it('should find root for 2-letter country code', () => { + const result = LanguageTree.findRootForLocale('FR'); + expect(result).to.equal('FR'); + }); + + it('should find root for 2-letter country code in group', () => { + const result = LanguageTree.findRootForLocale('MC'); + expect(result).to.equal('FR'); + }); + + it('should find root for 5-letter locale code', () => { + const result = LanguageTree.findRootForLocale('fr-FR'); + expect(result).to.equal('fr-FR'); + }); + + it('should find root for 5-letter locale code in group', () => { + const result = LanguageTree.findRootForLocale('ca-FR'); + expect(result).to.equal('fr-FR'); + }); + + it('should return null for unknown locale', () => { + const result = LanguageTree.findRootForLocale('xx-XX'); + expect(result).to.be.null; + }); + + it('should return locale itself when it is a root in groups but not in reverse mappings', () => { + // Temporarily modify the reverse mappings to simulate this scenario + const originalCountryToRoot = { ...LanguageTree.COUNTRY_TO_ROOT }; + const originalLocaleToRoot = { ...LanguageTree.LOCALE_TO_ROOT }; + + try { + // Remove 'FR' from COUNTRY_TO_ROOT but keep it in COUNTRY_CODE_GROUPS + delete LanguageTree.COUNTRY_TO_ROOT.FR; + + const result = LanguageTree.findRootForLocale('FR'); + expect(result).to.equal('FR'); // Should return the locale itself + } finally { + LanguageTree.COUNTRY_TO_ROOT = originalCountryToRoot; + LanguageTree.LOCALE_TO_ROOT = originalLocaleToRoot; + } + }); + }); + + describe('findEnglishFallbacks', () => { + it('should return expected English fallbacks', () => { + const result = 
LanguageTree.findEnglishFallbacks(); + expect(result).to.deep.equal([ + 'us', 'US', 'en-us', 'en_us', 'en-US', 'en_US', + 'gb', 'GB', 'en-gb', 'en_gb', 'en-GB', 'en_GB', + ]); + }); + }); +}); diff --git a/test/audits/content-fragment-404/levenshtein-distance.test.js b/test/audits/content-fragment-404/levenshtein-distance.test.js new file mode 100644 index 000000000..c99b84e7c --- /dev/null +++ b/test/audits/content-fragment-404/levenshtein-distance.test.js @@ -0,0 +1,109 @@ +/* + * Copyright 2025 Adobe. All rights reserved. + * This file is licensed to you under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. You may obtain a copy + * of the License at http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under + * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS + * OF ANY KIND, either express or implied. See the License for the specific language + * governing permissions and limitations under the License. 
+ */ + +/* eslint-env mocha */ +import { expect } from 'chai'; +import { LevenshteinDistance } from '../../../src/content-fragment-404/utils/levenshtein-distance.js'; +import { + DISTANCE_SINGLE_CHAR, + DISTANCE_TWO_CHARS, + DISTANCE_THREE_CHARS, + DISTANCE_FOUR_CHARS, + STRING_LENGTH_HELLO, + TEST_PATH_EN_US_IMAGES_PHOTO_JPG, + TEST_PATH_EN_US_IMAGES_PHOTO_PNG, + TEST_PATH_FR_FR_IMAGES_PHOTO_JPG, +} from './test-constants.js'; + +describe('LevenshteinDistance', () => { + describe('calculate', () => { + it('should throw error for null source', () => { + expect(() => LevenshteinDistance.calculate(null, 'target')).to.throw('Strings cannot be null'); + }); + + it('should throw error for null target', () => { + expect(() => LevenshteinDistance.calculate('source', null)).to.throw('Strings cannot be null'); + }); + + it('should return 0 for identical strings', () => { + expect(LevenshteinDistance.calculate('hello', 'hello')).to.equal(0); + }); + + it('should return target length for empty source', () => { + expect(LevenshteinDistance.calculate('', 'hello')).to.equal(STRING_LENGTH_HELLO); + }); + + it('should return source length for empty target', () => { + expect(LevenshteinDistance.calculate('hello', '')).to.equal(STRING_LENGTH_HELLO); + }); + + it('should return 0 for both empty strings', () => { + expect(LevenshteinDistance.calculate('', '')).to.equal(0); + }); + + it('should calculate distance for single character difference', () => { + expect(LevenshteinDistance.calculate('hello', 'helo')).to.equal(DISTANCE_SINGLE_CHAR); + expect(LevenshteinDistance.calculate('hello', 'hallo')).to.equal(DISTANCE_SINGLE_CHAR); + expect(LevenshteinDistance.calculate('hello', 'hell')).to.equal(DISTANCE_SINGLE_CHAR); + expect(LevenshteinDistance.calculate('hello', 'helloo')).to.equal(DISTANCE_SINGLE_CHAR); + }); + + it('should calculate distance for multiple character differences', () => { + expect(LevenshteinDistance.calculate('hello', 'world')).to.equal(DISTANCE_FOUR_CHARS); + 
expect(LevenshteinDistance.calculate('kitten', 'sitting')).to.equal(DISTANCE_THREE_CHARS); + expect(LevenshteinDistance.calculate('saturday', 'sunday')).to.equal(DISTANCE_THREE_CHARS); + }); + + it('should handle case differences', () => { + expect(LevenshteinDistance.calculate('Hello', 'hello')).to.equal(DISTANCE_SINGLE_CHAR); + expect(LevenshteinDistance.calculate('HELLO', 'hello')).to.equal(STRING_LENGTH_HELLO); + }); + + it('should handle special characters', () => { + expect(LevenshteinDistance.calculate('hello-world', 'hello_world')).to.equal(DISTANCE_SINGLE_CHAR); + expect(LevenshteinDistance.calculate('test@example.com', 'test.example.com')).to.equal(DISTANCE_SINGLE_CHAR); + }); + + it('should handle numbers', () => { + expect(LevenshteinDistance.calculate('12345', '12346')).to.equal(DISTANCE_SINGLE_CHAR); + expect(LevenshteinDistance.calculate('12345', '1234')).to.equal(DISTANCE_SINGLE_CHAR); + expect(LevenshteinDistance.calculate('12345', '123456')).to.equal(DISTANCE_SINGLE_CHAR); + }); + + it('should handle mixed content', () => { + expect(LevenshteinDistance.calculate('test123', 'test124')).to.equal(DISTANCE_SINGLE_CHAR); + expect(LevenshteinDistance.calculate('user@domain.com', 'user@domain.org')).to.equal(DISTANCE_THREE_CHARS); + }); + + it('should handle very long strings', () => { + const longString1 = 'a'.repeat(100); + const longString2 = `${'a'.repeat(99)}b`; + expect(LevenshteinDistance.calculate(longString1, longString2)).to.equal(DISTANCE_SINGLE_CHAR); + }); + + it('should handle path-like strings', () => { + expect(LevenshteinDistance.calculate(TEST_PATH_EN_US_IMAGES_PHOTO_JPG, TEST_PATH_EN_US_IMAGES_PHOTO_PNG)).to.equal(DISTANCE_TWO_CHARS); + expect(LevenshteinDistance.calculate(TEST_PATH_EN_US_IMAGES_PHOTO_JPG, TEST_PATH_FR_FR_IMAGES_PHOTO_JPG)).to.equal(DISTANCE_FOUR_CHARS); + }); + + it('should handle locale variations', () => { + expect(LevenshteinDistance.calculate('en-US', 'en-GB')).to.equal(DISTANCE_TWO_CHARS); + 
expect(LevenshteinDistance.calculate('fr-FR', 'fr-CA')).to.equal(DISTANCE_TWO_CHARS); + expect(LevenshteinDistance.calculate('de-DE', 'de-AT')).to.equal(DISTANCE_TWO_CHARS); + }); + + it('should handle complex transformations', () => { + expect(LevenshteinDistance.calculate('kitten', 'sitting')).to.equal(DISTANCE_THREE_CHARS); + expect(LevenshteinDistance.calculate('saturday', 'sunday')).to.equal(DISTANCE_THREE_CHARS); + }); + }); +}); diff --git a/test/audits/content-fragment-404/locale-fallback-rule.test.js b/test/audits/content-fragment-404/locale-fallback-rule.test.js new file mode 100644 index 000000000..5c81cf188 --- /dev/null +++ b/test/audits/content-fragment-404/locale-fallback-rule.test.js @@ -0,0 +1,401 @@ +/* + * Copyright 2025 Adobe. All rights reserved. + * This file is licensed to you under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. You may obtain a copy + * of the License at http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under + * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS + * OF ANY KIND, either express or implied. See the License for the specific language + * governing permissions and limitations under the License. 
+ */ + +/* eslint-env mocha */ +import { expect, use } from 'chai'; +import sinon from 'sinon'; +import sinonChai from 'sinon-chai'; +import chaiAsPromised from 'chai-as-promised'; +import esmock from 'esmock'; +import { MockContextBuilder } from '../../shared.js'; +import { + TEST_PATH_BROKEN, + TEST_PATH_EN_US, + TEST_PATH_EN_GB, + TEST_PATH_FR_FR, + TEST_PATH_BROKEN_WITH_DOUBLE_SLASHES, + ERROR_AEM_CONNECTION_FAILED, + LOCALE_FALLBACK_RULE_PRIORITY, +} from './test-constants.js'; + +use(sinonChai); +use(chaiAsPromised); + +describe('LocaleFallbackRule', () => { + let sandbox; + let context; + let mockAemClient; + let mockSuggestion; + let mockLocale; + let mockPathUtils; + let mockLanguageTree; + let LocaleFallbackRule; + + beforeEach(async () => { + sandbox = sinon.createSandbox(); + + context = new MockContextBuilder() + .withSandbox(sandbox) + .withOverrides({ + log: { + info: sandbox.spy(), + debug: sandbox.spy(), + warn: sandbox.spy(), + error: sandbox.spy(), + }, + }) + .build(); + + mockAemClient = { + isAvailable: sandbox.stub().resolves(false), + }; + + mockSuggestion = { + type: 'locale', + originalPath: TEST_PATH_FR_FR, + suggestedPath: TEST_PATH_EN_US, + }; + + mockLocale = { + getCode: sandbox.stub().returns('fr-fr'), + replaceInPath: sandbox.stub().returns(TEST_PATH_EN_US), + }; + + mockPathUtils = { + hasDoubleSlashes: sandbox.stub().returns(false), + }; + + mockLanguageTree = { + findSimilarLanguageRoots: sandbox.stub().returns(['en-us', 'en-gb']), + findEnglishFallbacks: sandbox.stub().returns(['en-us', 'en-gb', 'en']), + }; + + const module = await esmock('../../../src/content-fragment-404/rules/locale-fallback-rule.js', { + '../../../src/content-fragment-404/domain/suggestion/suggestion.js': { + Suggestion: { + locale: sandbox.stub().returns(mockSuggestion), + }, + }, + '../../../src/content-fragment-404/domain/language/locale.js': { + Locale: { + fromPath: sandbox.stub().returns(mockLocale), + }, + }, + 
'../../../src/content-fragment-404/domain/language/language-tree.js': { + LanguageTree: mockLanguageTree, + }, + '../../../src/content-fragment-404/utils/path-utils.js': { + PathUtils: mockPathUtils, + }, + }); + + LocaleFallbackRule = module.LocaleFallbackRule; + }); + + afterEach(() => { + sandbox.restore(); + }); + + describe('constructor', () => { + it('should initialize with second priority (2)', () => { + const rule = new LocaleFallbackRule(context, mockAemClient); + + expect(rule.context).to.equal(context); + expect(rule.priority).to.equal(LOCALE_FALLBACK_RULE_PRIORITY); + expect(rule.aemClient).to.equal(mockAemClient); + }); + + it('should extend BaseRule', () => { + const rule = new LocaleFallbackRule(context, mockAemClient); + + expect(rule.getPriority).to.be.a('function'); + expect(rule.getAemClient).to.be.a('function'); + expect(rule.apply).to.be.a('function'); + }); + }); + + describe('applyRule with detected locale', () => { + it('should return locale suggestion when fallback is available', async () => { + mockLanguageTree.findSimilarLanguageRoots.returns(['en-us', 'en-gb']); + mockAemClient.isAvailable.onFirstCall().resolves(true); + + const rule = new LocaleFallbackRule(context, mockAemClient); + const brokenPath = TEST_PATH_FR_FR; + + const result = await rule.applyRule(brokenPath); + + expect(result).to.equal(mockSuggestion); + }); + + it('should try multiple fallback locales until one is found', async () => { + // Locale.fromPath is already mocked in esmock + mockLanguageTree.findSimilarLanguageRoots.returns(['en-us', 'en-gb', 'en']); + mockLocale.replaceInPath.onCall(0).returns(TEST_PATH_EN_US); + mockLocale.replaceInPath.onCall(1).returns(TEST_PATH_EN_GB); + mockAemClient.isAvailable.onCall(0).resolves(false); + mockAemClient.isAvailable.onCall(1).resolves(true); + + const rule = new LocaleFallbackRule(context, mockAemClient); + const brokenPath = TEST_PATH_FR_FR; + + const result = await rule.applyRule(brokenPath); + + 
expect(mockAemClient.isAvailable).to.have.been.calledTwice; + expect(result).to.equal(mockSuggestion); + }); + + it('should return null when no fallback locales are available', async () => { + mockLanguageTree.findSimilarLanguageRoots.returns(['en-us', 'en-gb']); + mockAemClient.isAvailable.resolves(false); + + const rule = new LocaleFallbackRule(context, mockAemClient); + const brokenPath = TEST_PATH_FR_FR; + + const result = await rule.applyRule(brokenPath); + + expect(mockAemClient.isAvailable).to.have.been.calledTwice; + expect(result).to.be.null; + }); + + it('should return null when no similar language roots found', async () => { + // Locale.fromPath is already mocked in esmock + mockLanguageTree.findSimilarLanguageRoots.returns([]); + + const rule = new LocaleFallbackRule(context, mockAemClient); + const brokenPath = TEST_PATH_FR_FR; + + const result = await rule.applyRule(brokenPath); + + expect(mockAemClient.isAvailable).not.to.have.been.called; + expect(result).to.be.null; + }); + }); + + describe('applyRule without detected locale', () => { + it('should return null when no locale detected and no double slashes', async () => { + // Create a rule with mocked dependencies that return null for Locale.fromPath + const ruleModule = await esmock('../../../src/content-fragment-404/rules/locale-fallback-rule.js', { + '../../../src/content-fragment-404/domain/language/locale.js': { + Locale: { + fromPath: sandbox.stub().returns(null), // Return null to trigger the uncovered lines + }, + }, + '../../../src/content-fragment-404/utils/path-utils.js': { + PathUtils: { + hasDoubleSlashes: sandbox.stub().returns(false), // No double slashes + }, + }, + }); + + const rule = new ruleModule.LocaleFallbackRule(context, mockAemClient); + const brokenPath = TEST_PATH_BROKEN; + + const result = await rule.applyRule(brokenPath); + + expect(result).to.be.null; + }); + + it('should try locale insertion when double slashes detected', async () => { + // Create a rule with mocked 
dependencies that return null for Locale.fromPath + const ruleModule = await esmock('../../../src/content-fragment-404/rules/locale-fallback-rule.js', { + '../../../src/content-fragment-404/domain/suggestion/suggestion.js': { + Suggestion: { + locale: sandbox.stub().returns(mockSuggestion), + }, + }, + '../../../src/content-fragment-404/domain/language/locale.js': { + Locale: { + fromPath: sandbox.stub().returns(null), // Return null for this test + }, + }, + '../../../src/content-fragment-404/domain/language/language-tree.js': { + LanguageTree: { + findEnglishFallbacks: sandbox.stub().returns(['en-us', 'en-gb']), + }, + }, + '../../../src/content-fragment-404/utils/path-utils.js': { + PathUtils: { + hasDoubleSlashes: sandbox.stub().returns(true), + }, + }, + }); + + mockAemClient.isAvailable.onFirstCall().resolves(true); + + const rule = new ruleModule.LocaleFallbackRule(context, mockAemClient); + const brokenPath = TEST_PATH_BROKEN; + + const result = await rule.applyRule(TEST_PATH_BROKEN_WITH_DOUBLE_SLASHES); + + expect(result).to.equal(mockSuggestion); + }); + + it('should try multiple English fallbacks for locale insertion', async () => { + // Create a rule with mocked dependencies that return null for Locale.fromPath + const ruleModule = await esmock('../../../src/content-fragment-404/rules/locale-fallback-rule.js', { + '../../../src/content-fragment-404/domain/suggestion/suggestion.js': { + Suggestion: { + locale: sandbox.stub().returns(mockSuggestion), + }, + }, + '../../../src/content-fragment-404/domain/language/locale.js': { + Locale: { + fromPath: sandbox.stub().returns(null), // Return null for this test + }, + }, + '../../../src/content-fragment-404/domain/language/language-tree.js': { + LanguageTree: { + findEnglishFallbacks: sandbox.stub().returns(['en-us', 'en-gb', 'en']), + }, + }, + '../../../src/content-fragment-404/utils/path-utils.js': { + PathUtils: { + hasDoubleSlashes: sandbox.stub().returns(true), + }, + }, + }); + + 
mockAemClient.isAvailable.onCall(0).resolves(false); + mockAemClient.isAvailable.onCall(1).resolves(true); + + const rule = new ruleModule.LocaleFallbackRule(context, mockAemClient); + const brokenPath = TEST_PATH_BROKEN; + + const result = await rule.applyRule(brokenPath); + + expect(mockAemClient.isAvailable).to.have.been.calledTwice; + expect(result).to.equal(mockSuggestion); + }); + + it('should return null when no English fallbacks work for locale insertion', async () => { + // Locale.fromPath is already mocked in esmock to return null + mockPathUtils.hasDoubleSlashes.returns(true); + mockLanguageTree.findEnglishFallbacks.returns(['en-us', 'en-gb']); + mockAemClient.isAvailable.resolves(false); + + const rule = new LocaleFallbackRule(context, mockAemClient); + const brokenPath = TEST_PATH_BROKEN; + + const result = await rule.applyRule(brokenPath); + + expect(mockAemClient.isAvailable).to.have.been.calledTwice; + expect(result).to.be.null; + }); + }); + + describe('tryLocaleInsertion', () => { + it('should replace double slashes with locale codes', async () => { + mockLanguageTree.findEnglishFallbacks.returns(['en-us']); + mockAemClient.isAvailable.resolves(true); + + const rule = new LocaleFallbackRule(context, mockAemClient); + const brokenPath = '/content/dam//assets/image.jpg'; + + const result = await rule.tryLocaleInsertion(brokenPath); + + expect(mockAemClient.isAvailable).to.have.been.calledWith('/content/dam/en-us/assets/image.jpg'); + expect(result).to.equal(mockSuggestion); + }); + + it('should handle multiple double slashes by replacing only the first', async () => { + mockLanguageTree.findEnglishFallbacks.returns(['en-us']); + mockAemClient.isAvailable.resolves(true); + + const rule = new LocaleFallbackRule(context, mockAemClient); + const brokenPath = '/content/dam//assets//image.jpg'; + + const result = await rule.tryLocaleInsertion(brokenPath); + + 
expect(mockAemClient.isAvailable).to.have.been.calledWith('/content/dam/en-us/assets//image.jpg'); + expect(result).to.equal(mockSuggestion); + }); + + it('should return null when no English fallbacks are available', async () => { + mockLanguageTree.findEnglishFallbacks.returns([]); + + const rule = new LocaleFallbackRule(context, mockAemClient); + const brokenPath = TEST_PATH_BROKEN_WITH_DOUBLE_SLASHES; + + const result = await rule.tryLocaleInsertion(brokenPath); + + expect(mockAemClient.isAvailable).not.to.have.been.called; + expect(result).to.be.null; + }); + }); + + describe('error handling', () => { + it('should handle AEM client errors during locale fallback', async () => { + // Locale.fromPath is stubbed in beforeEach to return mockLocale + mockLanguageTree.findSimilarLanguageRoots.returns(['en-us']); + mockAemClient.isAvailable.rejects(new Error(ERROR_AEM_CONNECTION_FAILED)); + + const rule = new LocaleFallbackRule(context, mockAemClient); + const brokenPath = TEST_PATH_FR_FR; + + await expect(rule.applyRule(brokenPath)) + .to.be.rejectedWith(ERROR_AEM_CONNECTION_FAILED); + }); + + it('should handle AEM client errors during locale insertion', async () => { + // NOTE(review): the beforeEach stub makes Locale.fromPath return mockLocale (not null), so this presumably runs the detected-locale path rather than locale insertion - confirm + mockPathUtils.hasDoubleSlashes.returns(true); + mockLanguageTree.findEnglishFallbacks.returns(['en-us']); + mockAemClient.isAvailable.rejects(new Error(ERROR_AEM_CONNECTION_FAILED)); + + const rule = new LocaleFallbackRule(context, mockAemClient); + const brokenPath = TEST_PATH_BROKEN_WITH_DOUBLE_SLASHES; + + await expect(rule.applyRule(brokenPath)) + .to.be.rejectedWith(ERROR_AEM_CONNECTION_FAILED); + }); + + it('should throw error when AEM client not available', async () => { + const rule = new LocaleFallbackRule(context, null); + const brokenPath = TEST_PATH_FR_FR; + + await expect(rule.applyRule(brokenPath)) + .to.be.rejectedWith('AemClient not injected'); + }); + }); + + describe('integration scenarios', () => { + it('should work through apply method', 
async () => { + // Locale.fromPath is stubbed in beforeEach to return mockLocale + mockLanguageTree.findSimilarLanguageRoots.returns(['en-us']); + mockAemClient.isAvailable.resolves(true); + + const rule = new LocaleFallbackRule(context, mockAemClient); + const brokenPath = TEST_PATH_FR_FR; + + const result = await rule.apply(brokenPath); + + expect(result).to.equal(mockSuggestion); + }); + + it('should return correct priority', () => { + const rule = new LocaleFallbackRule(context, mockAemClient); + + expect(rule.getPriority()).to.equal(LOCALE_FALLBACK_RULE_PRIORITY); + }); + + it('should handle edge cases with empty paths', async () => { + // NOTE(review): the beforeEach stub makes Locale.fromPath return mockLocale (not null); the null result presumably comes from isAvailable resolving false by default - confirm + + const rule = new LocaleFallbackRule(context, mockAemClient); + const brokenPath = ''; + + const result = await rule.applyRule(brokenPath); + + expect(result).to.be.null; + }); + }); +}); diff --git a/test/audits/content-fragment-404/locale.test.js b/test/audits/content-fragment-404/locale.test.js new file mode 100644 index 000000000..f46c4fe64 --- /dev/null +++ b/test/audits/content-fragment-404/locale.test.js @@ -0,0 +1,250 @@ +/* + * Copyright 2025 Adobe. All rights reserved. + * This file is licensed to you under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. You may obtain a copy + * of the License at http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under + * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS + * OF ANY KIND, either express or implied. See the License for the specific language + * governing permissions and limitations under the License. 
+ */ + +/* eslint-env mocha */ +import { expect } from 'chai'; +import { Locale } from '../../../src/content-fragment-404/domain/language/locale.js'; +import { LocaleType } from '../../../src/content-fragment-404/domain/language/locale-type.js'; + +describe('Locale', () => { + describe('FIVE_LETTER_PATTERN', () => { + it('should match valid 5-letter locales with hyphen', () => { + expect(Locale.FIVE_LETTER_PATTERN.test('en-US')).to.be.true; + expect(Locale.FIVE_LETTER_PATTERN.test('fr-FR')).to.be.true; + expect(Locale.FIVE_LETTER_PATTERN.test('de-DE')).to.be.true; + }); + + it('should match valid 5-letter locales with underscore', () => { + expect(Locale.FIVE_LETTER_PATTERN.test('en_US')).to.be.true; + expect(Locale.FIVE_LETTER_PATTERN.test('fr_FR')).to.be.true; + expect(Locale.FIVE_LETTER_PATTERN.test('de_DE')).to.be.true; + }); + + it('should not match invalid patterns', () => { + expect(Locale.FIVE_LETTER_PATTERN.test('en')).to.be.false; + expect(Locale.FIVE_LETTER_PATTERN.test('en-US-')).to.be.false; + expect(Locale.FIVE_LETTER_PATTERN.test('en-US-X')).to.be.false; + expect(Locale.FIVE_LETTER_PATTERN.test('en_US_')).to.be.false; + /* Positive controls: the well-formed 5-character codes that the rejected inputs above extend still match */ + expect(Locale.FIVE_LETTER_PATTERN.test('en-US')).to.be.true; + expect(Locale.FIVE_LETTER_PATTERN.test('en_US')).to.be.true; + }); + }); + + describe('TWO_LETTER_PATTERN', () => { + it('should match valid 2-letter codes', () => { + expect(Locale.TWO_LETTER_PATTERN.test('US')).to.be.true; + expect(Locale.TWO_LETTER_PATTERN.test('FR')).to.be.true; + expect(Locale.TWO_LETTER_PATTERN.test('DE')).to.be.true; + expect(Locale.TWO_LETTER_PATTERN.test('us')).to.be.true; + expect(Locale.TWO_LETTER_PATTERN.test('fr')).to.be.true; + }); + + it('should not match invalid patterns', () => { + expect(Locale.TWO_LETTER_PATTERN.test('U')).to.be.false; + expect(Locale.TWO_LETTER_PATTERN.test('USA')).to.be.false; + expect(Locale.TWO_LETTER_PATTERN.test('')).to.be.false; + /* Positive controls: well-formed 2-letter codes still match */ + expect(Locale.TWO_LETTER_PATTERN.test('US')).to.be.true; + 
expect(Locale.TWO_LETTER_PATTERN.test('FR')).to.be.true; + }); + }); + + describe('constructor', () => { + it('should create locale with correct properties', () => { + const locale = new Locale('en-US', LocaleType.FIVE_LETTER_LOCALE, 'en', 'US'); + expect(locale.code).to.equal('en-US'); + expect(locale.type).to.equal(LocaleType.FIVE_LETTER_LOCALE); + expect(locale.language).to.equal('en'); + expect(locale.country).to.equal('US'); + }); + }); + + describe('fromCode', () => { + it('should return null for null code', () => { + const result = Locale.fromCode(null); + expect(result).to.be.null; + }); + + it('should return null for empty code', () => { + const result = Locale.fromCode(''); + expect(result).to.be.null; + }); + + it('should return null for invalid code', () => { + const result = Locale.fromCode('invalid'); + expect(result).to.be.null; + }); + + it('should return null for whitespace only code', () => { + const result = Locale.fromCode(' '); + expect(result).to.be.null; + }); + + it('should create 5-letter locale with hyphen', () => { + const result = Locale.fromCode('en-US'); + expect(result).to.be.instanceOf(Locale); + expect(result.code).to.equal('en-US'); + expect(result.type).to.equal(LocaleType.FIVE_LETTER_LOCALE); + expect(result.language).to.equal('en'); + expect(result.country).to.equal('US'); + }); + + it('should create 5-letter locale with underscore', () => { + const result = Locale.fromCode('en_US'); + expect(result).to.be.instanceOf(Locale); + expect(result.code).to.equal('en_US'); + expect(result.type).to.equal(LocaleType.FIVE_LETTER_LOCALE); + expect(result.language).to.equal('en'); + expect(result.country).to.equal('US'); + }); + + it('should create 2-letter country code', () => { + const result = Locale.fromCode('US'); + expect(result).to.be.instanceOf(Locale); + expect(result.code).to.equal('US'); + expect(result.type).to.equal(LocaleType.TWO_LETTER_COUNTRY); + expect(result.language).to.be.null; + expect(result.country).to.equal('US'); + 
}); + + it('should handle lowercase 2-letter country code', () => { + const result = Locale.fromCode('us'); + expect(result).to.be.instanceOf(Locale); + expect(result.code).to.equal('us'); + expect(result.type).to.equal(LocaleType.TWO_LETTER_COUNTRY); + expect(result.language).to.be.null; + expect(result.country).to.equal('US'); + }); + + it('should trim whitespace', () => { + const result = Locale.fromCode(' en-US '); + expect(result).to.be.instanceOf(Locale); + expect(result.code).to.equal('en-US'); + }); + }); + + describe('fromPath', () => { + it('should return null for null path', () => { + const result = Locale.fromPath(null); + expect(result).to.be.null; + }); + + it('should return null for path without locale', () => { + const result = Locale.fromPath('/content/dam/images/photo.jpg'); + expect(result).to.be.null; + }); + + it('should extract 5-letter locale from path', () => { + const result = Locale.fromPath('/content/dam/en-US/images/photo.jpg'); + expect(result).to.be.instanceOf(Locale); + expect(result.code).to.equal('en-US'); + expect(result.type).to.equal(LocaleType.FIVE_LETTER_LOCALE); + }); + + it('should extract 5-letter locale with underscore from path', () => { + const result = Locale.fromPath('/content/dam/en_US/images/photo.jpg'); + expect(result).to.be.instanceOf(Locale); + expect(result.code).to.equal('en_US'); + expect(result.type).to.equal(LocaleType.FIVE_LETTER_LOCALE); + }); + + it('should extract 2-letter country from path', () => { + const result = Locale.fromPath('/content/dam/US/images/photo.jpg'); + expect(result).to.be.instanceOf(Locale); + expect(result.code).to.equal('US'); + expect(result.type).to.equal(LocaleType.TWO_LETTER_COUNTRY); + }); + }); + + describe('getCode', () => { + it('should return the code', () => { + const locale = new Locale('en-US', LocaleType.FIVE_LETTER_LOCALE, 'en', 'US'); + expect(locale.getCode()).to.equal('en-US'); + }); + }); + + describe('getType', () => { + it('should return the type', () => { + const 
locale = new Locale('en-US', LocaleType.FIVE_LETTER_LOCALE, 'en', 'US'); + expect(locale.getType()).to.equal(LocaleType.FIVE_LETTER_LOCALE); + }); + }); + + describe('getLanguage', () => { + it('should return the language', () => { + const locale = new Locale('en-US', LocaleType.FIVE_LETTER_LOCALE, 'en', 'US'); + expect(locale.getLanguage()).to.equal('en'); + }); + }); + + describe('getCountry', () => { + it('should return the country', () => { + const locale = new Locale('en-US', LocaleType.FIVE_LETTER_LOCALE, 'en', 'US'); + expect(locale.getCountry()).to.equal('US'); + }); + }); + + describe('isValid', () => { + it('should return true for valid locale', () => { + const locale = new Locale('en-US', LocaleType.FIVE_LETTER_LOCALE, 'en', 'US'); + expect(locale.isValid()).to.be.true; + }); + + it('should return false for locale without code', () => { + const locale = new Locale('', LocaleType.FIVE_LETTER_LOCALE, 'en', 'US'); + expect(locale.isValid()).to.be.false; + }); + + it('should return false for locale with null code', () => { + const locale = new Locale(null, LocaleType.FIVE_LETTER_LOCALE, 'en', 'US'); + expect(locale.isValid()).to.be.false; + }); + }); + + describe('replaceInPath', () => { + it('should return original path if no code', () => { + const locale = new Locale('', LocaleType.FIVE_LETTER_LOCALE, 'en', 'US'); + const result = locale.replaceInPath('/content/dam/en-US/images/photo.jpg', 'fr-FR'); + expect(result).to.equal('/content/dam/en-US/images/photo.jpg'); + }); + + it('should return original path if no path', () => { + const locale = new Locale('en-US', LocaleType.FIVE_LETTER_LOCALE, 'en', 'US'); + const result = locale.replaceInPath(null, 'fr-FR'); + expect(result).to.be.null; + }); + + it('should replace locale in path', () => { + const locale = new Locale('en-US', LocaleType.FIVE_LETTER_LOCALE, 'en', 'US'); + const result = locale.replaceInPath('/content/dam/en-US/images/photo.jpg', 'fr-FR'); + 
expect(result).to.equal('/content/dam/fr-FR/images/photo.jpg'); + }); + + it('should not replace locale if not found', () => { + const locale = new Locale('en-US', LocaleType.FIVE_LETTER_LOCALE, 'en', 'US'); + const result = locale.replaceInPath('/content/dam/fr-FR/images/photo.jpg', 'de-DE'); + expect(result).to.equal('/content/dam/fr-FR/images/photo.jpg'); + }); + }); + + describe('toJSON', () => { + it('should return JSON representation', () => { + const locale = new Locale('en-US', LocaleType.FIVE_LETTER_LOCALE, 'en', 'US'); + const result = locale.toJSON(); + expect(result).to.deep.equal({ + code: 'en-US', + type: LocaleType.FIVE_LETTER_LOCALE, + language: 'en', + country: 'US', + }); + }); + }); +}); diff --git a/test/audits/content-fragment-404/opportunity-data-mapper.test.js b/test/audits/content-fragment-404/opportunity-data-mapper.test.js new file mode 100644 index 000000000..1d024f04a --- /dev/null +++ b/test/audits/content-fragment-404/opportunity-data-mapper.test.js @@ -0,0 +1,100 @@ +/* + * Copyright 2025 Adobe. All rights reserved. + * This file is licensed to you under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. You may obtain a copy + * of the License at http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under + * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS + * OF ANY KIND, either express or implied. See the License for the specific language + * governing permissions and limitations under the License. 
+ */
+
+/* eslint-env mocha */
+import { expect } from 'chai';
+import { createOpportunityData } from '../../../src/content-fragment-404/opportunity-data-mapper.js';
+import { DATA_SOURCES } from '../../../src/common/constants.js';
+
+describe('Opportunity Data Mapper', () => { // suite for opportunity-data-mapper.js
+  describe('createOpportunityData', () => { // always invoked without arguments here
+    it('should return the correct opportunity data structure', () => {
+      const opportunity = createOpportunityData();
+
+      expect(opportunity).to.be.an('object');
+      expect(opportunity).to.have.property('runbook');
+      expect(opportunity).to.have.property('origin', 'AUTOMATION');
+      expect(opportunity).to.have.property('title');
+      expect(opportunity).to.have.property('description');
+      expect(opportunity).to.have.property('guidance');
+      expect(opportunity).to.have.property('tags');
+      expect(opportunity).to.have.property('data');
+    });
+
+    it('should have a valid runbook URL', () => {
+      const { runbook } = createOpportunityData();
+
+      expect(runbook).to.be.a('string');
+      expect(runbook).to.contain('https://');
+      expect(runbook).to.contain('adobe.sharepoint.com');
+    });
+
+    it('should have appropriate title for Content Fragment broken links', () => {
+      const { title } = createOpportunityData();
+
+      expect(title).to.be.a('string');
+      expect(title).to.contain('Content Fragment');
+      expect(title).to.contain('failing');
+      expect(title).to.contain('breaking');
+    });
+
+    it('should have a description explaining the issue and solution', () => {
+      const { description } = createOpportunityData();
+
+      expect(description).to.be.a('string');
+      expect(description).to.contain('Content Fragment');
+      expect(description).to.contain('redirect');
+    });
+
+    it('should include guidance with steps', () => {
+      const { guidance } = createOpportunityData();
+
+      expect(guidance).to.be.an('object');
+      expect(guidance).to.have.property('steps');
+      expect(guidance.steps).to.be.an('array');
+      expect(guidance.steps.length).to.be.above(0);
+    });
+
+    it('should have guidance steps that are actionable', () => {
+      const { guidance } = createOpportunityData();
+
+      guidance.steps.forEach((guidanceStep) => { // each step: non-empty string
+        expect(guidanceStep).to.be.a('string');
+        expect(guidanceStep.length).to.be.above(0);
+      });
+    });
+
+    it('should include "Headless" in tags', () => {
+      const { tags } = createOpportunityData();
+
+      expect(tags).to.be.an('array');
+      expect(tags).to.contain('Headless');
+    });
+
+    it('should include correct data sources', () => {
+      const { data } = createOpportunityData();
+
+      expect(data).to.be.an('object');
+      expect(data).to.have.property('dataSources');
+      expect(data.dataSources).to.be.an('array');
+      expect(data.dataSources).to.contain(DATA_SOURCES.SITE);
+    });
+
+    it('should return a consistent structure on multiple calls', () => {
+      const firstCall = createOpportunityData();
+      const secondCall = createOpportunityData();
+
+      expect(firstCall).to.eql(secondCall); // two independent calls must be deeply equal
+    });
+  });
+});
+
diff --git a/test/audits/content-fragment-404/path-index.test.js b/test/audits/content-fragment-404/path-index.test.js
new file mode 100644
index 000000000..651c40a74
--- /dev/null
+++ b/test/audits/content-fragment-404/path-index.test.js
@@ -0,0 +1,279 @@
+/*
+ * Copyright 2025 Adobe. All rights reserved.
+ * This file is licensed to you under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License. You may obtain a copy
+ * of the License at http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under
+ * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
+ * OF ANY KIND, either express or implied. See the License for the specific language
+ * governing permissions and limitations under the License.
+ */
+
+/* eslint-env mocha */
+import { expect, use } from 'chai';
+import sinon from 'sinon';
+import sinonChai from 'sinon-chai';
+import { MockContextBuilder } from '../../shared.js';
+import { PathIndex } from '../../../src/content-fragment-404/domain/index/path-index.js';
+import { ContentPath } from '../../../src/content-fragment-404/domain/content/content-path.js';
+import {
+  EXPECTED_COUNT_TWO,
+  EXPECTED_COUNT_FOUR,
+  TEST_PATH_EN_US_IMAGES_PHOTO,
+  TEST_PATH_FR_FR_IMAGES_PHOTO,
+  TEST_PATH_EN_US_IMAGES_PHOTO1,
+  TEST_PATH_EN_US_IMAGES_PHOTO2,
+  TEST_PATH_FR_FR_IMAGES_PHOTO1,
+  TEST_PATH_EN_US_IMAGES_SUBFOLDER_PHOTO3,
+  TEST_PATH_EN_US_IMAGES,
+  TEST_PATH_TEST_IMAGE,
+  TEST_PATH_TEST,
+  TEST_PATH_DE_DE_IMAGES_PHOTO,
+  TEST_PATH_DE_DE_IMAGES,
+  TEST_PATH_DE_DE,
+} from './test-constants.js';
+
+// Register the sinon-chai matchers only after every import declaration (import/first).
+use(sinonChai);
+
+describe('PathIndex', () => { // every test gets a fresh sandbox, mock context and PathIndex
+  let sandbox;
+  let mockContext;
+  let pathIndex;
+
+  beforeEach(() => {
+    sandbox = sinon.createSandbox();
+
+    mockContext = new MockContextBuilder()
+      .withSandbox(sandbox)
+      .build();
+
+    pathIndex = new PathIndex(mockContext);
+  });
+
+  afterEach(() => {
+    sandbox.restore();
+  });
+
+  describe('constructor', () => {
+    it('should create PathIndex with context', () => {
+      expect(pathIndex.context).to.equal(mockContext);
+      expect(pathIndex.root).to.not.be.null;
+    });
+  });
+
+  describe('insert', () => {
+    it('should insert content path with status and locale', () => {
+      const insertSpy = sandbox.spy(pathIndex, 'insertContentPath'); // spy keeps the real method
+      pathIndex.insert(TEST_PATH_EN_US_IMAGES_PHOTO, 'PUBLISHED', 'en-US');
+
+      expect(insertSpy).to.have.been.calledOnce;
+      const callArgs = insertSpy.firstCall.args[0];
+      expect(callArgs).to.be.instanceOf(ContentPath);
+      expect(callArgs.path).to.equal(TEST_PATH_EN_US_IMAGES_PHOTO);
+      expect(callArgs.status).to.equal('PUBLISHED');
+      expect(callArgs.locale).to.equal('en-US');
+    });
+  });
+
+  describe('insertContentPath', () => {
+    it('should insert valid content path', () => {
+      const contentPath = new ContentPath(TEST_PATH_EN_US_IMAGES_PHOTO, 'PUBLISHED', 'en-US');
+      pathIndex.insertContentPath(contentPath);
+
+      expect(pathIndex.contains(TEST_PATH_EN_US_IMAGES_PHOTO)).to.be.true;
+    });
+
+    it('should not insert invalid content path', () => {
+      const invalidContentPath = new ContentPath('', 'PUBLISHED', 'en-US');
+      pathIndex.insertContentPath(invalidContentPath);
+
+      expect(pathIndex.contains('')).to.be.false;
+    });
+
+    it('should handle duplicate insertion gracefully', () => {
+      const contentPath = new ContentPath(TEST_PATH_EN_US_IMAGES_PHOTO, 'PUBLISHED', 'en-US');
+      pathIndex.insertContentPath(contentPath);
+      pathIndex.insertContentPath(contentPath);
+
+      expect(pathIndex.contains(TEST_PATH_EN_US_IMAGES_PHOTO)).to.be.true;
+    });
+  });
+
+  describe('contains', () => {
+    beforeEach(() => {
+      pathIndex.insert(TEST_PATH_EN_US_IMAGES_PHOTO, 'PUBLISHED', 'en-US');
+      pathIndex.insert(TEST_PATH_FR_FR_IMAGES_PHOTO, 'PUBLISHED', 'fr-FR');
+    });
+
+    it('should return false for null path', () => {
+      expect(pathIndex.contains(null)).to.be.false;
+    });
+
+    it('should return false for empty path', () => {
+      expect(pathIndex.contains('')).to.be.false;
+    });
+
+    it('should return true for existing path', () => {
+      expect(pathIndex.contains(TEST_PATH_EN_US_IMAGES_PHOTO)).to.be.true;
+      expect(pathIndex.contains(TEST_PATH_FR_FR_IMAGES_PHOTO)).to.be.true;
+    });
+
+    it('should return false for non-existing path', () => {
+      expect(pathIndex.contains(TEST_PATH_DE_DE_IMAGES_PHOTO)).to.be.false;
+    });
+
+    it('should return false for partial path', () => {
+      expect(pathIndex.contains(TEST_PATH_EN_US_IMAGES)).to.be.false;
+    });
+  });
+
+  describe('find', () => {
+    beforeEach(() => {
+      pathIndex.insert(TEST_PATH_EN_US_IMAGES_PHOTO, 'PUBLISHED', 'en-US');
+      pathIndex.insert(TEST_PATH_FR_FR_IMAGES_PHOTO, 'MODIFIED', 'fr-FR');
+    });
+
+    it('should return null for null path', () => {
+      expect(pathIndex.find(null)).to.be.null;
+    });
+
+    it('should return null for empty path', () => {
+      expect(pathIndex.find('')).to.be.null;
+    });
+
+    it('should return content path for existing path', () => {
+      const result = pathIndex.find(TEST_PATH_EN_US_IMAGES_PHOTO);
+      expect(result).to.be.instanceOf(ContentPath);
+      expect(result.path).to.equal(TEST_PATH_EN_US_IMAGES_PHOTO);
+      expect(result.status).to.equal('PUBLISHED');
+      expect(result.locale).to.equal('en-US');
+    });
+
+    it('should return null for non-existing path', () => {
+      expect(pathIndex.find(TEST_PATH_DE_DE_IMAGES_PHOTO)).to.be.null;
+    });
+
+    it('should return null for prefix that exists but is not an end node', () => {
+      pathIndex.insert(TEST_PATH_TEST_IMAGE, 'PUBLISHED', 'en-US');
+
+      // Try to find a prefix that exists in the trie but is not marked as an end node
+      expect(pathIndex.find(TEST_PATH_TEST)).to.be.null;
+    });
+  });
+
+  describe('delete', () => {
+    beforeEach(() => {
+      pathIndex.insert(TEST_PATH_EN_US_IMAGES_PHOTO, 'PUBLISHED', 'en-US');
+    });
+
+    it('should return false for null path', () => {
+      expect(pathIndex.delete(null)).to.be.false;
+    });
+
+    it('should return false for empty path', () => {
+      expect(pathIndex.delete('')).to.be.false;
+    });
+
+    it('should return false for non-existing path', () => {
+      expect(pathIndex.delete(TEST_PATH_FR_FR_IMAGES_PHOTO)).to.be.false;
+    });
+
+    it('should delete existing path and return true', () => {
+      expect(pathIndex.contains(TEST_PATH_EN_US_IMAGES_PHOTO)).to.be.true;
+      expect(pathIndex.delete(TEST_PATH_EN_US_IMAGES_PHOTO)).to.be.true;
+      expect(pathIndex.contains(TEST_PATH_EN_US_IMAGES_PHOTO)).to.be.false;
+    });
+
+    it('should return false when trying to delete a prefix that exists but is not an end node', () => {
+      pathIndex.insert(TEST_PATH_TEST_IMAGE, 'PUBLISHED', 'en-US');
+
+      // Try to delete a prefix that exists in the trie but is not marked as an end node
+      expect(pathIndex.delete(TEST_PATH_TEST)).to.be.false;
+      // The original path should still exist
+      expect(pathIndex.contains(TEST_PATH_TEST_IMAGE)).to.be.true;
+    });
+  });
+
+  describe('findChildren', () => {
+    beforeEach(() => {
+      pathIndex.insert(TEST_PATH_EN_US_IMAGES_PHOTO1, 'PUBLISHED', 'en-US');
+      pathIndex.insert(TEST_PATH_EN_US_IMAGES_PHOTO2, 'PUBLISHED', 'en-US');
+      pathIndex.insert(TEST_PATH_EN_US_IMAGES_SUBFOLDER_PHOTO3, 'PUBLISHED', 'en-US');
+      pathIndex.insert(TEST_PATH_FR_FR_IMAGES_PHOTO1, 'PUBLISHED', 'fr-FR');
+    });
+
+    it('should find direct children of parent path', () => {
+      const children = pathIndex.findChildren(TEST_PATH_EN_US_IMAGES);
+      expect(children).to.have.length(EXPECTED_COUNT_TWO);
+      expect(children[0].path).to.equal(TEST_PATH_EN_US_IMAGES_PHOTO1); // asserts result order
+      expect(children[1].path).to.equal(TEST_PATH_EN_US_IMAGES_PHOTO2);
+    });
+
+    it('should not include nested children', () => {
+      const children = pathIndex.findChildren(TEST_PATH_EN_US_IMAGES);
+      const childPaths = children.map((child) => child.path);
+      expect(childPaths).to.not.include(TEST_PATH_EN_US_IMAGES_SUBFOLDER_PHOTO3);
+    });
+
+    it('should return empty array for non-existing parent', () => {
+      const children = pathIndex.findChildren(TEST_PATH_DE_DE_IMAGES);
+      expect(children).to.deep.equal([]);
+    });
+  });
+
+  describe('findPathsWithPrefix', () => {
+    beforeEach(() => {
+      pathIndex.insert(TEST_PATH_EN_US_IMAGES_PHOTO1, 'PUBLISHED', 'en-US');
+      pathIndex.insert(TEST_PATH_EN_US_IMAGES_PHOTO2, 'PUBLISHED', 'en-US');
+      pathIndex.insert(TEST_PATH_EN_US_IMAGES_SUBFOLDER_PHOTO3, 'PUBLISHED', 'en-US');
+      pathIndex.insert(TEST_PATH_FR_FR_IMAGES_PHOTO1, 'PUBLISHED', 'fr-FR');
+    });
+
+    it('should find all paths with given prefix', () => {
+      const paths = pathIndex.findPathsWithPrefix(TEST_PATH_EN_US_IMAGES);
+      expect(paths).to.have.length(3); // the three en-US paths inserted in beforeEach
+      const pathStrings = paths.map((p) => p.path);
+      expect(pathStrings).to.include(TEST_PATH_EN_US_IMAGES_PHOTO1);
+      expect(pathStrings).to.include(TEST_PATH_EN_US_IMAGES_PHOTO2);
+      expect(pathStrings).to.include(TEST_PATH_EN_US_IMAGES_SUBFOLDER_PHOTO3);
+    });
+
+    it('should return empty array for non-existing prefix', () => {
+      const paths = pathIndex.findPathsWithPrefix(TEST_PATH_DE_DE);
+      expect(paths).to.deep.equal([]);
+    });
+
+    it('should return all paths for empty prefix', () => {
+      const paths = pathIndex.findPathsWithPrefix('');
+      expect(paths).to.have.length(EXPECTED_COUNT_FOUR);
+    });
+
+    it('should return all paths for null prefix', () => {
+      const paths = pathIndex.findPathsWithPrefix(null);
+      expect(paths).to.have.length(EXPECTED_COUNT_FOUR);
+    });
+  });
+
+  describe('getPaths', () => {
+    beforeEach(() => {
+      pathIndex.insert(TEST_PATH_EN_US_IMAGES_PHOTO1, 'PUBLISHED', 'en-US');
+      pathIndex.insert(TEST_PATH_FR_FR_IMAGES_PHOTO1, 'PUBLISHED', 'fr-FR');
+    });
+
+    it('should return all paths in the index', () => {
+      const paths = pathIndex.getPaths();
+      expect(paths).to.have.length(EXPECTED_COUNT_TWO);
+      const pathStrings = paths.map((p) => p.path);
+      expect(pathStrings).to.include(TEST_PATH_EN_US_IMAGES_PHOTO1);
+      expect(pathStrings).to.include(TEST_PATH_FR_FR_IMAGES_PHOTO1);
+    });
+
+    it('should return empty array for empty index', () => {
+      const emptyIndex = new PathIndex(mockContext); // separate instance with nothing inserted
+      const paths = emptyIndex.getPaths();
+      expect(paths).to.deep.equal([]);
+    });
+  });
+});
diff --git a/test/audits/content-fragment-404/path-utils.test.js b/test/audits/content-fragment-404/path-utils.test.js
new file mode 100644
index 000000000..d489a6b3a
--- /dev/null
+++ b/test/audits/content-fragment-404/path-utils.test.js
@@ -0,0 +1,230 @@
+/*
+ * Copyright 2025 Adobe. All rights reserved.
+ * This file is licensed to you under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License. You may obtain a copy
+ * of the License at http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under
+ * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
+ * OF ANY KIND, either express or implied.
See the License for the specific language
+ * governing permissions and limitations under the License.
+ */
+
+/* eslint-env mocha */
+import { expect } from 'chai';
+import { PathUtils } from '../../../src/content-fragment-404/utils/path-utils.js';
+import {
+  TEST_PATH_CONTENT_DAM,
+  TEST_PATH_CONTENT_DAM_SLASH,
+  TEST_PATH_CONTENT_DAM_IMAGES_PHOTO,
+  TEST_PATH_CONTENT_DAM_IMAGES_SLASH,
+  TEST_PATH_CONTENT_DAM_EN_US,
+  TEST_PATH_CONTENT_DAM_EN_US_SLASH,
+  TEST_PATH_CONTENT_DAM_EN_US_IMAGES,
+  TEST_PATH_CONTENT_DAM_EN_US_IMAGES_PHOTO,
+  TEST_PATH_CONTENT_DAM_123_IMAGES_PHOTO,
+  TEST_PATH_CONTENT_DAM_123_SLASH,
+  TEST_PATH_CONTENT_DAM_US_IMAGES_PHOTO,
+  TEST_PATH_CONTENT_DAM_FR_IMAGES_PHOTO,
+  TEST_PATH_CONTENT_DAM_FR_FR_IMAGES_PHOTO,
+  TEST_PATH_CONTENT_DAM_EN_US_US_IMAGES_PHOTO,
+  TEST_PATH_CONTENT_DAM_DOUBLE_SLASH_IMAGES_PHOTO,
+  TEST_PATH_CONTENT_DOUBLE_SLASH_DAM_IMAGES_PHOTO,
+  TEST_PATH_CONTENT_DAM_IMAGES_DOUBLE_SLASH_PHOTO,
+  TEST_PATH_CONTENT_DAM_TRIPLE_SLASH_IMAGES_PHOTO,
+  TEST_PATH_CONTENT_QUAD_SLASH_DAM_IMAGES_PHOTO,
+  TEST_PATH_SIX_SLASHES_CONTENT_DAM_IMAGES_PHOTO,
+  TEST_URL_EXAMPLE_COM_CONTENT_DAM_IMAGES_PHOTO,
+} from './test-constants.js';
+
+describe('PathUtils', () => { // PathUtils is called directly; no mocks are used in this suite
+  describe('removeLocaleFromPath', () => {
+    it('should return original path for null or empty input', () => {
+      expect(PathUtils.removeLocaleFromPath(null)).to.equal(null);
+      expect(PathUtils.removeLocaleFromPath('')).to.equal('');
+      expect(PathUtils.removeLocaleFromPath(undefined)).to.equal(undefined);
+    });
+
+    it('should remove 2-letter locale from path', () => {
+      expect(PathUtils.removeLocaleFromPath(TEST_PATH_CONTENT_DAM_US_IMAGES_PHOTO))
+        .to.equal(TEST_PATH_CONTENT_DAM_IMAGES_PHOTO);
+      expect(PathUtils.removeLocaleFromPath(TEST_PATH_CONTENT_DAM_FR_IMAGES_PHOTO))
+        .to.equal(TEST_PATH_CONTENT_DAM_IMAGES_PHOTO);
+    });
+
+    it('should remove 5-letter locale from path', () => {
+      expect(PathUtils.removeLocaleFromPath(TEST_PATH_CONTENT_DAM_EN_US_IMAGES_PHOTO))
+        .to.equal(TEST_PATH_CONTENT_DAM_IMAGES_PHOTO);
+      expect(PathUtils.removeLocaleFromPath(TEST_PATH_CONTENT_DAM_FR_FR_IMAGES_PHOTO))
+        .to.equal(TEST_PATH_CONTENT_DAM_IMAGES_PHOTO);
+    });
+
+    it('should remove multiple locales from path', () => {
+      expect(PathUtils.removeLocaleFromPath(TEST_PATH_CONTENT_DAM_EN_US_US_IMAGES_PHOTO))
+        .to.equal(TEST_PATH_CONTENT_DAM_IMAGES_PHOTO);
+    });
+
+    it('should not remove non-locale segments', () => {
+      expect(PathUtils.removeLocaleFromPath(TEST_PATH_CONTENT_DAM_IMAGES_PHOTO))
+        .to.equal(TEST_PATH_CONTENT_DAM_IMAGES_PHOTO);
+      expect(PathUtils.removeLocaleFromPath(TEST_PATH_CONTENT_DAM_123_IMAGES_PHOTO))
+        .to.equal(TEST_PATH_CONTENT_DAM_123_IMAGES_PHOTO);
+    });
+
+    it('should handle paths with no locales', () => {
+      expect(PathUtils.removeLocaleFromPath(TEST_PATH_CONTENT_DAM_IMAGES_PHOTO))
+        .to.equal(TEST_PATH_CONTENT_DAM_IMAGES_PHOTO);
+      expect(PathUtils.removeLocaleFromPath(TEST_PATH_CONTENT_DAM_SLASH))
+        .to.equal(TEST_PATH_CONTENT_DAM_SLASH);
+    });
+
+    it('should leave paths outside /content/dam/ unchanged', () => { // returned as-is, not null
+      expect(PathUtils.removeLocaleFromPath('/')).to.equal('/');
+      expect(PathUtils.removeLocaleFromPath('/en-US')).to.equal('/en-US');
+    });
+  });
+
+  describe('getParentPath', () => {
+    it('should return null for root paths', () => {
+      expect(PathUtils.getParentPath('/')).to.be.null;
+      expect(PathUtils.getParentPath('/content')).to.be.null;
+      expect(PathUtils.getParentPath(TEST_PATH_CONTENT_DAM)).to.be.null;
+    });
+
+    it('should return parent path for valid paths', () => {
+      expect(PathUtils.getParentPath(TEST_PATH_CONTENT_DAM_EN_US))
+        .to.equal(TEST_PATH_CONTENT_DAM);
+      expect(PathUtils.getParentPath(TEST_PATH_CONTENT_DAM_EN_US_IMAGES))
+        .to.equal(TEST_PATH_CONTENT_DAM_EN_US);
+      expect(PathUtils.getParentPath(TEST_PATH_CONTENT_DAM_EN_US_IMAGES_PHOTO))
+        .to.equal(TEST_PATH_CONTENT_DAM_EN_US_IMAGES);
+    });
+
+    it('should return null for segment not starting with /content/dam/', () => {
+      expect(PathUtils.getParentPath('en-US')).to.be.null;
+      expect(PathUtils.getParentPath('/en-US')).to.be.null;
+    });
+
+    it('should handle null or empty input', () => {
+      expect(PathUtils.getParentPath(null)).to.be.null;
+      expect(PathUtils.getParentPath('')).to.be.null;
+      expect(PathUtils.getParentPath(undefined)).to.be.null;
+    });
+
+    it('should handle paths with trailing slash', () => {
+      expect(PathUtils.getParentPath(TEST_PATH_CONTENT_DAM_EN_US_SLASH))
+        .to.equal(TEST_PATH_CONTENT_DAM);
+    });
+  });
+
+  describe('hasDoubleSlashes', () => {
+    it('should return false for null or empty input', () => {
+      expect(PathUtils.hasDoubleSlashes(null)).to.be.false;
+      expect(PathUtils.hasDoubleSlashes('')).to.be.false;
+      expect(PathUtils.hasDoubleSlashes(undefined)).to.be.false;
+    });
+
+    it('should return true for paths with double slashes', () => {
+      expect(PathUtils.hasDoubleSlashes(TEST_PATH_CONTENT_DAM_DOUBLE_SLASH_IMAGES_PHOTO)).to.be.true;
+      expect(PathUtils.hasDoubleSlashes(TEST_PATH_CONTENT_DOUBLE_SLASH_DAM_IMAGES_PHOTO)).to.be.true;
+      expect(PathUtils.hasDoubleSlashes(TEST_PATH_CONTENT_DAM_IMAGES_DOUBLE_SLASH_PHOTO)).to.be.true;
+    });
+
+    it('should return true for paths with multiple consecutive slashes', () => {
+      expect(PathUtils.hasDoubleSlashes(TEST_PATH_CONTENT_DAM_TRIPLE_SLASH_IMAGES_PHOTO)).to.be.true;
+      expect(PathUtils.hasDoubleSlashes(TEST_PATH_CONTENT_QUAD_SLASH_DAM_IMAGES_PHOTO)).to.be.true;
+    });
+
+    it('should return false for paths without double slashes', () => {
+      expect(PathUtils.hasDoubleSlashes(TEST_PATH_CONTENT_DAM_IMAGES_PHOTO)).to.be.false;
+      expect(PathUtils.hasDoubleSlashes(TEST_PATH_CONTENT_DAM_SLASH)).to.be.false;
+      expect(PathUtils.hasDoubleSlashes('/')).to.be.false;
+    });
+
+    it('should ignore protocol slashes (http://, https://)', () => {
+      expect(PathUtils.hasDoubleSlashes('http://example.com/path')).to.be.false;
+      expect(PathUtils.hasDoubleSlashes('https://example.com/path')).to.be.false;
+      expect(PathUtils.hasDoubleSlashes('ftp://example.com/path')).to.be.false;
+    });
+
+    it('should detect double slashes after protocol', () => {
+      expect(PathUtils.hasDoubleSlashes('http://example.com//path')).to.be.true;
+      expect(PathUtils.hasDoubleSlashes('https://example.com/path//file')).to.be.true;
+    });
+  });
+
+  describe('removeDoubleSlashes', () => {
+    it('should return original input for null or empty', () => {
+      expect(PathUtils.removeDoubleSlashes(null)).to.equal(null);
+      expect(PathUtils.removeDoubleSlashes('')).to.equal('');
+      expect(PathUtils.removeDoubleSlashes(undefined)).to.equal(undefined);
+    });
+
+    it('should remove double slashes from paths', () => {
+      expect(PathUtils.removeDoubleSlashes(TEST_PATH_CONTENT_DAM_DOUBLE_SLASH_IMAGES_PHOTO))
+        .to.equal(TEST_PATH_CONTENT_DAM_IMAGES_PHOTO);
+      expect(PathUtils.removeDoubleSlashes(TEST_PATH_CONTENT_DOUBLE_SLASH_DAM_IMAGES_PHOTO))
+        .to.equal(TEST_PATH_CONTENT_DAM_IMAGES_PHOTO);
+      expect(PathUtils.removeDoubleSlashes(TEST_PATH_CONTENT_DAM_IMAGES_DOUBLE_SLASH_PHOTO))
+        .to.equal(TEST_PATH_CONTENT_DAM_IMAGES_PHOTO);
+    });
+
+    it('should remove multiple consecutive slashes', () => {
+      expect(PathUtils.removeDoubleSlashes(TEST_PATH_CONTENT_DAM_TRIPLE_SLASH_IMAGES_PHOTO))
+        .to.equal(TEST_PATH_CONTENT_DAM_IMAGES_PHOTO);
+      expect(PathUtils.removeDoubleSlashes(TEST_PATH_CONTENT_QUAD_SLASH_DAM_IMAGES_PHOTO))
+        .to.equal(TEST_PATH_CONTENT_DAM_IMAGES_PHOTO);
+      expect(PathUtils.removeDoubleSlashes(TEST_PATH_SIX_SLASHES_CONTENT_DAM_IMAGES_PHOTO))
+        .to.equal(TEST_PATH_CONTENT_DAM_IMAGES_PHOTO);
+    });
+
+    it('should preserve protocol slashes', () => {
+      expect(PathUtils.removeDoubleSlashes('http://example.com/path'))
+        .to.equal('http://example.com/path');
+      expect(PathUtils.removeDoubleSlashes('https://example.com/path'))
+        .to.equal('https://example.com/path');
+      expect(PathUtils.removeDoubleSlashes('ftp://example.com/path'))
+        .to.equal('ftp://example.com/path');
+    });
+
+    it('should fix double slashes after protocol while preserving protocol', () => {
+      expect(PathUtils.removeDoubleSlashes('http://example.com//path'))
+        .to.equal('http://example.com/path');
+      expect(PathUtils.removeDoubleSlashes('https://example.com///path//file'))
+        .to.equal('https://example.com/path/file');
+    });
+
+    it('should handle paths without double slashes', () => {
+      expect(PathUtils.removeDoubleSlashes(TEST_PATH_CONTENT_DAM_IMAGES_PHOTO))
+        .to.equal(TEST_PATH_CONTENT_DAM_IMAGES_PHOTO);
+      expect(PathUtils.removeDoubleSlashes(TEST_PATH_CONTENT_DAM_SLASH))
+        .to.equal(TEST_PATH_CONTENT_DAM_SLASH);
+      expect(PathUtils.removeDoubleSlashes('/'))
+        .to.equal('/');
+    });
+
+    it('should handle complex mixed scenarios', () => {
+      expect(PathUtils.removeDoubleSlashes('https://example.com///content//dam///images//photo.jpg'))
+        .to.equal(TEST_URL_EXAMPLE_COM_CONTENT_DAM_IMAGES_PHOTO);
+    });
+  });
+
+  describe('removeLocaleFromPath edge cases', () => { // trailing-slash handling
+    it('should preserve trailing slash when no locale is found', () => {
+      expect(PathUtils.removeLocaleFromPath(TEST_PATH_CONTENT_DAM_IMAGES_SLASH))
+        .to.equal(TEST_PATH_CONTENT_DAM_IMAGES_SLASH);
+      expect(PathUtils.removeLocaleFromPath(TEST_PATH_CONTENT_DAM_123_SLASH))
+        .to.equal(TEST_PATH_CONTENT_DAM_123_SLASH);
+    });
+
+    it('should remove trailing slash when locale is found', () => {
+      expect(PathUtils.removeLocaleFromPath(TEST_PATH_CONTENT_DAM_EN_US_SLASH))
+        .to.equal(TEST_PATH_CONTENT_DAM);
+    });
+
+    it('should handle edge case with just /content/dam/', () => {
+      expect(PathUtils.removeLocaleFromPath(TEST_PATH_CONTENT_DAM_SLASH))
+        .to.equal(TEST_PATH_CONTENT_DAM_SLASH);
+    });
+  });
+});
diff --git a/test/audits/content-fragment-404/publish-rule.test.js b/test/audits/content-fragment-404/publish-rule.test.js
new file mode 100644
index 000000000..ada4b3707
--- /dev/null
+++ b/test/audits/content-fragment-404/publish-rule.test.js
@@ -0,0 +1,246 @@
+/*
+ * Copyright 2025 Adobe. All rights reserved.
+ * This file is licensed to you under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License. You may obtain a copy
+ * of the License at http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under
+ * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
+ * OF ANY KIND, either express or implied. See the License for the specific language
+ * governing permissions and limitations under the License.
+ */
+
+/* eslint-env mocha */
+import { expect, use } from 'chai';
+import sinon from 'sinon';
+import sinonChai from 'sinon-chai';
+import chaiAsPromised from 'chai-as-promised';
+import esmock from 'esmock';
+import { MockContextBuilder } from '../../shared.js';
+import { TEST_PATH_BROKEN, ERROR_AEM_CONNECTION_FAILED, PUBLISH_RULE_PRIORITY } from './test-constants.js';
+
+use(sinonChai);
+use(chaiAsPromised);
+
+describe('PublishRule', () => { // rule module is loaded through esmock with Suggestion stubbed
+  let sandbox;
+  let context;
+  let mockAemClient;
+  let mockSuggestion;
+  let PublishRule;
+
+  beforeEach(async () => {
+    sandbox = sinon.createSandbox();
+
+    context = new MockContextBuilder()
+      .withSandbox(sandbox)
+      .withOverrides({
+        log: {
+          info: sandbox.spy(),
+          debug: sandbox.spy(),
+          warn: sandbox.spy(),
+          error: sandbox.spy(),
+        },
+      })
+      .build();
+
+    mockAemClient = {
+      isAvailable: sandbox.stub().resolves(false), // default answer; tests override as needed
+    };
+
+    mockSuggestion = {
+      type: 'publish',
+      path: TEST_PATH_BROKEN,
+      publish: sandbox.stub().returns({ type: 'publish', path: TEST_PATH_BROKEN }),
+    };
+
+    const mockedModule = await esmock('../../../src/content-fragment-404/rules/publish-rule.js', {
+      '../../../src/content-fragment-404/domain/suggestion/suggestion.js': {
+        Suggestion: {
+          publish: sandbox.stub().returns(mockSuggestion),
+        },
+      },
+    });
+
+    PublishRule = mockedModule.PublishRule;
+  });
+
+  afterEach(() => {
+    sandbox.restore();
+  });
+
+  describe('constructor', () => {
+    it('should initialize with highest priority (1)', () => {
+      const publishRule = new PublishRule(context, mockAemClient);
+
+      expect(publishRule.context).to.equal(context);
+      expect(publishRule.priority).to.equal(PUBLISH_RULE_PRIORITY);
+      expect(publishRule.aemClient).to.equal(mockAemClient);
+    });
+
+    it('should extend BaseRule', () => {
+      const publishRule = new PublishRule(context, mockAemClient);
+
+      expect(publishRule.getPriority).to.be.a('function');
+      expect(publishRule.getAemClient).to.be.a('function');
+      expect(publishRule.apply).to.be.a('function');
+    });
+
+    it('should work without AEM client initially', () => {
+      const publishRule = new PublishRule(context, null);
+
+      expect(publishRule.context).to.equal(context);
+      expect(publishRule.priority).to.equal(PUBLISH_RULE_PRIORITY);
+      expect(publishRule.aemClient).to.be.null;
+    });
+  });
+
+  describe('applyRule', () => {
+    it('should return publish suggestion when content is available on Author', async () => {
+      mockAemClient.isAvailable.resolves(true);
+      const publishRule = new PublishRule(context, mockAemClient);
+      const brokenPath = TEST_PATH_BROKEN;
+
+      const outcome = await publishRule.applyRule(brokenPath);
+
+      expect(mockAemClient.isAvailable).to.have.been.calledOnceWith(brokenPath);
+      expect(outcome).to.equal(mockSuggestion);
+    });
+
+    it('should return null when content is not available on Author', async () => {
+      mockAemClient.isAvailable.resolves(false);
+      const publishRule = new PublishRule(context, mockAemClient);
+      const brokenPath = TEST_PATH_BROKEN;
+
+      const outcome = await publishRule.applyRule(brokenPath);
+
+      expect(mockAemClient.isAvailable).to.have.been.calledOnceWith(brokenPath);
+      expect(outcome).to.be.null;
+    });
+
+    it('should handle different path formats', async () => {
+      mockAemClient.isAvailable.resolves(true);
+      const publishRule = new PublishRule(context, mockAemClient);
+      const candidatePaths = [
+        '/content/dam/folder/file.pdf',
+        '/content/dam/en-us/assets/image.png',
+        '/content/dam/fr-fr/documents/doc.docx',
+      ];
+
+      for (const candidatePath of candidatePaths) {
+        mockAemClient.isAvailable.resetHistory(); // keeps calledOnceWith valid per iteration
+        context.log.debug.resetHistory();
+        context.log.info.resetHistory();
+
+        // eslint-disable-next-line no-await-in-loop
+        const outcome = await publishRule.applyRule(candidatePath);
+
+        expect(mockAemClient.isAvailable).to.have.been.calledOnceWith(candidatePath);
+        expect(outcome).to.equal(mockSuggestion);
+      }
+    });
+
+    it('should handle AEM client errors gracefully', async () => { // rejection must propagate
+      const connectionError = new Error(ERROR_AEM_CONNECTION_FAILED);
+      mockAemClient.isAvailable.rejects(connectionError);
+      const publishRule = new PublishRule(context, mockAemClient);
+      const brokenPath = TEST_PATH_BROKEN;
+
+      await expect(publishRule.applyRule(brokenPath))
+        .to.be.rejectedWith(ERROR_AEM_CONNECTION_FAILED);
+
+      expect(mockAemClient.isAvailable).to.have.been.calledOnceWith(brokenPath);
+    });
+
+    it('should throw error when AEM client not available', async () => {
+      const publishRule = new PublishRule(context, null);
+      const brokenPath = TEST_PATH_BROKEN;
+
+      await expect(publishRule.applyRule(brokenPath))
+        .to.be.rejectedWith('AemClient not injected');
+
+      expect(context.log.error).to.have.been.calledWith('AemClient not injected');
+    });
+
+    it('should handle empty path', async () => {
+      mockAemClient.isAvailable.resolves(false);
+      const publishRule = new PublishRule(context, mockAemClient);
+      const brokenPath = '';
+
+      const outcome = await publishRule.applyRule(brokenPath);
+
+      expect(mockAemClient.isAvailable).to.have.been.calledOnceWith('');
+      expect(outcome).to.be.null;
+    });
+
+    it('should handle null path', async () => {
+      mockAemClient.isAvailable.resolves(false);
+      const publishRule = new PublishRule(context, mockAemClient);
+      const brokenPath = null;
+
+      const outcome = await publishRule.applyRule(brokenPath);
+
+      expect(mockAemClient.isAvailable).to.have.been.calledOnceWith(null);
+      expect(outcome).to.be.null;
+    });
+  });
+
+  describe('integration with BaseRule', () => {
+    it('should work through apply method', async () => {
+      mockAemClient.isAvailable.resolves(true);
+      const publishRule = new PublishRule(context, mockAemClient);
+      const brokenPath = TEST_PATH_BROKEN;
+
+      const outcome = await publishRule.apply(brokenPath);
+
+      expect(outcome).to.equal(mockSuggestion);
+    });
+
+    it('should return correct priority', () => {
+      const publishRule = new PublishRule(context, mockAemClient);
+
+      expect(publishRule.getPriority()).to.equal(PUBLISH_RULE_PRIORITY);
+    });
+
+    it('should return AEM client when available', () => {
+      const publishRule = new PublishRule(context, mockAemClient);
+
+      expect(publishRule.getAemClient()).to.equal(mockAemClient);
+    });
+  });
+
+  describe('edge cases', () => {
+    it('should handle very long paths', async () => {
+      mockAemClient.isAvailable.resolves(true);
+      const publishRule = new PublishRule(context, mockAemClient);
+      const longPath = `/content/dam/${'very-long-folder-name/'.repeat(20)}file.jpg`;
+
+      const outcome = await publishRule.applyRule(longPath);
+
+      expect(mockAemClient.isAvailable).to.have.been.calledOnceWith(longPath);
+      expect(outcome).to.equal(mockSuggestion);
+    });
+
+    it('should handle paths with special characters', async () => {
+      mockAemClient.isAvailable.resolves(true);
+      const publishRule = new PublishRule(context, mockAemClient);
+      const specialPath = '/content/dam/folder with spaces/file-with-dashes_and_underscores.jpg';
+
+      const outcome = await publishRule.applyRule(specialPath);
+
+      expect(mockAemClient.isAvailable).to.have.been.calledOnceWith(specialPath);
+      expect(outcome).to.equal(mockSuggestion);
+    });
+
+    it('should handle paths with encoded characters', async () => {
+      mockAemClient.isAvailable.resolves(true);
+      const publishRule = new PublishRule(context, mockAemClient);
+      const encodedPath = '/content/dam/folder%20with%20spaces/file.jpg';
+
+      const outcome = await publishRule.applyRule(encodedPath);
+
+      expect(mockAemClient.isAvailable).to.have.been.calledOnceWith(encodedPath); // passed verbatim
+      expect(outcome).to.equal(mockSuggestion);
+    });
+  });
+});
diff --git a/test/audits/content-fragment-404/similar-path-rule.test.js b/test/audits/content-fragment-404/similar-path-rule.test.js
new file mode 100644
index 000000000..564d3cbd9
--- /dev/null
+++
// b/test/audits/content-fragment-404/similar-path-rule.test.js @@ -0,0 +1,432 @@
/*
 * Copyright 2025 Adobe. All rights reserved.
 * This file is licensed to you under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License. You may obtain a copy
 * of the License at http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under
 * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
 * OF ANY KIND, either express or implied. See the License for the specific language
 * governing permissions and limitations under the License.
 */

/* eslint-env mocha */
import { expect, use } from 'chai';
import sinon from 'sinon';
import sinonChai from 'sinon-chai';
import chaiAsPromised from 'chai-as-promised';
import esmock from 'esmock';
import { MockContextBuilder } from '../../shared.js';
import {
  TEST_PATH_BROKEN,
  TEST_PATH_PARENT,
  TEST_PATH_FIXED,
  TEST_PATH_BROKEN_WITH_DOUBLE_SLASHES,
  ERROR_AEM_CONNECTION_FAILED,
  SIMILAR_PATH_RULE_PRIORITY,
  MAX_LEVENSHTEIN_DISTANCE,
} from './test-constants.js';

use(sinonChai);
use(chaiAsPromised);

/**
 * Unit tests for SimilarPathRule.
 *
 * The rule module is loaded through esmock with Suggestion, LevenshteinDistance and
 * PathUtils replaced by sinon stubs, so every test drives the rule purely through the
 * stubbed return values configured below.
 */
describe('SimilarPathRule', () => {
  let sandbox;
  let context;
  let mockAemClient;
  let mockPathIndex;
  let mockSuggestion;
  let mockPathUtils;
  let mockLevenshteinDistance;
  let SimilarPathRule;

  /** Identity fake used for PathUtils.removeLocaleFromPath. */
  const passThrough = (path) => path;

  /**
   * Builds a rule from the current per-test mocks.
   * @param {object|null} [aemClient] - client to inject; defaults to the shared mock,
   *   an explicit null is passed through unchanged.
   */
  const makeRule = (aemClient = mockAemClient) => (
    new SimilarPathRule(context, aemClient, mockPathIndex)
  );

  /** Wraps each path string into the `{ path }` shape used for candidate lists. */
  const toCandidates = (...paths) => paths.map((path) => ({ path }));

  /** Makes the n-th LevenshteinDistance.calculate call return the n-th given distance. */
  const stubDistances = (...distances) => {
    distances.forEach((distance, callIndex) => {
      mockLevenshteinDistance.calculate.onCall(callIndex).returns(distance);
    });
  };

  beforeEach(async () => {
    sandbox = sinon.createSandbox();

    const log = Object.fromEntries(
      ['info', 'debug', 'warn', 'error'].map((level) => [level, sandbox.spy()]),
    );
    context = new MockContextBuilder()
      .withSandbox(sandbox)
      .withOverrides({ log })
      .build();

    mockAemClient = {
      isAvailable: sandbox.stub().resolves(false),
      getChildrenFromPath: sandbox.stub().resolves([]),
    };

    mockPathIndex = {
      find: sandbox.stub().returns(null),
      contains: sandbox.stub().returns(false),
    };

    mockSuggestion = {
      type: 'similar',
      originalPath: TEST_PATH_BROKEN,
      suggestedPath: TEST_PATH_FIXED,
    };

    mockPathUtils = {
      getParentPath: sandbox.stub().returns(TEST_PATH_PARENT),
      hasDoubleSlashes: sandbox.stub().returns(false),
      removeDoubleSlashes: sandbox.stub().returns(TEST_PATH_FIXED),
      removeLocaleFromPath: sandbox.stub().callsFake(passThrough),
    };

    mockLevenshteinDistance = {
      calculate: sandbox.stub().returns(MAX_LEVENSHTEIN_DISTANCE),
    };

    // Load the rule with its collaborators swapped for the stubs above.
    ({ SimilarPathRule } = await esmock('../../../src/content-fragment-404/rules/similar-path-rule.js', {
      '../../../src/content-fragment-404/domain/suggestion/suggestion.js': {
        Suggestion: {
          similar: sandbox.stub().returns(mockSuggestion),
        },
      },
      '../../../src/content-fragment-404/utils/levenshtein-distance.js': {
        LevenshteinDistance: mockLevenshteinDistance,
      },
      '../../../src/content-fragment-404/utils/path-utils.js': {
        PathUtils: mockPathUtils,
      },
    }));
  });

  afterEach(() => {
    sandbox.restore();
  });

  describe('constructor', () => {
    it('should initialize with third priority (3) and path index', () => {
      const rule = makeRule();

      expect(rule.context).to.equal(context);
      expect(rule.priority).to.equal(SIMILAR_PATH_RULE_PRIORITY);
      expect(rule.aemClient).to.equal(mockAemClient);
      expect(rule.pathIndex).to.equal(mockPathIndex);
    });

    it('should extend BaseRule', () => {
      const rule = makeRule();

      ['getPriority', 'getAemClient', 'apply'].forEach((method) => {
        expect(rule[method]).to.be.a('function');
      });
    });
  });

  describe('applyRule main flow', () => {
    it('should return similar path suggestion when found', async () => {
      mockPathUtils.getParentPath.returns(TEST_PATH_PARENT);
      mockAemClient.getChildrenFromPath.resolves(
        toCandidates('/content/dam/test/similar.jpg', '/content/dam/test/other.pdf'),
      );
      mockPathUtils.removeLocaleFromPath.callsFake(passThrough);
      mockLevenshteinDistance.calculate.returns(MAX_LEVENSHTEIN_DISTANCE);

      const outcome = await makeRule().applyRule(TEST_PATH_BROKEN);

      expect(mockAemClient.getChildrenFromPath).to.have.been.calledWith(TEST_PATH_PARENT);
      expect(outcome).to.equal(mockSuggestion);
    });

    it('should return null when no parent path found', async () => {
      mockPathUtils.getParentPath.returns(null);

      const outcome = await makeRule().applyRule(TEST_PATH_BROKEN);

      expect(outcome).to.be.null;
    });

    it('should return null when no children paths found', async () => {
      mockPathUtils.getParentPath.returns(TEST_PATH_PARENT);
      mockAemClient.getChildrenFromPath.resolves([]);

      const outcome = await makeRule().applyRule(TEST_PATH_BROKEN);

      expect(outcome).to.be.null;
    });

    it('should return null when no similar paths within distance threshold', async () => {
      mockPathUtils.getParentPath.returns(TEST_PATH_PARENT);
      mockAemClient.getChildrenFromPath.resolves(
        toCandidates('/content/dam/test/completely-different.jpg'),
      );
      mockPathUtils.removeLocaleFromPath.callsFake(passThrough);
      // Every comparison reports a distance above the allowed maximum.
      mockLevenshteinDistance.calculate.returns(MAX_LEVENSHTEIN_DISTANCE + 9);

      const outcome = await makeRule().applyRule(TEST_PATH_BROKEN);

      expect(outcome).to.be.null;
    });
  });

  describe('applyRule with double slash handling', () => {
    it('should return suggestion when double slash can be fixed directly', async () => {
      mockPathUtils.hasDoubleSlashes.returns(true);
      mockPathUtils.removeDoubleSlashes.returns(TEST_PATH_FIXED);
      mockAemClient.isAvailable.resolves(true);

      const outcome = await makeRule().applyRule(TEST_PATH_BROKEN);

      expect(outcome).to.equal(mockSuggestion);
    });

    it('should continue with similarity check when double slash fix not available', async () => {
      mockPathUtils.hasDoubleSlashes.returns(true);
      mockPathUtils.removeDoubleSlashes.returns(TEST_PATH_FIXED);
      mockPathUtils.getParentPath.returns('/content/dam/test');
      mockAemClient.isAvailable.resolves(false);
      mockAemClient.getChildrenFromPath.resolves(
        toCandidates('/content/dam/test/similar.jpg'),
      );
      mockPathUtils.removeLocaleFromPath.callsFake(passThrough);
      mockLevenshteinDistance.calculate.returns(MAX_LEVENSHTEIN_DISTANCE);

      const outcome = await makeRule().applyRule(TEST_PATH_BROKEN_WITH_DOUBLE_SLASHES);

      expect(outcome).to.equal(mockSuggestion);
    });

    it('should handle paths without double slashes normally', async () => {
      mockPathUtils.hasDoubleSlashes.returns(false);
      mockPathUtils.getParentPath.returns(TEST_PATH_PARENT);
      mockAemClient.getChildrenFromPath.resolves(
        toCandidates('/content/dam/test/similar.jpg'),
      );
      mockPathUtils.removeLocaleFromPath.callsFake(passThrough);
      mockLevenshteinDistance.calculate.returns(MAX_LEVENSHTEIN_DISTANCE);

      const outcome = await makeRule().applyRule(TEST_PATH_BROKEN);

      expect(outcome).to.equal(mockSuggestion);
    });
  });

  describe('checkDoubleSlash', () => {
    it('should return null when no double slashes present', async () => {
      mockPathUtils.hasDoubleSlashes.returns(false);

      const outcome = await makeRule().checkDoubleSlash(TEST_PATH_BROKEN);

      expect(outcome).to.be.null;
    });

    it('should return suggestion when fixed path is available', async () => {
      mockPathUtils.hasDoubleSlashes.returns(true);
      mockPathUtils.removeDoubleSlashes.returns(TEST_PATH_FIXED);
      mockAemClient.isAvailable.resolves(true);

      const outcome = await makeRule().checkDoubleSlash('/content/dam//test/broken.jpg');

      expect(outcome.suggestion).to.equal(mockSuggestion);
      expect(outcome.fixedPath).to.equal(TEST_PATH_FIXED);
    });

    it('should return fixed path without suggestion when not available', async () => {
      mockPathUtils.hasDoubleSlashes.returns(true);
      mockPathUtils.removeDoubleSlashes.returns(TEST_PATH_FIXED);
      mockAemClient.isAvailable.resolves(false);

      const outcome = await makeRule().checkDoubleSlash(TEST_PATH_BROKEN_WITH_DOUBLE_SLASHES);

      expect(outcome.suggestion).to.be.null;
      expect(outcome.fixedPath).to.equal(TEST_PATH_FIXED);
    });
  });

  describe('findSimilarPath static method', () => {
    it('should find best match within distance threshold', () => {
      mockPathUtils.removeLocaleFromPath.callsFake(passThrough);
      // Only the second candidate is reported at (not above) the threshold.
      stubDistances(
        MAX_LEVENSHTEIN_DISTANCE + 1,
        MAX_LEVENSHTEIN_DISTANCE,
        MAX_LEVENSHTEIN_DISTANCE + 2,
      );
      const candidates = toCandidates(
        '/content/dam/test/far.jpg',
        '/content/dam/test/close.jpg',
        '/content/dam/test/very-far.jpg',
      );

      const match = SimilarPathRule.findSimilarPath(
        TEST_PATH_BROKEN,
        candidates,
        MAX_LEVENSHTEIN_DISTANCE,
      );

      expect(match).to.equal(candidates[1]);
    });

    it('should return null when no matches within threshold', () => {
      mockPathUtils.removeLocaleFromPath.callsFake(passThrough);
      mockLevenshteinDistance.calculate.returns(MAX_LEVENSHTEIN_DISTANCE + 4);
      const candidates = toCandidates('/content/dam/test/far.jpg');

      const match = SimilarPathRule.findSimilarPath(
        TEST_PATH_BROKEN,
        candidates,
        MAX_LEVENSHTEIN_DISTANCE,
      );

      expect(match).to.be.null;
    });

    it('should handle empty candidate paths', () => {
      const match = SimilarPathRule.findSimilarPath(
        '/content/dam/test/broken.jpg',
        [],
        MAX_LEVENSHTEIN_DISTANCE,
      );

      expect(match).to.be.null;
    });

    it('should compare paths without locale information', () => {
      mockPathUtils.removeLocaleFromPath.onCall(0).returns(TEST_PATH_BROKEN);
      mockPathUtils.removeLocaleFromPath.onCall(1).returns('/content/dam/test/similar.jpg');
      mockLevenshteinDistance.calculate.returns(MAX_LEVENSHTEIN_DISTANCE);
      const candidates = toCandidates('/content/dam/en-us/test/similar.jpg');

      const match = SimilarPathRule.findSimilarPath(
        '/content/dam/fr-fr/test/broken.jpg',
        candidates,
        MAX_LEVENSHTEIN_DISTANCE,
      );

      expect(mockPathUtils.removeLocaleFromPath).to.have.been.calledWith('/content/dam/fr-fr/test/broken.jpg');
      expect(mockPathUtils.removeLocaleFromPath).to.have.been.calledWith('/content/dam/en-us/test/similar.jpg');
      expect(match).to.equal(candidates[0]);
    });

    it('should find closest match when multiple candidates within threshold', () => {
      mockPathUtils.removeLocaleFromPath.callsFake(passThrough);
      // The middle candidate is reported as a perfect match (distance 0).
      stubDistances(MAX_LEVENSHTEIN_DISTANCE, 0, MAX_LEVENSHTEIN_DISTANCE);
      const candidates = toCandidates(
        '/content/dam/test/close1.jpg',
        TEST_PATH_BROKEN, // Exact match
        '/content/dam/test/close2.jpg',
      );

      const match = SimilarPathRule.findSimilarPath(
        TEST_PATH_BROKEN,
        candidates,
        MAX_LEVENSHTEIN_DISTANCE,
      );

      expect(match).to.equal(candidates[1]);
    });
  });

  describe('error handling', () => {
    it('should handle AEM client errors during children fetch', async () => {
      mockPathUtils.getParentPath.returns(TEST_PATH_PARENT);
      mockAemClient.getChildrenFromPath.rejects(new Error(ERROR_AEM_CONNECTION_FAILED));

      await expect(makeRule().applyRule(TEST_PATH_BROKEN))
        .to.be.rejectedWith(ERROR_AEM_CONNECTION_FAILED);
    });

    it('should handle AEM client errors during double slash check', async () => {
      mockPathUtils.hasDoubleSlashes.returns(true);
      mockPathUtils.removeDoubleSlashes.returns('/content/dam/test/fixed.jpg');
      mockAemClient.isAvailable.rejects(new Error(ERROR_AEM_CONNECTION_FAILED));

      await expect(makeRule().applyRule(TEST_PATH_BROKEN_WITH_DOUBLE_SLASHES))
        .to.be.rejectedWith(ERROR_AEM_CONNECTION_FAILED);
    });

    it('should throw error when AEM client not available', async () => {
      // Explicit null bypasses the helper's default and injects no client.
      await expect(makeRule(null).applyRule(TEST_PATH_BROKEN))
        .to.be.rejectedWith('AemClient not injected');
    });
  });

  describe('integration scenarios', () => {
    it('should work through apply method', async () => {
      mockPathUtils.getParentPath.returns(TEST_PATH_PARENT);
      mockAemClient.getChildrenFromPath.resolves(
        toCandidates('/content/dam/test/similar.jpg'),
      );
      mockPathUtils.removeLocaleFromPath.callsFake(passThrough);
      mockLevenshteinDistance.calculate.returns(MAX_LEVENSHTEIN_DISTANCE);

      const outcome = await makeRule().apply(TEST_PATH_BROKEN);

      expect(outcome).to.equal(mockSuggestion);
    });

    it('should return correct priority', () => {
      expect(makeRule().getPriority()).to.equal(SIMILAR_PATH_RULE_PRIORITY);
    });

    it('should handle complex similarity scenarios', async () => {
      mockPathUtils.getParentPath.returns('/content/dam/test');
      mockAemClient.getChildrenFromPath.resolves(toCandidates(
        '/content/dam/test/image1.jpg',
        '/content/dam/test/image2.png',
        '/content/dam/test/document.pdf',
      ));
      mockPathUtils.removeLocaleFromPath.callsFake(passThrough);
      // Only the second comparison lands within the threshold.
      stubDistances(
        MAX_LEVENSHTEIN_DISTANCE + 2,
        MAX_LEVENSHTEIN_DISTANCE,
        MAX_LEVENSHTEIN_DISTANCE + 4,
      );

      // Different path for this test case
      const outcome = await makeRule().applyRule('/content/dam/test/image3.jpg');

      expect(outcome).to.equal(mockSuggestion);
    });
  });
});
// diff --git a/test/audits/content-fragment-404/suggestion.test.js b/test/audits/content-fragment-404/suggestion.test.js new file mode 100644 index 000000000..2f61c07b0 --- /dev/null +++ b/test/audits/content-fragment-404/suggestion.test.js @@ -0,0 +1,430 @@
/*
 * Copyright 2025 Adobe. All rights reserved.
 * This file is licensed to you under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License. You may obtain a copy
 * of the License at http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under
 * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
 * OF ANY KIND, either express or implied. See the License for the specific language
 * governing permissions and limitations under the License.
 */

/* eslint-env mocha */
import { expect } from 'chai';
import { Suggestion, SuggestionType } from '../../../src/content-fragment-404/domain/suggestion/suggestion.js';
import {
  TEST_PATH_TEST_IMAGE,
  TEST_PATH_TEST_BROKEN,
  TEST_PATH_TEST_FIXED,
  TEST_PATH_TEST_MISSING,
  TEST_PATH_TEST_IMAG,
  TEST_PATH_TEST_PUBLISHED_IMAGE,
  TEST_PATH_FR_FR_TEST_IMAGE,
  TEST_PATH_EN_US_TEST_IMAGE,
  TEST_PATH_DE_DE_TEST_IMAGE,
  TEST_PATH_TEST_PHOTO,
  TEST_PATH_TEST_PHOTOS,
  TEST_PATH_TEST_DELETED,
  TEST_PATH_EN_US_COMPLEX,
} from './test-constants.js';

/**
 * Unit tests for the Suggestion value object and its SuggestionType enum.
 *
 * Covers the constructor, the four static factories (publish, locale, similar, notFound)
 * including their default reasons, and JSON serialization through toJSON/JSON.stringify.
 * These tests exercise the real module; nothing is mocked.
 */
describe('Suggestion', () => {
  describe('SuggestionType enum', () => {
    it('should have all expected suggestion types', () => {
      expect(SuggestionType.PUBLISH).to.equal('PUBLISH');
      expect(SuggestionType.LOCALE).to.equal('LOCALE');
      expect(SuggestionType.SIMILAR).to.equal('SIMILAR');
      expect(SuggestionType.NOT_FOUND).to.equal('NOT_FOUND');
    });
  });

  describe('constructor', () => {
    it('should create a suggestion with all parameters', () => {
      const requestedPath = TEST_PATH_TEST_BROKEN;
      const suggestedPath = TEST_PATH_TEST_FIXED;
      const type = SuggestionType.SIMILAR;
      const reason = 'Test reason';

      const suggestion = new Suggestion(requestedPath, suggestedPath, type, reason);

      expect(suggestion.requestedPath).to.equal(requestedPath);
      expect(suggestion.suggestedPath).to.equal(suggestedPath);
      expect(suggestion.type).to.equal(type);
      expect(suggestion.reason).to.equal(reason);
    });

    it('should create a suggestion with null suggestedPath', () => {
      const requestedPath = TEST_PATH_TEST_BROKEN;
      const type = SuggestionType.NOT_FOUND;
      const reason = 'Content not found';

      const suggestion = new Suggestion(requestedPath, null, type, reason);

      expect(suggestion.requestedPath).to.equal(requestedPath);
      expect(suggestion.suggestedPath).to.be.null;
      expect(suggestion.type).to.equal(type);
      expect(suggestion.reason).to.equal(reason);
    });

    it('should create a suggestion with undefined parameters', () => {
      const suggestion = new Suggestion(undefined, undefined, undefined, undefined);

      expect(suggestion.requestedPath).to.be.undefined;
      expect(suggestion.suggestedPath).to.be.undefined;
      expect(suggestion.type).to.be.undefined;
      expect(suggestion.reason).to.be.undefined;
    });

    it('should create a suggestion with empty string parameters', () => {
      const suggestion = new Suggestion('', '', '', '');

      expect(suggestion.requestedPath).to.equal('');
      expect(suggestion.suggestedPath).to.equal('');
      expect(suggestion.type).to.equal('');
      expect(suggestion.reason).to.equal('');
    });
  });

  describe('static factory methods', () => {
    describe('publish', () => {
      it('should create a PUBLISH suggestion with default parameters', () => {
        const requestedPath = TEST_PATH_TEST_IMAGE;
        const suggestion = Suggestion.publish(requestedPath);

        expect(suggestion.requestedPath).to.equal(requestedPath);
        expect(suggestion.suggestedPath).to.be.null;
        expect(suggestion.type).to.equal(SuggestionType.PUBLISH);
        expect(suggestion.reason).to.equal('Content exists on Author');
      });

      it('should create a PUBLISH suggestion with custom suggestedPath', () => {
        const requestedPath = TEST_PATH_TEST_IMAGE;
        const suggestedPath = TEST_PATH_TEST_PUBLISHED_IMAGE;
        const suggestion = Suggestion.publish(requestedPath, suggestedPath);

        expect(suggestion.requestedPath).to.equal(requestedPath);
        expect(suggestion.suggestedPath).to.equal(suggestedPath);
        expect(suggestion.type).to.equal(SuggestionType.PUBLISH);
        expect(suggestion.reason).to.equal('Content exists on Author');
      });

      it('should create a PUBLISH suggestion with custom reason', () => {
        const requestedPath = TEST_PATH_TEST_IMAGE;
        const customReason = 'Custom publish reason';
        const suggestion = Suggestion.publish(requestedPath, null, customReason);

        expect(suggestion.requestedPath).to.equal(requestedPath);
        expect(suggestion.suggestedPath).to.be.null;
        expect(suggestion.type).to.equal(SuggestionType.PUBLISH);
        expect(suggestion.reason).to.equal(customReason);
      });

      it('should create a PUBLISH suggestion with all custom parameters', () => {
        const requestedPath = TEST_PATH_TEST_IMAGE;
        const suggestedPath = TEST_PATH_TEST_PUBLISHED_IMAGE;
        const customReason = 'Available for publishing';
        const suggestion = Suggestion.publish(requestedPath, suggestedPath, customReason);

        expect(suggestion.requestedPath).to.equal(requestedPath);
        expect(suggestion.suggestedPath).to.equal(suggestedPath);
        expect(suggestion.type).to.equal(SuggestionType.PUBLISH);
        expect(suggestion.reason).to.equal(customReason);
      });
    });

    describe('locale', () => {
      it('should create a LOCALE suggestion with default reason', () => {
        const requestedPath = TEST_PATH_FR_FR_TEST_IMAGE;
        const suggestedPath = TEST_PATH_EN_US_TEST_IMAGE;
        const suggestion = Suggestion.locale(requestedPath, suggestedPath);

        expect(suggestion.requestedPath).to.equal(requestedPath);
        expect(suggestion.suggestedPath).to.equal(suggestedPath);
        expect(suggestion.type).to.equal(SuggestionType.LOCALE);
        expect(suggestion.reason).to.equal('Locale fallback detected');
      });

      it('should create a LOCALE suggestion with custom reason', () => {
        const requestedPath = TEST_PATH_DE_DE_TEST_IMAGE;
        const suggestedPath = TEST_PATH_EN_US_TEST_IMAGE;
        const customReason = 'German locale not available, fallback to English';
        const suggestion = Suggestion.locale(requestedPath, suggestedPath, customReason);

        expect(suggestion.requestedPath).to.equal(requestedPath);
        expect(suggestion.suggestedPath).to.equal(suggestedPath);
        expect(suggestion.type).to.equal(SuggestionType.LOCALE);
        expect(suggestion.reason).to.equal(customReason);
      });

      it('should handle null suggestedPath', () => {
        const requestedPath = TEST_PATH_FR_FR_TEST_IMAGE;
        const suggestion = Suggestion.locale(requestedPath, null);

        expect(suggestion.requestedPath).to.equal(requestedPath);
        expect(suggestion.suggestedPath).to.be.null;
        expect(suggestion.type).to.equal(SuggestionType.LOCALE);
        expect(suggestion.reason).to.equal('Locale fallback detected');
      });
    });

    describe('similar', () => {
      it('should create a SIMILAR suggestion with default reason', () => {
        const requestedPath = TEST_PATH_TEST_IMAG;
        const suggestedPath = TEST_PATH_TEST_IMAGE;
        const suggestion = Suggestion.similar(requestedPath, suggestedPath);

        expect(suggestion.requestedPath).to.equal(requestedPath);
        expect(suggestion.suggestedPath).to.equal(suggestedPath);
        expect(suggestion.type).to.equal(SuggestionType.SIMILAR);
        expect(suggestion.reason).to.equal('Similar path found');
      });

      it('should create a SIMILAR suggestion with custom reason', () => {
        const requestedPath = TEST_PATH_TEST_PHOTO;
        const suggestedPath = TEST_PATH_TEST_PHOTOS;
        const customReason = 'Levenshtein distance: 1';
        const suggestion = Suggestion.similar(requestedPath, suggestedPath, customReason);

        expect(suggestion.requestedPath).to.equal(requestedPath);
        expect(suggestion.suggestedPath).to.equal(suggestedPath);
        expect(suggestion.type).to.equal(SuggestionType.SIMILAR);
        expect(suggestion.reason).to.equal(customReason);
      });

      it('should handle null suggestedPath', () => {
        const requestedPath = TEST_PATH_TEST_BROKEN;
        const suggestion = Suggestion.similar(requestedPath, null);

        expect(suggestion.requestedPath).to.equal(requestedPath);
        expect(suggestion.suggestedPath).to.be.null;
        expect(suggestion.type).to.equal(SuggestionType.SIMILAR);
        expect(suggestion.reason).to.equal('Similar path found');
      });
    });

    describe('notFound', () => {
      it('should create a NOT_FOUND suggestion with default reason', () => {
        const requestedPath = TEST_PATH_TEST_MISSING;
        const suggestion = Suggestion.notFound(requestedPath);

        expect(suggestion.requestedPath).to.equal(requestedPath);
        expect(suggestion.suggestedPath).to.be.null;
        expect(suggestion.type).to.equal(SuggestionType.NOT_FOUND);
        expect(suggestion.reason).to.equal('Not found');
      });

      it('should create a NOT_FOUND suggestion with custom reason', () => {
        const requestedPath = TEST_PATH_TEST_DELETED;
        const customReason = 'Content was deleted and no alternatives found';
        const suggestion = Suggestion.notFound(requestedPath, customReason);

        expect(suggestion.requestedPath).to.equal(requestedPath);
        expect(suggestion.suggestedPath).to.be.null;
        expect(suggestion.type).to.equal(SuggestionType.NOT_FOUND);
        expect(suggestion.reason).to.equal(customReason);
      });

      it('should handle empty string requestedPath', () => {
        const suggestion = Suggestion.notFound('');

        expect(suggestion.requestedPath).to.equal('');
        expect(suggestion.suggestedPath).to.be.null;
        expect(suggestion.type).to.equal(SuggestionType.NOT_FOUND);
        expect(suggestion.reason).to.equal('Not found');
      });

      it('should handle null requestedPath', () => {
        const suggestion = Suggestion.notFound(null);

        expect(suggestion.requestedPath).to.be.null;
        expect(suggestion.suggestedPath).to.be.null;
        expect(suggestion.type).to.equal(SuggestionType.NOT_FOUND);
        expect(suggestion.reason).to.equal('Not found');
      });
    });
  });

  describe('toJSON', () => {
    it('should serialize a complete suggestion to JSON', () => {
      const requestedPath = TEST_PATH_TEST_BROKEN;
      const suggestedPath = TEST_PATH_TEST_FIXED;
      const type = SuggestionType.SIMILAR;
      const reason = 'Similar path found';

      const suggestion = new Suggestion(requestedPath, suggestedPath, type, reason);
      const json = suggestion.toJSON();

      expect(json).to.deep.equal({
        requestedPath,
        suggestedPath,
        type,
        reason,
      });
    });

    it('should serialize a suggestion with null suggestedPath', () => {
      const requestedPath = TEST_PATH_TEST_MISSING;
      const type = SuggestionType.NOT_FOUND;
      const reason = 'Content not found';

      const suggestion = new Suggestion(requestedPath, null, type, reason);
      const json = suggestion.toJSON();

      expect(json).to.deep.equal({
        requestedPath,
        suggestedPath: null,
        type,
        reason,
      });
    });

    it('should serialize a PUBLISH suggestion created via factory method', () => {
      const requestedPath = TEST_PATH_TEST_IMAGE;
      const suggestion = Suggestion.publish(requestedPath);
      const json = suggestion.toJSON();

      expect(json).to.deep.equal({
        requestedPath,
        suggestedPath: null,
        type: SuggestionType.PUBLISH,
        reason: 'Content exists on Author',
      });
    });

    it('should serialize a LOCALE suggestion created via factory method', () => {
      const requestedPath = TEST_PATH_FR_FR_TEST_IMAGE;
      const suggestedPath = TEST_PATH_EN_US_TEST_IMAGE;
      const suggestion = Suggestion.locale(requestedPath, suggestedPath);
      const json = suggestion.toJSON();

      expect(json).to.deep.equal({
        requestedPath,
        suggestedPath,
        type: SuggestionType.LOCALE,
        reason: 'Locale fallback detected',
      });
    });

    it('should serialize a SIMILAR suggestion created via factory method', () => {
      const requestedPath = TEST_PATH_TEST_IMAG;
      const suggestedPath = TEST_PATH_TEST_IMAGE;
      const suggestion = Suggestion.similar(requestedPath, suggestedPath);
      const json = suggestion.toJSON();

      expect(json).to.deep.equal({
        requestedPath,
        suggestedPath,
        type: SuggestionType.SIMILAR,
        reason: 'Similar path found',
      });
    });

    it('should serialize a NOT_FOUND suggestion created via factory method', () => {
      const requestedPath = TEST_PATH_TEST_MISSING;
      const suggestion = Suggestion.notFound(requestedPath);
      const json = suggestion.toJSON();

      expect(json).to.deep.equal({
        requestedPath,
        suggestedPath: null,
        type: SuggestionType.NOT_FOUND,
        reason: 'Not found',
      });
    });

    it('should handle undefined values in toJSON', () => {
      const suggestion = new Suggestion(undefined, undefined, undefined, undefined);
      const json = suggestion.toJSON();

      expect(json).to.deep.equal({
        requestedPath: undefined,
        suggestedPath: undefined,
        type: undefined,
        reason: undefined,
      });
    });

    it('should handle empty string values in toJSON', () => {
      const suggestion = new Suggestion('', '', '', '');
      const json = suggestion.toJSON();

      expect(json).to.deep.equal({
        requestedPath: '',
        suggestedPath: '',
        type: '',
        reason: '',
      });
    });
  });

  describe('integration scenarios', () => {
    it('should work with JSON.stringify', () => {
      const suggestion = Suggestion.publish(TEST_PATH_TEST_IMAGE);
      const jsonString = JSON.stringify(suggestion);
      const parsed = JSON.parse(jsonString);

      expect(parsed).to.deep.equal({
        requestedPath: TEST_PATH_TEST_IMAGE,
        suggestedPath: null,
        type: SuggestionType.PUBLISH,
        reason: 'Content exists on Author',
      });
    });

    // Previously titled "should maintain immutability after creation", which contradicted
    // the assertions: the test reassigns every property and expects the new values to stick.
    // What it actually pins down is (a) instance properties are writable and (b) mutating
    // one instance has no effect on suggestions produced by the factory afterwards.
    it('should allow property reassignment without affecting later factory output', () => {
      const suggestion = Suggestion.similar(TEST_PATH_TEST_BROKEN, TEST_PATH_TEST_FIXED);
      const originalPath = suggestion.requestedPath;
      const originalSuggested = suggestion.suggestedPath;
      const originalType = suggestion.type;
      const originalReason = suggestion.reason;

      // Overwrite every property on the existing instance
      suggestion.requestedPath = 'modified';
      suggestion.suggestedPath = 'modified';
      suggestion.type = 'modified';
      suggestion.reason = 'modified';

      // The writes are expected to take effect on that instance
      expect(suggestion.requestedPath).to.equal('modified');
      expect(suggestion.suggestedPath).to.equal('modified');
      expect(suggestion.type).to.equal('modified');
      expect(suggestion.reason).to.equal('modified');

      // Create a new suggestion to verify factory methods still work
      const newSuggestion = Suggestion.similar(originalPath, originalSuggested);
      expect(newSuggestion.requestedPath).to.equal(originalPath);
      expect(newSuggestion.suggestedPath).to.equal(originalSuggested);
      expect(newSuggestion.type).to.equal(originalType);
      expect(newSuggestion.reason).to.equal(originalReason);
    });

    it('should handle complex path scenarios', () => {
      const complexPath = TEST_PATH_EN_US_COMPLEX;
      const suggestion = Suggestion.locale(complexPath, complexPath.replace('en-us', 'fr-fr'));

      expect(suggestion.requestedPath).to.equal(complexPath);
      expect(suggestion.suggestedPath).to.include('fr-fr');
      expect(suggestion.type).to.equal(SuggestionType.LOCALE);
    });

    it('should work with all factory methods in sequence', () => {
      const basePath = TEST_PATH_TEST_IMAGE;

      const publishSuggestion = Suggestion.publish(basePath);
      const localeSuggestion = Suggestion.locale(basePath, basePath.replace('test', 'en-us'));
      const similarSuggestion = Suggestion.similar(basePath, basePath.replace('image', 'photo'));
      const notFoundSuggestion = Suggestion.notFound(basePath);

      expect(publishSuggestion.type).to.equal(SuggestionType.PUBLISH);
      expect(localeSuggestion.type).to.equal(SuggestionType.LOCALE);
      expect(similarSuggestion.type).to.equal(SuggestionType.SIMILAR);
      expect(notFoundSuggestion.type).to.equal(SuggestionType.NOT_FOUND);

      // All should have the same requestedPath
      expect(publishSuggestion.requestedPath).to.equal(basePath);
      expect(localeSuggestion.requestedPath).to.equal(basePath);
      expect(similarSuggestion.requestedPath).to.equal(basePath);
      expect(notFoundSuggestion.requestedPath).to.equal(basePath);
    });
  });
});
// diff --git a/test/audits/content-fragment-404/test-constants.js
b/test/audits/content-fragment-404/test-constants.js new file mode 100644 index 000000000..d029caac2 --- /dev/null +++ b/test/audits/content-fragment-404/test-constants.js @@ -0,0 +1,265 @@ +/* + * Copyright 2025 Adobe. All rights reserved. + * This file is licensed to you under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. You may obtain a copy + * of the License at http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under + * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS + * OF ANY KIND, either express or implied. See the License for the specific language + * governing permissions and limitations under the License. + */ + +// IDs +export const TEST_SITE_ID = 'test-site-id'; +export const TEST_OPPORTUNITY_ID = 'test-opportunity-id'; +export const TEST_AUDIT_ID = 'test-audit-id'; +export const TEST_SUGGESTION_ID = 'test-suggestion-id'; +export const TEST_SUGGESTION_ID_2 = 'test-suggestion-id-2'; +export const TEST_ORG_ID = 'test-org-id'; +export const TEST_IMS_ORG_ID = '1234567890'; +export const TEST_HOSTNAME = 'test'; +export const TEST_CURSOR = 'cursor-123'; + +// URLs and Base URLs +export const TEST_BASE_URL = 'https://test-tenant.adobe.com'; +export const TEST_CUSTOM_URL = 'https://custom-tenant.adobe.com'; +export const TEST_BASE_URL_SITE = 'https://test-site.com'; +export const TEST_BASE_URL_EXAMPLE = 'https://example.com'; +export const TEST_AEM_AUTHOR_URL = 'https://author.example.com'; +export const TEST_URL_EXAMPLE_COM_CONTENT_DAM_IMAGES_PHOTO = 'https://example.com/content/dam/images/photo.jpg'; + +// Paths - Base Content DAM +export const TEST_PATH_CONTENT = '/content'; +export const TEST_PATH_CONTENT_DAM = '/content/dam'; +export const TEST_PATH_CONTENT_DAM_SLASH = '/content/dam/'; +export const TEST_PATH_TEST = '/content/dam/test'; +export const TEST_PATH_PARENT 
= '/content/dam/parent'; +export const TEST_PATH_OTHER = '/other/path'; +export const TEST_PATH_RELATIVE = 'content/dam/test'; + +// Paths - Generic Test Paths +export const TEST_PATH_BROKEN = '/content/dam/test/broken.jpg'; +export const TEST_PATH_FIXED = '/content/dam/test/fixed.jpg'; +export const TEST_PATH_TEST_IMAGE = '/content/dam/test/image.jpg'; +export const TEST_PATH_TEST_BROKEN = '/content/dam/test/broken.jpg'; +export const TEST_PATH_TEST_FIXED = '/content/dam/test/fixed.jpg'; +export const TEST_PATH_TEST_MISSING = '/content/dam/test/missing.jpg'; +export const TEST_PATH_TEST_DELETED = '/content/dam/test/deleted.jpg'; +export const TEST_PATH_IMAGE = '/content/dam/test/image.jpg'; +export const TEST_PATH_IMAGE_1 = '/content/dam/test/image1.jpg'; +export const TEST_PATH_IMAGE_2 = '/content/dam/test/image2.jpg'; +export const TEST_PATH_CHILD = '/content/dam/test/child'; +export const TEST_PATH_CHILD_1 = '/content/dam/test/child1.jpg'; +export const TEST_PATH_PARENT_CHILD = '/content/dam/parent/child.jpg'; + +// Paths - With Double Slashes +export const TEST_PATH_BROKEN_WITH_DOUBLE_SLASHES = '/content/dam//test/broken.jpg'; +export const TEST_PATH_CONTENT_DAM_DOUBLE_SLASH_IMAGES_PHOTO = '/content/dam//images/photo.jpg'; +export const TEST_PATH_CONTENT_DOUBLE_SLASH_DAM_IMAGES_PHOTO = '/content//dam/images/photo.jpg'; +export const TEST_PATH_CONTENT_DAM_IMAGES_DOUBLE_SLASH_PHOTO = '/content/dam/images//photo.jpg'; +export const TEST_PATH_CONTENT_DAM_TRIPLE_SLASH_IMAGES_PHOTO = '/content/dam///images/photo.jpg'; +export const TEST_PATH_CONTENT_QUAD_SLASH_DAM_IMAGES_PHOTO = '/content////dam/images/photo.jpg'; +export const TEST_PATH_SIX_SLASHES_CONTENT_DAM_IMAGES_PHOTO = '//////content/dam/images/photo.jpg'; + +// Paths - With Special Characters +export const TEST_PATH_TEST_IMAG = '/content/dam/test/imag.jpg'; +export const TEST_PATH_TEST_PHOTO = '/content/dam/test/photo.jpg'; +export const TEST_PATH_TEST_PHOTOS = '/content/dam/test/photos.jpg'; +export const 
TEST_PATH_TEST_PUBLISHED_IMAGE = '/content/dam/test/published-image.jpg'; +export const TEST_PATH_IMAGE_WITH_SPACES = '/content/dam/test/image with spaces.jpg'; +export const TEST_PATH_EN_US_COMPLEX = '/content/dam/en-us/folder with spaces/sub-folder/image%20with%20encoding.jpg'; +export const TEST_PATH_FOLDER_FILE = '/content/dam/folder/subfolder/file.jpg'; + +// Paths - Locale-specific (en-US, en-GB, fr-FR, de-DE) +export const TEST_PATH_EN_US = '/content/dam/en-us/test/broken.jpg'; +export const TEST_PATH_EN_GB = '/content/dam/en-gb/test/broken.jpg'; +export const TEST_PATH_FR_FR = '/content/dam/fr-fr/test/broken.jpg'; +export const TEST_PATH_EN_US_TEST_IMAGE = '/content/dam/en-us/test/image.jpg'; +export const TEST_PATH_FR_FR_TEST_IMAGE = '/content/dam/fr-fr/test/image.jpg'; +export const TEST_PATH_DE_DE_TEST_IMAGE = '/content/dam/de-de/test/image.jpg'; +export const TEST_PATH_CONTENT_DAM_EN_US = '/content/dam/en-US'; +export const TEST_PATH_CONTENT_DAM_EN_US_SLASH = '/content/dam/en-US/'; +export const TEST_PATH_CONTENT_DAM_EN_US_IMAGES = '/content/dam/en-US/images'; +export const TEST_PATH_EN_US_IMAGES = '/content/dam/en-US/images'; +export const TEST_PATH_CONTENT_DAM_EN_US_IMAGES_PHOTO = '/content/dam/en-US/images/photo.jpg'; +export const TEST_PATH_EN_US_IMAGES_PHOTO = '/content/dam/en-US/images/photo.jpg'; +export const TEST_PATH_EN_US_IMAGES_PHOTO_JPG = '/content/dam/en-US/images/photo.jpg'; +export const TEST_PATH_EN_US_IMAGES_PHOTO_PNG = '/content/dam/en-US/images/photo.png'; +export const TEST_PATH_EN_US_IMAGES_PHOTO1 = '/content/dam/en-US/images/photo1.jpg'; +export const TEST_PATH_EN_US_IMAGES_PHOTO2 = '/content/dam/en-US/images/photo2.jpg'; +export const TEST_PATH_EN_US_IMAGES_SUBFOLDER_PHOTO3 = '/content/dam/en-US/images/subfolder/photo3.jpg'; +export const TEST_PATH_CONTENT_DAM_FR_IMAGES_PHOTO = '/content/dam/fr/images/photo.jpg'; +export const TEST_PATH_CONTENT_DAM_FR_FR_IMAGES_PHOTO = '/content/dam/fr_FR/images/photo.jpg'; +export const 
TEST_PATH_FR_FR_IMAGES_PHOTO = '/content/dam/fr-FR/images/photo.jpg'; +export const TEST_PATH_FR_FR_IMAGES_PHOTO_JPG = '/content/dam/fr-FR/images/photo.jpg'; +export const TEST_PATH_FR_FR_IMAGES_PHOTO1 = '/content/dam/fr-FR/images/photo1.jpg'; +export const TEST_PATH_CONTENT_DAM_EN_US_US_IMAGES_PHOTO = '/content/dam/en-US/US/images/photo.jpg'; +export const TEST_PATH_CONTENT_DAM_US_IMAGES_PHOTO = '/content/dam/US/images/photo.jpg'; +export const TEST_PATH_DE_DE_IMAGES_PHOTO = '/content/dam/de-DE/images/photo.jpg'; +export const TEST_PATH_DE_DE_IMAGES = '/content/dam/de-DE/images'; +export const TEST_PATH_DE_DE = '/content/dam/de-DE'; + +// Paths - With Numeric Segments +export const TEST_PATH_CONTENT_DAM_123_IMAGES_PHOTO = '/content/dam/123/images/photo.jpg'; +export const TEST_PATH_CONTENT_DAM_123_SLASH = '/content/dam/123/'; + +// Paths - Generic Images/Photos +export const TEST_PATH_CONTENT_DAM_IMAGES_PHOTO = '/content/dam/images/photo.jpg'; +export const TEST_PATH_CONTENT_DAM_IMAGES_SLASH = '/content/dam/images/'; + +// Paths - Fragments +export const TEST_PATH_1 = '/content/dam/test/fragment1'; +export const TEST_PATH_2 = '/content/dam/test/fragment2'; +export const TEST_PATH_FRAGMENT = '/content/dam/fragment'; +export const TEST_PATH_FRAGMENT1 = '/content/dam/fragment1'; +export const TEST_PATH_FRAGMENT2 = '/content/dam/fragment2'; +export const TEST_PATH_FRAGMENT3 = '/content/dam/fragment3'; +export const TEST_PATH_FRAGMENT4 = '/content/dam/fragment4'; +export const TEST_PATH_FRAGMENT5 = '/content/dam/fragment5'; +export const TEST_PATH_ANOTHER_FRAGMENT = '/content/dam/another-fragment'; +export const TEST_PATH_ANOTHER_FRAGMENT_2 = '/content/dam/test/another-fragment'; +export const TEST_PATH_VALID_FRAGMENT = '/content/dam/test/valid-fragment'; +export const TEST_PATH_BROKEN_1 = '/content/dam/test/broken1.jpg'; +export const TEST_PATH_BROKEN_2 = '/content/dam/test/broken2.jpg'; +export const TEST_PATH_BROKEN_3 = '/content/dam/test/broken3.jpg'; +export const 
TEST_PATH_BROKEN_NO_EXT = '/content/dam/test/broken'; +export const TEST_PATH_SUGGESTED = '/content/dam/test/suggested.jpg'; +export const TEST_PATH_SUGGESTED_2 = '/content/dam/test/suggested2.jpg'; +export const TEST_PATH_FIXED_1 = '/content/dam/test/fixed1.jpg'; +export const TEST_SUGGESTED_PATH_1 = '/content/dam/test/fixed1'; +export const TEST_OBJECT_FORMAT_PATH = '/content/dam/test/object-format'; +export const TEST_STRING_FORMAT_PATH = '/content/dam/test/string-format'; + +// Paths - Assets/Files by Type +export const TEST_ASSET_PATH = '/content/dam/test/asset.jpg'; +export const TEST_PATH_IMAGE_JPG = '/content/dam/image.jpg'; +export const TEST_PATH_DOCUMENT_PDF = '/content/dam/document.pdf'; +export const TEST_PATH_VIDEO_MP4 = '/content/dam/video.mp4'; +export const TEST_PATH_FONT_WOFF = '/content/dam/font.woff'; +export const TEST_PATH_ARCHIVE_ZIP = '/content/dam/archive.zip'; +export const TEST_PATH_ANOTHER = '/content/dam/another'; + +// AEM Configuration +export const TEST_AEM_AUTHOR_TOKEN = 'test-token-123'; +export const TEST_AEM_AUTHOR_TOKEN_ALT = 'token-123'; +export const BEARER_PREFIX = 'Bearer '; +export const ACCEPT_JSON = 'application/json'; +export const API_SITES_FRAGMENTS = '/adobe/sites/cf/fragments'; +export const PROJECTION_MINIMAL = 'minimal'; +export const HTTP_STATUS_NOT_FOUND = 404; +export const HTTP_STATUS_TEXT_NOT_FOUND = 'Not Found'; +export const MAX_PAGES_VALUE = 10; +export const PAGINATION_DELAY_MS_VALUE = 100; +export const DELAY_MS_TEST = 50; +export const DELAY_TOLERANCE_MS = 45; +export const DELAY_ZERO = 0; +export const DELAY_THRESHOLD_MS = 10; + +// AWS/Athena Configuration +export const TEST_DATABASE = 'test_database'; +export const TEST_TABLE = 'test_table'; +export const TEST_IMS_ORG = 'test-ims-org'; +export const TEST_S3_BUCKET = 'test-raw-bucket'; +export const TEST_DATABASE_NAME = 'test_db'; +export const DEFAULT_DATABASE_NAME = 'cdn_logs_test'; +export const DEFAULT_TABLE_NAME = 'content_fragment_404'; +export 
const CUSTOM_BUCKET_NAME = 'custom-bucket'; +export const CUSTOM_IMS_ORG = 'custom-ims'; +export const S3_PATH_AGGREGATED_404 = 'aggregated-404'; +export const S3_PATH_TEMP_ATHENA_RESULTS = 'temp/athena-results/'; +export const TEST_SQL_RESULT = 'SELECT * FROM test_table;'; +export const ATHENA_QUERY_PREFIX = '[Athena Query]'; + +// User Agents +export const TEST_USER_AGENT_1 = 'Mozilla/5.0'; +export const TEST_USER_AGENT_2 = 'Chrome/91.0'; +export const TEST_USER_AGENT_3 = 'Safari/14.0'; +export const TEST_USER_AGENT_4 = 'Edge/90.0'; +export const TEST_USER_AGENT_5 = 'Opera/80.0'; + +// Request Counts +export const REQUEST_COUNT_NONE = 0; +export const REQUEST_COUNT_TINY = 5; +export const REQUEST_COUNT_LOW_1 = 7; +export const REQUEST_COUNT_LOW_2 = 6; +export const REQUEST_COUNT_LOW_3 = 4; +export const REQUEST_COUNT_LOW_4 = 3; +export const REQUEST_COUNT_LOW_5 = 2; +export const REQUEST_COUNT_SMALL = 10; +export const REQUEST_COUNT_MEDIUM = 8; +export const REQUEST_COUNT_MID_1 = 9; +export const REQUEST_COUNT_MID_2 = 12; +export const REQUEST_COUNT_MID_3 = 15; +export const REQUEST_COUNT_LOW = 50; +export const REQUEST_COUNT_HIGH = 20; +export const REQUEST_COUNT_HIGH_1 = 25; +export const REQUEST_COUNT_HIGH_2 = 30; +export const REQUEST_COUNT_1 = 100; +export const REQUEST_COUNT_2 = 200; +export const USER_AGENT_COUNT_1 = 50; +export const USER_AGENT_COUNT_2 = 200; + +// Dates and Date Components +export const TEST_YEAR = '2025'; +export const TEST_MONTH = '01'; +export const TEST_MONTH_09 = '09'; +export const TEST_DAY = '15'; +export const TEST_DAY_18 = '18'; +export const TEST_DAY_PREVIOUS = '14'; +export const TEST_MONTH_MAR = '03'; +export const TEST_DAY_5 = '05'; +export const TEST_MONTH_DEC = '12'; +export const TEST_DAY_25 = '25'; +export const TEST_DAY_31 = '31'; +export const TEST_HOUR_13 = '13'; +export const TEST_HOUR_23 = '23'; +export const TEST_DATE_2025_01_14 = new Date('2025-01-14T12:00:00.000Z'); +export const TEST_DATE_2025_01_15 = new 
Date('2025-01-15T12:00:00.000Z'); +export const TEST_DATE_2025_01_15_14_30 = new Date('2025-01-15T14:30:00Z'); +export const TEST_DATE_2025_01_15_00_30 = new Date('2025-01-15T00:30:00Z'); +export const TEST_DATE_2025_02_01 = new Date('2025-02-01T10:30:00Z'); +export const TEST_DATE_2025_03_05 = new Date('2025-03-05T10:30:00Z'); +export const TEST_DATE_2025_09_18 = new Date('2025-09-18T14:00:00.000Z'); +export const TEST_DATE_2025_12_25 = new Date('2025-12-25T10:30:00Z'); + +// Status Values +export const STATUS_UNKNOWN = 'UNKNOWN'; +export const STATUS_PUBLISHED = 'PUBLISHED'; +export const STATUS_DRAFT = 'DRAFT'; +export const SUGGESTION_TYPE_PUBLISH = 'PUBLISH'; +export const SUGGESTION_TYPE_LOCALE = 'LOCALE'; +export const SUGGESTION_TYPE_SIMILAR = 'SIMILAR'; +export const SUGGESTION_TYPE_NOT_FOUND = 'NOT_FOUND'; + +// Locale Codes +export const LOCALE_CODE_EN_US = 'en-us'; + +// Error Messages +export const ERROR_AEM_CONNECTION_FAILED = 'AEM connection failed'; + +// Rule Priorities +export const PUBLISH_RULE_PRIORITY = 1; +export const LOCALE_FALLBACK_RULE_PRIORITY = 2; +export const SIMILAR_PATH_RULE_PRIORITY = 3; +export const BASE_RULE_DEFAULT_PRIORITY = 42; + +// Levenshtein Distance +export const MAX_LEVENSHTEIN_DISTANCE = 1; + +// Expected Counts +export const EXPECTED_COUNT_ZERO = 0; +export const EXPECTED_EMPTY_COUNT = 0; +export const EXPECTED_COUNT_TWO = 2; +export const EXPECTED_COUNT_FOUR = 4; +export const EXPECTED_SUGGESTIONS_COUNT = 2; +export const EXPECTED_SINGLE_SUGGESTION_COUNT = 1; +export const EXPECTED_RULES_COUNT = 3; +export const EXPECTED_CALL_COUNT_THRICE = 3; + +// Levenshtein Distance Values +export const DISTANCE_SINGLE_CHAR = 1; +export const DISTANCE_TWO_CHARS = 2; +export const DISTANCE_THREE_CHARS = 3; +export const DISTANCE_FOUR_CHARS = 4; +export const STRING_LENGTH_HELLO = 5; diff --git a/test/utils/asset-utils.test.js b/test/utils/asset-utils.test.js new file mode 100644 index 000000000..a3ff06020 --- /dev/null +++ 
b/test/utils/asset-utils.test.js
@@ -0,0 +1,347 @@
+/*
+ * Copyright 2024 Adobe. All rights reserved.
+ * This file is licensed to you under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License. You may obtain a copy
+ * of the License at http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under
+ * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
+ * OF ANY KIND, either express or implied. See the License for the specific language
+ * governing permissions and limitations under the License.
+ */
+
+/* eslint-env mocha */
+
+import { expect } from 'chai';
+import {
+  isAssetUrl,
+  isAssetCategory,
+  filterAssetUrls,
+  getAllAssetExtensions,
+  getAssetExtensionsByCategory,
+} from '../../src/utils/asset-utils.js';
+
+describe('asset-utils', () => { // specs for the five helpers imported above
+  describe('isAssetUrl', () => { // `.to.be.true` / `.to.be.false` require strict booleans
+    describe('image assets', () => {
+      it('should return true for common image formats', () => { // lower-case extensions
+        expect(isAssetUrl('/content/dam/image.jpg')).to.be.true;
+        expect(isAssetUrl('/content/dam/image.jpeg')).to.be.true;
+        expect(isAssetUrl('/content/dam/image.png')).to.be.true;
+        expect(isAssetUrl('/content/dam/image.gif')).to.be.true;
+        expect(isAssetUrl('/content/dam/image.svg')).to.be.true;
+        expect(isAssetUrl('/content/dam/image.webp')).to.be.true;
+        expect(isAssetUrl('/content/dam/image.ico')).to.be.true;
+        expect(isAssetUrl('/content/dam/image.bmp')).to.be.true;
+      });
+
+      it('should be case insensitive', () => { // upper- and mixed-case extensions
+        expect(isAssetUrl('/content/dam/IMAGE.JPG')).to.be.true;
+        expect(isAssetUrl('/content/dam/Image.PNG')).to.be.true;
+        expect(isAssetUrl('/content/dam/photo.JpEg')).to.be.true;
+      });
+    });
+
+    describe('document assets', () => {
+      it('should return true for document formats', () => {
+        expect(isAssetUrl('/content/dam/doc.pdf')).to.be.true;
+        expect(isAssetUrl('/content/dam/doc.doc')).to.be.true;
+
expect(isAssetUrl('/content/dam/doc.docx')).to.be.true;
+        expect(isAssetUrl('/content/dam/spreadsheet.xls')).to.be.true;
+        expect(isAssetUrl('/content/dam/spreadsheet.xlsx')).to.be.true;
+        expect(isAssetUrl('/content/dam/presentation.ppt')).to.be.true;
+        expect(isAssetUrl('/content/dam/presentation.pptx')).to.be.true;
+      });
+    });
+
+    describe('media assets', () => { // video and audio formats are asserted in separate specs
+      it('should return true for video formats', () => {
+        expect(isAssetUrl('/content/dam/video.mp4')).to.be.true;
+        expect(isAssetUrl('/content/dam/video.avi')).to.be.true;
+        expect(isAssetUrl('/content/dam/video.mov')).to.be.true;
+        expect(isAssetUrl('/content/dam/video.wmv')).to.be.true;
+        expect(isAssetUrl('/content/dam/video.flv')).to.be.true;
+        expect(isAssetUrl('/content/dam/video.webm')).to.be.true;
+      });
+
+      it('should return true for audio formats', () => {
+        expect(isAssetUrl('/content/dam/audio.mp3')).to.be.true;
+        expect(isAssetUrl('/content/dam/audio.wav')).to.be.true;
+        expect(isAssetUrl('/content/dam/audio.ogg')).to.be.true;
+        expect(isAssetUrl('/content/dam/audio.m4a')).to.be.true;
+      });
+    });
+
+    describe('archive assets', () => {
+      it('should return true for archive formats', () => {
+        expect(isAssetUrl('/content/dam/file.zip')).to.be.true;
+        expect(isAssetUrl('/content/dam/file.rar')).to.be.true;
+        expect(isAssetUrl('/content/dam/file.tar')).to.be.true;
+        expect(isAssetUrl('/content/dam/file.gz')).to.be.true;
+        expect(isAssetUrl('/content/dam/file.7z')).to.be.true; // extension starting with a digit
+        expect(isAssetUrl('/content/dam/file.bz2')).to.be.true;
+      });
+    });
+
+    describe('font assets', () => {
+      it('should return true for font formats', () => {
+        expect(isAssetUrl('/content/dam/font.woff')).to.be.true;
+        expect(isAssetUrl('/content/dam/font.woff2')).to.be.true; // `.woff` is a prefix of `.woff2`
+        expect(isAssetUrl('/content/dam/font.ttf')).to.be.true;
+        expect(isAssetUrl('/content/dam/font.eot')).to.be.true;
+        expect(isAssetUrl('/content/dam/font.otf')).to.be.true;
+      });
+    });
+
+    describe('non-asset URLs', () => { // negative cases: the specs in this group expect false
+      it('should return false for
content fragment paths', () => {
+        expect(isAssetUrl('/content/dam/my-site/fragments/article')).to.be.false;
+        expect(isAssetUrl('/content/dam/fragments/product-details')).to.be.false;
+        expect(isAssetUrl('/content/experience-fragments/site/header')).to.be.false;
+      });
+
+      it('should return false for HTML pages', () => { // extensionless page paths
+        expect(isAssetUrl('/content/site/en/page')).to.be.false;
+        expect(isAssetUrl('/en/products/category')).to.be.false;
+        expect(isAssetUrl('/about-us')).to.be.false;
+      });
+
+      it('should return false for paths with query parameters', () => {
+        expect(isAssetUrl('/content/page?param=value')).to.be.false;
+        expect(isAssetUrl('/api/endpoint')).to.be.false; // NOTE(review): URL has no query string
+      }); // NOTE(review): no spec covers an asset URL followed by a query string - confirm intent
+    });
+
+    describe('edge cases', () => {
+      it('should handle null or undefined input', () => { // "handle" means: returns false
+        expect(isAssetUrl(null)).to.be.false;
+        expect(isAssetUrl(undefined)).to.be.false;
+      });
+
+      it('should handle empty string', () => {
+        expect(isAssetUrl('')).to.be.false;
+      });
+
+      it('should handle non-string input', () => { // number, plain object and array inputs
+        expect(isAssetUrl(123)).to.be.false;
+        expect(isAssetUrl({})).to.be.false;
+        expect(isAssetUrl([])).to.be.false;
+      });
+
+      it('should handle URLs with extensions in path but not at end', () => {
+        expect(isAssetUrl('/content/image.jpg/metadata')).to.be.false; // `.jpg` is mid-path
+        expect(isAssetUrl('/content/pdf.viewer/document')).to.be.false;
+      });
+
+      it('should handle full URLs with protocol', () => {
+        expect(isAssetUrl('https://example.com/assets/image.png')).to.be.true;
+        expect(isAssetUrl('http://example.com/content/page')).to.be.false;
+      });
+    });
+
+    describe('custom extensions', () => { // optional second argument: list of extensions
+      it('should accept custom extension list', () => {
+        const customExtensions = ['.custom', '.special'];
+        expect(isAssetUrl('/file.custom', customExtensions)).to.be.true;
+        expect(isAssetUrl('/file.special', customExtensions)).to.be.true;
+        expect(isAssetUrl('/file.jpg', customExtensions)).to.be.false; // `.jpg` is not in the list
+      });
+    });
+  });
+
+  describe('isAssetCategory', () => {
+    it('should correctly identify image
category', () => {
+      expect(isAssetCategory('/content/image.jpg', 'images')).to.be.true;
+      expect(isAssetCategory('/content/image.png', 'images')).to.be.true;
+      expect(isAssetCategory('/content/doc.pdf', 'images')).to.be.false;
+    });
+
+    it('should correctly identify documents category', () => {
+      expect(isAssetCategory('/content/doc.pdf', 'documents')).to.be.true;
+      expect(isAssetCategory('/content/doc.docx', 'documents')).to.be.true;
+      expect(isAssetCategory('/content/image.jpg', 'documents')).to.be.false;
+    });
+
+    it('should correctly identify media category', () => {
+      expect(isAssetCategory('/content/video.mp4', 'media')).to.be.true;
+      expect(isAssetCategory('/content/audio.mp3', 'media')).to.be.true;
+      expect(isAssetCategory('/content/image.jpg', 'media')).to.be.false;
+    });
+
+    it('should correctly identify archives category', () => {
+      expect(isAssetCategory('/content/file.zip', 'archives')).to.be.true;
+      expect(isAssetCategory('/content/file.tar', 'archives')).to.be.true;
+      expect(isAssetCategory('/content/image.jpg', 'archives')).to.be.false;
+    });
+
+    it('should correctly identify fonts category', () => {
+      expect(isAssetCategory('/content/font.woff', 'fonts')).to.be.true;
+      expect(isAssetCategory('/content/font.ttf', 'fonts')).to.be.true;
+      expect(isAssetCategory('/content/image.jpg', 'fonts')).to.be.false;
+    });
+
+    it('should throw error for unknown category', () => {
+      expect(() => isAssetCategory('/content/file.jpg', 'unknown'))
+        .to.throw('Unknown asset category: unknown');
+    });
+
+    it('should include valid categories in error message', () => {
+      // `.to.throw(text)` fails when nothing is thrown; the former try/catch
+      // skipped every assertion in that case and let the spec pass vacuously.
+      const invoke = () => isAssetCategory('/content/file.jpg', 'invalid');
+
+      expect(invoke).to.throw('images');
+      expect(invoke).to.throw('documents');
+      expect(invoke).to.throw('media');
+      expect(invoke).to.throw('archives');
+      expect(invoke).to.throw('fonts');
+    });
+  });
+
+  describe('filterAssetUrls', () => {
+    it('should filter out asset URLs from
array', () => {
+      const urls = [
+        '/content/page',
+        '/content/image.jpg',
+        '/content/fragment',
+        '/content/doc.pdf',
+        '/content/another-page',
+      ];
+
+      const filtered = filterAssetUrls(urls); // asset URLs are dropped, the rest keep their order
+
+      expect(filtered).to.deep.equal([
+        '/content/page',
+        '/content/fragment',
+        '/content/another-page',
+      ]);
+    });
+
+    it('should handle empty array', () => {
+      expect(filterAssetUrls([])).to.deep.equal([]);
+    });
+
+    it('should handle array with only assets', () => {
+      const urls = [
+        '/content/image.jpg',
+        '/content/doc.pdf',
+        '/content/video.mp4',
+      ];
+
+      expect(filterAssetUrls(urls)).to.deep.equal([]);
+    });
+
+    it('should handle array with no assets', () => {
+      const urls = [
+        '/content/page1',
+        '/content/page2',
+        '/content/fragment',
+      ];
+
+      expect(filterAssetUrls(urls)).to.deep.equal(urls); // compares contents, not identity
+    });
+
+    it('should work with custom extension list', () => {
+      const urls = [
+        '/file.custom',
+        '/file.txt',
+        '/file.jpg',
+      ];
+      const customExtensions = ['.custom'];
+
+      const filtered = filterAssetUrls(urls, customExtensions);
+
+      expect(filtered).to.deep.equal([
+        '/file.txt',
+        '/file.jpg', // kept: `.jpg` is not in the custom list
+      ]);
+    });
+  });
+
+  describe('getAllAssetExtensions', () => {
+    it('should return array of all extensions', () => {
+      const extensions = getAllAssetExtensions();
+
+      expect(extensions).to.be.an('array');
+      expect(extensions.length).to.be.greaterThan(0);
+    });
+
+    it('should include extensions from all categories', () => { // one sample per category
+      const extensions = getAllAssetExtensions();
+
+      expect(extensions).to.include('.jpg');
+      expect(extensions).to.include('.pdf');
+      expect(extensions).to.include('.mp4');
+      expect(extensions).to.include('.zip');
+      expect(extensions).to.include('.woff');
+    });
+
+    it('should return a copy of the array', () => {
+      const extensions1 = getAllAssetExtensions();
+      const extensions2 = getAllAssetExtensions();
+
+      expect(extensions1).to.not.equal(extensions2); // strict inequality: distinct instances
+      expect(extensions1).to.deep.equal(extensions2); // same contents
+    });
+
+    it('should not allow mutation of internal
state', () => {
+      const extensions = getAllAssetExtensions();
+      extensions.push('.malicious'); // mutate the returned array, then fetch a fresh one
+
+      const newExtensions = getAllAssetExtensions();
+      expect(newExtensions).to.not.include('.malicious');
+    });
+  });
+
+  describe('getAssetExtensionsByCategory', () => {
+    it('should return object with all categories', () => { // five category keys are expected
+      const categories = getAssetExtensionsByCategory();
+
+      expect(categories).to.be.an('object');
+      expect(categories).to.have.property('images');
+      expect(categories).to.have.property('documents');
+      expect(categories).to.have.property('media');
+      expect(categories).to.have.property('archives');
+      expect(categories).to.have.property('fonts');
+    });
+
+    it('should have arrays for each category', () => {
+      const categories = getAssetExtensionsByCategory();
+
+      expect(categories.images).to.be.an('array');
+      expect(categories.documents).to.be.an('array');
+      expect(categories.media).to.be.an('array');
+      expect(categories.archives).to.be.an('array');
+      expect(categories.fonts).to.be.an('array');
+    });
+
+    it('should return a copy of the object', () => {
+      const categories1 = getAssetExtensionsByCategory();
+      const categories2 = getAssetExtensionsByCategory();
+
+      expect(categories1).to.not.equal(categories2); // strict inequality: distinct objects
+      expect(categories1).to.deep.equal(categories2);
+    });
+
+    it('should not allow mutation of internal state', () => {
+      const categories = getAssetExtensionsByCategory();
+      categories.malicious = ['.bad']; // NOTE(review): only a top-level key is added here
+
+      const newCategories = getAssetExtensionsByCategory();
+      expect(newCategories).to.not.have.property('malicious');
+    }); // NOTE(review): mutating a nested category array is not exercised - confirm coverage
+
+    it('should include expected extensions in each category', () => {
+      const categories = getAssetExtensionsByCategory();
+
+      expect(categories.images).to.include('.jpg');
+      expect(categories.images).to.include('.png');
+      expect(categories.documents).to.include('.pdf');
+      expect(categories.documents).to.include('.docx');
+      expect(categories.media).to.include('.mp4');
+      expect(categories.media).to.include('.mp3');
+
expect(categories.archives).to.include('.zip');
+      expect(categories.fonts).to.include('.woff');
+    });
+  });
+});
diff --git a/test/utils/data-access.test.js b/test/utils/data-access.test.js
index 985eabc66..0370da0d8 100644
--- a/test/utils/data-access.test.js
+++ b/test/utils/data-access.test.js
@@ -295,6 +295,57 @@ describe('data-access', () => {
       expect(existingSuggestions[0].save).to.have.been.calledOnce;
     });
 
+    it('should update rank when getRank function is provided', async () => {
+      const suggestionsData = [ // data currently held by the two existing suggestions
+        { key: '1', title: 'title 1', rank: 5 },
+        { key: '2', title: 'title 2', rank: 10 },
+      ];
+      const existingSuggestions = [{ // stubbed suggestion objects returned by the opportunity
+        id: '1',
+        data: suggestionsData[0],
+        getData: sinon.stub().returns(suggestionsData[0]),
+        setData: sinon.stub(),
+        setRank: sinon.stub(),
+        save: sinon.stub(),
+        getStatus: sinon.stub().returns('NEW'),
+        setUpdatedBy: sinon.stub().returnsThis(), // returns its `this` value so calls can chain
+      }, {
+        id: '2',
+        data: suggestionsData[1],
+        getData: sinon.stub().returns(suggestionsData[1]),
+        setData: sinon.stub(),
+        setRank: sinon.stub(),
+        save: sinon.stub(),
+        getStatus: sinon.stub().returns('NEW'),
+        setUpdatedBy: sinon.stub().returnsThis(),
+      }];
+      const newData = [ // same keys as above, with new titles and ranks
+        { key: '1', title: 'updated title 1', rank: 15 },
+        { key: '2', title: 'updated title 2', rank: 20 },
+      ];
+
+      mockOpportunity.getSuggestions.resolves(existingSuggestions);
+
+      const getRank = (data) => data.rank; // rank is read from the incoming item
+
+      await syncSuggestions({
+        opportunity: mockOpportunity,
+        newData,
+        context,
+        buildKey,
+        mapNewSuggestion,
+        getRank,
+      });
+
+      expect(mockOpportunity.getSuggestions).to.have.been.calledOnce;
+      expect(existingSuggestions[0].setRank).to.have.been.calledOnceWith(15); // newData[0].rank
+      expect(existingSuggestions[1].setRank).to.have.been.calledOnceWith(20); // newData[1].rank
+      expect(existingSuggestions[0].setData).to.have.been.calledOnceWith(newData[0]);
+      expect(existingSuggestions[1].setData).to.have.been.calledOnceWith(newData[1]);
+      expect(existingSuggestions[0].save).to.have.been.calledOnce;
+      expect(existingSuggestions[1].save).to.have.been.calledOnce;
+    });
+
     it('should log errors if there are items with errors', async () => {
       const suggestionsData = [{ key: '1' }];
       const existingSuggestions = [{