redpanda-data · JakeSCahill · Oct 3, 2025 · Oct 3, 2025
diff --git a/tools/cloud-regions/generate-cloud-regions.js b/tools/cloud-regions/generate-cloud-regions.js
@@ -193,11 +193,10 @@ function processCloudRegions(yamlText) {
 }
 
 /**
- * Processes cloud regions data and organizes it by cluster type (BYOC/Dedicated) for tabs output.
- *
- * @param {string} yamlText - The YAML content to parse and process.
- * @return {Object} An object with clusterTypes array, each containing providers organized by cluster type.
- * @throws {Error} If the YAML is malformed or missing the required `regions` array.
+ * Parse cloud-regions YAML and group available regions by cluster type (BYOC or Dedicated) and provider for tabbed output.
+ * @param {string} yamlText - YAML document containing `regions` and optional `products`; regions must include `cloudProvider`, `name`, and `redpandaProductAvailability` entries.
+ * @returns {{clusterTypes: Array<{name: string, providers: Array<{name: string, regions: Array<{name: string}>}>}>}} An object with a `clusterTypes` array; each entry lists a cluster type (`BYOC` or `Dedicated`) and its providers (only providers with at least one region), each containing sorted region names.
+ * @throws {Error} If the YAML is malformed or does not contain a top-level `regions` array.
  */
 function processCloudRegionsForTabs(yamlText) {
   let data;
@@ -297,10 +296,10 @@ function processCloudRegionsForTabs(yamlText) {
 }
 
 /**
- * Fetches, processes, and renders cloud region and tier data from a GitHub YAML file.
+ * Generate rendered cloud region and tier output from a GitHub-hosted YAML file.
  *
- * Retrieves YAML data from GitHub using the GitHub API (to avoid caching issues),
- * parses and filters it to include only public cloud regions and tiers, and renders the result in the requested format.
+ * Fetches the YAML from the specified repository path, parses and filters it to include only public providers/regions/tiers,
+ * and renders the result in the requested format. When `options.tabs` is true, returns separate rendered outputs per cluster type.
  *
  * @param {Object} options - Options for generating cloud regions.
  * @param {string} options.owner - GitHub repository owner.
@@ -309,10 +308,10 @@ function processCloudRegionsForTabs(yamlText) {
  * @param {string} [options.ref='main'] - Git reference (branch, tag, or commit SHA).
  * @param {string} [options.format='md'] - The output format (e.g., 'md' for Markdown).
  * @param {string} [options.token] - Optional GitHub token for authentication.
- * @param {string} [options.template] - Optional path to custom Handlebars template.
- * @param {boolean} [options.tabs=false] - Whether to generate AsciiDoc with tabs organized by cluster type.
- * @returns {string} The rendered cloud regions output.
- * @throws {Error} If fetching, processing, or rendering fails, or if no valid providers or regions are found.
+ * @param {string} [options.template] - Optional path to a custom Handlebars template.
+ * @param {boolean} [options.tabs=false] - When true, produce separate rendered outputs organized by cluster type (keys are cluster type names lowercased).
+ * @returns {Promise<string|Object>} Resolves to the rendered output as a string or, when `options.tabs` is true, to an object mapping lowercase cluster type names to rendered strings.
+ * @throws {Error} If fetching, parsing, processing, or rendering fails, or if no valid providers/regions remain after filtering.
  */
 async function generateCloudRegions({ owner, repo, path, ref = 'main', format = 'md', token, template, tabs = false }) {
   let yamlText;
@@ -368,4 +367,4 @@ async function generateCloudRegions({ owner, repo, path, ref = 'main', format =
 module.exports = {
   generateCloudRegions,
   processCloudRegions,
-};
+};
diff --git a/tools/cloud-tier-table/generate-cloud-tier-table.js b/tools/cloud-tier-table/generate-cloud-tier-table.js
@@ -36,7 +36,11 @@ const LIMIT_KEYS = [
   'kafka_topics_max',
 ];
 
-// Map header keys to human readable labels
+/**
+ * Map a header key to a human-readable label.
+ * @param {string} key - The header key to convert.
+ * @returns {string} The human-readable label for `key`, or `key` unchanged if no mapping exists.
+ */
 function humanLabel(key) {
   if (key === 'cloud_provider') return 'Cloud Provider';
   if (key === 'machine_type') return 'Machine Type';
@@ -48,14 +52,24 @@ function humanLabel(key) {
   return key;
 }
 
-// Map provider values to human readable
+/**
+ * Convert a provider identifier into a human-friendly provider name.
+ * @param {*} val - Provider identifier (e.g., 'aws', 'gcp', 'azure'); comparison is case-insensitive. Falsy values produce an empty string.
+ * @returns {string|*} `'AWS'`, `'GCP'`, or `'Azure'` for known providers; empty string for falsy input; otherwise the original `val` unchanged (it is not coerced to a string).
+ */
 function humanProvider(val) {
   if (!val) return '';
   const map = { aws: 'AWS', gcp: 'GCP', azure: 'Azure' };
   return map[String(val).toLowerCase()] || val;
 }
 
-// Fetch public tiers from master-data.yaml
+/**
+ * Loads master-data.yaml (from an HTTP URL or local path), validates its products list, and returns a normalized list of public tiers.
+ *
+ * @param {string} masterDataUrl - HTTP URL (GitHub API file response expected) or local filesystem path to the master-data.yaml file.
+ * @returns {Promise<Array<Object>>} Resolves to an array of public tier objects with the following fields: `displayName`, `configProfileName`, `cloudProvider`, `advertisedMaxIngress`, `advertisedMaxEgress`, `advertisedMaxPartitionCount`, and `advertisedMaxClientCount`.
+ * @throws {Error} If masterDataUrl is missing or not a string, fetching or file reading fails, YAML parsing fails, the products array is missing/invalid, or no valid public tiers are found.
+ */
 async function fetchPublicTiers(masterDataUrl) {
   try {
     if (!masterDataUrl || typeof masterDataUrl !== 'string') {
@@ -161,6 +175,15 @@ async function fetchPublicTiers(masterDataUrl) {
   }
 }
 
+/**
+ * Load and parse YAML from a local file path, HTTP(S) URL, or the special GitHub install-pack API directory.
+ *
+ * When given the GitHub install-pack API directory URL, selects the latest versioned YAML file (names like `1.2.yml` or `1.2.3.yaml`) and parses it. For HTTP(S) inputs, fetches the URL and parses the response body. For local file paths, reads and parses the file contents. If GITHUB_TOKEN is set, requests include Authorization and User-Agent headers.
+ *
+ * @param {string} input - Local filesystem path, HTTP(S) URL, or the GitHub API directory URL 'https://api.github.com/repos/redpanda-data/cloudv2/contents/install-pack'.
+ * @returns {Promise<Object>} Resolves to the parsed YAML content as a JavaScript object.
+ * @throws {Error} If `input` is not a valid string, the network or filesystem access fails, no suitable versioned YAML is found in the install-pack directory, or the YAML content cannot be parsed into an object.
+ */
 async function parseYaml(input) {
   try {
     if (!input || typeof input !== 'string') {
@@ -333,10 +356,11 @@ function extractVersion(profileName) {
 }
 
 /**
- * Find the highest version config profile for a given base name
- * @param {Object} configProfiles - All config profiles
- * @param {string} targetProfile - The target profile name from master data
- * @returns {string} The highest version profile name
+ * Selects the highest-versioned config profile name matching the given target profile.
+ *
+ * @param {Object} configProfiles - Map of profile names to profile definitions.
+ * @param {string} targetProfile - The target profile name to match (from master data).
+ * @returns {string} The matching profile name with the largest numeric `-vN` suffix, or `targetProfile` if no versioned variants are found or on failure.
  */
 function findHighestVersionProfile(configProfiles, targetProfile) {
   try {
@@ -384,6 +408,18 @@ function findHighestVersionProfile(configProfiles, targetProfile) {
   }
 }
 
+/**
+ * Builds table row objects by merging public tier metadata with matching config profiles.
+ *
+ * Each returned row maps the tier display name to the requested limit keys (either `customLimits` or the module's default keys).
+ * Missing values are represented as the string "N/A". Duplicate rows with the same tier name and resolved config profile are removed.
+ *
+ * @param {Object} tiers - Parsed tiers YAML object; must contain a `config_profiles` object mapping profile names to definitions.
+ * @param {Array<Object>} publicTiers - Array of public tier descriptors; each entry must include `displayName` and `configProfileName`.
+ * @param {Array<string>} [customLimits] - Optional list of limit keys to extract; when omitted the module's default LIMIT_KEYS are used.
+ * @returns {Array<Object>} An array of row objects. Each row has a `tier` property (display name) and entries for each requested limit key.
+ * @throws {Error} If inputs are invalid or row construction fails (e.g., missing `config_profiles`, non-array `publicTiers`, or other fatal processing errors).
+ */
 function buildTableRows(tiers, publicTiers, customLimits) {
   try {
     // Use custom limits if provided, otherwise use default LIMIT_KEYS
@@ -519,6 +555,13 @@ function buildTableRows(tiers, publicTiers, customLimits) {
   }
 }
 
+/**
+ * Render an array of tier rows as a Markdown table.
+ *
+ * @param {Array<Object>} rows - Array of row objects where each row has a `tier` property and keys matching entries in `limitKeys`.
+ * @param {Array<string>} [limitKeys=LIMIT_KEYS] - Ordered list of keys to include as columns after the "Tier" column; each key's value is taken from the corresponding property on a row.
+ * @returns {string} A Markdown-formatted table with a header row ("Tier" followed by the provided keys' labels) and one row per entry in `rows`.
+ */
 function toMarkdown(rows, limitKeys = LIMIT_KEYS) {
   const headers = ['Tier', ...limitKeys.map(humanLabel)];
   const lines = [];
@@ -530,6 +573,13 @@ function toMarkdown(rows, limitKeys = LIMIT_KEYS) {
   return lines.join('\n');
 }
 
+/**
+ * Render table rows as an AsciiDoc table.
+ *
+ * @param {Array<Object>} rows - Array of row objects; each object must include a `tier` property and values for the keys listed in `limitKeys`.
+ * @param {Array<string>} [limitKeys=LIMIT_KEYS] - Ordered list of keys to include as columns (excluding the leading "Tier" column).
+ * @returns {string} An AsciiDoc-formatted table containing a header row ("Tier" plus humanized `limitKeys`) and one data row per entry in `rows`.
+ */
 function toAsciiDoc(rows, limitKeys = LIMIT_KEYS) {
   const headers = ['Tier', ...limitKeys.map(humanLabel)];
   let out = '[options="header"]\n|===\n';
@@ -541,6 +591,13 @@ function toAsciiDoc(rows, limitKeys = LIMIT_KEYS) {
   return out;
 }
 
+/**
+ * Render rows as CSV with a "Tier" column followed by the provided limit keys.
+ *
+ * @param {Array<Object>} rows - Array of row objects where each object contains a `tier` property and keys matching `limitKeys`.
+ * @param {Array<string>} [limitKeys=LIMIT_KEYS] - Ordered list of keys to include as CSV columns after the "Tier" column.
+ * @returns {string} CSV-formatted text with a header row and one line per input row; values are quoted and internal quotes doubled.
+ */
 function toCSV(rows, limitKeys = LIMIT_KEYS) {
   const headers = ['Tier', ...limitKeys];
   const esc = v => {
@@ -555,6 +612,18 @@ function toCSV(rows, limitKeys = LIMIT_KEYS) {
   return lines.join('\n');
 }
 
+/**
+ * Render the provided rows into HTML using a Handlebars template.
+ *
+ * The template is compiled from the file at `templatePath` and is invoked with a context
+ * containing: `rows`, `headers` (array of {name, index_plus_one}), `limitKeys`, `cloudProviders`,
+ * and `uniqueTiers`.
+ *
+ * @param {Array<Object>} rows - Table row objects to render; each row is passed through to the template.
+ * @param {string} templatePath - Filesystem path to a Handlebars template.
+ * @param {Array<string>} [limitKeys=LIMIT_KEYS] - Ordered list of limit keys used to build headers and passed to the template.
+ * @returns {string} The rendered HTML string.
+ */
 function toHTML(rows, templatePath, limitKeys = LIMIT_KEYS) {
   const fs = require('fs');
   const handlebars = require('handlebars');
@@ -579,6 +648,18 @@ function toHTML(rows, templatePath, limitKeys = LIMIT_KEYS) {
   });
 }
 
+/**
+ * Generate a cloud tier table from local or remote YAML input and master-data, rendered in the requested format.
+ *
+ * @param {Object} options - Function options.
+ * @param {string} options.input - Path or URL to the input YAML (or the special GitHub install-pack API directory URL) containing tiers/config profiles.
+ * @param {string} [options.output] - Output destination (not used by this function; included for CLI compatibility).
+ * @param {string} [options.format='html'] - Output format: 'html', 'md' (Markdown), 'adoc' (AsciiDoc), or 'csv'.
+ * @param {string} [options.template] - Path to a Handlebars template to use for rendering; for 'html' format a default template is used when this is not provided.
+ * @param {string} [options.masterData] - URL or filesystem path to master-data.yaml used to fetch public tier definitions.
+ * @param {string[]} [options.limits] - Custom ordered list of limit keys to include; when omitted the default LIMIT_KEYS set is used.
+ * @returns {Promise<string>} Resolves to the generated table content in the requested format (HTML, Markdown, AsciiDoc, or CSV).
+ */
 async function generateCloudTierTable({ 
   input,
   output,
@@ -623,4 +704,4 @@ module.exports = {
   findHighestVersionProfile, 
   parseYaml, 
   fetchPublicTiers 
-};
+};
diff --git a/tools/cloud-tier-table/generate-discrepancy-report.js b/tools/cloud-tier-table/generate-discrepancy-report.js
@@ -4,20 +4,20 @@ const { generateCloudTierTable } = require('./generate-cloud-tier-table.js');
 const Papa = require('papaparse');
 
 /**
- * Calculate percentage difference between two values
- * @param {number} advertised - Advertised value
- * @param {number} actual - Actual config value
- * @returns {number} Percentage difference (positive = actual is higher, negative = actual is lower)
+ * Compute the percentage difference from an advertised value to an actual value.
+ * @param {number} advertised - The reference (advertised) value.
+ * @param {number} actual - The observed or configured value to compare.
+ * @returns {number|null} The percentage difference ((actual - advertised) / advertised * 100); `null` if `advertised` is falsy (`0`, `null`, `undefined`, or `NaN`), which also avoids dividing by zero.
  */
 function calculatePercentageDiff(advertised, actual) {
   if (!advertised || advertised === 0) return null;
   return ((actual - advertised) / advertised) * 100;
 }
 
 /**
- * Format bytes per second values for display
- * @param {number} bps - Bytes per second
- * @returns {string} Formatted string
+ * Convert a bytes-per-second value into a human-readable Mbps or Kbps string.
+ * @param {number} bps - Bytes per second; falsy values (e.g., 0, null, undefined) produce `'N/A'`.
+ * @returns {string} A formatted throughput string like `"<n.n> Mbps"` or `"<n.n> Kbps"`, or `'N/A'` when input is falsy.
  */
 function formatThroughput(bps) {
   if (!bps) return 'N/A';
@@ -30,19 +30,21 @@ function formatThroughput(bps) {
 }
 
 /**
- * Format numbers with commas
- * @param {number} num - Number to format
- * @returns {string} Formatted number
+ * Format a number with locale-specific thousands separators.
+ *
+ * Any falsy `num` other than `0` (for example `null`, `undefined`, `NaN`, or `''`) returns `"N/A"`; a numeric `0` is formatted normally.
+ * @param {number} num - The number to format.
+ * @returns {string} The number formatted with locale-specific separators, or `"N/A"` when `num` is falsy and not `0`.
  */
 function formatNumber(num) {
   if (!num && num !== 0) return 'N/A';
   return num.toLocaleString();
 }
 
 /**
- * Determine severity of discrepancy
- * @param {number} percentDiff - Percentage difference
- * @returns {string} Severity level
+ * Classifies a percentage difference into a severity level.
+ * @param {number|null|undefined} percentDiff - Percentage difference (positive if actual > advertised, negative if actual < advertised); may be `null` or `undefined`.
+ * @returns {string} `unknown` if `percentDiff` is null or undefined; otherwise `minor` for absolute percentage <= 5, `moderate` for <= 25, `major` for <= 50, and `critical` for > 50.
  */
 function getSeverity(percentDiff) {
   if (percentDiff === null || percentDiff === undefined) return 'unknown';
@@ -92,12 +94,13 @@ function safeParseInt(tier, key, tierName) {
 }
 
 /**
- * Analyze a single metric and create discrepancy entry
- * @param {string} metricName - Name of the metric
- * @param {number} advertised - Advertised value
- * @param {number} actual - Actual configuration value
- * @param {Function} formatter - Function to format the values for display
- * @returns {Object} Discrepancy analysis object
+ * Create a discrepancy analysis entry for a single metric.
+ *
+ * @param {string} metricName - Human-readable metric name.
+ * @param {number} advertised - Advertised value used as the reference for the comparison.
+ * @param {number} actual - Actual configuration value compared against `advertised`.
+ * @param {(value: number) => string} formatter - Formatter that converts numeric values to display strings.
+ * @returns {{metric: string, advertised: number, advertisedFormatted: string, actual: number, actualFormatted: string, percentageDiff: number|null, severity: string, emoji: string}} An object containing the metric name, raw and formatted advertised/actual values, the percentage difference (or `null` if unavailable), severity label, and a severity emoji.
  */
 function analyzeMetric(metricName, advertised, actual, formatter) {
   const percentageDiff = calculatePercentageDiff(advertised, actual);
@@ -116,9 +119,19 @@ function analyzeMetric(metricName, advertised, actual, formatter) {
 }
 
 /**
- * Generate discrepancy analysis for a single tier
- * @param {Object} tier - Tier data object
- * @returns {Object} Discrepancy analysis
+ * Builds a discrepancy analysis object for a cloud tier.
+ *
+ * @param {Object} tier - Tier data object containing configuration and advertised values. Expected keys include:
+ *   `Tier` or `tier_name`, `cloud_provider`, `machine_type`, `nodes_count`,
+ *   `advertisedMaxIngress`, `advertisedMaxEgress`, `advertisedMaxPartitionCount`, `advertisedMaxClientCount`,
+ *   `kafka_throughput_limit_node_in_bps`, `kafka_throughput_limit_node_out_bps`,
+ *   `topic_partitions_per_shard`, `kafka_connections_max`.
+ * @returns {Object} An analysis object with:
+ *   - `tierName` (string): tier identifier,
+ *   - `cloudProvider` (string),
+ *   - `machineType` (string),
+ *   - `nodeCount` (number),
+ *   - `discrepancies` (Array): list of per-metric analysis objects (metric name, advertised value, actual/config value, formatted values, percentageDiff, severity, emoji).
  */
 function analyzeTierDiscrepancies(tier) {
   const tierName = tier.Tier || tier.tier_name;
@@ -176,9 +189,18 @@ function analyzeTierDiscrepancies(tier) {
 }
 
 /**
- * Generate a comprehensive discrepancy report
- * @param {Object} options - Options object
- * @returns {string} Formatted report
+ * Generate a comprehensive discrepancy report for Redpanda Cloud Tier configurations.
+ *
+ * Produces a report that compares advertised tier values against actual configuration values
+ * and classifies discrepancies by severity. The report can be returned as Markdown (default)
+ * or as a JSON string containing the generated date, a summary, and per-tier analyses.
+ *
+ * @param {Object} [options] - Options to control input sources and output format.
+ * @param {string} [options.input] - URL or path to the install-pack source used to derive tier data.
+ * @param {string} [options.masterData] - URL or path to master-data.yaml used to resolve advertised values.
+ * @param {string} [options.format] - Output format: 'markdown' or 'json' (case-insensitive). Defaults to 'markdown'.
+ * @returns {string} The generated report as a formatted string (Markdown or JSON).
+ * @throws {Error} If CSV parsing fails, an unsupported format is requested, or report generation encounters an error.
  */
 async function generateDiscrepancyReport(options = {}) {
   const {