Skip to content

Implement IndexNow integration for improved search engine indexing #14997

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/build-and-deploy.yml
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ jobs:
ALGOLIA_APP_ID: ${{ vars.ALGOLIA_APP_ID }}
ALGOLIA_APP_SEARCH_KEY: ${{ vars.ALGOLIA_APP_SEARCH_KEY }}
ALGOLIA_APP_ADMIN_KEY: ${{ steps.esc-secrets.outputs.ALGOLIA_APP_ADMIN_KEY }}
INDEXNOW_API_KEY: ${{ steps.esc-secrets.outputs.INDEXNOW_API_KEY }}
NODE_OPTIONS: "--max_old_space_size=8192"

- name: Archive test results
Expand Down
5 changes: 5 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,11 @@ ci_update_search_index:
echo "Updating search index: ${DEPLOYMENT_ENVIRONMENT}..."
./scripts/ci-update-search-index.sh "${DEPLOYMENT_ENVIRONMENT}"

# Runs scripts/search/submit-indexnow.sh, passing DEPLOYMENT_ENVIRONMENT as
# its first argument.
.PHONY: ci_submit_indexnow
ci_submit_indexnow:
echo "Submitting URLs to IndexNow..."
./scripts/search/submit-indexnow.sh "${DEPLOYMENT_ENVIRONMENT}"

.PHONY: serve-all
serve-all:
./node_modules/.bin/concurrently --kill-others -r "./scripts/serve.sh" "yarn --cwd ./theme run start"
Expand Down
1 change: 1 addition & 0 deletions scripts/ci-push.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ source ./scripts/ci-login.sh
./scripts/sync-and-test-bucket.sh update

./scripts/generate-search-index.sh
./scripts/search/submit-indexnow.sh

node ./scripts/await-in-progress.js

Expand Down
211 changes: 211 additions & 0 deletions scripts/search/indexnow.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,211 @@
/**
* IndexNow API implementation for Pulumi docs
*
* This script submits new or updated URLs to the IndexNow API
* to trigger immediate crawling by search engines.
*/

const fs = require('fs');
const path = require('path');
const https = require('https');
const Sitemapper = require('sitemapper');
const sitemap = new Sitemapper();

// IndexNow API settings
// Endpoint that URL batches are POSTed to.
const INDEXNOW_ENDPOINT = 'https://www.bing.com/indexnow';
// Key sent with every submission and written to the key verification file.
// Read from the INDEXNOW_API_KEY environment variable; the literal below is
// used when that variable is unset or empty.
const INDEXNOW_API_KEY = process.env.INDEXNOW_API_KEY || '33134703c43349ddb227d769745f33cc';
// File name of the key verification file. It is joined onto the local
// public/ directory when the file is generated and onto SITE_URL when the
// keyLocation field of a submission is built.
const INDEXNOW_KEY_LOCATION = 'indexnow.txt';
// Cache file to store previously submitted URLs
// NOTE(review): this lives under public/ next to the key file; if public/ is
// the published site output the cache would be deployed too - confirm intent.
const CACHE_FILE = path.join(__dirname, '../../public/indexnow-submitted-urls.json');
const BATCH_SIZE = 10000; // Maximum number of URLs per batch (IndexNow allows up to 10,000)
// Origin of the site: used to locate sitemap.xml and to derive the `host`
// and `keyLocation` fields of each submission.
const SITE_URL = 'https://www.pulumi.com';

// Get environment
// When INDEXNOW_TEST_MODE is exactly 'true', batches are printed instead of
// being sent to the endpoint.
const isTestMode = process.env.INDEXNOW_TEST_MODE === 'true';

/**
 * Fetch the site's sitemap and return its URLs.
 *
 * Requests `${SITE_URL}/sitemap.xml` through the shared `sitemap` client,
 * trims surrounding whitespace from every entry and sorts the result.
 *
 * @returns {Promise<string[]>} Trimmed, sorted URLs; an empty array when the
 *   sitemap could not be fetched or processed (the error is logged).
 */
async function getSitemapUrls() {
    console.log('Fetching sitemap URLs...');

    try {
        const { sites } = await sitemap.fetch(`${SITE_URL}/sitemap.xml`);

        // Exclusion patterns can be applied here if ever needed,
        // for example: .filter(url => !url.match(/\/api-docs\//))
        const trimmedUrls = sites.map((entry) => entry.trim());
        trimmedUrls.sort();
        return trimmedUrls;
    } catch (fetchError) {
        console.error('Error fetching sitemap:', fetchError);
        return [];
    }
}

/**
 * Load the cache of previously submitted URLs from CACHE_FILE.
 *
 * The cache is expected to be a JSON object with a `urls` object keyed by
 * URL. A missing file, unreadable file, invalid JSON, or JSON of any other
 * shape (for example `null`, an array, or an object without `urls`) yields a
 * fresh, empty cache instead, so callers can always index `cache.urls`.
 *
 * @returns {{lastSubmission: (string|null), urls: Object}} The stored cache,
 *   or a fresh empty cache when none could be loaded.
 */
function loadSubmittedUrls() {
    const isPlainObject = (value) =>
        value !== null && typeof value === 'object' && !Array.isArray(value);

    try {
        if (fs.existsSync(CACHE_FILE)) {
            const parsed = JSON.parse(fs.readFileSync(CACHE_FILE, 'utf8'));
            if (isPlainObject(parsed) && isPlainObject(parsed.urls)) {
                return parsed;
            }
            // Valid JSON but not a usable cache: returning it would make the
            // caller fail when it looks up URLs in `cache.urls`.
            console.warn('Submitted URLs cache has an unexpected shape, starting fresh.');
        }
    } catch (error) {
        console.warn('Error loading submitted URLs cache, starting fresh:', error.message);
    }

    return {
        lastSubmission: null,
        urls: {},
    };
}

/**
 * Persist the submitted-URL cache to CACHE_FILE as pretty-printed JSON.
 *
 * The parent directory is created first when it does not exist. Any failure
 * is logged and otherwise ignored; nothing is thrown to the caller.
 *
 * @param {Object} cache - Cache object to serialize.
 * @returns {void}
 */
function saveSubmittedUrls(cache) {
    try {
        const cacheDir = path.dirname(CACHE_FILE);
        const dirMissing = !fs.existsSync(cacheDir);
        if (dirMissing) {
            fs.mkdirSync(cacheDir, { recursive: true });
        }

        const serialized = JSON.stringify(cache, null, 2);
        fs.writeFileSync(CACHE_FILE, serialized);
    } catch (writeError) {
        console.error('Error saving submitted URLs cache:', writeError);
    }
}

/**
 * POST one JSON payload to INDEXNOW_ENDPOINT.
 *
 * @param {string} payload - Serialized JSON request body.
 * @returns {Promise<void>} Resolves on a 2xx response; rejects on any other
 *   status code or on a request error.
 */
function postIndexNowBatch(payload) {
    return new Promise((resolve, reject) => {
        const options = {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json',
                // Content-Length is a byte count. String.length counts UTF-16
                // code units and is too small when a URL has non-ASCII characters.
                'Content-Length': Buffer.byteLength(payload),
            },
        };

        const req = https.request(INDEXNOW_ENDPOINT, options, (res) => {
            let responseData = '';

            res.on('data', (chunk) => {
                responseData += chunk;
            });

            res.on('end', () => {
                if (res.statusCode >= 200 && res.statusCode < 300) {
                    resolve();
                } else {
                    console.error(`IndexNow API error (${res.statusCode}): ${responseData}`);
                    reject(new Error(`IndexNow API returned status ${res.statusCode}: ${responseData}`));
                }
            });
        });

        req.on('error', (error) => {
            console.error('Error submitting to IndexNow:', error);
            reject(error);
        });

        req.write(payload);
        req.end();
    });
}

/**
 * Submit URLs to the IndexNow API in batches of at most BATCH_SIZE.
 *
 * A failed batch is logged and skipped so the remaining batches are still
 * attempted. In test mode (isTestMode) each request is printed instead of
 * being sent.
 *
 * @param {string[]} urls - URLs to submit.
 * @returns {Promise<string[]>} The URLs whose batch was accepted with a 2xx
 *   response, in submission order. URLs from failed batches and URLs that
 *   were only printed in test mode are not included.
 */
async function submitToIndexNow(urls) {
    const submittedUrls = [];

    if (urls.length === 0) {
        console.log('No URLs to submit.');
        return submittedUrls;
    }

    // Split URLs into batches (IndexNow allows up to 10,000 URLs per submission)
    for (let i = 0; i < urls.length; i += BATCH_SIZE) {
        const batch = urls.slice(i, i + BATCH_SIZE);

        console.log(`Submitting batch of ${batch.length} URLs to IndexNow...`);

        const data = JSON.stringify({
            host: new URL(SITE_URL).hostname,
            key: INDEXNOW_API_KEY,
            keyLocation: `${SITE_URL}/${INDEXNOW_KEY_LOCATION}`,
            urlList: batch,
        });

        if (isTestMode) {
            console.log('TEST MODE - would submit:');
            console.log(`Endpoint: ${INDEXNOW_ENDPOINT}`);
            console.log(`Data: ${data}`);
            continue;
        }

        try {
            await postIndexNowBatch(data);
            console.log(`Successfully submitted ${batch.length} URLs to IndexNow.`);
            for (const url of batch) {
                submittedUrls.push(url);
            }
        } catch (error) {
            console.error('Failed to submit batch to IndexNow:', error);
            // Continue with the next batch even if this one failed
        }
    }

    return submittedUrls;
}

/**
 * Write the IndexNow key verification file.
 *
 * The file is named INDEXNOW_KEY_LOCATION, placed in the `public` directory
 * two levels above this script, and contains INDEXNOW_API_KEY. A write
 * failure is logged and otherwise ignored.
 *
 * @returns {void}
 */
function generateKeyFile() {
    console.log('Generating IndexNow key verification file...');

    const publicDir = path.join(__dirname, '../../public');
    const keyFilePath = path.join(publicDir, INDEXNOW_KEY_LOCATION);

    try {
        fs.writeFileSync(keyFilePath, INDEXNOW_API_KEY);
    } catch (writeError) {
        console.error('Error generating key file:', writeError);
        return;
    }

    console.log(`Key file generated at: ${keyFilePath}`);
}

/**
 * Entry point: submit sitemap URLs that have not been submitted before.
 *
 * Steps: write the key verification file, read the sitemap, load the cache
 * of previously submitted URLs, submit every URL missing from the cache (or
 * every URL when INDEXNOW_FORCE_SUBMIT is 'true'), then save the cache.
 *
 * URLs are recorded in the cache only after the submission step has run, so
 * a URL that was not delivered can be retried on the next run.
 *
 * @returns {Promise<void>}
 */
async function main() {
    console.log('Starting IndexNow URL submission...');

    // Generate the key verification file
    generateKeyFile();

    // Get URLs from sitemap
    const sitemapUrls = await getSitemapUrls();
    console.log(`Found ${sitemapUrls.length} URLs in sitemap.`);

    // Load previously submitted URLs
    const cache = loadSubmittedUrls();
    cache.lastSubmission = new Date().toISOString();

    // Find URLs to submit (new, or all of them when forced). The Set drops
    // duplicate sitemap entries so each URL is submitted at most once.
    const forceSubmit = process.env.INDEXNOW_FORCE_SUBMIT === 'true';
    const urlsToSubmit = [...new Set(sitemapUrls)].filter(
        (url) => forceSubmit || !cache.urls[url]
    );

    console.log(`Found ${urlsToSubmit.length} new or updated URLs to submit.`);

    // Submit URLs to IndexNow
    const result = await submitToIndexNow(urlsToSubmit);

    // When the submit step reports which URLs were delivered (an array),
    // record only those; when it returns nothing, treat every URL as submitted.
    const acceptedUrls = Array.isArray(result) ? result : urlsToSubmit;
    for (const url of acceptedUrls) {
        cache.urls[url] = { lastSubmitted: cache.lastSubmission };
    }

    // Save updated cache
    saveSubmittedUrls(cache);

    console.log('IndexNow URL submission completed.');
}

// Run the main function. If it rejects, log the error and exit with
// status 1 so the failure is visible to whatever invoked this script.
main().catch(error => {
console.error('IndexNow script failed:', error);
process.exit(1);
});
11 changes: 11 additions & 0 deletions scripts/search/submit-indexnow.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#!/bin/bash
#
# Submits URLs to the IndexNow API to trigger immediate crawling by search
# engines. The submission itself is handled by the indexnow.js Node.js script,
# which receives this script's first argument.

set -eo pipefail

. ./scripts/common.sh

printf '%s\n' "Submitting URLs to IndexNow..."
node ./scripts/search/indexnow.js "$1"
85 changes: 85 additions & 0 deletions scripts/search/test-indexnow-api.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
/**
* Test script for verifying IndexNow API functionality
*
* This script makes a real API call with a single test URL to validate
* that the IndexNow API works correctly with our implementation.
*/

const https = require('https');

// IndexNow API settings
// Endpoint the test submission is POSTed to.
const INDEXNOW_ENDPOINT = 'https://www.bing.com/indexnow';
// Key sent with the submission; read from the INDEXNOW_API_KEY environment
// variable, with the literal below used when it is unset or empty.
const INDEXNOW_API_KEY = process.env.INDEXNOW_API_KEY || '33134703c43349ddb227d769745f33cc';
// The single URL submitted by the test. The trailing slash matters: the
// keyLocation is built by appending 'indexnow.txt' directly to this value.
const TEST_URL = 'https://www.pulumi.com/';

/**
 * Submit TEST_URL to the IndexNow API and report the outcome.
 *
 * Logs the request payload and the full response (status code, headers,
 * body). On a non-2xx response or a request error the process exit code is
 * set to 1, so a caller that checks the exit status sees the failure. The
 * function itself never throws.
 *
 * @returns {Promise<boolean>} true when the API answered with a 2xx status,
 *   false otherwise.
 */
async function testIndexNowApi() {
    console.log('Testing IndexNow API with a single URL submission...');
    console.log(`API Key: ${INDEXNOW_API_KEY}`);
    console.log(`Test URL: ${TEST_URL}`);

    // Prepare data for IndexNow API
    const data = JSON.stringify({
        host: new URL(TEST_URL).hostname,
        key: INDEXNOW_API_KEY,
        keyLocation: `${TEST_URL}indexnow.txt`,
        urlList: [TEST_URL],
    });

    // Log the request
    console.log('\nRequest data:');
    console.log(data);

    try {
        const response = await new Promise((resolve, reject) => {
            const options = {
                method: 'POST',
                headers: {
                    'Content-Type': 'application/json',
                    // Content-Length is a byte count, not a character count.
                    'Content-Length': Buffer.byteLength(data),
                },
            };

            const req = https.request(INDEXNOW_ENDPOINT, options, (res) => {
                let responseData = '';

                res.on('data', (chunk) => {
                    responseData += chunk;
                });

                res.on('end', () => {
                    resolve({
                        statusCode: res.statusCode,
                        headers: res.headers,
                        body: responseData,
                    });
                });
            });

            req.on('error', (error) => {
                reject(error);
            });

            req.write(data);
            req.end();
        });

        // Log the response
        console.log('\nResponse:');
        console.log(`Status code: ${response.statusCode}`);
        console.log('Headers:', JSON.stringify(response.headers, null, 2));
        console.log('Body:', response.body);

        // Check if successful
        if (response.statusCode >= 200 && response.statusCode < 300) {
            console.log('\n✅ SUCCESS: IndexNow API test completed successfully!');
            return true;
        }
        console.log('\n❌ ERROR: IndexNow API test failed with status code', response.statusCode);
    } catch (error) {
        console.error('\n❌ ERROR: Failed to test IndexNow API:', error);
    }

    // Reached only on failure: make the process exit non-zero once it ends.
    process.exitCode = 1;
    return false;
}

// Run the test. The returned promise is not awaited here; testIndexNowApi
// catches request errors itself and reports them through its log output.
testIndexNowApi();
9 changes: 9 additions & 0 deletions scripts/search/test-indexnow-api.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#!/bin/bash
#
# Tests the IndexNow API with a single URL submission to verify that the API
# is working correctly. The request is made by test-indexnow-api.js.

set -eo pipefail

printf '%s\n' "Testing IndexNow API with a single URL submission..."
node ./scripts/search/test-indexnow-api.js
Loading