Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
209 changes: 186 additions & 23 deletions data/onPostBuild/llmstxt.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,16 @@ import languageInfo from '../../src/data/languages/languageInfo';
* It is heavily inspired by the gatsby-plugin-sitemap plugin, and stripped down to only what we need.
*/

const LLMS_TXT_PREAMBLE = `# https://ably.com/docs llms.txt\n`;
const LLMS_TXT_PREAMBLE = `# Ably Documentation

> Ably is a realtime experience infrastructure platform that provides pub/sub messaging, chat, realtime data synchronization, and more.

- **Global Edge Network**: Ultra-low latency realtime messaging delivered through a globally distributed edge network
- **Enterprise Scale**: Built to handle millions of concurrent connections with guaranteed message delivery
- **Multiple Products**: Pub/Sub, Chat, LiveSync, LiveObjects, Spaces, Asset Tracking, and powerful integrations
- **Developer-Friendly SDKs**: SDKs available for JavaScript, Python, Java, Go, Swift, and many more languages
Comment on lines +17 to +18
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Lost my comments on these:

  • Let's remove 'integrations' from a heading about products.
  • Should we list all languages in the SDK row to make it complete?


`;

const REPORTER_PREFIX = 'onPostBuild:';

Expand All @@ -25,6 +34,8 @@ const VALID_LANGUAGES = [
'ruby',
'swift',
'go',
'kotlin',
'react',
];

// Function to get the display label for a language
Expand Down Expand Up @@ -78,6 +89,76 @@ const escapeMarkdown = (text: string) => {
return text.replace(/([\\`*_{}[\]()#+!])/g, '\\$1');
};

/**
 * Two-level grouping of documentation pages used when writing llms.txt.
 *
 * The outer key is a category name; each category carries a display title and a
 * dictionary of subcategories, and each subcategory carries its own display
 * title plus the pages that were assigned to it.
 */
interface CategoryStructure {
  [category: string]: {
    /** Heading text for the category. */
    title: string;
    /** Subcategories keyed by subcategory name. */
    subcategories: Record<
      string,
      {
        /** Heading text for the subcategory. */
        title: string;
        /** Pages listed under this subcategory, in insertion order. */
        pages: {
          slug: string;
          meta: { title: string; meta_description: string };
          languages: string[];
        }[];
      }
    >;
  };
}

/**
 * Lookup table from the first path segment of a docs slug to the category and
 * subcategory the page is listed under.
 *
 * Built once at module load instead of on every `categorizePage` call. A `Map`
 * is used rather than an object literal so that segments such as `constructor`,
 * `toString` or `__proto__` cannot match inherited `Object.prototype` members.
 *
 * The NOTE(review) comments record open feedback from the pull-request review;
 * they describe proposed changes that are not yet applied.
 */
const CATEGORY_BY_FIRST_SEGMENT: ReadonlyMap<string, { category: string; subcategory: string }> = new Map<
  string,
  { category: string; subcategory: string }
>([
  // Platform
  ['platform', { category: 'Platform', subcategory: 'Platform & Account' }],
  ['auth', { category: 'Platform', subcategory: 'Authentication' }],
  // NOTE(review): reviewer thinks APIs should sit within the relevant product.
  ['api', { category: 'Platform', subcategory: 'API Reference' }],
  // NOTE(review): reviewer suggests additional Platform sub-categories:
  // architecture, pricing, integrations.
  ['sdks', { category: 'Platform', subcategory: 'SDKs' }],

  // Pub/Sub - Core realtime messaging features
  // NOTE(review): 'basics' just pulls in the product landing page; see the
  // empty sub-category suggestion on 'pub-sub' below.
  ['basics', { category: 'Pub/Sub', subcategory: 'Basics' }],
  ['channels', { category: 'Pub/Sub', subcategory: 'Channels' }],
  ['connect', { category: 'Pub/Sub', subcategory: 'Connections' }],
  ['getting-started', { category: 'Pub/Sub', subcategory: 'Getting Started' }],
  // NOTE(review): 'guides' only has one page at the moment and it is for Chat;
  // reviewer suggests moving it into Platform for now.
  ['guides', { category: 'Pub/Sub', subcategory: 'Guides' }],
  // NOTE(review): reviewer suggests removing 'how-to' (and the page itself).
  ['how-to', { category: 'Pub/Sub', subcategory: 'How-To' }],
  ['messages', { category: 'Pub/Sub', subcategory: 'Messages' }],
  ['metadata-stats', { category: 'Pub/Sub', subcategory: 'Metadata & Statistics' }],
  ['presence-occupancy', { category: 'Pub/Sub', subcategory: 'Presence & Occupancy' }],
  ['protocols', { category: 'Pub/Sub', subcategory: 'Protocols' }],
  // NOTE(review): 'pub-sub' seems to be used for the Pub/Sub basics and advanced
  // pages, which seems odd. Reviewer wonders whether an 'unclassified'
  // product-level entry is needed, i.e. a category of 'Pub/Sub' without a
  // sub-category.
  ['pub-sub', { category: 'Pub/Sub', subcategory: 'Pub/Sub Features' }],
  ['push', { category: 'Pub/Sub', subcategory: 'Push Notifications' }],
  ['storage-history', { category: 'Pub/Sub', subcategory: 'Storage & History' }],

  // Chat
  ['chat', { category: 'Chat', subcategory: 'Chat' }],

  // Spaces
  ['spaces', { category: 'Spaces', subcategory: 'Spaces' }],

  // LiveObjects
  ['liveobjects', { category: 'LiveObjects', subcategory: 'LiveObjects' }],

  // LiveSync
  ['livesync', { category: 'LiveSync', subcategory: 'LiveSync' }],

  // Asset Tracking
  ['asset-tracking', { category: 'Asset Tracking', subcategory: 'Asset Tracking' }],
]);

/**
 * Categorizes a page based on the first `/`-separated segment of its slug.
 *
 * @param slug - Page slug, e.g. `channels/options`.
 * @returns The mapped category and subcategory, or
 *   `{ category: 'General', subcategory: 'Documentation' }` when the first
 *   segment is empty or not in the lookup table.
 */
const categorizePage = (slug: string): { category: string; subcategory: string } => {
  // An empty first segment (empty slug or leading '/') falls back to 'general',
  // which has no table entry and therefore yields the default below.
  const firstPart = slug.split('/')[0] || 'general';

  // Map#get only sees explicitly registered keys, never prototype members.
  return CATEGORY_BY_FIRST_SEGMENT.get(firstPart) ?? { category: 'General', subcategory: 'Documentation' };
};

// Function to extract code element classes from an MDX file
const extractCodeLanguages = async (filePath: string): Promise<Set<string>> => {
try {
Expand Down Expand Up @@ -217,32 +298,114 @@ export const onPostBuild: GatsbyNode['onPostBuild'] = async ({ graphql, reporter
`${REPORTER_PREFIX} Found ${allPages.length} pages to place into llms.txt (${textilePages.length} textile, ${mdxPages.length} MDX)`,
);

const serializedPages = [LLMS_TXT_PREAMBLE];
// Organize pages into categories
const categoryStructure: CategoryStructure = {};

for (const page of allPages) {
const { slug, meta, languages } = page;
const { title, meta_description } = meta;

try {
const baseUrl = prefixPath({ url: `/docs/${slug}`, siteUrl, pathPrefix: basePath });
const safeTitle = escapeMarkdown(title);

// Generate base page entry (without language parameter)
const baseLink = `[${safeTitle}](${baseUrl})`;
const baseLine = `- ${[baseLink, meta_description].join(': ')}`;
serializedPages.push(baseLine);

// Generate language-specific entries if the page has languages
if (languages && languages.length > 0) {
for (const language of languages) {
const langUrl = `${baseUrl}?lang=${language}`;
const langLink = `[${safeTitle} (${getLanguageLabel(language)})](${langUrl})`;
const langLine = `- ${[langLink, meta_description].join(': ')}`;
serializedPages.push(langLine);
const { category, subcategory } = categorizePage(page.slug);

// Initialize category if it doesn't exist
if (!categoryStructure[category]) {
categoryStructure[category] = {
title: category,
subcategories: {},
};
}

// Initialize subcategory if it doesn't exist
if (!categoryStructure[category].subcategories[subcategory]) {
categoryStructure[category].subcategories[subcategory] = {
title: subcategory,
pages: [],
};
}

// Add page to subcategory (only base page without language variants)
categoryStructure[category].subcategories[subcategory].pages.push(page);
}

// Generate serialized output with categorization
const serializedPages = [LLMS_TXT_PREAMBLE];

// Define the order of categories
const categoryOrder = [
'Platform',
'Pub/Sub',
'Chat',
'Spaces',
'LiveObjects',
'LiveSync',
'Asset Tracking',
'General',
];

// Sort categories by defined order
const sortedCategories = Object.keys(categoryStructure).sort((a, b) => {
const indexA = categoryOrder.indexOf(a);
const indexB = categoryOrder.indexOf(b);
if (indexA === -1 && indexB === -1) return a.localeCompare(b);
if (indexA === -1) return 1;
if (indexB === -1) return -1;
return indexA - indexB;
});

for (const categoryKey of sortedCategories) {
const category = categoryStructure[categoryKey];
serializedPages.push(`## ${category.title}`);
serializedPages.push('');

// Sort subcategories alphabetically
const sortedSubcategories = Object.keys(category.subcategories).sort();

for (const subcategoryKey of sortedSubcategories) {
const subcategory = category.subcategories[subcategoryKey];
serializedPages.push(`### ${subcategory.title}`);

for (const page of subcategory.pages) {
const { slug, meta, languages } = page;
const { title, meta_description } = meta;

try {
const baseUrl = prefixPath({ url: `/docs/${slug}`, siteUrl, pathPrefix: basePath });
const safeTitle = escapeMarkdown(title);

// Generate base page entry (without language parameter)
const baseLink = `[${safeTitle}](${baseUrl})`;
const baseLine = `- ${[baseLink, meta_description].join(': ')}`;
serializedPages.push(baseLine);

// Generate language-specific entries if the page has languages
// Skip language variants that match the page's primary language (e.g., skip ?lang=go for /getting-started/go)
if (languages && languages.length > 0) {
// Extract the last part of the slug to check if it matches a language
const slugParts = slug.split('/');
const slugLastPart = slugParts[slugParts.length - 1];

// Map slug names to their corresponding language codes
const slugToLangMap: Record<string, string> = {
dotnet: 'csharp',
'objective-c': 'objc',
};

// Get the primary language for this page (either direct match or mapped)
const primaryLanguage = slugToLangMap[slugLastPart] || slugLastPart;

for (const language of languages) {
// Skip if the language matches the page's primary language
if (language !== primaryLanguage) {
const langUrl = `${baseUrl}?lang=${language}`;
const langLink = `[${safeTitle} (${getLanguageLabel(language)})](${langUrl})`;
const langLine = `- ${[langLink, meta_description].join(': ')}`;
serializedPages.push(langLine);
}
}
}
} catch (err) {
reporter.panic(`${REPORTER_PREFIX} Error serializing pages`, err as Error);
}
}
} catch (err) {
reporter.panic(`${REPORTER_PREFIX} Error serializing pages`, err as Error);

serializedPages.push(''); // Add blank line after each subcategory
}
}

Expand Down