-
Notifications
You must be signed in to change notification settings - Fork 45
[EDU-2101] - Update llmstxt.ts to structure llms.txt a bit cleaner #2891
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -8,7 +8,16 @@ import languageInfo from '../../src/data/languages/languageInfo'; | |
| * It is heavily inspired by the gatsby-plugin-sitemap plugin, and stripped down to only what we need. | ||
| */ | ||
|
|
||
| const LLMS_TXT_PREAMBLE = `# https://ably.com/docs llms.txt\n`; | ||
| const LLMS_TXT_PREAMBLE = `# Ably Documentation | ||
|
|
||
| > Ably is a realtime experience infrastructure platform that provides pub/sub messaging, chat, realtime data synchronization, and more. | ||
|
|
||
| - **Global Edge Network**: Ultra-low latency realtime messaging delivered through a globally distributed edge network | ||
| - **Enterprise Scale**: Built to handle millions of concurrent connections with guaranteed message delivery | ||
| - **Multiple Products**: Pub/Sub, Chat, LiveSync, LiveObjects, Spaces, Asset Tracking, and powerful integrations | ||
| - **Developer-Friendly SDKs**: SDKs available for JavaScript, Python, Java, Go, Swift, and many more languages | ||
|
|
||
| `; | ||
|
|
||
| const REPORTER_PREFIX = 'onPostBuild:'; | ||
|
|
||
|
|
@@ -25,6 +34,8 @@ const VALID_LANGUAGES = [ | |
| 'ruby', | ||
| 'swift', | ||
| 'go', | ||
| 'kotlin', | ||
| 'react', | ||
| ]; | ||
|
|
||
| // Function to get the display label for a language | ||
|
|
@@ -78,6 +89,76 @@ const escapeMarkdown = (text: string) => { | |
| return text.replace(/([\\`*_{}[\]()#+!])/g, '\\$1'); | ||
| }; | ||
|
|
||
| // Category structure for organizing pages | ||
| interface CategoryStructure { | ||
| [category: string]: { | ||
| title: string; | ||
| subcategories: { | ||
| [subcategory: string]: { | ||
| title: string; | ||
| pages: Array<{ | ||
| slug: string; | ||
| meta: { title: string; meta_description: string }; | ||
| languages: string[]; | ||
| }>; | ||
| }; | ||
| }; | ||
| }; | ||
| } | ||
|
|
||
| // Function to categorize a page based on its slug | ||
| const categorizePage = (slug: string): { category: string; subcategory: string } => { | ||
| const parts = slug.split('/'); | ||
| const firstPart = parts[0] || 'general'; | ||
|
|
||
| // Define category mappings | ||
| const categoryMap: Record<string, { category: string; subcategory: string }> = { | ||
| // Platform | ||
| platform: { category: 'Platform', subcategory: 'Platform & Account' }, | ||
| auth: { category: 'Platform', subcategory: 'Authentication' }, | ||
| api: { category: 'Platform', subcategory: 'API Reference' }, | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think APIs should sit within the relevant product. |
||
| sdks: { category: 'Platform', subcategory: 'SDKs' }, | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think there should be some additional sub-categories here:
|
||
|
|
||
| // Pub/Sub - Core realtime messaging features | ||
| basics: { category: 'Pub/Sub', subcategory: 'Basics' }, | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This just pulls in the product landing page. See my comment about an empty sub-category suggestion. |
||
| channels: { category: 'Pub/Sub', subcategory: 'Channels' }, | ||
| connect: { category: 'Pub/Sub', subcategory: 'Connections' }, | ||
| 'getting-started': { category: 'Pub/Sub', subcategory: 'Getting Started' }, | ||
| guides: { category: 'Pub/Sub', subcategory: 'Guides' }, | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This only has 1 at the moment and is for Chat... Let's move it into platform for now maybe. |
||
| 'how-to': { category: 'Pub/Sub', subcategory: 'How-To' }, | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Let's remove this. I think it's time we removed the page too. |
||
| messages: { category: 'Pub/Sub', subcategory: 'Messages' }, | ||
| 'metadata-stats': { category: 'Pub/Sub', subcategory: 'Metadata & Statistics' }, | ||
| 'presence-occupancy': { category: 'Pub/Sub', subcategory: 'Presence & Occupancy' }, | ||
| protocols: { category: 'Pub/Sub', subcategory: 'Protocols' }, | ||
| 'pub-sub': { category: 'Pub/Sub', subcategory: 'Pub/Sub Features' }, | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This seems to be used for the Pub/Sub basics and advanced pages which seems odd. I wonder if we need an 'unclassified' product level one. So it's just a category of 'Pub/Sub' without a sub-category. WDYT? |
||
| push: { category: 'Pub/Sub', subcategory: 'Push Notifications' }, | ||
| 'storage-history': { category: 'Pub/Sub', subcategory: 'Storage & History' }, | ||
|
|
||
| // Chat | ||
| chat: { category: 'Chat', subcategory: 'Chat' }, | ||
|
|
||
| // Spaces | ||
| spaces: { category: 'Spaces', subcategory: 'Spaces' }, | ||
|
|
||
| // LiveObjects | ||
| liveobjects: { category: 'LiveObjects', subcategory: 'LiveObjects' }, | ||
|
|
||
| // LiveSync | ||
| livesync: { category: 'LiveSync', subcategory: 'LiveSync' }, | ||
|
|
||
| // Asset Tracking | ||
| 'asset-tracking': { category: 'Asset Tracking', subcategory: 'Asset Tracking' }, | ||
| }; | ||
|
|
||
| // Check if the first part matches a known category | ||
| if (categoryMap[firstPart]) { | ||
| return categoryMap[firstPart]; | ||
| } | ||
|
|
||
| // Default categorization for uncategorized pages | ||
| return { category: 'General', subcategory: 'Documentation' }; | ||
| }; | ||
|
|
||
| // Function to extract code element classes from an MDX file | ||
| const extractCodeLanguages = async (filePath: string): Promise<Set<string>> => { | ||
| try { | ||
|
|
@@ -217,32 +298,114 @@ export const onPostBuild: GatsbyNode['onPostBuild'] = async ({ graphql, reporter | |
| `${REPORTER_PREFIX} Found ${allPages.length} pages to place into llms.txt (${textilePages.length} textile, ${mdxPages.length} MDX)`, | ||
| ); | ||
|
|
||
| const serializedPages = [LLMS_TXT_PREAMBLE]; | ||
| // Organize pages into categories | ||
| const categoryStructure: CategoryStructure = {}; | ||
|
|
||
| for (const page of allPages) { | ||
| const { slug, meta, languages } = page; | ||
| const { title, meta_description } = meta; | ||
|
|
||
| try { | ||
| const baseUrl = prefixPath({ url: `/docs/${slug}`, siteUrl, pathPrefix: basePath }); | ||
| const safeTitle = escapeMarkdown(title); | ||
|
|
||
| // Generate base page entry (without language parameter) | ||
| const baseLink = `[${safeTitle}](${baseUrl})`; | ||
| const baseLine = `- ${[baseLink, meta_description].join(': ')}`; | ||
| serializedPages.push(baseLine); | ||
|
|
||
| // Generate language-specific entries if the page has languages | ||
| if (languages && languages.length > 0) { | ||
| for (const language of languages) { | ||
| const langUrl = `${baseUrl}?lang=${language}`; | ||
| const langLink = `[${safeTitle} (${getLanguageLabel(language)})](${langUrl})`; | ||
| const langLine = `- ${[langLink, meta_description].join(': ')}`; | ||
| serializedPages.push(langLine); | ||
| const { category, subcategory } = categorizePage(page.slug); | ||
|
|
||
| // Initialize category if it doesn't exist | ||
| if (!categoryStructure[category]) { | ||
| categoryStructure[category] = { | ||
| title: category, | ||
| subcategories: {}, | ||
| }; | ||
| } | ||
|
|
||
| // Initialize subcategory if it doesn't exist | ||
| if (!categoryStructure[category].subcategories[subcategory]) { | ||
| categoryStructure[category].subcategories[subcategory] = { | ||
| title: subcategory, | ||
| pages: [], | ||
| }; | ||
| } | ||
|
|
||
| // Add page to subcategory (only base page without language variants) | ||
| categoryStructure[category].subcategories[subcategory].pages.push(page); | ||
| } | ||
|
|
||
| // Generate serialized output with categorization | ||
| const serializedPages = [LLMS_TXT_PREAMBLE]; | ||
|
|
||
| // Define the order of categories | ||
| const categoryOrder = [ | ||
| 'Platform', | ||
| 'Pub/Sub', | ||
| 'Chat', | ||
| 'Spaces', | ||
| 'LiveObjects', | ||
| 'LiveSync', | ||
| 'Asset Tracking', | ||
| 'General', | ||
| ]; | ||
|
|
||
| // Sort categories by defined order | ||
| const sortedCategories = Object.keys(categoryStructure).sort((a, b) => { | ||
| const indexA = categoryOrder.indexOf(a); | ||
| const indexB = categoryOrder.indexOf(b); | ||
| if (indexA === -1 && indexB === -1) return a.localeCompare(b); | ||
| if (indexA === -1) return 1; | ||
| if (indexB === -1) return -1; | ||
| return indexA - indexB; | ||
| }); | ||
|
|
||
| for (const categoryKey of sortedCategories) { | ||
| const category = categoryStructure[categoryKey]; | ||
| serializedPages.push(`## ${category.title}`); | ||
| serializedPages.push(''); | ||
|
|
||
| // Sort subcategories alphabetically | ||
| const sortedSubcategories = Object.keys(category.subcategories).sort(); | ||
|
|
||
| for (const subcategoryKey of sortedSubcategories) { | ||
| const subcategory = category.subcategories[subcategoryKey]; | ||
| serializedPages.push(`### ${subcategory.title}`); | ||
|
|
||
| for (const page of subcategory.pages) { | ||
| const { slug, meta, languages } = page; | ||
| const { title, meta_description } = meta; | ||
|
|
||
| try { | ||
| const baseUrl = prefixPath({ url: `/docs/${slug}`, siteUrl, pathPrefix: basePath }); | ||
| const safeTitle = escapeMarkdown(title); | ||
|
|
||
| // Generate base page entry (without language parameter) | ||
| const baseLink = `[${safeTitle}](${baseUrl})`; | ||
| const baseLine = `- ${[baseLink, meta_description].join(': ')}`; | ||
| serializedPages.push(baseLine); | ||
|
|
||
| // Generate language-specific entries if the page has languages | ||
| // Skip language variants that match the page's primary language (e.g., skip ?lang=go for /getting-started/go) | ||
| if (languages && languages.length > 0) { | ||
| // Extract the last part of the slug to check if it matches a language | ||
| const slugParts = slug.split('/'); | ||
| const slugLastPart = slugParts[slugParts.length - 1]; | ||
|
|
||
| // Map slug names to their corresponding language codes | ||
| const slugToLangMap: Record<string, string> = { | ||
| dotnet: 'csharp', | ||
| 'objective-c': 'objc', | ||
| }; | ||
|
|
||
| // Get the primary language for this page (either direct match or mapped) | ||
| const primaryLanguage = slugToLangMap[slugLastPart] || slugLastPart; | ||
|
|
||
| for (const language of languages) { | ||
| // Skip if the language matches the page's primary language | ||
| if (language !== primaryLanguage) { | ||
| const langUrl = `${baseUrl}?lang=${language}`; | ||
| const langLink = `[${safeTitle} (${getLanguageLabel(language)})](${langUrl})`; | ||
| const langLine = `- ${[langLink, meta_description].join(': ')}`; | ||
| serializedPages.push(langLine); | ||
| } | ||
| } | ||
| } | ||
| } catch (err) { | ||
| reporter.panic(`${REPORTER_PREFIX} Error serializing pages`, err as Error); | ||
| } | ||
| } | ||
| } catch (err) { | ||
| reporter.panic(`${REPORTER_PREFIX} Error serializing pages`, err as Error); | ||
|
|
||
| serializedPages.push(''); // Add blank line after each subcategory | ||
| } | ||
| } | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Lost my comment these;