Skip to content

Commit 1887d38

Browse files
committed
Add script to generate llms.txt and llms-full.txt in prebuild
1 parent e3a59e7 commit 1887d38

File tree

5 files changed

+223
-6
lines changed

5 files changed

+223
-6
lines changed

documentation/web-console.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
title: Web Console Overview
33
sidebar_label: Overview
44
description: Learn how to use the QuestDB Web Console. Launch queries, create
5-
visualizations and more. Includes pictures and examples.
5+
visualizations and more. Includes pictures and examples with links to the main components.
66
---
77

88
import Screenshot from "@theme/Screenshot"

documentation/web-console/create-table.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
---
22
title: Create Table
3-
description: Create a new table using QuestDB Web Console
3+
description: Create a new table using QuestDB Web Console user interface
44
---
55

66
import Screenshot from "@theme/Screenshot"

package.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
"license": "Apache-2.0",
66
"scripts": {
77
"start": "cross-env docusaurus start --port 3001",
8-
"prebuild": "docusaurus clear && node ./scripts/cleanup-guidelines",
8+
"prebuild": "docusaurus clear && node ./scripts/generate-llms-files.js",
99
"build": "cross-env NO_UPDATE_NOTIFIER=true USE_SIMPLE_CSS_MINIFIER=true PWA_SW_CUSTOM= docusaurus build",
1010
"deploy": "docusaurus deploy",
1111
"serve": "docusaurus serve",
@@ -27,6 +27,7 @@
2727
"dotenv": "^16.4.5",
2828
"framer-motion": "^10.18.0",
2929
"gray-matter": "4.0.3",
30+
"js-yaml": "^4.1.0",
3031
"prism-react-renderer": "2.4.0",
3132
"react": "^19.0.0",
3233
"react-calendly": "4.3.1",

scripts/cleanup-guidelines.js

Lines changed: 0 additions & 3 deletions
This file was deleted.

scripts/generate-llms-files.js

Lines changed: 219 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,219 @@
1+
const fs = require('fs')
2+
const path = require('path')
3+
const yaml = require('js-yaml')
4+
5+
const sidebarConfig = require('../documentation/sidebars.js')
6+
const BASE_URL = 'https://questdb.com/docs/'
7+
8+
const processedFiles = new Map()
9+
10+
/**
 * Reads the Markdown source for a documentation page.
 *
 * Tries `filePath` first and falls back to the sibling `.mdx` file.
 *
 * @param {string} filePath - Path ending in `.md`.
 * @returns {string} The UTF-8 file contents.
 * @throws {Error} When neither the `.md` nor the `.mdx` file exists.
 */
function readDocSource(filePath) {
  if (fs.existsSync(filePath)) {
    return fs.readFileSync(filePath, 'utf8')
  }

  // Swap only the trailing extension; a plain string replace would rewrite
  // the first ".md" found anywhere in the path (e.g. inside a directory name).
  const mdxPath = filePath.replace(/\.md$/, '.mdx')
  if (fs.existsSync(mdxPath)) {
    return fs.readFileSync(mdxPath, 'utf8')
  }

  throw new Error(`File not found: ${filePath} or ${mdxPath}`)
}

/**
 * Strips MDX-only syntax (import lines, self-closing components, heading
 * markers) from a page body while leaving fenced code blocks untouched.
 *
 * @param {string} markdown - Page body without front matter.
 * @returns {string} Cleaned, trimmed text.
 */
function stripMdxNoise(markdown) {
  const cleanProse = (text) =>
    text
      // Remove import statements
      .replace(/^import\s+.*$/gm, '')
      // Remove self-closing components (may span several lines)
      .replace(/<[A-Z][^>]*\/>/g, '')
      // Drop the leading "#" markers but keep the heading text
      .replace(/^#{1,6}\s*(.*)$/gm, '$1')

  const segments = []
  let buffer = []
  let openFence = null

  const flush = (isCode) => {
    if (buffer.length === 0) {
      return
    }
    const text = buffer.join('\n')
    segments.push(isCode ? text : cleanProse(text))
    buffer = []
  }

  for (const line of markdown.split('\n')) {
    const fence = line.match(/^\s*(`{3,}|~{3,})/)
    if (openFence === null && fence) {
      // Opening fence: everything buffered so far is prose.
      flush(false)
      openFence = fence[1]
      buffer.push(line)
    } else if (
      openFence !== null &&
      fence &&
      fence[1][0] === openFence[0] &&
      fence[1].length >= openFence.length
    ) {
      // Closing fence: emit the code block verbatim so code samples keep
      // their own `import ...` lines and `# comment` lines.
      buffer.push(line)
      flush(true)
      openFence = null
    } else {
      buffer.push(line)
    }
  }
  // An unterminated fence is treated as code up to the end of the file.
  flush(openFence !== null)

  return segments
    .join('\n')
    // Remove extra newlines
    .replace(/\n\s*\n\s*\n/g, '\n\n')
    .trim()
}

/**
 * Loads a documentation page and returns its front-matter metadata together
 * with a cleaned version of its body. Results (including failures) are
 * memoized in `processedFiles`, keyed by `filePath`.
 *
 * @param {string} filePath - Path to the `.md` file; `.mdx` is tried as a fallback.
 * @returns {{title: (string|null), description: (string|null), content: string}}
 *   Metadata and cleaned content. On any read/parse error a warning is logged
 *   and an empty result is returned instead of throwing.
 */
function extractMetadataAndContent(filePath) {
  if (processedFiles.has(filePath)) {
    return processedFiles.get(filePath)
  }

  let result
  try {
    const raw = readDocSource(filePath)

    // The body group is optional so that a file consisting only of front
    // matter, with no newline after the closing "---", is still recognised.
    const match = raw.match(/^---\s*\n([\s\S]*?)\n---\s*(?:\n([\s\S]*))?$/)

    let frontmatter = {}
    let body = raw
    if (match) {
      frontmatter = yaml.load(match[1]) || {}
      body = match[2] || ''
    }

    result = {
      title: frontmatter.title || null,
      description: frontmatter.description || null,
      content: stripMdxNoise(body)
    }
  } catch (error) {
    console.warn(`Warning: Could not read file ${filePath}: ${error.message}`)
    result = { title: null, description: null, content: '' }
  }

  processedFiles.set(filePath, result)
  return result
}
67+
68+
/**
 * Maps a sidebar document id to its public documentation URL.
 *
 * @param {string} docId - Sidebar id of the page.
 * @returns {string} `BASE_URL` itself for the `introduction` page, otherwise
 *   `BASE_URL` followed by the id.
 */
function generateUrl(docId) {
  const isLandingPage = docId === 'introduction'
  return isLandingPage ? BASE_URL : BASE_URL + docId
}
74+
75+
/**
 * Renders sidebar items as the link index used for llms.txt.
 *
 * - String entries and `doc` items become `- [title](url): description` lines.
 * - Top-level categories become `## label` sections; nested categories are
 *   emitted as a plain label line followed by their children, indented one
 *   level deeper.
 * - `link` items become `- [label](href)` lines.
 * - Any other item type is ignored.
 *
 * @param {Array<string|object>} items - Sidebar items to render.
 * @param {number} [indent=0] - Nesting depth used to build the line prefix.
 * @param {boolean} [isTopLevel=false] - Whether `items` is the sidebar root.
 * @returns {string} The rendered Markdown fragment.
 */
function processForLlmsTxt(items, indent = 0, isTopLevel = false) {
  const indentStr = ' '.repeat(indent)
  let result = ''

  for (const item of items) {
    const isShorthand = typeof item === 'string'

    if (isShorthand || item.type === 'doc') {
      const docId = isShorthand ? item : item.id
      const docPath = path.join('./documentation', docId + '.md')
      const { title, description } = extractMetadataAndContent(docPath)

      // An explicit sidebar label wins over the page's own title.
      const displayTitle = (isShorthand ? null : item.label) || title || docId
      result += `${indentStr}- [${displayTitle}](${generateUrl(docId)})`

      // Front-matter descriptions written as YAML block scalars contain line
      // breaks; collapse them so every entry stays on a single list line.
      const summary = description
        ? String(description).replace(/\s+/g, ' ').trim()
        : ''
      if (summary) {
        result += `: ${summary}`
      }
      result += '\n'
    } else if (item.type === 'category') {
      const hasChildren = item.items && item.items.length > 0

      if (isTopLevel) {
        result += `\n## ${item.label}\n`
        if (hasChildren) {
          result += processForLlmsTxt(item.items, 0, false)
        }
      } else {
        result += `${indentStr}${item.label}\n`
        if (hasChildren) {
          result += processForLlmsTxt(item.items, indent + 1, false)
        }
      }
    } else if (item.type === 'link') {
      const linkText = item.label || item.href
      result += `${indentStr}- [${linkText}](${item.href})\n`
    }
  }

  return result
}
126+
127+
/**
 * Renders sidebar items, including each page's cleaned body, for
 * llms-full.txt.
 *
 * - Pages with empty content are skipped.
 * - `doc` items at the top level use a heading of `headerLevel` hashes.
 * - String entries, and `doc` items below the top level, use one hash more.
 * - Top-level categories always use `##` and render their children from
 *   level 3; nested categories use `headerLevel` and descend by one.
 * - Heading depth is capped at six, the deepest heading Markdown supports.
 *
 * @param {Array<string|object>} items - Sidebar items to render.
 * @param {number} [headerLevel=2] - Heading depth for this nesting level.
 * @param {boolean} [isTopLevel=false] - Whether `items` is the sidebar root.
 * @returns {string} The rendered Markdown fragment.
 */
function processForLlmsFullTxt(items, headerLevel = 2, isTopLevel = false) {
  const MAX_HEADING_DEPTH = 6
  const heading = (level) => '#'.repeat(Math.min(level, MAX_HEADING_DEPTH))
  let result = ''

  for (const item of items) {
    const isShorthand = typeof item === 'string'

    if (isShorthand || item.type === 'doc') {
      const docId = isShorthand ? item : item.id
      const docPath = path.join('./documentation', docId + '.md')
      const { title, description, content } = extractMetadataAndContent(docPath)

      if (!content.trim()) {
        continue
      }

      const displayTitle = (isShorthand ? null : item.label) || title || docId
      // Only explicit doc objects at the sidebar root sit at the current
      // level; every other page nests one level below it.
      const level = !isShorthand && isTopLevel ? headerLevel : headerLevel + 1
      result += `\n${heading(level)} ${displayTitle}\n`

      // Collapse line breaks from multi-line YAML descriptions so the
      // description stays on one line.
      const summary = description
        ? String(description).replace(/\s+/g, ' ').trim()
        : ''
      if (summary) {
        result += `**Description**: ${summary}\n`
      }

      // The blank line keeps the body from merging into the heading or
      // description paragraph; string entries previously lacked it.
      result += '\n' + content + '\n\n'
    } else if (item.type === 'category') {
      const hasChildren = item.items && item.items.length > 0

      if (isTopLevel) {
        result += `\n## ${item.label}\n\n`
        if (hasChildren) {
          result += processForLlmsFullTxt(item.items, 3, false)
        }
      } else {
        result += `\n${heading(headerLevel)} ${item.label}\n\n`
        if (hasChildren) {
          result += processForLlmsFullTxt(item.items, headerLevel + 1, false)
        }
      }
    }
  }

  return result
}
183+
184+
/**
 * Builds llms.txt (link index) and llms-full.txt (full page text) from the
 * `docs` sidebar and writes both into `./static`, resolved against the
 * current working directory. Progress and output sizes are logged.
 *
 * @throws {TypeError} When the sidebar configuration has no `docs` array.
 * @throws {Error} Any file-system error raised while writing the output.
 */
function generateLlmsFiles() {
  console.log('Generating llms.txt and llms-full.txt from QuestDB documentation...')

  const docs = sidebarConfig.docs
  if (!Array.isArray(docs)) {
    // Fail with a clear message instead of an opaque "not iterable" error.
    throw new TypeError('Sidebar configuration must export a "docs" array')
  }

  const llmsHeader = ['# QuestDB Documentation', '', '## Getting Started', '', ''].join('\n')
  const llmsOutput = llmsHeader + processForLlmsTxt(docs, 0, true)

  const llmsFullHeader = [
    '# QuestDB Documentation - Complete Content',
    '',
    'This file contains the complete text content of QuestDB documentation organized hierarchically.',
    '',
    '## Getting Started',
    '',
    ''
  ].join('\n')
  const llmsFullOutput = llmsFullHeader + processForLlmsFullTxt(docs, 2, true)

  // Create the target directory if it is missing (e.g. a fresh checkout).
  fs.mkdirSync('./static', { recursive: true })
  fs.writeFileSync('./static/llms.txt', llmsOutput)
  fs.writeFileSync('./static/llms-full.txt', llmsFullOutput)

  // Report encoded byte sizes; String.length counts UTF-16 code units and
  // under-reports any non-ASCII content.
  const llmsBytes = Buffer.byteLength(llmsOutput, 'utf8')
  const llmsFullBytes = Buffer.byteLength(llmsFullOutput, 'utf8')

  console.log('✅ llms.txt generated successfully!')
  console.log(` - Size: ${(llmsBytes / 1024).toFixed(2)} KB`)

  console.log('✅ llms-full.txt generated successfully!')
  console.log(` - Size: ${(llmsFullBytes / 1024 / 1024).toFixed(2)} MB`)
}
214+
215+
// Script entry point: run the generator once. A failure is reported on
// stderr and is not rethrown from here.
try {
  generateLlmsFiles()
} catch (generationError) {
  console.error('Error generating llms files:', generationError)
}

0 commit comments

Comments
 (0)