1
+ const fs = require ( 'fs' )
2
+ const path = require ( 'path' )
3
+ const yaml = require ( 'js-yaml' )
4
+
5
+ const sidebarConfig = require ( '../documentation/sidebars.js' )
6
+ const BASE_URL = 'https://questdb.com/docs/'
7
+
8
+ const processedFiles = new Map ( )
9
+
10
/**
 * Reads a documentation page from disk, trying `filePath` first and then the
 * same path with its trailing `.md` extension swapped for `.mdx`.
 *
 * @param {string} filePath - Path to the `.md` file.
 * @returns {string} Raw UTF-8 contents of whichever file exists.
 * @throws {Error} When neither the `.md` nor the `.mdx` file exists.
 */
function readMarkdownSource(filePath) {
  if (fs.existsSync(filePath)) {
    return fs.readFileSync(filePath, 'utf8')
  }

  // Anchor on the end of the path: a plain '.md' string replace rewrites the
  // first '.md' found anywhere (e.g. inside a directory name).
  const mdxPath = filePath.replace(/\.md$/, '.mdx')
  if (fs.existsSync(mdxPath)) {
    return fs.readFileSync(mdxPath, 'utf8')
  }

  throw new Error(`File not found: ${filePath} or ${mdxPath}`)
}

/**
 * Strips MDX-only syntax from a page body: `import` lines, self-closing
 * capitalised components and the leading `#` markers of headings. Fenced code
 * blocks are copied through untouched so that code samples keep their own
 * `import` statements and `#` comments.
 *
 * @param {string} markdown - Page body without frontmatter.
 * @returns {string} Cleaned, trimmed text with runs of blank lines collapsed.
 */
function stripMdxNoise(markdown) {
  const cleanProse = (text) =>
    text
      // Remove import statements
      .replace(/^import\s+.*$/gm, '')
      // Remove self-closing components
      .replace(/<[A-Z][^>]*\/>/g, '')
      // Remove header markers, keeping the header text
      .replace(/^#{1,6}\s*(.*)$/gm, '$1')

  // A fence opens with 3+ backticks or tildes at the start of a line and
  // closes on a line starting with the same marker. An unclosed fence does
  // not match, so the remainder is treated as prose.
  const fencePattern = /^[ \t]*(`{3,}|~{3,})[^\n]*\n[\s\S]*?^[ \t]*\1[`~]*[ \t]*$/gm

  let cleaned = ''
  let cursor = 0
  let fence
  while ((fence = fencePattern.exec(markdown)) !== null) {
    cleaned += cleanProse(markdown.slice(cursor, fence.index)) + fence[0]
    cursor = fence.index + fence[0].length
  }
  cleaned += cleanProse(markdown.slice(cursor))

  // Remove extra newlines
  return cleaned.replace(/\n\s*\n\s*\n/g, '\n\n').trim()
}

/**
 * Loads a documentation page and returns its frontmatter title/description
 * together with the cleaned body text.
 *
 * Results are memoised in `processedFiles` keyed by `filePath`; a failed read
 * is logged as a warning and cached as an empty result, so each path is read
 * (and warned about) at most once.
 *
 * @param {string} filePath - Path to the `.md` page; `.mdx` is tried as a fallback.
 * @returns {{title: (string|null), description: (string|null), content: string}}
 */
function extractMetadataAndContent(filePath) {
  if (processedFiles.has(filePath)) {
    return processedFiles.get(filePath)
  }

  let result
  try {
    const raw = readMarkdownSource(filePath)
    const match = raw.match(/^---\s*\n([\s\S]*?)\n---\s*\n([\s\S]*)$/)

    // NOTE(review): the frontmatter comes from repository files; confirm the
    // installed js-yaml version's `load` uses a safe schema by default.
    const frontmatter = match ? yaml.load(match[1]) || {} : {}
    const body = match ? match[2] : raw

    result = {
      title: frontmatter.title || null,
      description: frontmatter.description || null,
      content: stripMdxNoise(body)
    }
  } catch (error) {
    console.warn(`Warning: Could not read file ${filePath}: ${error.message}`)
    result = { title: null, description: null, content: '' }
  }

  processedFiles.set(filePath, result)
  return result
}
67
+
68
/**
 * Builds the public documentation URL for a sidebar document id.
 *
 * The `introduction` document maps to `BASE_URL` itself; every other id is
 * appended to `BASE_URL` unchanged.
 *
 * @param {string} docId - Sidebar document id, e.g. `introduction`.
 * @returns {string} URL of the document.
 */
function generateUrl(docId) {
  const isIntroduction = docId === 'introduction'
  return isIntroduction ? BASE_URL : `${BASE_URL}${docId}`
}
74
+
75
/**
 * Renders sidebar items as the Markdown link index used for llms.txt.
 *
 * - string items and `doc` items become `- [title](url): description` lines
 *   (a `doc` item's `label` wins over the frontmatter title);
 * - top-level `category` items become `## label` sections whose children
 *   restart at indent 0;
 * - nested `category` items emit their label on its own line and render their
 *   children one indent level deeper;
 * - `link` items become `- [label](href)` lines.
 *
 * @param {Array<string|Object>} items - Sidebar items to render.
 * @param {number} [indent=0] - Nesting depth of the emitted list lines.
 * @param {boolean} [isTopLevel=false] - Whether `items` is the sidebar root.
 * @returns {string} Markdown fragment, one entry per line.
 */
function processForLlmsTxt(items, indent = 0, isTopLevel = false) {
  // Two spaces per level: Markdown only treats a list item as nested when it
  // is indented at least to the parent's content column.
  const indentStr = '  '.repeat(indent)

  // Shared renderer for string ids and `doc` objects.
  const renderDocLine = (docId, label) => {
    const docPath = path.join('./documentation', docId + '.md')
    const { title, description } = extractMetadataAndContent(docPath)
    const displayTitle = label || title || docId

    let line = `${indentStr}- [${displayTitle}](${generateUrl(docId)})`
    // Collapse whitespace so a multi-line frontmatter description cannot
    // break the one-entry-per-line layout.
    const summary = description ? String(description).replace(/\s+/g, ' ').trim() : ''
    if (summary) {
      line += `: ${summary}`
    }
    return line + '\n'
  }

  let result = ''

  for (const item of items || []) {
    if (!item) {
      continue
    }

    if (typeof item === 'string') {
      result += renderDocLine(item, null)
    } else if (item.type === 'doc') {
      result += renderDocLine(item.id, item.label)
    } else if (item.type === 'category') {
      const hasChildren = Boolean(item.items && item.items.length > 0)

      if (isTopLevel) {
        result += `\n## ${item.label}\n`
        if (hasChildren) {
          result += processForLlmsTxt(item.items, 0, false)
        }
      } else {
        result += `${indentStr}${item.label}\n`
        if (hasChildren) {
          result += processForLlmsTxt(item.items, indent + 1, false)
        }
      }
    } else if (item.type === 'link') {
      const linkText = item.label || item.href
      result += `${indentStr}- [${linkText}](${item.href})\n`
    }
  }

  return result
}
126
+
127
/**
 * Renders sidebar items as one Markdown document containing the full text of
 * every page, for llms-full.txt.
 *
 * - string items are emitted one heading level below `headerLevel`;
 * - `doc` items are emitted at `headerLevel` when `isTopLevel`, otherwise one
 *   level below it (a `doc` item's `label` wins over the frontmatter title);
 * - top-level `category` items become `## label` with children at level 3;
 * - nested `category` items use `headerLevel`, children one level deeper;
 * - pages whose cleaned content is empty are skipped, other item types are
 *   ignored.
 *
 * @param {Array<string|Object>} items - Sidebar items to render.
 * @param {number} [headerLevel=2] - Heading depth for this nesting level.
 * @param {boolean} [isTopLevel=false] - Whether `items` is the sidebar root.
 * @returns {string} Markdown fragment.
 */
function processForLlmsFullTxt(items, headerLevel = 2, isTopLevel = false) {
  // Markdown defines only six heading levels; a deeper prefix would be emitted
  // as literal '#######' text, so the depth is clamped.
  const MAX_HEADING_LEVEL = 6
  const heading = (level, text) =>
    `\n${'#'.repeat(Math.min(level, MAX_HEADING_LEVEL))} ${text}\n`

  // Shared renderer for string ids and `doc` objects.
  const renderDoc = (docId, label, level) => {
    const docPath = path.join('./documentation', docId + '.md')
    const { title, description, content } = extractMetadataAndContent(docPath)

    if (!content.trim()) {
      return ''
    }

    let section = heading(level, label || title || docId)
    if (description) {
      section += `**Description**: ${description}\n`
    }
    // The blank line keeps the body from merging into the heading or
    // description paragraph.
    return `${section}\n${content}\n\n`
  }

  let result = ''

  for (const item of items || []) {
    if (!item) {
      continue
    }

    if (typeof item === 'string') {
      result += renderDoc(item, null, headerLevel + 1)
    } else if (item.type === 'doc') {
      result += renderDoc(item.id, item.label, isTopLevel ? headerLevel : headerLevel + 1)
    } else if (item.type === 'category') {
      const hasChildren = Boolean(item.items && item.items.length > 0)

      if (isTopLevel) {
        result += `\n## ${item.label}\n\n`
        if (hasChildren) {
          result += processForLlmsFullTxt(item.items, 3, false)
        }
      } else {
        result += heading(headerLevel, item.label) + '\n'
        if (hasChildren) {
          result += processForLlmsFullTxt(item.items, headerLevel + 1, false)
        }
      }
    }
  }

  return result
}
183
+
184
/**
 * Builds both LLM-facing exports from the `docs` sidebar and writes them to
 * `./static/llms.txt` (link index) and `./static/llms-full.txt` (full text).
 * Paths are relative to the current working directory. The size of each file
 * is logged after writing.
 *
 * @throws {Error} Propagates any error from rendering or from `fs.writeFileSync`.
 */
function generateLlmsFiles() {
  console.log('Generating llms.txt and llms-full.txt from QuestDB documentation...')

  const docs = sidebarConfig.docs

  const llmsHeader = '# QuestDB Documentation\n\n## Getting Started\n\n'
  const llmsOutput = llmsHeader + processForLlmsTxt(docs, 0, true)

  const llmsFullHeader =
    '# QuestDB Documentation - Complete Content\n\n' +
    'This file contains the complete text content of QuestDB documentation organized hierarchically.\n\n' +
    '## Getting Started\n\n'
  const llmsFullOutput = llmsFullHeader + processForLlmsFullTxt(docs, 2, true)

  fs.writeFileSync('./static/llms.txt', llmsOutput)
  fs.writeFileSync('./static/llms-full.txt', llmsFullOutput)

  // Report bytes as written (UTF-8), not String.length, which counts UTF-16
  // code units and under-reports any non-ASCII content.
  const llmsBytes = Buffer.byteLength(llmsOutput, 'utf8')
  const llmsFullBytes = Buffer.byteLength(llmsFullOutput, 'utf8')

  console.log('✅ llms.txt generated successfully!')
  console.log(` - Size: ${(llmsBytes / 1024).toFixed(2)} KB`)

  console.log('✅ llms-full.txt generated successfully!')
  console.log(` - Size: ${(llmsFullBytes / 1024 / 1024).toFixed(2)} MB`)
}
214
+
215
// Script entry point: run the generator once and report any failure on stderr.
// NOTE(review): the error is only logged, so the process still exits with
// code 0 when generation fails — confirm that is intended for build pipelines.
try {
  generateLlmsFiles()
} catch (err) {
  console.error('Error generating llms files:', err)
}
0 commit comments