diff --git a/docs/pages/product/configuration/reference/environment-variables.mdx b/docs/pages/product/configuration/reference/environment-variables.mdx index ddb9ef4207a6b..de5d8e517a1a4 100644 --- a/docs/pages/product/configuration/reference/environment-variables.mdx +++ b/docs/pages/product/configuration/reference/environment-variables.mdx @@ -1377,6 +1377,16 @@ See [this issue](https://github.com/cube-js/cube/issues/9285) for details. +## `CUBEJS_YAML_FOLDED_STRINGS_AS_LITERAL` + +If `true`, enables interpreting [folded strings][ref-yaml-folded-and-literal] in +YAML-based data model files as literal strings. This affects how line breaks and spaces +are handled in multi-line strings. + +| Possible Values | Default in Development | Default in Production | +| --------------- | ---------------------- | --------------------- | +| `true`, `false` | `true` | `true` | + ## `CUBEJS_WEB_SOCKETS` If `true`, then use WebSocket for data fetching. @@ -1799,4 +1809,5 @@ The port for a Cube deployment to listen to API connections on. 
/**
 * Rewrites folded block scalars on `sql` keys (`sql: >`) into literal block
 * scalars (`sql: |`) in raw YAML text, so that line breaks inside multi-line
 * SQL survive parsing. Without this, folding joins consecutive lines and a
 * `-- comment` line swallows the SQL that follows it.
 *
 * Only a `>` that is a genuine block scalar header is rewritten: it may be
 * followed by optional chomping (`+`/`-`) and indentation (`1`-`9`)
 * indicators in either order, an optional `# comment`, and then the end of
 * the line. A `>` followed by anything else (for example `sql: >= 10` inside
 * a description or quoted string) is left untouched.
 *
 * @param yamlContent Raw text of a YAML data model file.
 * @returns The same text with folded `sql` headers replaced by literal ones.
 */
private preprocessYamlSqlMultilineValues(yamlContent: string): string {
  // Group 1 keeps the original `sql:` prefix (including any whitespace or
  // line break before the indicator) so only the `>` character is swapped.
  // The lookahead pins the match to a block scalar header; the `m` flag lets
  // `$` match at every line end, and `\r?` tolerates CRLF files.
  const foldedSqlHeader =
    /(\s+sql:\s*)>(?=(?:[+-][1-9]?|[1-9][+-]?)?(?:[ \t]+#.*)?[ \t]*\r?$)/gm;

  return yamlContent.replace(foldedSqlHeader, '$1|');
}
const { compiler, joinGraph, cubeEvaluator } = prepareCompiler([ + { + fileName: 'test.js', + content: ` + cube('JSTestCube', { + sql: \` + SELECT + r.id as record_id, + r.created_at as record_created_at, + -- Extract target_record_id from workspace association JSON + JSON_EXTRACT_SCALAR(workspace.value, '$[0].target_record_id') as workspace_target_record_id, + -- Get actual workspace name by joining with workspace record + CASE + WHEN workspace_name.value IS NOT NULL + THEN JSON_EXTRACT_SCALAR(JSON_EXTRACT_ARRAY(workspace_name.value)[OFFSET(0)], '$.value') + ELSE NULL + END as workspace_name + FROM \\\`table\\\`.\\\`record\\\` r + JOIN \\\`table\\\`.\\\`object\\\` o ON r.object_id = o.id + -- Get company name + LEFT JOIN \\\`table\\\`.\\\`record_value\\\` company_name ON r.id = company_name.record_id + AND company_name.name = 'name' + WHERE r._fivetran_deleted = FALSE + AND o.singular_noun = 'Company' + \`, + + dimensions: { + record_id: { + sql: 'record_id', + type: 'string', + primaryKey: true + } + }, + + measures: { + count: { + type: 'count' + } + } + }); + ` + } + ]); + + await compiler.compile(); + + // Build a simple query to extract the actual SQL + const query = new PostgresQuery({ joinGraph, cubeEvaluator, compiler }, { + measures: ['JSTestCube.count'], + dimensions: ['JSTestCube.record_id'], + timezone: 'UTC' + }); + + const [sql] = query.buildSqlAndParams(); + + // Verify that SQL comments are preserved on separate lines + expect(sql).toContain('-- Extract target_record_id from workspace association JSON'); + expect(sql).toContain('-- Get actual workspace name by joining with workspace record'); + expect(sql).toContain('-- Get company name'); + + // Ensure comments are on separate lines in JS models + const lines = sql.split('\n'); + const commentLine = lines.find(line => line.trim() === '-- Get company name'); + expect(commentLine).toBeDefined(); + }); + + it('handles edge cases in JS SQL strings', async () => { + const { compiler, joinGraph, 
cubeEvaluator } = prepareCompiler([ + { + fileName: 'edge-cases.js', + content: ` + cube('EdgeCasesTest', { + sql: \` + SELECT + id, + -- Comment with 'quotes' and "double quotes" + name, + -- Comment with special chars: !@#$%^&*() + email, + created_at + FROM users + -- SQL string in comment: SELECT * FROM table + WHERE active = true + \`, + + dimensions: { + id: { + sql: 'id', + type: 'string', + primaryKey: true + } + }, + + measures: { + count: { + type: 'count' + } + } + }); + ` + } + ]); + + await compiler.compile(); + + const query = new PostgresQuery({ joinGraph, cubeEvaluator, compiler }, { + measures: ['EdgeCasesTest.count'], + dimensions: ['EdgeCasesTest.id'], + timezone: 'UTC' + }); + + const [sql] = query.buildSqlAndParams(); + + const testLines = [ + '-- Comment with \'quotes\' and "double quotes"', + '-- Comment with special chars: !@#$%^&*()', + '-- SQL string in comment: SELECT * FROM table', + ]; + + // Ensure all comments are properly preserved + const lines = sql.split('\n').map(l => l.trim()); + for (const testLine of testLines) { + expect(lines.includes(testLine)).toBeTruthy(); + } + }); +}); diff --git a/packages/cubejs-schema-compiler/test/unit/yaml-sql-comments.test.ts b/packages/cubejs-schema-compiler/test/unit/yaml-sql-comments.test.ts new file mode 100644 index 0000000000000..81401bfa495ad --- /dev/null +++ b/packages/cubejs-schema-compiler/test/unit/yaml-sql-comments.test.ts @@ -0,0 +1,174 @@ +import { PostgresQuery } from '../../src/adapter/PostgresQuery'; +import { prepareYamlCompiler } from './PrepareCompiler'; + +describe('YAML SQL Formatting Preservation', () => { + it('handles sql: > (folded scalar)', async () => { + const { compiler, joinGraph, cubeEvaluator } = prepareYamlCompiler( + ` + cubes: + - name: Orders + sql: > + SELECT + r.id as record_id, + r.created_at as record_created_at, + -- Extract target_record_id from workspace association JSON + JSON_EXTRACT_SCALAR(workspace.value, '$[0].target_record_id') as 
workspace_target_record_id, + -- Get actual workspace name by joining with workspace record + CASE + WHEN workspace_name.value IS NOT NULL + THEN JSON_EXTRACT_SCALAR(JSON_EXTRACT_ARRAY(workspace_name.value)[OFFSET(0)], '$.value') + ELSE NULL + END as workspace_name + FROM \`table\`.\`record\` r + JOIN \`table\`.\`object\` o ON r.object_id = o.id + -- Get company name + LEFT JOIN \`table\`.\`record_value\` company_name ON r.id = company_name.record_id + AND company_name.name = 'name' + WHERE r._fivetran_deleted = FALSE + AND o.singular_noun = 'Company' + + dimensions: + - name: record_id + sql: record_id + type: string + primaryKey: true + measures: + - name: count + type: count + ` + ); + + await compiler.compile(); + + // Build a simple query to extract the actual SQL + const query = new PostgresQuery({ joinGraph, cubeEvaluator, compiler }, { + measures: ['Orders.count'], + dimensions: ['Orders.record_id'], + timezone: 'UTC' + }); + + const [sql] = query.buildSqlAndParams(); + + // Verify that SQL comments are preserved on separate lines + expect(sql).toContain('-- Extract target_record_id from workspace association JSON'); + expect(sql).toContain('-- Get actual workspace name by joining with workspace record'); + expect(sql).toContain('-- Get company name'); + + // Most importantly, ensure comments are NOT merged with the previous line + const lines = sql.split('\n'); + const commentLine = lines.find(line => line.trim() === '-- Get company name'); + expect(commentLine).toBeDefined(); + }); + + it('handles sql: | (literal scalar)', async () => { + const { compiler, joinGraph, cubeEvaluator } = prepareYamlCompiler( + ` + cubes: + - name: TestCube + sql: | + SELECT id, name + -- Comment here + FROM table1 + WHERE active = true + + dimensions: + - name: id + sql: id + type: string + primaryKey: true + measures: + - name: count + type: count + ` + ); + + await compiler.compile(); + + const query = new PostgresQuery({ joinGraph, cubeEvaluator, compiler }, { + measures: 
['TestCube.count'], + dimensions: ['TestCube.id'], + timezone: 'UTC' + }); + + const [sql] = query.buildSqlAndParams(); + + // Should preserve comments with literal scalar + expect(sql).toContain('-- Comment here'); + const lines = sql.split('\n'); + const commentLine = lines.find(line => line.trim() === '-- Comment here'); + expect(commentLine).toBeDefined(); + }); + + it('handles single-line SQL without multilines', async () => { + const { compiler, joinGraph, cubeEvaluator } = prepareYamlCompiler( + ` + cubes: + - name: TestCube + sql: "SELECT id, name FROM table1" + + dimensions: + - name: id + sql: id + type: string + primaryKey: true + measures: + - name: count + type: count + ` + ); + + await compiler.compile(); + + const query = new PostgresQuery({ joinGraph, cubeEvaluator, compiler }, { + measures: ['TestCube.count'], + dimensions: ['TestCube.id'], + timezone: 'UTC' + }); + + const [sql] = query.buildSqlAndParams(); + + // Should work normally for single-line SQL + expect(sql).toContain('SELECT id, name FROM table1'); + }); + + it('works correctly for SQL without comments', async () => { + const { compiler, joinGraph, cubeEvaluator } = prepareYamlCompiler( + ` + cubes: + - name: SimpleOrders + sql: > + SELECT + id, + amount, + status + FROM orders + WHERE active = true + + dimensions: + - name: id + sql: id + type: string + primaryKey: true + measures: + - name: count + type: count + ` + ); + + await compiler.compile(); + + // Build a simple query to extract the actual SQL + const query = new PostgresQuery({ joinGraph, cubeEvaluator, compiler }, { + measures: ['SimpleOrders.count'], + dimensions: ['SimpleOrders.id'], + timezone: 'UTC' + }); + + const [sql] = query.buildSqlAndParams(); + + // Should still work normally for SQL without comments + expect(sql).toContain('SELECT'); + expect(sql).toContain('FROM orders'); + expect(sql).toContain('WHERE active = true'); + }); +});