Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(databricks-jdbc-driver): Switch to the latest OSS Databricks JDBC driver #9376

Merged
merged 7 commits into from
Mar 26, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 9 additions & 3 deletions packages/cubejs-backend-shared/src/env.ts
Original file line number Diff line number Diff line change
Expand Up @@ -952,9 +952,15 @@ const variables: Record<string, (...args: any) => any> = {
* Accept Databricks policy flag. This environment variable doesn't
* need to be split by the data source.
*/
databrickAcceptPolicy: () => (
get('CUBEJS_DB_DATABRICKS_ACCEPT_POLICY').asBoolStrict()
),
databrickAcceptPolicy: () => {
  // Deprecated flag: the OSS Databricks JDBC driver no longer requires
  // accepting Simba's Terms & Conditions, so the value itself is unused.
  const legacyFlag = get('CUBEJS_DB_DATABRICKS_ACCEPT_POLICY').asBoolStrict();

  // Nothing to do unless the obsolete variable is still set in the env.
  if (legacyFlag === undefined) {
    return;
  }

  console.warn(
    'The CUBEJS_DB_DATABRICKS_ACCEPT_POLICY is not needed anymore. Please, remove it'
  );
},

/**
* Databricks jdbc-connection url.
Expand Down
14 changes: 12 additions & 2 deletions packages/cubejs-backend-shared/src/http-utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -66,9 +66,11 @@ export async function streamWithProgress(
// Options for downloadAndExtractFile().
type DownloadAndExtractFile = {
  // When true, a progress indicator is rendered while streaming the download.
  showProgress: boolean;
  // Directory that receives the extracted archive contents (or the raw file).
  cwd: string;
  // When true, skip archive extraction and copy the downloaded file as-is.
  noExtract?: boolean;
  // File name for the copied download when noExtract is set; joined onto cwd.
  // Ignored when noExtract is falsy.
  dstFileName?: string;
};

export async function downloadAndExtractFile(url: string, { cwd }: DownloadAndExtractFile) {
export async function downloadAndExtractFile(url: string, { cwd, noExtract, dstFileName }: DownloadAndExtractFile) {
const request = new Request(url, {
headers: new Headers({
'Content-Type': 'application/octet-stream',
Expand Down Expand Up @@ -99,7 +101,15 @@ export async function downloadAndExtractFile(url: string, { cwd }: DownloadAndEx
});
});

await decompress(savedFilePath, cwd);
if (noExtract) {
if (dstFileName) {
fs.copyFileSync(savedFilePath, path.resolve(path.join(cwd, dstFileName)));
} else {
fs.copyFileSync(savedFilePath, cwd);
}
} else {
await decompress(savedFilePath, cwd);
}

try {
fs.unlinkSync(savedFilePath);
Expand Down
28 changes: 0 additions & 28 deletions packages/cubejs-backend-shared/test/db_env_multi.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1192,34 +1192,6 @@ describe('Multiple datasources', () => {
);
});

// NOTE(review): legacy test being removed by this PR. The
// CUBEJS_DB_DATABRICKS_ACCEPT_POLICY variable is read globally (it is not
// split per data source), so every dataSource argument is expected to
// resolve to the same value.
test('getEnv("databrickAcceptPolicy")', () => {
// 'true' must parse to boolean true regardless of data source.
process.env.CUBEJS_DB_DATABRICKS_ACCEPT_POLICY = 'true';
expect(getEnv('databrickAcceptPolicy', { dataSource: 'default' })).toEqual(true);
expect(getEnv('databrickAcceptPolicy', { dataSource: 'postgres' })).toEqual(true);
expect(getEnv('databrickAcceptPolicy', { dataSource: 'wrong' })).toEqual(true);

// 'false' must parse to boolean false regardless of data source.
process.env.CUBEJS_DB_DATABRICKS_ACCEPT_POLICY = 'false';
expect(getEnv('databrickAcceptPolicy', { dataSource: 'default' })).toEqual(false);
expect(getEnv('databrickAcceptPolicy', { dataSource: 'postgres' })).toEqual(false);
expect(getEnv('databrickAcceptPolicy', { dataSource: 'wrong' })).toEqual(false);

// Any non-boolean string makes env-var's asBoolStrict() throw.
process.env.CUBEJS_DB_DATABRICKS_ACCEPT_POLICY = 'wrong';
expect(() => getEnv('databrickAcceptPolicy', { dataSource: 'default' })).toThrow(
'env-var: "CUBEJS_DB_DATABRICKS_ACCEPT_POLICY" should be either "true", "false", "TRUE", or "FALSE"'
);
expect(() => getEnv('databrickAcceptPolicy', { dataSource: 'postgres' })).toThrow(
'env-var: "CUBEJS_DB_DATABRICKS_ACCEPT_POLICY" should be either "true", "false", "TRUE", or "FALSE"'
);
expect(() => getEnv('databrickAcceptPolicy', { dataSource: 'wrong' })).toThrow(
'env-var: "CUBEJS_DB_DATABRICKS_ACCEPT_POLICY" should be either "true", "false", "TRUE", or "FALSE"'
);

// Unset variable yields undefined (flag is optional).
delete process.env.CUBEJS_DB_DATABRICKS_ACCEPT_POLICY;
expect(getEnv('databrickAcceptPolicy', { dataSource: 'default' })).toBeUndefined();
expect(getEnv('databrickAcceptPolicy', { dataSource: 'postgres' })).toBeUndefined();
expect(getEnv('databrickAcceptPolicy', { dataSource: 'wrong' })).toBeUndefined();
});

test('getEnv("athenaAwsKey")', () => {
process.env.CUBEJS_AWS_KEY = 'default1';
process.env.CUBEJS_DS_POSTGRES_AWS_KEY = 'postgres1';
Expand Down
28 changes: 0 additions & 28 deletions packages/cubejs-backend-shared/test/db_env_single.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -760,34 +760,6 @@ describe('Single datasources', () => {
expect(getEnv('databricksCatalog', { dataSource: 'wrong' })).toBeUndefined();
});

// NOTE(review): legacy test being removed by this PR — single-datasource
// mirror of the multi-datasource case. The variable is global (not
// data-source scoped), so all dataSource arguments see the same value.
test('getEnv("databrickAcceptPolicy")', () => {
// 'true' must parse to boolean true regardless of data source.
process.env.CUBEJS_DB_DATABRICKS_ACCEPT_POLICY = 'true';
expect(getEnv('databrickAcceptPolicy', { dataSource: 'default' })).toEqual(true);
expect(getEnv('databrickAcceptPolicy', { dataSource: 'postgres' })).toEqual(true);
expect(getEnv('databrickAcceptPolicy', { dataSource: 'wrong' })).toEqual(true);

// 'false' must parse to boolean false regardless of data source.
process.env.CUBEJS_DB_DATABRICKS_ACCEPT_POLICY = 'false';
expect(getEnv('databrickAcceptPolicy', { dataSource: 'default' })).toEqual(false);
expect(getEnv('databrickAcceptPolicy', { dataSource: 'postgres' })).toEqual(false);
expect(getEnv('databrickAcceptPolicy', { dataSource: 'wrong' })).toEqual(false);

// Any non-boolean string makes env-var's asBoolStrict() throw.
process.env.CUBEJS_DB_DATABRICKS_ACCEPT_POLICY = 'wrong';
expect(() => getEnv('databrickAcceptPolicy', { dataSource: 'default' })).toThrow(
'env-var: "CUBEJS_DB_DATABRICKS_ACCEPT_POLICY" should be either "true", "false", "TRUE", or "FALSE"'
);
expect(() => getEnv('databrickAcceptPolicy', { dataSource: 'postgres' })).toThrow(
'env-var: "CUBEJS_DB_DATABRICKS_ACCEPT_POLICY" should be either "true", "false", "TRUE", or "FALSE"'
);
expect(() => getEnv('databrickAcceptPolicy', { dataSource: 'wrong' })).toThrow(
'env-var: "CUBEJS_DB_DATABRICKS_ACCEPT_POLICY" should be either "true", "false", "TRUE", or "FALSE"'
);

// Unset variable yields undefined (flag is optional).
delete process.env.CUBEJS_DB_DATABRICKS_ACCEPT_POLICY;
expect(getEnv('databrickAcceptPolicy', { dataSource: 'default' })).toBeUndefined();
expect(getEnv('databrickAcceptPolicy', { dataSource: 'postgres' })).toBeUndefined();
expect(getEnv('databrickAcceptPolicy', { dataSource: 'wrong' })).toBeUndefined();
});

test('getEnv("athenaAwsKey")', () => {
process.env.CUBEJS_AWS_KEY = 'default1';
expect(getEnv('athenaAwsKey', { dataSource: 'default' })).toEqual('default1');
Expand Down
23 changes: 8 additions & 15 deletions packages/cubejs-databricks-jdbc-driver/src/DatabricksDriver.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,27 +4,21 @@
* @fileoverview The `DatabricksDriver` and related types declaration.
*/

import { assertDataSource, getEnv, } from '@cubejs-backend/shared';
import {
getEnv,
assertDataSource,
} from '@cubejs-backend/shared';
import {
DatabaseStructure,
DriverCapabilities,
GenericDataBaseType,
QueryColumnsResult,
QueryOptions,
QuerySchemasResult,
QueryTablesResult,
UnloadOptions,
GenericDataBaseType,
TableColumn,
DatabaseStructure,
UnloadOptions,
} from '@cubejs-backend/base-driver';
import {
JDBCDriver,
JDBCDriverConfiguration,
} from '@cubejs-backend/jdbc-driver';
import { JDBCDriver, JDBCDriverConfiguration, } from '@cubejs-backend/jdbc-driver';
import { DatabricksQuery } from './DatabricksQuery';
import { resolveJDBCDriver, extractUidFromJdbcUrl } from './helpers';
import { extractUidFromJdbcUrl, resolveJDBCDriver } from './helpers';

export type DatabricksDriverConfiguration = JDBCDriverConfiguration &
{
Expand Down Expand Up @@ -132,7 +126,7 @@ export class DatabricksDriver extends JDBCDriver {
/**
* Show warning message flag.
*/
private showSparkProtocolWarn: boolean;
private readonly showSparkProtocolWarn: boolean;

/**
* Driver Configuration.
Expand Down Expand Up @@ -429,8 +423,7 @@ export class DatabricksDriver extends JDBCDriver {
metadata[database] = {};
}

const columns = await this.tableColumnTypes(`${database}.${tableName}`);
metadata[database][tableName] = columns;
metadata[database][tableName] = await this.tableColumnTypes(`${database}.${tableName}`);
}));

return metadata;
Expand Down
8 changes: 4 additions & 4 deletions packages/cubejs-databricks-jdbc-driver/src/helpers.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import fs from 'fs';
import path from 'path';

import { downloadJDBCDriver } from './installer';
import { downloadJDBCDriver, OSS_DRIVER_VERSION } from './installer';

async function fileExistsOr(
fsPath: string,
Expand All @@ -15,16 +15,16 @@ async function fileExistsOr(

export async function resolveJDBCDriver(): Promise<string> {
return fileExistsOr(
path.join(process.cwd(), 'DatabricksJDBC42.jar'),
path.join(process.cwd(), `databricks-jdbc-${OSS_DRIVER_VERSION}-oss.jar`),
async () => fileExistsOr(
path.join(__dirname, '..', 'download', 'DatabricksJDBC42.jar'),
path.join(__dirname, '..', 'download', `databricks-jdbc-${OSS_DRIVER_VERSION}-oss.jar`),
async () => {
const pathOrNull = await downloadJDBCDriver();
if (pathOrNull) {
return pathOrNull;
}
throw new Error(
'Please download and place DatabricksJDBC42.jar inside your ' +
`Please download and place databricks-jdbc-${OSS_DRIVER_VERSION}-oss.jar inside your ` +
'project directory'
);
}
Expand Down
52 changes: 23 additions & 29 deletions packages/cubejs-databricks-jdbc-driver/src/installer.ts
Original file line number Diff line number Diff line change
@@ -1,38 +1,32 @@
import path from 'path';
import { downloadAndExtractFile, getEnv } from '@cubejs-backend/shared';

// NOTE(review): removed by this PR — the OSS driver needs no policy
// acceptance. Reads the legacy CUBEJS_DB_DATABRICKS_ACCEPT_POLICY flag,
// logs the user's decision, and returns the tri-state result:
// true (accepted), false (declined), or undefined (unset).
function acceptedByEnv() {
const acceptStatus = getEnv('databrickAcceptPolicy');
// Truthy (true) means the user accepted the Simba driver's terms.
if (acceptStatus) {
console.log('You accepted Terms & Conditions for JDBC driver from DataBricks by CUBEJS_DB_DATABRICKS_ACCEPT_POLICY');
}

// Explicit false (as opposed to merely unset) means the user declined,
// so the driver download is skipped.
if (acceptStatus === false) {
console.log('You declined Terms & Conditions for JDBC driver from DataBricks by CUBEJS_DB_DATABRICKS_ACCEPT_POLICY');
console.log('Installation will be skipped');
}

return acceptStatus;
}

export const OSS_DRIVER_VERSION = '1.0.2';

/**
* In the beginning of 2025 Databricks released their open-source version of JDBC driver and encourage
* all users to migrate to it as company plans to focus on improving and evolving it over legacy simba driver.
* More info about OSS Driver could be found at https://docs.databricks.com/aws/en/integrations/jdbc/oss
* As of March 2025 To use the Databricks JDBC Driver (OSS), the following requirements must be met:
* Java Runtime Environment (JRE) 11.0 or above. CI testing is supported on JRE 11, 17, and 21.
*/
export async function downloadJDBCDriver(): Promise<string | null> {
const driverAccepted = acceptedByEnv();

if (driverAccepted) {
console.log('Downloading DatabricksJDBC42-2.6.29.1051');
// TODO: Just to throw a console warning that this ENV is obsolete and could be safely removed
getEnv('databrickAcceptPolicy');

await downloadAndExtractFile(
'https://databricks-bi-artifacts.s3.us-east-2.amazonaws.com/simbaspark-drivers/jdbc/2.6.29/DatabricksJDBC42-2.6.29.1051.zip',
{
showProgress: true,
cwd: path.resolve(path.join(__dirname, '..', 'download')),
}
);
console.log(`Downloading databricks-jdbc-${OSS_DRIVER_VERSION}-oss.jar`);

console.log('Release notes: https://databricks-bi-artifacts.s3.us-east-2.amazonaws.com/simbaspark-drivers/jdbc/2.6.29/docs/release-notes.txt');
await downloadAndExtractFile(
`https://repo1.maven.org/maven2/com/databricks/databricks-jdbc/${OSS_DRIVER_VERSION}-oss/databricks-jdbc-${OSS_DRIVER_VERSION}-oss.jar`,
{
showProgress: true,
cwd: path.resolve(path.join(__dirname, '..', 'download')),
noExtract: true,
dstFileName: `databricks-jdbc-${OSS_DRIVER_VERSION}-oss.jar`,
}
);

return path.resolve(path.join(__dirname, '..', 'download', 'DatabricksJDBC42.jar'));
}
console.log(`Release notes: https://mvnrepository.com/artifact/com.databricks/databricks-jdbc/${OSS_DRIVER_VERSION}-oss`);

return null;
return path.resolve(path.join(__dirname, '..', 'download', `databricks-jdbc-${OSS_DRIVER_VERSION}-oss.jar`));
}
9 changes: 2 additions & 7 deletions packages/cubejs-databricks-jdbc-driver/src/post-install.ts
Original file line number Diff line number Diff line change
@@ -1,16 +1,11 @@
import 'source-map-support/register';

import { displayCLIError } from '@cubejs-backend/shared';

import fs from 'fs';
import path from 'path';
import { downloadJDBCDriver } from './installer';
import { resolveJDBCDriver } from './helpers';

(async () => {
try {
if (!fs.existsSync(path.join(__dirname, '..', 'download', 'SparkJDBC42.jar'))) {
await downloadJDBCDriver();
}
await resolveJDBCDriver();
} catch (e: any) {
await displayCLIError(e, 'Cube.js Databricks JDBC Installer');
}
Expand Down