Skip to content

Commit 9c95a3f

Browse files
committed
WIP
1 parent 39efe53 commit 9c95a3f

File tree

5 files changed

+150
-58
lines changed

5 files changed

+150
-58
lines changed

firestore-bigquery-export/scripts/gen-schema-view/src/__tests__/config/index.test.ts

Lines changed: 75 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,33 @@ describe("parseConfig", () => {
8989
expect(result.bigQueryProjectId).toBe("test-project");
9090
});
9191

92+
it("should use gemini if specified", async () => { // TODO: This test needs to be completed
93+
// Setup mocks with useGemini = true
94+
const mockProgram = {
95+
nonInteractive: true,
96+
project: "test-project",
97+
bigQueryProject: "test-bq-project",
98+
dataset: "test-dataset",
99+
tableNamePrefix: "test-prefix",
100+
schemaFiles: ["schema.json"],
101+
useGemini: true,
102+
googleAiKey: "test-key",
103+
geminiAnalyzeCollectionPath: "test-collection",
104+
schemaDirectory: "test-directory",
105+
outputHelp: jest.fn(),
106+
};
107+
108+
(parseProgram as jest.Mock).mockReturnValue(mockProgram);
109+
(validateNonInteractiveParams as jest.Mock).mockReturnValue(true);
110+
111+
const result = await parseConfig();
112+
113+
expect(result.useGemini).toBe(true);
114+
expect(result.googleAiKey).toBe("test-key");
115+
expect(result.geminiAnalyzeCollectionPath).toBe("test-collection");
116+
expect(result.schemaDirectory).toBe("test-directory");
117+
});
118+
92119
it("should exit if required parameters are missing", async () => {
93120
const mockProgram = {
94121
nonInteractive: true,
@@ -104,7 +131,7 @@ describe("parseConfig", () => {
104131
});
105132
});
106133

107-
describe("Interactive mode", () => {
134+
describe("Interactive mode without Gemini", () => {
108135
it("should return CLI config from inquirer prompts", async () => {
109136
// Setup mocks for interactive mode
110137
const mockProgram = {
@@ -116,6 +143,7 @@ describe("parseConfig", () => {
116143
bigQueryProject: "interactive-bq-project",
117144
dataset: "interactive-dataset",
118145
tableNamePrefix: "interactive-prefix",
146+
useGemini: false,
119147
schemaFiles: "schema1.json, schema2.json",
120148
};
121149

@@ -155,6 +183,7 @@ describe("parseConfig", () => {
155183
bigQueryProject: "test-bq-project",
156184
dataset: "test-dataset",
157185
tableNamePrefix: "test-prefix",
186+
useGemini: false,
158187
schemaFiles: " schema1.json, schema2.json , schema3.json",
159188
};
160189

@@ -172,4 +201,49 @@ describe("parseConfig", () => {
172201
]);
173202
});
174203
});
204+
205+
describe("Interactive mode with Gemini", () => { // TODO: This needs to be completed
206+
it("should return CLI config from inquirer prompts", async () => {
207+
// Setup mocks for interactive mode
208+
const mockProgram = {
209+
nonInteractive: false,
210+
};
211+
212+
const mockPromptResponse = {
213+
project: "interactive-project",
214+
bigQueryProject: "interactive-bq-project",
215+
dataset: "interactive-dataset",
216+
tableNamePrefix: "interactive-prefix",
217+
useGemini: true,
218+
googleAiKey: "test-key",
219+
geminiAnalyzeCollectionPath: "test-collection",
220+
schemaDirectory: "test-directory",
221+
};
222+
223+
const mockSchemas = {
224+
schema1: { fields: { field1: { type: "string" } } },
225+
schema2: { fields: { field2: { type: "number" } } },
226+
};
227+
228+
(parseProgram as jest.Mock).mockReturnValue(mockProgram);
229+
(promptInquirer as jest.Mock).mockResolvedValue(mockPromptResponse);
230+
(readSchemas as jest.Mock).mockReturnValue(mockSchemas);
231+
232+
const result = await parseConfig();
233+
234+
expect(parseProgram).toHaveBeenCalled();
235+
expect(promptInquirer).toHaveBeenCalled();
236+
expect(readSchemas).toHaveBeenCalledWith([
237+
"schema1.json",
238+
"schema2.json",
239+
]);
240+
expect(result).toEqual({
241+
projectId: "interactive-project",
242+
bigQueryProjectId: "interactive-bq-project",
243+
datasetId: "interactive-dataset",
244+
tableNamePrefix: "interactive-prefix",
245+
schemas: mockSchemas,
246+
});
247+
});
248+
});
175249
});

firestore-bigquery-export/scripts/gen-schema-view/src/config/index.ts

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,10 @@ export interface CliConfig {
1414
tableNamePrefix: string;
1515
schemas: { [schemaName: string]: FirestoreSchema };
1616
useGemini?: boolean;
17+
geminiAnalyzeCollectionPath?: string;
1718
agentSampleSize?: number;
1819
googleAiKey?: string;
20+
schemaDirectory?: string;
1921
}
2022

2123
export async function parseConfig(): Promise<CliConfig> {
@@ -33,8 +35,10 @@ export async function parseConfig(): Promise<CliConfig> {
3335
tableNamePrefix: program.tableNamePrefix,
3436
useGemini: program.useGemini,
3537
schemas: !program.useGemini ? readSchemas(program.schemaFiles) : {},
38+
geminiAnalyzeCollectionPath: program.geminiAnalyzeCollectionPath,
3639
agentSampleSize: DEFAULT_SAMPLE_SIZE,
3740
googleAiKey: program.googleAiKey,
41+
schemaDirectory: program.schemaDirectory,
3842
};
3943
}
4044
const {
@@ -44,20 +48,23 @@ export async function parseConfig(): Promise<CliConfig> {
4448
tableNamePrefix,
4549
schemaFiles,
4650
useGemini,
47-
// TODO: rename?
51+
geminiAnalyzeCollectionPath,
4852
googleAiKey,
53+
schemaDirectory,
4954
} = await promptInquirer();
5055

5156
return {
5257
projectId: project,
5358
bigQueryProjectId: bigQueryProject,
5459
datasetId: dataset,
55-
tableNamePrefix: tableNamePrefix,
60+
tableNamePrefix,
5661
schemas: !useGemini ? readSchemas(
5762
schemaFiles.split(",").map((schemaFileName) => schemaFileName.trim())
5863
) : {},
59-
useGemini: useGemini,
64+
useGemini,
65+
geminiAnalyzeCollectionPath,
6066
agentSampleSize: DEFAULT_SAMPLE_SIZE,
61-
googleAiKey: googleAiKey,
67+
googleAiKey,
68+
schemaDirectory,
6269
};
6370
}

firestore-bigquery-export/scripts/gen-schema-view/src/config/interactive.ts

Lines changed: 10 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@ export const questions = [
1818
{
1919
message: "What is your Firebase project ID?",
2020
name: "project",
21-
default: process.env.PROJECT_ID,
2221
type: "input",
2322
validate: (value) =>
2423
validateInput(value, "project ID", FIRESTORE_VALID_CHARACTERS),
@@ -27,7 +26,6 @@ export const questions = [
2726
message:
2827
"What is your Google Cloud Project ID for BigQuery? (can be the same as the Firebase project ID)",
2928
name: "bigQueryProject",
30-
default: process.env.PROJECT_ID,
3129
type: "input",
3230
validate: (value) =>
3331
validateInput(value, "BigQuery project ID", GCP_PROJECT_VALID_CHARACTERS),
@@ -42,7 +40,7 @@ export const questions = [
4240
},
4341
{
4442
message:
45-
"What is the name of the Cloud Firestore collection for which you want to generate a schema view?",
43+
"What prefix should be used for the names of the views generated by this script?",
4644
name: "tableNamePrefix",
4745
type: "input",
4846
validate: (value) =>
@@ -53,7 +51,7 @@ export const questions = [
5351
"Would you like to use a Gemini to automatically analyze your data and generate a draft schema?",
5452
name: "useGemini",
5553
type: "confirm",
56-
default: false,
54+
default: true,
5755
},
5856
{
5957
message:
@@ -62,16 +60,6 @@ export const questions = [
6260
type: "input",
6361
when: (answers) => !answers.useGemini,
6462
},
65-
// TODO: I dont think this is required as we have it above
66-
// TODO: can we make the questions conditional? if we select useGemini then dont ask about finding schema files?
67-
// {
68-
// message: "What is the Firestore collection path you want to analyze?",
69-
// name: "collectionPath",
70-
// type: "input",
71-
// when: (answers) => answers.useGemini,
72-
// validate: (value) =>
73-
// validateInput(value, "collection path", FIRESTORE_VALID_CHARACTERS),
74-
// },
7563
{
7664
message: "Please provide your Google AI API Key:",
7765
name: "googleAiKey",
@@ -84,6 +72,14 @@ export const questions = [
8472
return true;
8573
},
8674
},
75+
{
76+
message: "What is the Firestore collection path you want Gemini to analyze?",
77+
name: "geminiAnalyzeCollectionPath",
78+
type: "input",
79+
when: (answers) => answers.useGemini,
80+
validate: (value) =>
81+
validateInput(value, "collection path", FIRESTORE_VALID_CHARACTERS),
82+
},
8783
{
8884
message: "Where should the generated schema files be stored?",
8985
name: "schemaDirectory",

firestore-bigquery-export/scripts/gen-schema-view/src/config/non-interactive.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ export const configureProgram = () => {
5050
false
5151
)
5252
.option(
53-
"-c, --collection-path <path>",
53+
"-c, --gemini-analyze-collection-path <path>",
5454
"Firestore collection path for Gemini to analyze"
5555
)
5656
.option(

firestore-bigquery-export/scripts/gen-schema-view/src/schema/genkit.ts

Lines changed: 53 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,9 @@ import type { CliConfig } from "../config";
22
import firebase = require("firebase-admin");
33
import { genkit, z } from "genkit";
44
import { googleAI, gemini20Flash } from "@genkit-ai/googleai";
5-
import * as fs from "fs/promises";
5+
import * as fs from "fs";
66
import * as path from "path";
77
import inquirer from "inquirer";
8-
import {SchemaSchema} from './genkitSchema'
98

109
export async function sampleFirestoreDocuments(
1110
collectionPath: string,
@@ -25,7 +24,6 @@ export async function sampleFirestoreDocuments(
2524
return serializeDocument(data);
2625
});
2726

28-
console.log(`Successfully sampled ${documents.length} documents.`);
2927
return documents;
3028
} catch (error) {
3129
console.error("Error sampling documents:", error);
@@ -67,44 +65,19 @@ function serializeDocument(data: any): any {
6765
return data;
6866
}
6967

70-
/**
71-
* Writes a schema file to the specified directory if it does not already exist.
72-
*
73-
* @param {string} schemaDirectory - The directory where schema files are stored.
74-
* @param {string} fileName - The name of the schema file to write.
75-
* @param {string} content - The content of the schema file as a JSON string.
76-
* @returns {Promise<string>} - A message indicating success or an error if the file already exists.
77-
*/
78-
const writeSchemaFile = async (
79-
schemaDirectory: string,
80-
fileName: string,
81-
content: string
82-
): Promise<string> => {
83-
const filePath = path.join(schemaDirectory, fileName);
84-
try {
85-
await fs.access(filePath);
86-
return "Error: Schema file already exists";
87-
} catch {
88-
await fs.writeFile(filePath, content);
89-
return "Schema created successfully";
90-
}
91-
};
92-
9368
const biqquerySchemaPrompt = ({
94-
collectionName,
69+
collectionPath,
9570
sampleData,
96-
tablePrefix,
9771
}: {
98-
collectionName: string;
72+
collectionPath: string;
9973
sampleData: any[];
100-
tablePrefix: string;
10174
}) => `
10275
You are a Schema Management Agent for Generating BigQuery schemas from Firestore Collections.
10376
Your primary tasks are:
10477
1. Analyze the provided sample documents
10578
2. Generate an appropriate BigQuery schema
10679
107-
I will provide you with sample documents from the collection "${collectionName}".
80+
I will provide you with sample documents from the collection "${collectionPath}".
10881
10982
Here are the sample documents to analyze:
11083
${JSON.stringify(sampleData, null, 2)}
@@ -194,14 +167,19 @@ const biqquerySchemaPrompt = ({
194167
export const generateSchemaFilesWithGemini = async (config: CliConfig) => {
195168
// get sample data from Firestore
196169
const sampleData = await sampleFirestoreDocuments(
197-
config.tableNamePrefix!,
170+
config.geminiAnalyzeCollectionPath!,
198171
config.agentSampleSize!
199172
);
200173

174+
if (sampleData.length === 0) {
175+
console.log("Operation cancelled. No sample data found. Either the collection is empty or the collection path is incorrect.");
176+
process.exit(0);
177+
}
178+
console.log(`Successfully sampled ${sampleData.length} documents from collection ${config.geminiAnalyzeCollectionPath}`);
179+
201180
const prompt = biqquerySchemaPrompt({
202-
collectionName: config.tableNamePrefix!,
181+
collectionPath: config.geminiAnalyzeCollectionPath!,
203182
sampleData,
204-
tablePrefix: config.tableNamePrefix,
205183
});
206184

207185
// initialize genkit with googleAI plugin
@@ -218,12 +196,49 @@ export const generateSchemaFilesWithGemini = async (config: CliConfig) => {
218196
model: gemini20Flash,
219197
prompt,
220198
output: {
221-
format: 'json',
222-
schema: SchemaSchema
199+
format: "json",
200+
schema: z.object({
201+
fields: z.array(z.object({
202+
name: z.string(),
203+
type: z.string(),
204+
description: z.string(),
205+
fields: z.array(z.object({
206+
name: z.string(),
207+
type: z.string(),
208+
description: z.string(),
209+
fields: z.array(z.object({
210+
name: z.string(),
211+
type: z.string(),
212+
description: z.string(),
213+
column_name: z.string().optional(),
214+
})),
215+
})),
216+
})),
217+
})
218+
}});
219+
220+
const filePath = path.join(config.schemaDirectory, `${config.tableNamePrefix}.json`);
221+
222+
// Check whether the schema file already exists, so we can ask before overwriting it
223+
if (fs.existsSync(filePath)) {
224+
const overwriteConfirm = await inquirer.prompt([
225+
{
226+
type: "confirm",
227+
name: "proceed",
228+
message:
229+
"Schema file already exists. Would you like to overwrite it?",
230+
default: false,
231+
},
232+
]);
233+
234+
if (!overwriteConfirm.proceed) {
235+
console.log("Operation cancelled. Please choose a different schema file name.");
236+
process.exit(0);
223237
}
224-
});
225238

226-
await writeSchemaFile("./schemas", `${config.tableNamePrefix}.json`, text);
239+
await fs.promises.writeFile(filePath, text);
240+
}
241+
227242
// confirm with user that schema file is correct
228243
const confirmation = await inquirer.prompt([
229244
{

0 commit comments

Comments
 (0)