From 9a339f08a7f9b04688dfd0e09e8c9fd72b712d76 Mon Sep 17 00:00:00 2001 From: Daniel Vaz Gaspar Date: Mon, 6 May 2024 15:51:42 +0100 Subject: [PATCH] feat: new Columnar upload form and API (#28192) --- superset-frontend/package-lock.json | 156 ----- superset-frontend/package.json | 1 - .../UploadDataModel/UploadDataModal.test.tsx | 280 +++++++- .../databases/UploadDataModel/index.tsx | 627 +++++++++--------- .../src/features/home/Menu.test.tsx | 6 +- .../src/features/home/RightMenu.test.tsx | 12 +- .../src/features/home/RightMenu.tsx | 53 +- superset-frontend/src/features/home/types.ts | 3 + .../src/pages/DatabaseList/index.tsx | 20 +- superset-frontend/src/views/CRUD/utils.tsx | 2 +- superset/commands/database/uploaders/base.py | 46 +- .../database/uploaders/columnar_reader.py | 134 ++++ .../commands/database/uploaders/csv_reader.py | 115 ++-- .../database/uploaders/excel_reader.py | 42 +- superset/constants.py | 3 + superset/databases/api.py | 235 ++++++- superset/databases/schemas.py | 282 +++++--- superset/initialization/__init__.py | 3 +- ...33124c18ad_mig_new_columnar_upload_perm.py | 88 +++ superset/views/database/forms.py | 174 ----- superset/views/database/views.py | 156 +---- tests/integration_tests/csv_upload_tests.py | 237 ------- .../integration_tests/databases/api_tests.py | 1 + .../databases/commands/upload_test.py | 32 + .../databases/columnar_reader_test.py | 253 +++++++ .../commands/databases/csv_reader_test.py | 64 +- .../commands/databases/excel_reader_test.py | 87 ++- tests/unit_tests/databases/api_test.py | 339 +++++++++- tests/unit_tests/fixtures/common.py | 32 +- 29 files changed, 2259 insertions(+), 1224 deletions(-) create mode 100644 superset/commands/database/uploaders/columnar_reader.py create mode 100644 superset/migrations/versions/2024-04-26_12-36_4a33124c18ad_mig_new_columnar_upload_perm.py delete mode 100644 superset/views/database/forms.py delete mode 100644 tests/integration_tests/csv_upload_tests.py create mode 100644 tests/unit_tests/commands/databases/columnar_reader_test.py diff --git a/superset-frontend/package-lock.json b/superset-frontend/package-lock.json index 739e2130b5d14..ba45c7041f0ec 100644 --- a/superset-frontend/package-lock.json +++ b/superset-frontend/package-lock.json @@ -138,7 +138,6 @@ "use-event-callback": "^0.1.0", "use-immer": "^0.9.0", "use-query-params": "^1.1.9", - "xlsx": "^0.18.5", "yargs": "^17.7.2" }, "devDependencies": { @@ -25339,14 +25338,6 @@ "node": ">= 0.12.0" } }, - "node_modules/adler-32": { - "version": "1.3.1", - "resolved": "https://registry.npmjs.org/adler-32/-/adler-32-1.3.1.tgz", - "integrity": "sha512-ynZ4w/nUUv5rrsR8UUGoe1VC9hZj6V5hU9Qw1HlMDJGEJw5S7TfTErWTjMys6M7vr0YWcPqs3qAr4ss0nDfP+A==", - "engines": { - "node": ">=0.8" - } - }, "node_modules/agent-base": { "version": "6.0.2", "resolved": "https://registry.npmjs.org/agent-base/-/agent-base-6.0.2.tgz", @@ -28096,18 +28087,6 @@ "url": "https://github.com/sponsors/wooorm" } }, - "node_modules/cfb": { - "version": "1.2.2", - "resolved": "https://registry.npmjs.org/cfb/-/cfb-1.2.2.tgz", - "integrity": "sha512-KfdUZsSOw19/ObEWasvBP/Ac4reZvAGauZhs6S/gqNhXhI7cKwvlH7ulj+dOEYnca4bm4SGo8C1bTAQvnTjgQA==", - "dependencies": { - "adler-32": "~1.3.0", - "crc-32": "~1.2.0" - }, - "engines": { - "node": ">=0.8" - } - }, "node_modules/chainsaw": { "version": "0.1.0", "resolved": "https://registry.npmjs.org/chainsaw/-/chainsaw-0.1.0.tgz", @@ -28906,14 +28885,6 @@ "node": ">=0.10.0" } }, - "node_modules/codepage": { - "version": "1.15.0", - "resolved": "https://registry.npmjs.org/codepage/-/codepage-1.15.0.tgz", - "integrity": "sha512-3g6NUTPd/YtuuGrhMnOMRjFc+LJw/bnMp3+0r/Wcz3IXUuCosKRJvMphm5+Q+bvTVGcJJuRvVLuYba+WojaFaA==", - "engines": { - "node": ">=0.8" - } - }, "node_modules/collect-v8-coverage": { "version": "1.0.1", "resolved": "https://registry.npmjs.org/collect-v8-coverage/-/collect-v8-coverage-1.0.1.tgz", @@ -29885,17 +29856,6 @@ "node": ">=8" } }, - "node_modules/crc-32": { - "version": "1.2.2", - "resolved": "https://registry.npmjs.org/crc-32/-/crc-32-1.2.2.tgz", - "integrity": "sha512-ROmzCKrTnOwybPcJApAA6WBWij23HVfGVNKqqrZpuyZOHqK2CwHSvpGuyt/UNNvaIjEd8X5IFGp4Mh+Ie1IHJQ==", - "bin": { - "crc32": "bin/crc32.njs" - }, - "engines": { - "node": ">=0.8" - } - }, "node_modules/create-emotion": { "version": "10.0.27", "resolved": "https://registry.npmjs.org/create-emotion/-/create-emotion-10.0.27.tgz", @@ -37175,14 +37135,6 @@ "node": ">=12.20.0" } }, - "node_modules/frac": { - "version": "1.1.2", - "resolved": "https://registry.npmjs.org/frac/-/frac-1.1.2.tgz", - "integrity": "sha512-w/XBfkibaTl3YDqASwfDUqkna4Z2p9cFSr1aHDt0WoMTECnRfBOv2WArlZILlqgWlmdIlALXGpM2AOhEk5W3IA==", - "engines": { - "node": ">=0.8" - } - }, "node_modules/fragment-cache": { "version": "0.2.1", "resolved": "https://registry.npmjs.org/fragment-cache/-/fragment-cache-0.2.1.tgz", @@ -62001,17 +61953,6 @@ "resolved": "https://registry.npmjs.org/sprintf-js/-/sprintf-js-1.0.3.tgz", "integrity": "sha1-BOaSb2YolTVPPdAVIDYzuFcpfiw=" }, - "node_modules/ssf": { - "version": "0.11.2", - "resolved": "https://registry.npmjs.org/ssf/-/ssf-0.11.2.tgz", - "integrity": "sha512-+idbmIXoYET47hH+d7dfm2epdOMUDjqcB4648sTZ+t2JwoyBFL/insLfB/racrDmsKB3diwsDA696pZMieAC5g==", - "dependencies": { - "frac": "~1.1.2" - }, - "engines": { - "node": ">=0.8" - } - }, "node_modules/sshpk": { "version": "1.15.2", "resolved": "https://registry.npmjs.org/sshpk/-/sshpk-1.15.2.tgz", @@ -66195,22 +66136,6 @@ "integrity": "sha512-JcKqAHLPxcdb9KM49dufGXn2x3ssnfjbcaQdLlfZsL9rH9wgDQjUtDxbo8NE0F6SFvydeu1VhZe7hZuHsB2/pw==", "dev": true }, - "node_modules/wmf": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/wmf/-/wmf-1.0.2.tgz", - "integrity": "sha512-/p9K7bEh0Dj6WbXg4JG0xvLQmIadrner1bi45VMJTfnbVHsc7yIajZyoSoK60/dtVBs12Fm6WkUI5/3WAVsNMw==", - "engines": { - "node": ">=0.8" - } - }, - "node_modules/word": { - "version": "0.3.0", - "resolved": "https://registry.npmjs.org/word/-/word-0.3.0.tgz", - "integrity": "sha512-OELeY0Q61OXpdUfTp+oweA/vtLVg5VDOXh+3he3PNzLGG/y0oylSOC1xRVj0+l4vQ3tj/bB1HVHv1ocXkQceFA==", - "engines": { - "node": ">=0.8" - } - }, "node_modules/wordwrap": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/wordwrap/-/wordwrap-1.0.0.tgz", @@ -66525,26 +66450,6 @@ "url": "https://opencollective.com/node-fetch" } }, - "node_modules/xlsx": { - "version": "0.18.5", - "resolved": "https://registry.npmjs.org/xlsx/-/xlsx-0.18.5.tgz", - "integrity": "sha512-dmg3LCjBPHZnQp5/F/+nnTa+miPJxUXB6vtk42YjBBKayDNagxGEeIdWApkYPOf3Z3pm3k62Knjzp7lMeTEtFQ==", - "dependencies": { - "adler-32": "~1.3.0", - "cfb": "~1.2.1", - "codepage": "~1.15.0", - "crc-32": "~1.2.1", - "ssf": "~0.11.2", - "wmf": "~1.0.1", - "word": "~0.3.0" - }, - "bin": { - "xlsx": "bin/xlsx.njs" - }, - "engines": { - "node": ">=0.8" - } - }, "node_modules/xml-name-validator": { "version": "3.0.0", "resolved": "https://registry.npmjs.org/xml-name-validator/-/xml-name-validator-3.0.0.tgz", @@ -91924,11 +91829,6 @@ "integrity": "sha512-aT6camzM4xEA54YVJYSqxz1kv4IHnQZRtThJJHhUMRExaU5spC7jX5ugSwTaTgJliIgs4VhZOk7htClvQ/LmRA==", "dev": true }, - "adler-32": { - "version": "1.3.1", - "resolved": "https://registry.npmjs.org/adler-32/-/adler-32-1.3.1.tgz", - "integrity": "sha512-ynZ4w/nUUv5rrsR8UUGoe1VC9hZj6V5hU9Qw1HlMDJGEJw5S7TfTErWTjMys6M7vr0YWcPqs3qAr4ss0nDfP+A==" - }, "agent-base": { "version": "6.0.2", "resolved": "https://registry.npmjs.org/agent-base/-/agent-base-6.0.2.tgz", @@ -94034,15 +93934,6 @@ "resolved": "https://registry.npmjs.org/ccount/-/ccount-2.0.1.tgz", "integrity": "sha512-eyrF0jiFpY+3drT6383f1qhkbGsLSifNAjA61IUjZjmLCWjItY6LB9ft9YhoDgwfmclB2zhu51Lc7+95b8NRAg==" }, - "cfb": { - "version": "1.2.2", - "resolved": "https://registry.npmjs.org/cfb/-/cfb-1.2.2.tgz", - "integrity": "sha512-KfdUZsSOw19/ObEWasvBP/Ac4reZvAGauZhs6S/gqNhXhI7cKwvlH7ulj+dOEYnca4bm4SGo8C1bTAQvnTjgQA==", - "requires": { - "adler-32": "~1.3.0", - "crc-32": "~1.2.0" - } - }, "chainsaw": { "version": "0.1.0", "resolved": "https://registry.npmjs.org/chainsaw/-/chainsaw-0.1.0.tgz", @@ -94654,11 +94545,6 @@ "resolved": "https://registry.npmjs.org/code-point-at/-/code-point-at-1.1.0.tgz", "integrity": "sha1-DQcLTQQ6W+ozovGkDi7bPZpMz3c=" }, - "codepage": { - "version": "1.15.0", - "resolved": "https://registry.npmjs.org/codepage/-/codepage-1.15.0.tgz", - "integrity": "sha512-3g6NUTPd/YtuuGrhMnOMRjFc+LJw/bnMp3+0r/Wcz3IXUuCosKRJvMphm5+Q+bvTVGcJJuRvVLuYba+WojaFaA==" - }, "collect-v8-coverage": { "version": "1.0.1", "resolved": "https://registry.npmjs.org/collect-v8-coverage/-/collect-v8-coverage-1.0.1.tgz", @@ -95426,11 +95312,6 @@ } } }, - "crc-32": { - "version": "1.2.2", - "resolved": "https://registry.npmjs.org/crc-32/-/crc-32-1.2.2.tgz", - "integrity": "sha512-ROmzCKrTnOwybPcJApAA6WBWij23HVfGVNKqqrZpuyZOHqK2CwHSvpGuyt/UNNvaIjEd8X5IFGp4Mh+Ie1IHJQ==" - }, "create-emotion": { "version": "10.0.27", "resolved": "https://registry.npmjs.org/create-emotion/-/create-emotion-10.0.27.tgz", @@ -101104,11 +100985,6 @@ "fetch-blob": "^3.1.2" } }, - "frac": { - "version": "1.1.2", - "resolved": "https://registry.npmjs.org/frac/-/frac-1.1.2.tgz", - "integrity": "sha512-w/XBfkibaTl3YDqASwfDUqkna4Z2p9cFSr1aHDt0WoMTECnRfBOv2WArlZILlqgWlmdIlALXGpM2AOhEk5W3IA==" - }, "fragment-cache": { "version": "0.2.1", "resolved": "https://registry.npmjs.org/fragment-cache/-/fragment-cache-0.2.1.tgz", @@ -119976,14 +119852,6 @@ "resolved": "https://registry.npmjs.org/sprintf-js/-/sprintf-js-1.0.3.tgz", "integrity": "sha1-BOaSb2YolTVPPdAVIDYzuFcpfiw=" }, - "ssf": { - "version": "0.11.2", - "resolved": "https://registry.npmjs.org/ssf/-/ssf-0.11.2.tgz", - "integrity": "sha512-+idbmIXoYET47hH+d7dfm2epdOMUDjqcB4648sTZ+t2JwoyBFL/insLfB/racrDmsKB3diwsDA696pZMieAC5g==", - "requires": { - "frac": "~1.1.2" - } - }, "sshpk": { "version": "1.15.2", "resolved": "https://registry.npmjs.org/sshpk/-/sshpk-1.15.2.tgz", @@ -123149,16 +123017,6 @@ "integrity": "sha512-JcKqAHLPxcdb9KM49dufGXn2x3ssnfjbcaQdLlfZsL9rH9wgDQjUtDxbo8NE0F6SFvydeu1VhZe7hZuHsB2/pw==", "dev": true }, - "wmf": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/wmf/-/wmf-1.0.2.tgz", - "integrity": "sha512-/p9K7bEh0Dj6WbXg4JG0xvLQmIadrner1bi45VMJTfnbVHsc7yIajZyoSoK60/dtVBs12Fm6WkUI5/3WAVsNMw==" - }, - "word": { - "version": "0.3.0", - "resolved": "https://registry.npmjs.org/word/-/word-0.3.0.tgz", - "integrity": "sha512-OELeY0Q61OXpdUfTp+oweA/vtLVg5VDOXh+3he3PNzLGG/y0oylSOC1xRVj0+l4vQ3tj/bB1HVHv1ocXkQceFA==" - }, "wordwrap": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/wordwrap/-/wordwrap-1.0.0.tgz", @@ -123398,20 +123256,6 @@ } } }, - "xlsx": { - "version": "0.18.5", - "resolved": "https://registry.npmjs.org/xlsx/-/xlsx-0.18.5.tgz", - "integrity": "sha512-dmg3LCjBPHZnQp5/F/+nnTa+miPJxUXB6vtk42YjBBKayDNagxGEeIdWApkYPOf3Z3pm3k62Knjzp7lMeTEtFQ==", - "requires": { - "adler-32": "~1.3.0", - "cfb": "~1.2.1", - "codepage": "~1.15.0", - "crc-32": "~1.2.1", - "ssf": "~0.11.2", - "wmf": "~1.0.1", - "word": "~0.3.0" - } - }, "xml-name-validator": { "version": "3.0.0", "resolved": "https://registry.npmjs.org/xml-name-validator/-/xml-name-validator-3.0.0.tgz", diff --git a/superset-frontend/package.json b/superset-frontend/package.json index 4f5a83058ff4e..4ff1708933589 100644 --- a/superset-frontend/package.json +++ b/superset-frontend/package.json @@ -204,7 +204,6 @@ "use-event-callback": "^0.1.0", "use-immer": "^0.9.0", "use-query-params": "^1.1.9", - "xlsx": "^0.18.5", "yargs": "^17.7.2" }, "devDependencies": { diff --git a/superset-frontend/src/features/databases/UploadDataModel/UploadDataModal.test.tsx b/superset-frontend/src/features/databases/UploadDataModel/UploadDataModal.test.tsx index 4de9a89e6f650..44a8a60738a70 100644 --- a/superset-frontend/src/features/databases/UploadDataModel/UploadDataModal.test.tsx +++ b/superset-frontend/src/features/databases/UploadDataModel/UploadDataModal.test.tsx @@ -29,6 +29,7 @@ import { forEach } from 'lodash'; fetchMock.post('glob:*api/v1/database/1/csv_upload/', {}); fetchMock.post('glob:*api/v1/database/1/excel_upload/', {}); +fetchMock.post('glob:*api/v1/database/1/columnar_upload/', {}); fetchMock.get( 'glob:*api/v1/database/?q=(filters:!((col:allow_file_upload,opr:eq,value:!t)),page:0,page_size:100)', @@ -68,6 +69,13 @@ const excelProps = { type: 'excel', }; +const columnarProps = { + show: true, + onHide: () => {}, + allowedExtensions: ['parquet', 'zip'], + type: 'columnar', +}; + test('CSV, renders the general information elements correctly', () => { render(, { useRedux: true, @@ -200,6 +208,78 @@ test('Excel, renders the general information elements correctly', () => { }); }); +test('Columnar, renders the general information elements correctly', () => { + render(, { + useRedux: true, + }); + + const cancelButton = screen.getByRole('button', { + name: 'Cancel', + }); + const uploadButton = screen.getByRole('button', { + name: 'Upload', + }); + const selectButton = screen.getByRole('button', { + name: 'Select', + }); + + const title = screen.getByRole('heading', { + name: /columnar upload/i, + }); + const missingTitle = screen.queryByRole('heading', { + name: /csv upload/i, + }); + expect(missingTitle).not.toBeInTheDocument(); + const panel1 = screen.getByRole('heading', { + name: /General information/i, + }); + const panel2 = screen.getByRole('heading', { + name: /file settings/i, + }); + const panel3 = screen.getByRole('heading', { + name: /columns/i, + }); + const panel4 = screen.queryByRole('heading', { + name: /rows/i, + }); + expect(panel4).not.toBeInTheDocument(); + + const selectDatabase = screen.getByRole('combobox', { + name: /select a database/i, + }); + const selectDelimiter = screen.queryByRole('combobox', { + name: /choose a delimiter/i, + }); + expect(selectDelimiter).not.toBeInTheDocument(); + + const selectSheetName = screen.queryByRole('combobox', { + name: /choose sheet name/i, + }); + expect(selectSheetName).not.toBeInTheDocument(); + const inputTableName = screen.getByRole('textbox', { + name: /table name/i, + }); + const inputSchema = screen.getByRole('combobox', { + name: /schema/i, + }); + + const visibleComponents = [ + cancelButton, + uploadButton, + selectButton, + title, + panel1, + panel2, + panel3, + selectDatabase, + inputTableName, + inputSchema, + ]; + visibleComponents.forEach(component => { + expect(component).toBeVisible(); + }); +}); + test('CSV, renders the file settings elements correctly', () => { render(, { useRedux: true, @@ -282,6 +362,45 @@ test('Excel, renders the file settings elements correctly', () => { }); }); +test('Columnar, renders the file settings elements correctly', () => { + render(, { + useRedux: true, + }); + + expect(screen.queryByText('If Table Already Exists')).not.toBeInTheDocument(); + const panelHeader = screen.getByRole('heading', { + name: /file settings/i, + }); + userEvent.click(panelHeader); + const selectTableAlreadyExists = screen.getByRole('combobox', { + name: /choose already exists/i, + }); + const inputDecimalCharacter = screen.queryByRole('textbox', { + name: /decimal character/i, + }); + expect(inputDecimalCharacter).not.toBeInTheDocument(); + const selectColumnsDates = screen.queryByRole('combobox', { + name: /choose columns to be parsed as dates/i, + }); + expect(selectColumnsDates).not.toBeInTheDocument(); + const selectNullValues = screen.queryByRole('combobox', { + name: /null values/i, + }); + expect(selectNullValues).not.toBeInTheDocument(); + + const switchSkipInitialSpace = screen.queryByText('skipInitialSpace'); + expect(switchSkipInitialSpace).not.toBeInTheDocument(); + const switchSkipBlankLines = screen.queryByText('skipBlankLines'); + expect(switchSkipBlankLines).not.toBeInTheDocument(); + const switchDayFirst = screen.queryByText('dayFirst'); + expect(switchDayFirst).not.toBeInTheDocument(); + + const visibleComponents = [selectTableAlreadyExists]; + visibleComponents.forEach(component => { + expect(component).toBeVisible(); + }); +}); + test('CSV, renders the columns elements correctly', () => { render(, { useRedux: true, @@ -291,12 +410,13 @@ test('CSV, renders the columns elements correctly', () => { name: /columns/i, }); userEvent.click(panelHeader); + const switchDataFrameIndex = screen.getByTestId('dataFrameIndex'); + userEvent.click(switchDataFrameIndex); const selectIndexColumn = screen.getByRole('combobox', { name: /Choose index column/i, }); - const switchDataFrameIndex = screen.getByTestId('dataFrameIndex'); const inputColumnLabels = screen.getByRole('textbox', { - name: /Column labels/i, + name: /Index label/i, }); const selectColumnsToRead = screen.getByRole('combobox', { name: /Choose columns to read/i, @@ -327,12 +447,13 @@ test('Excel, renders the columns elements correctly', () => { name: /columns/i, }); userEvent.click(panelHeader); + const switchDataFrameIndex = screen.getByTestId('dataFrameIndex'); + userEvent.click(switchDataFrameIndex); const selectIndexColumn = screen.getByRole('combobox', { name: /Choose index column/i, }); - const switchDataFrameIndex = screen.getByTestId('dataFrameIndex'); - const inputColumnLabels = screen.getByRole('textbox', { - name: /Column labels/i, + const inputIndexLabel = screen.getByRole('textbox', { + name: /Index label/i, }); const selectColumnsToRead = screen.getByRole('combobox', { name: /Choose columns to read/i, @@ -348,7 +469,45 @@ test('Excel, renders the columns elements correctly', () => { const visibleComponents = [ selectIndexColumn, switchDataFrameIndex, - inputColumnLabels, + inputIndexLabel, + selectColumnsToRead, + ]; + visibleComponents.forEach(component => { + expect(component).toBeVisible(); + }); +}); + +test('Columnar, renders the columns elements correctly', () => { + render(, { + useRedux: true, + }); + + const panelHeader = screen.getByRole('heading', { + name: /columns/i, + }); + userEvent.click(panelHeader); + const selectIndexColumn = screen.queryByRole('combobox', { + name: /Choose index column/i, + }); + expect(selectIndexColumn).not.toBeInTheDocument(); + const switchDataFrameIndex = screen.getByTestId('dataFrameIndex'); + userEvent.click(switchDataFrameIndex); + const inputIndexLabel = screen.getByRole('textbox', { + name: /Index label/i, + }); + const selectColumnsToRead = screen.getByRole('combobox', { + name: /Choose columns to read/i, + }); + userEvent.click(selectColumnsToRead); + + const columnDataTypes = screen.queryByRole('textbox', { + name: /Column data types/i, + }); + expect(columnDataTypes).not.toBeInTheDocument(); + + const visibleComponents = [ + switchDataFrameIndex, + inputIndexLabel, selectColumnsToRead, ]; visibleComponents.forEach(component => { @@ -381,6 +540,17 @@ test('renders the rows elements correctly', () => { }); }); +test('Columnar, does not render the rows', () => { + render(, { + useRedux: true, + }); + + const panelHeader = screen.queryByRole('heading', { + name: /rows/i, + }); + expect(panelHeader).not.toBeInTheDocument(); +}); + test('database and schema are correctly populated', async () => { render(, { useRedux: true, @@ -546,6 +716,67 @@ test('Excel, form post', async () => { expect(fileData.name).toBe('test.xls'); }); +test('Columnar, form post', async () => { + render(, { + useRedux: true, + }); + + const selectButton = screen.getByRole('button', { + name: 'Select', + }); + userEvent.click(selectButton); + + // Select a file from the file dialog + const file = new File(['test'], 'test.parquet', { type: 'text' }); + const inputElement = document.querySelector('input[type="file"]'); + + if (inputElement) { + userEvent.upload(inputElement, file); + } + + const selectDatabase = screen.getByRole('combobox', { + name: /select a database/i, + }); + userEvent.click(selectDatabase); + await waitFor(() => screen.getByText('database1')); + await waitFor(() => screen.getByText('database2')); + + screen.getByText('database1').click(); + const selectSchema = screen.getByRole('combobox', { + name: /schema/i, + }); + userEvent.click(selectSchema); + await waitFor(() => screen.getAllByText('public')); + screen.getAllByText('public')[1].click(); + + // Fill out form fields + const inputTableName = screen.getByRole('textbox', { + name: /table name/i, + }); + userEvent.type(inputTableName, 'table1'); + const uploadButton = screen.getByRole('button', { + name: 'Upload', + }); + + userEvent.click(uploadButton); + await waitFor(() => + fetchMock.called('glob:*api/v1/database/1/columnar_upload/'), + ); + + // Get the matching fetch calls made + const matchingCalls = fetchMock.calls( + 'glob:*api/v1/database/1/columnar_upload/', + ); + expect(matchingCalls).toHaveLength(1); + const [_, options] = matchingCalls[0]; + const formData = options?.body as FormData; + expect(formData.get('table_name')).toBe('table1'); + expect(formData.get('schema')).toBe('public'); + expect(formData.get('table_name')).toBe('table1'); + const fileData = formData.get('file') as File; + expect(fileData.name).toBe('test.parquet'); +}); + test('CSV, validate file extension returns false', () => { const invalidFileNames = ['out', 'out.exe', 'out.csv.exe', '.csv', 'out.xls']; forEach(invalidFileNames, fileName => { @@ -572,6 +803,25 @@ test('Excel, validate file extension returns false', () => { }); }); +test('Columnar, validate file extension returns false', () => { + const invalidFileNames = [ + 'out', + 'out.exe', + 'out.parquet.exe', + '.parquet', + 'out.excel', + ]; + forEach(invalidFileNames, fileName => { + const file: UploadFile = { + name: fileName, + uid: 'xp', + size: 100, + type: 'text/csv', + }; + expect(validateUploadFileExtension(file, ['parquet', 'zip'])).toBe(false); + }); +}); + test('CSV, validate file extension returns true', () => { const invalidFileNames = ['out.csv', 'out.tsv', 'out.exe.csv', 'out a.csv']; forEach(invalidFileNames, fileName => { @@ -597,3 +847,21 @@ test('Excel, validate file extension returns true', () => { expect(validateUploadFileExtension(file, ['xls', 'xlsx'])).toBe(true); }); }); + +test('Columnar, validate file extension returns true', () => { + const invalidFileNames = [ + 'out.parquet', + 'out.zip', + 'out.exe.zip', + 'out a.parquet', + ]; + forEach(invalidFileNames, fileName => { + const file: UploadFile = { + name: fileName, + uid: 'xp', + size: 100, + type: 'text/csv', + }; + expect(validateUploadFileExtension(file, ['parquet', 'zip'])).toBe(true); + }); +}); diff --git a/superset-frontend/src/features/databases/UploadDataModel/index.tsx b/superset-frontend/src/features/databases/UploadDataModel/index.tsx index 066910c8ecfca..63c2b5a7e7aaa 100644 --- a/superset-frontend/src/features/databases/UploadDataModel/index.tsx +++ b/superset-frontend/src/features/databases/UploadDataModel/index.tsx @@ -40,7 +40,6 @@ import { Input, InputNumber } from 'src/components/Input'; import rison from 'rison'; import { UploadChangeParam, UploadFile } from 'antd/lib/upload/interface'; import withToasts from 'src/components/MessageToasts/withToasts'; -import * as XLSX from 'xlsx'; import { antdCollapseStyles, antDModalNoPaddingStyles, @@ -69,9 +68,25 @@ const CSVSpecificFields = [ 'skip_blank_lines', 'day_first', 'column_data_types', + 'column_dates', + 'decimal_character', + 'null_values', + 'index_column', + 'header_row', + 'rows_to_read', + 'skip_rows', ]; -const ExcelSpecificFields = ['sheet_name']; +const ExcelSpecificFields = [ + 'sheet_name', + 'column_dates', + 'decimal_character', + 'null_values', + 'index_column', + 'header_row', + 'rows_to_read', + 'skip_rows', +]; const ColumnarSpecificFields: string[] = []; @@ -89,6 +104,9 @@ const UploadTypeToSpecificFields: Record = { columnar: ColumnarSpecificFields, }; +const isFieldATypeSpecificField = (field: string, type: UploadType) => + UploadTypeToSpecificFields[type].includes(field); + interface UploadInfo { table_name: string; schema: string; @@ -106,11 +124,16 @@ interface UploadInfo { column_dates: Array; index_column: string | null; dataframe_index: boolean; - column_labels: string; + index_label: string; columns_read: Array; column_data_types: string; } +interface SheetColumnNames { + sheet_name: string; + column_names: string[]; +} + const defaultUploadInfo: UploadInfo = { table_name: '', schema: '', @@ -128,7 +151,7 @@ const defaultUploadInfo: UploadInfo = { column_dates: [], index_column: null, dataframe_index: false, - column_labels: '', + index_label: '', columns_read: [], column_data_types: '', }; @@ -136,7 +159,11 @@ const defaultUploadInfo: UploadInfo = { // Allowed extensions to accept for file upload, users can always override this // by selecting all file extensions on the OS file picker. Also ".txt" will // allow all files to be selected. -const READ_HEADER_SIZE = 10000; +const allowedExtensionsToAccept = { + csv: '.csv, .tsv', + excel: '.xls, .xlsx', + columnar: '.parquet, .zip', +}; export const validateUploadFileExtension = ( file: UploadFile, @@ -183,21 +210,17 @@ const UploadDataModal: FunctionComponent = ({ const [fileList, setFileList] = useState([]); const [columns, setColumns] = React.useState([]); const [sheetNames, setSheetNames] = React.useState([]); - const [currentSheetName, setCurrentSheetName] = React.useState< - string | undefined - >(); + const [sheetsColumnNames, setSheetsColumnNames] = React.useState< + SheetColumnNames[] + >([]); const [delimiter, setDelimiter] = useState(','); const [isLoading, setIsLoading] = useState(false); const [currentSchema, setCurrentSchema] = useState(); + const [currentDataframeIndex, setCurrentDataframeIndex] = + useState(false); const [previewUploadedFile, setPreviewUploadedFile] = useState(true); const [fileLoading, setFileLoading] = useState(false); - const allowedExtensionsToAccept = { - csv: '.csv, .tsv', - excel: '.xls, .xlsx', - columnar: '.parquet, .orc', - }; - const createTypeToEndpointMap = ( databaseId: number, ): { [key: string]: string } => ({ @@ -206,6 +229,12 @@ const UploadDataModal: FunctionComponent = ({ columnar: `/api/v1/database/${databaseId}/columnar_upload/`, }); + const typeToFileMetadataEndpoint = { + csv: '/api/v1/database/csv_metadata/', + excel: '/api/v1/database/excel_metadata/', + columnar: '/api/v1/database/columnar_metadata/', + }; + const nullValuesOptions = [ { value: '""', @@ -286,12 +315,12 @@ const UploadDataModal: FunctionComponent = ({ setColumns([]); setCurrentSchema(''); setCurrentDatabaseId(0); - setCurrentSheetName(undefined); setSheetNames([]); setIsLoading(false); setDelimiter(','); setPreviewUploadedFile(true); setFileLoading(false); + setSheetsColumnNames([]); form.resetFields(); }; @@ -343,6 +372,58 @@ const UploadDataModal: FunctionComponent = ({ [currentDatabaseId], ); + const loadFileMetadata = (file: File) => { + const fields = form.getFieldsValue(); + const mergedValues = { ...defaultUploadInfo, ...fields }; + const formData = new FormData(); + formData.append('file', file); + if (type === 'csv') { + formData.append('delimiter', mergedValues.delimiter); + } + setFileLoading(true); + return SupersetClient.post({ + endpoint: typeToFileMetadataEndpoint[type], + body: formData, + headers: { Accept: 'application/json' }, + }) + .then(response => { + const { items } = response.json.result; + if (items && type !== 'excel') { + setColumns(items[0].column_names); + } else { + const { allSheetNames, sheetColumnNamesMap } = items.reduce( + ( + acc: { + allSheetNames: any[]; + sheetColumnNamesMap: Record; + }, + item: { sheet_name: any; column_names: any }, + ) => { + acc.allSheetNames.push(item.sheet_name); + acc.sheetColumnNamesMap[item.sheet_name] = item.column_names; + return acc; + }, + { allSheetNames: [], sheetColumnNamesMap: {} }, + ); + setColumns(items[0].column_names); + setSheetNames(allSheetNames); + form.setFieldsValue({ sheet_name: allSheetNames[0] }); + setSheetsColumnNames(sheetColumnNamesMap); + } + }) + .catch(response => + getClientErrorObject(response).then(error => { + addDangerToast(error.error || 'Error'); + setColumns([]); + form.setFieldsValue({ sheet_name: undefined }); + setSheetNames([]); + }), + ) + .finally(() => { + setFileLoading(false); + }); + }; + const getAllFieldsNotInType = (): string[] => { const specificFields = UploadTypeToSpecificFields[type] || []; return [...AllSpecificFields].filter( @@ -353,7 +434,13 @@ const UploadDataModal: FunctionComponent = ({ const appendFormData = (formData: FormData, data: Record) => { const allFieldsNotInType = getAllFieldsNotInType(); Object.entries(data).forEach(([key, value]) => { - if (!(allFieldsNotInType.includes(key) || NonNullFields.includes(key))) { + if ( + !( + allFieldsNotInType.includes(key) || + (NonNullFields.includes(key) && + (value === undefined || value === null)) + ) + ) { formData.append(key, value); } }); @@ -401,13 +488,12 @@ const UploadDataModal: FunctionComponent = ({ setFileList(fileList.filter(file => file.uid !== removedFile.uid)); setColumns([]); setSheetNames([]); - setCurrentSheetName(undefined); form.setFieldsValue({ sheet_name: undefined }); return false; }; const onSheetNameChange = (value: string) => { - setCurrentSheetName(value); + setColumns(sheetsColumnNames[value] ?? []); }; const columnsToOptions = () => @@ -422,97 +508,6 @@ const UploadDataModal: FunctionComponent = ({ label: sheetName, })); - const readFileContent = (file: File) => - new Promise((resolve, reject) => { - const reader = new FileReader(); - reader.onload = event => { - if (event.target) { - const text = event.target.result as string; - resolve(text); - } else { - reject(new Error('Failed to read file content')); - } - }; - reader.onerror = () => { - reject(new Error('Failed to read file content')); - }; - reader.readAsText(file.slice(0, READ_HEADER_SIZE)); - }); - - const processCSVFile = async (file: File) => { - try { - setFileLoading(true); - const text = await readFileContent(file); - const firstLine = text.split('\n')[0].trim(); - const firstRow = firstLine - .split(delimiter) - .map(column => column.replace(/^"(.*)"$/, '$1')); - setColumns(firstRow); - setFileLoading(false); - } catch (error) { - addDangerToast('Failed to process file content'); - setFileLoading(false); - } - }; - - const processExcelColumns = (workbook: XLSX.WorkBook, sn: string[]) => { - if (!workbook) { - return; - } - let cSheetName = currentSheetName; - if (!currentSheetName) { - setCurrentSheetName(sn[0]); - cSheetName = sn[0]; - } - cSheetName = cSheetName || sn[0]; - form.setFieldsValue({ sheet_name: cSheetName }); - const worksheet = workbook.Sheets[cSheetName]; - - const worksheetRef: string = worksheet['!ref'] ? worksheet['!ref'] : ''; - const range = XLSX.utils.decode_range(worksheetRef); - const columnNames = Array.from({ length: range.e.c + 1 }, (_, i) => { - const cellAddress = XLSX.utils.encode_cell({ r: 0, c: i }); - return worksheet[cellAddress]?.v; - }); - setColumns(columnNames); - }; - - const processExcelFile = async (file: File) => - new Promise((resolve, reject) => { - setFileLoading(true); - const reader = new FileReader(); - reader.readAsBinaryString(file); - - reader.onload = event => { - if (!event.target && event.target == null) { - reader.onerror = () => { - reject(new Error('Failed to read file content')); - }; - return; - } - // Read workbook - const workbook = XLSX.read(event.target.result, { type: 'binary' }); - if (workbook == null) { - reject(new Error('Failed to process file content')); - addDangerToast('Failed to process file content'); - setFileLoading(false); - return; - } - // Extract sheet names - const tmpSheetNames = workbook.SheetNames; - if (tmpSheetNames.length < 1) { - reject(new Error('Failed to read file content')); - addDangerToast('Failed to process file content'); - setFileLoading(false); - return; - } - processExcelColumns(workbook, tmpSheetNames); - setSheetNames(workbook.SheetNames); - setFileLoading(false); - resolve('success'); - }; - }); - const onChangeFile = async (info: UploadChangeParam) => { setFileList([ { @@ -523,14 +518,7 @@ const UploadDataModal: FunctionComponent = ({ if (!previewUploadedFile) { return; } - if (type === 'csv') { - await processCSVFile(info.file.originFileObj); - } - if (type === 'excel') { - setSheetNames([]); - setCurrentSheetName(undefined); - await processExcelFile(info.file.originFileObj); - } + await loadFileMetadata(info.file.originFileObj); }; useEffect(() => { @@ -542,25 +530,10 @@ const UploadDataModal: FunctionComponent = ({ if (!previewUploadedFile) { return; } - processCSVFile(fileList[0].originFileObj).then(r => r); + loadFileMetadata(fileList[0].originFileObj).then(r => r); } }, [delimiter]); - useEffect(() => { - (async () => { - if ( - columns.length > 0 && - fileList[0].originFileObj && - fileList[0].originFileObj instanceof File - ) { - if (!previewUploadedFile) { - return; - } - await processExcelFile(fileList[0].originFileObj); - } - })(); - }, [currentSheetName]); - const validateUpload = (_: any, value: string) => { if (fileList.length === 0) { return Promise.reject(t('Uploading a file is required')); @@ -734,9 +707,9 @@ const UploadDataModal: FunctionComponent = ({ - - - {type === 'csv' && ( + {isFieldATypeSpecificField('delimiter', type) && ( + + = ({ allowNewOptions /> - )} - {type === 'excel' && ( + + + )} + {isFieldATypeSpecificField('sheet_name', type) && ( + + + + + + + + + )} + {isFieldATypeSpecificField('null_values', type) && ( + + + - - - - - - - - - - - - - - + + + + )} + {isFieldATypeSpecificField('skip_initial_space', type) && ( + + + + + + + + )} + {isFieldATypeSpecificField('skip_blank_lines', type) && ( + + + + + + + + )} + {isFieldATypeSpecificField('day_first', type) && ( + + + + + + + )} = ({ } key="3" > - - - - - - - = ({ - {type === 'csv' && ( + {isFieldATypeSpecificField('column_data_types', type) && ( = ({ + {currentDataframeIndex && + isFieldATypeSpecificField('index_column', type) && ( + + + + + + + + )} - -

{t('Rows')}

-

- {t('Set header rows and the number of rows to read or skip.')} -

- - } - key="4" - > - - - - - - - - - - - - - - - - - -
+ {isFieldATypeSpecificField('header_row', type) && + isFieldATypeSpecificField('rows_to_read', type) && + isFieldATypeSpecificField('skip_rows', type) && ( + +

{t('Rows')}

+

+ {t( + 'Set header rows and the number of rows to read or skip.', + )} +

+ + } + key="4" + > + + + + + + + + + + + + + + + + + +
+ )} diff --git a/superset-frontend/src/features/home/Menu.test.tsx b/superset-frontend/src/features/home/Menu.test.tsx index 28ef85aad5b90..01e3ea2a9a759 100644 --- a/superset-frontend/src/features/home/Menu.test.tsx +++ b/superset-frontend/src/features/home/Menu.test.tsx @@ -43,19 +43,19 @@ const dropdownItems = [ { label: 'Upload a CSV', name: 'Upload a CSV', - url: '/csvtodatabaseview/form', + url: '#', perm: true, }, { label: 'Upload a Columnar File', name: 'Upload a Columnar file', - url: '/columnartodatabaseview/form', + url: '#', perm: true, }, { label: 'Upload Excel', name: 'Upload Excel', - url: '/exceltodatabaseview/form', + url: '#', perm: true, }, ], diff --git a/superset-frontend/src/features/home/RightMenu.test.tsx b/superset-frontend/src/features/home/RightMenu.test.tsx index 30706459430f7..1cce6064a42df 100644 --- a/superset-frontend/src/features/home/RightMenu.test.tsx +++ b/superset-frontend/src/features/home/RightMenu.test.tsx @@ -54,13 +54,13 @@ const dropdownItems = [ { label: 'Upload CSV to database', name: 'Upload a CSV', - url: '/csvtodatabaseview/form', + url: '#', perm: true, }, { label: 'Upload columnar file to database', name: 'Upload a Columnar file', - url: '/columnartodatabaseview/form', + url: '#', perm: true, }, ], @@ -309,12 +309,10 @@ test('If there is a DB with allow_file_upload set as True the option should be e userEvent.hover(dropdown); const dataMenu = await screen.findByText(dropdownItems[0].label); userEvent.hover(dataMenu); + expect(await screen.findByText('Upload CSV to database')).toBeInTheDocument(); expect( - (await screen.findByText('Upload CSV to database')).closest('a'), - ).toHaveAttribute('href', '#'); - expect( - (await screen.findByText('Upload Excel to database')).closest('a'), - ).toHaveAttribute('href', '#'); + await screen.findByText('Upload Excel to database'), + ).toBeInTheDocument(); }); test('If there is NOT a DB with allow_file_upload set as True the option should be disabled', async () => { diff --git a/superset-frontend/src/features/home/RightMenu.tsx b/superset-frontend/src/features/home/RightMenu.tsx index ff4a6a8095469..d88269cf5b1bf 100644 --- a/superset-frontend/src/features/home/RightMenu.tsx +++ b/superset-frontend/src/features/home/RightMenu.tsx @@ -45,6 +45,7 @@ import { } from 'src/types/bootstrapTypes'; import { RootState } from 'src/dashboard/types'; import DatabaseModal from 'src/features/databases/DatabaseModal'; +import UploadDataModal from 'src/features/databases/UploadDataModel'; import { uploadUserPerms } from 'src/views/CRUD/utils'; import TelemetryPixel from 'src/components/TelemetryPixel'; import LanguagePicker from './LanguagePicker'; @@ -143,6 +144,11 @@ const RightMenu = ({ HAS_GSHEETS_INSTALLED, } = useSelector(state => state.common.conf); const [showDatabaseModal, setShowDatabaseModal] = useState(false); + const [showCSVUploadModal, setShowCSVUploadModal] = useState(false); + const [showExcelUploadModal, setShowExcelUploadModal] = + useState(false); + const [showColumnarUploadModal, setShowColumnarUploadModal] = + useState(false); const [engine, setEngine] = useState(''); const canSql = findPermission('can_sqllab', 'Superset', roles); const canDashboard = findPermission('can_write', 'Dashboard', roles); @@ -188,23 +194,20 @@ const RightMenu = ({ }, { label: t('Upload CSV to database'), - name: 'Upload a CSV', - url: '#', + name: GlobalMenuDataOptions.CSVUpload, perm: canUploadCSV && showUploads, disable: isAdmin && !allowUploads, }, { - label: t('Upload columnar file to database'), - name: 'Upload a Columnar file', - url: '/columnartodatabaseview/form', - perm: canUploadColumnar && showUploads, + label: t('Upload Excel to database'), + name: GlobalMenuDataOptions.ExcelUpload, + perm: canUploadExcel && showUploads, disable: isAdmin && !allowUploads, }, { - label: t('Upload Excel to database'), - name: 'Upload Excel', - url: '#', - perm: canUploadExcel && showUploads, + label: t('Upload Columnar file to database'), + name: GlobalMenuDataOptions.ColumnarUpload, + perm: canUploadColumnar && showUploads, disable: isAdmin && !allowUploads, }, ], @@ -289,6 +292,12 @@ const RightMenu = ({ } else if (itemChose.key === GlobalMenuDataOptions.GoogleSheets) { setShowDatabaseModal(true); setEngine('Google Sheets'); + } else if (itemChose.key === GlobalMenuDataOptions.CSVUpload) { + setShowCSVUploadModal(true); + } else if (itemChose.key === GlobalMenuDataOptions.ExcelUpload) { + setShowExcelUploadModal(true); + } else if (itemChose.key === GlobalMenuDataOptions.ColumnarUpload) { + setShowColumnarUploadModal(true); } }; @@ -350,6 +359,30 @@ const RightMenu = ({ onDatabaseAdd={handleDatabaseAdd} /> )} + {canUploadCSV && ( + setShowCSVUploadModal(false)} + show={showCSVUploadModal} + allowedExtensions={CSV_EXTENSIONS} + type="csv" + /> + )} + {canUploadExcel && ( + setShowExcelUploadModal(false)} + show={showExcelUploadModal} + allowedExtensions={EXCEL_EXTENSIONS} + type="excel" + /> + )} + {canUploadColumnar && ( + setShowColumnarUploadModal(false)} + show={showColumnarUploadModal} + allowedExtensions={COLUMNAR_EXTENSIONS} + type="columnar" + /> + )} {environmentTag?.text && (