diff --git a/documentation/configuration-utils/_cairo.config.json b/documentation/configuration-utils/_cairo.config.json
index 4611663ee..24ae49061 100644
--- a/documentation/configuration-utils/_cairo.config.json
+++ b/documentation/configuration-utils/_cairo.config.json
@@ -466,5 +466,29 @@
   "cairo.partition.encoder.parquet.raw.array.encoding.enabled": {
     "default": "false",
     "description": "determines whether to export arrays in QuestDB-native binary format (true, less compatible) or Parquet-native format (false, more compatible)."
+  },
+  "cairo.partition.encoder.parquet.version": {
+    "default": "1",
+    "description": "Parquet format version to use for parquet-encoded partitions. Can be 1 or 2."
+  },
+  "cairo.partition.encoder.parquet.statistics.enabled": {
+    "default": "true",
+    "description": "Controls whether statistics are included in parquet-encoded partitions."
+  },
+  "cairo.partition.encoder.parquet.compression.codec": {
+    "default": "ZSTD",
+    "description": "Sets the default compression codec for parquet-encoded partitions. Alternatives include `LZ4_RAW` and `SNAPPY`."
+  },
+  "cairo.partition.encoder.parquet.compression.level": {
+    "default": "9 (ZSTD), 0 (otherwise)",
+    "description": "Sets the default compression level for parquet-encoded partitions. The valid range depends on the underlying compression codec."
+  },
+  "cairo.partition.encoder.parquet.row.group.size": {
+    "default": "100000",
+    "description": "Sets the default row-group size for parquet-encoded partitions."
+  },
+  "cairo.partition.encoder.parquet.data.page.size": {
+    "default": "1048576",
+    "description": "Sets the default data page size for parquet-encoded partitions."
+  }
 }
diff --git a/documentation/configuration-utils/_parquet-export.config.json b/documentation/configuration-utils/_parquet-export.config.json
new file mode 100644
index 000000000..df5fb5e44
--- /dev/null
+++ b/documentation/configuration-utils/_parquet-export.config.json
@@ -0,0 +1,6 @@
+{
+  "cairo.sql.copy.export.root": {
+    "default": "export",
+    "description": "Root directory for Parquet exports via the `COPY TO` SQL command. This path must not overlap with other directories of the running instance (e.g. db, conf), otherwise the export may delete or overwrite existing files. Relative paths are resolved against the server root directory."
+  }
+}
\ No newline at end of file
diff --git a/documentation/configuration.md b/documentation/configuration.md
index b84261150..6fc267b67 100644
--- a/documentation/configuration.md
+++ b/documentation/configuration.md
@@ -10,6 +10,7 @@ import cairoConfig from "./configuration-utils/\_cairo.config.json"
 import parallelSqlConfig from "./configuration-utils/\_parallel-sql.config.json"
 import walConfig from "./configuration-utils/\_wal.config.json"
 import csvImportConfig from "./configuration-utils/\_csv-import.config.json"
+import parquetExportConfig from "./configuration-utils/\_parquet-export.config.json"
 import postgresConfig from "./configuration-utils/\_postgres.config.json"
 import tcpConfig from "./configuration-utils/\_tcp.config.json"
 import udpConfig from "./configuration-utils/\_udp.config.json"
@@ -168,12 +169,14 @@ applying WAL data to the table storage:

-### CSV import
+### COPY settings
+
+#### Import

 This section describes configuration settings for using `COPY` to import large
-CSV files.
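As a point of reference, these keys are set in `server.conf` like any other Cairo setting. A hypothetical snippet overriding the Parquet encoder defaults listed above (the values shown are illustrative, not recommendations):

```shell title="server.conf (illustrative)"
# Parquet format version for parquet-encoded partitions (1 or 2)
cairo.partition.encoder.parquet.version=2
# Include column statistics in the encoded files
cairo.partition.encoder.parquet.statistics.enabled=true
# Compression codec and codec-specific level
cairo.partition.encoder.parquet.compression.codec=ZSTD
cairo.partition.encoder.parquet.compression.level=9
# Row-group and data-page sizing
cairo.partition.encoder.parquet.row.group.size=100000
cairo.partition.encoder.parquet.data.page.size=1048576
# Root directory for COPY ... TO Parquet exports
cairo.sql.copy.export.root=export
```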
-Settings for `COPY`:
+Settings for `COPY FROM` (import):

-#### CSV import configuration for Docker
+**CSV import configuration for Docker**

 For QuestDB instances using Docker:
@@ -222,6 +225,28 @@ Where:

 It is important that the two paths are identical
 (`/var/lib/questdb/questdb_import` in the example).
+
+#### Export
+
+
+
+Parquet export is also affected by the query execution and Parquet conversion settings.
+
+If not overridden, the following default settings will be used.
+
+
+
 ### Parallel SQL execution

 This section describes settings that can affect the level of parallelism during
diff --git a/documentation/guides/export-parquet.md b/documentation/guides/export-parquet.md
index 18bb245fc..d26aa23e1 100644
--- a/documentation/guides/export-parquet.md
+++ b/documentation/guides/export-parquet.md
@@ -33,20 +33,17 @@ You can override these defaults when [exporting via COPY](#export-query-as-files

 ## Export queries as files

-:::warning
-Exporting as files is right now available on a development branch: [https://github.com/questdb/questdb/pull/6008](https://github.com/questdb/questdb/pull/6008).
-If you want to test this feature, you need to clone and compile the branch.
-
-The code is functional, but it is just lacking fuzzy tests and documentation. We should be able to include this in a
-release soon enough, but for exporting it is safe to just checkout the development branch, compile, and start QuestDB
-pointing to the target jar.
-:::
-
 To export a query as a file, you can use either the `/exp` REST API endpoint or
 the `COPY` command.

 ### Export query as file via REST

+:::tip
+
+See also the [/exp documentation](/docs/reference/api/rest/#exp---export-data).
+
+:::
+
 You can use the same parameters as when doing a
 [CSV export](/docs/reference/api/rest/#exp---export-data), only passing
 `parquet` as the `fmt` parameter value.

 ```
@@ -67,12 +64,18 @@ to point DuckDB to the example file exported in the previous example, you could
 start DuckDB and execute:

 ```
- select * from read_parquet('~/tmp/exp.parquet');
+select * from read_parquet('~/tmp/exp.parquet');
 ```

-
 ### Export query as files via COPY

+:::tip
+
+See also the [COPY-TO documentation](/docs/reference/sql/copy).
+
+:::
+
 If you prefer to export data via SQL, or if you want to export asynchronously, you can use the `COPY` command from the
 web console, from any pgwire-compliant client,
 or using the [`exec` endpoint](/docs/reference/api/rest/#exec---execute-queries) of the REST API.
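For instance, a `COPY ... TO` statement can be submitted over HTTP by URL-encoding it into the `query` parameter of the `/exec` endpoint. A minimal sketch, assuming a local instance on the default port 9000 and the `trades` table used elsewhere in these docs:

```shell
# Submit an asynchronous Parquet export via the REST /exec endpoint
curl -G \
  --data-urlencode "query=COPY trades TO 'trades_export' WITH FORMAT PARQUET;" \
  http://localhost:9000/exec
```

The result set returned by `COPY` contains the export `id`, which can then be looked up in `sys.copy_export_log`.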
@@ -81,13 +84,13 @@ or using the [`exec` endpoint](/docs/reference/api/rest/#exec---execute-queries)

 You can export a query:

 ```
- COPY (select * from market_data limit 3) TO 'market_data_parquet_table' WITH FORMAT PARQUET;
+COPY (select * from market_data limit 3) TO 'market_data_parquet_table' WITH FORMAT PARQUET;
 ```

 Or you can export a whole table:

 ```
- COPY market_data TO 'market_data_parquet_table' WITH FORMAT PARQUET;
+COPY market_data TO 'market_data_parquet_table' WITH FORMAT PARQUET;
 ```

@@ -106,7 +109,6 @@ If you want to monitor the export process, you can issue a call like this:

 SELECT * FROM 'sys.copy_export_log' WHERE id = '45ba24e5ba338099';
 ```

-
 While it is running, export can be cancelled with:

 ```
diff --git a/documentation/guides/import-csv.md b/documentation/guides/import-csv.md
index 062e5a6fd..8b0d14a9c 100644
--- a/documentation/guides/import-csv.md
+++ b/documentation/guides/import-csv.md
@@ -127,7 +127,7 @@ csvstack *.csv > singleFile.csv

 #### Configure `COPY`

-- Enable `COPY` and [configure](/docs/configuration/#csv-import) the `COPY`
+- Enable `COPY` and [configure](/docs/configuration/#copy-settings) the `COPY`
   directories to suit your server.
 - `cairo.sql.copy.root` must be set for `COPY` to work.
diff --git a/documentation/reference/api/rest.md b/documentation/reference/api/rest.md
index 184408294..e0318ede2 100644
--- a/documentation/reference/api/rest.md
+++ b/documentation/reference/api/rest.md
@@ -20,8 +20,7 @@ off-the-shelf HTTP clients. It provides a simple way to interact with QuestDB
 and is compatible with most programming languages. API functions are fully
 keyed on the URL and they use query parameters as their arguments.

-The Web Console[Web Console](/docs/web-console/) is the official Web client
-relying on the REST API.
+The [Web Console](/docs/web-console/) is the official Web client for QuestDB and relies on the REST API.

 **Available methods**

@@ -591,15 +590,41 @@ returned in a tabular form to be saved and reused as opposed to JSON.

 `/exp` is expecting an HTTP GET request with following parameters:

 | Parameter | Required | Description |
-| :-------- | :------- | :------------------------------------------------------------ |
+|:----------|:---------|:----------------------------------------------------------------|
 | `query` | Yes | URL encoded query text. It can be multi-line. |
 | `limit` | No | Paging parameter. For example, `limit=10,20` will return row numbers 10 through to 20 inclusive and `limit=20` will return first 20 rows, which is equivalent to `limit=0,20`. `limit=-20` will return the last 20 rows. |
 | `nm` | No | `true` or `false`. Skips the metadata section of the response when set to `true`. |
+| `fmt` | No | Export format. Valid values: `parquet`, `csv`. When set to `parquet`, exports data in Parquet format instead of CSV. |
+
+#### Parquet Export Parameters
+
+:::warning
+
+Parquet exports currently require writing interim data to disk, and therefore must be run on **read-write instances only**.
+
+This limitation will be removed in a future release.
+ +::: + +When `fmt=parquet`, the following additional parameters are supported: + +| Parameter | Required | Default | Description | +|:---------------------|:---------|:----------|:-------------------------------------------------------------------------------------------------------------------| +| `partition_by` | No | `NONE` | Partition unit: `NONE`, `HOUR`, `DAY`, `WEEK`, `MONTH`, or `YEAR`. | +| `compression_codec` | No | `ZSTD` | Compression algorithm: `UNCOMPRESSED`, `SNAPPY`, `GZIP`, `LZ4`, `ZSTD`, `LZ4_RAW`, `BROTLI`, `LZO`. | +| `compression_level` | No | `9` | Compression level (codec-specific). Higher values = better compression but slower. | +| `row_group_size` | No | `100000` | Number of rows per Parquet row group. | +| `data_page_size` | No | `1048576` | Size of data pages in bytes (default 1MB). | +| `statistics_enabled` | No | `true` | Enable Parquet column statistics: `true` or `false`. | +| `parquet_version` | No | `2` | Parquet format version: `1` (v1.0) or `2` (v2.0). | +| `raw_array_encoding` | No | `false` | Use raw encoding for arrays: `true` (lighter-weight, less compatible) or `false` (heavier-weight, more compatible) | The parameters must be URL encoded. ### Examples +#### CSV Export (default) + Considering the query: ```shell @@ -620,6 +645,44 @@ A HTTP status code of `200` is returned with the following response body: 200501BS00005,"2005-01-10T00:00:00.000Z",21:13 ``` +#### Parquet Export + +Export query results to Parquet format: + +```shell +curl -G \ + --data-urlencode "query=SELECT * FROM trades WHERE timestamp IN today()" \ + --data-urlencode "fmt=parquet" \ + http://localhost:9000/exp > trades_today.parquet +``` + +#### Parquet Export with Custom Options + +Export with custom compression and partitioning: + +```shell +curl -G \ + --data-urlencode "query=SELECT * FROM trades" \ + --data-urlencode "fmt=parquet" \ + --data-urlencode "partition_by=DAY" \ + --data-urlencode "compression_codec=ZSTD" \ + --data-urlencode "compression_level=9" \ + --data-urlencode "row_group_size=1000000" \ + http://localhost:9000/exp > trades.parquet +``` + +#### Parquet Export with LZ4 Compression + +Export with LZ4_RAW compression for faster export: + +```shell +curl -G \ + --data-urlencode "query=SELECT symbol, price, amount FROM trades WHERE timestamp > dateadd('h', -1, now())" \ + --data-urlencode "fmt=parquet" \ + --data-urlencode "compression_codec=LZ4_RAW" \ + http://localhost:9000/exp > recent_trades.parquet +``` + ## Error responses ### Malformed queries diff --git a/documentation/reference/function/aggregation.md b/documentation/reference/function/aggregation.md index a4218986e..2919641f8 100644 --- a/documentation/reference/function/aggregation.md +++ b/documentation/reference/function/aggregation.md @@ -22,17 +22,19 @@ Running it will result in the following error: You can work around this limitation by using CTEs or subqueries: -```questdb-sql title="aggregates as function args workaround" demo +```questdb-sql title="CTE workaround" -- CTE WITH minmax AS ( - SELECT min(timestamp) as min_date, max(timestamp) as max_date FROM trades + SELECT min(timestamp) AS min_date, max(timestamp) AS max_date FROM trades ) SELECT datediff('d', min_date, max_date) FROM minmax; -- Subquery -SELECT datediff('d', min_date, max_date) FROM ( - SELECT min(timestamp) as min_date, max(timestamp) as max_date FROM trades +SELECT datediff('d', min_date, max_date) +FROM ( + SELECT min(timestamp) AS min_date, max(timestamp) AS max_date FROM trades ); + ``` ::: diff --git 
a/documentation/reference/sql/copy.md b/documentation/reference/sql/copy.md index c7f359ea0..74aac097e 100644 --- a/documentation/reference/sql/copy.md +++ b/documentation/reference/sql/copy.md @@ -23,6 +23,13 @@ following impact: ## Description +The `COPY` command has two modes of operation: + +1. **Import mode**: `COPY table_name FROM 'file.csv'`, copying data from a delimited text file into QuestDB. +2. **Export mode**: `COPY table_name TO 'output_directory'` or `COPY (query) TO 'output_directory'`, exporting data to Parquet files. + +## Import mode (COPY-FROM) + Copies tables from a delimited text file saved in the defined root directory into QuestDB. `COPY` has the following import modes: @@ -48,7 +55,7 @@ into QuestDB. `COPY` has the following import modes: :::note -`COPY` takes up all the available resources. While one import is running, new +Parallel `COPY` takes up all the available resources. While one import is running, new request(s) will be rejected. ::: @@ -56,7 +63,7 @@ request(s) will be rejected. `COPY '' CANCEL` cancels the copying operation defined by the import `id`, while an import is taking place. -### Root directory +### Import root `COPY` requires a defined root directory where CSV files are saved and copied from. A CSV file must be saved to the root directory before starting the `COPY` @@ -64,13 +71,13 @@ operation. There are two root directories to be defined: - `cairo.sql.copy.root` is used for storing regular files to be imported. By default, it points to the `root_directory/import` directory. This allows you to drop a CSV - file into the `import` directory and start the import operation. + file into the `import` directory and start the import operation. - `cairo.sql.copy.work.root` is used for storing temporary files like indexes or temporary partitions. Unless otherwise specified, it points to the `root_directory/tmp` directory. Use the [configuration keys](/docs/configuration/) to edit these properties in -[`COPY` configuration settings](/docs/configuration/#csv-import): +[`COPY` configuration settings](/docs/configuration/#copy-settings): ```shell title="Example" cairo.sql.copy.root=/Users/UserName/Desktop @@ -90,10 +97,9 @@ the `/Users` tree and set the root directory accordingly. ::: -### Log table +### Logs -`COPY` generates a log table,`sys.text_import_log`, tracking `COPY` operation -for the last three days with the following information: +`COPY-FROM` reports its progress through a system table, `sys.text_import_log`. This contains the following information: | Column name | Data type | Notes | | ------------- | --------- | ----------------------------------------------------------------------------- | @@ -110,27 +116,27 @@ for the last three days with the following information: | | | The counters are shown in the final log row for the given import | | errors | long | The number of errors for the given phase | -\* Available phases for parallel import are: -- setup -- boundary_check -- indexing -- partition_import -- symbol_table_merge -- update_symbol_keys -- build_symbol_index -- move_partitions -- attach_partitions -- analyze_file_structure -- cleanup +**Parallel import phases** + - setup + - boundary_check + - indexing + - partition_import + - symbol_table_merge + - update_symbol_keys + - build_symbol_index + - move_partitions + - attach_partitions + - analyze_file_structure + - cleanup -Log table row retention is configurable through -`cairo.sql.copy.log.retention.days` setting, and is three days by default. 
+The retention for this table is configured using the `cairo.sql.copy.log.retention.days` setting, and is three days by default. -`COPY` returns `id` value from `sys.text_import_log` to track the import -progress. +`COPY` returns an `id` value, which can be correlated with `sys.text_import_log` to track the import progress. -## Options +### Options + +These options are provided as key-value pairs after the `WITH` keyword. - `HEADER true/false`: When `true`, QuestDB automatically assumes the first row is a header. Otherwise, schema recognition is used to determine whether the @@ -144,13 +150,13 @@ progress. - `DELIMITER`: Default setting is `,`. - `PARTITION BY`: Partition unit. - `ON ERROR`: Define responses to data parsing errors. The valid values are: - - `SKIP_ROW`: Skip the entire row - - `SKIP_COLUMN`: Skip column and use the default value (`null` for nullable - types, `false` for boolean, `0` for other non-nullable types) - - `ABORT`: Abort whole import on first error, and restore the pre-import table - status + - `SKIP_ROW`: Skip the entire row + - `SKIP_COLUMN`: Skip column and use the default value (`null` for nullable + types, `false` for boolean, `0` for other non-nullable types) + - `ABORT`: Abort whole import on first error, and restore the pre-import table + status -## Examples +### Examples For more details on parallel import, please also see [Importing data in bulk via CSV](/docs/guides/import-csv/#import-csv-via-copy-sql). @@ -194,3 +200,189 @@ SELECT * FROM 'sys.text_import_log' WHERE id = '55ca24e5ba328050' LIMIT -1; | ts | id | table | file | phase | status | message | rows_handled | rows_imported | errors | | :-------------------------- | ---------------- | ------- | ----------- | ----- | --------- | ---------------------------------------------------------- | ------------ | ------------- | ------ | | 2022-08-03T14:04:42.268502Z | 55ca24e5ba328050 | weather | weather.csv | null | cancelled | import cancelled [phase=partition_import, msg=`Cancelled`] | 0 | 0 | 0 | + + +## Export mode (COPY-TO) + +Exports data from a table or query result set to Parquet format. The export is performed asynchronously and non-blocking, allowing writes to continue during the export process. + +**Key features:** + +- Export entire tables or query results +- Configurable Parquet export options (compression, row group size, etc.) +- Non-blocking exports - writes continue during export +- Supports partitioned exports matching table partitioning +- Configurable size limits + +### Export root + +:::warning + +Parquet exports currently require writing interim data to disk, and therefore must be run on **read-write instances only**. + +This limitation will be removed in future. + +::: + +The export destination is relative to `cairo.sql.copy.export.root` (defaults to `root_directory/export`). You can configure this through the [configuration settings](/docs/configuration/). + +### Logs + +`COPY-TO` reports its progress through a system table, `sys.copy_export_log`. 
This contains the following information:
+
+| Column name        | Data type | Notes                                                                                                                                               |
+|--------------------|-----------|-----------------------------------------------------------------------------------------------------------------------------------------------------|
+| ts                 | timestamp | The log event timestamp                                                                                                                             |
+| id                 | string    | Export id                                                                                                                                           |
+| table_name         | symbol    | Source table name (or 'query' for subquery exports)                                                                                                |
+| export_path        | symbol    | The destination directory path                                                                                                                      |
+| num_exported_files | int       | The number of files exported                                                                                                                        |
+| phase              | symbol    | The export execution phase: none, wait_to_run, populating_temp_table, converting_partitions, move_files, dropping_temp_table, sending_data, success |
+| status             | symbol    | The event status: started, finished, failed, cancelled                                                                                              |
+| message            | varchar   | Information about the current phase/step                                                                                                            |
+| errors             | long      | Error code(s)                                                                                                                                       |
+
+Log table row retention is configurable through the `cairo.sql.copy.log.retention.days` setting, and is three days by default.
+
+`COPY TO` returns an `id` value, which can be correlated with `sys.copy_export_log` to track the export progress.
+
+### Options
+
+All export options are specified using the `WITH` clause after the `TO` destination path.
+
+- `FORMAT PARQUET`: Specifies Parquet as the export format (currently the only supported format). Default: `PARQUET`.
+- `PARTITION_BY <unit>`: Partition the export by time unit. Valid values: `NONE`, `HOUR`, `DAY`, `WEEK`, `MONTH`, `YEAR`. Default: matches the source table's partitioning, or `NONE` for queries.
+- `COMPRESSION_CODEC <codec>`: Parquet compression algorithm. Valid values: `UNCOMPRESSED`, `SNAPPY`, `GZIP`, `LZ4`, `ZSTD`, `LZ4_RAW`. Default: `LZ4_RAW`.
+- `COMPRESSION_LEVEL <level>`: Compression level (codec-specific). Higher values mean better compression but slower speed. Default: varies by codec.
+- `ROW_GROUP_SIZE <num_rows>`: Number of rows per Parquet row group. Larger values improve compression but increase memory usage. Default: `100000`.
+- `DATA_PAGE_SIZE <bytes>`: Size of data pages within row groups, in bytes. Default: `1048576` (1MB).
+- `STATISTICS_ENABLED true/false`: Enable Parquet column statistics for better query performance. Default: `true`.
+- `PARQUET_VERSION <version>`: Parquet format version. Valid values: `1` (v1.0) or `2` (v2.0). Default: `2`.
+- `RAW_ARRAY_ENCODING true/false`: Use raw encoding for arrays: `true` is lighter-weight but less compatible with Parquet readers. Default: `true`.
+
+### Examples
+
+#### Export entire table to Parquet
+
+Export a complete table to Parquet format:
+
+```questdb-sql title="Export table to Parquet"
+COPY trades TO 'trades_export' WITH FORMAT PARQUET;
+```
+
+Returns an export ID:
+
+| id               |
+| ---------------- |
+| 7f3a9c2e1b456789 |
+
+Track export progress:
+
+```questdb-sql
+SELECT * FROM sys.copy_export_log WHERE id = '7f3a9c2e1b456789';
+```
+
+This will copy all of the partitions from `trades` and convert each of them to Parquet.
+
+If `PARTITION_BY NONE` is used, a single Parquet file is generated instead.
+
+#### Export query results to Parquet
+
+Export the results of a query:
+
+```questdb-sql title="Export filtered data"
+COPY (SELECT * FROM trades WHERE timestamp IN today() AND symbol = 'BTC-USD')
+TO 'btc_today'
+WITH FORMAT PARQUET;
+```
+
+This will export the result set to a single Parquet file.
+
+#### Export with partitioning
+
+Export data partitioned by day:
+
+```questdb-sql title="Export with daily partitions"
+COPY trades TO 'trades_daily'
+WITH FORMAT PARQUET
+PARTITION_BY DAY;
+```
+
+The underlying table does not itself need to be partitioned.
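To spot-check a partitioned export, you can read all of the generated files back at once, for example with DuckDB's glob support. A minimal sketch, assuming the default `export` root, the `trades_daily` destination from the example above, and DuckDB started from the QuestDB server root directory:

```
select * from read_parquet('export/trades_daily/*.parquet');
```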
+You can likewise output query results as partitions:
+
+```questdb-sql title="Export queries with partitions"
+COPY (
+  SELECT generate_series AS date
+  FROM generate_series('2025-01-01', '2025-02-01', '1d')
+)
+TO 'dates'
+WITH FORMAT PARQUET
+PARTITION_BY DAY;
+```
+
+This creates a separate Parquet file for each day's data, named by date, inside the destination directory. For example:
+
+- export
+  - dates
+    - 2025-01-01.parquet
+    - 2025-01-02.parquet
+    - 2025-01-03.parquet
+    - ...
+
+#### Export with custom Parquet options
+
+Configure compression, row group size, and other Parquet settings:
+
+```questdb-sql title="Export with custom compression"
+COPY trades TO 'trades_compressed'
+WITH
+  FORMAT PARQUET
+  COMPRESSION_CODEC ZSTD
+  COMPRESSION_LEVEL 9
+  ROW_GROUP_SIZE 1000000
+  DATA_PAGE_SIZE 2097152;
+```
+
+This allows you to tune each export request to your particular needs.
+
+#### Export aggregated data
+
+Export aggregated results for analysis:
+
+```questdb-sql title="Export OHLCV data"
+COPY (
+  SELECT
+    timestamp,
+    symbol,
+    first(price) AS open,
+    max(price) AS high,
+    min(price) AS low,
+    last(price) AS close,
+    sum(amount) AS volume
+  FROM trades
+  WHERE timestamp > dateadd('d', -7, now())
+  SAMPLE BY 1h
+)
+TO 'ohlcv_7d'
+WITH FORMAT PARQUET;
+```
+
+#### Monitor export status
+
+Check all recent exports:
+
+```questdb-sql title="View export history"
+SELECT ts, table_name, export_path, status
+FROM sys.copy_export_log
+WHERE ts > dateadd('d', -1, now())
+ORDER BY ts DESC;
+```
+
+Sample output:
+
+| ts                          | table_name | export_path   | status   |
+| --------------------------- | ---------- | ------------- | -------- |
+| 2024-10-01T14:23:15.123456Z | trades     | trades_export | finished |
+| 2024-10-01T13:45:22.654321Z | query      | btc_today     | finished |
+| 2024-10-01T12:30:11.987654Z | trades     | trades_daily  | finished |
diff --git a/static/images/docs/diagrams/.railroad b/static/images/docs/diagrams/.railroad
index 1a6151886..f328c6f1b 100644
--- a/static/images/docs/diagrams/.railroad
+++ b/static/images/docs/diagrams/.railroad
@@ -121,7 +121,11 @@ case
   ::= 'CASE' ('WHEN' condition 'THEN' value)* ( | 'ELSE' value ) 'END'

 copy
-  ::= 'COPY' (id 'CANCEL' | tableName 'FROM' fileName (| 'WITH' (| 'HEADER' (true|false) |'TIMESTAMP' columnName | 'DELIMITER' delimiter | 'FORMAT' format | |'PARTITION BY' ('NONE'|'YEAR'|'MONTH'|'DAY'|'HOUR') | 'ON ERROR' ('SKIP_ROW'|'SKIP_COLUMN'|'ABORT')) ))
+  ::= 'COPY' (
+        id 'CANCEL'
+        | tableName 'FROM' fileName ('WITH' ('HEADER' (true|false) | 'TIMESTAMP' columnName | 'DELIMITER' delimiter | 'FORMAT' format | 'PARTITION BY' ('NONE'|'YEAR'|'MONTH'|'DAY'|'HOUR') | 'ON ERROR' ('SKIP_ROW'|'SKIP_COLUMN'|'ABORT')))?
+        | (tableName | '(' selectQuery ')') 'TO' destinationPath ('WITH' ('FORMAT' 'PARQUET' | 'PARTITION_BY' ('NONE'|'HOUR'|'DAY'|'WEEK'|'MONTH'|'YEAR') | 'COMPRESSION_CODEC' ('UNCOMPRESSED'|'SNAPPY'|'GZIP'|'LZ4'|'ZSTD'|'LZ4_RAW'|'BROTLI'|'LZO') | 'COMPRESSION_LEVEL' number | 'ROW_GROUP_SIZE' number | 'DATA_PAGE_SIZE' number | 'STATISTICS_ENABLED' (true|false) | 'PARQUET_VERSION' ('1'|'2') | 'RAW_ARRAY_ENCODING' (true|false)))?
+      )

 select
   ::= ('SELECT' ( (column | expression | function) ('AS' alias | alias)? (',' (column | expression | function) ('AS' alias | alias)? )* ) 'FROM')? ( table | '(' query ')') ('AS' alias | alias)?
diff --git a/static/images/docs/diagrams/copy.svg b/static/images/docs/diagrams/copy.svg
index c06a2091e..6796d7378 100644
--- a/static/images/docs/diagrams/copy.svg
+++ b/static/images/docs/diagrams/copy.svg

Regenerated railroad diagram (SVG markup omitted). The updated diagram keeps the existing `COPY id CANCEL` and `COPY tableName FROM fileName WITH ...` branches, and adds the new export branch `COPY (tableName | '(' selectQuery ')') TO destinationPath WITH ...` with the options FORMAT PARQUET, PARTITION BY (NONE | HOUR | DAY | WEEK | MONTH | YEAR), SIZE_LIMIT sizeValue, COMPRESSION_CODEC (UNCOMPRESSED | SNAPPY | GZIP | LZ4 | ZSTD | LZ4_RAW), COMPRESSION_LEVEL, ROW_GROUP_SIZE, DATA_PAGE_SIZE number, STATISTICS_ENABLED (true | false), RAW_ARRAY_ENCODING (true | false), and PARQUET_VERSION (1 | 2).