1 change: 1 addition & 0 deletions desktop/core/src/desktop/api_public_urls_v1.py
@@ -164,6 +164,7 @@
re_path(r'^importer/file/guess_metadata/?$', importer_api.guess_file_metadata, name='importer_guess_file_metadata'),
re_path(r'^importer/file/guess_header/?$', importer_api.guess_file_header, name='importer_guess_file_header'),
re_path(r'^importer/file/preview/?$', importer_api.preview_file, name='importer_preview_file'),
re_path(r'^importer/table/create/?$', importer_api.create_table, name='importer_create_table'),
re_path(r'^importer/sql_type_mapping/?$', importer_api.get_sql_type_mapping, name='importer_get_sql_type_mapping'),
]

65 changes: 65 additions & 0 deletions desktop/core/src/desktop/lib/importer/api.py
@@ -26,6 +26,7 @@

from desktop.lib.importer import operations
from desktop.lib.importer.serializers import (
CreateTableSerializer,
GuessFileHeaderSerializer,
GuessFileMetadataSerializer,
LocalFileUploadSerializer,
@@ -279,3 +280,67 @@ def get_sql_type_mapping(request: Request) -> Response:
except Exception as e:
LOG.exception(f"Error getting SQL type mapping: {e}", exc_info=True)
return Response({"error": str(e)}, status=status.HTTP_500_INTERNAL_SERVER_ERROR)


@api_view(["POST"])
@parser_classes([JSONParser])
@api_error_handler
def create_table(request: Request) -> Response:
"""Create a table from a file based on the provided parameters.

This API endpoint creates a table in the specified database using
the file data as the source. It can handle different file formats,
storage formats, and SQL dialects.

Args:
request: Request object containing parameters for table creation

Returns:
Response containing the SQL query for table creation and metadata:
- sql: The SQL query to create the table
- table_name: The name of the created table
- database_name: The database name
- column_count: The number of columns
"""

serializer = CreateTableSerializer(data=request.data)

if not serializer.is_valid():
return Response(serializer.errors, status=status.HTTP_400_BAD_REQUEST)

validated_data = serializer.validated_data

try:
result = operations.create_table(
file_path=validated_data["file_path"],
file_type=validated_data["file_type"],
import_type=validated_data["import_type"],
sql_dialect=validated_data["sql_dialect"],
database_name=validated_data["database_name"],
table_name=validated_data["table_name"],
columns=validated_data["columns"],
has_header=validated_data.get("has_header", False),
partition_columns=validated_data.get("partition_columns", []),
comment=validated_data.get("comment", ""),
external=validated_data.get("external", False),
external_path=validated_data.get("external_path", ""),
table_format=validated_data.get("table_format", "text"),
is_transactional=validated_data.get("is_transactional", False),
is_iceberg=validated_data.get("is_iceberg", False),
is_insert_only=validated_data.get("is_insert_only", False),
primary_keys=validated_data.get("primary_keys", []),
sheet_name=validated_data.get("sheet_name"),
field_separator=validated_data.get("field_separator"),
quote_char=validated_data.get("quote_char"),
record_separator=validated_data.get("record_separator"),
load_data=validated_data.get("load_data", True),
fs=request.fs if validated_data["import_type"] == "remote" else None,
)

return Response(result, status=status.HTTP_200_OK)

except ValueError as e:
return Response({"error": str(e)}, status=status.HTTP_400_BAD_REQUEST)
except Exception as e:
LOG.exception(f"Error creating table: {e}", exc_info=True)
return Response({"error": str(e)}, status=status.HTTP_500_INTERNAL_SERVER_ERROR)
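
For illustration, a client call to the new endpoint might look like the sketch below. The host, the /api/v1/ prefix, and the bearer-token header are assumptions about the deployment, not something this diff confirms; the payload fields mirror CreateTableSerializer and the expected response keys mirror the docstring above.

import requests

# Hypothetical host, API prefix, and auth scheme -- adjust to your deployment.
payload = {
    "file_path": "/user/demo/sales.csv",  # hypothetical source file
    "file_type": "csv",
    "import_type": "remote",
    "sql_dialect": "hive",
    "database_name": "default",
    "table_name": "sales",
    "has_header": True,
    "columns": [
        {"name": "id", "type": "INT"},
        {"name": "amount", "type": "DOUBLE"},
    ],
}

response = requests.post(
    "https://hue.example.com/api/v1/importer/table/create/",
    json=payload,
    headers={"Authorization": "Bearer <token>"},  # auth scheme assumed
)
print(response.json())  # expected keys: sql, table_name, database_name, column_count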
28 changes: 28 additions & 0 deletions desktop/core/src/desktop/lib/importer/operations.py
@@ -734,3 +734,31 @@ def _map_polars_dtype_to_sql_type(dialect: str, polars_type: str) -> str:
raise ValueError(f"No mapping for Polars dtype {polars_type} in dialect {dialect}")

return mapping[polars_type]


def create_table(
Copilot AI Jun 9, 2025
The function body is empty (pass), so calls to create_table will silently return None. Implement the table creation logic or raise NotImplementedError until ready.
file_path: str,
file_type: str,
import_type: str,
sql_dialect: str,
database_name: str,
table_name: str,
columns: List[Dict],
has_header: bool = False,
partition_columns: Optional[List[Dict]] = None,
comment: str = "",
external: bool = False,
external_path: str = "",
table_format: str = "text",
is_transactional: bool = False,
is_iceberg: bool = False,
is_insert_only: bool = False,
primary_keys: Optional[List[str]] = None,
sheet_name: Optional[str] = None,
field_separator: Optional[str] = ",",
quote_char: Optional[str] = '"',
record_separator: Optional[str] = "\n",
load_data: bool = True,
fs=None,
) -> Dict[str, Any]:
pass
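
To make the review comment above concrete, here is a minimal sketch of what the body could look like, assuming the goal is to render a CREATE TABLE statement from the validated inputs and return the metadata documented in api.create_table. The helper name, backtick quoting, and clause ordering are illustrative assumptions; the real implementation will presumably plug into Hue's existing DDL generation.

from typing import Any, Dict, List

def build_create_table_sql(
    database_name: str,
    table_name: str,
    columns: List[Dict],
    comment: str = "",
    external: bool = False,
    external_path: str = "",
    table_format: str = "text",
    field_separator: str = ",",
) -> Dict[str, Any]:
    # Hypothetical helper, not part of this PR: renders a Hive-style DDL string.
    column_defs = ",\n  ".join(f"`{col['name']}` {col['type']}" for col in columns)
    create_clause = "CREATE EXTERNAL TABLE" if external else "CREATE TABLE"
    sql = f"{create_clause} `{database_name}`.`{table_name}` (\n  {column_defs}\n)"
    if comment:
        sql += f"\nCOMMENT '{comment}'"
    if table_format == "text":
        sql += f"\nROW FORMAT DELIMITED FIELDS TERMINATED BY '{field_separator}'"
    else:
        sql += f"\nSTORED AS {table_format.upper()}"
    if external and external_path:
        sql += f"\nLOCATION '{external_path}'"
    # Shape matches the response documented in api.create_table.
    return {
        "sql": sql,
        "table_name": table_name,
        "database_name": database_name,
        "column_count": len(columns),
    }

Until a real body lands, raising NotImplementedError (as the reviewer suggests) is safer than the silent None.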
130 changes: 130 additions & 0 deletions desktop/core/src/desktop/lib/importer/serializers.py
@@ -173,3 +173,133 @@ def validate(self, data):
raise serializers.ValidationError({"sheet_name": "Sheet name is required for Excel files."})

return data


class CreateTableSerializer(serializers.Serializer):
Copilot AI Jun 9, 2025
[nitpick] This serializer is very large. Consider extracting file-type–specific and table-format–specific validations into helper methods or nested serializers for readability.
"""Serializer for table creation request validation.

This serializer validates the parameters required for creating a SQL table from a file.

Attributes:
file_path: Path to the source file to import data from
file_type: Type of file format (csv, tsv, excel, delimiter_format)
import_type: Type of import (local or remote)
sql_dialect: Target SQL dialect for table creation
database_name: The database name in which to create the table
table_name: The name of the table to create
has_header: Whether the file has a header row
columns: List of column definitions (each with name, type, etc.)
partition_columns: List of partition column definitions (optional)
external: Whether to create an external table (optional)
external_path: External table location path (optional, required if external=True)
table_format: Table storage format (text, parquet, orc, avro, kudu, iceberg) (optional)
field_separator: Field separator character (required for delimited files)
quote_char: Quote character (required for delimited files)
record_separator: Record separator character (required for delimited files)
sheet_name: Sheet name for Excel files (required when file_type is excel)
comment: Table comment (optional)
is_transactional: Whether to create a transactional table (optional, Hive only)
is_iceberg: Whether to create an Iceberg table (optional)
is_insert_only: Whether the transactional table is insert-only (optional)
primary_keys: List of primary key column names (optional, required for Kudu tables)
"""

# Source file information
file_path = serializers.CharField(required=True, help_text="Path to the file to import data from")
file_type = serializers.ChoiceField(
choices=["csv", "tsv", "excel", "delimiter_format"], required=True, help_text="Type of file (csv, tsv, excel, delimiter_format)"
)
import_type = serializers.ChoiceField(
choices=["local", "remote"], required=True, help_text="Whether the file is local or on a remote filesystem"
)

# Target table information
sql_dialect = serializers.ChoiceField(
choices=["hive", "impala", "trino", "phoenix", "sparksql"], required=True, help_text="SQL dialect for creating the table"
)
database_name = serializers.CharField(required=True, help_text="Database name where the table will be created")
table_name = serializers.CharField(required=True, help_text="Name of the table to create")

# Data format information
has_header = serializers.BooleanField(default=False, help_text="Whether the file has a header row")

# Column definitions
columns = serializers.ListField(
child=serializers.DictField(), required=True, help_text="List of column definitions with name, type, etc."
)

# Optional parameters
partition_columns = serializers.ListField(
child=serializers.DictField(), required=False, default=[], help_text="List of partition column definitions"
)
comment = serializers.CharField(required=False, allow_blank=True, default="", help_text="Table comment")

# Table storage options
external = serializers.BooleanField(default=False, help_text="Whether to create an external table")
external_path = serializers.CharField(required=False, allow_blank=True, help_text="Location path for external tables")
table_format = serializers.ChoiceField(
choices=["text", "parquet", "orc", "avro", "kudu", "iceberg"], default="text", help_text="Storage format for the table"
)

# Hive/Impala specific options
is_transactional = serializers.BooleanField(default=False, help_text="Whether to create a transactional table (Hive)")
is_insert_only = serializers.BooleanField(default=False, help_text="Whether the transactional table is insert-only")
is_iceberg = serializers.BooleanField(default=False, help_text="Whether to create an Iceberg table")

# Kudu specific options
primary_keys = serializers.ListField(
child=serializers.CharField(), required=False, default=[], help_text="List of primary key column names (required for Kudu tables)"
)

# Excel-specific fields
sheet_name = serializers.CharField(required=False, help_text="Sheet name for Excel files")

# Delimited file-specific fields
field_separator = serializers.CharField(required=False, help_text="Field separator character")
quote_char = serializers.CharField(required=False, help_text="Quote character")
record_separator = serializers.CharField(required=False, help_text="Record separator character")

# Additional options
load_data = serializers.BooleanField(default=True, help_text="Whether to load data from the file into the table")

def validate(self, data):
"""Validate the complete data set with interdependent field validation."""

# Validate Excel-specific parameters
if data.get("file_type") == "excel" and not data.get("sheet_name"):
raise serializers.ValidationError({"sheet_name": "Sheet name is required for Excel files."})

# Validate delimited file-specific parameters
if data.get("file_type") in ["csv", "tsv", "delimiter_format"]:
if not data.get("field_separator"):
# If not provided, set default value based on file type
if data.get("file_type") == "csv":
data["field_separator"] = ","
elif data.get("file_type") == "tsv":
data["field_separator"] = "\t"
else:
raise serializers.ValidationError({"field_separator": "Field separator is required for delimited files"})

if not data.get("quote_char"):
data["quote_char"] = '"' # Default quote character

if not data.get("record_separator"):
data["record_separator"] = "\n" # Default record separator

# Validate external table parameters
if data.get("external") and not data.get("external_path"):
raise serializers.ValidationError({"external_path": "External path is required for external tables."})

# Validate Kudu table parameters
if data.get("table_format") == "kudu" and not data.get("primary_keys"):
raise serializers.ValidationError({"primary_keys": "Primary keys are required for Kudu tables."})

# Validate transaction table parameters
if data.get("is_transactional") and data.get("sql_dialect") not in ["hive", "impala"]:
raise serializers.ValidationError({"is_transactional": "Transactional tables are only supported in Hive and Impala."})

# Validate Iceberg table parameters
if data.get("is_iceberg") and data.get("sql_dialect") not in ["hive", "impala", "sparksql"]:
raise serializers.ValidationError({"is_iceberg": "Iceberg tables are only supported in Hive, Impala, and SparkSQL."})

return data
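
As a quick check of the validation flow, the snippet below (illustrative field values only) shows validate() backfilling the delimiter defaults for a CSV payload:

data = {
    "file_path": "/tmp/demo.csv",  # hypothetical
    "file_type": "csv",
    "import_type": "local",
    "sql_dialect": "hive",
    "database_name": "default",
    "table_name": "demo",
    "columns": [{"name": "id", "type": "INT"}],
}

serializer = CreateTableSerializer(data=data)
assert serializer.is_valid(), serializer.errors
# CSV defaults were filled in by validate():
assert serializer.validated_data["field_separator"] == ","
assert serializer.validated_data["quote_char"] == '"'
assert serializer.validated_data["record_separator"] == "\n"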