Skip to content
Draft
118 changes: 118 additions & 0 deletions dev/docker-compose-rest-server.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

services:
  # LocalStack - provides DynamoDB and S3 locally.
  localstack:
    image: localstack/localstack:latest
    container_name: iceberg-localstack
    ports:
      - "4566:4566"  # LocalStack edge port
      - "4510-4559:4510-4559"  # External services port range
    environment:
      - SERVICES=dynamodb,s3
      - DEBUG=1
      - DOCKER_HOST=unix:///var/run/docker.sock
      - AWS_ACCESS_KEY_ID=test
      - AWS_SECRET_ACCESS_KEY=test
      - AWS_DEFAULT_REGION=us-east-1
    volumes:
      - localstack-data:/var/lib/localstack
      # NOTE(review): the Docker socket gives this container control of the
      # host daemon; confirm it is really needed for the dynamodb,s3 services.
      - "/var/run/docker.sock:/var/run/docker.sock"
    networks:
      - iceberg-net
    # Reports healthy once the DynamoDB API answers; localstack-init waits
    # on this via `condition: service_healthy`.
    healthcheck:
      test: ["CMD", "awslocal", "dynamodb", "list-tables"]
      interval: 10s
      timeout: 5s
      retries: 5

  # One-shot job that creates the S3 bucket and DynamoDB table in LocalStack.
  # rest-server is gated on this job exiting 0, so the script must exit
  # non-zero when provisioning fails instead of masking errors.
  localstack-init:
    image: amazon/aws-cli:latest
    container_name: iceberg-localstack-init
    depends_on:
      localstack:
        condition: service_healthy
    environment:
      - AWS_ACCESS_KEY_ID=test
      - AWS_SECRET_ACCESS_KEY=test
      - AWS_DEFAULT_REGION=us-east-1
    entrypoint: /bin/bash
    command:
      - "-c"
      - |
        # Abort on the first failing command so the container's exit status
        # reflects whether provisioning really succeeded.
        set -euo pipefail

        # No startup sleep is needed: depends_on already waits for the
        # LocalStack healthcheck to pass.

        # Create S3 bucket for warehouse (only when it is missing, so the
        # job can be re-run against a persisted LocalStack volume)
        if ! aws --endpoint-url=http://localstack:4566 s3api head-bucket --bucket warehouse 2>/dev/null; then
          aws --endpoint-url=http://localstack:4566 s3 mb s3://warehouse
        fi
        aws --endpoint-url=http://localstack:4566 s3api put-bucket-acl --bucket warehouse --acl public-read

        # Create DynamoDB table for Iceberg catalog (only when it is missing)
        if ! aws --endpoint-url=http://localstack:4566 dynamodb describe-table \
          --table-name iceberg \
          --region us-east-1 >/dev/null 2>&1; then
          aws --endpoint-url=http://localstack:4566 dynamodb create-table \
            --table-name iceberg \
            --attribute-definitions \
              AttributeName=identifier,AttributeType=S \
              AttributeName=namespace,AttributeType=S \
            --key-schema \
              AttributeName=identifier,KeyType=HASH \
              AttributeName=namespace,KeyType=RANGE \
            --global-secondary-indexes \
              IndexName=namespace-identifier,KeySchema=["{AttributeName=namespace,KeyType=HASH}","{AttributeName=identifier,KeyType=RANGE}"],Projection="{ProjectionType=KEYS_ONLY}" \
            --billing-mode PAY_PER_REQUEST \
            --region us-east-1
        else
          echo "Table already exists"
        fi

        echo "LocalStack initialized successfully"
    networks:
      - iceberg-net

  # REST Catalog Server, built from dev/rest-server/Dockerfile with the
  # repository root as build context.
  rest-server:
    build:
      context: ..
      dockerfile: dev/rest-server/Dockerfile
    container_name: iceberg-rest-server
    depends_on:
      localstack-init:
        condition: service_completed_successfully
    ports:
      - "8000:8000"
    environment:
      - ICEBERG_CATALOG_NAME=dynamodb-local
      - SERVER_HOST=0.0.0.0
      - SERVER_PORT=8000
      - LOG_LEVEL=info
      # Note: AWS credentials should be configured in .pyiceberg.yaml
      # Setting them here can cause conflicts with LocalStack
    volumes:
      # Bind mounts override the copies baked into the image, so local edits
      # to the server code and catalog config apply without a rebuild.
      - "./rest-server/main.py:/app/main.py"
      - "./rest-server/.pyiceberg.yaml:/root/.pyiceberg.yaml"
    networks:
      - iceberg-net
    healthcheck:
      test: ["CMD", "python", "-c", "import requests; requests.get('http://localhost:8000/health').raise_for_status()"]
      interval: 10s
      timeout: 5s
      retries: 5

# Bridge network that every service in this file attaches to.
networks:
  iceberg-net:
    driver: bridge

# Named volume mounted by the localstack service at /var/lib/localstack.
volumes:
  localstack-data: {}
68 changes: 68 additions & 0 deletions dev/rest-server/.pyiceberg.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
# Sample PyIceberg catalog definitions for the REST server.
#
# Install by copying this file to ~/.pyiceberg.yaml or ./.pyiceberg.yaml, then
# pick one of the catalogs below by name through the ICEBERG_CATALOG_NAME
# environment variable.

catalog:
  # --- dynamodb-local: DynamoDB catalog backed by LocalStack (local dev) ---
  # Both the catalog and the S3 warehouse point at the LocalStack edge port.
  dynamodb-local:
    type: 'dynamodb'
    table-name: 'iceberg'
    dynamodb.region: 'us-east-1'
    dynamodb.endpoint: 'http://localstack:4566'
    dynamodb.access-key-id: 'test'
    dynamodb.secret-access-key: 'test'
    s3.region: 'us-east-1'
    s3.endpoint: 'http://localstack:4566'
    s3.access-key-id: 'test'
    s3.secret-access-key: 'test'
    warehouse: 's3://warehouse/'

  # --- dynamodb-prod: DynamoDB catalog (production) ---
  # No endpoint or credentials are set in this entry.
  dynamodb-prod:
    type: 'dynamodb'
    table-name: 'iceberg-production'
    dynamodb.region: 'us-east-1'
    s3.region: 'us-east-1'
    warehouse: 's3://my-production-warehouse/'

  # --- test: in-memory catalog (for testing, no persistence) ---
  test:
    type: 'in-memory'
    warehouse: 'memory://test/'

  # --- postgres: PostgreSQL-backed SQL catalog (local development) ---
  postgres:
    type: 'sql'
    uri: 'postgresql://localhost:5432/iceberg'
    warehouse: 's3://warehouse/'
    # Optional S3 configuration
    s3.endpoint: 'http://localhost:9000'
    s3.access-key-id: 'admin'
    s3.secret-access-key: 'password'

  # --- glue-prod: AWS Glue catalog (AWS native) ---
  glue-prod:
    type: 'glue'
    warehouse: 's3://my-glue-warehouse/'
    # AWS credentials from environment or IAM role

  # --- hive-prod: Hive Metastore catalog (on-premises) ---
  hive-prod:
    type: 'hive'
    uri: 'thrift://localhost:9083'
    warehouse: 'hdfs://namenode:9000/warehouse'
    # Or with S3
    # warehouse: s3://my-hive-warehouse/
52 changes: 52 additions & 0 deletions dev/rest-server/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

FROM python:3.11-slim

LABEL org.opencontainers.image.source=https://github.com/apache/iceberg-python
LABEL org.opencontainers.image.description="Universal Iceberg REST Catalog Server"
LABEL org.opencontainers.image.licenses=Apache-2.0

WORKDIR /app

# Install REST server dependencies.
# The default catalog (ICEBERG_CATALOG_NAME=dynamodb-local) is a DynamoDB
# catalog with an s3:// warehouse, so PyIceberg needs its optional extras:
#   - dynamodb: AWS client library used by the DynamoDB catalog
#   - pyarrow:  FileIO implementation able to read/write s3:// locations
# A bare `pyiceberg` install ships neither. The other catalog types in the
# example .pyiceberg.yaml (sql, glue, hive) need their own extras as well.
RUN pip install --no-cache-dir \
    'fastapi>=0.104.0' \
    'uvicorn[standard]>=0.24.0' \
    'pydantic>=2.0.0' \
    pyyaml \
    'pyiceberg[dynamodb,pyarrow]'

# Copy REST server code and the default catalog configuration.
# Paths are relative to the repository root, which is the build context.
COPY dev/rest-server/main.py .
COPY dev/rest-server/.pyiceberg.yaml /root/.pyiceberg.yaml

# Expose REST API port
EXPOSE 8000

# Set default environment variables (can be overridden at run time)
ENV ICEBERG_CATALOG_NAME=dynamodb-local
ENV SERVER_HOST=0.0.0.0
ENV SERVER_PORT=8000
ENV LOG_LEVEL=info

# Health check: fails unless GET /health returns a non-error status
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
    CMD python -c "import requests; requests.get('http://localhost:8000/health').raise_for_status()"

# Run REST server
CMD ["python", "main.py"]
Loading