-
Notifications
You must be signed in to change notification settings - Fork 1
Full Config Example
a.metwalli edited this page Feb 6, 2026
·
1 revision
This example is intentionally complete and commented so you can see most of what Floe supports in one place. Use it as a reference and trim it down for your real configs.
# Floe config version (quoted so it stays a string, not the float 0.1)
version: "0.1"

# Optional project metadata
metadata:
  project: "floe-demo"
  description: "Example config covering many features"
  owner: "data-platform"
  tags: ["demo", "floe"]

# Optional environment variables for templating
# Referenced elsewhere in this file as {{incoming_root}} / {{outgoing_root}}
env:
  vars:
    incoming_root: "/data/incoming"
    outgoing_root: "/data/outgoing"

# Optional named domains for re-use
# Each entity can reference {{domain.incoming_dir}}
domains:
- name: "animals"
  incoming_dir: "{{incoming_root}}/animals"
- name: "sales"
  incoming_dir: "{{incoming_root}}/sales"

# Optional storage clients (local + cloud)
storages:
  # Name of the definition used when nothing else is specified
  default: local
  definitions:
  - name: local
    type: local
  - name: s3_raw
    type: s3
    bucket: my-bucket
    region: eu-west-1
    prefix: raw
  - name: gcs_raw
    type: gcs
    bucket: my-gcs-bucket
    prefix: ingest
  - name: adls_raw
    type: adls
    account: myaccount
    container: raw
    prefix: ingest

# Optional report output
report:
  path: "{{outgoing_root}}/report"
  storage: local

# Entities = datasets
entities:
# 1) CSV input, Parquet accepted, CSV rejected (append mode)
- name: "animal"
  domain: "animals"
  source:
    format: "csv"
    path: "{{domain.incoming_dir}}/animal"
    options:
      header: true
      separator: ";"
      null_values: ["", "NULL", "null"]
      # NOTE(review): recursive/glob are assumed to be source options;
      # confirm their nesting against the Floe config reference.
      recursive: false
      glob: "*.csv"
    cast_mode: "strict"
  sink:
    # write_mode is set at sink level
    write_mode: "append"
    accepted:
      format: "parquet"
      path: "{{outgoing_root}}/accepted/animal"
      options:
        compression: "zstd"
        row_group_size: 100000
        max_size_per_file: 268435456  # 256 MiB
    rejected:
      format: "csv"
      path: "{{outgoing_root}}/rejected/animal"
    # NOTE(review): archive is assumed to live under sink; confirm.
    archive:
      path: "{{outgoing_root}}/archive/animal"
  policy:
    severity: "reject"
  schema:
    normalize_columns:
      enabled: true
      strategy: "snake_case"
    columns:
    - name: "animal_id"
      type: "string"
      nullable: false
      unique: true
    - name: "species"
      type: "string"
    - name: "age"
      type: "int64"
      nullable: true

# 2) NDJSON input, Delta accepted, CSV rejected (overwrite mode)
- name: "orders"
  domain: "sales"
  source:
    format: "json"
    path: "{{domain.incoming_dir}}/orders.ndjson"
    options:
      json_mode: "ndjson"
  sink:
    write_mode: "overwrite"
    accepted:
      format: "delta"
      # Example: local path (could be s3://, gs://, abfs://)
      path: "{{outgoing_root}}/accepted/orders_delta"
    rejected:
      format: "csv"
      path: "{{outgoing_root}}/rejected/orders"
  policy:
    severity: "warn"
  schema:
    normalize_columns:
      enabled: true
      strategy: "snake_case"
    columns:
    - name: "order_id"
      type: "string"
      nullable: false
      unique: true
    - name: "customer_id"
      type: "string"
      nullable: false
    - name: "amount"
      type: "float64"
      nullable: true

# 3) Parquet input from cloud storage
- name: "customer"
  source:
    format: "parquet"
    path: "customers"
    # Names one of the entries under storages.definitions
    storage: "s3_raw"
  sink:
    accepted:
      format: "parquet"
      path: "{{outgoing_root}}/accepted/customer"
    rejected:
      format: "csv"
      path: "{{outgoing_root}}/rejected/customer"
  policy:
    severity: "abort"
  schema:
    columns:
    - name: "customer_id"
      type: "string"
      nullable: false
      unique: true
    - name: "email"
      type: "string"
      nullable: true

# 4) JSON array input (not NDJSON)
- name: "product"
  source:
    format: "json"
    path: "{{incoming_root}}/product.json"
    options:
      json_mode: "array"
  sink:
    accepted:
      format: "parquet"
      path: "{{outgoing_root}}/accepted/product"
    rejected:
      format: "csv"
      path: "{{outgoing_root}}/rejected/product"
  policy:
    severity: "reject"
  schema:
    columns:
    - name: "product_id"
      type: "string"
      nullable: false
      unique: true
    - name: "title"
      type: "string"
      nullable: false

Notes:
- `write_mode` is configured at `sink` and applies to both accepted and rejected outputs.
- For cloud paths, use `s3://`, `gs://`, or `abfs://` URIs and set `storage`.
- JSON must be flat objects; nested values are rejected.
- Use `normalize_columns` to align schema and input column names before checks.