From b569dd4a28a088395f424d52a836426e3b23ce07 Mon Sep 17 00:00:00 2001 From: xiaohongbo Date: Sun, 8 Mar 2026 17:38:11 +0800 Subject: [PATCH 1/5] feat(python): add get_primary_keys() method to Schema class (#384) --- bindings/python/src/metadata.rs | 7 ++++++- bindings/python/test/test_admin.py | 1 + 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/bindings/python/src/metadata.rs b/bindings/python/src/metadata.rs index d6b122d2..127cb2eb 100644 --- a/bindings/python/src/metadata.rs +++ b/bindings/python/src/metadata.rs @@ -220,7 +220,12 @@ impl Schema { .collect() } - // TODO: support primaryKey + fn get_primary_keys(&self) -> Vec { + self.__schema + .primary_key() + .map(|pk| pk.column_names().to_vec()) + .unwrap_or_default() + } fn __str__(&self) -> String { format!("Schema: columns={:?}", self.get_columns()) diff --git a/bindings/python/test/test_admin.py b/bindings/python/test/test_admin.py index e2f43431..646248d8 100644 --- a/bindings/python/test/test_admin.py +++ b/bindings/python/test/test_admin.py @@ -82,6 +82,7 @@ async def test_create_table(admin): ), primary_keys=["id"], ) + assert schema.get_primary_keys() == ["id"] table_descriptor = fluss.TableDescriptor( schema, From 20421577223b8bf1505eeae556d0213bee72a258 Mon Sep 17 00:00:00 2001 From: xiaohongbo Date: Sun, 8 Mar 2026 18:03:47 +0800 Subject: [PATCH 2/5] add get_primary_keys in Schema and add comment at get_primary_keys --- bindings/python/fluss/__init__.pyi | 3 ++- bindings/python/src/metadata.rs | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/bindings/python/fluss/__init__.pyi b/bindings/python/fluss/__init__.pyi index 417ac9b2..7c654edb 100644 --- a/bindings/python/fluss/__init__.pyi +++ b/bindings/python/fluss/__init__.pyi @@ -19,7 +19,7 @@ from enum import IntEnum from types import TracebackType -from typing import Dict, Iterator, List, Optional, Tuple, Union, overload +from typing import Any, Dict, Iterator, List, Optional, Tuple, Union, overload import pandas as pd import pyarrow as pa @@ -775,6 +775,7 @@ class Schema: def get_column_names(self) -> List[str]: ... def get_column_types(self) -> List[str]: ... def get_columns(self) -> List[Tuple[str, str]]: ... + def get_primary_keys(self) -> List[str]: ... def __str__(self) -> str: ... class TableDescriptor: diff --git a/bindings/python/src/metadata.rs b/bindings/python/src/metadata.rs index 127cb2eb..02ef121d 100644 --- a/bindings/python/src/metadata.rs +++ b/bindings/python/src/metadata.rs @@ -220,6 +220,7 @@ impl Schema { .collect() } + /// Get primary key column names, returns empty list if no primary key is defined fn get_primary_keys(&self) -> Vec { self.__schema .primary_key() From 1bf2dda4bd410f2d86af5ef72ffb8c9cf2fc29e6 Mon Sep 17 00:00:00 2001 From: xiaohongbo Date: Sun, 8 Mar 2026 18:47:31 +0800 Subject: [PATCH 3/5] remove extra import --- bindings/python/fluss/__init__.pyi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bindings/python/fluss/__init__.pyi b/bindings/python/fluss/__init__.pyi index 7c654edb..e06e9327 100644 --- a/bindings/python/fluss/__init__.pyi +++ b/bindings/python/fluss/__init__.pyi @@ -19,7 +19,7 @@ from enum import IntEnum from types import TracebackType -from typing import Any, Dict, Iterator, List, Optional, Tuple, Union, overload +from typing import Dict, Iterator, List, Optional, Tuple, Union, overload import pandas as pd import pyarrow as pa From 0a477cdd8b4cde73eb0b4c5dc3bfb82219dcdee7 Mon Sep 17 00:00:00 2001 From: xiaohongbo Date: Sun, 8 Mar 2026 22:32:47 +0800 Subject: [PATCH 4/5] add simple ut for get_primary_keys --- bindings/python/test/test_schema.py | 37 +++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 bindings/python/test/test_schema.py diff --git a/bindings/python/test/test_schema.py b/bindings/python/test/test_schema.py new file mode 100644 index 00000000..a72d9334 --- /dev/null +++ b/bindings/python/test/test_schema.py @@ -0,0 +1,37 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""Unit tests for Schema (no cluster required).""" + +import pyarrow as pa + +import fluss + + +def test_get_primary_keys(): + fields = pa.schema([ + pa.field("id", pa.int32()), + pa.field("name", pa.string()), + ]) + + schema_with_pk = fluss.Schema(fields, primary_keys=["id"]) + assert schema_with_pk.get_primary_keys() == ["id"] + + schema_without_pk = fluss.Schema(fields) + assert schema_without_pk.get_primary_keys() == [] + + From c45a7f8d339dff85791a28aa52e0d7fe1443d2f8 Mon Sep 17 00:00:00 2001 From: luoyuxia Date: Wed, 11 Mar 2026 21:32:09 +0800 Subject: [PATCH 5/5] docs: update python api reference for schema methods --- website/docs/user-guide/python/api-reference.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/website/docs/user-guide/python/api-reference.md b/website/docs/user-guide/python/api-reference.md index e9113b69..fef10a8c 100644 --- a/website/docs/user-guide/python/api-reference.md +++ b/website/docs/user-guide/python/api-reference.md @@ -241,6 +241,8 @@ for record in scan_records: | `Schema(schema: pa.Schema, primary_keys=None)` | Create from PyArrow schema | | `.get_column_names() -> list[str]` | Get column names | | `.get_column_types() -> list[str]` | Get column type names | +| `.get_columns() -> list[tuple[str, str]]` | Get `(name, type)` pairs | +| `.get_primary_keys() -> list[str]` | Get primary key columns | ## `TableDescriptor`