@@ -3844,6 +3844,42 @@ def test_orc_schema_conversion_with_field_ids() -> None:
38443844 assert not name_field_no_ids .metadata
38453845
38463846
def test_orc_schema_conversion_with_required_attribute() -> None:
    """
    Test that schema_to_pyarrow adds the ORC ``iceberg.required`` attribute.

    The attribute must be absent from fields converted for Parquet and present
    on every field converted for ORC, reflecting each field's ``required`` flag.

    To run just this test:
        pytest tests/io/test_pyarrow.py -k test_orc_schema_conversion_with_required_attribute
    """
    from pyiceberg.io.pyarrow import ORC_FIELD_REQUIRED_KEY, schema_to_pyarrow
    from pyiceberg.manifest import FileFormat
    from pyiceberg.schema import Schema
    from pyiceberg.types import IntegerType, StringType

    # One required and one optional field so both attribute values are covered.
    schema = Schema(
        NestedField(1, "id", IntegerType(), required=True),
        NestedField(2, "name", StringType(), required=False),
    )

    # Test 1: Parquet format must not carry the ORC-specific attribute.
    arrow_schema_parquet = schema_to_pyarrow(schema, file_format=FileFormat.PARQUET)

    id_field = arrow_schema_parquet.field(0)
    name_field = arrow_schema_parquet.field(1)

    # Field.metadata is None when a field has no metadata at all; fall back to
    # an empty mapping so the membership check cannot raise TypeError.
    assert ORC_FIELD_REQUIRED_KEY not in (id_field.metadata or {})
    assert ORC_FIELD_REQUIRED_KEY not in (name_field.metadata or {})

    # Test 2: ORC format must carry the attribute on every field.
    arrow_schema_orc = schema_to_pyarrow(schema, file_format=FileFormat.ORC)

    id_field_orc = arrow_schema_orc.field(0)
    name_field_orc = arrow_schema_orc.field(1)

    # PyArrow stores field metadata as bytes -> bytes, so the value can never be
    # the Python singletons True/False; compare against the serialized form.
    # NOTE(review): assumes the flag is written as lowercase "true"/"false" --
    # confirm against the writer side in pyiceberg.io.pyarrow.
    assert id_field_orc.metadata[ORC_FIELD_REQUIRED_KEY] == b"true"
    assert name_field_orc.metadata[ORC_FIELD_REQUIRED_KEY] == b"false"


3882+
38473883def test_orc_batching_behavior_documentation (tmp_path : Path ) -> None :
38483884 """
38493885 Document and verify PyArrow's exact batching behavior for ORC files.
0 commit comments