Skip to content

Commit 01491e7

Browse files
committed
Remove Generic from expressions
From the beginning we had `Generic` types in the expression system, but they never really worked as we hoped. The idea came from Java, where generics are much stronger; Python's static type checking (mypy) doesn't really follow the types.
1 parent ea8ade2 commit 01491e7

File tree

10 files changed

+554
-568
lines changed

10 files changed

+554
-568
lines changed

pyiceberg/expressions/__init__.py

Lines changed: 176 additions & 188 deletions
Large diffs are not rendered by default.

pyiceberg/expressions/visitors.py

Lines changed: 138 additions & 138 deletions
Large diffs are not rendered by default.

pyiceberg/io/pyarrow.py

Lines changed: 39 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -817,7 +817,7 @@ class _ConvertToArrowExpression(BoundBooleanExpressionVisitor[pc.Expression]):
817817
def __init__(self, schema: Schema | None = None):
818818
self._schema = schema
819819

820-
def _get_field_name(self, term: BoundTerm[Any]) -> str | tuple[str, ...]:
820+
def _get_field_name(self, term: BoundTerm) -> str | tuple[str, ...]:
821821
"""Get the field name or nested field path for a bound term.
822822
823823
For nested struct fields, returns a tuple of field names (e.g., ("mazeMetadata", "run_id")).
@@ -837,50 +837,50 @@ def _get_field_name(self, term: BoundTerm[Any]) -> str | tuple[str, ...]:
837837
# Fallback to just the field name if schema is not available
838838
return term.ref().field.name
839839

840-
def visit_in(self, term: BoundTerm[Any], literals: set[Any]) -> pc.Expression:
840+
def visit_in(self, term: BoundTerm, literals: set[Any]) -> pc.Expression:
841841
pyarrow_literals = pa.array(literals, type=schema_to_pyarrow(term.ref().field.field_type))
842842
return pc.field(self._get_field_name(term)).isin(pyarrow_literals)
843843

844-
def visit_not_in(self, term: BoundTerm[Any], literals: set[Any]) -> pc.Expression:
844+
def visit_not_in(self, term: BoundTerm, literals: set[Any]) -> pc.Expression:
845845
pyarrow_literals = pa.array(literals, type=schema_to_pyarrow(term.ref().field.field_type))
846846
return ~pc.field(self._get_field_name(term)).isin(pyarrow_literals)
847847

848-
def visit_is_nan(self, term: BoundTerm[Any]) -> pc.Expression:
848+
def visit_is_nan(self, term: BoundTerm) -> pc.Expression:
849849
ref = pc.field(self._get_field_name(term))
850850
return pc.is_nan(ref)
851851

852-
def visit_not_nan(self, term: BoundTerm[Any]) -> pc.Expression:
852+
def visit_not_nan(self, term: BoundTerm) -> pc.Expression:
853853
ref = pc.field(self._get_field_name(term))
854854
return ~pc.is_nan(ref)
855855

856-
def visit_is_null(self, term: BoundTerm[Any]) -> pc.Expression:
856+
def visit_is_null(self, term: BoundTerm) -> pc.Expression:
857857
return pc.field(self._get_field_name(term)).is_null(nan_is_null=False)
858858

859-
def visit_not_null(self, term: BoundTerm[Any]) -> pc.Expression:
859+
def visit_not_null(self, term: BoundTerm) -> pc.Expression:
860860
return pc.field(self._get_field_name(term)).is_valid()
861861

862-
def visit_equal(self, term: BoundTerm[Any], literal: Literal[Any]) -> pc.Expression:
862+
def visit_equal(self, term: BoundTerm, literal: Literal[Any]) -> pc.Expression:
863863
return pc.field(self._get_field_name(term)) == _convert_scalar(literal.value, term.ref().field.field_type)
864864

865-
def visit_not_equal(self, term: BoundTerm[Any], literal: Literal[Any]) -> pc.Expression:
865+
def visit_not_equal(self, term: BoundTerm, literal: Literal[Any]) -> pc.Expression:
866866
return pc.field(self._get_field_name(term)) != _convert_scalar(literal.value, term.ref().field.field_type)
867867

868-
def visit_greater_than_or_equal(self, term: BoundTerm[Any], literal: Literal[Any]) -> pc.Expression:
868+
def visit_greater_than_or_equal(self, term: BoundTerm, literal: Literal[Any]) -> pc.Expression:
869869
return pc.field(self._get_field_name(term)) >= _convert_scalar(literal.value, term.ref().field.field_type)
870870

871-
def visit_greater_than(self, term: BoundTerm[Any], literal: Literal[Any]) -> pc.Expression:
871+
def visit_greater_than(self, term: BoundTerm, literal: Literal[Any]) -> pc.Expression:
872872
return pc.field(self._get_field_name(term)) > _convert_scalar(literal.value, term.ref().field.field_type)
873873

874-
def visit_less_than(self, term: BoundTerm[Any], literal: Literal[Any]) -> pc.Expression:
874+
def visit_less_than(self, term: BoundTerm, literal: Literal[Any]) -> pc.Expression:
875875
return pc.field(self._get_field_name(term)) < _convert_scalar(literal.value, term.ref().field.field_type)
876876

877-
def visit_less_than_or_equal(self, term: BoundTerm[Any], literal: Literal[Any]) -> pc.Expression:
877+
def visit_less_than_or_equal(self, term: BoundTerm, literal: Literal[Any]) -> pc.Expression:
878878
return pc.field(self._get_field_name(term)) <= _convert_scalar(literal.value, term.ref().field.field_type)
879879

880-
def visit_starts_with(self, term: BoundTerm[Any], literal: Literal[Any]) -> pc.Expression:
880+
def visit_starts_with(self, term: BoundTerm, literal: Literal[Any]) -> pc.Expression:
881881
return pc.starts_with(pc.field(self._get_field_name(term)), literal.value)
882882

883-
def visit_not_starts_with(self, term: BoundTerm[Any], literal: Literal[Any]) -> pc.Expression:
883+
def visit_not_starts_with(self, term: BoundTerm, literal: Literal[Any]) -> pc.Expression:
884884
return ~pc.starts_with(pc.field(self._get_field_name(term)), literal.value)
885885

886886
def visit_true(self) -> pc.Expression:
@@ -901,13 +901,13 @@ def visit_or(self, left_result: pc.Expression, right_result: pc.Expression) -> p
901901

902902
class _NullNaNUnmentionedTermsCollector(BoundBooleanExpressionVisitor[None]):
903903
# BoundTerms which have either is_null or is_not_null appearing at least once in the boolean expr.
904-
is_null_or_not_bound_terms: set[BoundTerm[Any]]
904+
is_null_or_not_bound_terms: set[BoundTerm]
905905
# The remaining BoundTerms appearing in the boolean expr.
906-
null_unmentioned_bound_terms: set[BoundTerm[Any]]
906+
null_unmentioned_bound_terms: set[BoundTerm]
907907
# BoundTerms which have either is_nan or is_not_nan appearing at least once in the boolean expr.
908-
is_nan_or_not_bound_terms: set[BoundTerm[Any]]
908+
is_nan_or_not_bound_terms: set[BoundTerm]
909909
# The remaining BoundTerms appearing in the boolean expr.
910-
nan_unmentioned_bound_terms: set[BoundTerm[Any]]
910+
nan_unmentioned_bound_terms: set[BoundTerm]
911911

912912
def __init__(self) -> None:
913913
super().__init__()
@@ -916,81 +916,81 @@ def __init__(self) -> None:
916916
self.is_nan_or_not_bound_terms = set()
917917
self.nan_unmentioned_bound_terms = set()
918918

919-
def _handle_explicit_is_null_or_not(self, term: BoundTerm[Any]) -> None:
919+
def _handle_explicit_is_null_or_not(self, term: BoundTerm) -> None:
920920
"""Handle the predicate case where either is_null or is_not_null is included."""
921921
if term in self.null_unmentioned_bound_terms:
922922
self.null_unmentioned_bound_terms.remove(term)
923923
self.is_null_or_not_bound_terms.add(term)
924924

925-
def _handle_null_unmentioned(self, term: BoundTerm[Any]) -> None:
925+
def _handle_null_unmentioned(self, term: BoundTerm) -> None:
926926
"""Handle the predicate case where neither is_null or is_not_null is included."""
927927
if term not in self.is_null_or_not_bound_terms:
928928
self.null_unmentioned_bound_terms.add(term)
929929

930-
def _handle_explicit_is_nan_or_not(self, term: BoundTerm[Any]) -> None:
930+
def _handle_explicit_is_nan_or_not(self, term: BoundTerm) -> None:
931931
"""Handle the predicate case where either is_nan or is_not_nan is included."""
932932
if term in self.nan_unmentioned_bound_terms:
933933
self.nan_unmentioned_bound_terms.remove(term)
934934
self.is_nan_or_not_bound_terms.add(term)
935935

936-
def _handle_nan_unmentioned(self, term: BoundTerm[Any]) -> None:
936+
def _handle_nan_unmentioned(self, term: BoundTerm) -> None:
937937
"""Handle the predicate case where neither is_nan or is_not_nan is included."""
938938
if term not in self.is_nan_or_not_bound_terms:
939939
self.nan_unmentioned_bound_terms.add(term)
940940

941-
def visit_in(self, term: BoundTerm[Any], literals: set[Any]) -> None:
941+
def visit_in(self, term: BoundTerm, literals: set[Any]) -> None:
942942
self._handle_null_unmentioned(term)
943943
self._handle_nan_unmentioned(term)
944944

945-
def visit_not_in(self, term: BoundTerm[Any], literals: set[Any]) -> None:
945+
def visit_not_in(self, term: BoundTerm, literals: set[Any]) -> None:
946946
self._handle_null_unmentioned(term)
947947
self._handle_nan_unmentioned(term)
948948

949-
def visit_is_nan(self, term: BoundTerm[Any]) -> None:
949+
def visit_is_nan(self, term: BoundTerm) -> None:
950950
self._handle_null_unmentioned(term)
951951
self._handle_explicit_is_nan_or_not(term)
952952

953-
def visit_not_nan(self, term: BoundTerm[Any]) -> None:
953+
def visit_not_nan(self, term: BoundTerm) -> None:
954954
self._handle_null_unmentioned(term)
955955
self._handle_explicit_is_nan_or_not(term)
956956

957-
def visit_is_null(self, term: BoundTerm[Any]) -> None:
957+
def visit_is_null(self, term: BoundTerm) -> None:
958958
self._handle_explicit_is_null_or_not(term)
959959
self._handle_nan_unmentioned(term)
960960

961-
def visit_not_null(self, term: BoundTerm[Any]) -> None:
961+
def visit_not_null(self, term: BoundTerm) -> None:
962962
self._handle_explicit_is_null_or_not(term)
963963
self._handle_nan_unmentioned(term)
964964

965-
def visit_equal(self, term: BoundTerm[Any], literal: Literal[Any]) -> None:
965+
def visit_equal(self, term: BoundTerm, literal: Literal[Any]) -> None:
966966
self._handle_null_unmentioned(term)
967967
self._handle_nan_unmentioned(term)
968968

969-
def visit_not_equal(self, term: BoundTerm[Any], literal: Literal[Any]) -> None:
969+
def visit_not_equal(self, term: BoundTerm, literal: Literal[Any]) -> None:
970970
self._handle_null_unmentioned(term)
971971
self._handle_nan_unmentioned(term)
972972

973-
def visit_greater_than_or_equal(self, term: BoundTerm[Any], literal: Literal[Any]) -> None:
973+
def visit_greater_than_or_equal(self, term: BoundTerm, literal: Literal[Any]) -> None:
974974
self._handle_null_unmentioned(term)
975975
self._handle_nan_unmentioned(term)
976976

977-
def visit_greater_than(self, term: BoundTerm[Any], literal: Literal[Any]) -> None:
977+
def visit_greater_than(self, term: BoundTerm, literal: Literal[Any]) -> None:
978978
self._handle_null_unmentioned(term)
979979
self._handle_nan_unmentioned(term)
980980

981-
def visit_less_than(self, term: BoundTerm[Any], literal: Literal[Any]) -> None:
981+
def visit_less_than(self, term: BoundTerm, literal: Literal[Any]) -> None:
982982
self._handle_null_unmentioned(term)
983983
self._handle_nan_unmentioned(term)
984984

985-
def visit_less_than_or_equal(self, term: BoundTerm[Any], literal: Literal[Any]) -> None:
985+
def visit_less_than_or_equal(self, term: BoundTerm, literal: Literal[Any]) -> None:
986986
self._handle_null_unmentioned(term)
987987
self._handle_nan_unmentioned(term)
988988

989-
def visit_starts_with(self, term: BoundTerm[Any], literal: Literal[Any]) -> None:
989+
def visit_starts_with(self, term: BoundTerm, literal: Literal[Any]) -> None:
990990
self._handle_null_unmentioned(term)
991991
self._handle_nan_unmentioned(term)
992992

993-
def visit_not_starts_with(self, term: BoundTerm[Any], literal: Literal[Any]) -> None:
993+
def visit_not_starts_with(self, term: BoundTerm, literal: Literal[Any]) -> None:
994994
self._handle_null_unmentioned(term)
995995
self._handle_nan_unmentioned(term)
996996

@@ -1040,10 +1040,10 @@ def _expression_to_complementary_pyarrow(expr: BooleanExpression, schema: Schema
10401040
collector.collect(expr)
10411041

10421042
# Convert the set of terms to a sorted list so that layout of the expression to build is deterministic.
1043-
null_unmentioned_bound_terms: list[BoundTerm[Any]] = sorted(
1043+
null_unmentioned_bound_terms: list[BoundTerm] = sorted(
10441044
collector.null_unmentioned_bound_terms, key=lambda term: term.ref().field.name
10451045
)
1046-
nan_unmentioned_bound_terms: list[BoundTerm[Any]] = sorted(
1046+
nan_unmentioned_bound_terms: list[BoundTerm] = sorted(
10471047
collector.nan_unmentioned_bound_terms, key=lambda term: term.ref().field.name
10481048
)
10491049

0 commit comments

Comments
 (0)