Skip to content

Commit 91f6a36

Browse files
authored
Merge branch 'main' into drop-python-39
2 parents 956ee07 + 8817781 commit 91f6a36

File tree

11 files changed

+205
-17
lines changed

11 files changed

+205
-17
lines changed

.github/workflows/python-ci-docs.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,3 +45,5 @@ jobs:
4545
run: make docs-install
4646
- name: Build docs
4747
run: make docs-build
48+
- name: Run linters
49+
run: make lint

dev/.rat-excludes

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,3 +4,4 @@ build
44
.git
55
.gitignore
66
poetry.lock
7+
mkdocs/*

mkdocs/docs/api.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1928,7 +1928,7 @@ PyIceberg integrates with [Apache DataFusion](https://datafusion.apache.org/) th
19281928
<!-- prettier-ignore-start -->
19291929

19301930
!!! note "Requirements"
1931-
This requires [`datafusion` to be installed](index.md).
1931+
This requires [`datafusion` and `pyiceberg-core` to be installed](index.md).
19321932

19331933
<!-- prettier-ignore-end -->
19341934

@@ -1939,7 +1939,7 @@ PyIceberg integrates with [Apache DataFusion](https://datafusion.apache.org/) th
19391939

19401940
The integration has a few caveats:
19411941

1942-
- Only works with `datafusion >= 45`
1942+
- Only works with `datafusion >= 45, < 49`
19431943
- Depends directly on `iceberg-rust` instead of PyIceberg's implementation
19441944
- Has limited features compared to the full PyIceberg API
19451945

mkdocs/docs/index.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,8 @@ You can mix and match optional dependencies depending on your needs:
6161
| snappy | Support for snappy Avro compression |
6262
| gcsfs | GCSFS as a FileIO implementation to interact with the object store |
6363
| rest-sigv4 | Support for generating AWS SIGv4 authentication headers for REST Catalogs |
64+
| pyiceberg-core | Installs the iceberg-rust-powered core |
65+
| datafusion | Installs both PyArrow and Apache DataFusion |
6466

6567
You either need to install `s3fs`, `adlfs`, `gcsfs`, or `pyarrow` to be able to fetch files from an object store.
6668

mkdocs/mkdocs.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ plugins:
3535

3636
theme:
3737
name: material
38+
custom_dir: overrides
3839
logo: assets/images/iceberg-logo-icon.png
3940
favicon: assets/images/iceberg-logo-icon.png
4041
font:

mkdocs/overrides/main.html

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
{% extends "base.html" %}
2+
3+
{% block extrahead %}
4+
{{ super() }}
5+
<!-- Matomo -->
6+
<script>
7+
var _paq = window._paq = window._paq || [];
8+
/* tracker methods like "setCustomDimension" should be called before "trackPageView" */
9+
_paq.push(["setDoNotTrack", true]);
10+
_paq.push(["disableCookies"]);
11+
_paq.push(['trackPageView']);
12+
_paq.push(['enableLinkTracking']);
13+
(function() {
14+
var u="https://analytics.apache.org/";
15+
_paq.push(['setTrackerUrl', u+'matomo.php']);
16+
_paq.push(['setSiteId', '82']);
17+
var d=document, g=d.createElement('script'), s=d.getElementsByTagName('script')[0];
18+
g.async=true; g.src=u+'matomo.js'; s.parentNode.insertBefore(g,s);
19+
})();
20+
</script>
21+
<!-- End Matomo -->
22+
{% endblock %}

poetry.lock

Lines changed: 0 additions & 8 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyiceberg/expressions/__init__.py

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
TypeVar,
3232
Union,
3333
)
34+
from typing import Literal as TypingLiteral
3435

3536
from pydantic import Field
3637

@@ -41,10 +42,15 @@
4142
literal,
4243
)
4344
from pyiceberg.schema import Accessor, Schema
44-
from pyiceberg.typedef import IcebergRootModel, L, StructProtocol
45+
from pyiceberg.typedef import IcebergBaseModel, IcebergRootModel, L, StructProtocol
4546
from pyiceberg.types import DoubleType, FloatType, NestedField
4647
from pyiceberg.utils.singleton import Singleton
4748

49+
try:
50+
from pydantic import ConfigDict
51+
except ImportError:
52+
ConfigDict = dict
53+
4854

4955
def _to_unbound_term(term: Union[str, UnboundTerm[Any]]) -> UnboundTerm[Any]:
5056
return Reference(term) if isinstance(term, str) else term
@@ -571,12 +577,14 @@ def as_bound(self) -> Type[BoundNotNaN[L]]:
571577
return BoundNotNaN[L]
572578

573579

574-
class SetPredicate(UnboundPredicate[L], ABC):
575-
literals: Set[Literal[L]]
580+
class SetPredicate(IcebergBaseModel, UnboundPredicate[L], ABC):
581+
model_config = ConfigDict(arbitrary_types_allowed=True)
582+
583+
type: TypingLiteral["in", "not-in"] = Field(default="in")
584+
literals: Set[Literal[L]] = Field(alias="items")
576585

577586
def __init__(self, term: Union[str, UnboundTerm[Any]], literals: Union[Iterable[L], Iterable[Literal[L]]]):
578-
super().__init__(term)
579-
self.literals = _to_literal_set(literals)
587+
super().__init__(term=_to_unbound_term(term), items=_to_literal_set(literals)) # type: ignore
580588

581589
def bind(self, schema: Schema, case_sensitive: bool = True) -> BoundSetPredicate[L]:
582590
bound_term = self.term.bind(schema, case_sensitive)
@@ -688,6 +696,8 @@ def as_unbound(self) -> Type[NotIn[L]]:
688696

689697

690698
class In(SetPredicate[L]):
699+
type: TypingLiteral["in"] = Field(default="in", alias="type")
700+
691701
def __new__( # type: ignore # pylint: disable=W0221
692702
cls, term: Union[str, UnboundTerm[Any]], literals: Union[Iterable[L], Iterable[Literal[L]]]
693703
) -> BooleanExpression:
@@ -710,6 +720,8 @@ def as_bound(self) -> Type[BoundIn[L]]:
710720

711721

712722
class NotIn(SetPredicate[L], ABC):
723+
type: TypingLiteral["not-in"] = Field(default="not-in", alias="type")
724+
713725
def __new__( # type: ignore # pylint: disable=W0221
714726
cls, term: Union[str, UnboundTerm[Any]], literals: Union[Iterable[L], Iterable[Literal[L]]]
715727
) -> BooleanExpression:

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ requests = ">=2.20.0,<3.0.0"
5555
click = ">=7.1.1,<9.0.0"
5656
rich = ">=10.11.0,<15.0.0"
5757
strictyaml = ">=1.7.0,<2.0.0" # CVE-2020-14343 was fixed in 5.4.
58-
pydantic = ">=2.0,<3.0,!=2.4.0,!=2.4.1" # 2.4.0, 2.4.1 has a critical bug
58+
pydantic = ">=2.0,<3.0,!=2.4.0,!=2.4.1,<2.12.0" # 2.4.0, 2.4.1, and 2.12.0 have critical bugs
5959
sortedcontainers = "2.4.0"
6060
fsspec = ">=2023.1.0"
6161
pyparsing = ">=3.1.0,<4.0.0"

tests/expressions/test_expressions.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -873,6 +873,16 @@ def test_not_in() -> None:
873873
assert not_in == pickle.loads(pickle.dumps(not_in))
874874

875875

876+
def test_serialize_in() -> None:
877+
pred = In(term="foo", literals=[1, 2, 3])
878+
assert pred.model_dump_json() == '{"term":"foo","type":"in","items":[1,2,3]}'
879+
880+
881+
def test_serialize_not_in() -> None:
882+
pred = NotIn(term="foo", literals=[1, 2, 3])
883+
assert pred.model_dump_json() == '{"term":"foo","type":"not-in","items":[1,2,3]}'
884+
885+
876886
def test_bound_equal_to(term: BoundReference[Any]) -> None:
877887
bound_equal_to = BoundEqualTo(term, literal("a"))
878888
assert str(bound_equal_to) == f"BoundEqualTo(term={str(term)}, literal=literal('a'))"

0 commit comments

Comments
 (0)