Skip to content

Commit e7e0634

Browse files
authored
Merge pull request #78 from jg-rp/fix-normalized-paths
Fix normalized paths and serialization of compiled JSONPath queries.
2 parents b269c76 + 93d4f05 commit e7e0634

File tree

12 files changed

+176
-43
lines changed

12 files changed

+176
-43
lines changed

.gitmodules

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
11
[submodule "tests/cts"]
22
path = tests/cts
33
url = git@github.com:jsonpath-standard/jsonpath-compliance-test-suite.git
4+
[submodule "tests/nts"]
5+
path = tests/nts
6+
url = git@github.com:jg-rp/jsonpath-compliance-normalized-paths.git

CHANGELOG.md

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,18 @@
11
# Python JSONPath Change Log
22

3+
## Version 1.3.0 (unreleased)
4+
5+
**Fixes**
6+
7+
- Fixed `jsonpath.JSONPathMatch.path`. It is now a "normalized path" following section 2.7 of RFC 9535.
8+
- Fixed normalized slice indexes. We were failing to normalize some indexes given a negative step.
9+
10+
**Other changes**
11+
12+
- `jsonpath.match.NodeList` is now re-exported as `jsonpath.NodeList`.
13+
- Added `jsonpath.NodeList.paths()`, which returns a list of normalized paths, one for each node in the list.
14+
- Serialization of compiled JSONPath queries (instances of `jsonpath.JSONPath`) has changed. String literals inside filter selectors are now serialized using the canonical format, as described in section 2.7 of RFC 9535, and parentheses in filter selectors are kept to a minimum.
15+
316
## Version 1.2.2
417

518
**Fixes**

jsonpath/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
from .fluent_api import Query
2525
from .lex import Lexer
2626
from .match import JSONPathMatch
27+
from .match import NodeList
2728
from .parse import Parser
2829
from .patch import JSONPatch
2930
from .path import CompoundJSONPath
@@ -58,6 +59,7 @@
5859
"JSONPointerResolutionError",
5960
"JSONPointerTypeError",
6061
"Lexer",
62+
"NodeList",
6163
"match",
6264
"Parser",
6365
"Projection",

jsonpath/filter.py

Lines changed: 36 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33
from __future__ import annotations
44

55
import copy
6-
import json
76
import re
87
from abc import ABC
98
from abc import abstractmethod
@@ -25,6 +24,7 @@
2524
from .match import NodeList
2625
from .selectors import Filter as FilterSelector
2726
from .selectors import ListSelector
27+
from .serialize import canonical_string
2828

2929
if TYPE_CHECKING:
3030
from .path import JSONPath
@@ -208,7 +208,7 @@ class StringLiteral(Literal[str]):
208208
__slots__ = ()
209209

210210
def __str__(self) -> str:
211-
return json.dumps(self.value)
211+
return canonical_string(self.value)
212212

213213

214214
class IntegerLiteral(Literal[int]):
@@ -375,6 +375,12 @@ def set_children(self, children: List[FilterExpression]) -> None:
375375
self.right = children[1]
376376

377377

378+
PRECEDENCE_LOWEST = 1
379+
PRECEDENCE_LOGICAL_OR = 3
380+
PRECEDENCE_LOGICAL_AND = 4
381+
PRECEDENCE_PREFIX = 7
382+
383+
378384
class BooleanExpression(FilterExpression):
379385
"""An expression that always evaluates to `True` or `False`."""
380386

@@ -408,13 +414,40 @@ def cacheable_nodes(self) -> bool:
408414
)
409415

410416
def __str__(self) -> str:
411-
return str(self.expression)
417+
return self._canonical_string(self.expression, PRECEDENCE_LOWEST)
412418

413419
def __eq__(self, other: object) -> bool:
414420
return (
415421
isinstance(other, BooleanExpression) and self.expression == other.expression
416422
)
417423

424+
def _canonical_string(
425+
self, expression: FilterExpression, parent_precedence: int
426+
) -> str:
427+
if isinstance(expression, InfixExpression):
428+
if expression.operator == "&&":
429+
left = self._canonical_string(expression.left, PRECEDENCE_LOGICAL_AND)
430+
right = self._canonical_string(expression.right, PRECEDENCE_LOGICAL_AND)
431+
expr = f"{left} && {right}"
432+
return (
433+
f"({expr})" if parent_precedence >= PRECEDENCE_LOGICAL_AND else expr
434+
)
435+
436+
if expression.operator == "||":
437+
left = self._canonical_string(expression.left, PRECEDENCE_LOGICAL_OR)
438+
right = self._canonical_string(expression.right, PRECEDENCE_LOGICAL_OR)
439+
expr = f"{left} || {right}"
440+
return (
441+
f"({expr})" if parent_precedence >= PRECEDENCE_LOGICAL_OR else expr
442+
)
443+
444+
if isinstance(expression, PrefixExpression):
445+
operand = self._canonical_string(expression.right, PRECEDENCE_PREFIX)
446+
expr = f"!{operand}"
447+
return f"({expr})" if parent_precedence > PRECEDENCE_PREFIX else expr
448+
449+
return str(expression)
450+
418451
def evaluate(self, context: FilterContext) -> bool:
419452
return context.env.is_truthy(self.expression.evaluate(context))
420453

jsonpath/match.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
"""The JSONPath match object, as returned from `JSONPath.finditer()`."""
2+
23
from __future__ import annotations
34

45
from typing import Any
@@ -104,6 +105,10 @@ def values_or_singular(self) -> object:
104105
return self[0].obj
105106
return [match.obj for match in self]
106107

108+
def paths(self) -> List[str]:
109+
"""Return a normalized path for each node in this node list."""
110+
return [match.path for match in self]
111+
107112
def empty(self) -> bool:
108113
"""Return `True` if this node list is empty."""
109114
return not bool(self)

jsonpath/selectors.py

Lines changed: 22 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
"""JSONPath segments and selectors, as returned from `Parser.parse`."""
2+
23
from __future__ import annotations
34

45
from abc import ABC
@@ -17,6 +18,7 @@
1718

1819
from .exceptions import JSONPathIndexError
1920
from .exceptions import JSONPathTypeError
21+
from .serialize import canonical_string
2022

2123
if TYPE_CHECKING:
2224
from .env import JSONPathEnvironment
@@ -75,7 +77,11 @@ def __init__(
7577
self.shorthand = shorthand
7678

7779
def __str__(self) -> str:
78-
return f"['{self.name}']" if self.shorthand else f"'{self.name}'"
80+
return (
81+
f"[{canonical_string(self.name)}]"
82+
if self.shorthand
83+
else f"{canonical_string(self.name)}"
84+
)
7985

8086
def __eq__(self, __value: object) -> bool:
8187
return (
@@ -98,7 +104,7 @@ def resolve(self, matches: Iterable[JSONPathMatch]) -> Iterable[JSONPathMatch]:
98104
obj=self.env.getitem(match.obj, self.name),
99105
parent=match,
100106
parts=match.parts + (self.name,),
101-
path=match.path + f"['{self.name}']",
107+
path=match.path + f"[{canonical_string(self.name)}]",
102108
root=match.root,
103109
)
104110
match.add_child(_match)
@@ -117,7 +123,7 @@ async def resolve_async(
117123
obj=await self.env.getitem_async(match.obj, self.name),
118124
parent=match,
119125
parts=match.parts + (self.name,),
120-
path=match.path + f"['{self.name}']",
126+
path=match.path + f"[{canonical_string(self.name)}]",
121127
root=match.root,
122128
)
123129
match.add_child(_match)
@@ -321,20 +327,15 @@ def _check_range(self, *indices: Optional[int]) -> None:
321327
):
322328
raise JSONPathIndexError("index out of range", token=self.token)
323329

324-
def _normalized_index(self, obj: Sequence[object], index: int) -> int:
325-
if index < 0 and len(obj) >= abs(index):
326-
return len(obj) + index
327-
return index
328-
329330
def resolve(self, matches: Iterable[JSONPathMatch]) -> Iterable[JSONPathMatch]:
330331
for match in matches:
331332
if not isinstance(match.obj, Sequence) or self.slice.step == 0:
332333
continue
333334

334-
idx = self.slice.start or 0
335-
step = self.slice.step or 1
336-
for obj in self.env.getitem(match.obj, self.slice):
337-
norm_index = self._normalized_index(match.obj, idx)
335+
for norm_index, obj in zip( # noqa: B905
336+
range(*self.slice.indices(len(match.obj))),
337+
self.env.getitem(match.obj, self.slice),
338+
):
338339
_match = self.env.match_class(
339340
filter_context=match.filter_context(),
340341
obj=obj,
@@ -345,7 +346,6 @@ def resolve(self, matches: Iterable[JSONPathMatch]) -> Iterable[JSONPathMatch]:
345346
)
346347
match.add_child(_match)
347348
yield _match
348-
idx += step
349349

350350
async def resolve_async(
351351
self, matches: AsyncIterable[JSONPathMatch]
@@ -354,10 +354,10 @@ async def resolve_async(
354354
if not isinstance(match.obj, Sequence) or self.slice.step == 0:
355355
continue
356356

357-
idx = self.slice.start or 0
358-
step = self.slice.step or 1
359-
for obj in await self.env.getitem_async(match.obj, self.slice):
360-
norm_index = self._normalized_index(match.obj, idx)
357+
for norm_index, obj in zip( # noqa: B905
358+
range(*self.slice.indices(len(match.obj))),
359+
await self.env.getitem_async(match.obj, self.slice),
360+
):
361361
_match = self.env.match_class(
362362
filter_context=match.filter_context(),
363363
obj=obj,
@@ -368,7 +368,6 @@ async def resolve_async(
368368
)
369369
match.add_child(_match)
370370
yield _match
371-
idx += step
372371

373372

374373
class WildSelector(JSONPathSelector):
@@ -402,7 +401,7 @@ def resolve(self, matches: Iterable[JSONPathMatch]) -> Iterable[JSONPathMatch]:
402401
obj=val,
403402
parent=match,
404403
parts=match.parts + (key,),
405-
path=match.path + f"['{key}']",
404+
path=match.path + f"[{canonical_string(key)}]",
406405
root=match.root,
407406
)
408407
match.add_child(_match)
@@ -431,7 +430,7 @@ async def resolve_async(
431430
obj=val,
432431
parent=match,
433432
parts=match.parts + (key,),
434-
path=match.path + f"['{key}']",
433+
path=match.path + f"[{canonical_string(key)}]",
435434
root=match.root,
436435
)
437436
match.add_child(_match)
@@ -479,7 +478,7 @@ def _expand(self, match: JSONPathMatch) -> Iterable[JSONPathMatch]:
479478
obj=val,
480479
parent=match,
481480
parts=match.parts + (key,),
482-
path=match.path + f"['{key}']",
481+
path=match.path + f"[{canonical_string(key)}]",
483482
root=match.root,
484483
)
485484
match.add_child(_match)
@@ -633,7 +632,7 @@ def resolve( # noqa: PLR0912
633632
obj=val,
634633
parent=match,
635634
parts=match.parts + (key,),
636-
path=match.path + f"['{key}']",
635+
path=match.path + f"[{canonical_string(key)}]",
637636
root=match.root,
638637
)
639638
match.add_child(_match)
@@ -701,7 +700,7 @@ async def resolve_async( # noqa: PLR0912
701700
obj=val,
702701
parent=match,
703702
parts=match.parts + (key,),
704-
path=match.path + f"['{key}']",
703+
path=match.path + f"[{canonical_string(key)}]",
705704
root=match.root,
706705
)
707706
match.add_child(_match)

jsonpath/serialize.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
"""Helper functions for serializing compiled JSONPath queries."""
2+
3+
import json
4+
5+
6+
def canonical_string(value: str) -> str:
7+
"""Return _value_ as a canonically formatted string literal."""
8+
single_quoted = (
9+
json.dumps(value, ensure_ascii=False)[1:-1]
10+
.replace('\\"', '"')
11+
.replace("'", "\\'")
12+
)
13+
return f"'{single_quoted}'"

tests/nts

Submodule nts added at c9288b3

tests/test_compliance.py

Lines changed: 20 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,8 @@ class Case:
2828
document: Union[Mapping[str, Any], Sequence[Any], None] = None
2929
result: Any = None
3030
results: Optional[List[Any]] = None
31+
result_paths: Optional[List[str]] = None
32+
results_paths: Optional[List[List[str]]] = None
3133
invalid_selector: Optional[bool] = None
3234
tags: List[str] = field(default_factory=list)
3335

@@ -105,27 +107,38 @@ def test_compliance(case: Case) -> None:
105107
pytest.skip(reason=SKIP[case.name])
106108

107109
assert case.document is not None
108-
rv = jsonpath.findall(case.selector, case.document)
110+
nodes = jsonpath.NodeList(jsonpath.finditer(case.selector, case.document))
109111

110112
if case.results is not None:
111-
assert rv in case.results
113+
assert case.results_paths is not None
114+
assert nodes.values() in case.results
115+
assert nodes.paths() in case.results_paths
112116
else:
113-
assert rv == case.result
117+
assert case.result_paths is not None
118+
assert nodes.values() == case.result
119+
assert nodes.paths() == case.result_paths
114120

115121

116122
@pytest.mark.parametrize("case", valid_cases(), ids=operator.attrgetter("name"))
117123
def test_compliance_async(case: Case) -> None:
118124
if case.name in SKIP:
119125
pytest.skip(reason=SKIP[case.name])
120126

121-
async def coro() -> List[object]:
127+
async def coro() -> jsonpath.NodeList:
122128
assert case.document is not None
123-
return await jsonpath.findall_async(case.selector, case.document)
129+
it = await jsonpath.finditer_async(case.selector, case.document)
130+
return jsonpath.NodeList([node async for node in it])
131+
132+
nodes = asyncio.run(coro())
124133

125134
if case.results is not None:
126-
assert asyncio.run(coro()) in case.results
135+
assert case.results_paths is not None
136+
assert nodes.values() in case.results
137+
assert nodes.paths() in case.results_paths
127138
else:
128-
assert asyncio.run(coro()) == case.result
139+
assert case.result_paths is not None
140+
assert nodes.values() == case.result
141+
assert nodes.paths() == case.result_paths
129142

130143

131144
@pytest.mark.parametrize("case", invalid_cases(), ids=operator.attrgetter("name"))

0 commit comments

Comments
 (0)