Skip to content

Commit 2605de5

Browse files
authored
fix: UnicodeEncode error when save/load knowledge graph (#1900)
1 parent 1dd4b29 commit 2605de5

File tree

2 files changed

+93
-4
lines changed

2 files changed

+93
-4
lines changed

src/ragas/testset/graph.py

+31-4
Original file line numberDiff line numberDiff line change
@@ -173,24 +173,51 @@ def _add_relationship(self, relationship: Relationship):
173173
self.relationships.append(relationship)
174174

175175
def save(self, path: t.Union[str, Path]):
    """Saves the knowledge graph to a JSON file.

    Parameters
    ----------
    path : Union[str, Path]
        Path where the JSON file should be saved.

    Notes
    -----
    The file is always written as UTF-8 instead of the platform's default
    text encoding. ``ensure_ascii=False`` makes ``json.dump`` emit non-ASCII
    characters verbatim, so a locale-dependent default encoding (for example
    a "charmap" code page) can raise ``UnicodeEncodeError`` for characters it
    cannot represent.
    """
    # Path() accepts both str and Path, so no isinstance branch is needed.
    path = Path(path)

    data = {
        "nodes": [node.model_dump() for node in self.nodes],
        "relationships": [rel.model_dump() for rel in self.relationships],
    }
    # Explicit encoding: never rely on the locale default for JSON output.
    with path.open("w", encoding="utf-8") as f:
        json.dump(data, f, cls=UUIDEncoder, indent=2, ensure_ascii=False)
186197

187198
@classmethod
188199
def load(cls, path: t.Union[str, Path]) -> "KnowledgeGraph":
189-
"""Loads a knowledge graph from a path."""
200+
"""Loads a knowledge graph from a path.
201+
202+
Parameters
203+
----------
204+
path : Union[str, Path]
205+
Path to the JSON file containing the knowledge graph.
206+
207+
Returns
208+
-------
209+
KnowledgeGraph
210+
The loaded knowledge graph.
211+
212+
Notes
213+
-----
214+
The file is read using UTF-8 encoding to ensure proper handling of Unicode characters
215+
across different platforms.
216+
"""
190217
if isinstance(path, str):
191218
path = Path(path)
192219

193-
with open(path, "r") as f:
220+
with open(path, "r", encoding="utf-8") as f:
194221
data = json.load(f)
195222

196223
nodes = [Node(**node_data) for node_data in data["nodes"]]
+62
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
from ragas.testset.graph import KnowledgeGraph, Node, NodeType, Relationship
2+
3+
4+
def test_knowledge_graph_save_with_problematic_chars(tmp_path):
    """Round-trip a graph full of non-ASCII text through save() and load().

    Every node carries a different character that a non-UTF-8 default file
    encoding may be unable to write. The test passes only if the graph can be
    saved, loaded back, and every node's properties come back unchanged.
    """
    kg = KnowledgeGraph()

    # Characters that might cause charmap codec issues when the file is not
    # written as UTF-8.
    problematic_chars = [
        # Latin-1 supplement, U+0080..U+00FF inclusive (range end is exclusive).
        chr(i)
        for i in range(0x0080, 0x0100)
    ] + [
        "\u2022",  # bullet
        "\u2192",  # arrow
        "\u2665",  # heart
        "\u2605",  # star
        "\u221E",  # infinity
        "\u00B5",  # micro
        "\u2264",  # less than or equal
        "\u2265",  # greater than or equal
        "\u0391",  # Greek letters
        "\u0392",
        "\u0393",
        "\uFFFF",  # Special Unicode characters
    ]

    # One node per character, with the character embedded in every property.
    for i, char in enumerate(problematic_chars):
        text = f"Test{char}Text with special char at position {i}"
        node = Node(
            properties={
                "text": text,
                "description": f"Node {i} with {char}",
                "metadata": f"Extra {char} info",
            },
            type=NodeType.CHUNK,
        )
        kg.add(node)

    # Chain consecutive nodes together so relationships are serialized too.
    nodes = kg.nodes
    for i, (source, target) in enumerate(zip(nodes, nodes[1:])):
        rel = Relationship(
            source=source,
            target=target,
            type="next",
            properties={"info": f"Link {i} with special char {problematic_chars[i]}"},
        )
        kg.add(rel)

    # Save and reload using str paths, exercising the str -> Path conversion.
    save_path = tmp_path / "test_knowledge_graph.json"
    kg.save(str(save_path))
    loaded_kg = KnowledgeGraph.load(str(save_path))

    # Nothing was dropped or duplicated.
    assert len(loaded_kg.nodes) == len(kg.nodes)
    assert len(loaded_kg.relationships) == len(kg.relationships)

    # Every special character must survive the round trip, not just the one
    # carried by the first node.
    for original, loaded in zip(nodes, loaded_kg.nodes):
        assert loaded.properties == original.properties

0 commit comments

Comments
 (0)