explodinggradients · jjmachan · Feb 4, 2025 · Feb 4, 2025 · Feb 4, 2025 · Feb 4, 2025
diff --git a/docs/howtos/integrations/haystack.ipynb b/docs/howtos/integrations/haystack.ipynb
@@ -397,8 +397,8 @@
     "    }\n",
     ")\n",
     "\n",
-    "print(result['answer_builder']['answers'][0].data, '\\n')\n",
-    "print(result['ragas_evaluator']['result'])"
+    "print(result[\"answer_builder\"][\"answers\"][0].data, \"\\n\")\n",
+    "print(result[\"ragas_evaluator\"][\"result\"])"
    ]
   },
   {
@@ -455,12 +455,15 @@
     "rubrics = {\n",
     "    \"score1_description\": \"The response does not answer the user input.\",\n",
     "    \"score2_description\": \"The response partially answers the user input.\",\n",
-    "    \"score3_description\": \"The response fully answer the user input\"\n",
+    "    \"score3_description\": \"The response fully answer the user input\",\n",
     "}\n",
     "\n",
     "evaluator = RagasEvaluator(\n",
-    "    ragas_metrics=[SportsRelevanceMetric, RubricsScore(llm=evaluator_llm, rubrics=rubrics)],\n",
-    "    evaluator_llm=evaluator_llm\n",
+    "    ragas_metrics=[\n",
+    "        SportsRelevanceMetric,\n",
+    "        RubricsScore(llm=evaluator_llm, rubrics=rubrics),\n",
+    "    ],\n",
+    "    evaluator_llm=evaluator_llm,\n",
     ")\n",
     "\n",
     "output = evaluator.run(\n",
@@ -472,10 +475,10 @@
     "        \" billion people.\"\n",
     "    ],\n",
     "    response=\"Football is the most popular sport with around 4 billion\"\n",
-    "                \" followers worldwide\",\n",
+    "    \" followers worldwide\",\n",
     ")\n",
     "\n",
-    "output['result']"
+    "output[\"result\"]"
    ]
   }
  ],

diff --git a/src/ragas/integrations/langgraph.py b/src/ragas/integrations/langgraph.py
@@ -7,7 +7,8 @@
 
 
 def convert_to_ragas_messages(
-    messages: List[Union[HumanMessage, SystemMessage, AIMessage, ToolMessage]], metadata: bool = False
+    messages: List[Union[HumanMessage, SystemMessage, AIMessage, ToolMessage]],
+    metadata: bool = False,
 ) -> List[Union[r.HumanMessage, r.AIMessage, r.ToolMessage]]:
     """
     Convert LangChain messages into Ragas messages with metadata for agent evaluation.
@@ -47,16 +48,16 @@ def _validate_string_content(message, message_type: str) -> str:
     def _extract_metadata(message) -> dict:
 
         return {k: v for k, v in message.__dict__.items() if k != "content"}
-    
+
     if metadata:
         MESSAGE_TYPE_MAP = {
             HumanMessage: lambda m: r.HumanMessage(
                 content=_validate_string_content(m, "HumanMessage"),
-                metadata=_extract_metadata(m)
+                metadata=_extract_metadata(m),
             ),
             ToolMessage: lambda m: r.ToolMessage(
                 content=_validate_string_content(m, "ToolMessage"),
-                metadata=_extract_metadata(m)
+                metadata=_extract_metadata(m),
             ),
         }
     else:
@@ -85,12 +86,12 @@ def _convert_ai_message(message: AIMessage, metadata: bool) -> r.AIMessage:
             return r.AIMessage(
                 content=_validate_string_content(message, "AIMessage"),
                 tool_calls=tool_calls,
-                metadata=_extract_metadata(message)
+                metadata=_extract_metadata(message),
             )
         else:
             return r.AIMessage(
                 content=_validate_string_content(message, "AIMessage"),
-                tool_calls=tool_calls
+                tool_calls=tool_calls,
             )
 
     def _convert_message(message, metadata: bool = False):

diff --git a/src/ragas/llms/base.py b/src/ragas/llms/base.py
@@ -183,7 +183,8 @@ def is_finished(self, response: LLMResult) -> bool:
                 elif resp_message.response_metadata.get("stop_reason") is not None:
                     stop_reason = resp_message.response_metadata.get("stop_reason")
                     is_finished_list.append(
-                        stop_reason in ["end_turn", "stop", "STOP", "MAX_TOKENS", "eos_token"]
+                        stop_reason
+                        in ["end_turn", "stop", "STOP", "MAX_TOKENS", "eos_token"]
                     )
             # default to True
             else:

diff --git a/src/ragas/testset/graph.py b/src/ragas/testset/graph.py
@@ -173,24 +173,51 @@ def _add_relationship(self, relationship: Relationship):
         self.relationships.append(relationship)
 
     def save(self, path: t.Union[str, Path]):
-        """Saves the knowledge graph to a JSON file."""
+        """Saves the knowledge graph to a JSON file.
+
+        Parameters
+        ----------
+        path : Union[str, Path]
+            Path where the JSON file should be saved.
+
+        Notes
+        -----
+        The file is saved using UTF-8 encoding to ensure proper handling of Unicode characters
+        across different platforms.
+        """
         if isinstance(path, str):
             path = Path(path)
 
         data = {
             "nodes": [node.model_dump() for node in self.nodes],
             "relationships": [rel.model_dump() for rel in self.relationships],
         }
-        with open(path, "w") as f:
+        with open(path, "w", encoding="utf-8") as f:
             json.dump(data, f, cls=UUIDEncoder, indent=2, ensure_ascii=False)
 
     @classmethod
     def load(cls, path: t.Union[str, Path]) -> "KnowledgeGraph":
-        """Loads a knowledge graph from a path."""
+        """Loads a knowledge graph from a path.
+
+        Parameters
+        ----------
+        path : Union[str, Path]
+            Path to the JSON file containing the knowledge graph.
+
+        Returns
+        -------
+        KnowledgeGraph
+            The loaded knowledge graph.
+
+        Notes
+        -----
+        The file is read using UTF-8 encoding to ensure proper handling of Unicode characters
+        across different platforms.
+        """
         if isinstance(path, str):
             path = Path(path)
 
-        with open(path, "r") as f:
+        with open(path, "r", encoding="utf-8") as f:
             data = json.load(f)
 
         nodes = [Node(**node_data) for node_data in data["nodes"]]

diff --git a/src/ragas/utils.py b/src/ragas/utils.py
@@ -6,14 +6,13 @@
 import re
 import typing as t
 import warnings
+from datetime import datetime
 from functools import lru_cache
 
 import numpy as np
 import tiktoken
 from datasets import Dataset
 
-from datetime import datetime
-
 if t.TYPE_CHECKING:
     from ragas.metrics.base import Metric
 
@@ -278,8 +277,8 @@ class _ContextualFormatter(logging.Formatter):
     """
 
     def format(self, record):
-        from ragas._analytics import get_userid
         from ragas import __version__
+        from ragas._analytics import get_userid
 
         # Add UTC time
         record.utc_time = self.format_time(record, _LOGGER_DATE_TIME)

diff --git a/tests/unit/test_knowledge_graph_save.py b/tests/unit/test_knowledge_graph_save.py
@@ -0,0 +1,62 @@
+from ragas.testset.graph import KnowledgeGraph, Node, NodeType, Relationship
+
+
+def test_knowledge_graph_save_with_problematic_chars(tmp_path):
+    # Round-trip check: build a graph whose text holds non-ASCII characters, save it, load it back
+    kg = KnowledgeGraph()
+
+    # Create nodes with various Unicode characters including ones that might cause charmap codec issues
+    problematic_chars = [
+        chr(i) for i in range(0x0080, 0x00FF)  # U+0080..U+00FE; range end is exclusive, so U+00FF is omitted
+    ] + [
+        "\u2022",  # bullet
+        "\u2192",  # arrow
+        "\u2665",  # heart
+        "\u2605",  # star
+        "\u221E",  # infinity
+        "\u00B5",  # micro sign (also produced by the range above)
+        "\u2264",  # less than or equal
+        "\u2265",  # greater than or equal
+        "\u0391",  # Greek capital letters Alpha, Beta, Gamma
+        "\u0392",
+        "\u0393",
+        "\uFFFF",  # U+FFFF, a Unicode noncharacter
+    ]
+
+    # Create one node per character, embedding that character in every property value
+    for i, char in enumerate(problematic_chars):
+        text = f"Test{char}Text with special char at position {i}"
+        node = Node(
+            properties={
+                "text": text,
+                "description": f"Node {i} with {char}",
+                "metadata": f"Extra {char} info",
+            },
+            type=NodeType.CHUNK,
+        )
+        kg.add(node)
+
+    # Link each node to the next one; relationship i carries problematic_chars[i] in its properties
+    nodes = kg.nodes
+    for i in range(len(nodes) - 1):
+        rel = Relationship(
+            source=nodes[i],
+            target=nodes[i + 1],
+            type="next",
+            properties={"info": f"Link {i} with special char {problematic_chars[i]}"},
+        )
+        kg.add(rel)
+
+    # Save to a file under tmp_path (the path is passed as a str, not a Path)
+    save_path = tmp_path / "test_knowledge_graph.json"
+    kg.save(str(save_path))
+
+    # Load it back from the same str path
+    loaded_kg = KnowledgeGraph.load(str(save_path))
+
+    # Verify the node and relationship counts survived the round trip
+    assert len(loaded_kg.nodes) == len(kg.nodes)
+    assert len(loaded_kg.relationships) == len(kg.relationships)
+
+    # Only the first node's "text" is compared exactly; the other nodes are checked by count alone
+    assert loaded_kg.nodes[0].properties["text"] == nodes[0].properties["text"]