lorenzofelletti
diff --git a/‎create_uml.sh
Lines changed: 22 additions & 0 deletions b/‎create_uml.sh
Lines changed: 22 additions & 0 deletions
diff --git a/‎docs/uml/classes.engine.png
2.36 KB b/‎docs/uml/classes.engine.png
2.36 KB
diff --git a/‎docs/uml/classes.pyregexp.png
10.5 KB b/‎docs/uml/classes.pyregexp.png
10.5 KB
diff --git a/‎docs/uml/classes.re_ast.png
4.77 KB b/‎docs/uml/classes.re_ast.png
4.77 KB
diff --git a/‎pyregexp/engine.py
Lines changed: 16 additions & 1 deletion b/‎pyregexp/engine.py
Lines changed: 16 additions & 1 deletion
diff --git a/‎test/test_engine.py
Lines changed: 34 additions & 0 deletions b/‎test/test_engine.py
Lines changed: 34 additions & 0 deletions
@@ -0,0 +1,22 @@
+#!/bin/bash
+pyreverse -o re_ast.png -A -S -mn -f ALL ./pyregexp/re_ast.py
+mv classes.re_ast.png docs/uml
+
+pyreverse -o engine.png -A -S -mn -f ALL ./pyregexp/engine.py
+mv classes.engine.png docs/uml
+
+pyreverse -o lexer.png -A -S -mn -f ALL ./pyregexp/lexer.py
+mv classes.lexer.png docs/uml
+
+pyreverse -o match.png -A -S -mn -f ALL ./pyregexp/match.py
+mv classes.match.png docs/uml
+
+pyreverse -o pyrser.png -A -S -mn -f ALL ./pyregexp/pyrser.py
+mv classes.pyrser.png docs/uml
+
+pyreverse -o tokens.png -A -S -mn -f ALL ./pyregexp/tokens.py
+mv classes.tokens.png docs/uml
+
+pyreverse -o pyregexp.png -A -S -mn ./pyregexp/*
+mv classes.pyregexp.png docs/uml
+mv packages.pyregexp.png docs/uml
@@ -11,6 +11,7 @@
 
 
 from typing import Callable, Union, Tuple, List
+import unicodedata
 from .pyrser import Pyrser
 from .match import Match
 from .re_ast import RE, GroupNode, LeafNode, OrNode, EndElement, StartElement
@@ -25,7 +26,7 @@ class RegexEngine:
     def __init__(self):
         self.parser: Pyrser = Pyrser()
 
-    def match(self, re: str, string: str, return_matches: bool = False, continue_after_match: bool = False) -> Union[Tuple[bool, int, List[List[Match]]], Tuple[bool, int]]:
+    def match(self, re: str, string: str, return_matches: bool = False, continue_after_match: bool = False, ignore_case: int = 0) -> Union[Tuple[bool, int, List[List[Match]]], Tuple[bool, int]]:
         """ Searches a regex in a test string.
 
         Searches the passed regular expression in the passed test string and
@@ -34,6 +35,10 @@ def match(self, re: str, string: str, return_matches: bool = False, continue_aft
         It is possible to customize both the returned value and the search
         method.
 
+        The ignore_case flag may cause unexpected results in the returned
+        number of matched characters, and also in the returned matches, e.g.
+        when the character ẞ is present in either the regex or the test string.
+
         Args:
             re (str): the regular expression to search
             string (str): the test string
@@ -43,6 +48,9 @@ def match(self, re: str, string: str, return_matches: bool = False, continue_aft
             continue_after_match (bool): if True the engine continues
                 matching until the whole input is consumed
                 (default is False)
+            ignore_case (int): when 0 the case is not ignored, when 1 a "soft"
+                case ignoring is performed, when 2 casefolding is performed.
+                (default is 0)
 
         Returns:
             A tuple containing whether a match was found or not, the last
@@ -59,6 +67,13 @@ def return_fnc(res: bool, str_i: int, all_matches: list, return_matches: bool) -
             else:
                 return res, str_i
 
+        if ignore_case == 1:
+            re = unicodedata.normalize("NFKD", re).lower()
+            string = unicodedata.normalize("NFKD", string).casefold()
+        elif ignore_case == 2:
+           re = unicodedata.normalize("NFKD", re).casefold()
+           string = unicodedata.normalize("NFKD", string).casefold()
+
         all_matches = []  # variables holding the matched groups list for each matched substring in the test string
         highest_matched_idx = 0  # holds the highest test_str index matched
 
 
@@ -405,3 +405,37 @@ def test_on_long_string(reng: RegexEngine):
         matches[1][0].end_idx == len(test_str)
     assert matches[1][1].start_idx == len(test_str)-1 and \
         matches[1][1].end_idx == len(test_str)
+
+
+def test_ignore_case_no_casefolding(reng: RegexEngine):
+    regex = r"ss"
+    test_str = "SS"
+    res, _ = reng.match(regex, test_str, ignore_case=1)
+    assert res == True
+
+    regex = r"ÄCHER"
+    test_str = "ächer"
+    res, _ = reng.match(regex, test_str, ignore_case=1)
+    assert res == True
+
+    regex = r"ÄCHER"
+    test_str = "acher"
+    res, _ = reng.match(regex, test_str, ignore_case=1)
+    assert res == False
+
+
+def test_ignore_case_casefolding(reng: RegexEngine):
+    regex = r"ẞ"
+    test_str = "SS"
+    res, _ = reng.match(regex, test_str, ignore_case=2)
+    assert res == True
+
+    regex = r"ÄCHER"
+    test_str = "ächer"
+    res, _ = reng.match(regex, test_str, ignore_case=2)
+    assert res == True
+
+    regex = r"ÄCHER"
+    test_str = "acher"
+    res, _ = reng.match(regex, test_str, ignore_case=2)
+    assert res == False