11
11
12
12
13
13
from typing import Callable , Union , Tuple , List
14
+ import unicodedata
14
15
from .pyrser import Pyrser
15
16
from .match import Match
16
17
from .re_ast import RE , GroupNode , LeafNode , OrNode , EndElement , StartElement
@@ -25,7 +26,7 @@ class RegexEngine:
25
26
# Constructor: takes no arguments besides self and sets up one attribute.
def __init__ (self ):
26
27
# Store a freshly constructed Pyrser (imported from .pyrser) on the instance.
# NOTE(review): presumably this is the regex parser used by match() — confirm
# against the rest of the class, which is not visible in this view.
self .parser : Pyrser = Pyrser ()
27
28
28
- def match (self , re : str , string : str , return_matches : bool = False , continue_after_match : bool = False ) -> Union [Tuple [bool , int , List [List [Match ]]], Tuple [bool , int ]]:
29
+ def match (self , re : str , string : str , return_matches : bool = False , continue_after_match : bool = False , ignore_case : int = 0 ) -> Union [Tuple [bool , int , List [List [Match ]]], Tuple [bool , int ]]:
29
30
""" Searches a regex in a test string.
30
31
31
32
Searches the passed regular expression in the passed test string and
@@ -34,6 +35,10 @@ def match(self, re: str, string: str, return_matches: bool = False, continue_aft
34
35
It is possible to customize both the returned value and the search
35
36
method.
36
37
38
+ The ignore_case flag may cause unexpected results in the returned
39
+ number of matched characters, and also in the returned matches, e.g.
40
+ when the character ẞ is present in either the regex or the test string.
41
+
37
42
Args:
38
43
re (str): the regular expression to search
39
44
string (str): the test string
@@ -43,6 +48,9 @@ def match(self, re: str, string: str, return_matches: bool = False, continue_aft
43
48
continue_after_match (bool): if True the engine continues
44
49
matching until the whole input is consumed
45
50
(default is False)
51
+ ignore_case (int): when 0 the case is not ignored, when 1 a "soft"
52
+ case ignoring is performed, when 2 casefolding is performed.
53
+ (default is 0)
46
54
47
55
Returns:
48
56
A tuple containing whether a match was found or not, the last
@@ -59,6 +67,13 @@ def return_fnc(res: bool, str_i: int, all_matches: list, return_matches: bool) -
59
67
else :
60
68
return res , str_i
61
69
70
+ if ignore_case == 1 :
71
+ re = unicodedata .normalize ("NFKD" , re ).lower ()
72
+ string = unicodedata .normalize ("NFKD" , string ).casefold ()
73
+ elif ignore_case == 2 :
74
+ re = unicodedata .normalize ("NFKD" , re ).casefold ()
75
+ string = unicodedata .normalize ("NFKD" , string ).casefold ()
76
+
62
77
all_matches = [] # variables holding the matched groups list for each matched substring in the test string
63
78
highest_matched_idx = 0 # holds the highest test_str index matched
64
79
0 commit comments