10
10
"""
11
11
12
12
13
- from typing import Callable , Union , Tuple , List
13
+ from collections import deque
14
+ from typing import Callable , Deque , Union , Tuple , List
14
15
import unicodedata
15
16
from .pyrser import Pyrser
16
17
from .match import Match
@@ -74,8 +75,9 @@ def return_fnc(res: bool, consumed: int, all_matches: list, return_matches: bool
74
75
re = unicodedata .normalize ("NFKD" , re ).casefold ()
75
76
string = unicodedata .normalize ("NFKD" , string ).casefold ()
76
77
77
- all_matches = [] # variables holding the matched groups list for each matched substring in the test string
78
- highest_matched_idx = 0 # holds the highest matched string's index
78
+ # variables holding the matched groups list for each matched substring in the test string
79
+ all_matches : List [List [Match ]] = []
80
+ highest_matched_idx : int = 0 # holds the highest matched string's index
79
81
80
82
res , consumed , matches = self .__match__ (re , string , 0 )
81
83
if res :
@@ -101,8 +103,8 @@ def return_fnc(res: bool, consumed: int, all_matches: list, return_matches: bool
101
103
def __match__ (self , re : str , string : str , start_str_i : int ) -> Tuple [bool , int , List [Match ]]:
102
104
""" Same as match, but always returns after the first match."""
103
105
ast = self .parser .parse (re = re )
104
- matches : List [Match ]
105
- matches = []
106
+ matches : Deque [Match ]
107
+ matches = deque ()
106
108
107
109
# str_i represents the matched characters so far. It is initialized to
108
110
# the value of the input parameter start_str_i because the match could
@@ -114,67 +116,7 @@ def __match__(self, re: str, string: str, start_str_i: int) -> Tuple[bool, int,
114
116
def return_fnc (res : bool , str_i : int ) -> Tuple [bool , int , List [Match ]]:
115
117
""" Returns the Tuple to be returned by __match__."""
116
118
nonlocal matches
117
- # reverses the list so the last match (the "whole" match) is first
118
- matches .reverse ()
119
- return res , str_i , matches
120
-
121
- def backtrack (backtrack_stack : List [Tuple [int , int , int , List [int ]]], str_i : int , curr_i : int ) -> Tuple [bool , int , int ]:
122
- """ Returns whether it is possible to backtrack and the state to backtrack to.
123
-
124
- Takes as input the current state of the engine and returns whether
125
- or not it is possible to backtrack.
126
-
127
- Args:
128
- backtrack_stack (List[Tuple[int, int, int, List[int]]]): the
129
- current backtrack_stack situation. The Tuple values represents,
130
- in order from left to right, the node index of the entry in its
131
- parent children list, the minimum times that node must be
132
- matched, the time it is matched in the current state, the list
133
- of consumed character each times it was matched
134
- str_i (int): the current considered index of the test string
135
- curr_i (int): the index of the GroupNode children considered
136
-
137
- Returns:
138
- A Tuple containing a bool, True if it is possible to backtrack,
139
- the new string index, and the new node children index to which
140
- backtrack to. Note that the last two parameters only have a
141
- meaning in the case it is possible to backtrack (the bool is
142
- True).
143
- """
144
- if len (backtrack_stack ) == 0 :
145
- return False , str_i , curr_i
146
-
147
- # the fist step is to pop the last tuple from the backtrack_stack
148
- node_i , min_ , matched_times , consumed_list = backtrack_stack .pop ()
149
-
150
- if matched_times == min_ :
151
- # if a node is already matched the minimum number of times, the
152
- # chance you have to potentially be able to backtrack is to is
153
- # to delete the entry from the stack and then search for a new
154
- # possibility (recursively calling this function).
155
- # But, before the recursion, you have to calculate what the
156
- # string index (str_i) value was before the node was matched
157
- # even once. Thus, you have to decrease the string index
158
- # of each consumption in the consumed_list.
159
-
160
- # calculate_the new str_i
161
- for consumption in consumed_list :
162
- str_i -= consumption
163
- # recursive call
164
- return backtrack (backtrack_stack , str_i , node_i )
165
- else :
166
- # the node was matched more times than its min, so you just
167
- # need to remove the last consumption from the list,
168
- # decrease the str_i by that amount, decrease the times the node
169
- # was matched - matched_times - by 1, and then append the stack
170
- # the tuple with the new matched_times and consumed_list.
171
- last_consumed = consumed_list .pop ()
172
- new_str_i = str_i - last_consumed
173
- backtrack_stack .append (
174
- (node_i , min_ , matched_times - 1 , consumed_list ))
175
- # lastly, you return that the backtracking is possible, and
176
- # the state to which backtrack to.
177
- return True , new_str_i , curr_i
119
+ return res , str_i , list (matches )
178
120
179
121
def save_matches (match_group : Callable , ast : Union [RE , GroupNode ], string : str , start_idx : int ) -> Tuple [bool , int ]:
180
122
""" Save the matches of capturing groups.
@@ -198,7 +140,7 @@ def save_matches(match_group: Callable, ast: Union[RE, GroupNode], string: str,
198
140
if matches [i ].group_id == ast .group_id :
199
141
matches .remove (matches [i ])
200
142
break
201
- matches .append (
143
+ matches .appendleft (
202
144
Match (ast .group_id , start_idx , end_idx , string , ast .group_name ))
203
145
204
146
return res , end_idx
@@ -211,7 +153,68 @@ def match_group(ast: Union[RE, GroupNode, OrNode], string: str) -> Tuple[bool, i
211
153
number of matched characters in the string so far.
212
154
'''
213
155
nonlocal str_i
214
- backtrack_stack = []
156
+ backtrack_stack : List [Tuple [int , int , int , List [int ]]] = []
157
+
158
+ def backtrack (str_i : int , curr_i : int ) -> Tuple [bool , int , int ]:
159
+ """ Returns whether it is possible to backtrack and the state to backtrack to.
160
+
161
+ Takes as input the current state of the engine and returns whether
162
+ or not it is possible to backtrack.
163
+
164
+ Args:
165
+ backtrack_stack (List[Tuple[int, int, int, List[int]]]; captured via nonlocal, not passed as an argument): the
166
+ current backtrack_stack situation. The Tuple values represent,
167
+ in order from left to right, the node index of the entry in its
168
+ parent children list, the minimum times that node must be
169
+ matched, the time it is matched in the current state, the list
170
+ of characters consumed each time it was matched
171
+ str_i (int): the current considered index of the test string
172
+ curr_i (int): the index of the GroupNode children considered
173
+
174
+ Returns:
175
+ A Tuple containing a bool, True if it is possible to backtrack,
176
+ the new string index, and the new node children index to which
177
+ backtrack to. Note that the last two parameters only have a
178
+ meaning in the case it is possible to backtrack (the bool is
179
+ True).
180
+ """
181
+ nonlocal backtrack_stack
182
+
183
+ if len (backtrack_stack ) == 0 :
184
+ return False , str_i , curr_i
185
+
186
+ # the first step is to pop the last tuple from the backtrack_stack
187
+ node_i , min_ , matched_times , consumed_list = backtrack_stack .pop ()
188
+
189
+ if matched_times == min_ :
190
+ # if a node is already matched the minimum number of times, the
191
+ # only chance you have to potentially be able to backtrack is
192
+ # to delete the entry from the stack and then search for a new
193
+ # possibility (recursively calling this function).
194
+ # But, before the recursion, you have to calculate what the
195
+ # string index (str_i) value was before the node was matched
196
+ # even once. Thus, you have to decrease the string index
197
+ # of each consumption in the consumed_list.
198
+
199
+ # calculate_the new str_i
200
+ for consumption in consumed_list :
201
+ str_i -= consumption
202
+ # recursive call
203
+ return backtrack (str_i , node_i )
204
+ else :
205
+ # the node was matched more times than its min, so you just
206
+ # need to remove the last consumption from the list,
207
+ # decrease the str_i by that amount, decrease the times the node
208
+ # was matched - matched_times - by 1, and then append the stack
209
+ # the tuple with the new matched_times and consumed_list.
210
+ last_consumed = consumed_list .pop ()
211
+ new_str_i = str_i - last_consumed
212
+ backtrack_stack .append (
213
+ (node_i , min_ , matched_times - 1 , consumed_list ))
214
+ # lastly, you return that the backtracking is possible, and
215
+ # the state to which backtrack to.
216
+ return True , new_str_i , curr_i
217
+
215
218
curr_node = ast .children [0 ] if len (ast .children ) > 0 else None
216
219
i = 0 # the children i'm iterating, not to confuse with str_i
217
220
@@ -243,8 +246,7 @@ def match_group(ast: Union[RE, GroupNode, OrNode], string: str) -> Tuple[bool, i
243
246
elif min_ <= j :
244
247
break
245
248
else :
246
- can_bt , bt_str_i , bt_i = backtrack (
247
- backtrack_stack , str_i , i )
249
+ can_bt , bt_str_i , bt_i = backtrack (str_i , i )
248
250
if can_bt :
249
251
i = bt_i
250
252
str_i = bt_str_i
@@ -280,8 +282,7 @@ def match_group(ast: Union[RE, GroupNode, OrNode], string: str) -> Tuple[bool, i
280
282
# i did the bare minimum or more
281
283
break
282
284
else :
283
- can_bt , bt_str_i , bt_i = backtrack (
284
- backtrack_stack , str_i , i )
285
+ can_bt , bt_str_i , bt_i = backtrack (str_i , i )
285
286
if can_bt :
286
287
i = bt_i
287
288
str_i = bt_str_i
@@ -321,7 +322,7 @@ def match_group(ast: Union[RE, GroupNode, OrNode], string: str) -> Tuple[bool, i
321
322
if min_ <= j : # I already met the minimum requirement for match
322
323
break
323
324
can_bt , bt_str_i , bt_i = backtrack (
324
- backtrack_stack , before_str_i , i )
325
+ before_str_i , i )
325
326
if can_bt :
326
327
i = bt_i
327
328
str_i = bt_str_i
@@ -338,7 +339,7 @@ def match_group(ast: Union[RE, GroupNode, OrNode], string: str) -> Tuple[bool, i
338
339
else :
339
340
# i have more states, but the input is finished
340
341
can_bt , bt_str_i , bt_i = backtrack (
341
- backtrack_stack , before_str_i , i )
342
+ before_str_i , i )
342
343
if can_bt :
343
344
i = bt_i
344
345
str_i = bt_str_i
@@ -371,6 +372,6 @@ def match_group(ast: Union[RE, GroupNode, OrNode], string: str) -> Tuple[bool, i
371
372
if res :
372
373
return return_fnc (True , str_i )
373
374
else :
374
- matches = []
375
+ matches = deque ()
375
376
str_i = i
376
377
return return_fnc (False , str_i )
0 commit comments