lorenzofelletti
diff --git a/‎docs/uml/classes.pyregexp.png
-18.8 KB b/‎docs/uml/classes.pyregexp.png
-18.8 KB
diff --git a/‎docs/uml/classes.re_ast.png
-42.2 KB b/‎docs/uml/classes.re_ast.png
-42.2 KB
diff --git a/‎docs/uml/classes.tokens.png
-54 KB b/‎docs/uml/classes.tokens.png
-54 KB
diff --git a/‎pyregexp/engine.py
Lines changed: 76 additions & 75 deletions b/‎pyregexp/engine.py
Lines changed: 76 additions & 75 deletions
diff --git a/‎pyregexp/lexer.py
Lines changed: 2 additions & 2 deletions b/‎pyregexp/lexer.py
Lines changed: 2 additions & 2 deletions
diff --git a/‎pyregexp/pyrser.py
Lines changed: 20 additions & 23 deletions b/‎pyregexp/pyrser.py
Lines changed: 20 additions & 23 deletions
@@ -10,7 +10,8 @@
 """
 
 
-from typing import Callable, Union, Tuple, List
+from collections import deque
+from typing import Callable, Deque, Union, Tuple, List
 import unicodedata
 from .pyrser import Pyrser
 from .match import Match
@@ -74,8 +75,9 @@ def return_fnc(res: bool, consumed: int, all_matches: list, return_matches: bool
             re = unicodedata.normalize("NFKD", re).casefold()
             string = unicodedata.normalize("NFKD", string).casefold()
 
-        all_matches = []  # variables holding the matched groups list for each matched substring in the test string
-        highest_matched_idx = 0  # holds the highest matched string's index
+        # variables holding the matched groups list for each matched substring in the test string
+        all_matches: List[List[Match]] = []
+        highest_matched_idx: int = 0  # holds the highest matched string's index
 
         res, consumed, matches = self.__match__(re, string, 0)
         if res:
@@ -101,8 +103,8 @@ def return_fnc(res: bool, consumed: int, all_matches: list, return_matches: bool
     def __match__(self, re: str, string: str, start_str_i: int) -> Tuple[bool, int, List[Match]]:
         """ Same as match, but always returns after the first match."""
         ast = self.parser.parse(re=re)
-        matches: List[Match]
-        matches = []
+        matches: Deque[Match]
+        matches = deque()
 
         # str_i represents the matched characters so far. It is inizialized to
         # the value of the input parameter start_str_i because the match could
@@ -114,67 +116,7 @@ def __match__(self, re: str, string: str, start_str_i: int) -> Tuple[bool, int,
         def return_fnc(res: bool, str_i: int) -> Tuple[bool, int, List[Match]]:
             """ Returns the Tuple to be returned by __match__."""
             nonlocal matches
-            # reverses the list so the last match (the "whole" match) is first
-            matches.reverse()
-            return res, str_i, matches
-
-        def backtrack(backtrack_stack: List[Tuple[int, int, int, List[int]]], str_i: int, curr_i: int) -> Tuple[bool, int, int]:
-            """ Returns whether it is possible to backtrack and the state to backtrack to.
-
-            Takes as input the current state of the engine and returns whether
-            or not it is possible to backtrack.
-
-            Args:
-                backtrack_stack (List[Tuple[int, int, int, List[int]]]): the
-                current backtrack_stack situation. The Tuple values represents,
-                in order from left to right, the node index of the entry in its
-                parent children list, the minimum times that node must be
-                matched, the time it is matched in the current state, the list
-                of consumed character each times it was matched
-                str_i (int): the current considered index of the test string
-                curr_i (int): the index of the GroupNode children considered
-
-            Returns:
-                A Tuple containing a bool, True if it is possible to backtrack,
-                the new string index, and the new node children index to which
-                backtrack to. Note that the last two parameters only have a
-                meaning in the case it is possible to backtrack (the bool is
-                True).
-            """
-            if len(backtrack_stack) == 0:
-                return False, str_i, curr_i
-
-            # the fist step is to pop the last tuple from the backtrack_stack
-            node_i, min_, matched_times, consumed_list = backtrack_stack.pop()
-
-            if matched_times == min_:
-                # if a node is already matched the minimum number of times, the
-                # chance you have to potentially be able to backtrack is to is
-                # to delete the entry from the stack and then search for a new
-                # possibility (recursively calling this function).
-                # But, before the recursion, you have to calculate  what the
-                # string index (str_i) value was before the node was matched
-                # even once. Thus, you have to decrease the string index
-                # of each consumption in the consumed_list.
-
-                # calculate_the new str_i
-                for consumption in consumed_list:
-                    str_i -= consumption
-                # recursive call
-                return backtrack(backtrack_stack, str_i, node_i)
-            else:
-                # the node was matched more times than its min, so you just
-                # need to remove the last consumption from the list,
-                # decrease the str_i by that amount, decrease the times the node
-                # was matched - matched_times - by 1, and then append the stack
-                # the tuple with the new matched_times and consumed_list.
-                last_consumed = consumed_list.pop()
-                new_str_i = str_i - last_consumed
-                backtrack_stack.append(
-                    (node_i, min_, matched_times - 1, consumed_list))
-                # lastly, you return that the backtracking is possible, and
-                # the state to which backtrack to.
-                return True, new_str_i, curr_i
+            return res, str_i, list(matches)
 
         def save_matches(match_group: Callable, ast: Union[RE, GroupNode], string: str, start_idx: int) -> Tuple[bool, int]:
             """ Save the matches of capturing groups.
@@ -198,7 +140,7 @@ def save_matches(match_group: Callable, ast: Union[RE, GroupNode], string: str,
                     if matches[i].group_id == ast.group_id:
                         matches.remove(matches[i])
                         break
-                matches.append(
+                matches.appendleft(
                     Match(ast.group_id, start_idx, end_idx, string, ast.group_name))
 
             return res, end_idx
@@ -211,7 +153,68 @@ def match_group(ast: Union[RE, GroupNode, OrNode], string: str) -> Tuple[bool, i
             number of matched characters in the string so far.
             '''
             nonlocal str_i
-            backtrack_stack = []
+            backtrack_stack: List[Tuple[int, int, int, List[int]]] = []
+
+            def backtrack(str_i: int, curr_i: int) -> Tuple[bool, int, int]:
+                """ Returns whether it is possible to backtrack and the state to backtrack to.
+
+                Takes as input the current state of the engine and returns whether
+                or not it is possible to backtrack.
+
+                Works on the enclosing match_group's backtrack_stack, whose
+                Tuples hold, in order from left to right, the node index of the
+                entry in its parent children list, the minimum times that node
+                must be matched, the times it is matched in the current state,
+                and the list of characters consumed each time it was matched.
+
+                Args:
+                    str_i (int): the current considered index of the test string
+                    curr_i (int): the index of the GroupNode children considered
+
+                Returns:
+                    A Tuple containing a bool, True if it is possible to backtrack,
+                    the new string index, and the new node children index to which
+                    backtrack to. Note that the last two parameters only have a
+                    meaning in the case it is possible to backtrack (the bool is
+                    True).
+                """
+                nonlocal backtrack_stack
+
+                if len(backtrack_stack) == 0:
+                    return False, str_i, curr_i
+
+                # the first step is to pop the last tuple from the backtrack_stack
+                node_i, min_, matched_times, consumed_list = backtrack_stack.pop()
+
+                if matched_times == min_:
+                    # if a node is already matched the minimum number of times, the
+                    # only chance you have to potentially be able to backtrack is
+                    # to delete the entry from the stack and then search for a new
+                    # possibility (recursively calling this function).
+                    # But, before the recursion, you have to calculate what the
+                    # string index (str_i) value was before the node was matched
+                    # even once. Thus, you have to decrease the string index
+                    # by each consumption in the consumed_list.
+
+                    # calculate the new str_i
+                    for consumption in consumed_list:
+                        str_i -= consumption
+                    # recursive call
+                    return backtrack(str_i, node_i)
+                else:
+                    # the node was matched more times than its min, so you just
+                    # need to remove the last consumption from the list,
+                    # decrease the str_i by that amount, decrease the times the node
+                    # was matched - matched_times - by 1, and then push back onto the
+                    # stack the tuple with the new matched_times and consumed_list.
+                    last_consumed = consumed_list.pop()
+                    new_str_i = str_i - last_consumed
+                    backtrack_stack.append(
+                        (node_i, min_, matched_times - 1, consumed_list))
+                    # lastly, you return that the backtracking is possible, and
+                    # the state to which backtrack to.
+                    return True, new_str_i, curr_i
+
             curr_node = ast.children[0] if len(ast.children) > 0 else None
             i = 0  # the children i'm iterating, not to confuse with str_i
 
@@ -243,8 +246,7 @@ def match_group(ast: Union[RE, GroupNode, OrNode], string: str) -> Tuple[bool, i
                         elif min_ <= j:
                             break
                         else:
-                            can_bt, bt_str_i, bt_i = backtrack(
-                                backtrack_stack, str_i, i)
+                            can_bt, bt_str_i, bt_i = backtrack(str_i, i)
                             if can_bt:
                                 i = bt_i
                                 str_i = bt_str_i
@@ -280,8 +282,7 @@ def match_group(ast: Union[RE, GroupNode, OrNode], string: str) -> Tuple[bool, i
                             # i did the bare minimum or more
                             break
                         else:
-                            can_bt, bt_str_i, bt_i = backtrack(
-                                backtrack_stack, str_i, i)
+                            can_bt, bt_str_i, bt_i = backtrack(str_i, i)
                             if can_bt:
                                 i = bt_i
                                 str_i = bt_str_i
@@ -321,7 +322,7 @@ def match_group(ast: Union[RE, GroupNode, OrNode], string: str) -> Tuple[bool, i
                                 if min_ <= j:  # I already met the minimum requirement for match
                                     break
                                 can_bt, bt_str_i, bt_i = backtrack(
-                                    backtrack_stack, before_str_i, i)
+                                    before_str_i, i)
                                 if can_bt:
                                     i = bt_i
                                     str_i = bt_str_i
@@ -338,7 +339,7 @@ def match_group(ast: Union[RE, GroupNode, OrNode], string: str) -> Tuple[bool, i
                             else:
                                 # i have more states, but the input is finished
                                 can_bt, bt_str_i, bt_i = backtrack(
-                                    backtrack_stack, before_str_i, i)
+                                    before_str_i, i)
                                 if can_bt:
                                     i = bt_i
                                     str_i = bt_str_i
@@ -371,6 +372,6 @@ def match_group(ast: Union[RE, GroupNode, OrNode], string: str) -> Tuple[bool, i
             if res:
                 return return_fnc(True, str_i)
             else:
-                matches = []
+                matches = deque()
                 str_i = i
         return return_fnc(False, str_i)
@@ -43,7 +43,7 @@ def append(elem: Token) -> None:
                     append(ElementToken(char='\t'))
                 if ch == 's':
                     # \s matches a space character
-                    append(SpaceToken(space_ch=ch))
+                    append(SpaceToken(char=ch))
                 else:
                     append(ElementToken(char=ch))
             elif ch == '\\':
@@ -75,7 +75,7 @@ def append(elem: Token) -> None:
                         append(RightCurlyBrace())
                         break
                     else:
-                        raise Exception('Bad token at index ${}.'.format(i))
+                        raise Exception("Bad token at index ${}.".format(i))
                     i += 1
             elif ch == '^':
                 if i == 0:
 
@@ -68,7 +68,8 @@ def parse_re_seq(capturing: bool = True, group_name: str = None, group_id: int =
                 next_tkn()
                 match_start = True
 
-            node = parse_group(capturing=capturing, group_name=group_name, group_id=group_id)
+            node = parse_group(capturing=capturing,
+                               group_name=group_name, group_id=group_id)
 
             if isinstance(curr_tkn, EndToken):
                 next_tkn()
@@ -119,7 +120,6 @@ def parse_group(capturing: bool = True, group_name: str = None, group_id: int =
                     parse_curly(new_el)
 
                 elements.append(new_el)
-                # next_tkn()
 
             return GroupNode(children=elements, capturing=capturing, group_name=group_name, group_id=group_id)
 
@@ -139,13 +139,13 @@ def parse_curly(new_el: ASTNode) -> None:
                     val_1 = int(val_1)
 
                 if isinstance(curr_tkn, RightCurlyBrace):
-                    # I'm in the case {exact}
+                    # case {exact}
                     if type(val_1) is int:
                         new_el.min, new_el.max = val_1, val_1
                         next_tkn()  # skip the closing brace
                         return
                     else:
-                        raise Exception()
+                        raise Exception("Invalid curly brace syntax.")
 
                 next_tkn()
                 while isinstance(curr_tkn, ElementToken):
@@ -156,14 +156,13 @@ def parse_curly(new_el: ASTNode) -> None:
                 else:
                     val_2 = int(val_2)
 
-                # skip the closing brace
-                next_tkn()
+                next_tkn()  # skip the closing brace
 
                 new_el.min = val_1 if type(val_1) is int else 0
                 new_el.max = val_2 if type(val_2) is int else math.inf
 
             except Exception as e:
-                raise Exception('Invalid curly brace syntax.')
+                raise Exception("Invalid curly brace syntax.")
 
         def parse_range_el() -> ASTNode:
             if isinstance(curr_tkn, LeftBracket):
@@ -173,17 +172,17 @@ def parse_range_el() -> ASTNode:
                     return element
                 else:
                     raise Exception(
-                        'Missing closing \']\'. Check the regex and try again.')
+                        "Missing closing ']'.")
             else:
                 return parse_el()
 
         def parse_inner_el() -> RangeElement:
+            # parse_inner_el creates a single RangeElement with all the matches
             nonlocal curr_tkn
-            # innerel creates a single RangeElement with all the matches
             match_str = ''
             if curr_tkn is None:
                 raise Exception(
-                    "Missing closing ']'. Check the regex and try again.")
+                    "Missing closing ']'.")
 
             positive_logic = True
             if isinstance(curr_tkn, NotToken):
@@ -205,8 +204,7 @@ def parse_inner_el() -> RangeElement:
                     curr_tkn = ElementToken(char=curr_tkn.char)
 
                 if next_tkn(without_consuming=True) is None:
-                    raise Exception(
-                        "Missing closing ']'. Check the regex and try again.")
+                    raise Exception("Missing closing ']'.")
                 elif isinstance(next_tkn(without_consuming=True), Dash):
                     # it may be a range (like a-z, A-M, 0-9, ...)
                     prev_char = curr_tkn.char
@@ -219,8 +217,7 @@ def parse_inner_el() -> RangeElement:
                         # we're in the case of an actual range (or next_tkn is none)
                         next_tkn()  # curr_tkn is now the one after the dash
                         if next_tkn is None:
-                            raise Exception(
-                                "Missing closing ']'. Check the regex and try again.")
+                            raise Exception("Missing closing ']'.")
                         elif ord(prev_char) > ord(curr_tkn.char):
                             raise Exception(
                                 f"Range values reversed. Start '{prev_char}' char code is greater than end '{curr_tkn.char}' char code.")
@@ -257,31 +254,31 @@ def parse_el() -> Union[Element, OrNode, GroupNode]:
                         group_name = parse_group_name()
                     else:
                         if curr_tkn is None:
-                            raise Exception('Unterminated group')
+                            raise Exception("Unterminated group.")
                         else:
                             raise Exception(
-                                f'Invalid group: \'{LeftParenthesis()}{QuestionMark()}{curr_tkn.char}\'')
+                                f"Invalid group: '{{?{curr_tkn.char}'.")
                 res = parse_re_seq(capturing=capturing, group_name=group_name)
                 if isinstance(curr_tkn, RightParenthesis):
-                    # next_tkn() not needed (the parse_group while loop will eat the parenthesis)
+                    # next_tkn() not needed (parse_group's while loop will eat the parenthesis)
                     return res
                 else:
-                    raise Exception('Missing closing group parenthesis \')\'')
+                    raise Exception("Missing closing group parenthesis ')'.")
             else:
                 raise Exception(
-                    'Unescaped special character {}'.format(curr_tkn.char))
+                    "Unescaped special character {}.".format(curr_tkn.char))
 
         def parse_group_name() -> str:
             if curr_tkn is None:
-                raise Exception('Unterminated named group name.')
+                raise Exception("Unterminated named group name.")
             group_name = ''
             while curr_tkn.char != '>':
                 group_name += curr_tkn.char
                 next_tkn()
                 if curr_tkn is None:
-                    raise Exception('Unterminated named group name.')
+                    raise Exception("Unterminated named group name.")
             if len(group_name) == 0:
-                raise Exception('Unexpected empty named group name.')
+                raise Exception("Unexpected empty named group name.")
             next_tkn()  # consumes '>'
             return group_name
 
@@ -294,5 +291,5 @@ def parse_group_name() -> str:
         ast = parse_re()
         if curr_tkn is not None:
             raise Exception(
-                "Unable to parse the entire regex.\nCheck the regex and try again.")
+                "Unable to parse the regex.")
         return ast