Skip to content

Commit

Permalink
Merge pull request #15 from lorenzofelletti/14-empty-regex-result-in-…
Browse files Browse the repository at this point in the history
…infinite-loop-if-continue_after_match-is-true

Fixed infinite loop bug if continue_after_match is True
  • Loading branch information
lorenzofelletti authored Mar 25, 2022
2 parents 8bda05f + 42c3520 commit 0cf5b09
Show file tree
Hide file tree
Showing 3 changed files with 104 additions and 35 deletions.
30 changes: 15 additions & 15 deletions pyregexp/engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,40 +60,40 @@ def match(self, re: str, string: str, return_matches: bool = False, continue_aft
positions all the group and subgroups matched.
"""

def return_fnc(res: bool, str_i: int, all_matches: list, return_matches: bool) -> Union[Tuple[bool, int, List[List[Match]]], Tuple[bool, int]]:
def return_fnc(res: bool, consumed: int, all_matches: list, return_matches: bool) -> Union[Tuple[bool, int, List[List[Match]]], Tuple[bool, int]]:
""" If return_matches is True returns the matches."""
if return_matches:
return res, str_i, all_matches
return res, consumed, all_matches
else:
return res, str_i
return res, consumed

if ignore_case == 1:
re = unicodedata.normalize("NFKD", re).lower()
string = unicodedata.normalize("NFKD", string).casefold()
elif ignore_case == 2:
re = unicodedata.normalize("NFKD", re).casefold()
string = unicodedata.normalize("NFKD", string).casefold()
re = unicodedata.normalize("NFKD", re).casefold()
string = unicodedata.normalize("NFKD", string).casefold()

all_matches = [] # variables holding the matched groups list for each matched substring in the test string
highest_matched_idx = 0 # holds the highest test_str index matched
highest_matched_idx = 0 # holds the highest matched string's index

res, str_i, matches = self.__match__(re, string, 0)
res, consumed, matches = self.__match__(re, string, 0)
if res:
highest_matched_idx = str_i
highest_matched_idx = consumed
all_matches.append(matches)
else:
return return_fnc(res, highest_matched_idx, all_matches, return_matches)

if not continue_after_match:
if not continue_after_match or not consumed > 0:
return return_fnc(res, highest_matched_idx, all_matches, return_matches)

while True:
#string = string[str_i:]
if not len(string) > 0:
return return_fnc(res, highest_matched_idx, all_matches, return_matches)
res, str_i, matches = self.__match__(re, string, str_i)
if res:
highest_matched_idx = str_i
res, consumed, matches = self.__match__(re, string, consumed)

# if consumed is not greater than highest_matched_idx this means the new match
# consumed 0 characters, so there is really nothing more to match
if res and consumed > highest_matched_idx:
highest_matched_idx = consumed
all_matches.append(matches)
else:
return return_fnc(True, highest_matched_idx, all_matches, return_matches)
Expand Down
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,14 @@
setup(
name='pyregexp',
packages=['pyregexp'],
version='0.2.2',
version='0.2.3',
license='MIT',
description='Simple regex library',
long_description=long_description,
long_description_content_type='text/markdown',
author='Lorenzo Felletti',
url='https://github.com/lorenzofelletti/pyregex',
download_url='https://github.com/lorenzofelletti/pyregex/archive/v0.2.2.tar.gz',
download_url='https://github.com/lorenzofelletti/pyregex/archive/v0.2.3.tar.gz',
keywords=['regex', 'regexp', 'engine'],
install_requires=[],
classifiers=[
Expand Down
105 changes: 87 additions & 18 deletions test/test_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,17 +32,17 @@ def test_or(reng: RegexEngine):


def test_or_no_match(reng: RegexEngine):
res, consumed = reng.match('^a|b$', 'c')
res, _ = reng.match('^a|b$', 'c')
assert res == False


def test_or_no_match_with_bt(reng: RegexEngine):
res, consumed = reng.match('a|b', 'c')
res, _ = reng.match('a|b', 'c')
assert res == False


def test_bt_no_match(reng: RegexEngine):
res, consumed = reng.match('a{5}a', 'aaaaa')
res, _ = reng.match('a{5}a', 'aaaaa')
assert res == False


Expand All @@ -52,71 +52,71 @@ def test_match_group_zero_or_more(reng: RegexEngine):


def test_fail_group_one_or_more(reng: RegexEngine):
res, cons = reng.match('^(a)+', 'b')
res, _ = reng.match('^(a)+', 'b')
assert res == False


def test_complex_match(reng: RegexEngine):
res, cons = reng.match('^(a|b+c)?[n-z]{2}', 'axx')
res, _ = reng.match('^(a|b+c)?[n-z]{2}', 'axx')
assert res == True


def test_complex_match_2(reng: RegexEngine):
res, cons = reng.match('^(a|b+c)?[n-z]{2}', 'xx')
res, _ = reng.match('^(a|b+c)?[n-z]{2}', 'xx')
assert res == True


def test_match_mail_simple(reng: RegexEngine):
res, cons = reng.match(r'.*@.*\.(com|it)', '[email protected]')
res, _ = reng.match(r'.*@.*\.(com|it)', '[email protected]')
assert res == True


def test_bt_index_leaf(reng: RegexEngine):
res, cons = reng.match(r'^aaaa.*a$', 'aaaaa')
res, _ = reng.match(r'^aaaa.*a$', 'aaaaa')
assert res == True


def test_bt_index_or(reng: RegexEngine):
res, cons = reng.match(r'^x(a|b)?bc$', 'xbc')
res, _ = reng.match(r'^x(a|b)?bc$', 'xbc')
assert res == True


def test_bt_index_group(reng: RegexEngine):
res, cons = reng.match(r'^x(a)?ac$', 'xac')
res, _ = reng.match(r'^x(a)?ac$', 'xac')
assert res == True


def test_match_or_left(reng: RegexEngine):
res, cons = reng.match('na|nb', 'na')
res, _ = reng.match('na|nb', 'na')
assert res == True


def test_match_or_right(reng: RegexEngine):
res, cons = reng.match('na|nb', 'nb')
res, _ = reng.match('na|nb', 'nb')
assert res == True


def test_match_or_right_at_start_end(reng: RegexEngine):
res, cons = reng.match('^na|nb$', 'nb')
res, _ = reng.match('^na|nb$', 'nb')
assert res == True


def test_no_match_after_end(reng: RegexEngine):
res, cons = reng.match('^na|nb$', 'nb ')
res, _ = reng.match('^na|nb$', 'nb ')
assert res == False


def test_match_sequence_with_start_end_correctly(reng: RegexEngine):
res, cons = reng.match('^a|b$', 'a ')
res, _ = reng.match('^a|b$', 'a ')
assert res == True

res, cons = reng.match('^a|b$', ' a ')
res, _ = reng.match('^a|b$', ' a ')
assert res == False

res, cons = reng.match('^a|b$', ' b')
res, _ = reng.match('^a|b$', ' b')
assert res == True

res, cons = reng.match('^a|b$', ' b ')
res, _ = reng.match('^a|b$', ' b ')
assert res == False


Expand Down Expand Up @@ -444,9 +444,40 @@ def test_ignore_case_casefolding(reng: RegexEngine):
def test_empty_regex(reng: RegexEngine):
    """Check that an empty regex matches under several optional-argument setups.

    The same pattern/string pair is passed to ``reng.match`` repeatedly with
    different optional parameters. Whenever the matches list is returned, it
    must hold exactly one group list containing a single empty match that
    starts and ends at index 0.
    """
    pattern = r""
    subject = "aaaa"

    # Repeat the test with configurations that return only (result, consumed).
    for options in (
        {},
        {"ignore_case": 1},
        {"ignore_case": 2},
        {"continue_after_match": True},
    ):
        outcome, _ = reng.match(pattern, subject, **options)
        assert outcome == True

    # Repeat the test with configurations that also return the matches.
    # The positional triples are forwarded as-is after the regex and string.
    for extra_args, extra_kwargs in (
        ((), {"return_matches": True}),
        ((True, True, 0), {}),
        ((True, True, 1), {}),
        ((True, True, 2), {}),
    ):
        outcome, _, found = reng.match(
            pattern, subject, *extra_args, **extra_kwargs)
        assert outcome == True
        assert len(found) == 1 and len(found[0]) == 1
        first = found[0][0]
        assert first.match == "" and first.start_idx == 0 and first.end_idx == 0


def test_empty_test_str(reng: RegexEngine):
regex = r"a"
Expand All @@ -460,3 +491,41 @@ def test_empty_regex_and_test_str(reng: RegexEngine):
test_str = ""
res, _ = reng.match(regex, test_str)
assert res == True


def test_regex_with_rigth_empty_group(reng: RegexEngine):
    """Check a regex whose right-hand alternative is empty (``a|``).

    The same pattern/string pair is passed to ``reng.match`` repeatedly with
    different optional parameters. Whenever the matches list is returned, it
    must hold exactly one group list containing the single match ``"a"``
    spanning indexes 0 to 1.
    """
    pattern = r"a|"
    subject = "ab"

    # Repeat the test with configurations that return only (result, consumed).
    for options in (
        {},
        {"ignore_case": 1},
        {"ignore_case": 2},
        {"continue_after_match": True},
    ):
        outcome, _ = reng.match(pattern, subject, **options)
        assert outcome == True

    # Repeat the test with configurations that also return the matches.
    # The positional triples are forwarded as-is after the regex and string.
    for extra_args, extra_kwargs in (
        ((), {"return_matches": True}),
        ((True, True, 0), {}),
        ((True, True, 1), {}),
        ((True, True, 2), {}),
    ):
        outcome, _, found = reng.match(
            pattern, subject, *extra_args, **extra_kwargs)
        assert outcome == True
        assert len(found) == 1 and len(found[0]) == 1
        first = found[0][0]
        assert first.match == "a" and first.start_idx == 0 and first.end_idx == 1

0 comments on commit 0cf5b09

Please sign in to comment.