Skip to content

Commit 8ce6196

Browse files
Merge branch 'master' of github.com:lorenzofelletti/pyregex
2 parents 150f977 + 0e01495 commit 8ce6196

File tree

6 files changed

+72
-1
lines changed

6 files changed

+72
-1
lines changed

create_uml.sh

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
#!/bin/bash
# Regenerate the UML diagrams stored in docs/uml using pyreverse.
#
# A class diagram is produced for each module listed in the loop below, then
# a class diagram and a package diagram are produced from everything matched
# by ./pyregexp/*.
#
# All paths are relative to the current directory, so run this script from the
# directory that contains both pyregexp/ and docs/.

out_dir=docs/uml

# Make sure the destination exists. Without it, `mv file docs/uml` would
# rename each image to a plain file called "uml", every move overwriting the
# previous one.
mkdir -p "$out_dir"

# Per-module class diagrams. pyreverse writes classes.<module>.png into the
# current directory, which is then moved into the output directory.
for module in re_ast engine lexer match pyrser tokens; do
    pyreverse -o "$module.png" -A -S -mn -f ALL "./pyregexp/$module.py"
    # The trailing slash makes mv fail loudly instead of renaming the file
    # should the destination ever stop being a directory.
    mv "classes.$module.png" "$out_dir/"
done

# Diagrams built from all of ./pyregexp (note: no `-f ALL` for this run).
pyreverse -o pyregexp.png -A -S -mn ./pyregexp/*
mv classes.pyregexp.png "$out_dir/"
mv packages.pyregexp.png "$out_dir/"

docs/uml/classes.engine.png

2.36 KB
Loading

docs/uml/classes.pyregexp.png

10.5 KB
Loading

docs/uml/classes.re_ast.png

4.77 KB
Loading

pyregexp/engine.py

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111

1212

1313
from typing import Callable, Union, Tuple, List
14+
import unicodedata
1415
from .pyrser import Pyrser
1516
from .match import Match
1617
from .re_ast import RE, GroupNode, LeafNode, OrNode, EndElement, StartElement
@@ -25,7 +26,7 @@ class RegexEngine:
2526
def __init__(self):
2627
self.parser: Pyrser = Pyrser()
2728

28-
def match(self, re: str, string: str, return_matches: bool = False, continue_after_match: bool = False) -> Union[Tuple[bool, int, List[List[Match]]], Tuple[bool, int]]:
29+
def match(self, re: str, string: str, return_matches: bool = False, continue_after_match: bool = False, ignore_case: int = 0) -> Union[Tuple[bool, int, List[List[Match]]], Tuple[bool, int]]:
2930
""" Searches a regex in a test string.
3031
3132
Searches the passed regular expression in the passed test string and
@@ -34,6 +35,10 @@ def match(self, re: str, string: str, return_matches: bool = False, continue_aft
3435
It is possible to customize both the returned value and the search
3536
method.
3637
38+
The ignore_case flag may cause unexpected results in the returned
39+
number of matched characters, and also in the returned matches, e.g.
40+
when the character ẞ is present in either the regex or the test string.
41+
3742
Args:
3843
re (str): the regular expression to search
3944
string (str): the test string
@@ -43,6 +48,9 @@ def match(self, re: str, string: str, return_matches: bool = False, continue_aft
4348
continue_after_match (bool): if True the engine continues
4449
matching until the whole input is consumed
4550
(default is False)
51+
ignore_case (int): when 0 the case is not ignored, when 1 a "soft"
52+
case ignoring is performed, when 2 casefolding is performed.
53+
(default is 0)
4654
4755
Returns:
4856
A tuple containing whether a match was found or not, the last
@@ -59,6 +67,13 @@ def return_fnc(res: bool, str_i: int, all_matches: list, return_matches: bool) -
5967
else:
6068
return res, str_i
6169

70+
if ignore_case == 1:
71+
re = unicodedata.normalize("NFKD", re).lower()
72+
string = unicodedata.normalize("NFKD", string).casefold()
73+
elif ignore_case == 2:
74+
re = unicodedata.normalize("NFKD", re).casefold()
75+
string = unicodedata.normalize("NFKD", string).casefold()
76+
6277
all_matches = [] # variables holding the matched groups list for each matched substring in the test string
6378
highest_matched_idx = 0 # holds the highest test_str index matched
6479

test/test_engine.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -405,3 +405,37 @@ def test_on_long_string(reng: RegexEngine):
405405
matches[1][0].end_idx == len(test_str)
406406
assert matches[1][1].start_idx == len(test_str)-1 and \
407407
matches[1][1].end_idx == len(test_str)
408+
409+
410+
def test_ignore_case_no_casefolding(reng: RegexEngine):
    """Check matching with ``ignore_case=1`` ("soft" case ignoring)."""
    # ASCII letters that differ only in case must match.
    res, _ = reng.match(r"ss", "SS", ignore_case=1)
    assert res

    # Non-ASCII letters that differ only in case must match as well.
    res, _ = reng.match(r"ÄCHER", "ächer", ignore_case=1)
    assert res

    # Ignoring case must not ignore diacritics: "Ä" is not the same as "a".
    res, _ = reng.match(r"ÄCHER", "acher", ignore_case=1)
    assert not res
425+
426+
427+
def test_ignore_case_casefolding(reng: RegexEngine):
    """Check matching with ``ignore_case=2`` (case folding)."""
    # Case folding expands the capital sharp s "ẞ" to "ss", so it must match
    # the two-character test string "SS".
    res, _ = reng.match(r"ẞ", "SS", ignore_case=2)
    assert res

    # Non-ASCII letters that differ only in case must match.
    res, _ = reng.match(r"ÄCHER", "ächer", ignore_case=2)
    assert res

    # Case folding must not strip diacritics: "Ä" is not the same as "a".
    res, _ = reng.match(r"ÄCHER", "acher", ignore_case=2)
    assert not res

0 commit comments

Comments
 (0)