Skip to content
This repository was archived by the owner on Jul 7, 2023. It is now read-only.

Commit d9cba5c

Browse files
martinpopel authored and rsepassi committed
fix and test bleu_hook.bleu_tokenize (#514)
* fix and test bleu_hook.bleu_tokenize
* make the test work in Python2
1 parent cc43389 commit d9cba5c

File tree

2 files changed

+9
-4
lines changed

2 files changed

+9
-4
lines changed

tensor2tensor/utils/bleu_hook.py

Lines changed: 5 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -153,7 +153,7 @@ def __init__(self):
153153
def _property_chars(prefix):
154154
return ''.join(six.unichr(x) for x in range(sys.maxunicode)
155155
if unicodedata.category(six.unichr(x)).startswith(prefix))
156-
punctuation = self._property_chars('P')
156+
punctuation = _property_chars('P')
157157
self.nondigit_punct_re = re.compile(r'([^\d])([' + punctuation + r'])')
158158
self.punct_nondigit_re = re.compile(r'([' + punctuation + r'])([^\d])')
159159
self.symbol_re = re.compile('([' + _property_chars('S') + '])')
@@ -183,9 +183,10 @@ def bleu_tokenize(string):
183183
Returns:
184184
a list of tokens
185185
"""
186-
string = UnicodeRegex.nondigit_punct_re.sub(r'\1 \2 ', string)
187-
string = UnicodeRegex.punct_nondigit_re.sub(r' \1 \2', string)
188-
string = UnicodeRegex.symbol_re.sub(r' \1 ', string)
186+
uregex = UnicodeRegex()
187+
string = uregex.nondigit_punct_re.sub(r'\1 \2 ', string)
188+
string = uregex.punct_nondigit_re.sub(r' \1 \2', string)
189+
string = uregex.symbol_re.sub(r' \1 ', string)
189190
return string.split()
190191

191192

tensor2tensor/utils/bleu_hook_test.py

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -57,5 +57,9 @@ def testComputeMultipleNgrams(self):
5757
actual_bleu = 0.3436
5858
self.assertAllClose(bleu, actual_bleu, atol=1e-03)
5959

60+
def testBleuTokenize(self):
61+
self.assertEqual(bleu_hook.bleu_tokenize(u'hi, “there”'), [u'hi', u',', u'“', u'there', u'”'])
62+
63+
6064
if __name__ == '__main__':
6165
tf.test.main()

0 commit comments

Comments
 (0)