Explicitly set tokenization case #78
minimaxir committed Oct 26, 2018
1 parent 5117a5f commit d0a72b8
Showing 2 changed files with 4 additions and 2 deletions.
setup.py: 2 changes (1 addition, 1 deletion)

@@ -26,7 +26,7 @@
 setup(
     name='textgenrnn',
     packages=['textgenrnn'], # this must be the same as the name above
-    version='1.4',
+    version='1.4.1',
     description='Easily train your own text-generating neural network ' \
                 'of any size and complexity',
     long_description=long_description,
textgenrnn/textgenrnn.py: 4 changes (3 additions, 1 deletion)

@@ -59,7 +59,7 @@ def __init__(self, weights_path=None,
                   encoding='utf8', errors='ignore') as json_file:
             self.vocab = json.load(json_file)
 
-        self.tokenizer = Tokenizer(filters='', char_level=True)
+        self.tokenizer = Tokenizer(filters='', lower=False, char_level=True)
         self.tokenizer.word_index = self.vocab
         self.num_classes = len(self.vocab) + 1
         self.model = textgenrnn_model(self.num_classes,
@@ -248,7 +248,9 @@ def train_new_model(self, texts, context_labels=None, num_epochs=50,
             texts[i] = re.sub(' {2,}', ' ', texts[i])
 
         # Create text vocabulary for new texts
+        # if word-level, lowercase; if char-level, preserve case
         self.tokenizer = Tokenizer(filters='',
+                                   lower=self.config['word_level'],
                                    char_level=(not self.config['word_level']))
         self.tokenizer.fit_on_texts(texts)

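For context on the change above: Keras' Tokenizer lowercases its input by default (lower=True), which silently erases case from a character-level vocabulary; passing lower=False keeps uppercase and lowercase characters distinct. Below is a minimal sketch of the difference, assuming the keras.preprocessing.text import path and a made-up sample string, neither of which appears in the commit.

# Illustrative sketch only; not part of the commit. Assumes Keras'
# text-preprocessing Tokenizer and a hypothetical sample text.
from keras.preprocessing.text import Tokenizer

texts = ["Hello World"]

# Before this commit: lower defaults to True, so 'H' and 'W' are folded
# into 'h' and 'w' and the fitted character vocabulary loses case.
default_tok = Tokenizer(filters='', char_level=True)
default_tok.fit_on_texts(texts)
print(sorted(default_tok.word_index))  # e.g. [' ', 'd', 'e', 'h', 'l', 'o', 'r', 'w']

# After this commit: lower=False preserves case, so the index can line up
# with a case-sensitive vocabulary such as the bundled self.vocab.
cased_tok = Tokenizer(filters='', lower=False, char_level=True)
cased_tok.fit_on_texts(texts)
print(sorted(cased_tok.word_index))    # e.g. [' ', 'H', 'W', 'd', 'e', 'l', 'o', 'r']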
