diff --git a/setup.py b/setup.py
index c02de2e..faae5d1 100644
--- a/setup.py
+++ b/setup.py
@@ -26,7 +26,7 @@
 setup(
     name='textgenrnn',
     packages=['textgenrnn'],  # this must be the same as the name above
-    version='1.4',
+    version='1.4.1',
     description='Easily train your own text-generating neural network ' \
                 'of any size and complexity',
     long_description=long_description,
diff --git a/textgenrnn/textgenrnn.py b/textgenrnn/textgenrnn.py
index 83a9f44..eb9e09c 100644
--- a/textgenrnn/textgenrnn.py
+++ b/textgenrnn/textgenrnn.py
@@ -59,7 +59,7 @@ def __init__(self, weights_path=None,
                   encoding='utf8', errors='ignore') as json_file:
             self.vocab = json.load(json_file)
 
-        self.tokenizer = Tokenizer(filters='', char_level=True)
+        self.tokenizer = Tokenizer(filters='', lower=False, char_level=True)
         self.tokenizer.word_index = self.vocab
         self.num_classes = len(self.vocab) + 1
         self.model = textgenrnn_model(self.num_classes,
@@ -248,7 +248,9 @@ def train_new_model(self, texts, context_labels=None, num_epochs=50,
             texts[i] = re.sub(' {2,}', ' ', texts[i])
 
         # Create text vocabulary for new texts
+        # if word-level, lowercase; if char-level, uppercase
         self.tokenizer = Tokenizer(filters='',
+                                   lower=self.config['word_level'],
                                    char_level=(not self.config['word_level']))
         self.tokenizer.fit_on_texts(texts)
 
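
Why this matters: the Keras `Tokenizer` lowercases its input by default (`lower=True`), so a char-level tokenizer built without `lower=False` folds uppercase characters into lowercase and no longer lines up with a case-sensitive saved vocabulary. A minimal sketch of the difference, assuming the `keras.preprocessing.text.Tokenizer` API that textgenrnn imports:

```python
# Minimal sketch: the default lower=True folds case away in a char-level
# tokenizer, while lower=False (the patched call) preserves it.
from keras.preprocessing.text import Tokenizer

texts = ["Hello World"]

folded = Tokenizer(filters='', char_level=True)        # lower defaults to True
folded.fit_on_texts(texts)

preserved = Tokenizer(filters='', lower=False, char_level=True)  # patched call
preserved.fit_on_texts(texts)

print(sorted(folded.word_index))     # no 'H' or 'W' -- uppercase folded away
print(sorted(preserved.word_index))  # 'H' and 'W' kept as distinct tokens
```

The second hunk applies the same idea in `train_new_model`: with `lower=self.config['word_level']`, word-level models keep the existing lowercasing behavior, while char-level models now build a case-sensitive vocabulary consistent with the loader above.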