-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathmarkov.py
59 lines (44 loc) · 1.9 KB
/
markov.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
#Stolen from here: https://github.com/rogerwhitson/Markov-Tweet/blob/master/Markov_Tweet.py
import random
class MarkovDict(object):
    """Word-level Markov chain text generator.

    ``read_text`` builds a table mapping every run of ``order`` consecutive
    words (a "seed") to the list of words that follow that run in ``text``.
    ``output_text`` then walks the table from a random seed to produce text.
    """

    def __init__(self, text, order, output_count):
        """Store the corpus and the generation settings.

        Args:
            text: Source corpus; it is split on whitespace into words.
            order: Number of consecutive words that make up one seed.
            output_count: Size threshold at which ``output_text`` stops
                adding words.
        """
        self.text = text
        self.order = order
        # Maps seed tuple -> list of following words. Duplicates are kept on
        # purpose so that frequent followers are drawn proportionally often.
        self.seed_counts = {}
        self.output_count = output_count

    def read_text(self):
        """Populate and return the seed -> followers table.

        Every window of ``order`` words that has a following word is
        recorded, including the last one in the text. A text with too few
        words simply contributes nothing instead of raising.

        Calling this method again appends the same transitions again to the
        existing table.

        Returns:
            The ``seed_counts`` dict (always, even when it is empty).
        """
        words = self.text.split()
        # The last valid window starts at len(words) - order - 1; range() is
        # empty when the text is too short, so no index can go out of bounds.
        for index in range(len(words) - self.order):
            seed = tuple(words[index:index + self.order])
            next_word = words[index + self.order]
            self.seed_counts.setdefault(seed, []).append(next_word)
        return self.seed_counts

    def define_seed(self):
        """Return a uniformly random seed from the table.

        Raises:
            IndexError: If the table is empty (``read_text`` was not called
                or the text was too short to yield any transition).
        """
        if not self.seed_counts:
            raise IndexError("no seeds available; call read_text() on a "
                             "text with more than `order` words first")
        # list(...) is required on Python 3, where dict views cannot be
        # indexed; it is harmless on Python 2.
        return random.choice(list(self.seed_counts))

    def output_text(self):
        """Generate text by walking the chain from a random seed.

        Words are appended until the running size estimate exceeds
        ``output_count`` or the current seed has no recorded follower. The
        check happens before each append, so the returned string can be
        longer than ``output_count``.

        Returns:
            The generated words joined by single spaces.

        Raises:
            IndexError: Propagated from ``define_seed`` when the table is
                empty.
        """
        start_seed = self.define_seed()
        size = len(" ".join(start_seed))
        output_list = list(start_seed)
        while size <= self.output_count:
            followers = self.seed_counts.get(start_seed)
            if not followers:
                # Dead end: this word sequence never had a successor in the
                # corpus. Stop here rather than looping forever.
                break
            next_word = random.choice(followers)
            output_list.append(next_word)
            # Size estimate kept as originally written: the word length plus
            # 2, which over-counts the single joining space by one.
            size += 2 + len(next_word)
            start_seed = tuple(output_list[-self.order:])
        return " ".join(output_list)