Skip to content

Commit

Permalink
update
Browse files Browse the repository at this point in the history
  • Loading branch information
bob7783 committed Oct 26, 2018
1 parent 1c29f04 commit edbfe0e
Showing 1 changed file with 23 additions and 0 deletions.
23 changes: 23 additions & 0 deletions nlp_class3/convert_twitter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# https://deeplearningcourses.com/c/deep-learning-advanced-nlp
from __future__ import print_function, division
from builtins import range, input
# Note: you may need to update your version of future
# sudo pip install -U future


# each output line should be:
# INPUT<tab>RESPONSE
# Convert the raw corpus into tab-separated (input, response) pairs:
# every pair of consecutive non-empty lines becomes one output row.
#
# Both files are managed by a single `with` statement so the input handle
# is closed deterministically (the original left it to the garbage
# collector). The corpus is opened first so that a missing input file
# raises before the output file is created or truncated.
#
# data source: https://github.com/Phylliida/Dialogue-Datasets
with open('../large_files/TwitterLowerAsciiCorpus.txt') as corpus, \
        open('../large_files/twitter_tab_format.txt', 'w') as f:
    # the previously read line (already stripped); None before the first line
    prev_line = None
    for line in corpus:
        # drop the trailing newline and any other trailing whitespace
        line = line.rstrip()

        # note:
        # between conversations there are empty lines
        # which evaluate to false, so no pair is written
        # when either side of the pair is an empty line
        if prev_line and line:
            f.write("%s\t%s\n" % (prev_line, line))

        prev_line = line

0 comments on commit edbfe0e

Please sign in to comment.