Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file modified .DS_Store
Binary file not shown.
1,079 changes: 1,079 additions & 0 deletions Final_mariapaula/My Gmail Life 2008.ipynb

Large diffs are not rendered by default.

656 changes: 656 additions & 0 deletions Final_mariapaula/My Gmail Life 2008.ipynb

Large diffs are not rendered by default.

1,103 changes: 1,103 additions & 0 deletions Final_mariapaula/My Gmail Life 2009.ipynb

Large diffs are not rendered by default.

1,096 changes: 1,096 additions & 0 deletions Final_mariapaula/My Gmail Life 2010.ipynb

Large diffs are not rendered by default.

1,105 changes: 1,105 additions & 0 deletions Final_mariapaula/My Gmail Life 2011.ipynb

Large diffs are not rendered by default.

1,077 changes: 1,077 additions & 0 deletions Final_mariapaula/My Gmail Life 2012.ipynb

Large diffs are not rendered by default.

1,118 changes: 1,118 additions & 0 deletions Final_mariapaula/My Gmail Life 2013.ipynb

Large diffs are not rendered by default.

1,117 changes: 1,117 additions & 0 deletions Final_mariapaula/My Gmail Life.ipynb

Large diffs are not rendered by default.

304 changes: 304 additions & 0 deletions Final_mariapaula/Reading GZ Files.ipynb

Large diffs are not rendered by default.

Binary file added Lesson0_bridges_and_tunnels/.DS_Store
Binary file not shown.

Large diffs are not rendered by default.

Empty file modified Lesson0_bridges_and_tunnels/Bridges and Tunnels.ipynb
100644 → 100755
Empty file.
657 changes: 657 additions & 0 deletions Lesson0_bridges_and_tunnels/maria_paula/Bridges and Tunnels.ipynb

Large diffs are not rendered by default.

6,970 changes: 6,970 additions & 0 deletions Lesson0_bridges_and_tunnels/maria_paula/daily_traffic.csv

Large diffs are not rendered by default.

512 changes: 512 additions & 0 deletions Lesson0_bridges_and_tunnels/maria_paula/verrazano.csv

Large diffs are not rendered by default.

Empty file modified Lesson1_restaurants/Restaurant Inspections.ipynb
100644 → 100755
Empty file.
Empty file modified Lesson1_restaurants/data/hamburgers.txt
100644 → 100755
Empty file.
775 changes: 775 additions & 0 deletions Lesson1_restaurants/maria_paula/Bedford Avenue.ipynb

Large diffs are not rendered by default.

1,146 changes: 1,146 additions & 0 deletions Lesson1_restaurants/maria_paula/Restaurant Inspections.ipynb

Large diffs are not rendered by default.

720 changes: 720 additions & 0 deletions Lesson1_restaurants/maria_paula/VegetarianCuisine.ipynb

Large diffs are not rendered by default.

2,348 changes: 2,348 additions & 0 deletions Lesson1_restaurants/maria_paula/data/bedford.txt

Large diffs are not rendered by default.

8,415 changes: 8,415 additions & 0 deletions Lesson1_restaurants/maria_paula/data/hamburgers.txt

Large diffs are not rendered by default.

2,451 changes: 2,451 additions & 0 deletions Lesson1_restaurants/maria_paula/data/vegetarian.txt

Large diffs are not rendered by default.

523 changes: 523 additions & 0 deletions Lesson2_scraping/maria_paula/Fitbit.ipynb

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions Lesson2_scraping/maria_paula/citi_data.txt

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions Lesson2_scraping/maria_paula/citi_data0.txt

Large diffs are not rendered by default.

155 changes: 155 additions & 0 deletions Lesson2_scraping/maria_paula/citibike.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
{
"metadata": {
"name": ""
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "code",
"collapsed": false,
"input": [
"import urllib\n",
"import json\n"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 11
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"data_url = \"http://citibikenyc.com/stations/json/\"\n",
"\n",
"data_handler = urllib.urlopen(data_url)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 12
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"dir(data_handler)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 13,
"text": [
"['__doc__',\n",
" '__init__',\n",
" '__iter__',\n",
" '__module__',\n",
" '__repr__',\n",
" 'close',\n",
" 'code',\n",
" 'fileno',\n",
" 'fp',\n",
" 'getcode',\n",
" 'geturl',\n",
" 'headers',\n",
" 'info',\n",
" 'next',\n",
" 'read',\n",
" 'readline',\n",
" 'readlines',\n",
" 'url']"
]
}
],
"prompt_number": 13
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"citiStations = data_handler.read()\n",
"data_handler.close()  # the whole body is in memory now, so release the connection"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 14
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"citiStations_data = json.loads(citiStations)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 15
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# how many stations does the feed list?\n",
"print len(citiStations_data['stationBeanList'])\n",
"\n",
"# show only the first station record, as a sample of the available fields\n",
"for station in citiStations_data['stationBeanList'][:1]:\n",
"    print station"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"331\n",
"{u'availableDocks': 6, u'totalDocks': 39, u'city': u'', u'altitude': u'', u'stAddress2': u'', u'longitude': -73.99392888, u'lastCommunicationTime': None, u'postalCode': u'', u'statusValue': u'In Service', u'testStation': False, u'stAddress1': u'W 52 St & 11 Ave', u'stationName': u'W 52 St & 11 Ave', u'landMark': u'', u'latitude': 40.76727216, u'statusKey': 1, u'availableBikes': 33, u'id': 72, u'location': u''}\n"
]
}
],
"prompt_number": 24
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# collect every station's total dock count, in feed order\n",
"num_docks = [station['totalDocks'] for station in citiStations_data['stationBeanList']]\n",
"\n",
"print num_docks"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"[39, 33, 26, 62, 39, 19, 19, 31, 30, 46, 25, 19, 39, 33, 31, 33, 29, 55, 23, 26, 35, 47, 44, 47, 51, 30, 45, 28, 23, 39, 38, 33, 31, 37, 55, 27, 23, 39, 39, 39, 31, 31, 23, 23, 39, 31, 23, 20, 21, 27, 40, 27, 33, 47, 31, 39, 23, 39, 35, 27, 24, 31, 27, 34, 24, 57, 27, 23, 39, 31, 19, 25, 19, 19, 31, 59, 27, 43, 44, 19, 29, 20, 55, 31, 24, 35, 27, 35, 55, 37, 23, 30, 36, 32, 37, 31, 27, 41, 35, 30, 31, 23, 39, 29, 43, 27, 28, 35, 39, 27, 31, 39, 51, 35, 27, 39, 23, 31, 39, 27, 24, 31, 27, 36, 37, 24, 27, 19, 29, 23, 23, 35, 27, 35, 42, 23, 28, 39, 36, 27, 31, 23, 23, 27, 36, 55, 39, 43, 57, 49, 27, 31, 33, 34, 39, 35, 27, 19, 30, 43, 47, 59, 23, 34, 27, 31, 29, 43, 39, 35, 27, 31, 31, 32, 31, 36, 30, 25, 27, 31, 27, 15, 41, 42, 31, 39, 40, 34, 43, 23, 19, 35, 23, 29, 24, 42, 31, 47, 23, 23, 23, 19, 55, 39, 31, 59, 30, 27, 30, 31, 39, 27, 47, 27, 27, 27, 39, 32, 34, 51, 23, 52, 35, 39, 31, 31, 31, 59, 39, 34, 39, 35, 59, 21, 23, 39, 47, 25, 59, 39, 35, 34, 59, 57, 37, 27, 41, 42, 47, 37, 47, 59, 31, 31, 27, 25, 39, 35, 44, 39, 39, 35, 41, 37, 59, 53, 49, 34, 35, 25, 47, 59, 30, 36, 52, 43, 30, 29, 45, 36, 47, 24, 36, 51, 33, 27, 26, 53, 35, 31, 59, 39, 61, 39, 67, 51, 51, 57, 39, 43, 59, 39, 41, 36, 39, 43, 50, 31, 29, 39, 39, 31, 39, 27, 38, 30, 15, 27, 30, 36, 12, 49, 23, 33, 37, 36, 39, 43, 33, 35, 25]\n"
]
}
],
"prompt_number": 25
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": []
}
],
"metadata": {}
}
]
}
29 changes: 29 additions & 0 deletions Lesson2_scraping/maria_paula/citiscrapper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
import urllib
import time



if __name__ == '__main__':
    # Poll the Citi Bike station feed forever, saving each response to its own
    # numbered file (citi_data0.txt, citi_data1.txt, ...) in the working directory.
    citibike_url = "http://citibikenyc.com/stations/json/"
    poll_interval = 120  # seconds to wait between polls

    counter = 0  # snapshots saved so far; also the suffix of the next file
    while True:
        try:
            citi_handler = urllib.urlopen(citibike_url)
            try:
                citi_data = citi_handler.read()
            finally:
                # release the connection even if read() fails part-way
                citi_handler.close()
        except IOError as err:
            # A transient network failure should not end a long-running scrape:
            # report it, wait, and retry without consuming a file number.
            print("fetch failed, retrying in %d seconds: %s" % (poll_interval, err))
            time.sleep(poll_interval)
            continue

        # f == that file; the with-block closes it even if the write fails
        with open("citi_data" + str(counter) + ".txt", 'w') as f:
            f.write(citi_data)

        print(counter)
        counter += 1

        time.sleep(poll_interval)



1 change: 1 addition & 0 deletions Lesson2_scraping/maria_paula/data/Fitbit.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
date,steps,distance,activeScore,activityCalories,calories,caloriesIn,minutesSedentary,minutesLightlyActive,minutesFairlyActive,minutesVeryActive,timeInBed,minutesAsleep,minutesAwake,awakeningsCount,weight,bmi,fat6/29/2013,6889,4.85046,581,823,1940,0,198,209,56,16,961,810,148,41,63,22.40136147,06/30/2013,14260,10.03474,959,1360,2303,0,503,330,103,29,475,441,34,9,63,22.40136147,07/1/2013,8348,5.86014,595,900,1974,0,651,248,71,6,464,429,25,7,63.066,22.42482948,07/2/2013,6387,4.55523,542,797,1923,0,751,202,57,13,417,400,11,5,63.132,22.4482975,07/3/2013,8357,5.7914,681,1050,2057,0,577,323,75,0,338,322,3,3,63.198,22.47176743,07/4/2013,19204,13.30837,837,1202,2209,0,490,222,167,6,555,507,43,7,63.264,22.49523354,07/5/2013,10525,7.29383,633,935,2011,0,542,217,106,5,570,527,11,7,63.33,22.51870346,07/6/2013,5704,3.95287,551,874,1931,0,586,301,41,1,440,411,20,2,63.396,22.54216957,07/7/2013,5414,3.7519,333,523,1720,0,527,168,35,0,540,535,4,4,63.462,22.5656395,07/8/2013,5755,4.01334,624,733,2002,0,797,161,42,26,414,405,2,2,63.528,22.58910561,07/9/2013,10899,7.55301,712,1019,2087,0,588,198,126,8,494,472,14,3,63.594,22.61257553,07/10/2013,5890,4.1083,651,832,2028,0,690,162,83,23,505,495,3,3,63.66,22.63604164,07/11/2013,5173,3.61001,463,688,1846,0,739,170,65,4,465,454,4,4,63.726,22.65951157,07/12/2013,11149,7.95198,673,955,2050,0,638,194,93,17,498,448,27,4,63.792,22.68297768,07/13/2013,13377,9.34564,1030,1358,2396,0,572,142,194,36,496,475,7,6,63.858,22.7064476,07/14/2013,1022,0.70825,141,156,2003,0,920,0,0,20,500,500,0,0,63.924,22.72991562,07/15/2013,5241,3.63201,595,814,1974,0,697,157,66,22,498,443,37,7,64,22.75693893,07/16/2013,5688,3.94178,411,586,1795,0,795,137,53,5,450,439,11,8,63.6,22.61470795,07/17/2013,203,0.14068,589,726,1968,0,827,59,32,62,450,445,5,5,63.2,22.47247696,07/18/2013,3411,2.36382,235,359,1625,0,811,104,25,0,510,497,7,7,63.428,22.55354881,07/19/2013,5284,3.68613,388,578,1773,0,845,158,42,2,393,384,3,2,63.656,22.63461876,07/20/2013,3679,2.54955,322,502,1709,0,711
,163,26,0,540,536,4,2,63.884,22.71569061,07/21/2013,1706,1.18226,323,505,1710,0,726,170,20,2,522,511,1,1,64.112,22.79676247,07/22/2013,2978,2.06375,176,261,2011,0,904,54,31,1,450,450,0,0,64.34,22.87783432,07/23/2013,6462,4.58188,431,673,1834,0,793,178,51,8,410,401,7,4,64.568,22.95890617,07/24/2013,13976,9.68537,767,1062,2163,0,1128,186,108,18,0,0,0,0,64.8,23.04140091,07/25/2013,8392,5.84279,530,788,1931,0,1170,204,62,4,0,0,0,0,64.805,23.04317856,07/26/2013,10095,6.99584,521,740,1922,0,1210,139,84,7,0,0,0,0,64.81,23.04495621,07/27/2013,6971,4.8309,362,555,1766,0,1239,162,39,0,0,0,0,0,64.815,23.04673576,07/28/2013,10213,7.07761,548,822,1948,0,1153,205,82,0,0,0,0,0,64.82,23.04851151,07/29/2013,4843,3.3562,383,562,1787,0,1254,132,51,3,0,0,0,0,64.825,23.05028915,07/30/2013,421,0.29175,65,113,2011,0,1395,43,2,0,0,0,0,0,64.83,23.05206871,07/31/2013,7152,4.98086,526,790,1927,0,714,213,62,2,449,434,6,4,64.835,23.05384445,08/1/2013,5396,3.79028,477,714,1879,0,1173,205,42,4,0,0,0,0,64.84,23.0556221,08/2/2013,17025,11.79833,914,1256,2307,0,596,205,135,24,496,486,7,5,64.845,23.05740166,08/3/2013,7563,5.39553,512,762,1913,0,1179,214,36,11,0,0,0,0,64.85,23.05917931,08/4/2013,2714,1.90553,154,239,2011,0,1355,69,16,0,0,0,0,0,64.855,23.06095886,08/5/2013,5301,3.67359,401,605,1804,0,816,162,51,0,411,393,11,7,64.86,23.0627346,08/6/2013,8142,5.66753,497,744,1898,0,810,185,72,1,350,331,10,7,64.865,23.06451225,08/7/2013,6488,4.49618,628,883,2027,0,620,192,45,32,573,513,54,11,64.87,23.06629181,08/8/2013,4904,3.44852,498,667,1899,0,893,109,31,38,369,355,3,3,64.875,23.06806946,08/9/2013,7864,5.44975,544,809,1944,0,727,202,74,2,435,435,0,0,64.88,23.0698452,08/10/2013,10831,7.55633,592,874,1991,0,664,211,83,2,480,480,0,0,64.885,23.07162476,08/11/2013,13052,9.04504,878,1158,2272,0,751,149,109,41,390,390,0,0,64.89,23.0734024,08/12/2013,7552,5.23354,464,698,1866,1388,810,189,53,2,386,376,2,1,64.895,23.07518005,08/13/2013,6946,4.81358,467,688,1869,2247,679,165,59,6,531,494,15,3,64.9,23.07695961,08
/14/2013,6241,4.37546,401,594,1804,2924,855,145,53,3,384,384,0,0,64.905,23.07873535,08/15/2013,9212,6.38392,725,1070,2122,2471,644,276,81,6,433,406,2,2,64.91,23.08051491,08/16/2013,11627,8.05751,668,975,2066,0,695,228,87,8,422,410,5,3,64.915,23.08229256,08/17/2013,2803,1.94248,775,857,2171,0,1345,5,11,79,0,0,0,0,64.92,23.0840683,08/18/2013,2273,1.57519,145,228,2013,0,1357,66,17,0,0,0,0,0,64.925,23.08584785,08/19/2013,4070,2.84704,258,379,1664,0,1316,91,29,4,0,0,0,0,64.93,23.0876255,08/20/2013,0,0,0,5,2013,0,1438,2,0,0,0,0,0,0,64.935,23.08940315,08/21/2013,0,0,0,0,2013,0,1440,0,0,0,0,0,0,0,64.94,23.09118271,08/22/2013,0,0,0,0,2013,0,1440,0,0,0,0,0,0,0,64.945,23.09295845,08/23/2013,0,0,0,0,2013,0,1440,0,0,0,0,0,0,0,64.95,23.0947361,08/24/2013,0,0,0,0,2013,0,1440,0,0,0,0,0,0,0,64.955,23.09651566,08/25/2013,0,0,0,0,2013,0,1440,0,0,0,0,0,0,0,64.96,23.0982933,08/26/2013,0,0,0,0,2013,0,1440,0,0,0,0,0,0,0,64.965,23.10006905,08/27/2013,0,0,0,0,2013,0,1440,0,0,0,0,0,0,0,64.97,23.1018486,08/28/2013,0,0,0,0,2013,0,1440,0,0,0,0,0,0,0,64.975,23.10362625,08/29/2013,0,0,0,0,2013,0,1440,0,0,0,0,0,0,0,64.98,23.10540581,08/30/2013,0,0,0,0,2013,0,1440,0,0,0,0,0,0,0,64.985,23.10718155,08/31/2013,0,0,0,0,2013,0,1440,0,0,0,0,0,0,0,64.99,23.1089592,09/1/2013,0,0,0,0,2013,0,1440,0,0,0,0,0,0,0,64.995,23.11073875,09/2/2013,0,0,0,0,2013,0,1440,0,0,0,0,0,0,0,65,23.1125164,09/3/2013,9233,6.44972,545,775,1945,0,1196,153,85,6,0,0,0,0,65,23.1125164,09/4/2013,5250,3.63825,298,426,1703,0,1306,92,37,5,0,0,0,0,64.734,23.01793289,09/5/2013,0,0,15,5,2005,0,1438,2,0,0,0,0,0,0,64.468,22.92334938,09/6/2013,6854,4.74982,400,558,1785,0,1254,134,51,1,0,0,0,0,64.2,22.82805252,09/7/2013,7039,4.95401,386,503,1771,0,1290,79,64,7,0,0,0,0,63.8,22.68582344,09/8/2013,865,0.59945,838,65,2210,0,1421,11,7,1,0,0,0,0,63.6,22.61470795,09/9/2013,8075,5.69808,397,544,1782,0,1280,87,65,8,0,0,0,0,63.4,22.54359245,09/10/2013,5868,4.323,319,424,1689,0,1309,87,32,12,0,0,0,0,63,22.40136147,09/11/2013,5456,3.90824,353,530,1739,0,125
6,144,31,9,0,0,0,0,63.083,22.43087387,09/12/2013,13092,9.09848,678,913,2054,0,1183,117,107,33,0,0,0,0,63.166,22.46038818,09/13/2013,14501,10.77058,731,956,2106,0,1193,112,89,46,0,0,0,0,63.249,22.48990059,09/14/2013,9993,6.976,405,576,1790,0,1259,87,91,3,0,0,0,0,63.332,22.51941299,09/15/2013,6529,4.5246,354,521,1740,0,815,115,60,1,449,430,1,1,63.415,22.54892731,09/16/2013,7074,4.97826,446,609,1811,0,732,127,57,11,513,477,30,17,63.5,22.57914925,09/17/2013,8957,6.28278,558,734,1918,0,819,105,81,26,409,394,8,6,63.15,22.45469856,09/18/2013,5744,4.0831,370,529,1738,0,731,125,45,9,530,501,19,12,62.8,22.33024597,09/19/2013,14985,10.68691,763,998,2115,0,640,124,120,32,524,503,5,4,62.8,22.33024597,09/20/2013,11918,8.61736,664,905,2020,0,645,165,81,32,517,508,1,1,62.8,22.33024597,09/21/2013,9144,6.41277,412,560,1778,0,819,89,75,7,450,450,0,0,62.8,22.33024597,09/22/2013,10870,7.63562,541,760,1902,0,710,161,80,9,480,479,1,1,62.8,22.33024597,09/23/2013,0,0,5,7,86,0,82,3,0,0,0,0,0,0,62.8,22.33024597,0
Expand Down
197 changes: 197 additions & 0 deletions Lesson3_MarkovChains/maria_paula/Markov Chains and art.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,197 @@
{
"metadata": {
"name": ""
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "heading",
"level": 2,
"metadata": {},
"source": [
"Markov Chains and art"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The [National Gallery of Art](http://www.nga.gov/content/ngaweb.html) puts thumbnails and descriptions of the works in their collection online. One could, theoretically, crawl these descriptions and process them."
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"import pickle\n",
"import random\n",
"\n",
"# NOTE(review): unpickling can execute arbitrary code -- only load art.pickle from a trusted source\n",
"with open('art.pickle', 'rb') as art_file:  # the handle is closed as soon as loading finishes\n",
"    art = pickle.load(art_file)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 2
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The thumbnails are really interesting. Each one looks like this: <img src=\"http://media.nga.gov/public/supplemental/objects/1/0/7/7/6/4/107764-crop-0-90x90.jpg\" />\n",
"\n",
"...and you can play with them to end up with images like this one, which shows the average color of each thumbnail (where each square represents a unique work of art):\n",
"\n",
"<img src=\"http://25.media.tumblr.com/c35ed306e3af0ca4b9aef0beff92b8a2/tumblr_mkiepmL5ot1qz7bx4o1_1280.png\" width=\"300\" />"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"...and if you look at the descriptions, you might end up with a data structure like this:"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"print art[0]"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"{'name': 'Saint Paul', 'artist': 'Bernardo Daddi', 'material': 'tempera on panel', 'year': '1333', 'thumbnail': 'http://www.nga.gov:80/thumb-l/a000c6/a000c643.jpg', 'desc': 'The narrow shape and large size of this panel suggest it was meant to hang against a colossal pillar in a church. The original frame utilizes decorative motifs similar to those in the borders of Gothic illuminated manuscripts.\\n Saint Paul holds a book, recalling the Epistles he wrote. The sword he displays has several meanings: his early career as a Roman soldier; his position as defender of the Christian faith; and the instrument of his martyrdom by beheading. The great dignity of his erect figure and the monumental effect of the drapery correspond to his stern, direct gaze. His imposing presence implies that the painter Bernardo Daddi may have been a pupil of \\n A sweeter, gentler mood emanates from the small figures representing the donors who commissioned this painting. Although depictions of donors are not unusual in Gothic art, it is rare to find so many husbands and wives shown kneeling together. The couples are separated, just as men and women were while worshiping in church during the Middle Ages.\\n '}\n"
]
}
],
"prompt_number": 3
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The best kind of probabilistic mischief we can get up to with data like this is a structure called a Markov Chain."
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Load cached trigrams when they exist; otherwise build them from the art\n",
"# descriptions and cache the result (delete trigrams.pickle to force a rebuild).\n",
"try:\n",
"    with open('trigrams.pickle', 'rb') as cache_file:\n",
"        trigrams = pickle.load(cache_file)\n",
"except IOError:\n",
"    with open('art.pickle', 'rb') as art_file:\n",
"        all_art = pickle.load(art_file)\n",
"\n",
"    # loop over `work`, not `art`: rebinding `art` would clobber the list of works\n",
"    # loaded earlier in the notebook\n",
"    descriptions = [work['desc'] for work in all_art if work.get('desc', '')]\n",
"\n",
"    # join with a space so the last word of one description cannot fuse with\n",
"    # the first word of the next, then lower-case and split into words\n",
"    text = [w.lower() for w in ' '.join(descriptions).split()]\n",
"    trigrams = zip(text, text[1:], text[2:])  # every run of three consecutive words\n",
"\n",
"    # write the cache that the try-branch above looks for\n",
"    with open('trigrams.pickle', 'wb') as cache_file:\n",
"        pickle.dump(trigrams, cache_file)\n",
"\n",
"random_seed = ' '.join(random.choice(trigrams))\n",
"print random_seed\n"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"elements of the\n"
]
}
],
"prompt_number": 13
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# map each (first, second) word pair to the list of words seen right after it;\n",
"# repeated followers are kept, one entry per occurrence\n",
"lookup_table = {}\n",
"for first, second, third in trigrams:\n",
"    pair = (first, second)\n",
"    if pair not in lookup_table:\n",
"        lookup_table[pair] = []\n",
"    lookup_table[pair].append(third)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 10
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"#input_text = \"against this\"\n",
"input_text = random_seed  # we need some text to start with!\n",
"max_len = 40  # [n] words, please\n",
"seed_word, next_word = input_text.split()[-2:]  # the last two words are the starting pair\n",
"\n",
"fake_art = []\n",
"for i in range(max_len):  # one generated word per iteration\n",
"    fake_art.append(next_word)\n",
"    try:\n",
"        # advance the chain: pick a word that followed (seed_word, next_word)\n",
"        seed_word, next_word = next_word, random.choice(lookup_table[(seed_word, next_word)])\n",
"    except KeyError:\n",
"        # this pair has no recorded follower: restart from a random known pair\n",
"        seed_word, next_word = random.choice(list(lookup_table.keys()))\n",
"\n",
"# the loop has already appended exactly max_len words; appending next_word\n",
"# once more here would overshoot the requested length by one\n",
"print ' '.join(fake_art)\n"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"the greatest parisian cabinetmakers. after his return motion. barney, a split-second ahead in his lap, his nose spreading across his face toward the viewer, who would have gazed in the mid-eighteenth century foundries were producing popular cast ironwork for other regions.\n"
]
}
],
"prompt_number": 14
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 8
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 8
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": []
}
],
"metadata": {}
}
]
}
Loading