|
16 | 16 | $FileInfo: miss-bone.py - Last Update: 6/12/2014 Ver. 1.0.0 RC 2 - Author: cooldude2k $
|
17 | 17 | '''
|
18 | 18 |
|
19 |
| -import re |
| 19 | +import argparse |
| 20 | +import datetime |
| 21 | +import gzip |
20 | 22 | import os
|
| 23 | +import re |
21 | 24 | import sys
|
| 25 | +import time |
22 | 26 | import urllib
|
23 |
| -import urllib2 |
| 27 | + |
24 | 28 | import cookielib
|
25 | 29 | import StringIO
|
26 |
| -import gzip |
27 |
| -import time |
28 |
| -import datetime |
29 |
| -import argparse |
| 30 | +import urllib2 |
30 | 31 | import urlparse
|
31 | 32 |
|
# Browser-like User-Agent string sent with every request made by this script.
fakeua = "Mozilla/5.0 (Windows NT 6.1; rv:24.0) Gecko/20100101 Firefox/24.0"

# Cookie jar backing the opener below, so cookies set by the site are kept
# and replayed on later requests.
geturls_cj = cookielib.CookieJar()
_geturls_cookie_handler = urllib2.HTTPCookieProcessor(geturls_cj)
geturls_opener = urllib2.build_opener(_geturls_cookie_handler)

# Default headers for listing-page requests; the Referer points at the
# gallery's landing page.
geturls_opener.addheaders = [
    ("Referer", "http://www.emoticonplus.com/miss-bone/"),
    ("User-Agent", fakeua),
    ("Accept-Encoding", "gzip, deflate"),
    ("Accept-Language", "en-US,en-CA,en-GB,en-UK,en-AU,en-NZ,en-ZA,en;q=0.5"),
    ("Accept-Charset", "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7"),
    ("Accept",
     "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"),
    ("Connection", "close"),
]
# Needed to decode "deflate" bodies; imported here because this section is
# the only user and the script advertises "deflate" in Accept-Encoding.
import zlib


def _read_decoded(response):
    """Read *response* fully, close it, and undo its Content-Encoding.

    Handles "gzip" and "deflate"; any other (or missing) encoding returns
    the raw body unchanged.
    """
    encoding = response.info().get("Content-Encoding")
    try:
        raw = response.read()
    finally:
        # Always release the connection, even if the read fails.
        response.close()
    if encoding == "gzip":
        return gzip.GzipFile(fileobj=StringIO.StringIO(raw)).read()
    if encoding == "deflate":
        try:
            return zlib.decompress(raw)
        except zlib.error:
            # Some servers send a raw deflate stream without the zlib
            # header; a negative wbits tells zlib to expect exactly that.
            return zlib.decompress(raw, -zlib.MAX_WBITS)
    return raw


_PAGE_BASE_URL = "http://www.emoticonplus.com/miss-bone/page/"

# Compiled once for all pages. Group 1 is the value of the "data" attribute
# (used below as the image URL); group 2 is the numeric "im" attribute.
_EMOTICON_RE = re.compile(
    re.escape("<a class=\"emoticon-list\" href=\"#\" data=\"") + "(.*?)" +
    re.escape("\" im=\"") + "([0-9]+)" +
    re.escape("\" category=\"miss-bone\">"))

sfcountsub = 0   # images saved from the current page
sfcountfull = 0  # images saved across all pages
il = 15          # exclusive upper bound: pages 1 .. il - 1 are fetched

for i in range(1, il):
    page_url = _PAGE_BASE_URL + str(i)
    print(str(i) + " Reading URL: " + page_url)
    out_text = _read_decoded(geturls_opener.open(page_url))

    post_text = _EMOTICON_RE.findall(out_text)
    ilsub = len(post_text)
    print(str(i) + " Found " + str(ilsub) + " GIF Images.")

    # One opener per page is enough: it shares the listing opener's cookie
    # jar and headers, with only the Referer switched to the current page.
    getsub2xurls_opener = urllib2.build_opener(
        urllib2.HTTPCookieProcessor(geturls_cj))
    getsub2xurls_opener.addheaders = [
        (name, page_url if name == "Referer" else value)
        for name, value in geturls_opener.addheaders]

    for isub, (image_url, _image_id) in enumerate(post_text):
        label = str(i) + "," + str(isub + 1)
        print(label + " Start Downloading Image File: " +
              str(isub + 1) + " of " + str(ilsub))
        print(label + " Downloading Image: " + image_url)
        # The request advertises gzip/deflate, so the body must be decoded
        # before it is written to disk or the saved GIF may be corrupt.
        image_data = _read_decoded(getsub2xurls_opener.open(image_url))
        gif_file_name = os.path.basename(urlparse.urlparse(image_url).path)
        print(label + " Finished Downloading Image: " + image_url)
        print(label + " Saving File: ./" + gif_file_name)
        # "with" closes the file even if the write raises.
        with open("./" + gif_file_name, "wb") as gifsf:
            gifsf.write(image_data)
        sfcountsub = sfcountsub + 1

    print(str(i) + " Downloaded " + str(sfcountsub) + " GIF Images")
    sfcountfull = sfcountfull + sfcountsub
    sfcountsub = 0

print("Downloaded " + str(sfcountfull) + " GIF Images")
sfcountfull = 0
|
0 commit comments