|
16 | 16 | $FileInfo: miss-bone.py - Last Update: 6/12/2014 Ver. 1.0.0 RC 2 - Author: cooldude2k $ |
17 | 17 | ''' |
18 | 18 |
|
19 | | -import argparse |
20 | | -import datetime |
21 | | -import gzip |
22 | | -import os |
23 | 19 | import re |
| 20 | +import os |
24 | 21 | import sys |
25 | | -import time |
26 | 22 | import urllib |
27 | | - |
| 23 | +import urllib2 |
28 | 24 | import cookielib |
29 | 25 | import StringIO |
30 | | -import urllib2 |
| 26 | +import gzip |
| 27 | +import time |
| 28 | +import datetime |
| 29 | +import argparse |
31 | 30 | import urlparse |
32 | 31 |
|
33 | 32 | fakeua = "Mozilla/5.0 (Windows NT 6.1; rv:24.0) Gecko/20100101 Firefox/24.0" |
34 | 33 | geturls_cj = cookielib.CookieJar() |
35 | 34 | geturls_opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(geturls_cj)) |
36 | | -geturls_opener.addheaders = [ |
37 | | - ("Referer", |
38 | | - "http://www.emoticonplus.com/miss-bone/"), |
39 | | - ("User-Agent", |
40 | | - fakeua), |
41 | | - ("Accept-Encoding", |
42 | | - "gzip, deflate"), |
43 | | - ("Accept-Language", |
44 | | - "en-US,en-CA,en-GB,en-UK,en-AU,en-NZ,en-ZA,en;q=0.5"), |
45 | | - ("Accept-Charset", |
46 | | - "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7"), |
47 | | - ("Accept", |
48 | | - "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"), |
49 | | - ("Connection", |
50 | | - "close")] |
| 35 | +geturls_opener.addheaders = [("Referer", "http://www.emoticonplus.com/miss-bone/"), ("User-Agent", fakeua), ("Accept-Encoding", "gzip, deflate"), ("Accept-Language", "en-US,en-CA,en-GB,en-UK,en-AU,en-NZ,en-ZA,en;q=0.5"), |
| 36 | + ("Accept-Charset", "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7"), ("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"), ("Connection", "close")] |
51 | 37 | sfcountsub = 0 |
52 | 38 | sfcountfull = 0 |
53 | 39 | i = 1 |
54 | 40 | il = 15 |
55 | | -while (i < il): |
56 | | - print( |
57 | | - str(i) + |
58 | | - " Reading URL: http://www.emoticonplus.com/miss-bone/page/" + |
59 | | - str(i)) |
| 41 | +while(i < il): |
| 42 | + print(str(i)+" Reading URL: http://www.emoticonplus.com/miss-bone/page/"+str(i)) |
60 | 43 | geturls_text = geturls_opener.open( |
61 | | - "http://www.emoticonplus.com/miss-bone/page/" + str(i)) |
62 | | - if (geturls_text.info().get("Content-Encoding") == |
63 | | - "gzip" or geturls_text.info().get("Content-Encoding") == "deflate"): |
| 44 | + "http://www.emoticonplus.com/miss-bone/page/"+str(i)) |
| 45 | + if(geturls_text.info().get("Content-Encoding") == "gzip" or geturls_text.info().get("Content-Encoding") == "deflate"): |
64 | 46 | strbuf = StringIO.StringIO(geturls_text.read()) |
65 | 47 | gzstrbuf = gzip.GzipFile(fileobj=strbuf) |
66 | 48 | out_text = gzstrbuf.read()[:] |
67 | | - if (geturls_text.info().get("Content-Encoding") != |
68 | | - "gzip" and geturls_text.info().get("Content-Encoding") != "deflate"): |
| 49 | + if(geturls_text.info().get("Content-Encoding") != "gzip" and geturls_text.info().get("Content-Encoding") != "deflate"): |
69 | 50 | out_text = geturls_text.read()[:] |
70 | | - regex_text = re.escape("<a class=\"emoticon-list\" href=\"#\" data=\"") + "(.*?)" + \ |
71 | | - re.escape("\" im=\"") + "([0-9]+)" + \ |
| 51 | + regex_text = re.escape("<a class=\"emoticon-list\" href=\"#\" data=\"")+"(.*?)" + \ |
| 52 | + re.escape("\" im=\"")+"([0-9]+)" + \ |
72 | 53 | re.escape("\" category=\"miss-bone\">") |
73 | 54 | post_text = re.findall(regex_text, out_text) |
74 | 55 | isub = 0 |
75 | 56 | ilsub = len(post_text) |
76 | | - print(str(i) + " Found " + str(ilsub) + " GIF Images.") |
77 | | - while (isub < ilsub): |
| 57 | + print(str(i)+" Found "+str(ilsub)+" GIF Images.") |
| 58 | + while(isub < ilsub): |
78 | 59 | getsub2xurls_cj = geturls_cj |
79 | 60 | getsub2xurls_opener = urllib2.build_opener( |
80 | 61 | urllib2.HTTPCookieProcessor(getsub2xurls_cj)) |
81 | | - getsub2xurls_opener.addheaders = [ |
82 | | - ("Referer", |
83 | | - "http://www.emoticonplus.com/miss-bone/page/" + |
84 | | - str(i)), |
85 | | - ("User-Agent", |
86 | | - fakeua), |
87 | | - ("Accept-Encoding", |
88 | | - "gzip, deflate"), |
89 | | - ("Accept-Language", |
90 | | - "en-US,en-CA,en-GB,en-UK,en-AU,en-NZ,en-ZA,en;q=0.5"), |
91 | | - ("Accept-Charset", |
92 | | - "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7"), |
93 | | - ("Accept", |
94 | | - "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"), |
95 | | - ("Connection", |
96 | | - "close")] |
97 | | - print(str(i) + "," + str(isub + 1) + " Start Downloading Image File: " + |
98 | | - str(isub + 1) + " of " + str(ilsub)) |
99 | | - print(str(i) + "," + str(isub + 1) + |
100 | | - " Downloading Image: " + post_text[isub][0]) |
| 62 | + getsub2xurls_opener.addheaders = [("Referer", "http://www.emoticonplus.com/miss-bone/page/"+str(i)), ("User-Agent", fakeua), ("Accept-Encoding", "gzip, deflate"), ("Accept-Language", "en-US,en-CA,en-GB,en-UK,en-AU,en-NZ,en-ZA,en;q=0.5"), |
| 63 | + ("Accept-Charset", "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7"), ("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"), ("Connection", "close")] |
| 64 | + print(str(i)+","+str(isub+1)+" Start Downloading Image File: " + |
| 65 | + str(isub+1)+" of "+str(ilsub)) |
| 66 | + print(str(i)+","+str(isub+1)+" Downloading Image: "+post_text[isub][0]) |
101 | 67 | getsub2xurls_text = getsub2xurls_opener.open(post_text[isub][0]) |
102 | 68 | gif_file_name = os.path.basename( |
103 | 69 | urlparse.urlparse(post_text[isub][0]).path) |
104 | | - print(str(i) + "," + str(isub + 1) + |
105 | | - " Finished Downloading Image: " + post_text[isub][0]) |
106 | | - print(str(i) + "," + str(isub + 1) + |
107 | | - " Saving File: ./" + gif_file_name) |
108 | | - gifsf = open("./" + gif_file_name, "wb") |
| 70 | + print(str(i)+","+str(isub+1) + |
| 71 | + " Finished Downloading Image: "+post_text[isub][0]) |
| 72 | + print(str(i)+","+str(isub+1)+" Saving File: ./"+gif_file_name) |
| 73 | + gifsf = open("./"+gif_file_name, "wb") |
109 | 74 | gifsf.write(getsub2xurls_text.read()) |
110 | 75 | gifsf.close() |
111 | | - isub = isub + 1 |
112 | | - sfcountsub = sfcountsub + 1 |
113 | | - print(str(i) + " Downloaded " + str(sfcountsub) + " GIF Images") |
114 | | - sfcountfull = sfcountfull + sfcountsub |
| 76 | + isub = isub+1 |
| 77 | + sfcountsub = sfcountsub+1 |
| 78 | + print(str(i)+" Downloaded "+str(sfcountsub)+" GIF Images") |
| 79 | + sfcountfull = sfcountfull+sfcountsub |
115 | 80 | sfcountsub = 0 |
116 | | - i = i + 1 |
117 | | -print("Downloaded " + str(sfcountfull) + " GIF Images") |
| 81 | + i = i+1 |
| 82 | +print("Downloaded "+str(sfcountfull)+" GIF Images") |
118 | 83 | sfcountfull = 0 |
0 commit comments