|
16 | 16 | $FileInfo: miss-bone.py - Last Update: 6/12/2014 Ver. 1.0.0 RC 2 - Author: cooldude2k $
|
17 | 17 | '''
|
18 | 18 |
|
19 |
| -import argparse |
20 |
| -import datetime |
21 |
| -import gzip |
22 |
| -import os |
23 | 19 | import re
|
| 20 | +import os |
24 | 21 | import sys
|
25 |
| -import time |
26 | 22 | import urllib
|
27 |
| - |
| 23 | +import urllib2 |
28 | 24 | import cookielib
|
29 | 25 | import StringIO
|
30 |
| -import urllib2 |
| 26 | +import gzip |
| 27 | +import time |
| 28 | +import datetime |
| 29 | +import argparse |
31 | 30 | import urlparse
|
32 | 31 |
|
33 | 32 | fakeua = "Mozilla/5.0 (Windows NT 6.1; rv:24.0) Gecko/20100101 Firefox/24.0"
|
34 | 33 | geturls_cj = cookielib.CookieJar()
|
35 | 34 | geturls_opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(geturls_cj))
|
36 |
| -geturls_opener.addheaders = [ |
37 |
| - ("Referer", |
38 |
| - "http://www.emoticonplus.com/miss-bone/"), |
39 |
| - ("User-Agent", |
40 |
| - fakeua), |
41 |
| - ("Accept-Encoding", |
42 |
| - "gzip, deflate"), |
43 |
| - ("Accept-Language", |
44 |
| - "en-US,en-CA,en-GB,en-UK,en-AU,en-NZ,en-ZA,en;q=0.5"), |
45 |
| - ("Accept-Charset", |
46 |
| - "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7"), |
47 |
| - ("Accept", |
48 |
| - "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"), |
49 |
| - ("Connection", |
50 |
| - "close")] |
| 35 | +geturls_opener.addheaders = [("Referer", "http://www.emoticonplus.com/miss-bone/"), ("User-Agent", fakeua), ("Accept-Encoding", "gzip, deflate"), ("Accept-Language", "en-US,en-CA,en-GB,en-UK,en-AU,en-NZ,en-ZA,en;q=0.5"), |
| 36 | + ("Accept-Charset", "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7"), ("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"), ("Connection", "close")] |
51 | 37 | sfcountsub = 0
|
52 | 38 | sfcountfull = 0
|
53 | 39 | i = 1
|
54 | 40 | il = 15
|
55 |
| -while (i < il): |
56 |
| - print( |
57 |
| - str(i) + |
58 |
| - " Reading URL: http://www.emoticonplus.com/miss-bone/page/" + |
59 |
| - str(i)) |
| 41 | +while(i < il): |
| 42 | + print(str(i)+" Reading URL: http://www.emoticonplus.com/miss-bone/page/"+str(i)) |
60 | 43 | geturls_text = geturls_opener.open(
|
61 |
| - "http://www.emoticonplus.com/miss-bone/page/" + str(i)) |
62 |
| - if (geturls_text.info().get("Content-Encoding") == |
63 |
| - "gzip" or geturls_text.info().get("Content-Encoding") == "deflate"): |
| 44 | + "http://www.emoticonplus.com/miss-bone/page/"+str(i)) |
| 45 | + if(geturls_text.info().get("Content-Encoding") == "gzip" or geturls_text.info().get("Content-Encoding") == "deflate"): |
64 | 46 | strbuf = StringIO.StringIO(geturls_text.read())
|
65 | 47 | gzstrbuf = gzip.GzipFile(fileobj=strbuf)
|
66 | 48 | out_text = gzstrbuf.read()[:]
|
67 |
| - if (geturls_text.info().get("Content-Encoding") != |
68 |
| - "gzip" and geturls_text.info().get("Content-Encoding") != "deflate"): |
| 49 | + if(geturls_text.info().get("Content-Encoding") != "gzip" and geturls_text.info().get("Content-Encoding") != "deflate"): |
69 | 50 | out_text = geturls_text.read()[:]
|
70 |
| - regex_text = re.escape("<a class=\"emoticon-list\" href=\"#\" data=\"") + "(.*?)" + \ |
71 |
| - re.escape("\" im=\"") + "([0-9]+)" + \ |
| 51 | + regex_text = re.escape("<a class=\"emoticon-list\" href=\"#\" data=\"")+"(.*?)" + \ |
| 52 | + re.escape("\" im=\"")+"([0-9]+)" + \ |
72 | 53 | re.escape("\" category=\"miss-bone\">")
|
73 | 54 | post_text = re.findall(regex_text, out_text)
|
74 | 55 | isub = 0
|
75 | 56 | ilsub = len(post_text)
|
76 |
| - print(str(i) + " Found " + str(ilsub) + " GIF Images.") |
77 |
| - while (isub < ilsub): |
| 57 | + print(str(i)+" Found "+str(ilsub)+" GIF Images.") |
| 58 | + while(isub < ilsub): |
78 | 59 | getsub2xurls_cj = geturls_cj
|
79 | 60 | getsub2xurls_opener = urllib2.build_opener(
|
80 | 61 | urllib2.HTTPCookieProcessor(getsub2xurls_cj))
|
81 |
| - getsub2xurls_opener.addheaders = [ |
82 |
| - ("Referer", |
83 |
| - "http://www.emoticonplus.com/miss-bone/page/" + |
84 |
| - str(i)), |
85 |
| - ("User-Agent", |
86 |
| - fakeua), |
87 |
| - ("Accept-Encoding", |
88 |
| - "gzip, deflate"), |
89 |
| - ("Accept-Language", |
90 |
| - "en-US,en-CA,en-GB,en-UK,en-AU,en-NZ,en-ZA,en;q=0.5"), |
91 |
| - ("Accept-Charset", |
92 |
| - "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7"), |
93 |
| - ("Accept", |
94 |
| - "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"), |
95 |
| - ("Connection", |
96 |
| - "close")] |
97 |
| - print(str(i) + "," + str(isub + 1) + " Start Downloading Image File: " + |
98 |
| - str(isub + 1) + " of " + str(ilsub)) |
99 |
| - print(str(i) + "," + str(isub + 1) + |
100 |
| - " Downloading Image: " + post_text[isub][0]) |
| 62 | + getsub2xurls_opener.addheaders = [("Referer", "http://www.emoticonplus.com/miss-bone/page/"+str(i)), ("User-Agent", fakeua), ("Accept-Encoding", "gzip, deflate"), ("Accept-Language", "en-US,en-CA,en-GB,en-UK,en-AU,en-NZ,en-ZA,en;q=0.5"), |
| 63 | + ("Accept-Charset", "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7"), ("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"), ("Connection", "close")] |
| 64 | + print(str(i)+","+str(isub+1)+" Start Downloading Image File: " + |
| 65 | + str(isub+1)+" of "+str(ilsub)) |
| 66 | + print(str(i)+","+str(isub+1)+" Downloading Image: "+post_text[isub][0]) |
101 | 67 | getsub2xurls_text = getsub2xurls_opener.open(post_text[isub][0])
|
102 | 68 | gif_file_name = os.path.basename(
|
103 | 69 | urlparse.urlparse(post_text[isub][0]).path)
|
104 |
| - print(str(i) + "," + str(isub + 1) + |
105 |
| - " Finished Downloading Image: " + post_text[isub][0]) |
106 |
| - print(str(i) + "," + str(isub + 1) + |
107 |
| - " Saving File: ./" + gif_file_name) |
108 |
| - gifsf = open("./" + gif_file_name, "wb") |
| 70 | + print(str(i)+","+str(isub+1) + |
| 71 | + " Finished Downloading Image: "+post_text[isub][0]) |
| 72 | + print(str(i)+","+str(isub+1)+" Saving File: ./"+gif_file_name) |
| 73 | + gifsf = open("./"+gif_file_name, "wb") |
109 | 74 | gifsf.write(getsub2xurls_text.read())
|
110 | 75 | gifsf.close()
|
111 |
| - isub = isub + 1 |
112 |
| - sfcountsub = sfcountsub + 1 |
113 |
| - print(str(i) + " Downloaded " + str(sfcountsub) + " GIF Images") |
114 |
| - sfcountfull = sfcountfull + sfcountsub |
| 76 | + isub = isub+1 |
| 77 | + sfcountsub = sfcountsub+1 |
| 78 | + print(str(i)+" Downloaded "+str(sfcountsub)+" GIF Images") |
| 79 | + sfcountfull = sfcountfull+sfcountsub |
115 | 80 | sfcountsub = 0
|
116 |
| - i = i + 1 |
117 |
| -print("Downloaded " + str(sfcountfull) + " GIF Images") |
| 81 | + i = i+1 |
| 82 | +print("Downloaded "+str(sfcountfull)+" GIF Images") |
118 | 83 | sfcountfull = 0
|
0 commit comments