-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathj.py
100 lines (86 loc) · 3.34 KB
/
j.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
# -*- coding: utf-8 -*-
import sys
import re
from HTMLParser import HTMLParser
import urllib2
import cookielib
import urllib
import cookielib
## Workaround for encoding errors raised when handling Chinese text:
## force the process-wide default string encoding to UTF-8 (Python 2 only).
reload(sys)  # presumably needed so that sys.setdefaultencoding is available again -- verify
sys.setdefaultencoding("utf8")
#####################################################
# Login settings.
# NOTE(review): the original comment here read "log in to Renren", but the
# URLs defined below point at jisilu.cn -- confirm which site is intended.
# CSS class names of the <span> elements whose text FundDataParser collects.
# NOTE(review): "amt-vaue" may be a typo for "amt-value" -- it is matched
# literally, so confirm against the page markup before changing it.
value_list = ["asset-value", "asset-amt", "asset-all", "date", "price", "amt", "amt-vaue", "scale", "risk", "subscribe-status","redeem-status"]
loginurl = 'http://www.jisilu.cn/login/'
logindomain = 'jisilu.cn'


class FundDataParser(HTMLParser):
    """Collect the text of ``<span>`` elements that carry selected CSS classes.

    For every ``<span>`` whose ``class`` attribute contains one of the names in
    ``value_list``, the first text node found inside that span is stored in
    ``self.data`` under the matching class name.
    """

    def __init__(self, value_list=value_list):
        """Create a parser.

        :param value_list: iterable of CSS class names to look for; defaults to
            the module-level ``value_list``.
        """
        # Old-style base class on Python 2, so super() cannot be used.
        # HTMLParser.__init__ calls self.reset(), which sets up the capture
        # state defined in reset() below.
        HTMLParser.__init__(self)
        self.value_list = value_list
        self.data = {}
        self.result = {}

    def reset(self):
        """Reset the tokenizer and the span-capture state (not ``data``)."""
        HTMLParser.reset(self)
        self._process = False      # True while waiting for a matched span's text
        self.attr = ""             # class name the pending text will be stored under
        self._span_depth = 0       # number of currently open <span> elements
        self._capture_depth = 0    # span depth at which the pending match was opened

    def get(self, code):
        """Download the fund page, parse it and return the collected values.

        :param code: currently unused -- the URL is fixed; kept so existing
            callers keep working.
        :returns: dict mapping matched class names to their text. The same
            dict is also left in ``self.result``; ``self.data`` is emptied.
        """
        url = "http://www.jisilu.cn/data/sfnew/#tlink_3"
        f = urllib2.urlopen(url)
        try:
            html = f.read()
        finally:
            # Always release the connection, even if the read fails.
            f.close()
        print(html)
        # Start from a clean tokenizer so repeated get() calls do not see
        # leftovers (buffered input or a pending capture) from a previous page.
        self.reset()
        self.feed(html)
        self.close()
        self.result = self.data
        self.data = {}
        return self.result

    def handle_starttag(self, tag, attrs):
        """Arm text capture when a ``<span>`` with a wanted class opens."""
        if tag != 'span':
            return
        self._span_depth += 1
        for name, value in attrs:
            # A bare ``class`` attribute has the value None; nothing to match.
            if name != "class" or not value:
                continue
            # case <span class="asset-amt red">: test every class token.
            for attr_name in value.split():
                if attr_name in self.value_list:
                    self._process = True
                    self.attr = attr_name
                    self._capture_depth = self._span_depth

    def handle_endtag(self, tag):
        """Disarm a pending capture once the matched ``<span>`` is closed.

        Without this, an empty matching span would cause the next unrelated
        text node in the document to be stored under its class name.
        """
        if tag != 'span':
            return
        if self._process and self._span_depth <= self._capture_depth:
            self._process = False
        if self._span_depth > 0:
            self._span_depth -= 1

    def handle_data(self, data):
        """Store the first text node seen after a matching span opened."""
        if self._process:
            self.data[self.attr] = data
            self._process = False
class Login(object):
    """Cookie-aware helper that posts login credentials to ``loginurl``."""

    def __init__(self):
        """Create empty credentials and install a cookie-handling opener.

        Note: ``urllib2.install_opener`` makes this opener the process-wide
        default used by ``urllib2.urlopen``.
        """
        self.name = ''
        # login() reads ``pwd``; initialise it so calling login() before
        # setLoginInfo() does not fail with AttributeError.
        self.pwd = ''
        self.passwprd = ''  # legacy misspelled attribute, kept for compatibility
        self.domain = ''
        self.operate = None  # response object of the most recent login()
        self.cj = cookielib.LWPCookieJar()
        self.opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(self.cj))
        urllib2.install_opener(self.opener)

    def setLoginInfo(self, username, password, domain):
        '''Set the user's login information.'''
        self.name = username
        self.pwd = password
        self.domain = domain

    def login(self):
        '''Log in to the website.

        Posts the stored credentials once, keeps the response object in
        ``self.operate`` (its body is already consumed), prints the returned
        page and returns it.
        '''
        loginparams = {'domain': self.domain, 'email': self.name, 'password': self.pwd}
        headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.57 Safari/537.36'}
        req = urllib2.Request(loginurl, urllib.urlencode(loginparams), headers=headers)
        # Send the request exactly once: the installed opener and self.opener
        # are the same object, so calling both urlopen() and opener.open()
        # would submit the credentials twice.
        self.operate = self.opener.open(req)
        thePage = self.operate.read()
        print(thePage)
        return thePage
if __name__ == '__main__':
    # Script entry point: download the fund data page once and print it.
    url = "http://www.jisilu.cn/data/sfnew/#tlink_3"
    req_header = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11',
        'Accept': 'text/html;q=0.9,*/*;q=0.8',
        'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3',
        'Connection': 'close',
        # 'Accept-Encoding: gzip, deflate' is deliberately not sent: the body
        # is decoded as text below and is never decompressed.
        # 'Referer' is omitted rather than set to None (a None value is not a
        # valid header). If the page still cannot be fetched, set 'Referer'
        # to the host of the site being scraped.
    }
    req_timeout = 5
    req = urllib2.Request(url, None, req_header)
    resp = urllib2.urlopen(req, None, req_timeout)
    try:
        # Decode explicitly as UTF-8 instead of relying on the process-wide
        # default encoding; undecodable bytes are dropped as before.
        html = resp.read().decode('utf-8', 'ignore')
    finally:
        resp.close()
    print(html)