12
12
from multiprocessing import Process
13
13
import asyncio
14
14
import aiohttp
15
-
16
15
from .db import RedisClient
17
16
from .error import ResourceDepletionError
18
- from .proxyGetter import FreeProxyGetter
17
+ from .getter import FreeProxyGetter
19
18
from .setting import *
20
19
21
20
@@ -24,7 +23,7 @@ class ValidityTester(object):
24
23
检验器,负责对未知的代理进行异步检测。
25
24
"""
26
25
# 用百度的首页来检验
27
- test_api = 'https ://www.baidu.com'
26
+ test_api = 'http ://www.baidu.com'
28
27
29
28
def __init__ (self ):
30
29
self ._raw_proxies = None
@@ -42,7 +41,10 @@ async def test_single_proxy(self, proxy):
42
41
async with aiohttp .ClientSession () as session :
43
42
try :
44
43
real_proxy = 'http://' + proxy
45
- async with session .get (self .test_api , proxy = real_proxy , timeout = 15 ) as resp :
44
+ print ('Testing' , real_proxy )
45
+ async with session .get (self .test_api , proxy = real_proxy , timeout = 15 ) as response :
46
+ await response
47
+ print ('Response from' , proxy )
46
48
self ._usable_proxies .append (proxy )
47
49
except Exception :
48
50
pass
@@ -79,24 +81,22 @@ def is_over_threshold(self):
79
81
else :
80
82
return False
81
83
82
- def add_to_queue (self , flag = 40 ):
84
+ def add_to_queue (self ):
83
85
"""
84
86
命令爬虫抓取一定量未检测的代理,然后检测,将通过检测的代理
85
87
加入到代理池中。
86
88
"""
87
89
print ('PoolAdder is working' )
88
- while not self .is_over_threshold ():
90
+ proxy_count = 0
91
+ if not self .is_over_threshold ():
89
92
for callback_label in range (self ._crawler .__CrawlFuncCount__ ):
90
93
callback = self ._crawler .__CrawlFunc__ [callback_label ]
91
- raw_proxies = self ._crawler .get_raw_proxies (callback , flag )
94
+ raw_proxies = self ._crawler .get_raw_proxies (callback )
92
95
self ._tester .set_raw_proxies (raw_proxies )
93
96
self ._tester .test ()
94
97
self ._conn .put_many (self ._tester .get_usable_proxies ())
95
- if self .is_over_threshold ():
96
- break
97
-
98
- flag += flag
99
- if flag >= 10 * flag :
98
+ proxy_count += len (raw_proxies )
99
+ if proxy_count == 0 :
100
100
raise ResourceDepletionError
101
101
102
102
0 commit comments