|
1 |
| -""" |
2 |
| -------------------------------------------------- |
3 |
| - File Name: schedule.py |
4 |
| - Description: 调度器模块, |
5 |
| - 包含ValidityTester,PoolAdder, |
6 |
| - Schedule三个类,负责维护代理池。 |
7 |
| - Author: Liu |
8 |
| - Date: 2016/12/9 |
9 |
| -------------------------------------------------- |
10 |
| -""" |
11 | 1 | import time
|
12 | 2 | from multiprocessing import Process
|
13 | 3 | import asyncio
|
14 | 4 | import aiohttp
|
15 |
| -from .db import RedisClient |
16 |
| -from .error import ResourceDepletionError |
17 |
| -from .getter import FreeProxyGetter |
18 |
| -from .setting import * |
| 5 | +from proxypool.db import RedisClient |
| 6 | +from proxypool.error import ResourceDepletionError |
| 7 | +from proxypool.getter import FreeProxyGetter |
| 8 | +from proxypool.setting import * |
19 | 9 |
|
20 | 10 |
|
21 | 11 | class ValidityTester(object):
|
22 | 12 | """
|
23 | 13 | 检验器,负责对未知的代理进行异步检测。
|
24 | 14 | """
|
25 | 15 | # URL requested through each proxy to check it (taken from TEST_API; formerly the hard-coded Baidu homepage)
|
26 |
| - test_api = 'http://www.baidu.com' |
| 16 | + test_api = TEST_API |
27 | 17 |
|
28 | 18 | def __init__(self):
|
29 | 19 | self._raw_proxies = None
|
30 | 20 | self._usable_proxies = []
|
31 | 21 |
|
def set_raw_proxies(self, proxies):
    """
    Set the proxies that are to be tested.

    The list of usable proxies is reset to empty at the same time, so
    results from an earlier batch are discarded.

    :param proxies: the proxies to test; stored as given, without copying.
    """
    self._usable_proxies = []
    self._raw_proxies = proxies
|
37 | 28 |
|
async def test_single_proxy(self, proxy):
    """
    Test a single proxy and, if it works, add it to ``_usable_proxies``.

    A GET request for ``self.test_api`` is sent through the proxy with a
    15 second timeout. The proxy counts as usable as soon as a response
    is received. Any exception raised while testing is caught and the
    proxy is reported as invalid, so a bad proxy never aborts the
    coroutine.

    :param proxy: proxy address without a scheme; ``'http://'`` is
        prepended before it is handed to aiohttp.
    """
    async with aiohttp.ClientSession() as session:
        try:
            real_proxy = 'http://' + proxy
            print('Testing', real_proxy)
            # Entering the context manager already waits for the response.
            # The response object itself is not awaitable, so the former
            # ``await response`` raised TypeError and every proxy was
            # reported as invalid.
            async with session.get(self.test_api, proxy=real_proxy, timeout=15):
                self._usable_proxies.append(proxy)
                print('Valid proxy', proxy)
        except Exception:
            # Deliberately broad: connection errors, timeouts and malformed
            # proxy values all simply mean "this proxy is not usable".
            print('Invalid proxy', proxy)
51 | 43 |
|
52 | 44 | def test(self):
|
53 |
| - """异步检测_raw_proxies中的全部代理。 |
| 45 | + """ |
| 46 | + 异步检测_raw_proxies中的全部代理。 |
54 | 47 | """
|
55 | 48 | print('ValidityTester is working')
|
56 | 49 | loop = asyncio.get_event_loop()
|
@@ -92,9 +85,6 @@ def add_to_queue(self):
|
92 | 85 | for callback_label in range(self._crawler.__CrawlFuncCount__):
|
93 | 86 | callback = self._crawler.__CrawlFunc__[callback_label]
|
94 | 87 | raw_proxies = self._crawler.get_raw_proxies(callback)
|
95 |
| - self._tester.set_raw_proxies(raw_proxies) |
96 |
| - self._tester.test() |
97 |
| - self._conn.put_many(self._tester.get_usable_proxies()) |
98 | 88 | proxy_count += len(raw_proxies)
|
99 | 89 | if proxy_count == 0:
|
100 | 90 | raise ResourceDepletionError
|
|
0 commit comments