
Commit 05277b9

resource depletion
1 parent a0593a3 commit 05277b9

File tree

3 files changed (+80 -91 lines)


.idea/workspace.xml

Lines changed: 68 additions & 76 deletions
Some generated files are not rendered by default.

proxypool/proxyGetter.py renamed to proxypool/getter.py

Lines changed: 0 additions & 3 deletions
@@ -48,9 +48,6 @@ def get_raw_proxies(self, callback, count=40):
         print('Callback', callback)
         for proxy in eval("self.{}()".format(callback)):
             proxies.append(proxy)
-            print(callback, proxy)
-            if len(proxies) >= count:
-                break
         return proxies

     def crawl_daili66(self, page_count=4):
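
The surviving loop dispatches to each crawler by method name through eval. For reference, a minimal sketch of the same dispatch using getattr, which avoids evaluating a format string as code; only get_raw_proxies and the crawl_* naming come from the diff, and the crawl_daili66 body below is a stand-in so the sketch runs on its own:

```python
class FreeProxyGetter(object):
    def get_raw_proxies(self, callback):
        """Collect every proxy yielded by the named crawl_* method."""
        proxies = []
        print('Callback', callback)
        # getattr-based dispatch, equivalent to eval("self.{}()".format(callback))
        for proxy in getattr(self, callback)():
            proxies.append(proxy)
        return proxies

    def crawl_daili66(self, page_count=4):
        # stand-in crawler, not the real one from the repo
        for page in range(1, page_count + 1):
            yield '127.0.0.1:{}'.format(8000 + page)


# usage: FreeProxyGetter().get_raw_proxies('crawl_daili66')
```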

proxypool/schedule.py

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,9 @@
 from multiprocessing import Process
 import asyncio
 import aiohttp
-
 from .db import RedisClient
 from .error import ResourceDepletionError
-from .proxyGetter import FreeProxyGetter
+from .getter import FreeProxyGetter
 from .setting import *

@@ -24,7 +23,7 @@ class ValidityTester(object):
     Tester: asynchronously checks proxies that have not been validated yet.
     """
     # use Baidu's homepage for the check
-    test_api = 'https://www.baidu.com'
+    test_api = 'http://www.baidu.com'

     def __init__(self):
         self._raw_proxies = None

@@ -42,7 +41,10 @@ async def test_single_proxy(self, proxy):
         async with aiohttp.ClientSession() as session:
             try:
                 real_proxy = 'http://' + proxy
-                async with session.get(self.test_api, proxy=real_proxy, timeout=15) as resp:
+                print('Testing', real_proxy)
+                async with session.get(self.test_api, proxy=real_proxy, timeout=15) as response:
+                    await response
+                    print('Response from', proxy)
                     self._usable_proxies.append(proxy)
             except Exception:
                 pass
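
Two details worth flagging, hedged since the diff only shows fragments: switching test_api to plain http likely sidesteps HTTPS CONNECT tunneling, which many free proxies handle poorly; and in current aiohttp a ClientResponse is not itself awaitable, so a standalone checker would read the body or inspect response.status rather than `await response`. A sketch under those assumptions:

```python
import asyncio
import aiohttp

TEST_API = 'http://www.baidu.com'


async def check_proxy(proxy):
    """Return True if TEST_API is reachable through the given ip:port proxy."""
    real_proxy = 'http://' + proxy
    async with aiohttp.ClientSession() as session:
        try:
            print('Testing', real_proxy)
            async with session.get(TEST_API, proxy=real_proxy,
                                   timeout=aiohttp.ClientTimeout(total=15)) as response:
                await response.read()  # a ClientResponse is not awaitable itself
                return response.status == 200
        except Exception:
            return False


# usage: asyncio.run(check_proxy('127.0.0.1:8080'))
```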

@@ -79,24 +81,22 @@ def is_over_threshold(self):
         else:
             return False

-    def add_to_queue(self, flag=40):
+    def add_to_queue(self):
         """
         Tell the crawlers to fetch a batch of unchecked proxies, test them,
         and add the proxies that pass to the pool.
         """
         print('PoolAdder is working')
-        while not self.is_over_threshold():
+        proxy_count = 0
+        if not self.is_over_threshold():
             for callback_label in range(self._crawler.__CrawlFuncCount__):
                 callback = self._crawler.__CrawlFunc__[callback_label]
-                raw_proxies = self._crawler.get_raw_proxies(callback, flag)
+                raw_proxies = self._crawler.get_raw_proxies(callback)
                 self._tester.set_raw_proxies(raw_proxies)
                 self._tester.test()
                 self._conn.put_many(self._tester.get_usable_proxies())
-                if self.is_over_threshold():
-                    break
-
-            flag += flag
-            if flag >= 10 * flag:
+                proxy_count += len(raw_proxies)
+            if proxy_count == 0:
                 raise ResourceDepletionError
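
This rewrite is what makes ResourceDepletionError reachable: the old guard compared flag to 10 * flag, which is never true for a positive flag, so the while loop could spin forever once every source ran dry. The new version makes one pass over all crawlers and raises only if that pass yields nothing. A tiny runnable demonstration of why the old guard never fired:

```python
# flag >= 10 * flag is False for every positive flag, so the old
# loop doubled flag forever without ever raising ResourceDepletionError
flag = 40
for _ in range(5):
    flag += flag                   # 80, 160, 320, 640, 1280
    assert not flag >= 10 * flag   # never triggers while flag > 0
```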
