Skip to content

Commit ad4b7fc

Browse files
author
Jalin
committed
增加代理服务
1 parent f9036b7 commit ad4b7fc

File tree

4 files changed

+103
-1
lines changed

4 files changed

+103
-1
lines changed

src/app/main.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ class AppEnvType:
6666
DEFAULT_STATS_CHECK_INTERVAL = 10 # IP 数据统计间隔
6767

6868
DEFAULT_REQUEST_TIME_OUT = 5
69-
DEFAULT_REQUEST_CHECK_TIME_OUT = 3
69+
DEFAULT_REQUEST_CHECK_TIME_OUT = 5
7070

7171
# Rate
7272
RE_PUSH_TO_CHECK_POOL_RATE = 0.6 # 如果 总 IP 数量 > IP 池数量 * Rate 则跳过本次推送任务

src/sites/github_proxy_list.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
import re

from src.app.ip_get import IPGet, SiteResponse
from src.lib.structs import SiteData, SiteResponseData

# Registry key identifying this crawler inside IPGet.
key = 'github_proxy_list'

# Matches "ip:port" pairs such as "1.2.3.4:8080".
# Compiled once at import time instead of on every parse() call.
_IP_PORT_RE = re.compile(r'(?:\d{1,3}\.){3}\d{1,3}:\d+')


@IPGet.config(key)
def config():
    """Return the site descriptor: display name and the raw list URL to fetch."""
    site = SiteData()
    site.name = 'Github proxy list'
    site.pages = ['https://raw.githubusercontent.com/clarketm/proxy-list/master/proxy-list-raw.txt']
    return site


@IPGet.parse(key)
def parse(resp: SiteResponse):
    """Yield one SiteResponseData per "ip:port" pair found in the page text.

    The regex guarantees every match contains exactly one ':' between the
    address and the port, so no defensive try/except is needed here (the
    original block swallowed Exception, which could only hide real bugs).
    """
    for match in _IP_PORT_RE.findall(resp.text):
        host, _, port = match.partition(':')
        res = SiteResponseData()
        res.ip = host
        res.port = port  # NOTE(review): kept as str, matching the original behavior
        yield res


if __name__ == '__main__':
    from src.lib.func import run_until_complete

    runner = IPGet.test_crawl(key)
    run_until_complete(runner)

src/sites/proxy_daily.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
import re

from src.app.ip_get import IPGet, SiteResponse
from src.lib.structs import SiteData, SiteResponseData

# Registry key identifying this crawler inside IPGet.
key = 'proxy_daily'

# Matches "ip:port" pairs such as "1.2.3.4:8080".
# Compiled once at import time instead of on every parse() call.
_IP_PORT_RE = re.compile(r'(?:\d{1,3}\.){3}\d{1,3}:\d+')


@IPGet.config(key)
def config():
    """Return the site descriptor: display name and the page to scrape."""
    site = SiteData()
    site.name = 'Proxy daily'
    site.pages = ['https://proxy-daily.com/']
    return site


@IPGet.parse(key)
def parse(resp: SiteResponse):
    """Yield one SiteResponseData per "ip:port" pair found in the page text.

    The regex guarantees every match contains exactly one ':' between the
    address and the port, so no defensive try/except is needed here (the
    original block swallowed Exception, which could only hide real bugs).
    """
    for match in _IP_PORT_RE.findall(resp.text):
        host, _, port = match.partition(':')
        res = SiteResponseData()
        res.ip = host
        res.port = port  # NOTE(review): kept as str, matching the original behavior
        yield res


if __name__ == '__main__':
    from src.lib.func import run_until_complete

    runner = IPGet.test_crawl(key)
    run_until_complete(runner)

src/sites/spys_me.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
import re

from src.app.ip_get import IPGet, SiteResponse
from src.lib.structs import SiteData, SiteResponseData

# Registry key identifying this crawler inside IPGet.
key = 'spysme'

# Matches "ip:port" pairs such as "1.2.3.4:8080".
# Compiled once at import time instead of on every parse() call.
_IP_PORT_RE = re.compile(r'(?:\d{1,3}\.){3}\d{1,3}:\d+')


@IPGet.config(key)
def config():
    """Return the site descriptor: display name and the plain-text proxy list URL."""
    site = SiteData()
    site.name = 'Spys.me'
    site.pages = ['http://spys.me/proxy.txt']
    return site


@IPGet.parse(key)
def parse(resp: SiteResponse):
    """Yield one SiteResponseData per "ip:port" pair found in the page text.

    The regex guarantees every match contains exactly one ':' between the
    address and the port, so no defensive try/except is needed here (the
    original block swallowed Exception, which could only hide real bugs).
    """
    for match in _IP_PORT_RE.findall(resp.text):
        host, _, port = match.partition(':')
        res = SiteResponseData()
        res.ip = host
        res.port = port  # NOTE(review): kept as str, matching the original behavior
        yield res


if __name__ == '__main__':
    from src.lib.func import run_until_complete

    runner = IPGet.test_crawl(key)
    run_until_complete(runner)

0 commit comments

Comments
 (0)