-
Notifications
You must be signed in to change notification settings - Fork 1
/
parallel-starter.py
81 lines (43 loc) · 1.5 KB
/
parallel-starter.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
import logging
from datetime import datetime
import sys
import time
from threading import Thread
import os
import parallel_analyzer
# Number of worker threads; the URL list is split into this many partitions.
CONCURRENCY_LEVEL = 4

logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)-15s-%(levelname)s:%(pathname)s:%(lineno)s %(message)s',
)
logger = logging.getLogger()
logger.info("start")

# The path to the URL list is the only command-line argument. Exit with a
# usage message rather than an unexplained IndexError when it is missing.
if len(sys.argv) < 2:
    sys.exit("usage: " + sys.argv[0] + " URLS_FILE")
urls_path = sys.argv[1]

# Timestamp tag embedded in every file name produced by this run.
dateTag = datetime.now().strftime("%Y%m%d_%H%M%S")
logger.info("OUTPUT_FILE:%s", dateTag)
# Load every line of the URL list into memory.
with open(urls_path, "r") as urls:
    allUrls = urls.readlines()
logger.info(len(allUrls))

logger.info("splitting urls for concurrency")
# Create the partition directory up front so a fresh working tree does not
# fail on the first open().
os.makedirs("output/parallel_inputs", exist_ok=True)
for i in range(CONCURRENCY_LEVEL):
    # Round-robin split: partition i receives lines i, i + N, i + 2N, ...
    # where N is CONCURRENCY_LEVEL.
    partition_path = "output/parallel_inputs/" + dateTag + "_p" + str(i)
    # `with` guarantees the handle is closed even if a write raises.
    with open(partition_path, "w") as partition_file:
        partition_file.writelines(
            url.strip() + "\n" for url in allUrls[i::CONCURRENCY_LEVEL]
        )
# One thread per partition; each calls parallel_analyzer.run(dateTag, index).
# NOTE(review): presumably run() consumes partition <index> written earlier
# and produces output/<dateTag>_p<index> -- confirm in parallel_analyzer.
workers = [
    Thread(target=parallel_analyzer.run, args=(dateTag, partition))
    for partition in range(CONCURRENCY_LEVEL)
]
for worker in workers:
    worker.start()

# Block until every worker has returned before moving on.
for worker in workers:
    worker.join()
logger.info("DONE WITH WAITING ALL THREADS")
logger.info("combining outputs")
# Concatenate the per-partition files, in partition order, into a single CSV.
combined_path = "output/analysis-" + dateTag + ".csv"
with open(combined_path, 'w') as outfile:
    for partition in range(CONCURRENCY_LEVEL):
        partition_path = "output/" + dateTag + "_p" + str(partition)
        with open(partition_path, "r") as infile:
            # Copy the partition's lines through unchanged.
            outfile.writelines(infile)
logger.info("DONE COMBINING FILES")