-
Notifications
You must be signed in to change notification settings - Fork 17
/
getStarPhotos.py
103 lines (87 loc) · 2.94 KB
/
getStarPhotos.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# file getStarPhotos.py: download celebrity photos from the Face++ (facepp) official website
"""getStarPhotos.py文档
@author: Leo Lou
@version: 0.1.0
"""
import os
import urllib,urllib2
import sys
from pprint import pprint
# Python 2 only: make UTF-8 the interpreter-wide default string encoding.
# reload(sys) comes first because sys.setdefaultencoding is normally removed
# from the sys module during interpreter start-up.
# NOTE(review): this changes implicit str/unicode conversion for the whole
# process as an import-time side effect -- confirm it is still required.
reload(sys)
sys.setdefaultencoding('utf-8')
# Unify star names by replacing the middle dot with a space, e.g. '贝克·汉姆' -> '贝克 汉姆'
def unifyName(stars):
    """Replace every middle dot ('·') in each star name with a plain space.

    The list is rewritten in place, so the caller's list object is updated,
    and that same list object is returned. For example 'David·Beckham'
    becomes 'David Beckham'.
    """
    # Slice assignment keeps the original list object while swapping its items.
    stars[:] = [name.replace('·', ' ') for name in stars]
    return stars
# read in the star list and remove the finished stars
def getStars(filename='stars.txt', donefilename='done.txt'):
    """Return the star names whose photos still have to be downloaded.

    Reads one name per line from ``filename``, normalizes the names with
    ``unifyName`` and drops every name already listed in ``donefilename``.

    Args:
        filename: path of the text file listing all star names, one per line.
        donefilename: path of the text file listing the finished star names.

    Returns:
        A list of unique, normalized, non-empty names that are not present
        in the done file, in the order they first appear in ``filename``.

    Raises:
        IOError: if either file cannot be opened for reading.
    """
    # Strip only the line terminator. A blanket [:-1] slice would cut off a
    # real character when the last line has no trailing newline and would
    # leave a stray '\r' behind on CRLF files.
    with open(filename, 'r') as fin:
        stars = [line.rstrip('\r\n') for line in fin]
    with open(donefilename, 'r') as findone:
        done = set(line.rstrip('\r\n') for line in findone)

    pending = []
    seen = set()
    for name in unifyName(stars):
        # Skip blank lines, duplicates and stars that are already finished.
        if not name or name in seen or name in done:
            continue
        seen.add(name)
        pending.append(name)
    return pending
# retrieve the star photos and save to local folder
def getStarImage(stars, path='/assets'):
    """Download every available demo photo of each star into a local folder.

    For each star, images numbered 1, 2, 3, ... are requested from the
    Face++ demo site until a request fails (typically an HTTP 404 once the
    images run out). Each image is stored as
    ``<cwd><path>/<star>/<star>-<index>.jpg``.

    Args:
        stars: iterable of star names; each name is used both in the remote
            URL and as the local folder / file name prefix.
        path: folder suffix appended verbatim to the current working
            directory (the default starts with '/').

    Returns:
        The list of stars for which at least one image was saved.
    """
    urlTemplate = 'http://www.faceplusplus.com.cn/assets/demo-img2/%s/%d.jpg'
    baseDir = os.getcwd() + path
    print(baseDir + '/%s/%s-%d.jpg')
    finishStars = []
    for s in stars:
        starDir = baseDir + '/' + s
        # Percent-encode the name so spaces and non-ASCII bytes form a valid URL.
        quotedName = urllib.quote(s)
        saved = 0
        while True:
            index = saved + 1
            url = urlTemplate % (quotedName, index)
            print(url)
            try:
                res = urllib2.urlopen(url)
            except Exception as e:
                # Any failure (e.g. a 404 when the image does not exist) ends
                # the download of this star.
                # NOTE(review): a transient network error is treated the same
                # way, so a partially downloaded star is still reported as
                # finished -- confirm this best-effort behavior is intended.
                print(e)
                break
            # Read the payload first and always release the connection.
            try:
                data = res.read()
            finally:
                res.close()
            # Create the star's folder (and any missing parents) on first use.
            if saved == 0 and not os.path.isdir(starDir):
                os.makedirs(starDir)
            # JPEG data is binary: text mode would corrupt it on platforms
            # that translate newlines.
            with open('%s/%s-%d.jpg' % (starDir, s, index), 'wb') as img:
                img.write(data)
            # Move on to the next image of the same star until a request fails.
            saved += 1
        if saved:
            finishStars.append(s)
    return finishStars
# update the finished star list in order not to fetch their photos next time
def updateFinishStars(finishStars, donefilename='done.txt'):
    """Merge newly finished stars into the done file.

    The existing names are read from ``donefilename``, combined with
    ``finishStars``, de-duplicated and written back one name per line so
    that these stars are not fetched again on the next run.

    Args:
        finishStars: iterable of star names that were just downloaded.
        donefilename: path of the text file listing finished star names;
            it must already exist.

    Returns:
        None. The done file is rewritten in place, sorted by name.

    Raises:
        IOError: if the done file cannot be opened.
    """
    # Strip only the line terminator. A blanket [:-1] slice would cut off a
    # real character when the last line has no trailing newline.
    with open(donefilename, 'r') as findone:
        done = set(line.rstrip('\r\n') for line in findone)
    done.update(finishStars)
    # Blank lines are not star names; do not carry them over.
    done.discard('')
    # Sort so the file content is deterministic instead of set-ordered.
    with open(donefilename, 'w') as foutdone:
        foutdone.writelines(name + '\n' for name in sorted(done))
# main function
def main():
    """Run one download pass: load pending stars, fetch photos, record results."""
    starList = 'stars.txt'
    doneList = 'done.txt'
    # Stars listed in the star file that are not yet in the done file.
    pending = getStars(starList, doneList)
    # Stars for which at least one photo was stored under ./assets.
    fetched = getStarImage(pending, '/assets')
    updateFinishStars(fetched, doneList)
# Run the download pass only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()