-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathsearch_bing_api.py
100 lines (82 loc) · 3.27 KB
/
search_bing_api.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
from requests import exceptions
import argparse
import requests
from PIL import Image
import os
from dotenv import load_dotenv
from pathlib import Path
load_dotenv()
print("In script")
ap = argparse.ArgumentParser()
ap.add_argument("-q", "--query", required=True,
help="search query to search Bing Image API for")
args = vars(ap.parse_args())
API_KEY = os.getenv("BING_IMG_SEARCH_API_KEY")
print(API_KEY)
if not API_KEY:
print("Please set BING_IMG_SEARCH_API_KEY")
quit()
MAX_RESULTS = 250
GROUP_SIZE = 50
URL = "https://api.cognitive.microsoft.com/bing/v7.0/images/search"
EXCEPTIONS = {IOError, FileNotFoundError, exceptions.RequestException, exceptions.HTTPError, exceptions.ConnectionError,
exceptions.Timeout}
term = args["query"]
headers = {"Ocp-Apim-Subscription-Key": API_KEY}
params = {"q": term, "offset": 0, "count": GROUP_SIZE}
# make the search
print("[INFO] searching Bing API for '{}'".format(term))
search = requests.get(URL, headers=headers, params=params)
search.raise_for_status()
results = search.json()
estNumResults = min(results["totalEstimatedMatches"], MAX_RESULTS)
print("[INFO] {} total results for '{}'".format(estNumResults,
term))
# initialize the total number of images downloaded thus far
total = 0
# loop over the estimated number of results in `GROUP_SIZE` groups
for offset in range(0, estNumResults, GROUP_SIZE):
# update the search parameters using the current offset, then
# make the request to fetch the results
print("[INFO] making request for group {}-{} of {}...".format(
offset, offset + GROUP_SIZE, estNumResults))
params["offset"] = offset
search = requests.get(URL, headers=headers, params=params)
search.raise_for_status()
results = search.json()
print("[INFO] saving images for group {}-{} of {}...".format(
offset, offset + GROUP_SIZE, estNumResults))
# loop over the results
for v in results["value"]:
# try to download the image
try:
# make a request to download the image
print("[INFO] fetching: {}".format(v["contentUrl"]))
r = requests.get(v["contentUrl"], timeout=30)
# build the path to the output image
ext = v["contentUrl"][v["contentUrl"].rfind("."):]
dataset_path = Path.cwd() / "dataset" / term
path_out_img = dataset_path / f"{str(total).zfill(8)}{ext}"
dataset_path.mkdir(parents=True, exist_ok=True)
# write the image to disk
f = open(path_out_img, "wb")
f.write(r.content)
f.close()
# catch any errors that would not unable us to download the
# image
except Exception as e:
# check to see if our exception is in our list of
# exceptions to check for
print(e)
if type(e) in EXCEPTIONS:
print("[INFO] skipping: {}".format(v["contentUrl"]))
continue
try:
im = Image.open(path_out_img)
except IOError:
# filename not an image file, so it should be ignored
print("[INFO] deleting: {}".format(path_out_img))
os.remove(path_out_img)
continue
# update the counter
total += 1