|
| 1 | +#!/usr/bin/env python3 |
| 2 | + |
| 3 | +"""Download flags of countries (with error handling). |
| 4 | +
|
| 5 | +asyncio async/await version |
| 6 | +
|
| 7 | +""" |
| 8 | +# tag::FLAGS2_ASYNCIO_TOP[] |
| 9 | +import asyncio |
| 10 | +from collections import Counter |
| 11 | +from http import HTTPStatus |
| 12 | +from pathlib import Path |
| 13 | + |
| 14 | +import httpx |
| 15 | +import tqdm # type: ignore |
| 16 | + |
| 17 | +from flags2_common import main, DownloadStatus, save_flag |
| 18 | + |
# low concurrency default to avoid errors from remote site,
# such as 503 - Service Temporarily Unavailable
DEFAULT_CONCUR_REQ = 5
# Passed to main() together with the default above; presumably the ceiling
# applied to a user-requested concurrency level -- confirm in flags2_common.
MAX_CONCUR_REQ = 1000
| 23 | + |
async def get_flag(client: httpx.AsyncClient,  # <1>
                   base_url: str,
                   cc: str) -> bytes:
    """Fetch the GIF image for country code `cc` and return its raw bytes.

    The image is requested from ``{base_url}/{cc}/{cc}.gif`` (the whole URL
    is lower-cased), with a 3.1 second timeout and redirects followed.
    `raise_for_status()` is called on the response, so an HTTP error status
    propagates to the caller as an exception instead of returning a body.
    """
    flag_url = f'{base_url}/{cc}/{cc}.gif'.lower()
    response = await client.get(
        flag_url,
        timeout=3.1,
        follow_redirects=True,
    )  # <2>
    response.raise_for_status()
    return response.content
| 31 | + |
| 32 | +# tag::FLAGS3_ASYNCIO_GET_COUNTRY[] |
async def get_country(client: httpx.AsyncClient,
                      base_url: str,
                      cc: str) -> str:  # <1>
    """Fetch the metadata for country code `cc` and return its country name.

    The JSON document is requested from ``{base_url}/{cc}/metadata.json``
    (the whole URL is lower-cased), with a 3.1 second timeout and redirects
    followed. `raise_for_status()` is called on the response, so an HTTP
    error status propagates to the caller as an exception. The returned
    value is whatever is stored under the ``'country'`` key of the decoded
    JSON; a missing key raises `KeyError`.
    """
    metadata_url = f'{base_url}/{cc}/metadata.json'.lower()
    response = await client.get(
        metadata_url,
        timeout=3.1,
        follow_redirects=True,
    )
    response.raise_for_status()
    return response.json()['country']  # <2> <3>
| 41 | +# end::FLAGS3_ASYNCIO_GET_COUNTRY[] |
| 42 | + |
| 43 | +# tag::FLAGS3_ASYNCIO_DOWNLOAD_ONE[] |
async def download_one(client: httpx.AsyncClient,
                       cc: str,
                       base_url: str,
                       semaphore: asyncio.Semaphore,
                       verbose: bool) -> DownloadStatus:
    """Download one flag, save it under its country name, report the outcome.

    The image and the country name are fetched with two separate requests,
    each made while holding `semaphore`; the semaphore is released between
    them. On success the image is written by `save_flag` in a worker thread
    (via `asyncio.to_thread`) as ``<country>.gif``, with spaces in the
    country name replaced by underscores.

    Returns `DownloadStatus.OK` on success and `DownloadStatus.NOT_FOUND`
    when either request answers 404. Any other `httpx.HTTPStatusError` is
    re-raised; other exceptions are not handled here. When `verbose` is
    true, a one-line message prefixed with `cc` is printed.
    """
    try:
        async with semaphore:  # <1>
            flag_bytes = await get_flag(client, base_url, cc)
        async with semaphore:  # <2>
            country_name = await get_country(client, base_url, cc)
    except httpx.HTTPStatusError as exc:
        response = exc.response
        # Only a 404 is translated into a status; everything else goes up.
        if response.status_code != HTTPStatus.NOT_FOUND:
            raise
        outcome = DownloadStatus.NOT_FOUND
        note = f'not found: {response.url}'
    else:
        stem = country_name.replace(' ', '_')  # <3>
        # File I/O is blocking, so it runs off the event loop thread.
        await asyncio.to_thread(save_flag, flag_bytes, f'{stem}.gif')
        outcome = DownloadStatus.OK
        note = 'OK'

    if verbose and note:
        print(cc, note)
    return outcome
| 69 | +# end::FLAGS3_ASYNCIO_DOWNLOAD_ONE[] |
| 70 | + |
| 71 | +# tag::FLAGS2_ASYNCIO_START[] |
async def supervisor(cc_list: list[str],
                     base_url: str,
                     verbose: bool,
                     concur_req: int) -> Counter[DownloadStatus]:  # <1>
    """Download the flags for `cc_list` concurrently and tally the outcomes.

    One `download_one` coroutine is created per country code (in sorted
    order), all sharing a single `httpx.AsyncClient` and a semaphore that
    allows `concur_req` holders at a time. Results are consumed in
    completion order. When `verbose` is false a tqdm progress bar wraps the
    iteration; when it is true, each failure is printed as
    ``<CC> error: <message>``.

    Returns a `Counter` keyed by `DownloadStatus`. Downloads that raise
    `httpx.HTTPStatusError` or `httpx.RequestError` are counted as
    `DownloadStatus.ERROR`. A `KeyboardInterrupt` stops the loop early and
    the counts gathered so far are returned.
    """
    counter: Counter[DownloadStatus] = Counter()
    semaphore = asyncio.Semaphore(concur_req)  # <2>
    async with httpx.AsyncClient() as client:
        to_do = [download_one(client, cc, base_url, semaphore, verbose)
                 for cc in sorted(cc_list)]  # <3>
        to_do_iter = asyncio.as_completed(to_do)  # <4>
        if not verbose:
            to_do_iter = tqdm.tqdm(to_do_iter, total=len(cc_list))  # <5>
        error: httpx.HTTPError | None = None  # <6>
        for coro in to_do_iter:  # <7>
            # Reset the per-download error state. Without this, the first
            # failure would stay set and every later successful download
            # would be counted (and reported) as an error too.
            error = None
            error_msg = ''
            try:
                status = await coro  # <8>
            except httpx.HTTPStatusError as exc:
                error_msg = 'HTTP error {resp.status_code} - {resp.reason_phrase}'
                error_msg = error_msg.format(resp=exc.response)
                error = exc  # <9>
            except httpx.RequestError as exc:
                error_msg = f'{exc} {type(exc)}'.strip()
                error = exc  # <10>
            except KeyboardInterrupt:
                break

            if error is not None:
                status = DownloadStatus.ERROR  # <11>
                if verbose:
                    url = str(error.request.url)  # <12>
                    # Both request URLs built in this module have the form
                    # .../<cc>/<file>, so the parent directory name is the
                    # country code for the image and the metadata request
                    # alike (the file stem would be 'metadata' for the
                    # latter).
                    cc = Path(url).parent.name.upper()  # <13>
                    print(f'{cc} error: {error_msg}')
            counter[status] += 1

    return counter
| 107 | + |
def download_many(cc_list: list[str],
                  base_url: str,
                  verbose: bool,
                  concur_req: int) -> Counter[DownloadStatus]:
    """Synchronous entry point: run `supervisor` to completion.

    Starts an event loop with `asyncio.run`, drives the `supervisor`
    coroutine built from the given arguments, and returns the counter of
    download statuses it produces.
    """
    return asyncio.run(
        supervisor(cc_list, base_url, verbose, concur_req)
    )  # <14>
| 116 | + |
# When executed as a script, delegate to flags2_common.main, handing it the
# download function plus the default and maximum concurrency constants.
if __name__ == '__main__':
    main(download_many, DEFAULT_CONCUR_REQ, MAX_CONCUR_REQ)
| 119 | +# end::FLAGS2_ASYNCIO_START[] |
0 commit comments