From ae122f428a0a5e72ec76193b797b3750435f8f25 Mon Sep 17 00:00:00 2001 From: Kentaro Wada Date: Sun, 28 Jan 2024 10:42:58 +0900 Subject: [PATCH] Move tests from ci.yml to test___main__.py --- .github/workflows/ci.yml | 90 -------------------------- Makefile | 2 +- pyproject.toml | 1 + tests/test___main__.py | 132 +++++++++++++++++++++++++++++++++++++++ 4 files changed, 134 insertions(+), 91 deletions(-) create mode 100644 tests/test___main__.py diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index d0ff1205..77105a19 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -32,96 +32,6 @@ jobs: run: | make test - - name: Download from URL other than Gdrive - run: | - output=/tmp/gdown_r - gdown https://raw.githubusercontent.com/wkentaro/gdown/3.1.0/gdown/__init__.py -O $output --quiet - test $(md5sum $output | awk '{print $1}') = 2a51927dde6b146ce56b4d89ebbb5268 - rm -rf $output - - - name: Download small file from Google Drive - run: | - output=/tmp/spam.txt - success=0 - while read -r file_id - do - gdown $file_id -O $output --quiet || continue - test $(cat $output) = spam && success=1 && break - done < tests/data/file_ids.csv - test $success = 1 - rm -rf $output - - - name: Download large file from Google Drive - run: | - output=/tmp/large_file - success=0 - while IFS=, read -r file_id md5 - do - gdown $file_id -O $output --quiet || continue - test $(md5sum $output | awk '{print $1}') = $md5 && success=1 && break - done < tests/data/file_ids_large.csv - test $success = 1 - rm -rf $output - - - name: Download and extract - run: | - gdown https://github.com/wkentaro/gdown/archive/refs/tags/v4.0.0.tar.gz -O - --quiet | tar zxf - - test -d gdown-4.0.0 - - - name: Download folder from Google Drive - run: | - output=/tmp/folder/ - success=0 - while IFS=, read -r folder_id md5 - do - gdown $folder_id -O $output --quiet --folder || continue - actual_hash=$(find $output -type f -exec md5sum {} \; | awk '{print $1}' | sort | md5sum | awk '{print $1}') - test $actual_hash = $md5 || echo "$folder_id, $actual_hash" && success=1 && break - done < tests/data/folder_ids.csv - test $success = 1 - rm -rf $output - - - name: Try to download a folder with a file count more than the limit from Google Drive, without remaining ok - run: | - output=/tmp/folder-limit/ - gdown https://drive.google.com/drive/folders/1gd3xLkmjT8IckN6WtMbyFZvLR4exRIkn -O $output --quiet --folder && exit 1 || exit 0 - - - name: Download docs from Google Drive - run: | - output=/tmp/file.txt - file_id=1TFYNzuZJTgNGzGmjraZ58ZVOh9_YoKeBnU-opWgXQL4 - md5=6c17d87d3d01405ac5c9bb65ee2d2fc2 - gdown $file_id -O $output --quiet --format txt - actual_hash=$(md5sum $output | awk '{print $1}') - test $actual_hash = $md5 - - - name: Download spreadsheets from Google Drive - run: | - output=/tmp/file.pdf - file_id=1h6wQX7ATSJDOSWFEjHPmv_nukJzZD_zZ30Jvy6XNiTE - md5=5be20dd8a23afa06365714edc24856f3 - gdown $file_id -O $output --quiet --format pdf - actual_hash=$(md5sum $output | awk '{print $1}') - test $actual_hash = $md5 - - - name: Download slides from Google Drive - run: | - output=/tmp/file.pdf - file_id=13AhW1Z1GYGaiTpJ0Pr2TTXoQivb6jx-a - md5=96704c6c40e308a68d3842e83a0136b9 - gdown $file_id -O $output --quiet --format pdf - actual_hash=$(md5sum $output | awk '{print $1}') - test $actual_hash = $md5 - - # FIXME: too unstable - # - name: Download a folder with a file count more than the limit from Google Drive - # run: | - # output=/tmp/folder-limit/ - # gdown https://drive.google.com/drive/folders/1gd3xLkmjT8IckN6WtMbyFZvLR4exRIkn -O $output --quiet --folder --remaining-ok - # actual_hash=$(find $output -type f -exec md5sum {} \; | awk '{print $1}' | sort | md5sum | awk '{print $1}') - # test $actual_hash = b0e45acb8d9c7d9200ce8fe38896c790 - # rm -rf $output - - name: Install from dist run: | make build diff --git a/Makefile b/Makefile index f7e14c6e..e2e74a65 100644 --- a/Makefile +++ b/Makefile @@ -13,7 +13,7 @@ format: ruff check --fix test: - python -m pytest -v tests + python -m pytest -n auto -v tests clean: rm -rf build dist *.egg-info diff --git a/pyproject.toml b/pyproject.toml index dbf374ca..06e6cc88 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -44,6 +44,7 @@ test = [ "build", "mypy", "pytest", + "pytest-xdist", "ruff", "twine", "types-requests", diff --git a/tests/test___main__.py b/tests/test___main__.py new file mode 100644 index 00000000..6f5e8d07 --- /dev/null +++ b/tests/test___main__.py @@ -0,0 +1,132 @@ +import os +import shlex +import subprocess +import sys +import tempfile + +from gdown.cached_download import assert_md5sum + +here = os.path.dirname(os.path.abspath(__file__)) + + +def _test_cli_with_md5(url_or_id, md5, options=None): + with tempfile.NamedTemporaryFile() as f: + cmd = f"gdown {url_or_id} -O {f.name}" + if options is not None: + cmd = f"{cmd} {options}" + subprocess.call(shlex.split(cmd)) + assert_md5sum(filename=f.name, md5=md5) + + +def _test_cli_with_content(url_or_id, content): + with tempfile.NamedTemporaryFile() as f: + subprocess.call(shlex.split(f"gdown {url_or_id} -O {f.name}")) + with open(f.name) as f: + assert f.read() == content + + +def test_download_from_url_other_than_gdrive(): + url = "https://raw.githubusercontent.com/wkentaro/gdown/3.1.0/gdown/__init__.py" + md5 = "2a51927dde6b146ce56b4d89ebbb5268" + _test_cli_with_md5(url_or_id=url, md5=md5) + + +def test_download_small_file_from_gdrive(): + with open(os.path.join(here, "data/file_ids.csv")) as f: + file_ids = [file_id.strip() for file_id in f] + + for file_id in file_ids: + try: + _test_cli_with_content(url_or_id=file_id, content="spam\n") + break + except AssertionError as e: + print(e, file=sys.stderr) + continue + else: + raise AssertionError(f"Failed to download any of the files: {file_ids}") + + +def test_download_large_file_from_gdrive(): + with open(os.path.join(here, "data/file_ids_large.csv")) as f: + file_id_and_md5s = [[x.strip() for x in file_id.split(",")] for file_id in f] + + for file_id, md5 in file_id_and_md5s: + try: + _test_cli_with_md5(url_or_id=file_id, md5=md5) + break + except AssertionError as e: + print(e, file=sys.stderr) + continue + else: + raise AssertionError( + f"Failed to download any of the files: {zip(*file_id_and_md5s)[0]}" + ) + + +def test_download_and_extract(): + cmd = "gdown https://github.com/wkentaro/gdown/archive/refs/tags/v4.0.0.tar.gz -O - | tar zxvf -" # noqa: E501 + with tempfile.TemporaryDirectory() as d: + subprocess.call(cmd, shell=True, cwd=d) + assert os.path.exists(os.path.join(d, "gdown-4.0.0/gdown/__init__.py")) + + +def test_download_folder_from_gdrive(): + with open(os.path.join(here, "data/folder_ids.csv")) as f: + folder_id_and_md5s = [ + [x.strip() for x in folder_id.split(",")] for folder_id in f + ] + + for folder_id, md5 in folder_id_and_md5s: + with tempfile.TemporaryDirectory() as d: + cmd = f"gdown {folder_id} -O {d} --folder" + subprocess.call(shlex.split(cmd)) + + cmd = "find . -type f -exec md5sum {} \\; | awk '{print $1}' | sort | md5sum | awk '{print $1}'" # noqa: E501 + md5_actual = ( + subprocess.check_output(cmd, shell=True, cwd=d).decode().strip() + ) + try: + assert md5_actual == md5 + break + except AssertionError as e: + print(e, file=sys.stderr) + else: + raise AssertionError( + f"Failed to download any of the folders: {zip(*folder_id_and_md5s)[0]}" + ) + + +def test_download_a_folder_with_remining_ok_false(): + with tempfile.TemporaryDirectory() as d: + cmd = f"gdown https://drive.google.com/drive/folders/1gd3xLkmjT8IckN6WtMbyFZvLR4exRIkn -O {d} --folder" # noqa: E501 + assert subprocess.call(shlex.split(cmd)) == 1 + + +# def test_download_docs_from_gdrive(): +# file_id = "1TFYNzuZJTgNGzGmjraZ58ZVOh9_YoKeBnU-opWgXQL4" +# md5 = "6c17d87d3d01405ac5c9bb65ee2d2fc2" +# _test_cli_with_md5(url_or_id=file_id, md5=md5, options="--format txt") +# +# +# def test_download_spreadsheets_from_gdrive(): +# file_id = "1h6wQX7ATSJDOSWFEjHPmv_nukJzZD_zZ30Jvy6XNiTE" +# md5 = "5be20dd8a23afa06365714edc24856f3" +# _test_cli_with_md5(url_or_id=file_id, md5=md5, options="--format pdf") + + +def test_download_slides_from_gdrive(): + file_id = "13AhW1Z1GYGaiTpJ0Pr2TTXoQivb6jx-a" + md5 = "96704c6c40e308a68d3842e83a0136b9" + _test_cli_with_md5(url_or_id=file_id, md5=md5, options="--format pdf") + + +def test_download_a_folder_with_file_content_more_than_the_limit(): + url = "https://drive.google.com/drive/folders/1gd3xLkmjT8IckN6WtMbyFZvLR4exRIkn" + + with tempfile.TemporaryDirectory() as d: + cmd = f"gdown {url} -O {d} --folder --remaining-ok" + subprocess.check_call(shlex.split(cmd)) + + filenames = sorted(os.listdir(d)) + for i in range(50): + assert filenames[i] == f"file_{i:02d}.txt"