From cbb6930f81b300398d011c95c169b2770fbdd3c5 Mon Sep 17 00:00:00 2001 From: narugo1992 Date: Mon, 23 Sep 2024 12:51:05 +0800 Subject: [PATCH] dev(narugo): add silent option --- cheesechaser/datapool/base.py | 10 ++++++---- cheesechaser/datapool/danbooru.py | 8 ++++---- cheesechaser/datapool/nhentai.py | 3 ++- cheesechaser/pipe/base.py | 6 +++--- cheesechaser/pipe/image.py | 8 ++++---- requirements.txt | 4 ++-- 6 files changed, 21 insertions(+), 18 deletions(-) diff --git a/cheesechaser/datapool/base.py b/cheesechaser/datapool/base.py index 0850fde21..772f9f15b 100644 --- a/cheesechaser/datapool/base.py +++ b/cheesechaser/datapool/base.py @@ -116,7 +116,7 @@ class DataPool: """ @contextmanager - def mock_resource(self, resource_id, resource_info) -> ContextManager[Tuple[str, Any]]: + def mock_resource(self, resource_id, resource_info, silent: bool = False) -> ContextManager[Tuple[str, Any]]: """ Context manager to mock a resource. @@ -132,7 +132,8 @@ def mock_resource(self, resource_id, resource_info) -> ContextManager[Tuple[str, raise NotImplementedError # pragma: no cover def batch_download_to_directory(self, resource_ids, dst_dir: str, max_workers: int = 12, - save_metainfo: bool = True, metainfo_fmt: str = '{resource_id}_metainfo.json'): + save_metainfo: bool = True, metainfo_fmt: str = '{resource_id}_metainfo.json', + silent: bool = False): """ Download multiple resources to a directory. @@ -162,7 +163,7 @@ def batch_download_to_directory(self, resource_ids, dst_dir: str, max_workers: i def _func(resource_id, resource_info): try: - with self.mock_resource(resource_id, resource_info) as (td, resource_info): + with self.mock_resource(resource_id, resource_info, silent=silent) as (td, resource_info): copied = False for root, dirs, files in os.walk(td): for file in files: @@ -349,7 +350,7 @@ def _get_dst_filename(self, location: DataLocation): return os.path.basename(location.filename) @contextmanager - def mock_resource(self, resource_id, resource_info) -> ContextManager[Tuple[str, Any]]: + def mock_resource(self, resource_id, resource_info, silent: bool = False) -> ContextManager[Tuple[str, Any]]: """ Context manager to temporarily access a resource. @@ -382,6 +383,7 @@ def mock_resource(self, resource_id, resource_info) -> ContextManager[Tuple[str, idx_repo_type='dataset', idx_revision=self.idx_revision, hf_token=self._hf_token, + silent=silent, ) yield td, resource_info diff --git a/cheesechaser/datapool/danbooru.py b/cheesechaser/datapool/danbooru.py index f6516dc4d..9b13aa5b4 100644 --- a/cheesechaser/datapool/danbooru.py +++ b/cheesechaser/datapool/danbooru.py @@ -200,7 +200,7 @@ def __init__(self, hf_token: Optional[str] = None): self._newest_pool = _DanbooruNewestPartialDataPool(hf_token=hf_token) @contextmanager - def mock_resource(self, resource_id, resource_info) -> ContextManager[Tuple[str, Any]]: + def mock_resource(self, resource_id, resource_info, silent: bool = False) -> ContextManager[Tuple[str, Any]]: """ Provide a context manager for accessing a resource. @@ -219,7 +219,7 @@ def mock_resource(self, resource_id, resource_info) -> ContextManager[Tuple[str, found = False for pool in pools: try: - with pool.mock_resource(resource_id, resource_info) as (td, info): + with pool.mock_resource(resource_id, resource_info, silent=silent) as (td, info): yield td, info except ResourceNotFoundError: pass @@ -326,7 +326,7 @@ def __init__(self, hf_token: Optional[str] = None): self._newest_pool = _DanbooruNewestPartialWebpDataPool(hf_token=hf_token) @contextmanager - def mock_resource(self, resource_id, resource_info) -> ContextManager[Tuple[str, Any]]: + def mock_resource(self, resource_id, resource_info, silent: bool = False) -> ContextManager[Tuple[str, Any]]: """ Provide a context manager for accessing a WebP resource. @@ -345,7 +345,7 @@ def mock_resource(self, resource_id, resource_info) -> ContextManager[Tuple[str, found = False for pool in pools: try: - with pool.mock_resource(resource_id, resource_info) as (td, info): + with pool.mock_resource(resource_id, resource_info, silent=silent) as (td, info): yield td, info except ResourceNotFoundError: pass diff --git a/cheesechaser/datapool/nhentai.py b/cheesechaser/datapool/nhentai.py index d5d3857df..5490b9d09 100644 --- a/cheesechaser/datapool/nhentai.py +++ b/cheesechaser/datapool/nhentai.py @@ -166,7 +166,7 @@ def manga_posts_table(cls, revision: str = 'main', local_files_prefer: bool = Tr return pd.read_csv(csv_file) @contextmanager - def mock_resource(self, resource_id, resource_info) -> ContextManager[Tuple[str, Any]]: + def mock_resource(self, resource_id, resource_info, silent: bool = False) -> ContextManager[Tuple[str, Any]]: """ Create a mock resource for a given manga. @@ -194,6 +194,7 @@ def mock_resource(self, resource_id, resource_info) -> ContextManager[Tuple[str, self.images_pool.batch_download_to_directory( image_ids, origin_dir, save_metainfo=False, + silent=silent, ) files = {} for src_image_file in os.listdir(origin_dir): diff --git a/cheesechaser/pipe/base.py b/cheesechaser/pipe/base.py index 0ccebf20c..0ae35c984 100644 --- a/cheesechaser/pipe/base.py +++ b/cheesechaser/pipe/base.py @@ -163,7 +163,7 @@ class Pipe: def __init__(self, pool: DataPool): self.pool = pool - def retrieve(self, resource_id, resource_metainfo): + def retrieve(self, resource_id, resource_metainfo, silent: bool = False): """ Retrieve a single resource from the data pool. @@ -175,7 +175,7 @@ def retrieve(self, resource_id, resource_metainfo): """ raise NotImplementedError # pragma: no cover - def batch_retrieve(self, resource_ids, max_workers: int = 12) -> PipeSession: + def batch_retrieve(self, resource_ids, max_workers: int = 12, silent: bool = False) -> PipeSession: """ Retrieve multiple resources in parallel using a thread pool. @@ -198,7 +198,7 @@ def _func(order_id, resource_id, resource_metainfo): data, error = None, None try: try: - data = self.retrieve(resource_id, resource_metainfo) + data = self.retrieve(resource_id, resource_metainfo, silent=silent) except ResourceNotFoundError as err: logging.warning(f'Resource {resource_id!r} not found.') error = err diff --git a/cheesechaser/pipe/image.py b/cheesechaser/pipe/image.py index 6dd3897ba..52c81f0c9 100644 --- a/cheesechaser/pipe/image.py +++ b/cheesechaser/pipe/image.py @@ -48,7 +48,7 @@ class SimpleImagePipe(Pipe): :raises InvalidResourceDataError: If multiple image files are found in the resource. """ - def retrieve(self, resource_id, resource_metainfo): + def retrieve(self, resource_id, resource_metainfo, silent: bool = False): """ Retrieve an image from the resource pool. @@ -59,7 +59,7 @@ def retrieve(self, resource_id, resource_metainfo): :raises ResourceNotFoundError: If no image file is found. :raises InvalidResourceDataError: If multiple image files are found. """ - with self.pool.mock_resource(resource_id, resource_metainfo) as (td, resource_metainfo): + with self.pool.mock_resource(resource_id, resource_metainfo, silent=silent) as (td, resource_metainfo): files = os.listdir(td) image_files = [] for file in files: @@ -101,7 +101,7 @@ class DataAttachedImagePipe(Pipe): :raises InvalidResourceDataError: If multiple image files or JSON files are found in the resource. """ - def retrieve(self, resource_id, resource_metainfo): + def retrieve(self, resource_id, resource_metainfo, silent: bool = False): """ Retrieve an image and its associated data from the resource pool. @@ -112,7 +112,7 @@ def retrieve(self, resource_id, resource_metainfo): :raises ResourceNotFoundError: If no image file is found. :raises InvalidResourceDataError: If multiple image files or JSON files are found. """ - with self.pool.mock_resource(resource_id, resource_metainfo) as (td, resource_metainfo): + with self.pool.mock_resource(resource_id, resource_metainfo, silent=silent) as (td, resource_metainfo): files = os.listdir(td) if len(files) == 0: raise ResourceNotFoundError(f'Image not found for resource {resource_id!r}.') diff --git a/requirements.txt b/requirements.txt index be13c7969..c26646a67 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -hfutils>=0.4.0 +hfutils>=0.4.3 hbutils>=0.9.0 huggingface_hub>=0.22 tqdm @@ -9,4 +9,4 @@ httpx[http2] random_user_agent pandas pyrate_limiter -pyarrow \ No newline at end of file +pyarrow