Skip to content

Commit

Permalink
dev(narugo): add silent option
Browse files: browse the repository at this point in the history
  • Loading branch information
narugo1992 committed Sep 23, 2024
1 parent 945e527 commit cbb6930
Show file tree
Hide file tree
Showing 6 changed files with 21 additions and 18 deletions.
10 changes: 6 additions & 4 deletions cheesechaser/datapool/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ class DataPool:
"""

@contextmanager
def mock_resource(self, resource_id, resource_info) -> ContextManager[Tuple[str, Any]]:
def mock_resource(self, resource_id, resource_info, silent: bool = False) -> ContextManager[Tuple[str, Any]]:
"""
Context manager to mock a resource.
Expand All @@ -132,7 +132,8 @@ def mock_resource(self, resource_id, resource_info) -> ContextManager[Tuple[str,
raise NotImplementedError # pragma: no cover

def batch_download_to_directory(self, resource_ids, dst_dir: str, max_workers: int = 12,
save_metainfo: bool = True, metainfo_fmt: str = '{resource_id}_metainfo.json'):
save_metainfo: bool = True, metainfo_fmt: str = '{resource_id}_metainfo.json',
silent: bool = False):
"""
Download multiple resources to a directory.
Expand Down Expand Up @@ -162,7 +163,7 @@ def batch_download_to_directory(self, resource_ids, dst_dir: str, max_workers: i

def _func(resource_id, resource_info):
try:
with self.mock_resource(resource_id, resource_info) as (td, resource_info):
with self.mock_resource(resource_id, resource_info, silent=silent) as (td, resource_info):
copied = False
for root, dirs, files in os.walk(td):
for file in files:
Expand Down Expand Up @@ -349,7 +350,7 @@ def _get_dst_filename(self, location: DataLocation):
return os.path.basename(location.filename)

@contextmanager
def mock_resource(self, resource_id, resource_info) -> ContextManager[Tuple[str, Any]]:
def mock_resource(self, resource_id, resource_info, silent: bool = False) -> ContextManager[Tuple[str, Any]]:
"""
Context manager to temporarily access a resource.
Expand Down Expand Up @@ -382,6 +383,7 @@ def mock_resource(self, resource_id, resource_info) -> ContextManager[Tuple[str,
idx_repo_type='dataset',
idx_revision=self.idx_revision,
hf_token=self._hf_token,
silent=silent,
)
yield td, resource_info

Expand Down
8 changes: 4 additions & 4 deletions cheesechaser/datapool/danbooru.py
Original file line number Diff line number Diff line change
Expand Up @@ -200,7 +200,7 @@ def __init__(self, hf_token: Optional[str] = None):
self._newest_pool = _DanbooruNewestPartialDataPool(hf_token=hf_token)

@contextmanager
def mock_resource(self, resource_id, resource_info) -> ContextManager[Tuple[str, Any]]:
def mock_resource(self, resource_id, resource_info, silent: bool = False) -> ContextManager[Tuple[str, Any]]:
"""
Provide a context manager for accessing a resource.
Expand All @@ -219,7 +219,7 @@ def mock_resource(self, resource_id, resource_info) -> ContextManager[Tuple[str,
found = False
for pool in pools:
try:
with pool.mock_resource(resource_id, resource_info) as (td, info):
with pool.mock_resource(resource_id, resource_info, silent=silent) as (td, info):
yield td, info
except ResourceNotFoundError:
pass
Expand Down Expand Up @@ -326,7 +326,7 @@ def __init__(self, hf_token: Optional[str] = None):
self._newest_pool = _DanbooruNewestPartialWebpDataPool(hf_token=hf_token)

@contextmanager
def mock_resource(self, resource_id, resource_info) -> ContextManager[Tuple[str, Any]]:
def mock_resource(self, resource_id, resource_info, silent: bool = False) -> ContextManager[Tuple[str, Any]]:
"""
Provide a context manager for accessing a WebP resource.
Expand All @@ -345,7 +345,7 @@ def mock_resource(self, resource_id, resource_info) -> ContextManager[Tuple[str,
found = False
for pool in pools:
try:
with pool.mock_resource(resource_id, resource_info) as (td, info):
with pool.mock_resource(resource_id, resource_info, silent=silent) as (td, info):
yield td, info
except ResourceNotFoundError:
pass
Expand Down
3 changes: 2 additions & 1 deletion cheesechaser/datapool/nhentai.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@ def manga_posts_table(cls, revision: str = 'main', local_files_prefer: bool = Tr
return pd.read_csv(csv_file)

@contextmanager
def mock_resource(self, resource_id, resource_info) -> ContextManager[Tuple[str, Any]]:
def mock_resource(self, resource_id, resource_info, silent: bool = False) -> ContextManager[Tuple[str, Any]]:
"""
Create a mock resource for a given manga.
Expand Down Expand Up @@ -194,6 +194,7 @@ def mock_resource(self, resource_id, resource_info) -> ContextManager[Tuple[str,
self.images_pool.batch_download_to_directory(
image_ids, origin_dir,
save_metainfo=False,
silent=silent,
)
files = {}
for src_image_file in os.listdir(origin_dir):
Expand Down
6 changes: 3 additions & 3 deletions cheesechaser/pipe/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ class Pipe:
def __init__(self, pool: DataPool):
self.pool = pool

def retrieve(self, resource_id, resource_metainfo):
def retrieve(self, resource_id, resource_metainfo, silent: bool = False):
"""
Retrieve a single resource from the data pool.
Expand All @@ -175,7 +175,7 @@ def retrieve(self, resource_id, resource_metainfo):
"""
raise NotImplementedError # pragma: no cover

def batch_retrieve(self, resource_ids, max_workers: int = 12) -> PipeSession:
def batch_retrieve(self, resource_ids, max_workers: int = 12, silent: bool = False) -> PipeSession:
"""
Retrieve multiple resources in parallel using a thread pool.
Expand All @@ -198,7 +198,7 @@ def _func(order_id, resource_id, resource_metainfo):
data, error = None, None
try:
try:
data = self.retrieve(resource_id, resource_metainfo)
data = self.retrieve(resource_id, resource_metainfo, silent=silent)
except ResourceNotFoundError as err:
logging.warning(f'Resource {resource_id!r} not found.')
error = err
Expand Down
8 changes: 4 additions & 4 deletions cheesechaser/pipe/image.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ class SimpleImagePipe(Pipe):
:raises InvalidResourceDataError: If multiple image files are found in the resource.
"""

def retrieve(self, resource_id, resource_metainfo):
def retrieve(self, resource_id, resource_metainfo, silent: bool = False):
"""
Retrieve an image from the resource pool.
Expand All @@ -59,7 +59,7 @@ def retrieve(self, resource_id, resource_metainfo):
:raises ResourceNotFoundError: If no image file is found.
:raises InvalidResourceDataError: If multiple image files are found.
"""
with self.pool.mock_resource(resource_id, resource_metainfo) as (td, resource_metainfo):
with self.pool.mock_resource(resource_id, resource_metainfo, silent=silent) as (td, resource_metainfo):
files = os.listdir(td)
image_files = []
for file in files:
Expand Down Expand Up @@ -101,7 +101,7 @@ class DataAttachedImagePipe(Pipe):
:raises InvalidResourceDataError: If multiple image files or JSON files are found in the resource.
"""

def retrieve(self, resource_id, resource_metainfo):
def retrieve(self, resource_id, resource_metainfo, silent: bool = False):
"""
Retrieve an image and its associated data from the resource pool.
Expand All @@ -112,7 +112,7 @@ def retrieve(self, resource_id, resource_metainfo):
:raises ResourceNotFoundError: If no image file is found.
:raises InvalidResourceDataError: If multiple image files or JSON files are found.
"""
with self.pool.mock_resource(resource_id, resource_metainfo) as (td, resource_metainfo):
with self.pool.mock_resource(resource_id, resource_metainfo, silent=silent) as (td, resource_metainfo):
files = os.listdir(td)
if len(files) == 0:
raise ResourceNotFoundError(f'Image not found for resource {resource_id!r}.')
Expand Down
4 changes: 2 additions & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
hfutils>=0.4.0
hfutils>=0.4.3
hbutils>=0.9.0
huggingface_hub>=0.22
tqdm
Expand All @@ -9,4 +9,4 @@ httpx[http2]
random_user_agent
pandas
pyrate_limiter
pyarrow
pyarrow

0 comments on commit cbb6930

Please sign in to comment.