Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add compress to cbz option #7

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 8 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -109,9 +109,9 @@ For example, downloading Tower of God, Chapter 150 would result in the following
$ python webtoon_downloader.py [url] --dest ./path/to/parent/folder/of/downloaded/images
```

* The downloaded images of the chapters are by default all located in the ```[dest]```, however these images can be seperated into seperate directories by providing the ```--seperate``` argument, where each directory corresponds to a downloaded chapter.
* The downloaded images of the chapters are by default all located in the ```[dest]```, however these images can be separated into separate directories by providing the ```--separate``` argument, where each directory corresponds to a downloaded chapter.
```ps
$ python webtoon_downloader.py [url] --seperate
$ python webtoon_downloader.py [url] --separate
```
For example, downloading Tower of God, Chapter 150 to 152 would result in the following:
```ps
Expand All @@ -133,6 +133,12 @@ For example, downloading Tower of God, Chapter 150 would result in the following
│...
```

* When downloading images into separate directories, the individual directories can additionally be compressed into .cbz files by providing the ```--cbz``` argument.
```ps
$ python webtoon_downloader.py [url] --separate --cbz
```
> NOTE: The ```--cbz``` argument only works when the ```--separate``` argument is also provided.

For more details on positional arguments, please use the ```-h``` or ```--help``` argument:
```console
py webtoon_downloader.py --help
Expand Down
3 changes: 3 additions & 0 deletions src/options.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,9 @@ def initialize(self):
action='store_true', default=False)
self.parser.add_argument('--readme', '-r', help=('displays readme file content for '
'more help details'), required=False, action='store_true')
self.parser.add_argument('--cbz', required=False,
wyldphyre marked this conversation as resolved.
Show resolved Hide resolved
help='compress each chapter to a .cbz comic archive. Only works when also using the --separate option',
action='store_true', default=False)
self.parser._positionals.title = "commands"

def print_readme(self):
Expand Down
41 changes: 32 additions & 9 deletions src/webtoon_downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import signal
import sys
import time
import zipfile
from bs4 import BeautifulSoup
from dataclasses import dataclass, field
from options import Options
Expand Down Expand Up @@ -260,11 +261,16 @@ def download_image(chapter_download_task_id: int, url: str, dest: str, chapter_n
if r.status_code == 200:
r.raw.decode_content = True
file_name = f'{chapter_number}_{page_number}'
final_file_name = ''
if(image_format == 'png'):
Image.open(r.raw).save(os.path.join(dest, f'{file_name}.png'))
final_file_name = os.path.join(dest, f'{file_name}.png')
Image.open(r.raw).save(final_file_name)
else:
with open(os.path.join(dest, f'{file_name}.jpg'), 'wb') as f:
final_file_name = os.path.join(dest, f'{file_name}.jpg')
with open(final_file_name, 'wb') as f:
shutil.copyfileobj(r.raw, f)

return final_file_name
else:
log.error(f'[bold red blink]Unable to download page[/] [medium_spring_green]{page_number}[/]'
f'from chapter [medium_spring_green]{chapter_number}[/], request returned'
Expand All @@ -280,7 +286,7 @@ def exit_handler(sig, frame):
progress.console.print('')
sys.exit(0)

def download_chapter(chapter_download_task_id: int, session: requests.Session, viewer_url: str, chapter_info: ChapterInfo, dest: str, images_format: str='jpg'):
def download_chapter(chapter_download_task_id: int, session: requests.Session, viewer_url: str, chapter_info: ChapterInfo, dest: str, images_format: str='jpg', compress_cbz=False):
"""
downloads pages starting of a given chapter, inclusive.
stores the downloaded images into the dest path.
Expand Down Expand Up @@ -313,15 +319,28 @@ def download_chapter(chapter_download_task_id: int, session: requests.Session, v
os.makedirs(dest)
progress.update(chapter_download_task_id, total=len(img_urls), rendered_total=len(img_urls))
progress.start_task(chapter_download_task_id)
image_download_futures = set()
with ThreadPoolExecutorWithQueueSizeLimit(maxsize=10, max_workers=4) as pool:
for page_number, url in enumerate(img_urls):
pool.submit(download_image, chapter_download_task_id, url, dest, chapter_info.chapter_number, page_number, image_format=images_format)
image_download_futures.add(
pool.submit(download_image, chapter_download_task_id, url, dest, chapter_info.chapter_number, page_number, image_format=images_format)
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Images will still be downloaded to the provided dest, so you end up with a copy of the images as well as the .cbz.

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This function needs to change anyway, possibly to allow for in-memory .cbz conversion.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, I left the existing files and folder in place because I didn't want to assume too much about what should change. I mostly added this to scratch my own itch of not wanting to deal with 150+ folders to compress for the toon I wanted to download.

)
if done_event.is_set():
return

concurrent.futures.wait(image_download_futures, return_when=concurrent.futures.ALL_COMPLETED)

if compress_cbz:
with zipfile.ZipFile(f'{dest}.cbz', 'w') as cbz_zip:
for future in image_download_futures:
image_file_path = future.result()
image_folder, image_file_name = os.path.split(image_file_path)
cbz_zip.write(image_file_path, compress_type=zipfile.ZIP_STORED, arcname=image_file_name)

log.info(f'Chapter {chapter_info.chapter_number} download complete with a total of {len(img_urls)} pages [green]✓')
progress.remove_task(chapter_download_task_id)
def download_webtoon(series_url: str, start_chapter: int, end_chapter: int, dest: str, images_format: str='jpg', download_latest_chapter=False, separate_chapters=False):

def download_webtoon(series_url: str, start_chapter: int, end_chapter: int, dest: str, images_format: str='jpg', download_latest_chapter=False, separate_chapters=False, compress_cbz=False):
"""
downloads all chapters starting from start_chapter until end_chapter, inclusive.
stores the downloaded chapter into the dest path.
Expand All @@ -348,6 +367,9 @@ def download_webtoon(series_url: str, start_chapter: int, end_chapter: int, dest
separate_chapters: bool
separate downloaded chapters in their own folder under the dest path if true,
else stores all images in the dest folder.

compress_cbz: bool
compress separate chapters to a .cbz archive if true
"""
session = requests.session()
session.cookies.set("needGDPR", "FALSE", domain=".webtoons.com")
Expand Down Expand Up @@ -391,7 +413,7 @@ def download_webtoon(series_url: str, start_chapter: int, end_chapter: int, dest
chapter_dest = os.path.join(dest, str(chapter_info.chapter_number)) if separate_chapters else dest
chapter_download_task = progress.add_task(f"[plum2]Chapter {chapter_info.chapter_number}.", type='Pages', type_color='grey85', number_format='>02d', start=False, rendered_total='??')
chapter_download_futures.add(
pool.submit(download_chapter, chapter_download_task, session, viewer_url, chapter_info, chapter_dest, images_format)
pool.submit(download_chapter, chapter_download_task, session, viewer_url, chapter_info, chapter_dest, images_format, compress_cbz)
)

while chapter_download_futures:
Expand All @@ -410,7 +432,7 @@ def download_webtoon(series_url: str, start_chapter: int, end_chapter: int, dest
chapter_dest = os.path.join(dest, str(chapter_info.chapter_number)) if separate_chapters else dest
chapter_download_task = progress.add_task(f"[plum2]Chapter {chapter_info.chapter_number}.", type='Pages', type_color='grey85', number_format='>02d', start=False, rendered_total='??')
chapter_download_futures.add(
pool.submit(download_chapter, chapter_download_task, session, viewer_url, chapter_info, chapter_dest, images_format)
pool.submit(download_chapter, chapter_download_task, session, viewer_url, chapter_info, chapter_dest, images_format, compress_cbz)
)

rich.print(f'Successfully Downloaded [red]{n_chapters_to_download}[/] {"chapter" if n_chapters_to_download <= 1 else "chapters"} of [medium_spring_green]{series_title}[/] in [italic plum2]{os.path.abspath(dest)}[/].')
Expand All @@ -432,7 +454,8 @@ def main():
return
series_url = args.url
separate = args.seperate or args.separate
download_webtoon(series_url, args.start, args.end, args.dest, args.images_format, args.latest, separate)
compress_cbz = args.cbz and args.separate
wyldphyre marked this conversation as resolved.
Show resolved Hide resolved
download_webtoon(series_url, args.start, args.end, args.dest, args.images_format, args.latest, separate, compress_cbz)

if(__name__ == '__main__'):
main()