diff --git a/app/api/helpers/scraper.py b/app/api/helpers/scraper.py new file mode 100644 index 0000000..627db4c --- /dev/null +++ b/app/api/helpers/scraper.py @@ -0,0 +1,13 @@ +from selectolax.parser import Node + + +class ScrapeHelper: + @staticmethod + def get_text(node: Node, selector: str) -> str | None: + element = node.css_first(selector) + return element.text().strip() if element else None + + @staticmethod + def get_attribute(node: Node, selector: str, attribute: str) -> str | None: + element = node.css_first(selector) + return element.attributes[attribute] if element else None diff --git a/app/api/scrapers/most_viewed.py b/app/api/scrapers/most_viewed.py index 120ad5c..0dd942e 100644 --- a/app/api/scrapers/most_viewed.py +++ b/app/api/scrapers/most_viewed.py @@ -2,7 +2,7 @@ from selectolax.parser import Node from app.api.decorators.return_decorator import return_on_error -from ..utils import get_text, get_attribute +from app.api.helpers.scraper import ScrapeHelper from ..helpers.html_helper import HTMLHelper @@ -16,32 +16,37 @@ def __init__(self, chart: str) -> None: self.chart = chart # Facades self.html_helper = HTMLHelper() + self.scraper_helper = ScrapeHelper() # Parser self.parser = self.html_helper.get_parser(url) @return_on_error("") def __get_slug(self, node: Node) -> str: - slug = get_attribute(node, ".manga-detail .manga-name a", "href") + slug = self.scraper_helper.get_attribute( + node, ".manga-detail .manga-name a", "href" + ) return slug.replace("/", "") if slug else "" @return_on_error("") def __get_cover(self, node: Node) -> str: - cover = get_attribute(node, "img.manga-poster-img", "src") + cover = self.scraper_helper.get_attribute(node, "img.manga-poster-img", "src") return cover.replace("200x300", "500x800") if cover else "" @return_on_error("") def __get_views(self, node: Node) -> str: - views_string = get_text(node, ".fd-infor .fdi-view") + views_string = self.scraper_helper.get_text(node, ".fd-infor .fdi-view") return views_string.split()[0].replace(",", "") if views_string else "" @return_on_error([]) def __get_langs(self, node: Node) -> list: - langs_string = get_text(node, ".fd-infor > span:nth-child(1)") + langs_string = self.scraper_helper.get_text(node, ".fd-infor > span:nth-child(1)") return [lang for lang in langs_string.split("/")] if langs_string else [] @return_on_error("") def __get_chapters_volumes(self, node: Node, index: int) -> str: - data_string = get_text(node, f".d-block span:nth-child({index})") + data_string = self.scraper_helper.get_text( + node, f".d-block span:nth-child({index})" + ) return data_string.split()[1] if data_string else "" @return_on_error([]) @@ -52,8 +57,8 @@ def __get_genres(self, node: Node) -> list: @return_on_error({}) def __build_dict(self, node: Node) -> dict: manga_dict = { - "rank": get_text(node, ".ranking-number span"), - "title": get_text(node, ".manga-detail .manga-name a"), + "rank": self.scraper_helper.get_text(node, ".ranking-number span"), + "title": self.scraper_helper.get_text(node, ".manga-detail .manga-name a"), "slug": self.__get_slug(node), "cover": self.__get_cover(node), "views": self.__get_views(node), diff --git a/app/api/scrapers/popular.py b/app/api/scrapers/popular.py index bef24f3..00dfea8 100644 --- a/app/api/scrapers/popular.py +++ b/app/api/scrapers/popular.py @@ -1,7 +1,7 @@ from selectolax.parser import Node from app.api.decorators.return_decorator import return_on_error -from ..utils import get_attribute, get_text +from app.api.helpers.scraper import ScrapeHelper from ..helpers.html_helper import HTMLHelper @@ -10,25 +10,23 @@ def __init__(self) -> None: url = "https://mangareader.to/home" # Facades self.html_helper = HTMLHelper() + self.scraper_helper = ScrapeHelper() # Parser self.parser = self.html_helper.get_parser(url) - @staticmethod @return_on_error("") - def __get_slug(node: Node) -> str: - slug = get_attribute(node, "a.link-mask", "href") + def __get_slug(self, node: Node) -> str: + slug = self.scraper_helper.get_attribute(node, "a.link-mask", "href") return slug.replace("/", "") if slug else "" - @staticmethod @return_on_error([]) - def __get_langs(node: Node) -> list: - langs = get_text(node, ".mp-desc p:nth-of-type(3)") + def __get_langs(self, node: Node) -> list: + langs = self.scraper_helper.get_text(node, ".mp-desc p:nth-of-type(3)") return langs.split("/") if langs else [] - @staticmethod @return_on_error({}) - def __get_chapters_volumes(node: Node, index: int) -> dict: - data = get_text(node, f".mp-desc p:nth-of-type({index})") + def __get_chapters_volumes(self, node: Node, index: int) -> dict: + data = self.scraper_helper.get_text(node, f".mp-desc p:nth-of-type({index})") if data: total = data.split()[1] lang = data.split()[2].translate(str.maketrans("", "", "[]")) @@ -41,11 +39,11 @@ def __get_chapters_volumes(node: Node, index: int) -> dict: @return_on_error({}) def __build_dict(self, node) -> dict: manga_dict = { - "rank": get_text(node, ".number span"), - "title": get_text(node, ".anime-name"), + "rank": self.scraper_helper.get_text(node, ".number span"), + "title": self.scraper_helper.get_text(node, ".anime-name"), "slug": self.__get_slug(node), - "cover": get_attribute(node, "img.manga-poster-img", "src"), - "rating": get_text(node, ".mp-desc p:nth-of-type(2)"), + "cover": self.scraper_helper.get_attribute(node, "img.manga-poster-img", "src"), + "rating": self.scraper_helper.get_text(node, ".mp-desc p:nth-of-type(2)"), "langs": self.__get_langs(node), "chapters": self.__get_chapters_volumes(node, 4), "volumes": self.__get_chapters_volumes(node, 5), diff --git a/app/api/scrapers/topten.py b/app/api/scrapers/topten.py index 8b4cbd8..213a354 100644 --- a/app/api/scrapers/topten.py +++ b/app/api/scrapers/topten.py @@ -1,7 +1,7 @@ from selectolax.parser import Node from app.api.decorators.return_decorator import return_on_error -from ..utils import get_text, get_attribute +from app.api.helpers.scraper import ScrapeHelper from ..helpers.html_helper import HTMLHelper @@ -10,19 +10,18 @@ def __init__(self) -> None: url = "https://mangareader.to/home" # Facades self.html_helper = HTMLHelper() + self.scraper_helper = ScrapeHelper() # Parser self.parser = self.html_helper.get_parser(url) - @staticmethod @return_on_error("") - def __get_slug(node: Node) -> str: - slug = get_attribute(node, ".desi-head-title a", "href") + def __get_slug(self, node: Node) -> str: + slug = self.scraper_helper.get_attribute(node, ".desi-head-title a", "href") return slug.replace("/", "") if slug else "" - @staticmethod @return_on_error({}) - def __get_chapters(node: Node) -> dict: - chapters_string = get_text(node, ".desi-sub-text") + def __get_chapters(self, node: Node) -> dict: + chapters_string = self.scraper_helper.get_text(node, ".desi-sub-text") if chapters_string: total = chapters_string.split()[1] lang = chapters_string.split()[2].translate(str.maketrans("", "", "[]")) @@ -41,10 +40,10 @@ def __get_genres(node: Node) -> list: @return_on_error({}) def __build_dict(self, node: Node) -> dict: manga_dict = { - "title": get_text(node, ".desi-head-title a"), + "title": self.scraper_helper.get_text(node, ".desi-head-title a"), "slug": self.__get_slug(node), - "cover": get_attribute(node, "img.manga-poster-img", "src"), - "synopsis": get_text(node, ".sc-detail .scd-item"), + "cover": self.scraper_helper.get_attribute(node, "img.manga-poster-img", "src"), + "synopsis": self.scraper_helper.get_text(node, ".sc-detail .scd-item"), "chapters": self.__get_chapters(node), "genres": self.__get_genres(node), } diff --git a/app/api/utils.py b/app/api/utils.py deleted file mode 100644 index 10fc0d2..0000000 --- a/app/api/utils.py +++ /dev/null @@ -1,15 +0,0 @@ -import re -from selectolax.parser import Node - - -# Scraper funcions -def get_text(node: Node, selector: str) -> str | None: - """get text from a node according to css selector""" - element = node.css_first(selector) - return element.text().strip() if element else None - - -def get_attribute(node: Node, selector: str, attribute: str) -> str | None: - """get content from a node according to css selector and attribute""" - element = node.css_first(selector) - return element.attributes[attribute] if element else None