Skip to content

Commit

Permalink
Refactor: facade pattern for scrape helper
Browse files Browse the repository at this point in the history
  • Loading branch information
moonlitgrace committed Oct 9, 2023
1 parent 9d56f7e commit 6cc7c91
Show file tree
Hide file tree
Showing 5 changed files with 47 additions and 47 deletions.
13 changes: 13 additions & 0 deletions app/api/helpers/scraper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
from selectolax.parser import Node


class ScrapeHelper:
    """Facade over the CSS lookups the scrapers perform on parsed nodes.

    Both helpers select the first descendant of ``node`` that matches a CSS
    selector via ``node.css_first`` and return ``None`` rather than raising
    when the requested value is not available.
    """

    @staticmethod
    def get_text(node: Node, selector: str) -> str | None:
        """Return the stripped text of the first element matching ``selector``.

        Args:
            node: Node to search within.
            selector: CSS selector passed to ``node.css_first``.

        Returns:
            The element's text with surrounding whitespace removed, or
            ``None`` when no element matches.
        """
        element = node.css_first(selector)
        return element.text().strip() if element else None

    @staticmethod
    def get_attribute(node: Node, selector: str, attribute: str) -> str | None:
        """Return ``attribute`` of the first element matching ``selector``.

        Args:
            node: Node to search within.
            selector: CSS selector passed to ``node.css_first``.
            attribute: Name of the attribute to read from the matched element.

        Returns:
            The attribute value, or ``None`` when no element matches or the
            matched element does not carry ``attribute``.
        """
        element = node.css_first(selector)
        if not element:
            return None
        # Use .get() so a matched element without the attribute yields None
        # (as the return annotation promises) instead of raising KeyError.
        return element.attributes.get(attribute)
21 changes: 13 additions & 8 deletions app/api/scrapers/most_viewed.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from selectolax.parser import Node

from app.api.decorators.return_decorator import return_on_error
from ..utils import get_text, get_attribute
from app.api.helpers.scraper import ScrapeHelper
from ..helpers.html_helper import HTMLHelper


Expand All @@ -16,32 +16,37 @@ def __init__(self, chart: str) -> None:
self.chart = chart
# Facades
self.html_helper = HTMLHelper()
self.scraper_helper = ScrapeHelper()
# Parser
self.parser = self.html_helper.get_parser(url)

@return_on_error("")
def __get_slug(self, node: Node) -> str:
slug = get_attribute(node, ".manga-detail .manga-name a", "href")
slug = self.scraper_helper.get_attribute(
node, ".manga-detail .manga-name a", "href"
)
return slug.replace("/", "") if slug else ""

@return_on_error("")
def __get_cover(self, node: Node) -> str:
cover = get_attribute(node, "img.manga-poster-img", "src")
cover = self.scraper_helper.get_attribute(node, "img.manga-poster-img", "src")
return cover.replace("200x300", "500x800") if cover else ""

@return_on_error("")
def __get_views(self, node: Node) -> str:
views_string = get_text(node, ".fd-infor .fdi-view")
views_string = self.scraper_helper.get_text(node, ".fd-infor .fdi-view")
return views_string.split()[0].replace(",", "") if views_string else ""

@return_on_error([])
def __get_langs(self, node: Node) -> list:
langs_string = get_text(node, ".fd-infor > span:nth-child(1)")
langs_string = self.scraper_helper.get_text(node, ".fd-infor > span:nth-child(1)")
return [lang for lang in langs_string.split("/")] if langs_string else []

@return_on_error("")
def __get_chapters_volumes(self, node: Node, index: int) -> str:
data_string = get_text(node, f".d-block span:nth-child({index})")
data_string = self.scraper_helper.get_text(
node, f".d-block span:nth-child({index})"
)
return data_string.split()[1] if data_string else ""

@return_on_error([])
Expand All @@ -52,8 +57,8 @@ def __get_genres(self, node: Node) -> list:
@return_on_error({})
def __build_dict(self, node: Node) -> dict:
manga_dict = {
"rank": get_text(node, ".ranking-number span"),
"title": get_text(node, ".manga-detail .manga-name a"),
"rank": self.scraper_helper.get_text(node, ".ranking-number span"),
"title": self.scraper_helper.get_text(node, ".manga-detail .manga-name a"),
"slug": self.__get_slug(node),
"cover": self.__get_cover(node),
"views": self.__get_views(node),
Expand Down
26 changes: 12 additions & 14 deletions app/api/scrapers/popular.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from selectolax.parser import Node

from app.api.decorators.return_decorator import return_on_error
from ..utils import get_attribute, get_text
from app.api.helpers.scraper import ScrapeHelper
from ..helpers.html_helper import HTMLHelper


Expand All @@ -10,25 +10,23 @@ def __init__(self) -> None:
url = "https://mangareader.to/home"
# Facades
self.html_helper = HTMLHelper()
self.scraper_helper = ScrapeHelper()
# Parser
self.parser = self.html_helper.get_parser(url)

@staticmethod
@return_on_error("")
def __get_slug(node: Node) -> str:
slug = get_attribute(node, "a.link-mask", "href")
def __get_slug(self, node: Node) -> str:
slug = self.scraper_helper.get_attribute(node, "a.link-mask", "href")
return slug.replace("/", "") if slug else ""

@staticmethod
@return_on_error([])
def __get_langs(node: Node) -> list:
langs = get_text(node, ".mp-desc p:nth-of-type(3)")
def __get_langs(self, node: Node) -> list:
langs = self.scraper_helper.get_text(node, ".mp-desc p:nth-of-type(3)")
return langs.split("/") if langs else []

@staticmethod
@return_on_error({})
def __get_chapters_volumes(node: Node, index: int) -> dict:
data = get_text(node, f".mp-desc p:nth-of-type({index})")
def __get_chapters_volumes(self, node: Node, index: int) -> dict:
data = self.scraper_helper.get_text(node, f".mp-desc p:nth-of-type({index})")
if data:
total = data.split()[1]
lang = data.split()[2].translate(str.maketrans("", "", "[]"))
Expand All @@ -41,11 +39,11 @@ def __get_chapters_volumes(node: Node, index: int) -> dict:
@return_on_error({})
def __build_dict(self, node) -> dict:
manga_dict = {
"rank": get_text(node, ".number span"),
"title": get_text(node, ".anime-name"),
"rank": self.scraper_helper.get_text(node, ".number span"),
"title": self.scraper_helper.get_text(node, ".anime-name"),
"slug": self.__get_slug(node),
"cover": get_attribute(node, "img.manga-poster-img", "src"),
"rating": get_text(node, ".mp-desc p:nth-of-type(2)"),
"cover": self.scraper_helper.get_attribute(node, "img.manga-poster-img", "src"),
"rating": self.scraper_helper.get_text(node, ".mp-desc p:nth-of-type(2)"),
"langs": self.__get_langs(node),
"chapters": self.__get_chapters_volumes(node, 4),
"volumes": self.__get_chapters_volumes(node, 5),
Expand Down
19 changes: 9 additions & 10 deletions app/api/scrapers/topten.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from selectolax.parser import Node

from app.api.decorators.return_decorator import return_on_error
from ..utils import get_text, get_attribute
from app.api.helpers.scraper import ScrapeHelper
from ..helpers.html_helper import HTMLHelper


Expand All @@ -10,19 +10,18 @@ def __init__(self) -> None:
url = "https://mangareader.to/home"
# Facades
self.html_helper = HTMLHelper()
self.scraper_helper = ScrapeHelper()
# Parser
self.parser = self.html_helper.get_parser(url)

@staticmethod
@return_on_error("")
def __get_slug(node: Node) -> str:
slug = get_attribute(node, ".desi-head-title a", "href")
def __get_slug(self, node: Node) -> str:
slug = self.scraper_helper.get_attribute(node, ".desi-head-title a", "href")
return slug.replace("/", "") if slug else ""

@staticmethod
@return_on_error({})
def __get_chapters(node: Node) -> dict:
chapters_string = get_text(node, ".desi-sub-text")
def __get_chapters(self, node: Node) -> dict:
chapters_string = self.scraper_helper.get_text(node, ".desi-sub-text")
if chapters_string:
total = chapters_string.split()[1]
lang = chapters_string.split()[2].translate(str.maketrans("", "", "[]"))
Expand All @@ -41,10 +40,10 @@ def __get_genres(node: Node) -> list:
@return_on_error({})
def __build_dict(self, node: Node) -> dict:
manga_dict = {
"title": get_text(node, ".desi-head-title a"),
"title": self.scraper_helper.get_text(node, ".desi-head-title a"),
"slug": self.__get_slug(node),
"cover": get_attribute(node, "img.manga-poster-img", "src"),
"synopsis": get_text(node, ".sc-detail .scd-item"),
"cover": self.scraper_helper.get_attribute(node, "img.manga-poster-img", "src"),
"synopsis": self.scraper_helper.get_text(node, ".sc-detail .scd-item"),
"chapters": self.__get_chapters(node),
"genres": self.__get_genres(node),
}
Expand Down
15 changes: 0 additions & 15 deletions app/api/utils.py

This file was deleted.

0 comments on commit 6cc7c91

Please sign in to comment.