From a73031c9cbae719a027ba3a488232facc34f69f0 Mon Sep 17 00:00:00 2001 From: fracti0n Date: Tue, 3 Jan 2023 21:44:03 +0000 Subject: [PATCH 1/4] scrape series and episode number for series --- src/helpers/movie.helper.ts | 9 +++++++++ src/interfaces/movie.interface.ts | 1 + src/services/movie.service.ts | 2 ++ 3 files changed, 12 insertions(+) diff --git a/src/helpers/movie.helper.ts b/src/helpers/movie.helper.ts index b5e72f9..9f970a0 100644 --- a/src/helpers/movie.helper.ts +++ b/src/helpers/movie.helper.ts @@ -182,6 +182,15 @@ export const getType = (el: HTMLElement): string => { return type?.innerText?.replace(/[{()}]/g, '') || 'film'; }; +export const getEpisodeNum = (el: HTMLElement): string => { + const titleArray = el.querySelector('h1').innerText.split(`(`); + if (titleArray.length > 1){ + return (titleArray[titleArray.length-1].replace(`)`, ``).trim()) + } else { + return (null); + } +}; + export const getVods = (el: HTMLElement | null): CSFDVod[] => { let vods: CSFDVod[] = []; if (el) { diff --git a/src/interfaces/movie.interface.ts b/src/interfaces/movie.interface.ts index c09f7d1..21da742 100644 --- a/src/interfaces/movie.interface.ts +++ b/src/interfaces/movie.interface.ts @@ -17,6 +17,7 @@ export interface CSFDMovie extends CSFDScreening { premieres: CSFDPremiere[]; related: CSFDMovieListItem[]; similar: CSFDMovieListItem[]; + episodeNum: string; } export type CSFDVodService = diff --git a/src/services/movie.service.ts b/src/services/movie.service.ts index a60eaf4..1e8f9b2 100644 --- a/src/services/movie.service.ts +++ b/src/services/movie.service.ts @@ -18,6 +18,7 @@ import { getTitlesOther, getTrivia, getType, + getEpisodeNum, getVods, getYear } from '../helpers/movie.helper'; @@ -57,6 +58,7 @@ export class MovieScraper { descriptions: getDescriptions(el), genres: getGenres(el), type: getType(el) as CSFDFilmTypes, + episodeNum: getEpisodeNum(el), url: movieUrl(movieId), origins: getOrigins(el), colorRating: getColorRating(pageClasses), From acbb3b908a8e06bb17b217b0e12aea7da40831e0 Mon Sep 17 00:00:00 2001 From: fracti0n Date: Thu, 5 Jan 2023 21:47:45 +0000 Subject: [PATCH 2/4] Add parentId for series --- src/fetchers/index.ts | 1 + src/helpers/movie.helper.ts | 21 +++++++++++++++++++++ src/interfaces/movie.interface.ts | 1 + src/services/movie.service.ts | 2 ++ 4 files changed, 25 insertions(+) diff --git a/src/fetchers/index.ts b/src/fetchers/index.ts index 3a5bdbf..9fec695 100644 --- a/src/fetchers/index.ts +++ b/src/fetchers/index.ts @@ -17,6 +17,7 @@ export const fetchPage = async (url: string): Promise => { if (response.status >= 400 && response.status < 600) { throw new Error(`node-csfd-api: Bad response ${response.status} for url: ${url}`); } + console.log(response.text) return await response.text(); } catch (e) { console.error(e); diff --git a/src/helpers/movie.helper.ts b/src/helpers/movie.helper.ts index 9f970a0..00c7790 100644 --- a/src/helpers/movie.helper.ts +++ b/src/helpers/movie.helper.ts @@ -22,6 +22,27 @@ export const getTitle = (el: HTMLElement): string => { return el.querySelector('h1').innerText.split(`(`)[0].trim(); }; +export const getParent = (el: HTMLElement): string => { + let parentId = null; + parentId = getId(el.querySelector('h2').childNodes) || getId(el.querySelector('h1').childNodes); + return (parentId); + + function getId(nodes){ + let parentId = null; + let i = nodes.length; //we get all objects + while (i > 0){ + i--; + let node = nodes[i]; + if (node?._rawAttrs?.href){ + let arr = node._rawAttrs.href.split("/").filter(n => n); + parentId = arr[arr.length-1].split("-")[0]; + break; + } + } + return(parentId); + }; +}; + export const getGenres = (el: HTMLElement): CSFDGenres[] => { const genresRaw = el.querySelector('.genres').textContent; return genresRaw.split(' / ') as CSFDGenres[]; diff --git a/src/interfaces/movie.interface.ts b/src/interfaces/movie.interface.ts index 21da742..a8e3226 100644 --- a/src/interfaces/movie.interface.ts +++ b/src/interfaces/movie.interface.ts @@ -18,6 +18,7 @@ export interface CSFDMovie extends CSFDScreening { related: CSFDMovieListItem[]; similar: CSFDMovieListItem[]; episodeNum: string; + parentId: string; } export type CSFDVodService = diff --git a/src/services/movie.service.ts b/src/services/movie.service.ts index 1e8f9b2..51c96ad 100644 --- a/src/services/movie.service.ts +++ b/src/services/movie.service.ts @@ -15,6 +15,7 @@ import { getRatingCount, getTags, getTitle, + getParent, getTitlesOther, getTrivia, getType, @@ -53,6 +54,7 @@ export class MovieScraper { this.film = { id: movieId, title: getTitle(el), + parentId: getParent(el), year: getYear(jsonLd), duration: getDuration(jsonLd, el), descriptions: getDescriptions(el), From ecfce40b3fea4ce0f38644226f3ec4b79e14f807 Mon Sep 17 00:00:00 2001 From: fracti0n Date: Thu, 5 Jan 2023 21:53:00 +0000 Subject: [PATCH 3/4] removing leftover --- src/fetchers/index.ts | 1 - 1 file changed, 1 deletion(-) diff --git a/src/fetchers/index.ts b/src/fetchers/index.ts index 9fec695..3a5bdbf 100644 --- a/src/fetchers/index.ts +++ b/src/fetchers/index.ts @@ -17,7 +17,6 @@ export const fetchPage = async (url: string): Promise => { if (response.status >= 400 && response.status < 600) { throw new Error(`node-csfd-api: Bad response ${response.status} for url: ${url}`); } - console.log(response.text) return await response.text(); } catch (e) { console.error(e); From 9d28bfeb1ef474940f6173bc794fcf490c2b929d Mon Sep 17 00:00:00 2001 From: fracti0n Date: Thu, 5 Jan 2023 22:12:24 +0000 Subject: [PATCH 4/4] making it TS compliant --- src/helpers/movie.helper.ts | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/helpers/movie.helper.ts b/src/helpers/movie.helper.ts index 00c7790..909f4e8 100644 --- a/src/helpers/movie.helper.ts +++ b/src/helpers/movie.helper.ts @@ -27,14 +27,16 @@ export const getParent = (el: HTMLElement): string => { parentId = getId(el.querySelector('h2').childNodes) || getId(el.querySelector('h1').childNodes); return (parentId); - function getId(nodes){ + function getId(nodes: any){ let parentId = null; let i = nodes.length; //we get all objects while (i > 0){ i--; let node = nodes[i]; if (node?._rawAttrs?.href){ - let arr = node._rawAttrs.href.split("/").filter(n => n); + let arr = node._rawAttrs.href.split("/").filter(function (el: any) { + return el != ""; + }); parentId = arr[arr.length-1].split("-")[0]; break; }