diff --git a/src/clis/douban/marks.ts b/src/clis/douban/marks.ts index b27436ca..665c0b1c 100644 --- a/src/clis/douban/marks.ts +++ b/src/clis/douban/marks.ts @@ -50,7 +50,7 @@ async function fetchMarks( ): Promise { const marks: DoubanMark[] = []; let offset = 0; - const pageSize = 30; + const pageSize = 15; while (true) { const url = `https://movie.douban.com/people/${uid}/${status}?start=${offset}&sort=time&rating=all&filter=all&mode=grid`; diff --git a/src/clis/douban/subject.yaml b/src/clis/douban/subject.yaml index 590b0acd..043602df 100644 --- a/src/clis/douban/subject.yaml +++ b/src/clis/douban/subject.yaml @@ -18,46 +18,75 @@ pipeline: - evaluate: | (async () => { const id = '${{ args.id }}'; - + // Wait for page to load await new Promise(r => setTimeout(r, 2000)); - - // Extract title + + // Extract title - v:itemreviewed contains "中文名 OriginalName" const titleEl = document.querySelector('span[property="v:itemreviewed"]'); - const title = titleEl?.textContent?.trim() || ''; - - // Extract original title - const ogTitleEl = document.querySelector('span[property="v:originalTitle"]'); - const originalTitle = ogTitleEl?.textContent?.trim() || ''; - + const fullTitle = titleEl?.textContent?.trim() || ''; + + // Split title and originalTitle + // Douban format: "中文名 OriginalName" - split by first space that separates CJK from non-CJK + let title = fullTitle; + let originalTitle = ''; + const titleMatch = fullTitle.match(/^([\u4e00-\u9fff\u3000-\u303f\uff00-\uffef]+(?:\s*[\u4e00-\u9fff\u3000-\u303f\uff00-\uffef·::!?]+)*)\s+(.+)$/); + if (titleMatch) { + title = titleMatch[1].trim(); + originalTitle = titleMatch[2].trim(); + } + // Extract year const yearEl = document.querySelector('.year'); - const year = yearEl?.textContent?.trim() || ''; - + const year = yearEl?.textContent?.trim().replace(/[()()]/g, '') || ''; + // Extract rating const ratingEl = document.querySelector('strong[property="v:average"]'); const rating = parseFloat(ratingEl?.textContent || '0'); - + // Extract rating count const ratingCountEl = document.querySelector('span[property="v:votes"]'); const ratingCount = parseInt(ratingCountEl?.textContent || '0', 10); - + // Extract genres const genreEls = document.querySelectorAll('span[property="v:genre"]'); const genres = Array.from(genreEls).map(el => el.textContent?.trim()).filter(Boolean).join(','); - + // Extract directors const directorEls = document.querySelectorAll('a[rel="v:directedBy"]'); const directors = Array.from(directorEls).map(el => el.textContent?.trim()).filter(Boolean).join(','); - + // Extract casts const castEls = document.querySelectorAll('a[rel="v:starring"]'); - const casts = Array.from(castEls).slice(0, 5).map(el => el.textContent?.trim()).filter(Boolean).join(','); - + const casts = Array.from(castEls).slice(0, 5).map(el => el.textContent?.trim()).filter(Boolean); + + // Extract info section for country and duration + const infoEl = document.querySelector('#info'); + const infoText = infoEl?.textContent || ''; + + // Extract country/region from #info as list + let country = []; + const countryMatch = infoText.match(/制片国家\/地区:\s*([^\n]+)/); + if (countryMatch) { + country = countryMatch[1].trim().split(/\s*\/\s*/).filter(Boolean); + } + + // Extract duration from #info as pure number in min + const durationEl = document.querySelector('span[property="v:runtime"]'); + let durationRaw = durationEl?.textContent?.trim() || ''; + if (!durationRaw) { + const durationMatch = infoText.match(/片长:\s*([^\n]+)/); + if (durationMatch) { + durationRaw = durationMatch[1].trim(); + } + } + const durationNumMatch = durationRaw.match(/(\d+)/); + const duration = durationNumMatch ? parseInt(durationNumMatch[1], 10) : null; + // Extract summary const summaryEl = document.querySelector('span[property="v:summary"]'); const summary = summaryEl?.textContent?.trim() || ''; - + return [{ id, title, @@ -68,9 +97,11 @@ pipeline: genres, directors, casts, + country, + duration, summary: summary.substring(0, 200), url: `https://movie.douban.com/subject/${id}` }]; })() -columns: [id, title, originalTitle, year, rating, ratingCount, genres, directors, casts, summary, url] +columns: [id, title, originalTitle, year, rating, ratingCount, genres, directors, casts, country, duration, summary, url]