Skip to content

Commit 84213ea

Browse files
committed
[youtube] Extract chapters from JSON (closes ytdl-org#24819)
1 parent 562de77 commit 84213ea

File tree

2 files changed

+62
-3
lines changed

2 files changed

+62
-3
lines changed

test/test_youtube_chapters.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -267,7 +267,7 @@ def test_youtube_chapters(self):
267267
for description, duration, expected_chapters in self._TEST_CASES:
268268
ie = YoutubeIE()
269269
expect_value(
270-
self, ie._extract_chapters(description, duration),
270+
self, ie._extract_chapters_from_description(description, duration),
271271
expected_chapters, None)
272272

273273

youtube_dl/extractor/youtube.py

Lines changed: 61 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1652,8 +1652,63 @@ def extract_id(cls, url):
16521652
video_id = mobj.group(2)
16531653
return video_id
16541654

1655+
def _extract_chapters_from_json(self, webpage, video_id, duration):
    """Build the chapter list from the player JSON embedded in the watch page.

    The page is searched for the ``RELATED_PLAYER_ARGS`` object; its
    ``watch_next_response`` member is itself a JSON string, which is parsed
    a second time to reach the chaptered player bar data.

    Returns a list of dicts with ``start_time``, ``end_time`` and ``title``
    keys, or None when the page is empty or any intermediate structure is
    missing or of an unexpected type. Both JSON parses are non-fatal.
    """
    if not webpage:
        return

    # Falls back to an empty JSON object when the marker is not in the page.
    player_args_json = self._search_regex(
        r'RELATED_PLAYER_ARGS["\']\s*:\s*({.+})\s*,?\s*\n', webpage,
        'player args', default='{}')
    player_args = self._parse_json(player_args_json, video_id, fatal=False)
    if not (player_args and isinstance(player_args, dict)):
        return

    # watch_next_response must be a string holding a nested JSON document.
    raw_watch_next = player_args.get('watch_next_response')
    if not isinstance(raw_watch_next, compat_str):
        return
    watch_next = self._parse_json(raw_watch_next, video_id, fatal=False)
    if not (watch_next and isinstance(watch_next, dict)):
        return

    def chapter_entries(data):
        # Plain subscripting; a missing level raises and is left to try_get
        # (presumably it swallows lookup errors - confirm against utils).
        overlay = data['playerOverlays']['playerOverlayRenderer']
        decorated_bar = overlay['decoratedPlayerBarRenderer']['decoratedPlayerBarRenderer']
        return decorated_bar['playerBar']['chapteredPlayerBarRenderer']['chapters']

    entries = try_get(watch_next, chapter_entries, list)
    if not entries:
        return

    def start_seconds(entry):
        # Start offset is stored in milliseconds; scale it down by 1000.
        millis = try_get(
            entry, lambda x: x['chapterRenderer']['timeRangeStartMillis'], int)
        return float_or_none(millis, scale=1000)

    total = len(entries)
    chapters = []
    for index, entry in enumerate(entries):
        start_time = start_seconds(entry)
        if start_time is None:
            continue
        # A chapter ends where the following one starts; the last chapter
        # ends at the overall duration.
        following = index + 1
        if following < total:
            end_time = start_seconds(entries[following])
        else:
            end_time = duration
        if end_time is None:
            continue
        chapters.append({
            'start_time': start_time,
            'end_time': end_time,
            'title': try_get(
                entry, lambda x: x['chapterRenderer']['title']['simpleText'],
                compat_str),
        })
    return chapters
1709+
16551710
@staticmethod
1656-
def _extract_chapters(description, duration):
1711+
def _extract_chapters_from_description(description, duration):
16571712
if not description:
16581713
return None
16591714
chapter_lines = re.findall(
@@ -1687,6 +1742,10 @@ def _extract_chapters(description, duration):
16871742
})
16881743
return chapters
16891744

1745+
def _extract_chapters(self, webpage, description, video_id, duration):
    """Return chapters from the page JSON, else from the description.

    The JSON-based extractor is tried first; the description-based one is
    consulted only when the former yields a falsy result (None or an
    empty list).
    """
    json_chapters = self._extract_chapters_from_json(
        webpage, video_id, duration)
    if json_chapters:
        return json_chapters
    return self._extract_chapters_from_description(description, duration)
1748+
16901749
def _real_extract(self, url):
16911750
url, smuggled_data = unsmuggle_url(url, {})
16921751

@@ -2324,7 +2383,7 @@ def _extract_count(count_name):
23242383
errnote='Unable to download video annotations', fatal=False,
23252384
data=urlencode_postdata({xsrf_field_name: xsrf_token}))
23262385

2327-
chapters = self._extract_chapters(description_original, video_duration)
2386+
chapters = self._extract_chapters(video_webpage, description_original, video_id, video_duration)
23282387

23292388
# Look for the DASH manifest
23302389
if self._downloader.params.get('youtube_include_dash_manifest', True):

0 commit comments

Comments
 (0)