Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/nobelprize.py
New upstream version 2020.09.14
[youtubedl] / youtube_dl / extractor / nobelprize.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 from .common import InfoExtractor
5 from ..utils import (
6 js_to_json,
7 mimetype2ext,
8 determine_ext,
9 update_url_query,
10 get_element_by_attribute,
11 int_or_none,
12 )
13
14
class NobelPrizeIE(InfoExtractor):
    """Extractor for nobelprize.org media player pages.

    The numeric ``id`` query parameter matched by ``_VALID_URL`` is used as
    the video id.
    """

    _VALID_URL = r'https?://(?:www\.)?nobelprize\.org/mediaplayer.*?\bid=(?P<id>\d+)'
    _TEST = {
        'url': 'http://www.nobelprize.org/mediaplayer/?id=2636',
        'md5': '04c81e5714bb36cc4e2232fee1d8157f',
        'info_dict': {
            'id': '2636',
            'ext': 'mp4',
            'title': 'Announcement of the 2016 Nobel Prize in Physics',
            'description': 'md5:05beba57f4f5a4bbd4cf2ef28fcff739',
        }
    }

    def _real_extract(self, url):
        """Build the info dict for the media player page at ``url``.

        Downloads the page, reads the ``media`` entry of the inline
        ``var config = {...};`` object and turns each of its sources into
        one or more formats (HLS, HDS or a plain direct URL).

        Returns a dict with ``id``, ``title``, ``description``, ``duration``
        and ``formats``. ``title`` is mandatory: a missing ``media`` or
        ``title`` key raises ``KeyError``.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        # The player configuration is an inline JS object literal;
        # js_to_json is handed to _parse_json as the transform (presumably
        # to turn the literal into strict JSON before parsing).
        media = self._parse_json(self._search_regex(
            r'(?s)var\s*config\s*=\s*({.+?});', webpage,
            'config'), video_id, js_to_json)['media']
        title = media['title']

        formats = []
        # `or []` also covers an explicit null value, which a
        # `.get('source', [])` default would let through and crash on.
        for source in media.get('source') or []:
            # Skip malformed entries instead of failing on `.get`.
            if not isinstance(source, dict):
                continue
            source_src = source.get('src')
            if not source_src:
                continue
            # Prefer the declared MIME type; fall back to the URL extension.
            ext = mimetype2ext(source.get('type')) or determine_ext(source_src)
            if ext == 'm3u8':
                formats.extend(self._extract_m3u8_formats(
                    source_src, video_id, 'mp4', 'm3u8_native',
                    m3u8_id='hls', fatal=False))
            elif ext == 'f4m':
                formats.extend(self._extract_f4m_formats(
                    update_url_query(source_src, {'hdcore': '3.7.0'}),
                    video_id, f4m_id='hds', fatal=False))
            else:
                formats.append({
                    'url': source_src,
                })
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'description': get_element_by_attribute('itemprop', 'description', webpage),
            'duration': int_or_none(media.get('duration')),
            'formats': formats,
        }