]>
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/extractor/ccc.py
1 from __future__
import unicode_literals
5 from .common
import InfoExtractor
14 class CCCIE(InfoExtractor
):
15 IE_NAME
= 'media.ccc.de'
16 _VALID_URL
= r
'https?://(?:www\.)?media\.ccc\.de/v/(?P<id>[^/?#&]+)'
19 'url': 'https://media.ccc.de/v/30C3_-_5443_-_en_-_saal_g_-_201312281830_-_introduction_to_processor_design_-_byterazor#video',
20 'md5': '3a1eda8f3a29515d27f5adb967d7e740',
22 'id': '30C3_-_5443_-_en_-_saal_g_-_201312281830_-_introduction_to_processor_design_-_byterazor',
24 'title': 'Introduction to Processor Design',
25 'description': 'md5:80be298773966f66d56cb11260b879af',
26 'thumbnail': 're:^https?://.*\.jpg$',
28 'upload_date': '20131228',
32 'url': 'https://media.ccc.de/v/32c3-7368-shopshifting#download',
33 'only_matching': True,
36 def _real_extract(self
, url
):
37 video_id
= self
._match
_id
(url
)
38 webpage
= self
._download
_webpage
(url
, video_id
)
40 if self
._downloader
.params
.get('prefer_free_formats'):
41 preference
= qualities(['mp3', 'opus', 'mp4-lq', 'webm-lq', 'h264-sd', 'mp4-sd', 'webm-sd', 'mp4', 'webm', 'mp4-hd', 'h264-hd', 'webm-hd'])
43 preference
= qualities(['opus', 'mp3', 'webm-lq', 'mp4-lq', 'webm-sd', 'h264-sd', 'mp4-sd', 'webm', 'mp4', 'webm-hd', 'mp4-hd', 'h264-hd'])
45 title
= self
._html
_search
_regex
(
46 r
'(?s)<h1>(.*?)</h1>', webpage
, 'title')
47 description
= self
._html
_search
_regex
(
48 r
'(?s)<h3>About</h3>(.+?)<h3>',
49 webpage
, 'description', fatal
=False)
50 upload_date
= unified_strdate(self
._html
_search
_regex
(
51 r
"(?s)<span[^>]+class='[^']*fa-calendar-o'[^>]*>(.+?)</span>",
52 webpage
, 'upload date', fatal
=False))
53 view_count
= int_or_none(self
._html
_search
_regex
(
54 r
"(?s)<span class='[^']*fa-eye'></span>(.*?)</li>",
55 webpage
, 'view count', fatal
=False))
56 duration
= parse_duration(self
._html
_search
_regex
(
57 r
'(?s)<span[^>]+class=(["\']).*?fa
-clock
-o
.*?\
1[^
>]*></span
>(?P
<duration
>.+?
)</li
',
58 webpage, 'duration
', fatal=False, group='duration
'))
60 matches = re.finditer(r'''(?xs)
61 <(?:span|div)\s+class='label\s
+filetype
'>(?P<format>[^<]*)</(?:span|div)>\s*
62 <(?:span|div)\s+class='label\s
+filetype
'>(?P<lang>[^<]*)</(?:span|div)>\s*
63 <a\s+download\s+href='(?P
<http_url
>[^
']+)'>\s
*
66 <a\s
+(?
:download\s
+)?href
='(?P<torrent_url>[^']+\
.torrent
)'
70 format = m.group('format
')
71 format_id = self._search_regex(
72 r'.*/([a
-z0
-9_-]+)/[^
/]*$
',
73 m.group('http_url
'), 'format
id', default=None)
75 format_id = m.group('lang
') + '-' + format_id
76 vcodec = 'h264
' if 'h264
' in format_id else (
77 'none
' if format_id in ('mp3
', 'opus
') else None
80 'format_id
': format_id,
82 'language
': m.group('lang
'),
83 'url
': m.group('http_url
'),
85 'preference
': preference(format_id),
88 if m.group('torrent_url
'):
90 'format_id
': 'torrent
-%s' % (format if format_id is None else format_id),
91 'format
': '%s (torrent
)' % format,
93 'format_note
': '(unsupported
; will just download the
.torrent
file)',
95 'preference
': -100 + preference(format_id),
96 'url
': m.group('torrent_url
'),
98 self._sort_formats(formats)
100 thumbnail = self._html_search_regex(
101 r"<video.*?poster='([^
']+)'", webpage, 'thumbnail', fatal=False)
106 'description': description,
107 'thumbnail': thumbnail,
108 'view_count': view_count,
109 'upload_date': upload_date,
110 'duration': duration,