]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/ccc.py
   1 from __future__ 
import unicode_literals
 
   5 from .common 
import InfoExtractor
 
  13 class CCCIE(InfoExtractor
): 
  14     IE_NAME 
= 'media.ccc.de' 
  15     _VALID_URL 
= r
'https?://(?:www\.)?media\.ccc\.de/[^?#]+/[^?#/]*?_(?P<id>[0-9]{8,})._[^?#/]*\.html' 
  18         'url': 'http://media.ccc.de/browse/congress/2013/30C3_-_5443_-_en_-_saal_g_-_201312281830_-_introduction_to_processor_design_-_byterazor.html#video', 
  19         'md5': '3a1eda8f3a29515d27f5adb967d7e740', 
  23             'title': 'Introduction to Processor Design', 
  24             'description': 'md5:5ddbf8c734800267f2cee4eab187bc1b', 
  25             'thumbnail': 're:^https?://.*\.jpg$', 
  27             'upload_date': '20131229', 
  31     def _real_extract(self
, url
): 
  32         video_id 
= self
._match
_id
(url
) 
  33         webpage 
= self
._download
_webpage
(url
, video_id
) 
  35         if self
._downloader
.params
.get('prefer_free_formats'): 
  36             preference 
= qualities(['mp3', 'opus', 'mp4-lq', 'webm-lq', 'h264-sd', 'mp4-sd', 'webm-sd', 'mp4', 'webm', 'mp4-hd', 'h264-hd', 'webm-hd']) 
  38             preference 
= qualities(['opus', 'mp3', 'webm-lq', 'mp4-lq', 'webm-sd', 'h264-sd', 'mp4-sd', 'webm', 'mp4', 'webm-hd', 'mp4-hd', 'h264-hd']) 
  40         title 
= self
._html
_search
_regex
( 
  41             r
'(?s)<h1>(.*?)</h1>', webpage
, 'title') 
  42         description 
= self
._html
_search
_regex
( 
  43             r
"(?s)<p class='description'>(.*?)</p>", 
  44             webpage
, 'description', fatal
=False) 
  45         upload_date 
= unified_strdate(self
._html
_search
_regex
( 
  46             r
"(?s)<span class='[^']*fa-calendar-o'></span>(.*?)</li>", 
  47             webpage
, 'upload date', fatal
=False)) 
  48         view_count 
= int_or_none(self
._html
_search
_regex
( 
  49             r
"(?s)<span class='[^']*fa-eye'></span>(.*?)</li>", 
  50             webpage
, 'view count', fatal
=False)) 
  52         matches 
= re
.finditer(r
'''(?xs) 
  53             <(?:span|div)\s+class='label\s+filetype'>(?P<format>.*?)</(?:span|div)>\s* 
  54             <a\s+download\s+href='(?P<http_url>[^']+)'>\s* 
  57                 <a\s+href='(?P<torrent_url>[^']+\.torrent)' 
  61             format 
= m
.group('format') 
  62             format_id 
= self
._search
_regex
( 
  63                 r
'.*/([a-z0-9_-]+)/[^/]*$', 
  64                 m
.group('http_url'), 'format id', default
=None) 
  65             vcodec 
= 'h264' if 'h264' in format_id 
else ( 
  66                 'none' if format_id 
in ('mp3', 'opus') else None 
  69                 'format_id': format_id
, 
  71                 'url': m
.group('http_url'), 
  73                 'preference': preference(format_id
), 
  76             if m
.group('torrent_url'): 
  78                     'format_id': 'torrent-%s' % (format 
if format_id 
is None else format_id
), 
  79                     'format': '%s (torrent)' % format
, 
  81                     'format_note': '(unsupported; will just download the .torrent file)', 
  83                     'preference': -100 + preference(format_id
), 
  84                     'url': m
.group('torrent_url'), 
  86         self
._sort
_formats
(formats
) 
  88         thumbnail 
= self
._html
_search
_regex
( 
  89             r
"<video.*?poster='([^']+)'", webpage
, 'thumbnail', fatal
=False) 
  94             'description': description
, 
  95             'thumbnail': thumbnail
, 
  96             'view_count': view_count
, 
  97             'upload_date': upload_date
,