]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/dreisat.py
   1 from __future__ 
import unicode_literals
 
   5 from .common 
import InfoExtractor
 
  17 class DreiSatIE(InfoExtractor
): 
  19     _VALID_URL 
= r
'(?:https?://)?(?:www\.)?3sat\.de/mediathek/(?:index\.php|mediathek\.php)?\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)$' 
  22             'url': 'http://www.3sat.de/mediathek/index.php?mode=play&obj=45918', 
  23             'md5': 'be37228896d30a88f315b638900a026e', 
  27                 'title': 'Waidmannsheil', 
  28                 'description': 'md5:cce00ca1d70e21425e72c86a98a56817', 
  29                 'uploader': 'SCHWEIZWEIT', 
  30                 'uploader_id': '100000210', 
  31                 'upload_date': '20140913' 
  34                 'skip_download': True,  # m3u8 downloads 
  38             'url': 'http://www.3sat.de/mediathek/mediathek.php?mode=play&obj=51066', 
  39             'only_matching': True, 
  43     def _parse_smil_formats(self
, smil
, smil_url
, video_id
, namespace
=None, f4m_params
=None, transform_rtmp_url
=None): 
  45         for param_group 
in smil
.findall(self
._xpath
_ns
('./head/paramGroup', namespace
)): 
  46             group_id 
= param_group
.attrib
.get(self
._xpath
_ns
('id', 'http://www.w3.org/XML/1998/namespace')) 
  48             for param 
in param_group
: 
  49                 params
[param
.get('name')] = param
.get('value') 
  50             param_groups
[group_id
] = params
 
  53         for video 
in smil
.findall(self
._xpath
_ns
('.//video', namespace
)): 
  54             src 
= video
.get('src') 
  57             bitrate 
= float_or_none(video
.get('system-bitrate') or video
.get('systemBitrate'), 1000) 
  58             group_id 
= video
.get('paramGroup') 
  59             param_group 
= param_groups
[group_id
] 
  60             for proto 
in param_group
['protocols'].split(','): 
  62                     'url': '%s://%s' % (proto
, param_group
['host']), 
  63                     'app': param_group
['app'], 
  66                     'format_id': '%s-%d' % (proto
, bitrate
), 
  69         self
._sort
_formats
(formats
) 
  72     def extract_from_xml_url(self
, video_id
, xml_url
): 
  73         doc 
= self
._download
_xml
( 
  75             note
='Downloading video info', 
  76             errnote
='Failed to download video info') 
  78         status_code 
= doc
.find('./status/statuscode') 
  79         if status_code 
is not None and status_code
.text 
!= 'ok': 
  80             code 
= status_code
.text
 
  81             if code 
== 'notVisibleAnymore': 
  82                 message 
= 'Video %s is not available' % video_id
 
  84                 message 
= '%s returned error: %s' % (self
.IE_NAME
, code
) 
  85             raise ExtractorError(message
, expected
=True) 
  87         title 
= doc
.find('.//information/title').text
 
  88         description 
= xpath_text(doc
, './/information/detail', 'description') 
  89         duration 
= int_or_none(xpath_text(doc
, './/details/lengthSec', 'duration')) 
  90         uploader 
= xpath_text(doc
, './/details/originChannelTitle', 'uploader') 
  91         uploader_id 
= xpath_text(doc
, './/details/originChannelId', 'uploader id') 
  92         upload_date 
= unified_strdate(xpath_text(doc
, './/details/airtime', 'upload date')) 
  94         def xml_to_thumbnails(fnode
): 
  97                 thumbnail_url 
= node
.text
 
 101                     'url': thumbnail_url
, 
 103                 if 'key' in node
.attrib
: 
 104                     m 
= re
.match('^([0-9]+)x([0-9]+)$', node
.attrib
['key']) 
 106                         thumbnail
['width'] = int(m
.group(1)) 
 107                         thumbnail
['height'] = int(m
.group(2)) 
 108                 thumbnails
.append(thumbnail
) 
 111         thumbnails 
= xml_to_thumbnails(doc
.findall('.//teaserimages/teaserimage')) 
 113         format_nodes 
= doc
.findall('.//formitaeten/formitaet') 
 114         quality 
= qualities(['veryhigh', 'high', 'med', 'low']) 
 116         def get_quality(elem
): 
 117             return quality(xpath_text(elem
, 'quality')) 
 118         format_nodes
.sort(key
=get_quality
) 
 121         for fnode 
in format_nodes
: 
 122             video_url 
= fnode
.find('url').text
 
 123             is_available 
= 'http://www.metafilegenerator' not in video_url
 
 126             format_id 
= fnode
.attrib
['basetype'] 
 127             quality 
= xpath_text(fnode
, './quality', 'quality') 
 128             format_m 
= re
.match(r
'''(?x) 
 129                 (?P<vcodec>[^_]+)_(?P<acodec>[^_]+)_(?P<container>[^_]+)_ 
 130                 (?P<proto>[^_]+)_(?P<index>[^_]+)_(?P<indexproto>[^_]+) 
 133             ext 
= determine_ext(video_url
, None) or format_m
.group('container') 
 134             if ext 
not in ('smil', 'f4m', 'm3u8'): 
 135                 format_id 
= format_id 
+ '-' + quality
 
 136             if format_id 
in format_ids
: 
 142                 formats
.extend(self
._extract
_smil
_formats
( 
 143                     video_url
, video_id
, fatal
=False)) 
 145                 # the certificates are misconfigured (see 
 146                 # https://github.com/rg3/youtube-dl/issues/8665) 
 147                 if video_url
.startswith('https://'): 
 149                 formats
.extend(self
._extract
_m
3u8_formats
( 
 150                     video_url
, video_id
, 'mp4', m3u8_id
=format_id
, fatal
=False)) 
 152                 formats
.extend(self
._extract
_f
4m
_formats
( 
 153                     video_url
, video_id
, f4m_id
=format_id
, fatal
=False)) 
 155                 proto 
= format_m
.group('proto').lower() 
 157                 abr 
= int_or_none(xpath_text(fnode
, './audioBitrate', 'abr'), 1000) 
 158                 vbr 
= int_or_none(xpath_text(fnode
, './videoBitrate', 'vbr'), 1000) 
 160                 width 
= int_or_none(xpath_text(fnode
, './width', 'width')) 
 161                 height 
= int_or_none(xpath_text(fnode
, './height', 'height')) 
 163                 filesize 
= int_or_none(xpath_text(fnode
, './filesize', 'filesize')) 
 170                     'format_id': format_id
, 
 173                     'acodec': format_m
.group('acodec'), 
 174                     'vcodec': format_m
.group('vcodec'), 
 179                     'filesize': filesize
, 
 180                     'format_note': format_note
, 
 182                     '_available': is_available
, 
 184             format_ids
.append(format_id
) 
 186         self
._sort
_formats
(formats
) 
 191             'description': description
, 
 192             'duration': duration
, 
 193             'thumbnails': thumbnails
, 
 194             'uploader': uploader
, 
 195             'uploader_id': uploader_id
, 
 196             'upload_date': upload_date
, 
 200     def _real_extract(self
, url
): 
 201         mobj 
= re
.match(self
._VALID
_URL
, url
) 
 202         video_id 
= mobj
.group('id') 
 203         details_url 
= 'http://www.3sat.de/mediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id
 
 204         return self
.extract_from_xml_url(video_id
, details_url
)