]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/dreisat.py
   1 from __future__ 
import unicode_literals
 
   5 from .common 
import InfoExtractor
 
  16 class DreiSatIE(InfoExtractor
): 
  18     _GEO_COUNTRIES 
= ['DE'] 
  19     _VALID_URL 
= r
'https?://(?:www\.)?3sat\.de/mediathek/(?:(?:index|mediathek)\.php)?\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)' 
  22             'url': 'http://www.3sat.de/mediathek/index.php?mode=play&obj=45918', 
  23             'md5': 'be37228896d30a88f315b638900a026e', 
  27                 'title': 'Waidmannsheil', 
  28                 'description': 'md5:cce00ca1d70e21425e72c86a98a56817', 
  29                 'uploader': 'SCHWEIZWEIT', 
  30                 'uploader_id': '100000210', 
  31                 'upload_date': '20140913' 
  34                 'skip_download': True,  # m3u8 downloads 
  38             'url': 'http://www.3sat.de/mediathek/mediathek.php?mode=play&obj=51066', 
  39             'only_matching': True, 
  43     def _parse_smil_formats(self
, smil
, smil_url
, video_id
, namespace
=None, f4m_params
=None, transform_rtmp_url
=None): 
  45         for param_group 
in smil
.findall(self
._xpath
_ns
('./head/paramGroup', namespace
)): 
  46             group_id 
= param_group
.get(self
._xpath
_ns
( 
  47                 'id', 'http://www.w3.org/XML/1998/namespace')) 
  49             for param 
in param_group
: 
  50                 params
[param
.get('name')] = param
.get('value') 
  51             param_groups
[group_id
] = params
 
  54         for video 
in smil
.findall(self
._xpath
_ns
('.//video', namespace
)): 
  55             src 
= video
.get('src') 
  58             bitrate 
= int_or_none(self
._search
_regex
(r
'_(\d+)k', src
, 'bitrate', None)) or float_or_none(video
.get('system-bitrate') or video
.get('systemBitrate'), 1000) 
  59             group_id 
= video
.get('paramGroup') 
  60             param_group 
= param_groups
[group_id
] 
  61             for proto 
in param_group
['protocols'].split(','): 
  63                     'url': '%s://%s' % (proto
, param_group
['host']), 
  64                     'app': param_group
['app'], 
  67                     'format_id': '%s-%d' % (proto
, bitrate
), 
  70         self
._sort
_formats
(formats
) 
  73     def extract_from_xml_url(self
, video_id
, xml_url
): 
  74         doc 
= self
._download
_xml
( 
  76             note
='Downloading video info', 
  77             errnote
='Failed to download video info') 
  79         status_code 
= xpath_text(doc
, './status/statuscode') 
  80         if status_code 
and status_code 
!= 'ok': 
  81             if status_code 
== 'notVisibleAnymore': 
  82                 message 
= 'Video %s is not available' % video_id
 
  84                 message 
= '%s returned error: %s' % (self
.IE_NAME
, status_code
) 
  85             raise ExtractorError(message
, expected
=True) 
  87         title 
= xpath_text(doc
, './/information/title', 'title', True) 
  91         for fnode 
in doc
.findall('.//formitaeten/formitaet'): 
  92             video_url 
= xpath_text(fnode
, 'url') 
  93             if not video_url 
or video_url 
in urls
: 
  95             urls
.append(video_url
) 
  97             is_available 
= 'http://www.metafilegenerator' not in video_url
 
  98             geoloced 
= 'static_geoloced_online' in video_url
 
  99             if not is_available 
or geoloced
: 
 102             format_id 
= fnode
.attrib
['basetype'] 
 103             format_m 
= re
.match(r
'''(?x) 
 104                 (?P<vcodec>[^_]+)_(?P<acodec>[^_]+)_(?P<container>[^_]+)_ 
 105                 (?P<proto>[^_]+)_(?P<index>[^_]+)_(?P<indexproto>[^_]+) 
 108             ext 
= determine_ext(video_url
, None) or format_m
.group('container') 
 113                 formats
.extend(self
._extract
_smil
_formats
( 
 114                     video_url
, video_id
, fatal
=False)) 
 116                 # the certificates are misconfigured (see 
 117                 # https://github.com/ytdl-org/youtube-dl/issues/8665) 
 118                 if video_url
.startswith('https://'): 
 120                 formats
.extend(self
._extract
_m
3u8_formats
( 
 121                     video_url
, video_id
, 'mp4', 'm3u8_native', 
 122                     m3u8_id
=format_id
, fatal
=False)) 
 124                 formats
.extend(self
._extract
_f
4m
_formats
( 
 125                     video_url
, video_id
, f4m_id
=format_id
, fatal
=False)) 
 127                 quality 
= xpath_text(fnode
, './quality') 
 129                     format_id 
+= '-' + quality
 
 131                 abr 
= int_or_none(xpath_text(fnode
, './audioBitrate'), 1000) 
 132                 vbr 
= int_or_none(xpath_text(fnode
, './videoBitrate'), 1000) 
 134                 tbr 
= int_or_none(self
._search
_regex
( 
 135                     r
'_(\d+)k', video_url
, 'bitrate', None)) 
 136                 if tbr 
and vbr 
and not abr
: 
 140                     'format_id': format_id
, 
 143                     'acodec': format_m
.group('acodec'), 
 144                     'vcodec': format_m
.group('vcodec'), 
 148                     'width': int_or_none(xpath_text(fnode
, './width')), 
 149                     'height': int_or_none(xpath_text(fnode
, './height')), 
 150                     'filesize': int_or_none(xpath_text(fnode
, './filesize')), 
 151                     'protocol': format_m
.group('proto').lower(), 
 154         geolocation 
= xpath_text(doc
, './/details/geolocation') 
 155         if not formats 
and geolocation 
and geolocation 
!= 'none': 
 156             self
.raise_geo_restricted(countries
=self
._GEO
_COUNTRIES
) 
 158         self
._sort
_formats
(formats
) 
 161         for node 
in doc
.findall('.//teaserimages/teaserimage'): 
 162             thumbnail_url 
= node
.text
 
 163             if not thumbnail_url
: 
 166                 'url': thumbnail_url
, 
 168             thumbnail_key 
= node
.get('key') 
 170                 m 
= re
.match('^([0-9]+)x([0-9]+)$', thumbnail_key
) 
 172                     thumbnail
['width'] = int(m
.group(1)) 
 173                     thumbnail
['height'] = int(m
.group(2)) 
 174             thumbnails
.append(thumbnail
) 
 176         upload_date 
= unified_strdate(xpath_text(doc
, './/details/airtime')) 
 181             'description': xpath_text(doc
, './/information/detail'), 
 182             'duration': int_or_none(xpath_text(doc
, './/details/lengthSec')), 
 183             'thumbnails': thumbnails
, 
 184             'uploader': xpath_text(doc
, './/details/originChannelTitle'), 
 185             'uploader_id': xpath_text(doc
, './/details/originChannelId'), 
 186             'upload_date': upload_date
, 
 190     def _real_extract(self
, url
): 
 191         video_id 
= self
._match
_id
(url
) 
 192         details_url 
= 'http://www.3sat.de/mediathek/xmlservice/web/beitragsDetails?id=%s' % video_id
 
 193         return self
.extract_from_xml_url(video_id
, details_url
)