]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/radiocanada.py
   2 from __future__ 
import unicode_literals
 
   6 from .common 
import InfoExtractor
 
  20 class RadioCanadaIE(InfoExtractor
): 
  21     IE_NAME 
= 'radiocanada' 
  22     _VALID_URL 
= r
'(?:radiocanada:|https?://ici\.radio-canada\.ca/widgets/mediaconsole/)(?P<app_code>[^:/]+)[:/](?P<id>[0-9]+)' 
  25             'url': 'http://ici.radio-canada.ca/widgets/mediaconsole/medianet/7184272', 
  29                 'title': 'Le parcours du tireur capté sur vidéo', 
  30                 'description': 'Images des caméras de surveillance fournies par la GRC montrant le parcours du tireur d\'Ottawa', 
  31                 'upload_date': '20141023', 
  35                 'skip_download': True, 
  40             'url': 'http://ici.radio-canada.ca/widgets/mediaconsole/medianet/7754998/', 
  44                 'title': 'letelejournal22h', 
  45                 'description': 'INTEGRALE WEB 22H-TJ', 
  46                 'upload_date': '20170720', 
  50                 'skip_download': True, 
  55     def _real_extract(self
, url
): 
  56         url
, smuggled_data 
= unsmuggle_url(url
, {}) 
  57         app_code
, video_id 
= re
.match(self
._VALID
_URL
, url
).groups() 
  59         metadata 
= self
._download
_xml
( 
  60             'http://api.radio-canada.ca/metaMedia/v1/index.ashx', 
  61             video_id
, note
='Downloading metadata XML', query
={ 
  67             el 
= find_xpath_attr(metadata
, './/Meta', 'name', name
) 
  68             return el
.text 
if el 
is not None else None 
  70         if get_meta('protectionType'): 
  71             raise ExtractorError('This video is DRM protected.', expected
=True) 
  73         device_types 
= ['ipad'] 
  75             device_types
.append('flash') 
  76             device_types
.append('android') 
  80         # TODO: extract f4m formats 
  81         # f4m formats can be extracted using flashhd device_type but they produce unplayable file 
  82         for device_type 
in device_types
: 
  83             validation_url 
= 'http://api.radio-canada.ca/validationMedia/v1/Validation.ashx' 
  87                 'connectionType': 'broadband', 
  88                 'multibitrate': 'true', 
  89                 'deviceType': device_type
, 
  92                 validation_url 
= 'https://services.radio-canada.ca/media/validation/v2/' 
  93                 query
.update(smuggled_data
) 
  96                     # paysJ391wsHjbOJwvCs26toz and bypasslock are used to bypass geo-restriction 
  97                     'paysJ391wsHjbOJwvCs26toz': 'CA', 
  98                     'bypasslock': 'NZt5K62gRqfc', 
 100             v_data 
= self
._download
_xml
(validation_url
, video_id
, note
='Downloading %s XML' % device_type
, query
=query
, fatal
=False) 
 101             v_url 
= xpath_text(v_data
, 'url') 
 105                 error 
= xpath_text(v_data
, 'message') 
 107             ext 
= determine_ext(v_url
) 
 109                 formats
.extend(self
._extract
_m
3u8_formats
( 
 110                     v_url
, video_id
, 'mp4', m3u8_id
='hls', fatal
=False)) 
 112                 formats
.extend(self
._extract
_f
4m
_formats
( 
 113                     v_url
, video_id
, f4m_id
='hds', fatal
=False)) 
 115                 ext 
= determine_ext(v_url
) 
 116                 bitrates 
= xpath_element(v_data
, 'bitrates') 
 117                 for url_e 
in bitrates
.findall('url'): 
 118                     tbr 
= int_or_none(url_e
.get('bitrate')) 
 121                     f_url 
= re
.sub(r
'\d+\.%s' % ext
, '%d.%s' % (tbr
, ext
), v_url
) 
 122                     protocol 
= determine_protocol({'url': f_url
}) 
 124                         'format_id': '%s-%d' % (protocol
, tbr
), 
 126                         'ext': 'flv' if protocol 
== 'rtmp' else ext
, 
 127                         'protocol': protocol
, 
 128                         'width': int_or_none(url_e
.get('width')), 
 129                         'height': int_or_none(url_e
.get('height')), 
 132                     mobj 
= re
.match(r
'(?P<url>rtmp://[^/]+/[^/]+)/(?P<playpath>[^?]+)(?P<auth>\?.+)', f_url
) 
 135                             'url': mobj
.group('url') + mobj
.group('auth'), 
 136                             'play_path': mobj
.group('playpath'), 
 139                     if protocol 
== 'rtsp': 
 140                         base_url 
= self
._search
_regex
( 
 141                             r
'rtsp://([^?]+)', f_url
, 'base url', default
=None) 
 143                             base_url 
= 'http://' + base_url
 
 144                             formats
.extend(self
._extract
_m
3u8_formats
( 
 145                                 base_url 
+ '/playlist.m3u8', video_id
, 'mp4', 
 146                                 'm3u8_native', m3u8_id
='hls', fatal
=False)) 
 147                             formats
.extend(self
._extract
_f
4m
_formats
( 
 148                                 base_url 
+ '/manifest.f4m', video_id
, 
 149                                 f4m_id
='hds', fatal
=False)) 
 150         if not formats 
and error
: 
 151             raise ExtractorError( 
 152                 '%s said: %s' % (self
.IE_NAME
, error
), expected
=True) 
 153         self
._sort
_formats
(formats
) 
 156         closed_caption_url 
= get_meta('closedCaption') or get_meta('closedCaptionHTML5') 
 157         if closed_caption_url
: 
 159                 'url': closed_caption_url
, 
 160                 'ext': determine_ext(closed_caption_url
, 'vtt'), 
 165             'title': get_meta('Title') or get_meta('AV-nomEmission'), 
 166             'description': get_meta('Description') or get_meta('ShortDescription'), 
 167             'thumbnail': get_meta('imageHR') or get_meta('imageMR') or get_meta('imageBR'), 
 168             'duration': int_or_none(get_meta('length')), 
 169             'series': get_meta('Emission'), 
 170             'season_number': int_or_none('SrcSaison'), 
 171             'episode_number': int_or_none('SrcEpisode'), 
 172             'upload_date': unified_strdate(get_meta('Date')), 
 173             'subtitles': subtitles
, 
 178 class RadioCanadaAudioVideoIE(InfoExtractor
): 
 179     'radiocanada:audiovideo' 
 180     _VALID_URL 
= r
'https?://ici\.radio-canada\.ca/audio-video/media-(?P<id>[0-9]+)' 
 182         'url': 'http://ici.radio-canada.ca/audio-video/media-7527184/barack-obama-au-vietnam', 
 186             'title': 'Barack Obama au Vietnam', 
 187             'description': 'Les États-Unis lèvent l\'embargo sur la vente d\'armes qui datait de la guerre du Vietnam', 
 188             'upload_date': '20160523', 
 192             'skip_download': True, 
 196     def _real_extract(self
, url
): 
 197         return self
.url_result('radiocanada:medianet:%s' % self
._match
_id
(url
))