]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/radiocanada.py
   2 from __future__ 
import unicode_literals
 
   6 from .common 
import InfoExtractor
 
  20 class RadioCanadaIE(InfoExtractor
): 
  21     IE_NAME 
= 'radiocanada' 
  22     _VALID_URL 
= r
'(?:radiocanada:|https?://ici\.radio-canada\.ca/widgets/mediaconsole/)(?P<app_code>[^:/]+)[:/](?P<id>[0-9]+)' 
  24         'url': 'http://ici.radio-canada.ca/widgets/mediaconsole/medianet/7184272', 
  28             'title': 'Le parcours du tireur capté sur vidéo', 
  29             'description': 'Images des caméras de surveillance fournies par la GRC montrant le parcours du tireur d\'Ottawa', 
  30             'upload_date': '20141023', 
  34             'skip_download': True, 
  38     def _real_extract(self
, url
): 
  39         url
, smuggled_data 
= unsmuggle_url(url
, {}) 
  40         app_code
, video_id 
= re
.match(self
._VALID
_URL
, url
).groups() 
  42         metadata 
= self
._download
_xml
( 
  43             'http://api.radio-canada.ca/metaMedia/v1/index.ashx', 
  44             video_id
, note
='Downloading metadata XML', query
={ 
  50             el 
= find_xpath_attr(metadata
, './/Meta', 'name', name
) 
  51             return el
.text 
if el 
is not None else None 
  53         if get_meta('protectionType'): 
  54             raise ExtractorError('This video is DRM protected.', expected
=True) 
  56         device_types 
= ['ipad'] 
  58             device_types
.append('flash') 
  59             device_types
.append('android') 
  62         # TODO: extract f4m formats 
  63         # f4m formats can be extracted using flashhd device_type but they produce unplayable file 
  64         for device_type 
in device_types
: 
  65             validation_url 
= 'http://api.radio-canada.ca/validationMedia/v1/Validation.ashx' 
  69                 'connectionType': 'broadband', 
  70                 'multibitrate': 'true', 
  71                 'deviceType': device_type
, 
  74                 validation_url 
= 'https://services.radio-canada.ca/media/validation/v2/' 
  75                 query
.update(smuggled_data
) 
  78                     # paysJ391wsHjbOJwvCs26toz and bypasslock are used to bypass geo-restriction 
  79                     'paysJ391wsHjbOJwvCs26toz': 'CA', 
  80                     'bypasslock': 'NZt5K62gRqfc', 
  82             v_data 
= self
._download
_xml
(validation_url
, video_id
, note
='Downloading %s XML' % device_type
, query
=query
, fatal
=False) 
  83             v_url 
= xpath_text(v_data
, 'url') 
  87                 raise ExtractorError('%s said: %s' % ( 
  88                     self
.IE_NAME
, xpath_text(v_data
, 'message')), expected
=True) 
  89             ext 
= determine_ext(v_url
) 
  91                 formats
.extend(self
._extract
_m
3u8_formats
( 
  92                     v_url
, video_id
, 'mp4', m3u8_id
='hls', fatal
=False)) 
  94                 formats
.extend(self
._extract
_f
4m
_formats
( 
  95                     v_url
, video_id
, f4m_id
='hds', fatal
=False)) 
  97                 ext 
= determine_ext(v_url
) 
  98                 bitrates 
= xpath_element(v_data
, 'bitrates') 
  99                 for url_e 
in bitrates
.findall('url'): 
 100                     tbr 
= int_or_none(url_e
.get('bitrate')) 
 103                     f_url 
= re
.sub(r
'\d+\.%s' % ext
, '%d.%s' % (tbr
, ext
), v_url
) 
 104                     protocol 
= determine_protocol({'url': f_url
}) 
 106                         'format_id': '%s-%d' % (protocol
, tbr
), 
 108                         'ext': 'flv' if protocol 
== 'rtmp' else ext
, 
 109                         'protocol': protocol
, 
 110                         'width': int_or_none(url_e
.get('width')), 
 111                         'height': int_or_none(url_e
.get('height')), 
 114                     mobj 
= re
.match(r
'(?P<url>rtmp://[^/]+/[^/]+)/(?P<playpath>[^?]+)(?P<auth>\?.+)', f_url
) 
 117                             'url': mobj
.group('url') + mobj
.group('auth'), 
 118                             'play_path': mobj
.group('playpath'), 
 121                     if protocol 
== 'rtsp': 
 122                         base_url 
= self
._search
_regex
( 
 123                             r
'rtsp://([^?]+)', f_url
, 'base url', default
=None) 
 125                             base_url 
= 'http://' + base_url
 
 126                             formats
.extend(self
._extract
_m
3u8_formats
( 
 127                                 base_url 
+ '/playlist.m3u8', video_id
, 'mp4', 
 128                                 'm3u8_native', m3u8_id
='hls', fatal
=False)) 
 129                             formats
.extend(self
._extract
_f
4m
_formats
( 
 130                                 base_url 
+ '/manifest.f4m', video_id
, 
 131                                 f4m_id
='hds', fatal
=False)) 
 132         self
._sort
_formats
(formats
) 
 135         closed_caption_url 
= get_meta('closedCaption') or get_meta('closedCaptionHTML5') 
 136         if closed_caption_url
: 
 138                 'url': closed_caption_url
, 
 139                 'ext': determine_ext(closed_caption_url
, 'vtt'), 
 144             'title': get_meta('Title'), 
 145             'description': get_meta('Description') or get_meta('ShortDescription'), 
 146             'thumbnail': get_meta('imageHR') or get_meta('imageMR') or get_meta('imageBR'), 
 147             'duration': int_or_none(get_meta('length')), 
 148             'series': get_meta('Emission'), 
 149             'season_number': int_or_none('SrcSaison'), 
 150             'episode_number': int_or_none('SrcEpisode'), 
 151             'upload_date': unified_strdate(get_meta('Date')), 
 152             'subtitles': subtitles
, 
 157 class RadioCanadaAudioVideoIE(InfoExtractor
): 
 158     'radiocanada:audiovideo' 
 159     _VALID_URL 
= r
'https?://ici\.radio-canada\.ca/audio-video/media-(?P<id>[0-9]+)' 
 161         'url': 'http://ici.radio-canada.ca/audio-video/media-7527184/barack-obama-au-vietnam', 
 165             'title': 'Barack Obama au Vietnam', 
 166             'description': 'Les États-Unis lèvent l\'embargo sur la vente d\'armes qui datait de la guerre du Vietnam', 
 167             'upload_date': '20160523', 
 171             'skip_download': True, 
 175     def _real_extract(self
, url
): 
 176         return self
.url_result('radiocanada:medianet:%s' % self
._match
_id
(url
))