]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/radiocanada.py
321917ad0810c6ddfe1d8586ba31570251fe012e
   2 from __future__ 
import unicode_literals
 
   6 from .common 
import InfoExtractor
 
  20 class RadioCanadaIE(InfoExtractor
): 
  21     IE_NAME 
= 'radiocanada' 
  22     _VALID_URL 
= r
'(?:radiocanada:|https?://ici\.radio-canada\.ca/widgets/mediaconsole/)(?P<app_code>[^:/]+)[:/](?P<id>[0-9]+)' 
  24         'url': 'http://ici.radio-canada.ca/widgets/mediaconsole/medianet/7184272', 
  28             'title': 'Le parcours du tireur capté sur vidéo', 
  29             'description': 'Images des caméras de surveillance fournies par la GRC montrant le parcours du tireur d\'Ottawa', 
  30             'upload_date': '20141023', 
  34             'skip_download': True, 
  38     def _real_extract(self
, url
): 
  39         url
, smuggled_data 
= unsmuggle_url(url
, {}) 
  40         app_code
, video_id 
= re
.match(self
._VALID
_URL
, url
).groups() 
  42         metadata 
= self
._download
_xml
( 
  43             'http://api.radio-canada.ca/metaMedia/v1/index.ashx', 
  44             video_id
, note
='Downloading metadata XML', query
={ 
  50             el 
= find_xpath_attr(metadata
, './/Meta', 'name', name
) 
  51             return el
.text 
if el 
is not None else None 
  53         if get_meta('protectionType'): 
  54             raise ExtractorError('This video is DRM protected.', expected
=True) 
  56         device_types 
= ['ipad'] 
  57         if app_code 
!= 'toutv': 
  58             device_types
.append('flash') 
  60             device_types
.append('android') 
  63         # TODO: extract f4m formats 
  64         # f4m formats can be extracted using flashhd device_type but they produce unplayable file 
  65         for device_type 
in device_types
: 
  66             validation_url 
= 'http://api.radio-canada.ca/validationMedia/v1/Validation.ashx' 
  70                 'connectionType': 'broadband', 
  71                 'multibitrate': 'true', 
  72                 'deviceType': device_type
, 
  75                 validation_url 
= 'https://services.radio-canada.ca/media/validation/v2/' 
  76                 query
.update(smuggled_data
) 
  79                     # paysJ391wsHjbOJwvCs26toz and bypasslock are used to bypass geo-restriction 
  80                     'paysJ391wsHjbOJwvCs26toz': 'CA', 
  81                     'bypasslock': 'NZt5K62gRqfc', 
  83             v_data 
= self
._download
_xml
(validation_url
, video_id
, note
='Downloading %s XML' % device_type
, query
=query
, fatal
=False) 
  84             v_url 
= xpath_text(v_data
, 'url') 
  88                 raise ExtractorError('%s said: %s' % ( 
  89                     self
.IE_NAME
, xpath_text(v_data
, 'message')), expected
=True) 
  90             ext 
= determine_ext(v_url
) 
  92                 formats
.extend(self
._extract
_m
3u8_formats
( 
  93                     v_url
, video_id
, 'mp4', m3u8_id
='hls', fatal
=False)) 
  95                 formats
.extend(self
._extract
_f
4m
_formats
( 
  96                     v_url
, video_id
, f4m_id
='hds', fatal
=False)) 
  98                 ext 
= determine_ext(v_url
) 
  99                 bitrates 
= xpath_element(v_data
, 'bitrates') 
 100                 for url_e 
in bitrates
.findall('url'): 
 101                     tbr 
= int_or_none(url_e
.get('bitrate')) 
 104                     f_url 
= re
.sub(r
'\d+\.%s' % ext
, '%d.%s' % (tbr
, ext
), v_url
) 
 105                     protocol 
= determine_protocol({'url': f_url
}) 
 107                         'format_id': '%s-%d' % (protocol
, tbr
), 
 109                         'ext': 'flv' if protocol 
== 'rtmp' else ext
, 
 110                         'protocol': protocol
, 
 111                         'width': int_or_none(url_e
.get('width')), 
 112                         'height': int_or_none(url_e
.get('height')), 
 115                     if protocol 
== 'rtsp': 
 116                         base_url 
= self
._search
_regex
( 
 117                             r
'rtsp://([^?]+)', f_url
, 'base url', default
=None) 
 119                             base_url 
= 'http://' + base_url
 
 120                             formats
.extend(self
._extract
_m
3u8_formats
( 
 121                                 base_url 
+ '/playlist.m3u8', video_id
, 'mp4', 
 122                                 'm3u8_native', m3u8_id
='hls', fatal
=False)) 
 123                             formats
.extend(self
._extract
_f
4m
_formats
( 
 124                                 base_url 
+ '/manifest.f4m', video_id
, 
 125                                 f4m_id
='hds', fatal
=False)) 
 126         self
._sort
_formats
(formats
) 
 129         closed_caption_url 
= get_meta('closedCaption') or get_meta('closedCaptionHTML5') 
 130         if closed_caption_url
: 
 132                 'url': closed_caption_url
, 
 133                 'ext': determine_ext(closed_caption_url
, 'vtt'), 
 138             'title': get_meta('Title'), 
 139             'description': get_meta('Description') or get_meta('ShortDescription'), 
 140             'thumbnail': get_meta('imageHR') or get_meta('imageMR') or get_meta('imageBR'), 
 141             'duration': int_or_none(get_meta('length')), 
 142             'series': get_meta('Emission'), 
 143             'season_number': int_or_none('SrcSaison'), 
 144             'episode_number': int_or_none('SrcEpisode'), 
 145             'upload_date': unified_strdate(get_meta('Date')), 
 146             'subtitles': subtitles
, 
 151 class RadioCanadaAudioVideoIE(InfoExtractor
): 
 152     'radiocanada:audiovideo' 
 153     _VALID_URL 
= r
'https?://ici\.radio-canada\.ca/audio-video/media-(?P<id>[0-9]+)' 
 155         'url': 'http://ici.radio-canada.ca/audio-video/media-7527184/barack-obama-au-vietnam', 
 159             'title': 'Barack Obama au Vietnam', 
 160             'description': 'Les États-Unis lèvent l\'embargo sur la vente d\'armes qui datait de la guerre du Vietnam', 
 161             'upload_date': '20160523', 
 165             'skip_download': True, 
 169     def _real_extract(self
, url
): 
 170         return self
.url_result('radiocanada:medianet:%s' % self
._match
_id
(url
))