]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/reuters.py
   2 from __future__ 
import unicode_literals
 
   6 from .common 
import InfoExtractor
 
  14 class ReutersIE(InfoExtractor
): 
  15     _VALID_URL 
= r
'https?://(?:www\.)?reuters\.com/.*?\?.*?videoId=(?P<id>[0-9]+)' 
  17         'url': 'http://www.reuters.com/video/2016/05/20/san-francisco-police-chief-resigns?videoId=368575562', 
  18         'md5': '8015113643a0b12838f160b0b81cc2ee', 
  22             'title': 'San Francisco police chief resigns', 
  26     def _real_extract(self
, url
): 
  27         video_id 
= self
._match
_id
(url
) 
  28         webpage 
= self
._download
_webpage
( 
  29             'http://www.reuters.com/assets/iframe/yovideo?videoId=%s' % video_id
, video_id
) 
  30         video_data 
= js_to_json(self
._search
_regex
( 
  31             r
'(?s)Reuters\.yovideo\.drawPlayer\(({.*?})\);', 
  32             webpage
, 'video data')) 
  def get_json_value(key, fatal=False):
      """Look up the quoted string stored under ``key`` in ``video_data``.

      Builds a ``"<key>" : "<value>"`` pattern and hands it to
      ``self._search_regex`` together with the ``video_data`` text taken
      from the enclosing scope. ``key`` doubles as the field name passed
      to that call, and ``fatal`` is forwarded unchanged. Whatever
      ``_search_regex`` returns is returned as-is.
      """
      value_pattern = r'"%s"\s*:\s*"([^"]+)"' % key
      return self._search_regex(
          value_pattern, video_data, key, fatal=fatal)
  37         title 
= unescapeHTML(get_json_value('title', fatal
=True)) 
  38         mmid
, fid 
= re
.search(r
',/(\d+)\?f=(\d+)', get_json_value('flv', fatal
=True)).groups() 
  40         mas_data 
= self
._download
_json
( 
  41             'http://mas-e.cds1.yospace.com/mas/%s/%s?trans=json' % (mmid
, fid
), 
  42             video_id
, transform_source
=js_to_json
) 
  48             method 
= f
.get('method') 
  50                 formats
.extend(self
._extract
_m
3u8_formats
( 
  51                     f_url
, video_id
, 'mp4', 'm3u8_native', m3u8_id
='hls', fatal
=False)) 
  53                 container 
= f
.get('container') 
  54                 ext 
= '3gp' if method 
== 'mobile' else container
 
  59                     'container': container 
if method 
!= 'mobile' else None, 
  61         self
._sort
_formats
(formats
) 
  66             'thumbnail': get_json_value('thumb'), 
  67             'duration': int_or_none(get_json_value('seconds')),