]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/wistia.py
   1 from __future__ 
import unicode_literals
 
   5 from .common 
import InfoExtractor
 
  14 class WistiaIE(InfoExtractor
): 
  15     _VALID_URL 
= r
'(?:wistia:|https?://(?:fast\.)?wistia\.net/embed/iframe/)(?P<id>[a-z0-9]+)' 
  16     _API_URL 
= 'http://fast.wistia.com/embed/medias/%s.json' 
  17     _IFRAME_URL 
= 'http://fast.wistia.net/embed/iframe/%s' 
  20         'url': 'http://fast.wistia.net/embed/iframe/sh7fpupwlt', 
  21         'md5': 'cafeb56ec0c53c18c97405eecb3133df', 
  25             'title': 'Being Resourceful', 
  26             'description': 'a Clients From Hell Video Series video from worldwidewebhosting', 
  27             'upload_date': '20131204', 
  28             'timestamp': 1386185018, 
  32         'url': 'wistia:sh7fpupwlt', 
  33         'only_matching': True, 
  36         'url': 'wistia:807fafadvk', 
  37         'only_matching': True, 
  41     def _extract_url(webpage
): 
  43             r
'<(?:meta[^>]+?content|iframe[^>]+?src)=(["\'])(?P
<url
>(?
:https?
:)?
//(?
:fast\
.)?wistia\
.net
/embed
/iframe
/.+?
)\
1', webpage) 
  45             return unescapeHTML(match.group('url
')) 
  47         match = re.search(r'(?
:id=["\']wistia_|data-wistia-?id=["\']|Wistia\
.embed\
(["\'])(?P<id>[^"\']+)', webpage) 
  49             return 'wistia
:%s' % match.group('id') 
  53                 <script[^>]+src=(["'])(?
:https?
:)?
//fast\
.wistia\
.com
/assets
/external
/E
-v1\
.js\
1[^
>]*>.*?
 
  54                 <div
[^
>]+class=(["']).*?\bwistia_async_(?P<id>[a-z0-9]+)\b.*?\2 
  57             return 'wistia:%s' % match.group('id') 
  59     def _real_extract(self, url): 
  60         video_id = self._match_id(url) 
  62         data_json = self._download_json( 
  63             self._API_URL % video_id, video_id, 
  64             # Some videos require this. 
  66                 'Referer': url if url.startswith('http') else self._IFRAME_URL % video_id, 
  69         if data_json.get('error'): 
  71                 'Error while getting the playlist', expected=True) 
  73         data = data_json['media'] 
  78         for a in data['assets']: 
  82             astatus = a.get('status') 
  84             if (astatus is not None and astatus != 2) or atype in ('preview', 'storyboard'): 
  86             elif atype in ('still', 'still_image'): 
  89                     'width': int_or_none(a.get('width')), 
  90                     'height': int_or_none(a.get('height')), 
  94                 is_m3u8 = a.get('container') == 'm3u8' or aext == 'm3u8' 
  98                     'tbr': int_or_none(a.get('bitrate')), 
  99                     'vbr': int_or_none(a.get('opt_vbitrate')), 
 100                     'width': int_or_none(a.get('width')), 
 101                     'height': int_or_none(a.get('height')), 
 102                     'filesize': int_or_none(a.get('size')), 
 103                     'vcodec': a.get('codec'), 
 104                     'container': a.get('container'), 
 105                     'ext': 'mp4' if is_m3u8 else aext, 
 106                     'protocol': 'm3u8' if is_m3u8 else None, 
 107                     'preference': 1 if atype == 'original' else None, 
 110         self._sort_formats(formats) 
 115             'description': data.get('seoDescription'), 
 117             'thumbnails': thumbnails, 
 118             'duration': float_or_none(data.get('duration')), 
 119             'timestamp': int_or_none(data.get('createdAt')),