]>
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/extractor/wistia.py
   1 from __future__ 
import unicode_literals
 
   5 from .common 
import InfoExtractor
 
  14 class WistiaIE(InfoExtractor
): 
  15     _VALID_URL 
= r
'(?:wistia:|https?://(?:fast\.)?wistia\.(?:net|com)/embed/(?:iframe|medias)/)(?P<id>[a-z0-9]+)' 
  16     _API_URL 
= 'http://fast.wistia.com/embed/medias/%s.json' 
  17     _IFRAME_URL 
= 'http://fast.wistia.net/embed/iframe/%s' 
  20         'url': 'http://fast.wistia.net/embed/iframe/sh7fpupwlt', 
  21         'md5': 'cafeb56ec0c53c18c97405eecb3133df', 
  25             'title': 'Being Resourceful', 
  26             'description': 'a Clients From Hell Video Series video from worldwidewebhosting', 
  27             'upload_date': '20131204', 
  28             'timestamp': 1386185018, 
  32         'url': 'wistia:sh7fpupwlt', 
  33         'only_matching': True, 
  36         'url': 'wistia:807fafadvk', 
  37         'only_matching': True, 
  39         'url': 'http://fast.wistia.com/embed/iframe/sh7fpupwlt', 
  40         'only_matching': True, 
  42         'url': 'http://fast.wistia.net/embed/medias/sh7fpupwlt.json', 
  43         'only_matching': True, 
  47     def _extract_url(webpage
): 
  49             r
'<(?:meta[^>]+?content|iframe[^>]+?src)=(["\'])(?P
<url
>(?
:https?
:)?
//(?
:fast\
.)?wistia\
.(?
:net|com
)/embed
/iframe
/.+?
)\
1', webpage) 
  51             return unescapeHTML(match.group('url
')) 
  53         match = re.search(r'(?
:id=["\']wistia_|data-wistia-?id=["\']|Wistia\
.embed\
(["\'])(?P<id>[^"\']+)', webpage) 
  55             return 'wistia
:%s' % match.group('id') 
  59                 <script[^>]+src=(["'])(?
:https?
:)?
//fast\
.wistia\
.com
/assets
/external
/E
-v1\
.js\
1[^
>]*>.*?
 
  60                 <div
[^
>]+class=(["']).*?\bwistia_async_(?P<id>[a-z0-9]+)\b.*?\2 
  63             return 'wistia:%s' % match.group('id') 
  65     def _real_extract(self, url): 
  66         video_id = self._match_id(url) 
  68         data_json = self._download_json( 
  69             self._API_URL % video_id, video_id, 
  70             # Some videos require this. 
  72                 'Referer': url if url.startswith('http') else self._IFRAME_URL % video_id, 
  75         if data_json.get('error'): 
  77                 'Error while getting the playlist', expected=True) 
  79         data = data_json['media'] 
  84         for a in data['assets']: 
  88             astatus = a.get('status') 
  90             if (astatus is not None and astatus != 2) or atype in ('preview', 'storyboard'): 
  92             elif atype in ('still', 'still_image'): 
  95                     'width': int_or_none(a.get('width')), 
  96                     'height': int_or_none(a.get('height')), 
 100                 is_m3u8 = a.get('container') == 'm3u8' or aext == 'm3u8' 
 104                     'tbr': int_or_none(a.get('bitrate')), 
 105                     'vbr': int_or_none(a.get('opt_vbitrate')), 
 106                     'width': int_or_none(a.get('width')), 
 107                     'height': int_or_none(a.get('height')), 
 108                     'filesize': int_or_none(a.get('size')), 
 109                     'vcodec': a.get('codec'), 
 110                     'container': a.get('container'), 
 111                     'ext': 'mp4' if is_m3u8 else aext, 
 112                     'protocol': 'm3u8' if is_m3u8 else None, 
 113                     'preference': 1 if atype == 'original' else None, 
 116         self._sort_formats(formats) 
 121             'description': data.get('seoDescription'), 
 123             'thumbnails': thumbnails, 
 124             'duration': float_or_none(data.get('duration')), 
 125             'timestamp': int_or_none(data.get('createdAt')),