]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/wistia.py
from __future__ import unicode_literals

import re

from .common import InfoExtractor
 
  14 class WistiaIE(InfoExtractor
): 
  15     _VALID_URL 
= r
'(?:wistia:|https?://(?:fast\.)?wistia\.(?:net|com)/embed/(?:iframe|medias)/)(?P<id>[a-z0-9]{10})' 
  16     _EMBED_BASE_URL 
= 'http://fast.wistia.com/embed/' 
  19         'url': 'http://fast.wistia.net/embed/iframe/sh7fpupwlt', 
  20         'md5': 'cafeb56ec0c53c18c97405eecb3133df', 
  24             'title': 'Being Resourceful', 
  25             'description': 'a Clients From Hell Video Series video from worldwidewebhosting', 
  26             'upload_date': '20131204', 
  27             'timestamp': 1386185018, 
  31         'url': 'wistia:sh7fpupwlt', 
  32         'only_matching': True, 
  35         'url': 'wistia:807fafadvk', 
  36         'only_matching': True, 
  38         'url': 'http://fast.wistia.com/embed/iframe/sh7fpupwlt', 
  39         'only_matching': True, 
  41         'url': 'http://fast.wistia.net/embed/medias/sh7fpupwlt.json', 
  42         'only_matching': True, 
  45     # https://wistia.com/support/embed-and-share/video-on-your-website 
  47     def _extract_url(webpage
): 
  48         urls 
= WistiaIE
._extract
_urls
(webpage
) 
  49         return urls
[0] if urls 
else None 
  52     def _extract_urls(webpage
): 
  54         for match 
in re
.finditer( 
  55                 r
'<(?:meta[^>]+?content|(?:iframe|script)[^>]+?src)=["\'](?P
<url
>(?
:https?
:)?
//(?
:fast\
.)?wistia\
.(?
:net|com
)/embed
/(?
:iframe|medias
)/[a
-z0
-9]{10}
)', webpage): 
  56             urls.append(unescapeHTML(match.group('url
'))) 
  57         for match in re.finditer( 
  59                     <div[^>]+class=(["']).*?
\bwistia
_async
_(?P
<id>[a
-z0
-9]{10}
)\b.*?\
2 
  61             urls.append('wistia:%s' % match.group('id')) 
  62         for match in re.finditer(r'(?:data-wistia-?id=["\']|Wistia\.embed\(["\']|id=["\']wistia_)(?P<id>[a-z0-9]{10})', webpage): 
  63             urls.append('wistia:%s' % match.group('id')) 
  66     def _real_extract(self, url): 
  67         video_id = self._match_id(url) 
  69         data_json = self._download_json( 
  70             self._EMBED_BASE_URL + 'medias/%s.json' % video_id, video_id, 
  71             # Some videos require this. 
  73                 'Referer': url if url.startswith('http') else self._EMBED_BASE_URL + 'iframe/' + video_id, 
  76         if data_json.get('error'): 
  78                 'Error while getting the playlist', expected=True) 
  80         data = data_json['media'] 
  85         for a in data['assets']: 
  89             astatus = a.get('status') 
  91             if (astatus is not None and astatus != 2) or atype in ('preview', 'storyboard'): 
  93             elif atype in ('still', 'still_image'): 
  96                     'width': int_or_none(a.get('width')), 
  97                     'height': int_or_none(a.get('height')), 
  98                     'filesize': int_or_none(a.get('size')), 
 102                 display_name = a.get('display_name') 
 104                 if atype and atype.endswith('_video') and display_name: 
 105                     format_id = '%s-%s' % (atype[:-6], display_name) 
 107                     'format_id': format_id, 
 109                     'tbr': int_or_none(a.get('bitrate')) or None, 
 110                     'preference': 1 if atype == 'original' else None, 
 112                 if display_name == 'Audio': 
 118                         'width': int_or_none(a.get('width')), 
 119                         'height': int_or_none(a.get('height')), 
 120                         'vcodec': a.get('codec'), 
 122                 if a.get('container') == 'm3u8' or aext == 'm3u8': 
 126                         'format_id': f['format_id'].replace('hls-', 'ts-'), 
 127                         'url': f['url'].replace('.bin', '.ts'), 
 132                         'protocol': 'm3u8_native', 
 136                         'container': a.get('container'), 
 138                         'filesize': int_or_none(a.get('size')), 
 142         self._sort_formats(formats) 
 145         for caption in data.get('captions', []): 
 146             language = caption.get('language') 
 149             subtitles[language] = [{ 
 150                 'url': self._EMBED_BASE_URL + 'captions/' + video_id + '.vtt?language=' + language, 
 156             'description': data.get('seoDescription'), 
 158             'thumbnails': thumbnails, 
 159             'duration': float_or_none(data.get('duration')), 
 160             'timestamp': int_or_none(data.get('createdAt')), 
 161             'subtitles': subtitles,