Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/internetvideoarchive.py

   1 from __future__ import unicode_literals
   2
   3 from .common import InfoExtractor
   4 from ..compat import (
   5     compat_parse_qs,
   6     compat_urlparse,
   7 )
   8 from ..utils import (
   9     determine_ext,
  10     int_or_none,
  11     xpath_text,
  12 )
  13
  14
  15 class InternetVideoArchiveIE(InfoExtractor):
  16     _VALID_URL = r'https?://video\.internetvideoarchive\.net/(?:player|flash/players)/.*?\?.*?publishedid.*?'
  17
  18     _TEST = {
  19         'url': 'http://video.internetvideoarchive.net/player/6/configuration.ashx?customerid=69249&publishedid=194487&reporttag=vdbetatitle&playerid=641&autolist=0&domain=www.videodetective.com&maxrate=high&minrate=low&socialplayer=false',
  20         'info_dict': {
  21             'id': '194487',
  22             'ext': 'mp4',
  23             'title': 'KICK-ASS 2',
  24             'description': 'md5:c189d5b7280400630a1d3dd17eaa8d8a',
  25         },
  26         'params': {
  27             # m3u8 download
  28             'skip_download': True,
  29         },
  30     }
  31
  32     @staticmethod
  33     def _build_json_url(query):
  34         return 'http://video.internetvideoarchive.net/player/6/configuration.ashx?' + query
  35
  36     @staticmethod
  37     def _build_xml_url(query):
  38         return 'http://video.internetvideoarchive.net/flash/players/flashconfiguration.aspx?' + query
  39
  40     def _real_extract(self, url):
  41         query = compat_urlparse.urlparse(url).query
  42         query_dic = compat_parse_qs(query)
  43         video_id = query_dic['publishedid'][0]
  44
  45         if '/player/' in url:
  46             configuration = self._download_json(url, video_id)
  47
  48             # There are multiple videos in the playlist whlie only the first one
  49             # matches the video played in browsers
  50             video_info = configuration['playlist'][0]
  51             title = video_info['title']
  52
  53             formats = []
  54             for source in video_info['sources']:
  55                 file_url = source['file']
  56                 if determine_ext(file_url) == 'm3u8':
  57                     m3u8_formats = self._extract_m3u8_formats(
  58                         file_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)
  59                     if m3u8_formats:
  60                         formats.extend(m3u8_formats)
  61                         file_url = m3u8_formats[0]['url']
  62                         formats.extend(self._extract_f4m_formats(
  63                             file_url.replace('.m3u8', '.f4m'),
  64                             video_id, f4m_id='hds', fatal=False))
  65                         formats.extend(self._extract_mpd_formats(
  66                             file_url.replace('.m3u8', '.mpd'),
  67                             video_id, mpd_id='dash', fatal=False))
  68                 else:
  69                     a_format = {
  70                         'url': file_url,
  71                     }
  72
  73                     if source.get('label') and source['label'][-4:] == ' kbs':
  74                         tbr = int_or_none(source['label'][:-4])
  75                         a_format.update({
  76                             'tbr': tbr,
  77                             'format_id': 'http-%d' % tbr,
  78                         })
  79                         formats.append(a_format)
  80
  81             self._sort_formats(formats)
  82
  83             description = video_info.get('description')
  84             thumbnail = video_info.get('image')
  85         else:
  86             configuration = self._download_xml(url, video_id)
  87             formats = [{
  88                 'url': xpath_text(configuration, './file', 'file URL', fatal=True),
  89             }]
  90             thumbnail = xpath_text(configuration, './image', 'thumbnail')
  91             title = 'InternetVideoArchive video %s' % video_id
  92             description = None
  93
  94         return {
  95             'id': video_id,
  96             'title': title,
  97             'formats': formats,
  98             'thumbnail': thumbnail,
  99             'description': description,
 100         }