Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/videobam.py

   1 from __future__ import unicode_literals
   2
   3 import re
   4 import json
   5
   6 from .common import InfoExtractor
   7 from ..utils import int_or_none
   8
   9
  10 class VideoBamIE(InfoExtractor):
  11     _VALID_URL = r'http://(?:www\.)?videobam\.com/(?:videos/download/)?(?P<id>[a-zA-Z]+)'
  12
  13     _TESTS = [
  14         {
  15             'url': 'http://videobam.com/OiJQM',
  16             'md5': 'db471f27763a531f10416a0c58b5a1e0',
  17             'info_dict': {
  18                 'id': 'OiJQM',
  19                 'ext': 'mp4',
  20                 'title': 'Is Alcohol Worse Than Ecstasy?',
  21                 'description': 'md5:d25b96151515c91debc42bfbb3eb2683',
  22                 'uploader': 'frihetsvinge',
  23             },
  24         },
  25         {
  26             'url': 'http://videobam.com/pqLvq',
  27             'md5': 'd9a565b5379a99126ef94e1d7f9a383e',
  28             'note': 'HD video',
  29             'info_dict': {
  30                 'id': 'pqLvq',
  31                 'ext': 'mp4',
  32                 'title': '_',
  33             }
  34         },
  35     ]
  36
  37     def _real_extract(self, url):
  38         mobj = re.match(self._VALID_URL, url)
  39         video_id = mobj.group('id')
  40
  41         page = self._download_webpage('http://videobam.com/%s' % video_id, video_id, 'Downloading page')
  42
  43         formats = []
  44
  45         for preference, format_id in enumerate(['low', 'high']):
  46             mobj = re.search(r"%s: '(?P<url>[^']+)'" % format_id, page)
  47             if not mobj:
  48                 continue
  49             formats.append({
  50                 'url': mobj.group('url'),
  51                 'ext': 'mp4',
  52                 'format_id': format_id,
  53                 'preference': preference,
  54             })
  55
  56         if not formats:
  57             player_config = json.loads(self._html_search_regex(r'var player_config = ({.+?});', page, 'player config'))
  58             formats = [{
  59                 'url': item['url'],
  60                 'ext': 'mp4',
  61             } for item in player_config['playlist'] if 'autoPlay' in item]
  62
  63         self._sort_formats(formats)
  64
  65         title = self._og_search_title(page, default='_', fatal=False)
  66         description = self._og_search_description(page, default=None)
  67         thumbnail = self._og_search_thumbnail(page)
  68         uploader = self._html_search_regex(r'Upload by ([^<]+)</a>', page, 'uploader', fatal=False, default=None)
  69         view_count = int_or_none(
  70             self._html_search_regex(r'<strong>Views:</strong> (\d+) ', page, 'view count', fatal=False))
  71
  72         return {
  73             'id': video_id,
  74             'title': title,
  75             'description': description,
  76             'thumbnail': thumbnail,
  77             'uploader': uploader,
  78             'view_count': view_count,
  79             'formats': formats,
  80             'age_limit': 18,
  81         }