]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/screenjunkies.py
   1 from __future__ 
import unicode_literals
 
   5 from .common 
import InfoExtractor
 
   6 from ..compat 
import compat_str
 
  13 class ScreenJunkiesIE(InfoExtractor
): 
  14     _VALID_URL 
= r
'http://www.screenjunkies.com/video/(?P<display_id>[^/]+?)(?:-(?P<id>\d+))?(?:[/?#&]|$)' 
  16         'url': 'http://www.screenjunkies.com/video/best-quentin-tarantino-movie-2841915', 
  17         'md5': '5c2b686bec3d43de42bde9ec047536b0', 
  20             'display_id': 'best-quentin-tarantino-movie', 
  22             'title': 'Best Quentin Tarantino Movie', 
  23             'thumbnail': 're:^https?://.*\.jpg', 
  29         'url': 'http://www.screenjunkies.com/video/honest-trailers-the-dark-knight', 
  32             'display_id': 'honest-trailers-the-dark-knight', 
  34             'title': "Honest Trailers: 'The Dark Knight'", 
  35             'thumbnail': 're:^https?://.*\.jpg', 
  40         # requires subscription but worked around 
  41         'url': 'http://www.screenjunkies.com/video/knocking-dead-ep-1-the-show-so-far-3003285', 
  44             'display_id': 'knocking-dead-ep-1-the-show-so-far', 
  46             'title': 'Knocking Dead Ep 1: State of The Dead Recap', 
  47             'thumbnail': 're:^https?://.*\.jpg', 
  54     _DEFAULT_BITRATES 
= (48, 150, 496, 864, 2240) 
  56     def _real_extract(self
, url
): 
  57         mobj 
= re
.match(self
._VALID
_URL
, url
) 
  58         video_id 
= mobj
.group('id') 
  59         display_id 
= mobj
.group('display_id') 
  62             webpage 
= self
._download
_webpage
(url
, display_id
) 
  63             video_id 
= self
._search
_regex
( 
  64                 (r
'src=["\']/embed
/(\d
+)', r'data
-video
-content
-id=["\'](\d+)'), 
  67         webpage = self._download_webpage( 
  68             'http://www.screenjunkies.com/embed/%s' % video_id, 
  69             display_id, 'Downloading video embed page') 
  70         embed_vars = self._parse_json( 
  72                 r'(?s)embedVars\s*=\s*({.+?})\s*</script>', webpage, 'embed vars'), 
  75         title = embed_vars['contentName'] 
  79         for f in embed_vars.get('media', []): 
  80             if not f.get('uri') or f.get('mediaPurpose') != 'play': 
  82             bitrate = int_or_none(f.get('bitRate')) 
  84                 bitrates.append(bitrate) 
  87                 'format_id': 'http-%d' % bitrate if bitrate else 'http', 
  88                 'width': int_or_none(f.get('width')), 
  89                 'height': int_or_none(f.get('height')), 
  95             # When subscriptionLevel > 0, i.e. plus subscription is required 
  96             # media list will be empty. However, hds and hls uris are still 
  97             # available. We can grab them assuming bitrates to be default. 
  98             bitrates = self._DEFAULT_BITRATES 
 100         auth_token = embed_vars.get('AuthToken') 
 102         def construct_manifest_url(base_url, ext): 
 104             pieces.extend([compat_str(b) for b in bitrates]) 
 105             pieces.append('_kbps.mp4.%s?%s' % (ext, auth_token)) 
 106             return ','.join(pieces) 
 108         if bitrates and auth_token: 
 109             hds_url = embed_vars.get('hdsUri') 
 111                 f4m_formats = self._extract_f4m_formats( 
 112                     construct_manifest_url(hds_url, 'f4m'), 
 113                     display_id, f4m_id='hds', fatal=False) 
 114                 if len(f4m_formats) == len(bitrates): 
 115                     for f, bitrate in zip(f4m_formats, bitrates): 
 117                             f['format_id'] = 'hds-%d' % bitrate 
 119                 # TODO: fix f4m downloader to handle manifests without bitrates if possible 
 120                 # formats.extend(f4m_formats) 
 122             hls_url = embed_vars.get('hlsUri') 
 124                 formats.extend(self._extract_m3u8_formats( 
 125                     construct_manifest_url(hls_url, 'm3u8'), 
 126                     display_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)) 
 127         self._sort_formats(formats) 
 131             'display_id': display_id, 
 133             'thumbnail': embed_vars.get('thumbUri'), 
 134             'duration': int_or_none(embed_vars.get('videoLengthInSeconds')) or None, 
 135             'age_limit': parse_age_limit(embed_vars.get('audienceRating')), 
 136             'tags': embed_vars.get('tags', '').split(','),