]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/theplatform.py
   1 from __future__ 
import unicode_literals
 
  11 from .common 
import InfoExtractor
 
  12 from ..compat 
import ( 
  23 _x 
= lambda p
: xpath_with_ns(p
, {'smil': 'http://www.w3.org/2005/SMIL21/Language'}) 
  26 class ThePlatformIE(InfoExtractor
): 
  28         (?:https?://(?:link|player)\.theplatform\.com/[sp]/(?P<provider_id>[^/]+)/ 
  29            (?P<config>(?:[^/\?]+/(?:swf|config)|onsite)/select/)? 
  30          |theplatform:)(?P<id>[^/\?&]+)''' 
  33         # from http://www.metacafe.com/watch/cb-e9I_cZgTgIPd/blackberrys_big_bold_z30/ 
  34         'url': 'http://link.theplatform.com/s/dJ5BDC/e9I_cZgTgIPd/meta.smil?format=smil&Tracking=true&mbr=true', 
  38             'title': 'Blackberry\'s big, bold Z30', 
  39             'description': 'The Z30 is Blackberry\'s biggest, baddest mobile messaging device yet.', 
  44             'skip_download': True, 
  47         # from http://www.cnet.com/videos/tesla-model-s-a-second-step-towards-a-cleaner-motoring-future/ 
  48         'url': 'http://link.theplatform.com/s/kYEXFC/22d_qsQ6MIRT', 
  52             'description': 'md5:ac330c9258c04f9d7512cf26b9595409', 
  53             'title': 'Tesla Model S: A second step towards a cleaner motoring future', 
  57             'skip_download': True, 
  62     def _sign_url(url
, sig_key
, sig_secret
, life
=600, include_qs
=False): 
  63         flags 
= '10' if include_qs 
else '00' 
  64         expiration_date 
= '%x' % (int(time
.time()) + life
) 
  67             return binascii
.b2a_hex(str.encode('ascii')).decode('ascii') 
  70             return binascii
.a2b_hex(hex) 
  72         relative_path 
= url
.split('http://link.theplatform.com/s/')[1].split('?')[0] 
  73         clear_text 
= hex_to_str(flags 
+ expiration_date 
+ str_to_hex(relative_path
)) 
  74         checksum 
= hmac
.new(sig_key
.encode('ascii'), clear_text
, hashlib
.sha1
).hexdigest() 
  75         sig 
= flags 
+ expiration_date 
+ checksum 
+ str_to_hex(sig_secret
) 
  76         return '%s&sig=%s' % (url
, sig
) 
  78     def _real_extract(self
, url
): 
  79         url
, smuggled_data 
= unsmuggle_url(url
, {}) 
  81         mobj 
= re
.match(self
._VALID
_URL
, url
) 
  82         provider_id 
= mobj
.group('provider_id') 
  83         video_id 
= mobj
.group('id') 
  86             provider_id 
= 'dJ5BDC' 
  88         if smuggled_data
.get('force_smil_url', False): 
  90         elif mobj
.group('config'): 
  91             config_url 
= url 
+ '&form=json' 
  92             config_url 
= config_url
.replace('swf/', 'config/') 
  93             config_url 
= config_url
.replace('onsite/', 'onsite/config/') 
  94             config 
= self
._download
_json
(config_url
, video_id
, 'Downloading config') 
  95             smil_url 
= config
['releaseUrl'] + '&format=SMIL&formats=MPEG4&manifest=f4m' 
  97             smil_url 
= ('http://link.theplatform.com/s/{0}/{1}/meta.smil?' 
  98                         'format=smil&mbr=true'.format(provider_id
, video_id
)) 
 100         sig 
= smuggled_data
.get('sig') 
 102             smil_url 
= self
._sign
_url
(smil_url
, sig
['key'], sig
['secret']) 
 104         meta 
= self
._download
_xml
(smil_url
, video_id
) 
 108                 for n 
in meta
.findall(_x('.//smil:ref')) 
 109                 if n
.attrib
.get('title') == 'Geographic Restriction' or n
.attrib
.get('title') == 'Expired') 
 110         except StopIteration: 
 113             raise ExtractorError(error_msg
, expected
=True) 
 115         info_url 
= 'http://link.theplatform.com/s/{0}/{1}?format=preview'.format(provider_id
, video_id
) 
 116         info_json 
= self
._download
_webpage
(info_url
, video_id
) 
 117         info 
= json
.loads(info_json
) 
 120         captions 
= info
.get('captions') 
 121         if isinstance(captions
, list): 
 122             for caption 
in captions
: 
 123                 lang
, src
, mime 
= caption
.get('lang', 'en'), caption
.get('src'), caption
.get('type') 
 125                     'ext': 'srt' if mime 
== 'text/srt' else 'ttml', 
 129         head 
= meta
.find(_x('smil:head')) 
 130         body 
= meta
.find(_x('smil:body')) 
 132         f4m_node 
= body
.find(_x('smil:seq//smil:video')) 
 134             f4m_node 
= body
.find(_x('smil:seq/smil:video')) 
 135         if f4m_node 
is not None and '.f4m' in f4m_node
.attrib
['src']: 
 136             f4m_url 
= f4m_node
.attrib
['src'] 
 137             if 'manifest.f4m?' not in f4m_url
: 
 139             # the parameters are from syfy.com, other sites may use others, 
 140             # they also work for nbc.com 
 141             f4m_url 
+= '&g=UXWGVKRWHFSP&hdcore=3.0.3' 
 142             formats 
= self
._extract
_f
4m
_formats
(f4m_url
, video_id
) 
 145             switch 
= body
.find(_x('smil:switch')) 
 147                 switch 
= body
.find(_x('smil:par//smil:switch')) 
 149                 switch 
= body
.find(_x('smil:par/smil:switch')) 
 151                 switch 
= body
.find(_x('smil:par')) 
 152             if switch 
is not None: 
 153                 base_url 
= head
.find(_x('smil:meta')).attrib
['base'] 
 154                 for f 
in switch
.findall(_x('smil:video')): 
 156                     width 
= int_or_none(attr
.get('width')) 
 157                     height 
= int_or_none(attr
.get('height')) 
 158                     vbr 
= int_or_none(attr
.get('system-bitrate'), 1000) 
 159                     format_id 
= '%dx%d_%dk' % (width
, height
, vbr
) 
 161                         'format_id': format_id
, 
 163                         'play_path': 'mp4:' + attr
['src'], 
 170                 switch 
= body
.find(_x('smil:seq//smil:switch')) 
 172                     switch 
= body
.find(_x('smil:seq/smil:switch')) 
 173                 for f 
in switch
.findall(_x('smil:video')): 
 175                     vbr 
= int_or_none(attr
.get('system-bitrate'), 1000) 
 176                     ext 
= determine_ext(attr
['src']) 
 180                         'format_id': compat_str(vbr
), 
 185             self
._sort
_formats
(formats
) 
 189             'title': info
['title'], 
 190             'subtitles': subtitles
, 
 192             'description': info
['description'], 
 193             'thumbnail': info
['defaultThumbnailUrl'], 
 194             'duration': int_or_none(info
.get('duration'), 1000),