Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/theplatform.py
Merge tag 'upstream/2014.07.11'
[youtubedl] / youtube_dl / extractor / theplatform.py
1 from __future__ import unicode_literals
2
3 import re
4 import json
5
6 from .common import InfoExtractor
7 from ..utils import (
8 ExtractorError,
9 xpath_with_ns,
10 )
11
def _x(p):
    """Return ``xpath_with_ns(p, ...)`` with the ``smil`` prefix bound.

    The namespace mapping passed along binds the ``smil`` prefix to the
    SMIL 2.1 Language namespace URI.
    """
    return xpath_with_ns(p, {'smil': 'http://www.w3.org/2005/SMIL21/Language'})
13
14
class ThePlatformIE(InfoExtractor):
    """Information extractor for videos served through theplatform.com.

    Matches ``link.theplatform.com`` / ``player.theplatform.com`` URLs and
    the ``theplatform:<id>`` shorthand (see ``_VALID_URL``).
    """

    _VALID_URL = r'''(?x)
        (?:https?://(?:link|player)\.theplatform\.com/[sp]/[^/]+/
           (?P<config>(?:[^/\?]+/(?:swf|config)|onsite)/select/)?
         |theplatform:)(?P<id>[^/\?&]+)'''

    _TEST = {
        # from http://www.metacafe.com/watch/cb-e9I_cZgTgIPd/blackberrys_big_bold_z30/
        'url': 'http://link.theplatform.com/s/dJ5BDC/e9I_cZgTgIPd/meta.smil?format=smil&Tracking=true&mbr=true',
        'info_dict': {
            'id': 'e9I_cZgTgIPd',
            'ext': 'flv',
            'title': 'Blackberry\'s big, bold Z30',
            'description': 'The Z30 is Blackberry\'s biggest, baddest mobile messaging device yet.',
            'duration': 247,
        },
        'params': {
            # rtmp download
            'skip_download': True,
        },
    }

    @staticmethod
    def _append_query(url, query):
        """Append the query fragment *query* to *url*.

        Uses '&' as separator when *url* already contains a '?', and '?'
        otherwise, so the result is well-formed in both cases.
        """
        return url + ('&' if '?' in url else '?') + query

    def _get_info(self, video_id, smil_url):
        """Download the SMIL document and preview metadata of a video.

        video_id -- id used to build the preview URL; returned as 'id'
        smil_url -- URL of the SMIL document listing the streams

        Returns a dict with the keys 'id', 'title', 'formats',
        'description', 'thumbnail' and 'duration'; the last three are None
        when the preview metadata does not provide them.

        Raises ExtractorError when the SMIL document reports a geographic
        restriction or lacks the nodes the formats are read from.
        """
        meta = self._download_xml(smil_url, video_id)

        # A <ref> titled 'Geographic Restriction' carries the message to
        # show in its 'abstract' attribute; the first such node wins.
        for ref in meta.findall(_x('.//smil:ref')):
            if ref.attrib.get('title') == 'Geographic Restriction':
                raise ExtractorError(ref.attrib['abstract'], expected=True)

        # NOTE(review): the path segment 'dJ5BDC' is hard-coded here,
        # whatever the segment in the URL handed to _real_extract was.
        info_url = 'http://link.theplatform.com/s/dJ5BDC/{0}?format=preview'.format(video_id)
        info = self._download_json(info_url, video_id)

        head = meta.find(_x('smil:head'))
        body = meta.find(_x('smil:body'))
        if body is None:
            raise ExtractorError('SMIL document has no body element')

        f4m_node = body.find(_x('smil:seq//smil:video'))
        if f4m_node is not None:
            f4m_url = f4m_node.attrib['src']
            if 'manifest.f4m?' not in f4m_url:
                f4m_url += '?'
            # the parameters are from syfy.com, other sites may use others,
            # they also work for nbc.com
            f4m_url += '&g=UXWGVKRWHFSP&hdcore=3.0.3'
            formats = [{
                'ext': 'flv',
                'url': f4m_url,
            }]
        else:
            # Every <video> inside <switch> becomes one format; they all
            # share the base URL taken from the <meta> node in <head>.
            meta_node = head.find(_x('smil:meta')) if head is not None else None
            switch = body.find(_x('smil:switch'))
            if meta_node is None or switch is None:
                raise ExtractorError('Unable to find video formats in SMIL document')
            base_url = meta_node.attrib['base']
            formats = []
            for video in switch.findall(_x('smil:video')):
                attr = video.attrib
                width = int(attr['width'])
                height = int(attr['height'])
                vbr = int(attr['system-bitrate']) // 1000
                formats.append({
                    'format_id': '%dx%d_%dk' % (width, height, vbr),
                    'url': base_url,
                    'play_path': 'mp4:' + attr['src'],
                    'ext': 'flv',
                    'width': width,
                    'height': height,
                    'vbr': vbr,
                })
            self._sort_formats(formats)

        # Only the title is required; the remaining preview fields are
        # optional so that a missing one does not abort the extraction.
        duration = info.get('duration')
        if duration is not None:
            # presumably milliseconds -> seconds; TODO confirm
            duration = duration // 1000

        return {
            'id': video_id,
            'title': info['title'],
            'formats': formats,
            'description': info.get('description'),
            'thumbnail': info.get('defaultThumbnailUrl'),
            'duration': duration,
        }

    def _real_extract(self, url):
        """Work out the SMIL URL for *url* and return the video info dict.

        When the URL contains a player "select" path (the 'config' group of
        ``_VALID_URL``), the player configuration is downloaded as JSON and
        its 'releaseUrl' is used; otherwise a meta.smil URL is built from
        the video id.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        if mobj.group('config'):
            # Pick the right separator: the URL may or may not already
            # carry a query string.
            config_url = self._append_query(url, 'form=json')
            config_url = config_url.replace('swf/', 'config/')
            config_url = config_url.replace('onsite/', 'onsite/config/')
            config = self._download_json(config_url, video_id, 'Downloading config')
            smil_url = self._append_query(
                config['releaseUrl'], 'format=SMIL&formats=MPEG4&manifest=f4m')
        else:
            smil_url = ('http://link.theplatform.com/s/dJ5BDC/{0}/meta.smil?'
                        'format=smil&mbr=true'.format(video_id))
        return self._get_info(video_id, smil_url)