]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/lego.py
d3bca64359b2840e72547599701db7a815f390e1
   2 from __future__ 
import unicode_literals
 
   6 from .common 
import InfoExtractor
 
   7 from ..compat 
import compat_str
 
  15 class LEGOIE(InfoExtractor
): 
  16     _VALID_URL 
= r
'https?://(?:www\.)?lego\.com/(?P<locale>[^/]+)/(?:[^/]+/)*videos/(?:[^/]+/)*[^/?#]+-(?P<id>[0-9a-f]+)' 
  18         'url': 'http://www.lego.com/en-us/videos/themes/club/blocumentary-kawaguchi-55492d823b1b4d5e985787fa8c2973b1', 
  19         'md5': 'f34468f176cfd76488767fc162c405fa', 
  21             'id': '55492d823b1b4d5e985787fa8c2973b1', 
  23             'title': 'Blocumentary Great Creations: Akiyuki Kawaguchi', 
  24             'description': 'Blocumentary Great Creations: Akiyuki Kawaguchi', 
  27         # geo-restricted but the contentUrl contains a valid URL 
  28         'url': 'http://www.lego.com/nl-nl/videos/themes/nexoknights/episode-20-kingdom-of-heroes-13bdc2299ab24d9685701a915b3d71e7##sp=399', 
  29         'md5': '4c3fec48a12e40c6e5995abc3d36cc2e', 
  31             'id': '13bdc2299ab24d9685701a915b3d71e7', 
  33             'title': 'Aflevering 20 - Helden van het koninkrijk', 
  34             'description': 'md5:8ee499aac26d7fa8bcb0cedb7f9c3941', 
  37         # special characters in title 
  38         'url': 'http://www.lego.com/en-us/starwars/videos/lego-star-wars-force-surprise-9685ee9d12e84ff38e84b4e3d0db533d', 
  40             'id': '9685ee9d12e84ff38e84b4e3d0db533d', 
  42             'title': 'Force Surprise – LEGO® Star Wars™ Microfighters', 
  43             'description': 'md5:9c673c96ce6f6271b88563fe9dc56de3', 
  46             'skip_download': True, 
  49     _BITRATES 
= [256, 512, 1024, 1536, 2560] 
  51     def _real_extract(self
, url
): 
  52         locale
, video_id 
= re
.match(self
._VALID
_URL
, url
).groups() 
  53         webpage 
= self
._download
_webpage
(url
, video_id
) 
  54         title 
= get_element_by_class('video-header', webpage
).strip() 
  55         progressive_base 
= 'https://lc-mediaplayerns-live-s.legocdn.com/' 
  56         streaming_base 
= 'http://legoprod-f.akamaihd.net/' 
  57         content_url 
= self
._html
_search
_meta
('contentUrl', webpage
) 
  58         path 
= self
._search
_regex
( 
  59             r
'(?:https?:)?//[^/]+/(?:[iz]/s/)?public/(.+)_[0-9,]+\.(?:mp4|webm)', 
  60             content_url
, 'video path', default
=None) 
  62             player_url 
= self
._proto
_relative
_url
(self
._search
_regex
( 
  63                 r
'<iframe[^>]+src="((?:https?)?//(?:www\.)?lego\.com/[^/]+/mediaplayer/video/[^"]+)', 
  64                 webpage
, 'player url', default
=None)) 
  66                 base_url 
= self
._proto
_relative
_url
(self
._search
_regex
( 
  67                     r
'data-baseurl="([^"]+)"', webpage
, 'base url', 
  68                     default
='http://www.lego.com/%s/mediaplayer/video/' % locale
)) 
  69                 player_url 
= base_url 
+ video_id
 
  70             player_webpage 
= self
._download
_webpage
(player_url
, video_id
) 
  71             video_data 
= self
._parse
_json
(unescapeHTML(self
._search
_regex
( 
  72                 r
"video='([^']+)'", player_webpage
, 'video data')), video_id
) 
  73             progressive_base 
= self
._search
_regex
( 
  74                 r
'data-video-progressive-url="([^"]+)"', 
  75                 player_webpage
, 'progressive base', default
='https://lc-mediaplayerns-live-s.legocdn.com/') 
  76             streaming_base 
= self
._search
_regex
( 
  77                 r
'data-video-streaming-url="([^"]+)"', 
  78                 player_webpage
, 'streaming base', default
='http://legoprod-f.akamaihd.net/') 
  79             item_id 
= video_data
['ItemId'] 
  81             net_storage_path 
= video_data
.get('NetStoragePath') or '/'.join([item_id
[:2], item_id
[2:4]]) 
  82             base_path 
= '_'.join([item_id
, video_data
['VideoId'], video_data
['Locale'], compat_str(video_data
['VideoVersion'])]) 
  83             path 
= '/'.join([net_storage_path
, base_path
]) 
  84         streaming_path 
= ','.join(map(lambda bitrate
: compat_str(bitrate
), self
._BITRATES
)) 
  86         formats 
= self
._extract
_akamai
_formats
( 
  87             '%si/s/public/%s_,%s,.mp4.csmil/master.m3u8' % (streaming_base
, path
, streaming_path
), video_id
) 
  88         m3u8_formats 
= list(filter( 
  89             lambda f
: f
.get('protocol') == 'm3u8_native' and f
.get('vcodec') != 'none' and f
.get('resolution') != 'multiple', 
  91         if len(m3u8_formats
) == len(self
._BITRATES
): 
  92             self
._sort
_formats
(m3u8_formats
) 
  93             for bitrate
, m3u8_format 
in zip(self
._BITRATES
, m3u8_formats
): 
  94                 progressive_base_url 
= '%spublic/%s_%d.' % (progressive_base
, path
, bitrate
) 
  95                 mp4_f 
= m3u8_format
.copy() 
  97                     'url': progressive_base_url 
+ 'mp4', 
  98                     'format_id': m3u8_format
['format_id'].replace('hls', 'mp4'), 
 102                     'url': progressive_base_url 
+ 'webm', 
 103                     'format_id': m3u8_format
['format_id'].replace('hls', 'webm'), 
 104                     'width': m3u8_format
['width'], 
 105                     'height': m3u8_format
['height'], 
 106                     'tbr': m3u8_format
.get('tbr'), 
 109                 formats
.extend([web_f
, mp4_f
]) 
 111             for bitrate 
in self
._BITRATES
: 
 112                 for ext 
in ('web', 'mp4'): 
 114                         'format_id': '%s-%s' % (ext
, bitrate
), 
 115                         'url': '%spublic/%s_%d.%s' % (progressive_base
, path
, bitrate
, ext
), 
 119         self
._sort
_formats
(formats
) 
 124             'description': self
._html
_search
_meta
('description', webpage
), 
 125             'thumbnail': self
._html
_search
_meta
('thumbnail', webpage
), 
 126             'duration': parse_duration(self
._html
_search
_meta
('duration', webpage
)),