]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/movieclips.py
2 from __future__
import unicode_literals
4 from .common
import InfoExtractor
5 from ..utils
import sanitized_Request
8 class MovieClipsIE(InfoExtractor
):
9 _VALID_URL
= r
'https?://(?:www.)?movieclips\.com/videos/(?P<id>[^/?#]+)'
11 'url': 'http://www.movieclips.com/videos/warcraft-trailer-1-561180739597?autoPlay=true&playlistId=5',
14 'display_id': 'warcraft-trailer-1-561180739597',
16 'title': 'Warcraft Trailer 1',
17 'description': 'Watch Trailer 1 from Warcraft (2016). Legendary’s WARCRAFT is a 3D epic adventure of world-colliding conflict based.',
18 'thumbnail': 're:^https?://.*\.jpg$',
20 'add_ie': ['ThePlatform'],
23 def _real_extract(self
, url
):
24 display_id
= self
._match
_id
(url
)
26 req
= sanitized_Request(url
)
27 # it doesn't work if it thinks the browser it's too old
28 req
.add_header('User-Agent', 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20150101 Firefox/43.0 (Chrome)')
29 webpage
= self
._download
_webpage
(req
, display_id
)
30 theplatform_link
= self
._html
_search
_regex
(r
'src="(http://player.theplatform.com/p/.*?)"', webpage
, 'theplatform link')
31 title
= self
._html
_search
_regex
(r
'<title[^>]*>([^>]+)-\s*\d+\s*|\s*Movieclips.com</title>', webpage
, 'title')
32 description
= self
._html
_search
_meta
('description', webpage
)
35 '_type': 'url_transparent',
36 'url': theplatform_link
,
38 'display_id': display_id
,
39 'description': description
,