]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/movieclips.py
   2 from __future__ 
import unicode_literals
 
   4 from .common 
import InfoExtractor
 
  10 class MovieClipsIE(InfoExtractor
): 
  11     _VALID_URL 
= r
'https?://(?:www.)?movieclips\.com/videos/(?P<id>[^/?#]+)' 
  13         'url': 'http://www.movieclips.com/videos/warcraft-trailer-1-561180739597?autoPlay=true&playlistId=5', 
  16             'display_id': 'warcraft-trailer-1-561180739597', 
  18             'title': 'Warcraft Trailer 1', 
  19             'description': 'Watch Trailer 1 from Warcraft (2016). Legendary’s WARCRAFT is a 3D epic adventure of world-colliding conflict based.', 
  20             'thumbnail': 're:^https?://.*\.jpg$', 
  22         'add_ie': ['ThePlatform'], 
  25     def _real_extract(self
, url
): 
  26         display_id 
= self
._match
_id
(url
) 
  28         req 
= compat_urllib_request
.Request(url
) 
  29         # it doesn't work if it thinks the browser is too old 
  30         req
.add_header('User-Agent', 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20150101 Firefox/43.0 (Chrome)') 
  31         webpage 
= self
._download
_webpage
(req
, display_id
) 
  32         theplatform_link 
= self
._html
_search
_regex
(r
'src="(http://player.theplatform.com/p/.*?)"', webpage
, 'theplatform link') 
  33         title 
= self
._html
_search
_regex
(r
'<title[^>]*>([^>]+)-\s*\d+\s*|\s*Movieclips.com</title>', webpage
, 'title') 
  34         description 
= self
._html
_search
_meta
('description', webpage
) 
  37             '_type': 'url_transparent', 
  38             'url': theplatform_link
, 
  40             'display_id': display_id
, 
  41             'description': description
,