]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/veehd.py
   1 from __future__ 
import unicode_literals
 
   6 from .common 
import InfoExtractor
 
  17 class VeeHDIE(InfoExtractor
): 
  18     _VALID_URL 
= r
'https?://veehd\.com/video/(?P<id>\d+)' 
  20     # VeeHD videos seem to have multiple copies on several servers, all of 
  21     # which have different MD5 checksums, so omit the md5 field in all tests 
  23         'url': 'http://veehd.com/video/4639434_Solar-Sinter', 
  27             'title': 'Solar Sinter', 
  28             'uploader_id': 'VideoEyes', 
  29             'description': 'md5:46a840e8692ddbaffb5f81d9885cb457', 
  31         'skip': 'Video deleted', 
  33         'url': 'http://veehd.com/video/4905758_Elysian-Fields-Channeling', 
  37             'title': 'Elysian Fields - Channeling', 
  38             'description': 'md5:360e4e95fdab58aefbea0f2a19e5604b', 
  39             'uploader_id': 'spotted', 
  42         'url': 'http://veehd.com/video/2046729_2012-2009-DivX-Trailer', 
  46             'title': '2012 (2009) DivX Trailer', 
  47             'description': 'md5:75435ee95255e6a9838ac6f6f3a2396b', 
  48             'uploader_id': 'Movie_Trailers', 
  52     def _real_extract(self
, url
): 
  53         video_id 
= self
._match
_id
(url
) 
  55         # VeeHD seems to send garbage on the first request. 
  56         # See https://github.com/rg3/youtube-dl/issues/2102 
  57         self
._download
_webpage
(url
, video_id
, 'Requesting webpage') 
  58         webpage 
= self
._download
_webpage
(url
, video_id
) 
  60         if 'This video has been removed<' in webpage
: 
  61             raise ExtractorError('Video %s has been removed' % video_id
, expected
=True) 
  63         player_path 
= self
._search
_regex
( 
  64             r
'\$\("#playeriframe"\).attr\({src : "(.+?)"', 
  65             webpage
, 'player path') 
  66         player_url 
= compat_urlparse
.urljoin(url
, player_path
) 
  68         self
._download
_webpage
(player_url
, video_id
, 'Requesting player page') 
  69         player_page 
= self
._download
_webpage
( 
  70             player_url
, video_id
, 'Downloading player page') 
  74         config_json 
= self
._search
_regex
( 
  75             r
'value=\'config
=({.+?
})\'', player_page, 'config json
', default=None) 
  78             config = json.loads(config_json) 
  79             video_url = compat_urlparse.unquote(config['clip
']['url
']) 
  82             video_url = self._html_search_regex( 
  83                 r'<embed
[^
>]+type="video/divx"[^
>]+src
="([^"]+)"', 
  84                 player_page, 'video url', default=None) 
  87             iframe_src = self._search_regex( 
  88                 r'<iframe[^>]+src="/?
([^
"]+)"', player_page, 'iframe url
') 
  89             iframe_url = 'http
://veehd
.com
/%s' % iframe_src 
  91             self._download_webpage(iframe_url, video_id, 'Requesting iframe page
') 
  92             iframe_page = self._download_webpage( 
  93                 iframe_url, video_id, 'Downloading iframe page
') 
  95             video_url = self._search_regex( 
  96                 r"file\s*:\s*'([^
']+)'", iframe_page, 'video url') 
  98         title = clean_html(get_element_by_id('videoName', webpage).rpartition('|')[0]) 
  99         uploader_id = self._html_search_regex( 
 100             r'<a href="/profile
/\d
+">(.+?)</a>', 
 102         thumbnail = self._search_regex( 
 103             r'<img id="veehdpreview
" src="(.+?
)"', 
 104             webpage, 'thumbnail') 
 105         description = self._html_search_regex( 
 106             r'<td class="infodropdown
".*?<div>(.*?)<ul', 
 107             webpage, 'description', flags=re.DOTALL) 
 114             'uploader_id': uploader_id, 
 115             'thumbnail': thumbnail, 
 116             'description': description,