1 from __future__ 
import unicode_literals
 
   6 from .common 
import InfoExtractor
 
   8     compat_urllib_parse_unquote
, 
  18 class VeeHDIE(InfoExtractor
): 
  19     _VALID_URL 
= r
'https?://veehd\.com/video/(?P<id>\d+)' 
  21     # Seems VeeHD videos have multiple copies on several servers, all of 
  22     # which have different MD5 checksums, so omit md5 field in all tests 
  24         'url': 'http://veehd.com/video/4639434_Solar-Sinter', 
  28             'title': 'Solar Sinter', 
  29             'uploader_id': 'VideoEyes', 
  30             'description': 'md5:46a840e8692ddbaffb5f81d9885cb457', 
  32         'skip': 'Video deleted', 
  34         'url': 'http://veehd.com/video/4905758_Elysian-Fields-Channeling', 
  38             'title': 'Elysian Fields - Channeling', 
  39             'description': 'md5:360e4e95fdab58aefbea0f2a19e5604b', 
  40             'uploader_id': 'spotted', 
  43         'url': 'http://veehd.com/video/2046729_2012-2009-DivX-Trailer', 
  47             'title': '2012 (2009) DivX Trailer', 
  48             'description': 'md5:75435ee95255e6a9838ac6f6f3a2396b', 
  49             'uploader_id': 'Movie_Trailers', 
  53     def _real_extract(self
, url
): 
  54         video_id 
= self
._match
_id
(url
) 
  56         # VeeHD seems to send garbage on the first request. 
  57         # See https://github.com/rg3/youtube-dl/issues/2102 
  58         self
._download
_webpage
(url
, video_id
, 'Requesting webpage') 
  59         webpage 
= self
._download
_webpage
(url
, video_id
) 
  61         if 'This video has been removed<' in webpage
: 
  62             raise ExtractorError('Video %s has been removed' % video_id
, expected
=True) 
  64         player_path 
= self
._search
_regex
( 
  65             r
'\$\("#playeriframe"\).attr\({src : "(.+?)"', 
  66             webpage
, 'player path') 
  67         player_url 
= compat_urlparse
.urljoin(url
, player_path
) 
  69         self
._download
_webpage
(player_url
, video_id
, 'Requesting player page') 
  70         player_page 
= self
._download
_webpage
( 
  71             player_url
, video_id
, 'Downloading player page') 
  75         config_json 
= self
._search
_regex
( 
  76             r
'value=\'config
=({.+?
})\'', player_page, 'config json
', default=None) 
  79             config = json.loads(config_json) 
  80             video_url = compat_urllib_parse_unquote(config['clip
']['url
']) 
  83             video_url = self._html_search_regex( 
  84                 r'<embed
[^
>]+type="video/divx"[^
>]+src
="([^"]+)"', 
  85                 player_page, 'video url', default=None) 
  88             iframe_src = self._search_regex( 
  89                 r'<iframe[^>]+src="/?
([^
"]+)"', player_page, 'iframe url
') 
  90             iframe_url = 'http
://veehd
.com
/%s' % iframe_src 
  92             self._download_webpage(iframe_url, video_id, 'Requesting iframe page
') 
  93             iframe_page = self._download_webpage( 
  94                 iframe_url, video_id, 'Downloading iframe page
') 
  96             video_url = self._search_regex( 
  97                 r"file\s*:\s*'([^
']+)'", iframe_page, 'video url') 
  99         title = clean_html(get_element_by_id('videoName', webpage).rpartition('|')[0]) 
 100         uploader_id = self._html_search_regex( 
 101             r'<a href="/profile
/\d
+">(.+?)</a>', 
 103         thumbnail = self._search_regex( 
 104             r'<img id="veehdpreview
" src="(.+?
)"', 
 105             webpage, 'thumbnail') 
 106         description = self._html_search_regex( 
 107             r'<td class="infodropdown
".*?<div>(.*?)<ul', 
 108             webpage, 'description', flags=re.DOTALL) 
 115             'uploader_id': uploader_id, 
 116             'thumbnail': thumbnail, 
 117             'description': description,