]>
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/extractor/veehd.py
   1 from __future__ 
import unicode_literals
 
   6 from .common 
import InfoExtractor
 
  17 class VeeHDIE(InfoExtractor
): 
  18     _VALID_URL 
= r
'https?://veehd\.com/video/(?P<id>\d+)' 
  21         'url': 'http://veehd.com/video/4639434_Solar-Sinter', 
  25             'title': 'Solar Sinter', 
  26             'uploader_id': 'VideoEyes', 
  27             'description': 'md5:46a840e8692ddbaffb5f81d9885cb457', 
  31     def _real_extract(self
, url
): 
  32         video_id 
= self
._match
_id
(url
) 
  34         # VeeHD seems to send garbage on the first request. 
  35         # See https://github.com/rg3/youtube-dl/issues/2102 
  36         self
._download
_webpage
(url
, video_id
, 'Requesting webpage') 
  37         webpage 
= self
._download
_webpage
(url
, video_id
) 
  39         if 'This video has been removed<' in webpage
: 
  40             raise ExtractorError('Video %s has been removed' % video_id
, expected
=True) 
  42         player_path 
= self
._search
_regex
( 
  43             r
'\$\("#playeriframe"\).attr\({src : "(.+?)"', 
  44             webpage
, 'player path') 
  45         player_url 
= compat_urlparse
.urljoin(url
, player_path
) 
  47         self
._download
_webpage
(player_url
, video_id
, 'Requesting player page') 
  48         player_page 
= self
._download
_webpage
( 
  49             player_url
, video_id
, 'Downloading player page') 
  51         config_json 
= self
._search
_regex
( 
  52             r
'value=\'config
=({.+?
})\'', player_page, 'config json
', default=None) 
  55             config = json.loads(config_json) 
  56             video_url = compat_urlparse.unquote(config['clip
']['url
']) 
  58             iframe_src = self._search_regex( 
  59                 r'<iframe
[^
>]+src
="/?([^"]+)"', player_page, 'iframe url') 
  60             iframe_url = 'http://veehd.com/%s' % iframe_src 
  62             self._download_webpage(iframe_url, video_id, 'Requesting iframe page') 
  63             iframe_page = self._download_webpage( 
  64                 iframe_url, video_id, 'Downloading iframe page') 
  66             video_url = self._search_regex( 
  67                 r"file\s
*:\s
*'([^']+)'", iframe_page, 'video url
') 
  69         title = clean_html(get_element_by_id('videoName
', webpage).rpartition('|
')[0]) 
  70         uploader_id = self._html_search_regex( 
  71             r'<a href
="/profile/\d+">(.+?
)</a
>', 
  73         thumbnail = self._search_regex( 
  74             r'<img 
id="veehdpreview" src
="(.+?)"', 
  76         description = self._html_search_regex( 
  77             r'<td 
class="infodropdown".*?
<div
>(.*?
)<ul
', 
  78             webpage, 'description
', flags=re.DOTALL) 
  86             'uploader_id
': uploader_id, 
  87             'thumbnail
': thumbnail, 
  88             'description
': description,