]>
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/extractor/einthusan.py
   2 from __future__ 
import unicode_literals
 
   4 from .common 
import InfoExtractor
 
   5 from ..compat 
import compat_urlparse
 
  12 class EinthusanIE(InfoExtractor
): 
  13     _VALID_URL 
= r
'https?://(?:www\.)?einthusan\.com/movies/watch.php\?([^#]*?)id=(?P<id>[0-9]+)' 
  16             'url': 'http://www.einthusan.com/movies/watch.php?id=2447', 
  17             'md5': 'd71379996ff5b7f217eca034c34e3461', 
  21                 'title': 'Ek Villain', 
  22                 'thumbnail': 're:^https?://.*\.jpg$', 
  23                 'description': 'md5:9d29fc91a7abadd4591fb862fa560d93', 
  27             'url': 'http://www.einthusan.com/movies/watch.php?id=1671', 
  28             'md5': 'b16a6fd3c67c06eb7c79c8a8615f4213', 
  32                 'title': 'Soodhu Kavvuum', 
  33                 'thumbnail': 're:^https?://.*\.jpg$', 
  34                 'description': 'md5:b40f2bf7320b4f9414f3780817b2af8c', 
  39     def _real_extract(self
, url
): 
  40         video_id 
= self
._match
_id
(url
) 
  42         request 
= sanitized_Request(url
) 
  43         request
.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 5.2; WOW64; rv:43.0) Gecko/20100101 Firefox/43.0') 
  44         webpage 
= self
._download
_webpage
(request
, video_id
) 
  46         title 
= self
._html
_search
_regex
( 
  47             r
'<h1><a[^>]+class=["\']movie
-title
["\'][^>]*>(.+?)</a></h1>', 
  50         video_id = self._search_regex( 
  51             r'data-movieid=["\'](\d
+)', webpage, 'video 
id', default=video_id) 
  53         m3u8_url = self._download_webpage( 
  54             'http
://cdn
.einthusan
.com
/geturl
/%s/hd
/London
,Washington
,Toronto
,Dallas
,San
,Sydney
/' 
  55             % video_id, video_id, headers={'Referer
': url}) 
  56         formats = self._extract_m3u8_formats( 
  57             m3u8_url, video_id, ext='mp4
', entry_protocol='m3u8_native
') 
  59         description = self._html_search_meta('description
', webpage) 
  60         thumbnail = self._html_search_regex( 
  61             r'''<a class="movie-cover-wrapper".*?><img src=["'](.*?
)["'].*?/></a>''', 
  62             webpage, "thumbnail url
", fatal=False) 
  63         if thumbnail is not None: 
  64             thumbnail = compat_urlparse.urljoin(url, remove_start(thumbnail, '..')) 
  70             'thumbnail': thumbnail, 
  71             'description': description,