]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/popcorntv.py
   1 from __future__ 
import unicode_literals
 
   5 from .common 
import InfoExtractor
 
  13 class PopcornTVIE(InfoExtractor
): 
  14     _VALID_URL 
= r
'https?://[^/]+\.popcorntv\.it/guarda/(?P<display_id>[^/]+)/(?P<id>\d+)' 
  16         'url': 'https://animemanga.popcorntv.it/guarda/food-wars-battaglie-culinarie-episodio-01/9183', 
  17         'md5': '47d65a48d147caf692ab8562fe630b45', 
  20             'display_id': 'food-wars-battaglie-culinarie-episodio-01', 
  22             'title': 'Food Wars, Battaglie Culinarie | Episodio 01', 
  23             'description': 'md5:b8bea378faae4651d3b34c6e112463d0', 
  24             'thumbnail': r
're:^https?://.*\.jpg$', 
  25             'timestamp': 1497610857, 
  26             'upload_date': '20170616', 
  31         'url': 'https://cinema.popcorntv.it/guarda/smash-cut/10433', 
  32         'only_matching': True, 
  35     def _real_extract(self
, url
): 
  36         mobj 
= re
.match(self
._VALID
_URL
, url
) 
  37         display_id
, video_id 
= mobj
.group('display_id', 'id') 
  39         webpage 
= self
._download
_webpage
(url
, display_id
) 
  41         m3u8_url 
= extract_attributes( 
  43                 r
'(<link[^>]+itemprop=["\'](?
:content|embed
)Url
[^
>]*>)', 
  47         formats = self._extract_m3u8_formats( 
  48             m3u8_url, display_id, 'mp4
', entry_protocol='m3u8_native
', 
  51         title = self._search_regex( 
  52             r'<h1
[^
>]+itemprop
=["\']name[^>]*>([^<]+)', webpage, 
  53             'title', default=None) or self._og_search_title(webpage) 
  55         description = self._html_search_regex( 
  56             r'(?s)<article[^>]+itemprop=["\']description
[^
>]*>(.+?
)</article
>', 
  57             webpage, 'description
', fatal=False) 
  58         thumbnail = self._og_search_thumbnail(webpage) 
  59         timestamp = unified_timestamp(self._html_search_meta( 
  60             'uploadDate
', webpage, 'timestamp
')) 
  61         print(self._html_search_meta( 
  63         duration = int_or_none(self._html_search_meta( 
  64             'duration
', webpage), invscale=60) 
  65         view_count = int_or_none(self._html_search_meta( 
  66             'interactionCount
', webpage, 'view count
')) 
  70             'display_id
': display_id, 
  72             'description
': description, 
  73             'thumbnail
': thumbnail, 
  74             'timestamp
': timestamp, 
  76             'view_count
': view_count,