]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/zaq1.py
   2 from __future__ 
import unicode_literals
 
   4 from .common 
import InfoExtractor
 
  11 class Zaq1IE(InfoExtractor
): 
  12     _VALID_URL 
= r
'https?://(?:www\.)?zaq1\.pl/video/(?P<id>[^/?#&]+)' 
  14         'url': 'http://zaq1.pl/video/xev0e', 
  15         'md5': '24a5eb3f052e604ae597c4d0d19b351e', 
  18             'title': 'DJ NA WESELE. TANIEC Z FIGURAMI.węgrów/sokołów podlaski/siedlce/mińsk mazowiecki/warszawa', 
  19             'description': 'www.facebook.com/weseledjKontakt: 728 448 199 / 505 419 147', 
  22             'timestamp': 1490896361, 
  24             'upload_date': '20170330', 
  29         'url': 'http://zaq1.pl/video/x81vn', 
  32             'title': 'SEKRETNE ŻYCIE WALTERA MITTY', 
  35             'timestamp': 1493494860, 
  37             'upload_date': '20170429', 
  41             'skip_download': True, 
  43         'expected_warnings': ['Failed to parse JSON'], 
  46     def _real_extract(self
, url
): 
  47         video_id 
= self
._match
_id
(url
) 
  49         webpage 
= self
._download
_webpage
(url
, video_id
) 
  51         video_url 
= self
._search
_regex
( 
  52             r
'data-video-url=(["\'])(?P
<url
>(?
:(?
!\
1).)+)\
1', webpage, 
  53             'video url
', group='url
') 
  55         info = self._search_json_ld(webpage, video_id, fatal=False) 
  def extract_data(field, name, fatal=False):
      """Return the value of the ``data-<field>`` attribute in ``webpage``.

      ``field`` is the attribute suffix interpolated into the pattern
      (``'file-name'`` matches ``data-file-name="..."``).
      ``name`` is the descriptive label passed as the third argument of
      ``self._search_regex`` (presumably used in its diagnostics -- confirm
      against ``InfoExtractor``).
      ``fatal`` is forwarded to ``self._search_regex`` unchanged.

      ``self`` and ``webpage`` are taken from the enclosing scope.
      """
      # The attribute value may be wrapped in either quote character:
      # group 1 captures the opening quote, and the named group takes every
      # following character up to the next occurrence of that same quote.
      return self._search_regex(
          r'data-%s=(["\'])(?P<field>(?:(?!\1).)+)\1' % field,
          webpage, name, fatal=fatal, group='field')
  62         if not info.get('title'): 
  63             info['title'] = extract_data('file-name', 'title', fatal=True) 
  65         if not info.get('duration'): 
  66             info['duration'] = int_or_none(extract_data('duration', 'duration')) 
  68         if not info.get('thumbnail'): 
  69             info['thumbnail'] = extract_data('photo-url', 'thumbnail') 
  71         if not info.get('timestamp'): 
  72             info['timestamp'] = unified_timestamp(self._html_search_meta( 
  73                 'uploadDate', webpage, 'timestamp')) 
  75         if not info.get('interactionCount'): 
  76             info['view_count'] = int_or_none(self._html_search_meta( 
  77                 'interactionCount', webpage, 'view count')) 
  79         uploader = self._html_search_regex( 
  80             r'Wideo dodał:\s*<a[^>]*>([^<]+)</a>', webpage, 'uploader', 
  83         width = int_or_none(self._html_search_meta( 
  84             'width', webpage, fatal=False)) 
  85         height = int_or_none(self._html_search_meta( 
  86             'height', webpage, fatal=False))