]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/zaq1.py
2 from __future__
import unicode_literals
4 from .common
import InfoExtractor
11 class Zaq1IE(InfoExtractor
):
12 _VALID_URL
= r
'https?://(?:www\.)?zaq1\.pl/video/(?P<id>[^/?#&]+)'
14 'url': 'http://zaq1.pl/video/xev0e',
15 'md5': '24a5eb3f052e604ae597c4d0d19b351e',
18 'title': 'DJ NA WESELE. TANIEC Z FIGURAMI.węgrów/sokołów podlaski/siedlce/mińsk mazowiecki/warszawa',
19 'description': 'www.facebook.com/weseledjKontakt: 728 448 199 / 505 419 147',
22 'timestamp': 1490896361,
24 'upload_date': '20170330',
29 'url': 'http://zaq1.pl/video/x81vn',
32 'title': 'SEKRETNE ŻYCIE WALTERA MITTY',
35 'timestamp': 1493494860,
37 'upload_date': '20170429',
41 'skip_download': True,
43 'expected_warnings': ['Failed to parse JSON'],
46 def _real_extract(self
, url
):
47 video_id
= self
._match
_id
(url
)
49 webpage
= self
._download
_webpage
(url
, video_id
)
51 video_url
= self
._search
_regex
(
52 r
'data-video-url=(["\'])(?P
<url
>(?
:(?
!\
1).)+)\
1', webpage,
53 'video url
', group='url
')
55 info = self._search_json_ld(webpage, video_id, fatal=False)
def extract_data(field, name, fatal=False):
    """Return the value of the ``data-<field>`` attribute found in ``webpage``.

    The attribute value may be wrapped in either single or double quotes;
    the text between the opening quote and the matching closing quote is
    returned (the ``field`` named group of the pattern).

    Args:
        field: Suffix that follows ``data-`` in the attribute name. It is
            interpolated into the pattern unescaped, so it must not
            contain regex metacharacters.
        name: Label passed to ``self._search_regex`` as its third
            argument (presumably the human-readable name used in its
            diagnostics; confirm against ``InfoExtractor``).
        fatal: Forwarded unchanged to ``self._search_regex``.
    """
    # ``(?:(?!\1).)+`` consumes characters until the same quote character
    # that opened the value (captured as group 1) is seen again.
    return self._search_regex(
        r'data-%s=(["\'])(?P<field>(?:(?!\1).)+)\1' % field,
        webpage, name, fatal=fatal, group='field')
62 if not info.get('title'):
63 info['title'] = extract_data('file-name', 'title', fatal=True)
65 if not info.get('duration'):
66 info['duration'] = int_or_none(extract_data('duration', 'duration'))
68 if not info.get('thumbnail'):
69 info['thumbnail'] = extract_data('photo-url', 'thumbnail')
71 if not info.get('timestamp'):
72 info['timestamp'] = unified_timestamp(self._html_search_meta(
73 'uploadDate', webpage, 'timestamp'))
75 if not info.get('interactionCount'):
76 info['view_count'] = int_or_none(self._html_search_meta(
77 'interactionCount', webpage, 'view count'))
79 uploader = self._html_search_regex(
80 r'Wideo dodał:\s*<a[^>]*>([^<]+)</a>', webpage, 'uploader',
83 width = int_or_none(self._html_search_meta(
84 'width', webpage, fatal=False))
85 height = int_or_none(self._html_search_meta(
86 'height', webpage, fatal=False))