# coding: utf-8
# Scraped from: Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/cda.py
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    float_or_none,
    int_or_none,
    parse_duration,
)
 
  15 class CDAIE(InfoExtractor
): 
  16     _VALID_URL 
= r
'https?://(?:(?:www\.)?cda\.pl/video|ebd\.cda\.pl/[0-9]+x[0-9]+)/(?P<id>[0-9a-z]+)' 
  17     _BASE_URL 
= 'http://www.cda.pl/' 
  19         'url': 'http://www.cda.pl/video/5749950c', 
  20         'md5': '6f844bf51b15f31fae165365707ae970', 
  25             'title': 'Oto dlaczego przed zakrętem należy zwolnić.', 
  26             'description': 'md5:269ccd135d550da90d1662651fcb9772', 
  27             'thumbnail': 're:^https?://.*\.jpg$', 
  28             'average_rating': float, 
  32         'url': 'http://www.cda.pl/video/57413289', 
  33         'md5': 'a88828770a8310fc00be6c95faf7f4d5', 
  37             'title': 'Lądowanie na lotnisku na Maderze', 
  38             'description': 'md5:60d76b71186dcce4e0ba6d4bbdb13e1a', 
  39             'thumbnail': 're:^https?://.*\.jpg$', 
  40             'uploader': 'crash404', 
  42             'average_rating': float, 
  46         'url': 'http://ebd.cda.pl/0x0/5749950c', 
  47         'only_matching': True, 
  50     def _real_extract(self
, url
): 
  51         video_id 
= self
._match
_id
(url
) 
  52         self
._set
_cookie
('cda.pl', 'cda.player', 'html5') 
  53         webpage 
= self
._download
_webpage
( 
  54             self
._BASE
_URL 
+ '/video/' + video_id
, video_id
) 
  56         if 'Ten film jest dostępny dla użytkowników premium' in webpage
: 
  57             raise ExtractorError('This video is only available for premium users.', expected
=True) 
  61         uploader 
= self
._search
_regex
(r
'''(?x) 
  62             <(span|meta)[^>]+itemprop=(["\'])author\
2[^
>]*> 
  63             (?
:<\
1[^
>]*>[^
<]*</\
1>|
(?
!</\
1>)(?
:.|
\n))*?
 
  64             <(span|meta
)[^
>]+itemprop
=(["\'])name\4[^>]*>(?P<uploader>[^<]+)</\3> 
  65         ''', webpage, 'uploader', default=None, group='uploader') 
  66         view_count = self._search_regex( 
  67             r'Odsłony:(?:\s| )*([0-9]+)', webpage, 
  68             'view_count', default=None) 
  69         average_rating = self._search_regex( 
  70             r'<(?:span|meta)[^>]+itemprop=(["\'])ratingValue\
1[^
>]*>(?P
<rating_value
>[0-9.]+)', 
  71             webpage, 'rating
', fatal=False, group='rating_value
') 
  75             'title
': self._og_search_title(webpage), 
  76             'description
': self._og_search_description(webpage), 
  78             'view_count
': int_or_none(view_count), 
  79             'average_rating
': float_or_none(average_rating), 
  80             'thumbnail
': self._og_search_thumbnail(webpage), 
  85         def extract_format(page, version): 
  86             json_str = self._search_regex( 
  87                 r'player_data
=(\\?
["\'])(?P<player_data>.+?)\1', page, 
  88                 '%s player_json' % version, fatal=False, group='player_data') 
  91             player_data = self._parse_json( 
  92                 json_str, '%s player_data' % version, fatal=False) 
  95             video = player_data.get('video') 
  96             if not video or 'file' not in video: 
  97                 self.report_warning('Unable to extract %s version information' % version) 
 100                 'url': video['file'], 
 103                 r'<a[^>]+data-quality="(?P
<format_id
>[^
"]+)"[^
>]+href
="[^"]+"[^>]+class="[^
"]*quality-btn-active[^"]*">(?P<height>[0-9]+)p', 
 107                     'format_id': m.group('format_id'), 
 108                     'height': int(m.group('height')), 
 110             info_dict['formats'].append(f) 
 111             if not info_dict['duration']: 
 112                 info_dict['duration'] = parse_duration(video.get('duration')) 
 114         extract_format(webpage, 'default') 
 116         for href, resolution in re.findall( 
 117                 r'<a[^>]+data-quality="[^
"]+"[^
>]+href
="([^"]+)"[^>]+class="quality
-btn
"[^>]*>([0-9]+p)', 
 119             webpage = self._download_webpage( 
 120                 self._BASE_URL + href, video_id, 
 121                 'Downloading %s version information' % resolution, fatal=False) 
 123                 # Manually report warning because empty page is returned when 
 124                 # invalid version is requested. 
 125                 self.report_warning('Unable to download %s version information' % resolution) 
 127             extract_format(webpage, resolution) 
 129         self._sort_formats(formats)