]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/vidio.py
   2 from __future__ 
import unicode_literals
 
   6 from .common 
import InfoExtractor
 
   7 from ..utils 
import int_or_none
 
  10 class VidioIE(InfoExtractor
): 
  11     _VALID_URL 
= r
'https?://(?:www\.)?vidio\.com/watch/(?P<id>\d+)-(?P<display_id>[^/?#&]+)' 
  13         'url': 'http://www.vidio.com/watch/165683-dj_ambred-booyah-live-2015', 
  14         'md5': 'cd2801394afc164e9775db6a140b91fe', 
  17             'display_id': 'dj_ambred-booyah-live-2015', 
  19             'title': 'DJ_AMBRED - Booyah (Live 2015)', 
  20             'description': 'md5:27dc15f819b6a78a626490881adbadf8', 
  21             'thumbnail': r
're:^https?://.*\.jpg$', 
  26         'url': 'https://www.vidio.com/watch/77949-south-korea-test-fires-missile-that-can-strike-all-of-the-north', 
  27         'only_matching': True, 
  30     def _real_extract(self
, url
): 
  31         mobj 
= re
.match(self
._VALID
_URL
, url
) 
  32         video_id
, display_id 
= mobj
.group('id', 'display_id') 
  34         webpage 
= self
._download
_webpage
(url
, display_id
) 
  36         title 
= self
._og
_search
_title
(webpage
) 
  38         m3u8_url
, duration
, thumbnail 
= [None] * 3 
  40         clips 
= self
._parse
_json
( 
  41             self
._html
_search
_regex
( 
  42                 r
'data-json-clips\s*=\s*(["\'])(?P
<data
>\
[.+?\
])\
1', 
  43                 webpage, 'video data
', default='[]', group='data
'), 
  44             display_id, fatal=False) 
  47             m3u8_url = clip.get('sources
', [{}])[0].get('file') 
  48             duration = clip.get('clip_duration
') 
  49             thumbnail = clip.get('image
') 
  51         m3u8_url = m3u8_url or self._search_regex( 
  52             r'data(?
:-vjs
)?
-clip
-hls
-url
=(["\'])(?P<url>(?:(?!\1).)+)\1', 
  53             webpage, 'hls url', group='url') 
  54         formats = self._extract_m3u8_formats( 
  55             m3u8_url, display_id, 'mp4', entry_protocol='m3u8_native') 
  56         self._sort_formats(formats) 
  58         duration = int_or_none(duration or self._search_regex( 
  59             r'data-video-duration=(["\'])(?P
<duration
>\d
+)\
1', webpage, 
  60             'duration
', fatal=False, group='duration
')) 
  61         thumbnail = thumbnail or self._og_search_thumbnail(webpage) 
  63         like_count = int_or_none(self._search_regex( 
  64             (r'<span
[^
>]+data
-comment
-vote
-count
=["\'](\d+)', 
  65              r'<span[^>]+class=["\'].*?
\blike
(?
:__|
-)count
\b.*?
["\'][^>]*>\s*(\d+)'), 
  66             webpage, 'like count', fatal=False)) 
  70             'display_id': display_id, 
  72             'description': self._og_search_description(webpage), 
  73             'thumbnail': thumbnail, 
  75             'like_count': like_count,