]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/xnxx.py
   2 from __future__ 
import unicode_literals
 
   6 from .common 
import InfoExtractor
 
  15 class XNXXIE(InfoExtractor
): 
  16     _VALID_URL 
= r
'https?://(?:video|www)\.xnxx\.com/video-?(?P<id>[0-9a-z]+)/' 
  18         'url': 'http://www.xnxx.com/video-55awb78/skyrim_test_video', 
  19         'md5': '7583e96c15c0f21e9da3453d9920fbba', 
  23             'title': 'Skyrim Test Video', 
  24             'thumbnail': r
're:^https?://.*\.jpg', 
  30         'url': 'http://video.xnxx.com/video1135332/lida_naked_funny_actress_5_', 
  31         'only_matching': True, 
  33         'url': 'http://www.xnxx.com/video-55awb78/', 
  34         'only_matching': True, 
  37     def _real_extract(self
, url
): 
  38         video_id 
= self
._match
_id
(url
) 
  40         webpage 
= self
._download
_webpage
(url
, video_id
) 
  42         def get(meta
, default
=NO_DEFAULT
, fatal
=True): 
  43             return self
._search
_regex
( 
  44                 r
'set%s\s*\(\s*(["\'])(?P
<value
>(?
:(?
!\
1).)+)\
1' % meta, 
  45                 webpage, meta, default=default, fatal=fatal, group='value
') 
  47         title = self._og_search_title( 
  48             webpage, default=None) or get('VideoTitle
') 
  51         for mobj in re.finditer( 
  52                 r'setVideo(?
:Url(?P
<id>Low|High
)|HLS
)\s
*\
(\s
*(?P
<q
>["\'])(?P<url>(?:https?:)?//.+?)(?P=q)', webpage): 
  53             format_url = mobj.group('url') 
  54             if determine_ext(format_url) == 'm3u8': 
  55                 formats.extend(self._extract_m3u8_formats( 
  56                     format_url, video_id, 'mp4', entry_protocol='m3u8_native', 
  57                     preference=1, m3u8_id='hls', fatal=False)) 
  59                 format_id = mobj.group('id') 
  61                     format_id = format_id.lower() 
  64                     'format_id': format_id, 
  65                     'quality': -1 if format_id == 'low' else 0, 
  67         self._sort_formats(formats) 
  69         thumbnail = self._og_search_thumbnail(webpage, default=None) or get( 
  70             'ThumbUrl', fatal=False) or get('ThumbUrl169', fatal=False) 
  71         duration = int_or_none(self._og_search_property('duration', webpage)) 
  72         view_count = str_to_int(self._search_regex( 
  73             r'id=["\']nb
-views
-number
[^
>]+>([\d
,.]+)', webpage, 'view count
', 
  79             'thumbnail
': thumbnail, 
  81             'view_count
': view_count,