]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/sexu.py
   1 from __future__ 
import unicode_literals
 
   5 from .common 
import InfoExtractor
 
   8 class SexuIE(InfoExtractor
): 
   9     _VALID_URL 
= r
'https?://(?:www\.)?sexu\.com/(?P<id>\d+)' 
  11         'url': 'http://sexu.com/961791/', 
  12         'md5': 'ff615aca9691053c94f8f10d96cd7884', 
  16             'title': 'md5:4d05a19a5fc049a63dbbaf05fb71d91b', 
  17             'description': 'md5:c5ed8625eb386855d5a7967bd7b77a54', 
  18             'categories': list,  # NSFW 
  19             'thumbnail': 're:https?://.*\.jpg$', 
  24     def _real_extract(self
, url
): 
  25         video_id 
= self
._match
_id
(url
) 
  26         webpage 
= self
._download
_webpage
(url
, video_id
) 
  28         quality_arr 
= self
._search
_regex
( 
  29             r
'sources:\s*\[([^\]]+)\]', webpage
, 'forrmat string') 
  31             'url': fmt
[0].replace('\\', ''), 
  33             'height': int(fmt
[1][:3]), 
  34         } for fmt 
in re
.findall(r
'"file":"([^"]+)","label":"([^"]+)"', quality_arr
)] 
  35         self
._sort
_formats
(formats
) 
  37         title 
= self
._html
_search
_regex
( 
  38             r
'<title>([^<]+)\s*-\s*Sexu\.Com</title>', webpage
, 'title') 
  40         description 
= self
._html
_search
_meta
( 
  41             'description', webpage
, 'description') 
  43         thumbnail 
= self
._html
_search
_regex
( 
  44             r
'image:\s*"([^"]+)"', 
  45             webpage
, 'thumbnail', fatal
=False) 
  47         categories_str 
= self
._html
_search
_meta
( 
  48             'keywords', webpage
, 'categories') 
  50             None if categories_str 
is None 
  51             else categories_str
.split(',')) 
  56             'description': description
, 
  57             'thumbnail': thumbnail
, 
  58             'categories': categories
,