]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/beeg.py
   1 from __future__ 
import unicode_literals
 
   5 from .common 
import InfoExtractor
 
   8 class BeegIE(InfoExtractor
): 
   9     _VALID_URL 
= r
'https?://(?:www\.)?beeg\.com/(?P<id>\d+)' 
  11         'url': 'http://beeg.com/5416503', 
  12         'md5': '1bff67111adb785c51d1b42959ec10e5', 
  16             'title': 'Sultry Striptease', 
  17             'description': 'md5:6db3c6177972822aaba18652ff59c773', 
  18             'categories': list,  # NSFW 
  19             'thumbnail': 're:https?://.*\.jpg$', 
  24     def _real_extract(self
, url
): 
  25         mobj 
= re
.match(self
._VALID
_URL
, url
) 
  26         video_id 
= mobj
.group('id') 
  28         webpage 
= self
._download
_webpage
(url
, video_id
) 
  30         quality_arr 
= self
._search
_regex
( 
  31             r
'(?s)var\s+qualityArr\s*=\s*{\s*(.+?)\s*}', webpage
, 'quality formats') 
  36             'height': int(fmt
[0][:-1]), 
  37         } for fmt 
in re
.findall(r
"'([^']+)'\s*:\s*'([^']+)'", quality_arr
)] 
  39         self
._sort
_formats
(formats
) 
  41         title 
= self
._html
_search
_regex
( 
  42             r
'<title>([^<]+)\s*-\s*beeg\.?</title>', webpage
, 'title') 
  44         description 
= self
._html
_search
_regex
( 
  45             r
'<meta name="description" content="([^"]*)"', 
  46             webpage
, 'description', fatal
=False) 
  47         thumbnail 
= self
._html
_search
_regex
( 
  48             r
'\'previewer
.url
\'\s
*:\s
*"([^"]*)"', 
  49             webpage, 'thumbnail', fatal=False) 
  51         categories_str = self._html_search_regex( 
  52             r'<meta name="keywords
" content="([^
"]+)"', webpage, 'categories
', fatal=False) 
  54             None if categories_str is None 
  55             else categories_str.split(',')) 
  60             'description
': description, 
  61             'thumbnail
': thumbnail, 
  62             'categories
': categories,