]>
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/extractor/pornoxo.py
202f58673ae4f1dd77caee159f37dc24be9aad64
   1 from __future__ 
import unicode_literals
 
   5 from .common 
import InfoExtractor
 
  11 class PornoXOIE(InfoExtractor
): 
  12     _VALID_URL 
= r
'https?://(?:www\.)?pornoxo\.com/videos/(?P<id>\d+)/(?P<display_id>[^/]+)\.html' 
  14         'url': 'http://www.pornoxo.com/videos/7564/striptease-from-sexy-secretary.html', 
  15         'md5': '582f28ecbaa9e6e24cb90f50f524ce87', 
  19             'title': 'Striptease From Sexy Secretary!', 
  20             'description': 'Striptease From Sexy Secretary!', 
  21             'categories': list,  # NSFW 
  22             'thumbnail': 're:https?://.*\.jpg$', 
  27     def _real_extract(self
, url
): 
  28         mobj 
= re
.match(self
._VALID
_URL
, url
) 
  29         video_id 
= mobj
.group('id') 
  31         webpage 
= self
._download
_webpage
(url
, video_id
) 
  33         video_url 
= self
._html
_search
_regex
( 
  34             r
'\'file\'\s
*:\s
*"([^"]+)"', webpage, 'video_url') 
  36         title = self._html_search_regex( 
  37             r'<title>([^<]+)\s*-\s*PornoXO', webpage, 'title') 
  39         description = self._html_search_regex( 
  40             r'<meta name="description
" content="([^
"]+)\s*featuring', 
  41             webpage, 'description', fatal=False) 
  43         thumbnail = self._html_search_regex( 
  44             r'\'image\'\s*:\s*"([^
"]+)"', webpage, 'thumbnail
', fatal=False) 
  46         view_count = str_to_int(self._html_search_regex( 
  47             r'[vV
]iews
:\s
*([0-9,]+)', webpage, 'view count
', fatal=False)) 
  49         categories_str = self._html_search_regex( 
  50             r'<meta name
="description" content
=".*featuring\s*([^"]+)"', 
  51             webpage, 'categories', fatal=False) 
  53             None if categories_str is None 
  54             else categories_str.split(',')) 
  60             'description': description, 
  61             'thumbnail': thumbnail, 
  62             'categories': categories, 
  63             'view_count': view_count,