]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/vulture.py
   1 from __future__ 
import unicode_literals
 
   7 from .common 
import InfoExtractor
 
  14 class VultureIE(InfoExtractor
): 
  15     IE_NAME 
= 'vulture.com' 
  16     _VALID_URL 
= r
'https?://video\.vulture\.com/video/(?P<display_id>[^/]+)/' 
  18         'url': 'http://video.vulture.com/video/Mindy-Kaling-s-Harvard-Speech/player?layout=compact&read_more=1', 
  19         'md5': '8d997845642a2b5152820f7257871bc8', 
  21             'id': '6GHRQL3RV7MSD1H4', 
  23             'title': 'kaling-speech-2-MAGNIFY STANDARD CONTAINER REVISED', 
  24             'uploader_id': 'Sarah', 
  25             'thumbnail': 're:^http://.*\.jpg$', 
  26             'timestamp': 1401288564, 
  27             'upload_date': '20140528', 
  28             'description': 'Uplifting and witty, as predicted.', 
  33     def _real_extract(self
, url
): 
  34         mobj 
= re
.match(self
._VALID
_URL
, url
) 
  35         display_id 
= mobj
.group('display_id') 
  37         webpage 
= self
._download
_webpage
(url
, display_id
) 
  38         query_string 
= self
._search
_regex
( 
  39             r
"queryString\s*=\s*'([^']+)'", webpage
, 'query string') 
  40         video_id 
= self
._search
_regex
( 
  41             r
'content=([^&]+)', query_string
, 'video ID') 
  42         query_url 
= 'http://video.vulture.com/embed/player/container/1000/1000/?%s' % query_string
 
  44         query_webpage 
= self
._download
_webpage
( 
  45             query_url
, display_id
, note
='Downloading query page') 
  46         params_json 
= self
._search
_regex
( 
  47             r
'(?sm)new MagnifyEmbeddablePlayer\({.*?contentItem:\s*(\{.*?\})\n?,\n', 
  50         params 
= json
.loads(params_json
) 
  52         upload_timestamp 
= parse_iso8601(params
['posted'].replace(' ', 'T')) 
  53         uploader_id 
= params
.get('user', {}).get('handle') 
  55         media_item 
= params
['media_item'] 
  56         title 
= os
.path
.splitext(media_item
['title'])[0] 
  57         duration 
= int_or_none(media_item
.get('duration_seconds')) 
  61             'display_id': display_id
, 
  62             'url': media_item
['pipeline_xid'], 
  64             'timestamp': upload_timestamp
, 
  65             'thumbnail': params
.get('thumbnail_url'), 
  66             'uploader_id': uploader_id
, 
  67             'description': params
.get('description'),