]>
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/extractor/vulture.py
1 from __future__
import unicode_literals
7 from .common
import InfoExtractor
14 class VultureIE(InfoExtractor
):
15 IE_NAME
= 'vulture.com'
16 _VALID_URL
= r
'https?://video\.vulture\.com/video/(?P<display_id>[^/]+)/'
18 'url': 'http://video.vulture.com/video/Mindy-Kaling-s-Harvard-Speech/player?layout=compact&read_more=1',
19 'md5': '8d997845642a2b5152820f7257871bc8',
21 'id': '6GHRQL3RV7MSD1H4',
23 'title': 'kaling-speech-2-MAGNIFY STANDARD CONTAINER REVISED',
24 'uploader_id': 'Sarah',
25 'thumbnail': 're:^http://.*\.jpg$',
26 'timestamp': 1401288564,
27 'upload_date': '20140528',
28 'description': 'Uplifting and witty, as predicted.',
33 def _real_extract(self
, url
):
34 mobj
= re
.match(self
._VALID
_URL
, url
)
35 display_id
= mobj
.group('display_id')
37 webpage
= self
._download
_webpage
(url
, display_id
)
38 query_string
= self
._search
_regex
(
39 r
"queryString\s*=\s*'([^']+)'", webpage
, 'query string')
40 video_id
= self
._search
_regex
(
41 r
'content=([^&]+)', query_string
, 'video ID')
42 query_url
= 'http://video.vulture.com/embed/player/container/1000/1000/?%s' % query_string
44 query_webpage
= self
._download
_webpage
(
45 query_url
, display_id
, note
='Downloading query page')
46 params_json
= self
._search
_regex
(
47 r
'(?sm)new MagnifyEmbeddablePlayer\({.*?contentItem:\s*(\{.*?\})\n,\n',
50 params
= json
.loads(params_json
)
52 upload_timestamp
= parse_iso8601(params
['posted'].replace(' ', 'T'))
53 uploader_id
= params
.get('user', {}).get('handle')
55 media_item
= params
['media_item']
56 title
= os
.path
.splitext(media_item
['title'])[0]
57 duration
= int_or_none(media_item
.get('duration_seconds'))
61 'display_id': display_id
,
62 'url': media_item
['pipeline_xid'],
64 'timestamp': upload_timestamp
,
65 'thumbnail': params
.get('thumbnail_url'),
66 'uploader_id': uploader_id
,
67 'description': params
.get('description'),