]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/mtv.py
969db71139b1f81d290ad6549ed3b3d8207da0c7
3 import xml
.etree
.ElementTree
5 from .common
import InfoExtractor
10 compat_urllib_request
,
16 class MTVIE(InfoExtractor
):
17 _VALID_URL
= r
'^(?P<proto>https?://)?(?:www\.)?mtv\.com/videos/[^/]+/(?P<videoid>[0-9]+)/[^/]+$'
20 def _real_extract(self
, url
):
21 mobj
= re
.match(self
._VALID
_URL
, url
)
23 raise ExtractorError(u
'Invalid URL: %s' % url
)
24 if not mobj
.group('proto'):
26 video_id
= mobj
.group('videoid')
28 webpage
= self
._download
_webpage
(url
, video_id
)
30 # Some videos come from Vevo.com
31 m_vevo
= re
.search(r
'isVevoVideo = true;.*?vevoVideoId = "(.*?)";',
34 vevo_id
= m_vevo
.group(1);
35 self
.to_screen(u
'Vevo video detected: %s' % vevo_id
)
36 return self
.url_result('vevo:%s' % vevo_id
, ie
='Vevo')
38 #song_name = self._html_search_regex(r'<meta name="mtv_vt" content="([^"]+)"/>',
39 # webpage, u'song name', fatal=False)
41 video_title
= self
._html
_search
_regex
(r
'<meta name="mtv_an" content="([^"]+)"/>',
44 mtvn_uri
= self
._html
_search
_regex
(r
'<meta name="mtvn_uri" content="([^"]+)"/>',
45 webpage
, u
'mtvn_uri', fatal
=False)
47 content_id
= self
._search
_regex
(r
'MTVN.Player.defaultPlaylistId = ([0-9]+);',
48 webpage
, u
'content id', fatal
=False)
50 videogen_url
= 'http://www.mtv.com/player/includes/mediaGen.jhtml?uri=' + mtvn_uri
+ '&id=' + content_id
+ '&vid=' + video_id
+ '&ref=www.mtvn.com&viewUri=' + mtvn_uri
51 self
.report_extraction(video_id
)
52 request
= compat_urllib_request
.Request(videogen_url
)
54 metadataXml
= compat_urllib_request
.urlopen(request
).read()
55 except (compat_urllib_error
.URLError
, compat_http_client
.HTTPException
, socket
.error
) as err
:
56 raise ExtractorError(u
'Unable to download video metadata: %s' % compat_str(err
))
58 mdoc
= xml
.etree
.ElementTree
.fromstring(metadataXml
)
59 renditions
= mdoc
.findall('.//rendition')
61 # For now, always pick the highest quality.
62 rendition
= renditions
[-1]
65 _
,_
,ext
= rendition
.attrib
['type'].partition('/')
66 format
= ext
+ '-' + rendition
.attrib
['width'] + 'x' + rendition
.attrib
['height'] + '_' + rendition
.attrib
['bitrate']
67 video_url
= rendition
.find('./src').text
69 raise ExtractorError('Invalid rendition field.')