# Extracted from: Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/mtv.py
import re
import xml.etree.ElementTree

from .common import InfoExtractor
# NOTE(review): ExtractorError and compat_urllib_parse are used below, but no
# import for them is visible in this extract -- presumably they come from the
# package's utils module; confirm and restore that import.
 
  10 def _media_xml_tag(tag
): 
  11     return '{http://search.yahoo.com/mrss/}%s' % tag
 
  13 class MTVIE(InfoExtractor
): 
  14     _VALID_URL 
= r
'^https?://(?:www\.)?mtv\.com/videos/.+?/(?P<videoid>[0-9]+)/[^/]+$' 
  16     _FEED_URL 
= 'http://www.mtv.com/player/embed/AS3/rss/' 
  20             u
'url': u
'http://www.mtv.com/videos/misc/853555/ours-vh1-storytellers.jhtml', 
  21             u
'file': u
'853555.mp4', 
  22             u
'md5': u
'850f3f143316b1e71fa56a4edfd6e0f8', 
  24                 u
'title': u
'Taylor Swift - "Ours (VH1 Storytellers)"', 
  25                 u
'description': u
'Album: Taylor Swift performs "Ours" for VH1 Storytellers at Harvey Mudd College.', 
  29             u
'url': u
'http://www.mtv.com/videos/taylor-swift/916187/everything-has-changed-ft-ed-sheeran.jhtml', 
  30             u
'file': u
'USCJY1331283.mp4', 
  31             u
'md5': u
'73b4e7fcadd88929292fe52c3ced8caf', 
  33                 u
'title': u
'Everything Has Changed', 
  34                 u
'upload_date': u
'20130606', 
  35                 u
'uploader': u
'Taylor Swift', 
  37             u
'skip': u
'VEVO is only available in some countries', 
  42     def _id_from_uri(uri
): 
  43         return uri
.split(':')[-1] 
  45     # This was originally implemented for ComedyCentral, but it also works here 
  47     def _transform_rtmp_url(rtmp_video_url
): 
  48         m 
= re
.match(r
'^rtmpe?://.*?/(?P<finalid>gsp\..+?/.*)$', rtmp_video_url
) 
  50             raise ExtractorError(u
'Cannot transform RTMP url') 
  51         base 
= 'http://mtvnmobile.vo.llnwd.net/kip0/_pxn=1+_pxI0=Ripod-h264+_pxL0=undefined+_pxM0=+_pxK=18639+_pxE=mp4/44620/mtvnorigin/' 
  52         return base 
+ m
.group('finalid') 
  54     def _get_thumbnail_url(self
, uri
, itemdoc
): 
  55         return 'http://mtv.mtvnimages.com/uri/' + uri
 
  57     def _extract_video_url(self
, metadataXml
): 
  58         if '/error_country_block.swf' in metadataXml
: 
  59             raise ExtractorError(u
'This video is not available from your country.', expected
=True) 
  60         mdoc 
= xml
.etree
.ElementTree
.fromstring(metadataXml
.encode('utf-8')) 
  61         renditions 
= mdoc
.findall('.//rendition') 
  63         # For now, always pick the highest quality. 
  64         rendition 
= renditions
[-1] 
  67             _
,_
,ext 
= rendition
.attrib
['type'].partition('/') 
  68             format 
= ext 
+ '-' + rendition
.attrib
['width'] + 'x' + rendition
.attrib
['height'] + '_' + rendition
.attrib
['bitrate'] 
  69             rtmp_video_url 
= rendition
.find('./src').text
 
  71             raise ExtractorError('Invalid rendition field.') 
  72         video_url 
= self
._transform
_rtmp
_url
(rtmp_video_url
) 
  73         return {'ext': ext
, 'url': video_url
, 'format': format
} 
  75     def _get_video_info(self
, itemdoc
): 
  76         uri 
= itemdoc
.find('guid').text
 
  77         video_id 
= self
._id
_from
_uri
(uri
) 
  78         self
.report_extraction(video_id
) 
  79         mediagen_url 
= itemdoc
.find('%s/%s' % (_media_xml_tag('group'), _media_xml_tag('content'))).attrib
['url'] 
  80         if 'acceptMethods' not in mediagen_url
: 
  81             mediagen_url 
+= '&acceptMethods=fms' 
  82         mediagen_page 
= self
._download
_webpage
(mediagen_url
, video_id
, 
  83                                                u
'Downloading video urls') 
  84         video_info 
= self
._extract
_video
_url
(mediagen_page
) 
  86         description_node 
= itemdoc
.find('description') 
  87         if description_node 
is not None: 
  88             description 
= description_node
.text
 
  91         video_info
.update({'title': itemdoc
.find('title').text
, 
  93                            'thumbnail': self
._get
_thumbnail
_url
(uri
, itemdoc
), 
  94                            'description': description
, 
  98     def _get_videos_info(self
, uri
): 
  99         video_id 
= self
._id
_from
_uri
(uri
) 
 100         data 
= compat_urllib_parse
.urlencode({'uri': uri
}) 
 101         infoXml 
= self
._download
_webpage
(self
._FEED
_URL 
+'?' + data
, video_id
, 
 103         idoc 
= xml
.etree
.ElementTree
.fromstring(infoXml
.encode('utf-8')) 
 104         return [self
._get
_video
_info
(item
) for item 
in idoc
.findall('.//item')] 
 106     def _real_extract(self
, url
): 
 107         mobj 
= re
.match(self
._VALID
_URL
, url
) 
 108         video_id 
= mobj
.group('videoid') 
 110         webpage 
= self
._download
_webpage
(url
, video_id
) 
 112         # Some videos come from Vevo.com 
 113         m_vevo 
= re
.search(r
'isVevoVideo = true;.*?vevoVideoId = "(.*?)";', 
 116             vevo_id 
= m_vevo
.group(1); 
 117             self
.to_screen(u
'Vevo video detected: %s' % vevo_id
) 
 118             return self
.url_result('vevo:%s' % vevo_id
, ie
='Vevo') 
 120         uri 
= self
._html
_search
_regex
(r
'/uri/(.*?)\?', webpage
, u
'uri') 
 121         return self
._get
_videos
_info
(uri
)