# Source: Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/mtv.py
import re
import xml.etree.ElementTree

from .common import InfoExtractor
from ..utils import (
    compat_urllib_parse,
    ExtractorError,
)
 
  10 def _media_xml_tag(tag
): 
  11     return '{http://search.yahoo.com/mrss/}%s' % tag
 
  13 class MTVIE(InfoExtractor
): 
  14     _VALID_URL 
= r
'^https?://(?:www\.)?mtv\.com/videos/.+?/(?P<videoid>[0-9]+)/[^/]+$' 
  16     _FEED_URL 
= 'http://www.mtv.com/player/embed/AS3/rss/' 
  20             u
'url': u
'http://www.mtv.com/videos/misc/853555/ours-vh1-storytellers.jhtml', 
  21             u
'file': u
'853555.mp4', 
  22             u
'md5': u
'850f3f143316b1e71fa56a4edfd6e0f8', 
  24                 u
'title': u
'Taylor Swift - "Ours (VH1 Storytellers)"', 
  25                 u
'description': u
'Album: Taylor Swift performs "Ours" for VH1 Storytellers at Harvey Mudd College.', 
  30             u
'url': u
'http://www.mtv.com/videos/taylor-swift/916187/everything-has-changed-ft-ed-sheeran.jhtml', 
  31             u
'file': u
'USCJY1331283.mp4', 
  32             u
'md5': u
'73b4e7fcadd88929292fe52c3ced8caf', 
  34                 u
'title': u
'Everything Has Changed', 
  35                 u
'upload_date': u
'20130606', 
  36                 u
'uploader': u
'Taylor Swift', 
  38             u
'skip': u
'VEVO is only available in some countries', 
  43     def _id_from_uri(uri
): 
  44         return uri
.split(':')[-1] 
  46     # This was originally implemented for ComedyCentral, but it also works here 
  48     def _transform_rtmp_url(rtmp_video_url
): 
  49         m 
= re
.match(r
'^rtmpe?://.*?/(?P<finalid>gsp\..+?/.*)$', rtmp_video_url
) 
  51             raise ExtractorError(u
'Cannot transform RTMP url') 
  52         base 
= 'http://mtvnmobile.vo.llnwd.net/kip0/_pxn=1+_pxI0=Ripod-h264+_pxL0=undefined+_pxM0=+_pxK=18639+_pxE=mp4/44620/mtvnorigin/' 
  53         return base 
+ m
.group('finalid') 
  55     def _get_thumbnail_url(self
, uri
, itemdoc
): 
  56         return 'http://mtv.mtvnimages.com/uri/' + uri
 
  58     def _extract_video_formats(self
, metadataXml
): 
  59         if '/error_country_block.swf' in metadataXml
: 
  60             raise ExtractorError(u
'This video is not available from your country.', expected
=True) 
  61         mdoc 
= xml
.etree
.ElementTree
.fromstring(metadataXml
.encode('utf-8')) 
  62         renditions 
= mdoc
.findall('.//rendition') 
  65         for rendition 
in mdoc
.findall('.//rendition'): 
  67                 _
, _
, ext 
= rendition
.attrib
['type'].partition('/') 
  68                 rtmp_video_url 
= rendition
.find('./src').text
 
  69                 formats
.append({'ext': ext
, 
  70                                 'url': self
._transform
_rtmp
_url
(rtmp_video_url
), 
  71                                 'format_id': rendition
.get('bitrate'), 
  72                                 'width': int(rendition
.get('width')), 
  73                                 'height': int(rendition
.get('height')), 
  75             except (KeyError, TypeError): 
  76                 raise ExtractorError('Invalid rendition field.') 
  79     def _get_video_info(self
, itemdoc
): 
  80         uri 
= itemdoc
.find('guid').text
 
  81         video_id 
= self
._id
_from
_uri
(uri
) 
  82         self
.report_extraction(video_id
) 
  83         mediagen_url 
= itemdoc
.find('%s/%s' % (_media_xml_tag('group'), _media_xml_tag('content'))).attrib
['url'] 
  84         # Remove the templates, like &device={device} 
  85         mediagen_url 
= re
.sub(r
'&[^=]*?={.*?}(?=(&|$))', u
'', mediagen_url
) 
  86         if 'acceptMethods' not in mediagen_url
: 
  87             mediagen_url 
+= '&acceptMethods=fms' 
  88         mediagen_page 
= self
._download
_webpage
(mediagen_url
, video_id
, 
  89                                                u
'Downloading video urls') 
  91         description_node 
= itemdoc
.find('description') 
  92         if description_node 
is not None: 
  93             description 
= description_node
.text
.strip() 
  98             'title': itemdoc
.find('title').text
, 
  99             'formats': self
._extract
_video
_formats
(mediagen_page
), 
 101             'thumbnail': self
._get
_thumbnail
_url
(uri
, itemdoc
), 
 102             'description': description
, 
 105         # TODO: Remove when #980 has been merged 
 106         info
.update(info
['formats'][-1]) 
 110     def _get_videos_info(self
, uri
): 
 111         video_id 
= self
._id
_from
_uri
(uri
) 
 112         data 
= compat_urllib_parse
.urlencode({'uri': uri
}) 
 113         infoXml 
= self
._download
_webpage
(self
._FEED
_URL 
+'?' + data
, video_id
, 
 115         idoc 
= xml
.etree
.ElementTree
.fromstring(infoXml
.encode('utf-8')) 
 116         return [self
._get
_video
_info
(item
) for item 
in idoc
.findall('.//item')] 
 118     def _real_extract(self
, url
): 
 119         mobj 
= re
.match(self
._VALID
_URL
, url
) 
 120         video_id 
= mobj
.group('videoid') 
 122         webpage 
= self
._download
_webpage
(url
, video_id
) 
 124         # Some videos come from Vevo.com 
 125         m_vevo 
= re
.search(r
'isVevoVideo = true;.*?vevoVideoId = "(.*?)";', 
 128             vevo_id 
= m_vevo
.group(1); 
 129             self
.to_screen(u
'Vevo video detected: %s' % vevo_id
) 
 130             return self
.url_result('vevo:%s' % vevo_id
, ie
='Vevo') 
 132         uri 
= self
._html
_search
_regex
(r
'/uri/(.*?)\?', webpage
, u
'uri') 
 133         return self
._get
_videos
_info
(uri
)