]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/mtv.py
e520e2bb491f2c55f3867ab214b2b949eca6e684
import re
import xml.etree.ElementTree

from .common import InfoExtractor
# NOTE(review): ExtractorError and compat_urllib_parse are used below, but no
# import for them is visible in this extract (presumably they come from the
# package's utils module) -- confirm and restore that import.
 
  10 def _media_xml_tag(tag
): 
  11     return '{http://search.yahoo.com/mrss/}%s' % tag
 
  13 class MTVIE(InfoExtractor
): 
  14     _VALID_URL 
= r
'^https?://(?:www\.)?mtv\.com/videos/.+?/(?P<videoid>[0-9]+)/[^/]+$' 
  16     _FEED_URL 
= 'http://www.mtv.com/player/embed/AS3/rss/' 
  20             u
'url': u
'http://www.mtv.com/videos/misc/853555/ours-vh1-storytellers.jhtml', 
  21             u
'file': u
'853555.mp4', 
  22             u
'md5': u
'850f3f143316b1e71fa56a4edfd6e0f8', 
  24                 u
'title': u
'Taylor Swift - "Ours (VH1 Storytellers)"', 
  25                 u
'description': u
'Album: Taylor Swift performs "Ours" for VH1 Storytellers at Harvey Mudd College.', 
  29             u
'url': u
'http://www.mtv.com/videos/taylor-swift/916187/everything-has-changed-ft-ed-sheeran.jhtml', 
  30             u
'file': u
'USCJY1331283.mp4', 
  31             u
'md5': u
'73b4e7fcadd88929292fe52c3ced8caf', 
  33                 u
'title': u
'Everything Has Changed', 
  34                 u
'upload_date': u
'20130606', 
  35                 u
'uploader': u
'Taylor Swift', 
  37             u
'skip': u
'VEVO is only available in some countries', 
  42     def _id_from_uri(uri
): 
  43         return uri
.split(':')[-1] 
  45     # This was originally implemented for ComedyCentral, but it also works here 
  47     def _transform_rtmp_url(rtmp_video_url
): 
  48         m 
= re
.match(r
'^rtmpe?://.*?/(?P<finalid>gsp\..+?/.*)$', rtmp_video_url
) 
  50             raise ExtractorError(u
'Cannot transform RTMP url') 
  51         base 
= 'http://mtvnmobile.vo.llnwd.net/kip0/_pxn=1+_pxI0=Ripod-h264+_pxL0=undefined+_pxM0=+_pxK=18639+_pxE=mp4/44620/mtvnorigin/' 
  52         return base 
+ m
.group('finalid') 
  54     def _get_thumbnail_url(self
, uri
, itemdoc
): 
  55         return 'http://mtv.mtvnimages.com/uri/' + uri
 
  57     def _extract_video_formats(self
, metadataXml
): 
  58         if '/error_country_block.swf' in metadataXml
: 
  59             raise ExtractorError(u
'This video is not available from your country.', expected
=True) 
  60         mdoc 
= xml
.etree
.ElementTree
.fromstring(metadataXml
.encode('utf-8')) 
  61         renditions 
= mdoc
.findall('.//rendition') 
  64         for rendition 
in mdoc
.findall('.//rendition'): 
  66                 _
, _
, ext 
= rendition
.attrib
['type'].partition('/') 
  67                 rtmp_video_url 
= rendition
.find('./src').text
 
  68                 formats
.append({'ext': ext
, 
  69                                 'url': self
._transform
_rtmp
_url
(rtmp_video_url
), 
  70                                 'format_id': rendition
.get('bitrate'), 
  71                                 'width': int(rendition
.get('width')), 
  72                                 'height': int(rendition
.get('height')), 
  74             except (KeyError, TypeError): 
  75                 raise ExtractorError('Invalid rendition field.') 
  78     def _get_video_info(self
, itemdoc
): 
  79         uri 
= itemdoc
.find('guid').text
 
  80         video_id 
= self
._id
_from
_uri
(uri
) 
  81         self
.report_extraction(video_id
) 
  82         mediagen_url 
= itemdoc
.find('%s/%s' % (_media_xml_tag('group'), _media_xml_tag('content'))).attrib
['url'] 
  83         if 'acceptMethods' not in mediagen_url
: 
  84             mediagen_url 
+= '&acceptMethods=fms' 
  85         mediagen_page 
= self
._download
_webpage
(mediagen_url
, video_id
, 
  86                                                u
'Downloading video urls') 
  88         description_node 
= itemdoc
.find('description') 
  89         if description_node 
is not None: 
  90             description 
= description_node
.text
.strip() 
  95             'title': itemdoc
.find('title').text
, 
  96             'formats': self
._extract
_video
_formats
(mediagen_page
), 
  98             'thumbnail': self
._get
_thumbnail
_url
(uri
, itemdoc
), 
  99             'description': description
, 
 102         # TODO: Remove when #980 has been merged 
 103         info
.update(info
['formats'][-1]) 
 107     def _get_videos_info(self
, uri
): 
 108         video_id 
= self
._id
_from
_uri
(uri
) 
 109         data 
= compat_urllib_parse
.urlencode({'uri': uri
}) 
 110         infoXml 
= self
._download
_webpage
(self
._FEED
_URL 
+'?' + data
, video_id
, 
 112         idoc 
= xml
.etree
.ElementTree
.fromstring(infoXml
.encode('utf-8')) 
 113         return [self
._get
_video
_info
(item
) for item 
in idoc
.findall('.//item')] 
 115     def _real_extract(self
, url
): 
 116         mobj 
= re
.match(self
._VALID
_URL
, url
) 
 117         video_id 
= mobj
.group('videoid') 
 119         webpage 
= self
._download
_webpage
(url
, video_id
) 
 121         # Some videos come from Vevo.com 
 122         m_vevo 
= re
.search(r
'isVevoVideo = true;.*?vevoVideoId = "(.*?)";', 
 125             vevo_id 
= m_vevo
.group(1); 
 126             self
.to_screen(u
'Vevo video detected: %s' % vevo_id
) 
 127             return self
.url_result('vevo:%s' % vevo_id
, ie
='Vevo') 
 129         uri 
= self
._html
_search
_regex
(r
'/uri/(.*?)\?', webpage
, u
'uri') 
 130         return self
._get
_videos
_info
(uri
)