from .common import InfoExtractor
from ..compat import (
- compat_urllib_parse,
- compat_urllib_request,
+ compat_urllib_parse_urlencode,
compat_str,
+ compat_xpath,
)
from ..utils import (
ExtractorError,
find_xpath_attr,
fix_xml_ampersands,
+ float_or_none,
HEADRequest,
+ sanitized_Request,
unescapeHTML,
url_basename,
RegexNotFoundError,
+ xpath_text,
)
def _extract_mobile_video_formats(self, mtvn_id):
webpage_url = self._MOBILE_TEMPLATE % mtvn_id
- req = compat_urllib_request.Request(webpage_url)
+ req = sanitized_Request(webpage_url)
# Otherwise we get a webpage that would execute some javascript
req.add_header('User-Agent', 'curl/7')
webpage = self._download_webpage(req, mtvn_id,
rtmp_video_url = rendition.find('./src').text
if rtmp_video_url.endswith('siteunavail.png'):
continue
+ new_url = self._transform_rtmp_url(rtmp_video_url)
formats.append({
- 'ext': ext,
- 'url': self._transform_rtmp_url(rtmp_video_url),
+ 'ext': 'flv' if new_url.startswith('rtmp') else ext,
+ 'url': new_url,
'format_id': rendition.get('bitrate'),
'width': int(rendition.get('width')),
'height': int(rendition.get('height')),
uri = itemdoc.find('guid').text
video_id = self._id_from_uri(uri)
self.report_extraction(video_id)
- mediagen_url = itemdoc.find('%s/%s' % (_media_xml_tag('group'), _media_xml_tag('content'))).attrib['url']
+ content_el = itemdoc.find('%s/%s' % (_media_xml_tag('group'), _media_xml_tag('content')))
+ mediagen_url = content_el.attrib['url']
# Remove the templates, like &device={device}
mediagen_url = re.sub(r'&[^=]*?={.*?}(?=(&|$))', '', mediagen_url)
if 'acceptMethods' not in mediagen_url:
message += item.text
raise ExtractorError(message, expected=True)
- description_node = itemdoc.find('description')
- if description_node is not None:
- description = description_node.text.strip()
- else:
- description = None
+ description = xpath_text(itemdoc, 'description')
title_el = None
if title_el is None:
itemdoc, './/{http://search.yahoo.com/mrss/}category',
'scheme', 'urn:mtvn:video_title')
if title_el is None:
- title_el = itemdoc.find('.//{http://search.yahoo.com/mrss/}title')
+ title_el = itemdoc.find(compat_xpath('.//{http://search.yahoo.com/mrss/}title'))
if title_el is None:
- title_el = itemdoc.find('.//title') or itemdoc.find('./title')
+ title_el = itemdoc.find(compat_xpath('.//title'))
if title_el.text is None:
title_el = None
'id': video_id,
'thumbnail': self._get_thumbnail_url(uri, itemdoc),
'description': description,
+ 'duration': float_or_none(content_el.attrib.get('duration')),
}
+ def _get_feed_query(self, uri):
+ data = {'uri': uri}
+ if self._LANG:
+ data['lang'] = self._LANG
+ return compat_urllib_parse_urlencode(data)
+
def _get_videos_info(self, uri):
video_id = self._id_from_uri(uri)
feed_url = self._get_feed_url(uri)
- data = compat_urllib_parse.urlencode({'uri': uri})
- info_url = feed_url + '?'
- if self._LANG:
- info_url += 'lang=%s&' % self._LANG
- info_url += data
+ info_url = feed_url + '?' + self._get_feed_query(uri)
return self._get_videos_info_from_url(info_url, video_id)
def _get_videos_info_from_url(self, url, video_id):
return self.playlist_result(
[self._get_video_info(item) for item in idoc.findall('.//item')])
- def _real_extract(self, url):
- title = url_basename(url)
- webpage = self._download_webpage(url, title)
+ def _extract_mgid(self, webpage):
try:
# the url can be http://media.mtvnservices.com/fb/{mgid}.swf
# or http://media.mtvnservices.com/{mgid}
'sm4:video:embed', webpage, 'sm4 embed', default='')
mgid = self._search_regex(
r'embed/(mgid:.+?)["\'&?/]', sm4_embed, 'mgid')
+ return mgid
+ def _real_extract(self, url):
+ title = url_basename(url)
+ webpage = self._download_webpage(url, title)
+ mgid = self._extract_mgid(webpage)
videos_info = self._get_videos_info(mgid)
return videos_info