+ def _og_search_video_url(self, html, name='video url', secure=True, **kargs):
+ regexes = self._og_regexes('video')
+ if secure: regexes = self._og_regexes('video:secure_url') + regexes
+ return self._html_search_regex(regexes, html, name, **kargs)
+
+ def _og_search_url(self, html, **kargs):
+ return self._og_search_property('url', html, **kargs)
+
+ def _html_search_meta(self, name, html, display_name=None, fatal=False, **kwargs):
+ if display_name is None:
+ display_name = name
+ return self._html_search_regex(
+ r'''(?ix)<meta
+ (?=[^>]+(?:itemprop|name|property)=["\']%s["\'])
+ [^>]+content=["\']([^"\']+)["\']''' % re.escape(name),
+ html, display_name, fatal=fatal, **kwargs)
+
+ def _dc_search_uploader(self, html):
+ return self._html_search_meta('dc.creator', html, 'uploader')
+
+ def _rta_search(self, html):
+ # See http://www.rtalabel.org/index.php?content=howtofaq#single
+ if re.search(r'(?ix)<meta\s+name="rating"\s+'
+ r' content="RTA-5042-1996-1400-1577-RTA"',
+ html):
+ return 18
+ return 0
+
+ def _media_rating_search(self, html):
+ # See http://www.tjg-designs.com/WP/metadata-code-examples-adding-metadata-to-your-web-pages/
+ rating = self._html_search_meta('rating', html)
+
+ if not rating:
+ return None
+
+ RATING_TABLE = {
+ 'safe for kids': 0,
+ 'general': 8,
+ '14 years': 14,
+ 'mature': 17,
+ 'restricted': 19,
+ }
+ return RATING_TABLE.get(rating.lower(), None)
+
+ def _twitter_search_player(self, html):
+ return self._html_search_meta('twitter:player', html,
+ 'twitter card player')
+
+ def _sort_formats(self, formats):
+ if not formats:
+ raise ExtractorError(u'No video formats found')
+
+ def _formats_key(f):
+ # TODO remove the following workaround
+ from ..utils import determine_ext
+ if not f.get('ext') and 'url' in f:
+ f['ext'] = determine_ext(f['url'])
+
+ preference = f.get('preference')
+ if preference is None:
+ proto = f.get('protocol')
+ if proto is None:
+ proto = compat_urllib_parse_urlparse(f.get('url', '')).scheme
+
+ preference = 0 if proto in ['http', 'https'] else -0.1
+ if f.get('ext') in ['f4f', 'f4m']: # Not yet supported
+ preference -= 0.5
+
+ if f.get('vcodec') == 'none': # audio only
+ if self._downloader.params.get('prefer_free_formats'):
+ ORDER = [u'aac', u'mp3', u'm4a', u'webm', u'ogg', u'opus']
+ else:
+ ORDER = [u'webm', u'opus', u'ogg', u'mp3', u'aac', u'm4a']
+ ext_preference = 0
+ try:
+ audio_ext_preference = ORDER.index(f['ext'])
+ except ValueError:
+ audio_ext_preference = -1
+ else:
+ if self._downloader.params.get('prefer_free_formats'):
+ ORDER = [u'flv', u'mp4', u'webm']
+ else:
+ ORDER = [u'webm', u'flv', u'mp4']
+ try:
+ ext_preference = ORDER.index(f['ext'])
+ except ValueError:
+ ext_preference = -1
+ audio_ext_preference = 0
+
+ return (
+ preference,
+ f.get('quality') if f.get('quality') is not None else -1,
+ f.get('height') if f.get('height') is not None else -1,
+ f.get('width') if f.get('width') is not None else -1,
+ ext_preference,
+ f.get('tbr') if f.get('tbr') is not None else -1,
+ f.get('vbr') if f.get('vbr') is not None else -1,
+ f.get('abr') if f.get('abr') is not None else -1,
+ audio_ext_preference,
+ f.get('filesize') if f.get('filesize') is not None else -1,
+ f.get('format_id'),
+ )
+ formats.sort(key=_formats_key)
+
+ def http_scheme(self):
+ """ Either "https:" or "https:", depending on the user's preferences """
+ return (
+ 'http:'
+ if self._downloader.params.get('prefer_insecure', False)
+ else 'https:')
+
+ def _proto_relative_url(self, url, scheme=None):
+ if url is None:
+ return url
+ if url.startswith('//'):
+ if scheme is None:
+ scheme = self.http_scheme()
+ return scheme + url
+ else:
+ return url
+
+ def _sleep(self, timeout, video_id, msg_template=None):
+ if msg_template is None:
+ msg_template = u'%(video_id)s: Waiting for %(timeout)s seconds'
+ msg = msg_template % {'video_id': video_id, 'timeout': timeout}
+ self.to_screen(msg)
+ time.sleep(timeout)
+