def _og_search_video_url(self, html, name='video url', secure=True, **kargs):
    """Search *html* for an Open Graph video URL.

    The patterns for the ``video`` and ``video:url`` properties are always
    used; when *secure* is true the patterns for ``video:secure_url`` are
    placed in front of them. Any extra keyword arguments are forwarded
    unchanged to ``_html_search_regex``, whose result is returned.
    """
    plain_patterns = self._og_regexes('video') + self._og_regexes('video:url')
    if secure:
        patterns = self._og_regexes('video:secure_url') + plain_patterns
    else:
        patterns = plain_patterns
    return self._html_search_regex(patterns, html, name, **kargs)
+
def _og_search_url(self, html, **kargs):
    """Look up the Open Graph ``url`` property in *html*.

    Thin wrapper around ``_og_search_property``; keyword arguments are
    passed straight through.
    """
    og_property = 'url'
    return self._og_search_property(og_property, html, **kargs)
+
def _html_search_meta(self, name, html, display_name=None, fatal=False, **kwargs):
    """Return the ``content`` value of the ``<meta>`` tag called *name*.

    A tag matches when its ``itemprop``, ``name`` or ``property`` attribute
    equals *name* (optionally quoted) and it has a quoted ``content``
    attribute. The search itself is delegated to ``_html_search_regex``
    with ``group='content'``.

    name         -- value of the identifying attribute (escaped for the regex)
    html         -- markup to search
    display_name -- label passed on for reporting; defaults to *name*
    fatal        -- forwarded to ``_html_search_regex`` (default False)
    kwargs       -- forwarded to ``_html_search_regex``
    """
    if display_name is None:
        display_name = name
    # Group 1 is the optional quote around the identifying attribute (it is
    # captured inside the lookahead); group 2 is the quote that opens the
    # content value. The value therefore has to be terminated by \2: using
    # \1 yields an empty result whenever the name is unquoted and breaks
    # when the two attributes use different quote characters.
    return self._html_search_regex(
        r'''(?ix)<meta
            (?=[^>]+(?:itemprop|name|property)=(["\']?)%s\1)
            [^>]+content=(["\'])(?P<content>.*?)\2''' % re.escape(name),
        html, display_name, fatal=fatal, group='content', **kwargs)
+
def _dc_search_uploader(self, html):
    """Look up the ``dc.creator`` meta tag in *html*, labelled 'uploader'."""
    meta_name, label = 'dc.creator', 'uploader'
    return self._html_search_meta(meta_name, html, label)
+
+ def _rta_search(self, html):
+ # See http://www.rtalabel.org/index.php?content=howtofaq#single
+ if re.search(r'(?ix)<meta\s+name="rating"\s+'
+ r' content="RTA-5042-1996-1400-1577-RTA"',
+ html):
+ return 18
+ return 0
+
def _media_rating_search(self, html):
    """Map the ``rating`` meta tag of *html* to a number.

    Returns None when no rating is found or when the (lower-cased) rating
    text is not one of the known labels below.
    """
    # See http://www.tjg-designs.com/WP/metadata-code-examples-adding-metadata-to-your-web-pages/
    RATING_TABLE = {
        'safe for kids': 0,
        'general': 8,
        '14 years': 14,
        'mature': 17,
        'restricted': 19,
    }
    rating = self._html_search_meta('rating', html)
    return RATING_TABLE.get(rating.lower()) if rating else None
+
def _twitter_search_player(self, html):
    """Look up the ``twitter:player`` meta tag in *html*."""
    label = 'twitter card player'
    return self._html_search_meta('twitter:player', html, label)
+
def _sort_formats(self, formats):
    """Sort the list of format dicts *formats* in place.

    The list is sorted in ascending order of a key tuple built from, in
    order: preference, language preference, quality, height, width,
    container preference, total/video/audio bitrate, audio container
    preference, fps, file size (exact, then approximate), source
    preference and finally the format id. Missing numeric fields count
    as -1. As a side effect a format without ``ext`` but with ``url``
    gets its ``ext`` filled in via ``determine_ext``.

    Raises ExtractorError when *formats* is empty.
    """
    if not formats:
        raise ExtractorError('No video formats found')

    prefer_free = self._downloader.params.get('prefer_free_formats')

    def _field(f, key, default=-1):
        # Treat an absent key and an explicit None alike.
        value = f.get(key)
        return default if value is None else value

    def _rank(order, ext):
        # Position of ext in order, or -1 when it is unknown or missing.
        try:
            return order.index(ext)
        except ValueError:
            return -1

    def _formats_key(f):
        # TODO remove the following workaround
        from ..utils import determine_ext
        if not f.get('ext') and 'url' in f:
            f['ext'] = determine_ext(f['url'])

        # .get() rather than f['ext']: a format with neither 'ext' nor
        # 'url' must not abort the sort with a KeyError.
        ext = f.get('ext')

        preference = f.get('preference')
        if preference is None:
            proto = f.get('protocol')
            if proto is None:
                proto = compat_urllib_parse_urlparse(f.get('url', '')).scheme

            preference = 0 if proto in ['http', 'https'] else -0.1
            if ext in ['f4f', 'f4m']:  # Not yet supported
                preference -= 0.5

        if f.get('vcodec') == 'none':  # audio only
            if prefer_free:
                ORDER = ['aac', 'mp3', 'm4a', 'webm', 'ogg', 'opus']
            else:
                ORDER = ['webm', 'opus', 'ogg', 'mp3', 'aac', 'm4a']
            ext_preference = 0
            audio_ext_preference = _rank(ORDER, ext)
        else:
            if prefer_free:
                ORDER = ['flv', 'mp4', 'webm']
            else:
                ORDER = ['webm', 'flv', 'mp4']
            ext_preference = _rank(ORDER, ext)
            audio_ext_preference = 0

        return (
            preference,
            _field(f, 'language_preference'),
            _field(f, 'quality'),
            _field(f, 'height'),
            _field(f, 'width'),
            ext_preference,
            _field(f, 'tbr'),
            _field(f, 'vbr'),
            _field(f, 'abr'),
            audio_ext_preference,
            _field(f, 'fps'),
            _field(f, 'filesize'),
            _field(f, 'filesize_approx'),
            _field(f, 'source_preference'),
            # None cannot be ordered against str on Python 3; an empty
            # string still sorts before every real id.
            _field(f, 'format_id', ''),
        )
    formats.sort(key=_formats_key)
+
def http_scheme(self):
    """ Either "http:" or "https:", depending on the user's preferences """
    # The 'prefer_insecure' downloader parameter selects plain http.
    if self._downloader.params.get('prefer_insecure', False):
        return 'http:'
    return 'https:'
+
+ def _proto_relative_url(self, url, scheme=None):
+ if url is None:
+ return url
+ if url.startswith('//'):
+ if scheme is None:
+ scheme = self.http_scheme()
+ return scheme + url
+ else:
+ return url
+
def _sleep(self, timeout, video_id, msg_template=None):
    """Print a waiting notice via ``to_screen``, then sleep *timeout* seconds.

    *msg_template* is a %-style template that may reference ``video_id``
    and ``timeout``; a default message is used when it is None.
    """
    template = (
        '%(video_id)s: Waiting for %(timeout)s seconds'
        if msg_template is None
        else msg_template)
    self.to_screen(template % {'video_id': video_id, 'timeout': timeout})
    time.sleep(timeout)
+
def _extract_f4m_formats(self, manifest_url, video_id):
    """Download the f4m manifest at *manifest_url* and list its formats.

    One format dict is produced per ``media`` element in the f4m 1.0
    namespace. Every entry uses the manifest URL as its ``url`` and 'flv'
    as its ``ext``; the id is ``f4m-<bitrate>``, or ``f4m-<index>`` when
    the element has no usable bitrate. The list is passed through
    ``_sort_formats`` before being returned.
    """
    manifest = self._download_xml(
        manifest_url, video_id, 'Downloading f4m manifest',
        'Unable to download f4m manifest')

    formats = []
    for index, media_el in enumerate(
            manifest.findall('{http://ns.adobe.com/f4m/1.0}media')):
        attrs = media_el.attrib
        bitrate = int_or_none(attrs.get('bitrate'))
        id_suffix = index if bitrate is None else bitrate
        formats.append({
            'format_id': 'f4m-%d' % id_suffix,
            'url': manifest_url,
            'ext': 'flv',
            'tbr': bitrate,
            'width': int_or_none(attrs.get('width')),
            'height': int_or_none(attrs.get('height')),
        })

    self._sort_formats(formats)
    return formats
+
def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None,
                          entry_protocol='m3u8', preference=None):
    """Download the m3u8 playlist at *m3u8_url* and list its formats.

    The result always starts out with an ``m3u8-meta`` entry that points at
    the playlist itself (preference -1). Each URI line of the playlist then
    adds one entry; attributes of the preceding ``#EXT-X-STREAM-INF`` tag
    (BANDWIDTH, CODECS, RESOLUTION) are turned into ``tbr``, ``vcodec`` /
    ``acodec`` and ``width`` / ``height``. URI lines seen before any
    ``#EXT-X-STREAM-INF`` tag become bare ``{'url': ...}`` entries.

    m3u8_url       -- playlist URL; relative URIs are resolved against it
    video_id       -- passed to ``_download_webpage``
    ext            -- ``ext`` stored in every tagged entry
    entry_protocol -- ``protocol`` stored in every tagged entry
    preference     -- ``preference`` stored in every tagged entry

    Returns the list after it has been passed through ``_sort_formats``.
    """

    formats = [{
        'format_id': 'm3u8-meta',
        'url': m3u8_url,
        'ext': ext,
        'protocol': 'm3u8',
        'preference': -1,
        'resolution': 'multiple',
        'format_note': 'Quality selection URL',
    }]

    def format_url(u):
        # Absolute http(s) URLs are kept; anything else is joined onto
        # the playlist URL.
        if re.match(r'^https?://', u):
            return u
        return compat_urlparse.urljoin(m3u8_url, u)

    m3u8_doc = self._download_webpage(
        m3u8_url, video_id,
        note='Downloading m3u8 information',
        errnote='Failed to download m3u8 information')
    last_info = None
    kv_rex = re.compile(
        r'(?P<key>[a-zA-Z_-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)')
    for raw_line in m3u8_doc.splitlines():
        # Strip once up front so tags, comments and both kinds of URI line
        # get the same whitespace handling.
        line = raw_line.strip()
        if not line:
            continue
        if line.startswith('#EXT-X-STREAM-INF:'):
            last_info = {}
            for m in kv_rex.finditer(line):
                v = m.group('val')
                if v.startswith('"'):
                    v = v[1:-1]
                last_info[m.group('key')] = v
            continue
        if line.startswith('#'):
            continue

        if last_info is None:
            # URI without any preceding stream information.
            formats.append({'url': format_url(line)})
            continue
        tbr = int_or_none(last_info.get('BANDWIDTH'), scale=1000)

        f = {
            'format_id': 'm3u8-%d' % (tbr if tbr else len(formats)),
            'url': format_url(line),
            'tbr': tbr,
            'ext': ext,
            'protocol': entry_protocol,
            'preference': preference,
        }
        codecs = last_info.get('CODECS')
        if codecs:
            # TODO: the video codec does not necessarily come first
            va_codecs = codecs.split(',')
            if va_codecs[0]:
                f['vcodec'] = va_codecs[0].partition('.')[0]
            if len(va_codecs) > 1 and va_codecs[1]:
                f['acodec'] = va_codecs[1].partition('.')[0]
        resolution = last_info.get('RESOLUTION')
        if resolution:
            width_str, height_str = resolution.split('x')
            f['width'] = int(width_str)
            f['height'] = int(height_str)
        formats.append(f)
        # The stream information applies to a single URI only.
        last_info = {}
    self._sort_formats(formats)
    return formats
+
+ def _live_title(self, name):
+ """ Generate the title for a live video """
+ now = datetime.datetime.now()
+ now_str = now.strftime("%Y-%m-%d %H:%M")
+ return name + ' ' + now_str
+
def _int(self, v, name, fatal=False, **kwargs):
    """Convert *v* with ``int_or_none`` and report a failed conversion.

    v      -- value to convert
    name   -- description of the value, used in the failure message
    fatal  -- raise ExtractorError on failure instead of warning
    kwargs -- forwarded to ``int_or_none``

    Returns the converted value, or None (after a warning through the
    downloader) when the conversion fails and *fatal* is false.
    """
    # A leftover debugging print of getattr(v, kwargs['get_attr']) used to
    # sit here; it wrote to stdout and raised AttributeError whenever v
    # lacked the attribute, so it has been removed.
    res = int_or_none(v, **kwargs)
    if res is None:
        msg = 'Failed to extract %s: Could not parse value %r' % (name, v)
        if fatal:
            raise ExtractorError(msg)
        else:
            self._downloader.report_warning(msg)
    return res
+
def _float(self, v, name, fatal=False, **kwargs):
    """Convert *v* with ``float_or_none`` and report a failed conversion.

    On failure an ExtractorError is raised when *fatal* is true; otherwise
    a warning is issued through the downloader and None is returned.
    Keyword arguments are forwarded to ``float_or_none``.
    """
    parsed = float_or_none(v, **kwargs)
    if parsed is not None:
        return parsed

    msg = 'Failed to extract %s: Could not parse value %r' % (name, v)
    if fatal:
        raise ExtractorError(msg)
    self._downloader.report_warning(msg)
    return parsed
+
def _set_cookie(self, domain, name, value, expire_time=None):
    """Store a cookie *name*=*value* for *domain* in the downloader's cookie jar.

    The cookie is created with path '/', not marked secure, and expires at
    *expire_time* (None by default).
    """
    cookie = compat_cookiejar.Cookie(
        version=0, name=name, value=value,
        port=None, port_specified=None,
        domain=domain, domain_specified=None, domain_initial_dot=None,
        path='/', path_specified=True,
        secure=False, expires=expire_time, discard='',
        comment=None, comment_url=None, rest=None)
    self._downloader.cookiejar.set_cookie(cookie)
+