X-Git-Url: https://git.rapsys.eu/youtubedl/blobdiff_plain/03342304420e5daeb428ffdcc7bbd2bbfecfa61a..d1d8237037fb87a57bd1d073c3b9e642d4a75016:/youtube_dl/extractor/common.py?ds=inline diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 14f5756..5a2603b 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -15,14 +15,17 @@ import math from ..compat import ( compat_cookiejar, compat_cookies, + compat_etree_fromstring, compat_getpass, compat_http_client, + compat_os_name, + compat_str, compat_urllib_error, - compat_urllib_parse, + compat_urllib_parse_urlencode, + compat_urllib_request, compat_urlparse, - compat_str, - compat_etree_fromstring, ) +from ..downloader.f4m import remove_encrypted_media from ..utils import ( NO_DEFAULT, age_restricted, @@ -42,11 +45,15 @@ from ..utils import ( unescapeHTML, unified_strdate, url_basename, + xpath_element, xpath_text, xpath_with_ns, determine_protocol, parse_duration, mimetype2ext, + update_Request, + update_url_query, + parse_m3u8_attributes, ) @@ -104,7 +111,7 @@ class InfoExtractor(object): * protocol The protocol that will be used for the actual download, lower-case. "http", "https", "rtsp", "rtmp", "rtmpe", - "m3u8", or "m3u8_native". + "m3u8", "m3u8_native" or "http_dash_segments". * preference Order number of this format. If this field is present and not None, the formats get sorted by this field, regardless of all other values. @@ -157,12 +164,14 @@ class InfoExtractor(object): thumbnail: Full URL to a video thumbnail image. description: Full video description. uploader: Full name of the video uploader. - creator: The main artist who created the video. + license: License name the video is licensed under. + creator: The creator of the video. release_date: The date (YYYYMMDD) when the video was released. timestamp: UNIX timestamp of the moment the video became available. upload_date: Video upload date (YYYYMMDD). If not explicitly set, calculated from timestamp. uploader_id: Nickname or id of the video uploader. + uploader_url: Full URL to a personal webpage of the video uploader. location: Physical location where the video was filmed. subtitles: The available subtitles as a dictionary in the format {language: subformats}. "subformats" is a list sorted from @@ -225,6 +234,24 @@ class InfoExtractor(object): episode_number: Number of the video episode within a season, as an integer. episode_id: Id of the video episode, as a unicode string. + The following fields should only be used when the media is a track or a part of + a music album: + + track: Title of the track. + track_number: Number of the track within an album or a disc, as an integer. + track_id: Id of the track (useful in case of custom indexing, e.g. 6.iii), + as a unicode string. + artist: Artist(s) of the track. + genre: Genre(s) of the track. + album: Title of the album the track belongs to. + album_type: Type of the album (e.g. "Demo", "Full-length", "Split", "Compilation", etc). + album_artist: List of all artists appeared on the album (e.g. + "Ash Borer / Fell Voices" or "Various Artists", useful for splits + and compilations). + disc_number: Number of the disc or other physical medium the track belongs to, + as an integer. + release_year: Year (YYYY) when the album was released. + Unless mentioned otherwise, the fields should be Unicode strings. Unless mentioned otherwise, None is equivalent to absence of information. @@ -342,7 +369,7 @@ class InfoExtractor(object): def IE_NAME(self): return compat_str(type(self).__name__[:-2]) - def _request_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True): + def _request_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, data=None, headers={}, query={}): """ Returns the response handle """ if note is None: self.report_download_webpage(video_id) @@ -351,6 +378,14 @@ class InfoExtractor(object): self.to_screen('%s' % (note,)) else: self.to_screen('%s: %s' % (video_id, note)) + if isinstance(url_or_request, compat_urllib_request.Request): + url_or_request = update_Request( + url_or_request, data=data, headers=headers, query=query) + else: + if query: + url_or_request = update_url_query(url_or_request, query) + if data is not None or headers: + url_or_request = sanitized_Request(url_or_request, data, headers) try: return self._downloader.urlopen(url_or_request) except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: @@ -366,13 +401,13 @@ class InfoExtractor(object): self._downloader.report_warning(errmsg) return False - def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None, fatal=True, encoding=None): + def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None, fatal=True, encoding=None, data=None, headers={}, query={}): """ Returns a tuple (page content as string, URL handle) """ # Strip hashes from the URL (#1038) if isinstance(url_or_request, (compat_str, str)): url_or_request = url_or_request.partition('#')[0] - urlh = self._request_webpage(url_or_request, video_id, note, errnote, fatal) + urlh = self._request_webpage(url_or_request, video_id, note, errnote, fatal, data=data, headers=headers, query=query) if urlh is False: assert not fatal return False @@ -425,7 +460,7 @@ class InfoExtractor(object): self.to_screen('Saving request to ' + filename) # Working around MAX_PATH limitation on Windows (see # http://msdn.microsoft.com/en-us/library/windows/desktop/aa365247(v=vs.85).aspx) - if os.name == 'nt': + if compat_os_name == 'nt': absfilepath = os.path.abspath(filename) if len(absfilepath) > 259: filename = '\\\\?\\' + absfilepath @@ -459,13 +494,13 @@ class InfoExtractor(object): return content - def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, tries=1, timeout=5, encoding=None): + def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, tries=1, timeout=5, encoding=None, data=None, headers={}, query={}): """ Returns the data of the page as a string """ success = False try_count = 0 while success is False: try: - res = self._download_webpage_handle(url_or_request, video_id, note, errnote, fatal, encoding=encoding) + res = self._download_webpage_handle(url_or_request, video_id, note, errnote, fatal, encoding=encoding, data=data, headers=headers, query=query) success = True except compat_http_client.IncompleteRead as e: try_count += 1 @@ -480,10 +515,10 @@ class InfoExtractor(object): def _download_xml(self, url_or_request, video_id, note='Downloading XML', errnote='Unable to download XML', - transform_source=None, fatal=True, encoding=None): + transform_source=None, fatal=True, encoding=None, data=None, headers={}, query={}): """Return the xml as an xml.etree.ElementTree.Element""" xml_string = self._download_webpage( - url_or_request, video_id, note, errnote, fatal=fatal, encoding=encoding) + url_or_request, video_id, note, errnote, fatal=fatal, encoding=encoding, data=data, headers=headers, query=query) if xml_string is False: return xml_string if transform_source: @@ -494,10 +529,10 @@ class InfoExtractor(object): note='Downloading JSON metadata', errnote='Unable to download JSON metadata', transform_source=None, - fatal=True, encoding=None): + fatal=True, encoding=None, data=None, headers={}, query={}): json_string = self._download_webpage( url_or_request, video_id, note, errnote, fatal=fatal, - encoding=encoding) + encoding=encoding, data=data, headers=headers, query=query) if (not fatal) and json_string is False: return None return self._parse_json( @@ -594,7 +629,7 @@ class InfoExtractor(object): if mobj: break - if not self._downloader.params.get('no_color') and os.name != 'nt' and sys.stderr.isatty(): + if not self._downloader.params.get('no_color') and compat_os_name != 'nt' and sys.stderr.isatty(): _name = '\033[0;34m%s\033[0m' % name else: _name = name @@ -809,7 +844,7 @@ class InfoExtractor(object): for input in re.findall(r'(?i)]+)>', html): if not re.search(r'type=(["\'])(?:hidden|submit)\1', input): continue - name = re.search(r'name=(["\'])(?P.+?)\1', input) + name = re.search(r'(?:name|id)=(["\'])(?P.+?)\1', input) if not name: continue value = re.search(r'value=(["\'])(?P.*?)\1', input) @@ -852,6 +887,7 @@ class InfoExtractor(object): proto_preference = 0 if determine_protocol(f) in ['http', 'https'] else -0.1 if f.get('vcodec') == 'none': # audio only + preference -= 50 if self._downloader.params.get('prefer_free_formats'): ORDER = ['aac', 'mp3', 'm4a', 'webm', 'ogg', 'opus'] else: @@ -862,6 +898,8 @@ class InfoExtractor(object): except ValueError: audio_ext_preference = -1 else: + if f.get('acodec') == 'none': # video only + preference -= 40 if self._downloader.params.get('prefer_free_formats'): ORDER = ['flv', 'mp4', 'webm'] else: @@ -951,7 +989,7 @@ class InfoExtractor(object): def _extract_f4m_formats(self, manifest_url, video_id, preference=None, f4m_id=None, transform_source=lambda s: fix_xml_ampersands(s).strip(), - fatal=True): + fatal=True, m3u8_id=None): manifest = self._download_xml( manifest_url, video_id, 'Downloading f4m manifest', 'Unable to download f4m manifest', @@ -963,20 +1001,56 @@ class InfoExtractor(object): if manifest is False: return [] + return self._parse_f4m_formats( + manifest, manifest_url, video_id, preference=preference, f4m_id=f4m_id, + transform_source=transform_source, fatal=fatal, m3u8_id=m3u8_id) + + def _parse_f4m_formats(self, manifest, manifest_url, video_id, preference=None, f4m_id=None, + transform_source=lambda s: fix_xml_ampersands(s).strip(), + fatal=True, m3u8_id=None): + # currently youtube-dl cannot decode the playerVerificationChallenge as Akamai uses Adobe Alchemy + akamai_pv = manifest.find('{http://ns.adobe.com/f4m/1.0}pv-2.0') + if akamai_pv is not None and ';' in akamai_pv.text: + playerVerificationChallenge = akamai_pv.text.split(';')[0] + if playerVerificationChallenge.strip() != '': + return [] + formats = [] manifest_version = '1.0' media_nodes = manifest.findall('{http://ns.adobe.com/f4m/1.0}media') if not media_nodes: manifest_version = '2.0' media_nodes = manifest.findall('{http://ns.adobe.com/f4m/2.0}media') + # Remove unsupported DRM protected media from final formats + # rendition (see https://github.com/rg3/youtube-dl/issues/8573). + media_nodes = remove_encrypted_media(media_nodes) + if not media_nodes: + return formats base_url = xpath_text( manifest, ['{http://ns.adobe.com/f4m/1.0}baseURL', '{http://ns.adobe.com/f4m/2.0}baseURL'], 'base URL', default=None) if base_url: base_url = base_url.strip() + + bootstrap_info = xpath_element( + manifest, ['{http://ns.adobe.com/f4m/1.0}bootstrapInfo', '{http://ns.adobe.com/f4m/2.0}bootstrapInfo'], + 'bootstrap info', default=None) + for i, media_el in enumerate(media_nodes): - if manifest_version == '2.0': - media_url = media_el.attrib.get('href') or media_el.attrib.get('url') + tbr = int_or_none(media_el.attrib.get('bitrate')) + width = int_or_none(media_el.attrib.get('width')) + height = int_or_none(media_el.attrib.get('height')) + format_id = '-'.join(filter(None, [f4m_id, compat_str(i if tbr is None else tbr)])) + # If is present, the specified f4m is a + # stream-level manifest, and only set-level manifests may refer to + # external resources. See section 11.4 and section 4 of F4M spec + if bootstrap_info is None: + media_url = None + # @href is introduced in 2.0, see section 11.6 of F4M spec + if manifest_version == '2.0': + media_url = media_el.attrib.get('href') + if media_url is None: + media_url = media_el.attrib.get('url') if not media_url: continue manifest_url = ( @@ -986,30 +1060,43 @@ class InfoExtractor(object): # since bitrates in parent manifest (this one) and media_url manifest # may differ leading to inability to resolve the format by requested # bitrate in f4m downloader - if determine_ext(manifest_url) == 'f4m': - formats.extend(self._extract_f4m_formats( - manifest_url, video_id, preference, f4m_id, fatal=fatal)) + ext = determine_ext(manifest_url) + if ext == 'f4m': + f4m_formats = self._extract_f4m_formats( + manifest_url, video_id, preference=preference, f4m_id=f4m_id, + transform_source=transform_source, fatal=fatal) + # Sometimes stream-level manifest contains single media entry that + # does not contain any quality metadata (e.g. http://matchtv.ru/#live-player). + # At the same time parent's media entry in set-level manifest may + # contain it. We will copy it from parent in such cases. + if len(f4m_formats) == 1: + f = f4m_formats[0] + f.update({ + 'tbr': f.get('tbr') or tbr, + 'width': f.get('width') or width, + 'height': f.get('height') or height, + 'format_id': f.get('format_id') if not tbr else format_id, + }) + formats.extend(f4m_formats) + continue + elif ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + manifest_url, video_id, 'mp4', preference=preference, + m3u8_id=m3u8_id, fatal=fatal)) continue - tbr = int_or_none(media_el.attrib.get('bitrate')) formats.append({ - 'format_id': '-'.join(filter(None, [f4m_id, compat_str(i if tbr is None else tbr)])), + 'format_id': format_id, 'url': manifest_url, - 'ext': 'flv', + 'ext': 'flv' if bootstrap_info is not None else None, 'tbr': tbr, - 'width': int_or_none(media_el.attrib.get('width')), - 'height': int_or_none(media_el.attrib.get('height')), + 'width': width, + 'height': height, 'preference': preference, }) - self._sort_formats(formats) - return formats - def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None, - entry_protocol='m3u8', preference=None, - m3u8_id=None, note=None, errnote=None, - fatal=True): - - formats = [{ + def _m3u8_meta_format(self, m3u8_url, ext=None, preference=None, m3u8_id=None): + return { 'format_id': '-'.join(filter(None, [m3u8_id, 'meta'])), 'url': m3u8_url, 'ext': ext, @@ -1017,7 +1104,14 @@ class InfoExtractor(object): 'preference': preference - 1 if preference else -1, 'resolution': 'multiple', 'format_note': 'Quality selection URL', - }] + } + + def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None, + entry_protocol='m3u8', preference=None, + m3u8_id=None, note=None, errnote=None, + fatal=True, live=False): + + formats = [self._m3u8_meta_format(m3u8_url, ext, preference, m3u8_id)] format_url = lambda u: ( u @@ -1033,11 +1127,21 @@ class InfoExtractor(object): return [] m3u8_doc, urlh = res m3u8_url = urlh.geturl() - # A Media Playlist Tag MUST NOT appear in a Master Playlist - # https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3 - # The EXT-X-TARGETDURATION tag is REQUIRED for every M3U8 Media Playlists - # https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.1 - if '#EXT-X-TARGETDURATION' in m3u8_doc: + + # We should try extracting formats only from master playlists [1], i.e. + # playlists that describe available qualities. On the other hand media + # playlists [2] should be returned as is since they contain just the media + # without qualities renditions. + # Fortunately, master playlist can be easily distinguished from media + # playlist based on particular tags availability. As of [1, 2] master + # playlist tags MUST NOT appear in a media playist and vice versa. + # As of [3] #EXT-X-TARGETDURATION tag is REQUIRED for every media playlist + # and MUST NOT appear in master playlist thus we can clearly detect media + # playlist with this criterion. + # 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.4 + # 2. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3 + # 3. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.1 + if '#EXT-X-TARGETDURATION' in m3u8_doc: # media playlist, return as is return [{ 'url': m3u8_url, 'format_id': m3u8_id, @@ -1047,23 +1151,11 @@ class InfoExtractor(object): }] last_info = None last_media = None - kv_rex = re.compile( - r'(?P[a-zA-Z_-]+)=(?P"[^"]+"|[^",]+)(?:,|$)') for line in m3u8_doc.splitlines(): if line.startswith('#EXT-X-STREAM-INF:'): - last_info = {} - for m in kv_rex.finditer(line): - v = m.group('val') - if v.startswith('"'): - v = v[1:-1] - last_info[m.group('key')] = v + last_info = parse_m3u8_attributes(line) elif line.startswith('#EXT-X-MEDIA:'): - last_media = {} - for m in kv_rex.finditer(line): - v = m.group('val') - if v.startswith('"'): - v = v[1:-1] - last_media[m.group('key')] = v + last_media = parse_m3u8_attributes(line) elif line.startswith('#') or not line.strip(): continue else: @@ -1074,8 +1166,15 @@ class InfoExtractor(object): format_id = [] if m3u8_id: format_id.append(m3u8_id) - last_media_name = last_media.get('NAME') if last_media and last_media.get('TYPE') != 'SUBTITLES' else None - format_id.append(last_media_name if last_media_name else '%d' % (tbr if tbr else len(formats))) + last_media_name = last_media.get('NAME') if last_media and last_media.get('TYPE') not in ('SUBTITLES', 'CLOSED-CAPTIONS') else None + # Despite specification does not mention NAME attribute for + # EXT-X-STREAM-INF it still sometimes may be present + stream_name = last_info.get('NAME') or last_media_name + # Bandwidth of live streams may differ over time thus making + # format_id unpredictable. So it's better to keep provided + # format_id intact. + if not live: + format_id.append(stream_name if stream_name else '%d' % (tbr if tbr else len(formats))) f = { 'format_id': '-'.join(format_id), 'url': format_url(line.strip()), @@ -1084,25 +1183,34 @@ class InfoExtractor(object): 'protocol': entry_protocol, 'preference': preference, } - codecs = last_info.get('CODECS') - if codecs: - # TODO: looks like video codec is not always necessarily goes first - va_codecs = codecs.split(',') - if va_codecs[0]: - f['vcodec'] = va_codecs[0] - if len(va_codecs) > 1 and va_codecs[1]: - f['acodec'] = va_codecs[1] resolution = last_info.get('RESOLUTION') if resolution: width_str, height_str = resolution.split('x') f['width'] = int(width_str) f['height'] = int(height_str) + codecs = last_info.get('CODECS') + if codecs: + vcodec, acodec = [None] * 2 + va_codecs = codecs.split(',') + if len(va_codecs) == 1: + # Audio only entries usually come with single codec and + # no resolution. For more robustness we also check it to + # be mp4 audio. + if not resolution and va_codecs[0].startswith('mp4a'): + vcodec, acodec = 'none', va_codecs[0] + else: + vcodec = va_codecs[0] + else: + vcodec, acodec = va_codecs[:2] + f.update({ + 'acodec': acodec, + 'vcodec': vcodec, + }) if last_media is not None: f['m3u8_media'] = last_media last_media = None formats.append(f) last_info = {} - self._sort_formats(formats) return formats @staticmethod @@ -1117,8 +1225,8 @@ class InfoExtractor(object): out.append('{%s}%s' % (namespace, c)) return '/'.join(out) - def _extract_smil_formats(self, smil_url, video_id, fatal=True, f4m_params=None): - smil = self._download_smil(smil_url, video_id, fatal=fatal) + def _extract_smil_formats(self, smil_url, video_id, fatal=True, f4m_params=None, transform_source=None): + smil = self._download_smil(smil_url, video_id, fatal=fatal, transform_source=transform_source) if smil is False: assert not fatal @@ -1135,10 +1243,10 @@ class InfoExtractor(object): return {} return self._parse_smil(smil, smil_url, video_id, f4m_params=f4m_params) - def _download_smil(self, smil_url, video_id, fatal=True): + def _download_smil(self, smil_url, video_id, fatal=True, transform_source=None): return self._download_xml( smil_url, video_id, 'Downloading SMIL file', - 'Unable to download SMIL file', fatal=fatal) + 'Unable to download SMIL file', fatal=fatal, transform_source=transform_source) def _parse_smil(self, smil, smil_url, video_id, f4m_params=None): namespace = self._parse_smil_namespace(smil) @@ -1198,21 +1306,21 @@ class InfoExtractor(object): m3u8_count = 0 srcs = [] - videos = smil.findall(self._xpath_ns('.//video', namespace)) - for video in videos: - src = video.get('src') + media = smil.findall(self._xpath_ns('.//video', namespace)) + smil.findall(self._xpath_ns('.//audio', namespace)) + for medium in media: + src = medium.get('src') if not src or src in srcs: continue srcs.append(src) - bitrate = float_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000) - filesize = int_or_none(video.get('size') or video.get('fileSize')) - width = int_or_none(video.get('width')) - height = int_or_none(video.get('height')) - proto = video.get('proto') - ext = video.get('ext') + bitrate = float_or_none(medium.get('system-bitrate') or medium.get('systemBitrate'), 1000) + filesize = int_or_none(medium.get('size') or medium.get('fileSize')) + width = int_or_none(medium.get('width')) + height = int_or_none(medium.get('height')) + proto = medium.get('proto') + ext = medium.get('ext') src_ext = determine_ext(src) - streamer = video.get('streamer') or base + streamer = medium.get('streamer') or base if proto == 'rtmp' or streamer.startswith('rtmp'): rtmp_count += 1 @@ -1259,7 +1367,7 @@ class InfoExtractor(object): 'plugin': 'flowplayer-3.2.0.1', } f4m_url += '&' if '?' in f4m_url else '?' - f4m_url += compat_urllib_parse.urlencode(f4m_params) + f4m_url += compat_urllib_parse_urlencode(f4m_params) formats.extend(self._extract_f4m_formats(f4m_url, video_id, f4m_id='hds', fatal=False)) continue @@ -1276,8 +1384,6 @@ class InfoExtractor(object): }) continue - self._sort_formats(formats) - return formats def _parse_smil_subtitles(self, smil, namespace=None, subtitles_lang='en'): @@ -1288,7 +1394,7 @@ class InfoExtractor(object): if not src or src in urls: continue urls.append(src) - ext = textstream.get('ext') or determine_ext(src) or mimetype2ext(textstream.get('type')) + ext = textstream.get('ext') or mimetype2ext(textstream.get('type')) or determine_ext(src) lang = textstream.get('systemLanguage') or textstream.get('systemLanguageName') or textstream.get('lang') or subtitles_lang subtitles.setdefault(lang, []).append({ 'url': src, @@ -1424,8 +1530,9 @@ class InfoExtractor(object): continue representation_attrib = adaptation_set.attrib.copy() representation_attrib.update(representation.attrib) - mime_type = representation_attrib.get('mimeType') - content_type = mime_type.split('/')[0] if mime_type else representation_attrib.get('contentType') + # According to page 41 of ISO/IEC 29001-1:2014, @mimeType is mandatory + mime_type = representation_attrib['mimeType'] + content_type = mime_type.split('/')[0] if content_type == 'text': # TODO implement WebVTT downloading pass @@ -1448,6 +1555,7 @@ class InfoExtractor(object): f = { 'format_id': '%s-%s' % (mpd_id, representation_id) if mpd_id else representation_id, 'url': base_url, + 'ext': mimetype2ext(mime_type), 'width': int_or_none(representation_attrib.get('width')), 'height': int_or_none(representation_attrib.get('height')), 'tbr': int_or_none(representation_attrib.get('bandwidth'), 1000), @@ -1466,9 +1574,16 @@ class InfoExtractor(object): representation_ms_info['total_number'] = int(math.ceil(float(period_duration) / segment_duration)) media_template = representation_ms_info['media_template'] media_template = media_template.replace('$RepresentationID$', representation_id) - media_template = re.sub(r'\$(Number|Bandwidth)(?:%(0\d+)d)?\$', r'%(\1)\2d', media_template) + media_template = re.sub(r'\$(Number|Bandwidth)\$', r'%(\1)d', media_template) + media_template = re.sub(r'\$(Number|Bandwidth)%([^$]+)\$', r'%(\1)\2', media_template) media_template.replace('$$', '$') - representation_ms_info['segment_urls'] = [media_template % {'Number': segment_number, 'Bandwidth': representation_attrib.get('bandwidth')} for segment_number in range(representation_ms_info['start_number'], representation_ms_info['total_number'] + representation_ms_info['start_number'])] + representation_ms_info['segment_urls'] = [ + media_template % { + 'Number': segment_number, + 'Bandwidth': representation_attrib.get('bandwidth')} + for segment_number in range( + representation_ms_info['start_number'], + representation_ms_info['total_number'] + representation_ms_info['start_number'])] if 'segment_urls' in representation_ms_info: f.update({ 'segment_urls': representation_ms_info['segment_urls'], @@ -1493,7 +1608,6 @@ class InfoExtractor(object): existing_format.update(f) else: self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type) - self._sort_formats(formats) return formats def _live_title(self, name): @@ -1600,6 +1714,15 @@ class InfoExtractor(object): def _get_automatic_captions(self, *args, **kwargs): raise NotImplementedError('This method must be implemented by subclasses') + def mark_watched(self, *args, **kwargs): + if (self._downloader.params.get('mark_watched', False) and + (self._get_login_info()[0] is not None or + self._downloader.params.get('cookiefile') is not None)): + self._mark_watched(*args, **kwargs) + + def _mark_watched(self, *args, **kwargs): + raise NotImplementedError('This method must be implemented by subclasses') + class SearchInfoExtractor(InfoExtractor): """