X-Git-Url: https://git.rapsys.eu/youtubedl/blobdiff_plain/3477c644417600d9ec8f8d2a44f82da0a4b15eb5..fe979149c83b5a935f7d28baf75848a9137316fd:/youtube_dl/downloader/f4m.py?ds=sidebyside diff --git a/youtube_dl/downloader/f4m.py b/youtube_dl/downloader/f4m.py index 6170cc1..8f88b02 100644 --- a/youtube_dl/downloader/f4m.py +++ b/youtube_dl/downloader/f4m.py @@ -12,36 +12,49 @@ from ..compat import ( compat_urlparse, compat_urllib_error, compat_urllib_parse_urlparse, + compat_struct_pack, + compat_struct_unpack, ) from ..utils import ( encodeFilename, + fix_xml_ampersands, sanitize_open, - struct_pack, - struct_unpack, xpath_text, ) +class DataTruncatedError(Exception): + pass + + class FlvReader(io.BytesIO): """ Reader for Flv files The file format is documented in https://www.adobe.com/devnet/f4v.html """ + def read_bytes(self, n): + data = self.read(n) + if len(data) < n: + raise DataTruncatedError( + 'FlvReader error: need %d bytes while only %d bytes got' % ( + n, len(data))) + return data + # Utility functions for reading numbers and strings def read_unsigned_long_long(self): - return struct_unpack('!Q', self.read(8))[0] + return compat_struct_unpack('!Q', self.read_bytes(8))[0] def read_unsigned_int(self): - return struct_unpack('!I', self.read(4))[0] + return compat_struct_unpack('!I', self.read_bytes(4))[0] def read_unsigned_char(self): - return struct_unpack('!B', self.read(1))[0] + return compat_struct_unpack('!B', self.read_bytes(1))[0] def read_string(self): res = b'' while True: - char = self.read(1) + char = self.read_bytes(1) if char == b'\x00': break res += char @@ -52,18 +65,18 @@ class FlvReader(io.BytesIO): Read a box and return the info as a tuple: (box_size, box_type, box_data) """ real_size = size = self.read_unsigned_int() - box_type = self.read(4) + box_type = self.read_bytes(4) header_end = 8 if size == 1: real_size = self.read_unsigned_long_long() header_end = 16 - return real_size, box_type, self.read(real_size - header_end) + return real_size, box_type, self.read_bytes(real_size - header_end) def read_asrt(self): # version self.read_unsigned_char() # flags - self.read(3) + self.read_bytes(3) quality_entry_count = self.read_unsigned_char() # QualityEntryCount for i in range(quality_entry_count): @@ -84,7 +97,7 @@ class FlvReader(io.BytesIO): # version self.read_unsigned_char() # flags - self.read(3) + self.read_bytes(3) # time scale self.read_unsigned_int() @@ -118,7 +131,7 @@ class FlvReader(io.BytesIO): # version self.read_unsigned_char() # flags - self.read(3) + self.read_bytes(3) self.read_unsigned_int() # BootstrapinfoVersion # Profile,Live,Update,Reserved @@ -193,11 +206,11 @@ def build_fragments_list(boot_info): def write_unsigned_int(stream, val): - stream.write(struct_pack('!I', val)) + stream.write(compat_struct_pack('!I', val)) def write_unsigned_int_24(stream, val): - stream.write(struct_pack('!I', val)[1:]) + stream.write(compat_struct_pack('!I', val)[1:]) def write_flv_header(stream): @@ -222,6 +235,12 @@ def write_metadata_tag(stream, metadata): write_unsigned_int(stream, FLV_TAG_HEADER_LEN + len(metadata)) +def remove_encrypted_media(media): + return list(filter(lambda e: 'drmAdditionalHeaderId' not in e.attrib and + 'drmAdditionalHeaderSetId' not in e.attrib, + media)) + + def _add_ns(prop): return '{http://ns.adobe.com/f4m/1.0}%s' % prop @@ -243,9 +262,7 @@ class F4mFD(FragmentFD): # without drmAdditionalHeaderId or drmAdditionalHeaderSetId attribute if 'id' not in e.attrib: self.report_error('Missing ID in f4m DRM') - media = list(filter(lambda e: 'drmAdditionalHeaderId' not in e.attrib and - 'drmAdditionalHeaderSetId' not in e.attrib, - media)) + media = remove_encrypted_media(media) if not media: self.report_error('Unsupported DRM') return media @@ -272,15 +289,21 @@ class F4mFD(FragmentFD): return fragments_list def _parse_bootstrap_node(self, node, base_url): - if node.text is None: + # Sometimes non empty inline bootstrap info can be specified along + # with bootstrap url attribute (e.g. dummy inline bootstrap info + # contains whitespace characters in [1]). We will prefer bootstrap + # url over inline bootstrap info when present. + # 1. http://live-1-1.rutube.ru/stream/1024/HDS/SD/C2NKsS85HQNckgn5HdEmOQ/1454167650/S-s604419906/move/four/dirs/upper/1024-576p.f4m + bootstrap_url = node.get('url') + if bootstrap_url: bootstrap_url = compat_urlparse.urljoin( - base_url, node.attrib['url']) + base_url, bootstrap_url) boot_info = self._get_bootstrap_from_url(bootstrap_url) else: bootstrap_url = None bootstrap = base64.b64decode(node.text.encode('ascii')) boot_info = read_bootstrap_info(bootstrap) - return (boot_info, bootstrap_url) + return boot_info, bootstrap_url def real_download(self, filename, info_dict): man_url = info_dict['url'] @@ -288,12 +311,15 @@ class F4mFD(FragmentFD): self.to_screen('[%s] Downloading f4m manifest' % self.FD_NAME) urlh = self.ydl.urlopen(man_url) man_url = urlh.geturl() - manifest = urlh.read() + # Some manifests may be malformed, e.g. prosiebensat1 generated manifests + # (see https://github.com/rg3/youtube-dl/issues/6215#issuecomment-121704244 + # and https://github.com/rg3/youtube-dl/issues/7823) + manifest = fix_xml_ampersands(urlh.read().decode('utf-8', 'ignore')).strip() doc = compat_etree_fromstring(manifest) formats = [(int(f.attrib.get('bitrate', -1)), f) for f in self._get_unencrypted_media(doc)] - if requested_bitrate is None: + if requested_bitrate is None or len(formats) == 1: # get the best format formats = sorted(formats, key=lambda f: f[0]) rate, media = formats[-1] @@ -312,7 +338,8 @@ class F4mFD(FragmentFD): metadata = None fragments_list = build_fragments_list(boot_info) - if self.params.get('test', False): + test = self.params.get('test', False) + if test: # We only download the first fragment fragments_list = fragments_list[:1] total_frags = len(fragments_list) @@ -322,6 +349,7 @@ class F4mFD(FragmentFD): ctx = { 'filename': filename, 'total_frags': total_frags, + 'live': live, } self._prepare_frag_download(ctx) @@ -358,7 +386,17 @@ class F4mFD(FragmentFD): down.close() reader = FlvReader(down_data) while True: - _, box_type, box_data = reader.read_box_info() + try: + _, box_type, box_data = reader.read_box_info() + except DataTruncatedError: + if test: + # In tests, segments may be truncated, and thus + # FlvReader may not be able to parse the whole + # chunk. If so, write the segment as is + # See https://github.com/rg3/youtube-dl/issues/9214 + dest_stream.write(down_data) + break + raise if box_type == b'mdat': dest_stream.write(box_data) break @@ -376,7 +414,7 @@ class F4mFD(FragmentFD): else: raise - if not fragments_list and live and bootstrap_url: + if not fragments_list and not test and live and bootstrap_url: fragments_list = self._update_live_fragments(bootstrap_url, frag_i) total_frags += len(fragments_list) if fragments_list and (fragments_list[0][1] > frag_i + 1):