compat_urlparse,
compat_urllib_error,
compat_urllib_parse_urlparse,
+ compat_struct_pack,
+ compat_struct_unpack,
)
from ..utils import (
encodeFilename,
fix_xml_ampersands,
sanitize_open,
- struct_pack,
- struct_unpack,
xpath_text,
)
+class DataTruncatedError(Exception):
+ pass
+
+
class FlvReader(io.BytesIO):
"""
Reader for Flv files
The file format is documented in https://www.adobe.com/devnet/f4v.html
"""
+ def read_bytes(self, n):
+ data = self.read(n)
+ if len(data) < n:
+ raise DataTruncatedError(
+ 'FlvReader error: need %d bytes while only %d bytes got' % (
+ n, len(data)))
+ return data
+
# Utility functions for reading numbers and strings
def read_unsigned_long_long(self):
- return struct_unpack('!Q', self.read(8))[0]
+ return compat_struct_unpack('!Q', self.read_bytes(8))[0]
def read_unsigned_int(self):
- return struct_unpack('!I', self.read(4))[0]
+ return compat_struct_unpack('!I', self.read_bytes(4))[0]
def read_unsigned_char(self):
- return struct_unpack('!B', self.read(1))[0]
+ return compat_struct_unpack('!B', self.read_bytes(1))[0]
def read_string(self):
res = b''
while True:
- char = self.read(1)
+ char = self.read_bytes(1)
if char == b'\x00':
break
res += char
Read a box and return the info as a tuple: (box_size, box_type, box_data)
"""
real_size = size = self.read_unsigned_int()
- box_type = self.read(4)
+ box_type = self.read_bytes(4)
header_end = 8
if size == 1:
real_size = self.read_unsigned_long_long()
header_end = 16
- return real_size, box_type, self.read(real_size - header_end)
+ return real_size, box_type, self.read_bytes(real_size - header_end)
def read_asrt(self):
# version
self.read_unsigned_char()
# flags
- self.read(3)
+ self.read_bytes(3)
quality_entry_count = self.read_unsigned_char()
# QualityEntryCount
for i in range(quality_entry_count):
# version
self.read_unsigned_char()
# flags
- self.read(3)
+ self.read_bytes(3)
# time scale
self.read_unsigned_int()
# version
self.read_unsigned_char()
# flags
- self.read(3)
+ self.read_bytes(3)
self.read_unsigned_int() # BootstrapinfoVersion
# Profile,Live,Update,Reserved
first_frag_number = fragment_run_entry_table[0]['first']
fragments_counter = itertools.count(first_frag_number)
for segment, fragments_count in segment_run_table['segment_run']:
+ # In some live HDS streams (for example Rai), `fragments_count` is
+ # abnormal and causing out-of-memory errors. It's OK to change the
+ # number of fragments for live streams as they are updated periodically
+ if fragments_count == 4294967295 and boot_info['live']:
+ fragments_count = 2
for _ in range(fragments_count):
res.append((segment, next(fragments_counter)))
def write_unsigned_int(stream, val):
- stream.write(struct_pack('!I', val))
+ stream.write(compat_struct_pack('!I', val))
def write_unsigned_int_24(stream, val):
- stream.write(struct_pack('!I', val)[1:])
+ stream.write(compat_struct_pack('!I', val)[1:])
def write_flv_header(stream):
write_unsigned_int(stream, FLV_TAG_HEADER_LEN + len(metadata))
+def remove_encrypted_media(media):
+ return list(filter(lambda e: 'drmAdditionalHeaderId' not in e.attrib and
+ 'drmAdditionalHeaderSetId' not in e.attrib,
+ media))
+
+
def _add_ns(prop):
return '{http://ns.adobe.com/f4m/1.0}%s' % prop
# without drmAdditionalHeaderId or drmAdditionalHeaderSetId attribute
if 'id' not in e.attrib:
self.report_error('Missing ID in f4m DRM')
- media = list(filter(lambda e: 'drmAdditionalHeaderId' not in e.attrib and
- 'drmAdditionalHeaderSetId' not in e.attrib,
- media))
+ media = remove_encrypted_media(media)
if not media:
self.report_error('Unsupported DRM')
return media
doc = compat_etree_fromstring(manifest)
formats = [(int(f.attrib.get('bitrate', -1)), f)
for f in self._get_unencrypted_media(doc)]
- if requested_bitrate is None:
+ if requested_bitrate is None or len(formats) == 1:
# get the best format
formats = sorted(formats, key=lambda f: f[0])
rate, media = formats[-1]
base_url = compat_urlparse.urljoin(man_url, media.attrib['url'])
bootstrap_node = doc.find(_add_ns('bootstrapInfo'))
- boot_info, bootstrap_url = self._parse_bootstrap_node(bootstrap_node, base_url)
+ # From Adobe F4M 3.0 spec:
+ # The <baseURL> element SHALL be the base URL for all relative
+ # (HTTP-based) URLs in the manifest. If <baseURL> is not present, said
+ # URLs should be relative to the location of the containing document.
+ boot_info, bootstrap_url = self._parse_bootstrap_node(bootstrap_node, man_url)
live = boot_info['live']
metadata_node = media.find(_add_ns('metadata'))
if metadata_node is not None:
down.close()
reader = FlvReader(down_data)
while True:
- _, box_type, box_data = reader.read_box_info()
+ try:
+ _, box_type, box_data = reader.read_box_info()
+ except DataTruncatedError:
+ if test:
+ # In tests, segments may be truncated, and thus
+ # FlvReader may not be able to parse the whole
+ # chunk. If so, write the segment as is
+ # See https://github.com/rg3/youtube-dl/issues/9214
+ dest_stream.write(down_data)
+ break
+ raise
if box_type == b'mdat':
dest_stream.write(box_data)
break