]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/downloader/f4m.py
1 from __future__
import unicode_literals
8 import xml
.etree
.ElementTree
as etree
10 from .common
import FileDownloader
11 from .http
import HttpFD
23 class FlvReader(io
.BytesIO
):
26 The file format is documented in https://www.adobe.com/devnet/f4v.html
29 # Utility functions for reading numbers and strings
def read_unsigned_long_long(self):
    """Consume 8 bytes from the reader and return the first unpacked value.

    The bytes are decoded with the '!Q' format string, which in the struct
    module's notation is an unsigned 8-byte integer in network (big-endian)
    byte order.
    NOTE(review): struct_unpack is imported from elsewhere in the project;
    presumably it behaves like struct.unpack -- confirm.
    """
    return struct_unpack('!Q', self.read(8))[0]
def read_unsigned_int(self):
    """Consume 4 bytes from the reader and return the first unpacked value.

    The bytes are decoded with the '!I' format string, which in the struct
    module's notation is an unsigned 4-byte integer in network (big-endian)
    byte order.
    NOTE(review): struct_unpack is imported from elsewhere in the project;
    presumably it behaves like struct.unpack -- confirm.
    """
    return struct_unpack('!I', self.read(4))[0]
def read_unsigned_char(self):
    """Consume 1 byte from the reader and return the first unpacked value.

    The byte is decoded with the '!B' format string, which in the struct
    module's notation is an unsigned 1-byte integer.
    NOTE(review): struct_unpack is imported from elsewhere in the project;
    presumably it behaves like struct.unpack -- confirm.
    """
    return struct_unpack('!B', self.read(1))[0]
39 def read_string(self
):
48 def read_box_info(self
):
50 Read a box and return the info as a tuple: (box_size, box_type, box_data)
52 real_size
= size
= self
.read_unsigned_int()
53 box_type
= self
.read(4)
56 real_size
= self
.read_unsigned_long_long()
58 return real_size
, box_type
, self
.read(real_size
-header_end
)
62 self
.read_unsigned_char()
65 quality_entry_count
= self
.read_unsigned_char()
67 for i
in range(quality_entry_count
):
70 segment_run_count
= self
.read_unsigned_int()
72 for i
in range(segment_run_count
):
73 first_segment
= self
.read_unsigned_int()
74 fragments_per_segment
= self
.read_unsigned_int()
75 segments
.append((first_segment
, fragments_per_segment
))
78 'segment_run': segments
,
83 self
.read_unsigned_char()
87 self
.read_unsigned_int()
89 quality_entry_count
= self
.read_unsigned_char()
90 # QualitySegmentUrlModifiers
91 for i
in range(quality_entry_count
):
94 fragments_count
= self
.read_unsigned_int()
96 for i
in range(fragments_count
):
97 first
= self
.read_unsigned_int()
98 first_ts
= self
.read_unsigned_long_long()
99 duration
= self
.read_unsigned_int()
101 discontinuity_indicator
= self
.read_unsigned_char()
103 discontinuity_indicator
= None
107 'duration': duration
,
108 'discontinuity_indicator': discontinuity_indicator
,
112 'fragments': fragments
,
117 self
.read_unsigned_char()
121 self
.read_unsigned_int() # BootstrapinfoVersion
122 # Profile,Live,Update,Reserved
125 self
.read_unsigned_int()
127 self
.read_unsigned_long_long()
128 # SmpteTimeCodeOffset
129 self
.read_unsigned_long_long()
131 self
.read_string() # MovieIdentifier
132 server_count
= self
.read_unsigned_char()
134 for i
in range(server_count
):
136 quality_count
= self
.read_unsigned_char()
138 for i
in range(quality_count
):
145 segments_count
= self
.read_unsigned_char()
147 for i
in range(segments_count
):
148 box_size
, box_type
, box_data
= self
.read_box_info()
149 assert box_type
== b
'asrt'
150 segment
= FlvReader(box_data
).read_asrt()
151 segments
.append(segment
)
152 fragments_run_count
= self
.read_unsigned_char()
154 for i
in range(fragments_run_count
):
155 box_size
, box_type
, box_data
= self
.read_box_info()
156 assert box_type
== b
'afrt'
157 fragments
.append(FlvReader(box_data
).read_afrt())
160 'segments': segments
,
161 'fragments': fragments
,
def read_bootstrap_info(self):
    """Read one box from this reader and parse it as an 'abst' box.

    The box is fetched with read_box_info(); its payload is wrapped in a new
    FlvReader and the result of that reader's read_abst() is returned.
    Raises AssertionError when the box type is not b'abst'.
    """
    # The reported box size is not needed here, only the type and payload.
    _, box_type, box_data = self.read_box_info()
    assert box_type == b'abst', 'unexpected box type %r' % (box_type,)
    return FlvReader(box_data).read_abst()
def read_bootstrap_info(bootstrap_bytes):
    """Parse bootstrap_bytes and return the parsed bootstrap info.

    Convenience wrapper: builds an FlvReader over bootstrap_bytes and returns
    whatever its read_bootstrap_info() method returns.
    """
    return FlvReader(bootstrap_bytes).read_bootstrap_info()
174 def build_fragments_list(boot_info
):
175 """ Return a list of (segment, fragment) for each fragment in the video """
177 segment_run_table
= boot_info
['segments'][0]
178 # I've only found videos with one segment
179 segment_run_entry
= segment_run_table
['segment_run'][0]
180 n_frags
= segment_run_entry
[1]
181 fragment_run_entry_table
= boot_info
['fragments'][0]['fragments']
182 first_frag_number
= fragment_run_entry_table
[0]['first']
183 for (i
, frag_number
) in zip(range(1, n_frags
+1), itertools
.count(first_frag_number
)):
184 res
.append((1, frag_number
))
def write_flv_header(stream, metadata):
    """Writes the FLV header and the metadata to stream

    stream must provide a write() method accepting bytes; metadata is the
    bytes payload written as a single script-data tag right after the file
    header. Nothing is returned.
    """
    # FLV file header: signature and version byte, flags byte, header length
    stream.write(b'FLV\x01')
    stream.write(b'\x05')
    stream.write(b'\x00\x00\x00\x09')
    # Size of the (non-existent) tag preceding the first one
    stream.write(b'\x00\x00\x00\x00')
    # Tag type 0x12 (script data)
    stream.write(b'\x12')
    # Size of the metadata with 3 bytes
    stream.write(struct_pack('!L', len(metadata))[1:])
    # Remaining 7 bytes of the tag header, all zero
    stream.write(b'\x00\x00\x00\x00\x00\x00\x00')
    stream.write(metadata)
    # Size of the tag just written: 11 header bytes (1 + 3 + 7 above) plus the
    # payload. This used to be the constant b'\x00\x00\x01\x73' copied from
    # AdobeHDS.php output (https://github.com/K-S-V/Scripts), which is only
    # correct for a 360-byte metadata payload.
    stream.write(struct_pack('!L', 11 + len(metadata)))
209 return '{http://ns.adobe.com/f4m/1.0}%s' % prop
212 class HttpQuietDownloader(HttpFD
):
213 def to_screen(self
, *args
, **kargs
):
217 class F4mFD(FileDownloader
):
219 A downloader for f4m manifests or AdobeHDS.
222 def real_download(self
, filename
, info_dict
):
223 man_url
= info_dict
['url']
224 requested_bitrate
= info_dict
.get('tbr')
225 self
.to_screen('[download] Downloading f4m manifest')
226 manifest
= self
.ydl
.urlopen(man_url
).read()
227 self
.report_destination(filename
)
228 http_dl
= HttpQuietDownloader(self
.ydl
,
233 'test': self
.params
.get('test', False),
236 doc
= etree
.fromstring(manifest
)
237 formats
= [(int(f
.attrib
.get('bitrate', -1)), f
) for f
in doc
.findall(_add_ns('media'))]
238 if requested_bitrate
is None:
239 # get the best format
240 formats
= sorted(formats
, key
=lambda f
: f
[0])
241 rate
, media
= formats
[-1]
243 rate
, media
= list(filter(
244 lambda f
: int(f
[0]) == requested_bitrate
, formats
))[0]
246 base_url
= compat_urlparse
.urljoin(man_url
, media
.attrib
['url'])
247 bootstrap_node
= doc
.find(_add_ns('bootstrapInfo'))
248 if bootstrap_node
.text
is None:
249 bootstrap_url
= compat_urlparse
.urljoin(
250 base_url
, bootstrap_node
.attrib
['url'])
251 bootstrap
= self
.ydl
.urlopen(bootstrap_url
).read()
253 bootstrap
= base64
.b64decode(bootstrap_node
.text
)
254 metadata
= base64
.b64decode(media
.find(_add_ns('metadata')).text
)
255 boot_info
= read_bootstrap_info(bootstrap
)
257 fragments_list
= build_fragments_list(boot_info
)
258 if self
.params
.get('test', False):
259 # We only download the first fragment
260 fragments_list
= fragments_list
[:1]
261 total_frags
= len(fragments_list
)
262 # For some akamai manifests we'll need to add a query to the fragment url
263 akamai_pv
= xpath_text(doc
, _add_ns('pv-2.0'))
265 tmpfilename
= self
.temp_name(filename
)
266 (dest_stream
, tmpfilename
) = sanitize_open(tmpfilename
, 'wb')
267 write_flv_header(dest_stream
, metadata
)
269 # This dict stores the download progress, it's updated by the progress
272 'downloaded_bytes': 0,
277 def frag_progress_hook(status
):
278 frag_total_bytes
= status
.get('total_bytes', 0)
279 estimated_size
= (state
['downloaded_bytes'] +
280 (total_frags
- state
['frag_counter']) * frag_total_bytes
)
281 if status
['status'] == 'finished':
282 state
['downloaded_bytes'] += frag_total_bytes
283 state
['frag_counter'] += 1
284 progress
= self
.calc_percent(state
['frag_counter'], total_frags
)
285 byte_counter
= state
['downloaded_bytes']
287 frag_downloaded_bytes
= status
['downloaded_bytes']
288 byte_counter
= state
['downloaded_bytes'] + frag_downloaded_bytes
289 frag_progress
= self
.calc_percent(frag_downloaded_bytes
,
291 progress
= self
.calc_percent(state
['frag_counter'], total_frags
)
292 progress
+= frag_progress
/ float(total_frags
)
294 eta
= self
.calc_eta(start
, time
.time(), estimated_size
, byte_counter
)
295 self
.report_progress(progress
, format_bytes(estimated_size
),
296 status
.get('speed'), eta
)
297 http_dl
.add_progress_hook(frag_progress_hook
)
300 for (seg_i
, frag_i
) in fragments_list
:
301 name
= 'Seg%d-Frag%d' % (seg_i
, frag_i
)
302 url
= base_url
+ name
304 url
+= '?' + akamai_pv
.strip(';')
305 frag_filename
= '%s-%s' % (tmpfilename
, name
)
306 success
= http_dl
.download(frag_filename
, {'url': url
})
309 with open(frag_filename
, 'rb') as down
:
310 down_data
= down
.read()
311 reader
= FlvReader(down_data
)
313 _
, box_type
, box_data
= reader
.read_box_info()
314 if box_type
== b
'mdat':
315 dest_stream
.write(box_data
)
317 frags_filenames
.append(frag_filename
)
320 self
.report_finish(format_bytes(state
['downloaded_bytes']), time
.time() - start
)
322 self
.try_rename(tmpfilename
, filename
)
323 for frag_file
in frags_filenames
:
326 fsize
= os
.path
.getsize(encodeFilename(filename
))
327 self
._hook
_progress
({
328 'downloaded_bytes': fsize
,
329 'total_bytes': fsize
,
330 'filename': filename
,
331 'status': 'finished',