]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/downloader/f4m.py
7cd22c504e463ad2551692728bd3933e8bcf20ab
1 from __future__
import unicode_literals
8 import xml
.etree
.ElementTree
as etree
10 from .common
import FileDownloader
11 from .http
import HttpFD
23 class FlvReader(io
.BytesIO
):
26 The file format is documented in https://www.adobe.com/devnet/f4v.html
29 # Utility functions for reading numbers and strings
def read_unsigned_long_long(self):
    """Read 8 bytes from the stream and return them as one unsigned integer.

    The bytes are decoded with the ``'!Q'`` struct format (network byte
    order, unsigned 64-bit).
    """
    return struct_unpack('!Q', self.read(8))[0]
def read_unsigned_int(self):
    """Read 4 bytes from the stream and return them as one unsigned integer.

    The bytes are decoded with the ``'!I'`` struct format (network byte
    order, unsigned 32-bit).
    """
    return struct_unpack('!I', self.read(4))[0]
def read_unsigned_char(self):
    """Read a single byte from the stream and return it as an unsigned integer.

    The byte is decoded with the ``'!B'`` struct format.
    """
    return struct_unpack('!B', self.read(1))[0]
39 def read_string(self
):
48 def read_box_info(self
):
50 Read a box and return the info as a tuple: (box_size, box_type, box_data)
52 real_size
= size
= self
.read_unsigned_int()
53 box_type
= self
.read(4)
56 real_size
= self
.read_unsigned_long_long()
58 return real_size
, box_type
, self
.read(real_size
- header_end
)
62 self
.read_unsigned_char()
65 quality_entry_count
= self
.read_unsigned_char()
67 for i
in range(quality_entry_count
):
70 segment_run_count
= self
.read_unsigned_int()
72 for i
in range(segment_run_count
):
73 first_segment
= self
.read_unsigned_int()
74 fragments_per_segment
= self
.read_unsigned_int()
75 segments
.append((first_segment
, fragments_per_segment
))
78 'segment_run': segments
,
83 self
.read_unsigned_char()
87 self
.read_unsigned_int()
89 quality_entry_count
= self
.read_unsigned_char()
90 # QualitySegmentUrlModifiers
91 for i
in range(quality_entry_count
):
94 fragments_count
= self
.read_unsigned_int()
96 for i
in range(fragments_count
):
97 first
= self
.read_unsigned_int()
98 first_ts
= self
.read_unsigned_long_long()
99 duration
= self
.read_unsigned_int()
101 discontinuity_indicator
= self
.read_unsigned_char()
103 discontinuity_indicator
= None
107 'duration': duration
,
108 'discontinuity_indicator': discontinuity_indicator
,
112 'fragments': fragments
,
117 self
.read_unsigned_char()
121 self
.read_unsigned_int() # BootstrapinfoVersion
122 # Profile,Live,Update,Reserved
125 self
.read_unsigned_int()
127 self
.read_unsigned_long_long()
128 # SmpteTimeCodeOffset
129 self
.read_unsigned_long_long()
131 self
.read_string() # MovieIdentifier
132 server_count
= self
.read_unsigned_char()
134 for i
in range(server_count
):
136 quality_count
= self
.read_unsigned_char()
138 for i
in range(quality_count
):
145 segments_count
= self
.read_unsigned_char()
147 for i
in range(segments_count
):
148 box_size
, box_type
, box_data
= self
.read_box_info()
149 assert box_type
== b
'asrt'
150 segment
= FlvReader(box_data
).read_asrt()
151 segments
.append(segment
)
152 fragments_run_count
= self
.read_unsigned_char()
154 for i
in range(fragments_run_count
):
155 box_size
, box_type
, box_data
= self
.read_box_info()
156 assert box_type
== b
'afrt'
157 fragments
.append(FlvReader(box_data
).read_afrt())
160 'segments': segments
,
161 'fragments': fragments
,
def read_bootstrap_info(self):
    """Read the next box from the stream and parse it as bootstrap info.

    The box is expected to be of type ``abst``; its payload is wrapped in a
    new FlvReader and the value of ``read_abst()`` on it is returned.
    """
    total_size, box_type, box_data = self.read_box_info()
    # Anything other than an 'abst' box cannot be parsed by read_abst().
    assert box_type == b'abst'
    return FlvReader(box_data).read_abst()
def read_bootstrap_info(bootstrap_bytes):
    """Parse raw bootstrap bytes.

    Wraps ``bootstrap_bytes`` in a FlvReader and returns whatever its
    ``read_bootstrap_info()`` method returns.
    """
    return FlvReader(bootstrap_bytes).read_bootstrap_info()
def build_fragments_list(boot_info):
    """ Return a list of (segment, fragment) for each fragment in the video

    boot_info is a dict with a 'segments' list (each item holding a
    'segment_run' list of (first_segment, fragments_per_segment) tuples) and
    a 'fragments' list (each item holding a 'fragments' list of dicts with a
    'first' key). Only the first entry of each table is consulted.
    """
    segment_run_table = boot_info['segments'][0]
    # I've only found videos with one segment
    segment_run_entry = segment_run_table['segment_run'][0]
    n_frags = segment_run_entry[1]
    fragment_run_entry_table = boot_info['fragments'][0]['fragments']
    first_frag_number = fragment_run_entry_table[0]['first']
    # NOTE(review): the initialisation and return of the result list were not
    # visible in the extracted source; they are reconstructed from the
    # docstring. Fragment numbers are consecutive starting at the first one,
    # and the segment number is always 1.
    return [(1, first_frag_number + offset) for offset in range(n_frags)]
def write_flv_header(stream, metadata):
    """Writes the FLV header and the metadata to stream

    stream must provide a ``write`` method that accepts bytes. metadata is
    the bytes payload written as a single script-data tag right after the
    file header.
    """
    # FLV file header: signature + version, type flags, header length
    stream.write(b'FLV\x01')
    stream.write(b'\x05')
    stream.write(b'\x00\x00\x00\x09')
    # Size of the (non-existent) tag preceding the first one
    stream.write(b'\x00\x00\x00\x00')
    # Tag type 0x12: script data
    stream.write(b'\x12')
    # Size of the metadata with 3 bytes
    stream.write(struct_pack('!L', len(metadata))[1:])
    # Remaining 7 bytes of the tag header, all zero
    stream.write(b'\x00\x00\x00\x00\x00\x00\x00')
    stream.write(metadata)
    # Size of the tag just written: 11-byte tag header plus the payload.
    # This used to be the constant 0x173 copied from AdobeHDS.php output,
    # which is only right for one particular metadata length.
    stream.write(struct_pack('!L', 11 + len(metadata)))
209 return '{http://ns.adobe.com/f4m/1.0}%s' % prop
212 class HttpQuietDownloader(HttpFD
):
213 def to_screen(self
, *args
, **kargs
):
217 class F4mFD(FileDownloader
):
219 A downloader for f4m manifests or AdobeHDS.
222 def real_download(self
, filename
, info_dict
):
223 man_url
= info_dict
['url']
224 requested_bitrate
= info_dict
.get('tbr')
225 self
.to_screen('[download] Downloading f4m manifest')
226 manifest
= self
.ydl
.urlopen(man_url
).read()
227 self
.report_destination(filename
)
228 http_dl
= HttpQuietDownloader(
234 'test': self
.params
.get('test', False),
238 doc
= etree
.fromstring(manifest
)
239 formats
= [(int(f
.attrib
.get('bitrate', -1)), f
) for f
in doc
.findall(_add_ns('media'))]
240 if requested_bitrate
is None:
241 # get the best format
242 formats
= sorted(formats
, key
=lambda f
: f
[0])
243 rate
, media
= formats
[-1]
245 rate
, media
= list(filter(
246 lambda f
: int(f
[0]) == requested_bitrate
, formats
))[0]
248 base_url
= compat_urlparse
.urljoin(man_url
, media
.attrib
['url'])
249 bootstrap_node
= doc
.find(_add_ns('bootstrapInfo'))
250 if bootstrap_node
.text
is None:
251 bootstrap_url
= compat_urlparse
.urljoin(
252 base_url
, bootstrap_node
.attrib
['url'])
253 bootstrap
= self
.ydl
.urlopen(bootstrap_url
).read()
255 bootstrap
= base64
.b64decode(bootstrap_node
.text
)
256 metadata
= base64
.b64decode(media
.find(_add_ns('metadata')).text
)
257 boot_info
= read_bootstrap_info(bootstrap
)
259 fragments_list
= build_fragments_list(boot_info
)
260 if self
.params
.get('test', False):
261 # We only download the first fragment
262 fragments_list
= fragments_list
[:1]
263 total_frags
= len(fragments_list
)
264 # For some akamai manifests we'll need to add a query to the fragment url
265 akamai_pv
= xpath_text(doc
, _add_ns('pv-2.0'))
267 tmpfilename
= self
.temp_name(filename
)
268 (dest_stream
, tmpfilename
) = sanitize_open(tmpfilename
, 'wb')
269 write_flv_header(dest_stream
, metadata
)
271 # This dict stores the download progress, it's updated by the progress
274 'downloaded_bytes': 0,
279 def frag_progress_hook(status
):
280 frag_total_bytes
= status
.get('total_bytes', 0)
281 estimated_size
= (state
['downloaded_bytes'] +
282 (total_frags
- state
['frag_counter']) * frag_total_bytes
)
283 if status
['status'] == 'finished':
284 state
['downloaded_bytes'] += frag_total_bytes
285 state
['frag_counter'] += 1
286 progress
= self
.calc_percent(state
['frag_counter'], total_frags
)
287 byte_counter
= state
['downloaded_bytes']
289 frag_downloaded_bytes
= status
['downloaded_bytes']
290 byte_counter
= state
['downloaded_bytes'] + frag_downloaded_bytes
291 frag_progress
= self
.calc_percent(frag_downloaded_bytes
,
293 progress
= self
.calc_percent(state
['frag_counter'], total_frags
)
294 progress
+= frag_progress
/ float(total_frags
)
296 eta
= self
.calc_eta(start
, time
.time(), estimated_size
, byte_counter
)
297 self
.report_progress(progress
, format_bytes(estimated_size
),
298 status
.get('speed'), eta
)
299 http_dl
.add_progress_hook(frag_progress_hook
)
302 for (seg_i
, frag_i
) in fragments_list
:
303 name
= 'Seg%d-Frag%d' % (seg_i
, frag_i
)
304 url
= base_url
+ name
306 url
+= '?' + akamai_pv
.strip(';')
307 frag_filename
= '%s-%s' % (tmpfilename
, name
)
308 success
= http_dl
.download(frag_filename
, {'url': url
})
311 with open(frag_filename
, 'rb') as down
:
312 down_data
= down
.read()
313 reader
= FlvReader(down_data
)
315 _
, box_type
, box_data
= reader
.read_box_info()
316 if box_type
== b
'mdat':
317 dest_stream
.write(box_data
)
319 frags_filenames
.append(frag_filename
)
322 self
.report_finish(format_bytes(state
['downloaded_bytes']), time
.time() - start
)
324 self
.try_rename(tmpfilename
, filename
)
325 for frag_file
in frags_filenames
:
328 fsize
= os
.path
.getsize(encodeFilename(filename
))
329 self
._hook
_progress
({
330 'downloaded_bytes': fsize
,
331 'total_bytes': fsize
,
332 'filename': filename
,
333 'status': 'finished',