]>
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/downloader/f4m.py
2a870a758fa32dd475eaf725f69dedbf3c45903b
1 from __future__
import unicode_literals
8 import xml
.etree
.ElementTree
as etree
10 from .common
import FileDownloader
11 from .http
import HttpFD
15 compat_urllib_request
,
23 class FlvReader(io
.BytesIO
):
26 The file format is documented in https://www.adobe.com/devnet/f4v.html
29 # Utility functions for reading numbers and strings
def read_unsigned_long_long(self):
    """Consume 8 bytes from the stream and decode them with format '!Q'.

    '!Q' is a network-order (big-endian) unsigned 64-bit integer.
    """
    raw = self.read(8)
    (value,) = struct_unpack('!Q', raw)
    return value
def read_unsigned_int(self):
    """Consume 4 bytes from the stream and decode them with format '!I'.

    '!I' is a network-order (big-endian) unsigned 32-bit integer.
    """
    raw = self.read(4)
    (value,) = struct_unpack('!I', raw)
    return value
def read_unsigned_char(self):
    """Consume 1 byte from the stream and decode it with format '!B'.

    '!B' is a single unsigned byte, so the result is in 0..255.
    """
    raw = self.read(1)
    (value,) = struct_unpack('!B', raw)
    return value
39 def read_string(self
):
48 def read_box_info(self
):
50 Read a box and return the info as a tuple: (box_size, box_type, box_data)
52 real_size
= size
= self
.read_unsigned_int()
53 box_type
= self
.read(4)
56 real_size
= self
.read_unsigned_long_long()
58 return real_size
, box_type
, self
.read(real_size
-header_end
)
62 self
.read_unsigned_char()
65 quality_entry_count
= self
.read_unsigned_char()
67 for i
in range(quality_entry_count
):
70 segment_run_count
= self
.read_unsigned_int()
72 for i
in range(segment_run_count
):
73 first_segment
= self
.read_unsigned_int()
74 fragments_per_segment
= self
.read_unsigned_int()
75 segments
.append((first_segment
, fragments_per_segment
))
78 'segment_run': segments
,
83 self
.read_unsigned_char()
87 self
.read_unsigned_int()
89 quality_entry_count
= self
.read_unsigned_char()
90 # QualitySegmentUrlModifiers
91 for i
in range(quality_entry_count
):
94 fragments_count
= self
.read_unsigned_int()
96 for i
in range(fragments_count
):
97 first
= self
.read_unsigned_int()
98 first_ts
= self
.read_unsigned_long_long()
99 duration
= self
.read_unsigned_int()
101 discontinuity_indicator
= self
.read_unsigned_char()
103 discontinuity_indicator
= None
107 'duration': duration
,
108 'discontinuity_indicator': discontinuity_indicator
,
112 'fragments': fragments
,
117 self
.read_unsigned_char()
120 # BootstrapinfoVersion
121 bootstrap_info_version
= self
.read_unsigned_int()
122 # Profile,Live,Update,Reserved
125 self
.read_unsigned_int()
127 self
.read_unsigned_long_long()
128 # SmpteTimeCodeOffset
129 self
.read_unsigned_long_long()
131 movie_identifier
= self
.read_string()
132 server_count
= self
.read_unsigned_char()
134 for i
in range(server_count
):
136 quality_count
= self
.read_unsigned_char()
138 for i
in range(server_count
):
145 segments_count
= self
.read_unsigned_char()
147 for i
in range(segments_count
):
148 box_size
, box_type
, box_data
= self
.read_box_info()
149 assert box_type
== b
'asrt'
150 segment
= FlvReader(box_data
).read_asrt()
151 segments
.append(segment
)
152 fragments_run_count
= self
.read_unsigned_char()
154 for i
in range(fragments_run_count
):
155 box_size
, box_type
, box_data
= self
.read_box_info()
156 assert box_type
== b
'afrt'
157 fragments
.append(FlvReader(box_data
).read_afrt())
160 'segments': segments
,
161 'fragments': fragments
,
def read_bootstrap_info(self):
    """Read the next box from this stream and parse it as bootstrap info.

    The box is obtained from ``read_box_info``; its type must be
    ``b'abst'``.  The box payload is wrapped in a new ``FlvReader`` and
    the result of that reader's ``read_abst`` is returned.
    """
    # The reported box size is not needed here, only the type and payload.
    _size, kind, payload = self.read_box_info()
    assert kind == b'abst'
    abst_reader = FlvReader(payload)
    return abst_reader.read_abst()
def read_bootstrap_info(bootstrap_bytes):
    """Parse bootstrap info out of raw bytes.

    Wraps ``bootstrap_bytes`` in a ``FlvReader`` and returns whatever that
    reader's ``read_bootstrap_info`` method produces.
    """
    reader = FlvReader(bootstrap_bytes)
    return reader.read_bootstrap_info()
def build_fragments_list(boot_info):
    """ Return a list of (segment, fragment) for each fragment in the video

    Reads two values from ``boot_info``:

    * ``boot_info['segments'][0]['segment_run'][0][1]`` -- the number of
      fragments in the first segment run entry;
    * ``boot_info['fragments'][0]['fragments'][0]['first']`` -- the number
      of the first fragment.

    The result holds one ``(1, fragment_number)`` tuple per fragment, with
    fragment numbers counting up consecutively from the first one.  The
    segment number is always 1.
    """
    segment_run_table = boot_info['segments'][0]
    # I've only found videos with one segment
    segment_run_entry = segment_run_table['segment_run'][0]
    n_frags = segment_run_entry[1]
    fragment_run_entry_table = boot_info['fragments'][0]['fragments']
    first_frag_number = fragment_run_entry_table[0]['first']
    # Build and return the list directly: exactly n_frags consecutive
    # fragment numbers starting at first_frag_number.
    return [
        (1, frag_number)
        for frag_number in range(first_frag_number, first_frag_number + n_frags)
    ]
def write_flv_header(stream, metadata):
    """Writes the FLV header and the metadata to stream

    ``stream`` needs a ``write`` method accepting bytes; ``metadata`` is
    written verbatim, preceded by its length and followed by a fixed
    4-byte trailer.
    """
    emit = stream.write

    # Fixed leading bytes, written in this exact order.
    emit(b'FLV\x01')
    emit(b'\x05')
    emit(b'\x00\x00\x00\x09')
    emit(b'\x00\x00\x00\x00')
    emit(b'\x12')

    # Size of the metadata with 3 bytes: pack it as a 4-byte big-endian
    # integer and drop the most significant byte.
    packed_size = struct_pack('!L', len(metadata))
    emit(packed_size[1:])

    emit(b'\x00\x00\x00\x00\x00\x00\x00')
    emit(metadata)
    # Magic numbers extracted from the output files produced by AdobeHDS.php
    #(https://github.com/K-S-V/Scripts)
    emit(b'\x00\x00\x01\x73')
209 return '{http://ns.adobe.com/f4m/1.0}%s' % prop
212 class HttpQuietDownloader(HttpFD
):
213 def to_screen(self
, *args
, **kargs
):
217 class F4mFD(FileDownloader
):
219 A downloader for f4m manifests or AdobeHDS.
222 def real_download(self
, filename
, info_dict
):
223 man_url
= info_dict
['url']
224 self
.to_screen('[download] Downloading f4m manifest')
225 manifest
= self
.ydl
.urlopen(man_url
).read()
226 self
.report_destination(filename
)
227 http_dl
= HttpQuietDownloader(self
.ydl
,
232 'test': self
.params
.get('test', False),
235 doc
= etree
.fromstring(manifest
)
236 formats
= [(int(f
.attrib
.get('bitrate', -1)), f
) for f
in doc
.findall(_add_ns('media'))]
237 formats
= sorted(formats
, key
=lambda f
: f
[0])
238 rate
, media
= formats
[-1]
239 base_url
= compat_urlparse
.urljoin(man_url
, media
.attrib
['url'])
240 bootstrap
= base64
.b64decode(doc
.find(_add_ns('bootstrapInfo')).text
)
241 metadata
= base64
.b64decode(media
.find(_add_ns('metadata')).text
)
242 boot_info
= read_bootstrap_info(bootstrap
)
243 fragments_list
= build_fragments_list(boot_info
)
244 if self
.params
.get('test', False):
245 # We only download the first fragment
246 fragments_list
= fragments_list
[:1]
247 total_frags
= len(fragments_list
)
249 tmpfilename
= self
.temp_name(filename
)
250 (dest_stream
, tmpfilename
) = sanitize_open(tmpfilename
, 'wb')
251 write_flv_header(dest_stream
, metadata
)
253 # This dict stores the download progress, it's updated by the progress
256 'downloaded_bytes': 0,
261 def frag_progress_hook(status
):
262 frag_total_bytes
= status
.get('total_bytes', 0)
263 estimated_size
= (state
['downloaded_bytes'] +
264 (total_frags
- state
['frag_counter']) * frag_total_bytes
)
265 if status
['status'] == 'finished':
266 state
['downloaded_bytes'] += frag_total_bytes
267 state
['frag_counter'] += 1
268 progress
= self
.calc_percent(state
['frag_counter'], total_frags
)
269 byte_counter
= state
['downloaded_bytes']
271 frag_downloaded_bytes
= status
['downloaded_bytes']
272 byte_counter
= state
['downloaded_bytes'] + frag_downloaded_bytes
273 frag_progress
= self
.calc_percent(frag_downloaded_bytes
,
275 progress
= self
.calc_percent(state
['frag_counter'], total_frags
)
276 progress
+= frag_progress
/ float(total_frags
)
278 eta
= self
.calc_eta(start
, time
.time(), estimated_size
, byte_counter
)
279 self
.report_progress(progress
, format_bytes(estimated_size
),
280 status
.get('speed'), eta
)
281 http_dl
.add_progress_hook(frag_progress_hook
)
284 for (seg_i
, frag_i
) in fragments_list
:
285 name
= 'Seg%d-Frag%d' % (seg_i
, frag_i
)
286 url
= base_url
+ name
287 frag_filename
= '%s-%s' % (tmpfilename
, name
)
288 success
= http_dl
.download(frag_filename
, {'url': url
})
291 with open(frag_filename
, 'rb') as down
:
292 down_data
= down
.read()
293 reader
= FlvReader(down_data
)
295 _
, box_type
, box_data
= reader
.read_box_info()
296 if box_type
== b
'mdat':
297 dest_stream
.write(box_data
)
299 frags_filenames
.append(frag_filename
)
301 self
.report_finish(format_bytes(state
['downloaded_bytes']), time
.time() - start
)
303 self
.try_rename(tmpfilename
, filename
)
304 for frag_file
in frags_filenames
:
307 fsize
= os
.path
.getsize(encodeFilename(filename
))
308 self
._hook
_progress
({
309 'downloaded_bytes': fsize
,
310 'total_bytes': fsize
,
311 'filename': filename
,
312 'status': 'finished',