]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/downloader/f4m.py
   1 from __future__ 
import unicode_literals
 
   8 import xml
.etree
.ElementTree 
as etree
 
  10 from .common 
import FileDownloader
 
  11 from .http 
import HttpFD
 
  23 class FlvReader(io
.BytesIO
): 
  26     The file format is documented in https://www.adobe.com/devnet/f4v.html 
  29     # Utility functions for reading numbers and strings 
  30     def read_unsigned_long_long(self
): 
  31         return struct_unpack('!Q', self
.read(8))[0] 
  33     def read_unsigned_int(self
): 
  34         return struct_unpack('!I', self
.read(4))[0] 
  36     def read_unsigned_char(self
): 
  37         return struct_unpack('!B', self
.read(1))[0] 
  39     def read_string(self
): 
  48     def read_box_info(self
): 
  50         Read a box and return the info as a tuple: (box_size, box_type, box_data) 
  52         real_size 
= size 
= self
.read_unsigned_int() 
  53         box_type 
= self
.read(4) 
  56             real_size 
= self
.read_unsigned_long_long() 
  58         return real_size
, box_type
, self
.read(real_size 
- header_end
) 
  62         self
.read_unsigned_char() 
  65         quality_entry_count 
= self
.read_unsigned_char() 
  67         for i 
in range(quality_entry_count
): 
  70         segment_run_count 
= self
.read_unsigned_int() 
  72         for i 
in range(segment_run_count
): 
  73             first_segment 
= self
.read_unsigned_int() 
  74             fragments_per_segment 
= self
.read_unsigned_int() 
  75             segments
.append((first_segment
, fragments_per_segment
)) 
  78             'segment_run': segments
, 
  83         self
.read_unsigned_char() 
  87         self
.read_unsigned_int() 
  89         quality_entry_count 
= self
.read_unsigned_char() 
  90         # QualitySegmentUrlModifiers 
  91         for i 
in range(quality_entry_count
): 
  94         fragments_count 
= self
.read_unsigned_int() 
  96         for i 
in range(fragments_count
): 
  97             first 
= self
.read_unsigned_int() 
  98             first_ts 
= self
.read_unsigned_long_long() 
  99             duration 
= self
.read_unsigned_int() 
 101                 discontinuity_indicator 
= self
.read_unsigned_char() 
 103                 discontinuity_indicator 
= None 
 107                 'duration': duration
, 
 108                 'discontinuity_indicator': discontinuity_indicator
, 
 112             'fragments': fragments
, 
 117         self
.read_unsigned_char() 
 121         self
.read_unsigned_int()  # BootstrapinfoVersion 
 122         # Profile,Live,Update,Reserved 
 125         self
.read_unsigned_int() 
 127         self
.read_unsigned_long_long() 
 128         # SmpteTimeCodeOffset 
 129         self
.read_unsigned_long_long() 
 131         self
.read_string()  # MovieIdentifier 
 132         server_count 
= self
.read_unsigned_char() 
 134         for i 
in range(server_count
): 
 136         quality_count 
= self
.read_unsigned_char() 
 138         for i 
in range(quality_count
): 
 145         segments_count 
= self
.read_unsigned_char() 
 147         for i 
in range(segments_count
): 
 148             box_size
, box_type
, box_data 
= self
.read_box_info() 
 149             assert box_type 
== b
'asrt' 
 150             segment 
= FlvReader(box_data
).read_asrt() 
 151             segments
.append(segment
) 
 152         fragments_run_count 
= self
.read_unsigned_char() 
 154         for i 
in range(fragments_run_count
): 
 155             box_size
, box_type
, box_data 
= self
.read_box_info() 
 156             assert box_type 
== b
'afrt' 
 157             fragments
.append(FlvReader(box_data
).read_afrt()) 
 160             'segments': segments
, 
 161             'fragments': fragments
, 
 164     def read_bootstrap_info(self
): 
 165         total_size
, box_type
, box_data 
= self
.read_box_info() 
 166         assert box_type 
== b
'abst' 
 167         return FlvReader(box_data
).read_abst() 
 170 def read_bootstrap_info(bootstrap_bytes
): 
 171     return FlvReader(bootstrap_bytes
).read_bootstrap_info() 
 174 def build_fragments_list(boot_info
): 
 175     """ Return a list of (segment, fragment) for each fragment in the video """ 
 177     segment_run_table 
= boot_info
['segments'][0] 
 178     # I've only found videos with one segment 
 179     segment_run_entry 
= segment_run_table
['segment_run'][0] 
 180     n_frags 
= segment_run_entry
[1] 
 181     fragment_run_entry_table 
= boot_info
['fragments'][0]['fragments'] 
 182     first_frag_number 
= fragment_run_entry_table
[0]['first'] 
 183     for (i
, frag_number
) in zip(range(1, n_frags 
+ 1), itertools
.count(first_frag_number
)): 
 184         res
.append((1, frag_number
)) 
 188 def write_flv_header(stream
, metadata
): 
 189     """Writes the FLV header and the metadata to stream""" 
 191     stream
.write(b
'FLV\x01') 
 192     stream
.write(b
'\x05') 
 193     stream
.write(b
'\x00\x00\x00\x09') 
 195     stream
.write(b
'\x00\x00\x00\x00') 
 198     stream
.write(b
'\x12') 
 199     # Size of the metadata with 3 bytes 
 200     stream
.write(struct_pack('!L', len(metadata
))[1:]) 
 201     stream
.write(b
'\x00\x00\x00\x00\x00\x00\x00') 
 202     stream
.write(metadata
) 
 203     # Magic numbers extracted from the output files produced by AdobeHDS.php 
 204     #(https://github.com/K-S-V/Scripts) 
 205     stream
.write(b
'\x00\x00\x01\x73') 
 209     return '{http://ns.adobe.com/f4m/1.0}%s' % prop
 
 212 class HttpQuietDownloader(HttpFD
): 
 213     def to_screen(self
, *args
, **kargs
): 
 217 class F4mFD(FileDownloader
): 
 219     A downloader for f4m manifests or AdobeHDS. 
 222     def real_download(self
, filename
, info_dict
): 
 223         man_url 
= info_dict
['url'] 
 224         requested_bitrate 
= info_dict
.get('tbr') 
 225         self
.to_screen('[download] Downloading f4m manifest') 
 226         manifest 
= self
.ydl
.urlopen(man_url
).read() 
 227         self
.report_destination(filename
) 
 228         http_dl 
= HttpQuietDownloader( 
 234                 'test': self
.params
.get('test', False), 
 238         doc 
= etree
.fromstring(manifest
) 
 239         formats 
= [(int(f
.attrib
.get('bitrate', -1)), f
) for f 
in doc
.findall(_add_ns('media'))] 
 240         if requested_bitrate 
is None: 
 241             # get the best format 
 242             formats 
= sorted(formats
, key
=lambda f
: f
[0]) 
 243             rate
, media 
= formats
[-1] 
 245             rate
, media 
= list(filter( 
 246                 lambda f
: int(f
[0]) == requested_bitrate
, formats
))[0] 
 248         base_url 
= compat_urlparse
.urljoin(man_url
, media
.attrib
['url']) 
 249         bootstrap_node 
= doc
.find(_add_ns('bootstrapInfo')) 
 250         if bootstrap_node
.text 
is None: 
 251             bootstrap_url 
= compat_urlparse
.urljoin( 
 252                 base_url
, bootstrap_node
.attrib
['url']) 
 253             bootstrap 
= self
.ydl
.urlopen(bootstrap_url
).read() 
 255             bootstrap 
= base64
.b64decode(bootstrap_node
.text
) 
 256         metadata 
= base64
.b64decode(media
.find(_add_ns('metadata')).text
) 
 257         boot_info 
= read_bootstrap_info(bootstrap
) 
 259         fragments_list 
= build_fragments_list(boot_info
) 
 260         if self
.params
.get('test', False): 
 261             # We only download the first fragment 
 262             fragments_list 
= fragments_list
[:1] 
 263         total_frags 
= len(fragments_list
) 
 264         # For some akamai manifests we'll need to add a query to the fragment url 
 265         akamai_pv 
= xpath_text(doc
, _add_ns('pv-2.0')) 
 267         tmpfilename 
= self
.temp_name(filename
) 
 268         (dest_stream
, tmpfilename
) = sanitize_open(tmpfilename
, 'wb') 
 269         write_flv_header(dest_stream
, metadata
) 
 271         # This dict stores the download progress, it's updated by the progress 
 274             'downloaded_bytes': 0, 
 279         def frag_progress_hook(status
): 
 280             frag_total_bytes 
= status
.get('total_bytes', 0) 
 281             estimated_size 
= (state
['downloaded_bytes'] + 
 282                               (total_frags 
- state
['frag_counter']) * frag_total_bytes
) 
 283             if status
['status'] == 'finished': 
 284                 state
['downloaded_bytes'] += frag_total_bytes
 
 285                 state
['frag_counter'] += 1 
 286                 progress 
= self
.calc_percent(state
['frag_counter'], total_frags
) 
 287                 byte_counter 
= state
['downloaded_bytes'] 
 289                 frag_downloaded_bytes 
= status
['downloaded_bytes'] 
 290                 byte_counter 
= state
['downloaded_bytes'] + frag_downloaded_bytes
 
 291                 frag_progress 
= self
.calc_percent(frag_downloaded_bytes
, 
 293                 progress 
= self
.calc_percent(state
['frag_counter'], total_frags
) 
 294                 progress 
+= frag_progress 
/ float(total_frags
) 
 296             eta 
= self
.calc_eta(start
, time
.time(), estimated_size
, byte_counter
) 
 297             self
.report_progress(progress
, format_bytes(estimated_size
), 
 298                                  status
.get('speed'), eta
) 
 299         http_dl
.add_progress_hook(frag_progress_hook
) 
 302         for (seg_i
, frag_i
) in fragments_list
: 
 303             name 
= 'Seg%d-Frag%d' % (seg_i
, frag_i
) 
 304             url 
= base_url 
+ name
 
 306                 url 
+= '?' + akamai_pv
.strip(';') 
 307             frag_filename 
= '%s-%s' % (tmpfilename
, name
) 
 308             success 
= http_dl
.download(frag_filename
, {'url': url
}) 
 311             with open(frag_filename
, 'rb') as down
: 
 312                 down_data 
= down
.read() 
 313                 reader 
= FlvReader(down_data
) 
 315                     _
, box_type
, box_data 
= reader
.read_box_info() 
 316                     if box_type 
== b
'mdat': 
 317                         dest_stream
.write(box_data
) 
 319             frags_filenames
.append(frag_filename
) 
 322         self
.report_finish(format_bytes(state
['downloaded_bytes']), time
.time() - start
) 
 324         self
.try_rename(tmpfilename
, filename
) 
 325         for frag_file 
in frags_filenames
: 
 328         fsize 
= os
.path
.getsize(encodeFilename(filename
)) 
 329         self
._hook
_progress
({ 
 330             'downloaded_bytes': fsize
, 
 331             'total_bytes': fsize
, 
 332             'filename': filename
, 
 333             'status': 'finished',