]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/downloader/f4m.py
   1 from __future__ 
import unicode_literals
 
   8 import xml
.etree
.ElementTree 
as etree
 
  10 from .common 
import FileDownloader
 
  11 from .http 
import HttpFD
 
  22 class FlvReader(io
.BytesIO
): 
  25     The file format is documented in https://www.adobe.com/devnet/f4v.html 
  28     # Utility functions for reading numbers and strings 
  def read_unsigned_long_long(self):
      """Read the next 8 bytes of the stream and return them as one integer.

      The bytes are decoded with the struct format '!Q' (network byte
      order, unsigned 64-bit).
      """
      # NOTE(review): struct_unpack is defined outside this view; presumably
      # a thin wrapper around struct.unpack -- confirm against its import.
      return struct_unpack('!Q', self.read(8))[0]
  def read_unsigned_int(self):
      """Read the next 4 bytes of the stream and return them as one integer.

      The bytes are decoded with the struct format '!I' (network byte
      order, unsigned 32-bit).
      """
      # NOTE(review): struct_unpack is defined outside this view; presumably
      # a thin wrapper around struct.unpack -- confirm against its import.
      return struct_unpack('!I', self.read(4))[0]
  def read_unsigned_char(self):
      """Read the next byte of the stream and return it as an integer.

      The byte is decoded with the struct format '!B' (unsigned 8-bit).
      """
      # NOTE(review): struct_unpack is defined outside this view; presumably
      # a thin wrapper around struct.unpack -- confirm against its import.
      return struct_unpack('!B', self.read(1))[0]
  38     def read_string(self
): 
  47     def read_box_info(self
): 
  49         Read a box and return the info as a tuple: (box_size, box_type, box_data) 
  51         real_size 
= size 
= self
.read_unsigned_int() 
  52         box_type 
= self
.read(4) 
  55             real_size 
= self
.read_unsigned_long_long() 
  57         return real_size
, box_type
, self
.read(real_size
-header_end
) 
  61         self
.read_unsigned_char() 
  64         quality_entry_count 
= self
.read_unsigned_char() 
  66         for i 
in range(quality_entry_count
): 
  69         segment_run_count 
= self
.read_unsigned_int() 
  71         for i 
in range(segment_run_count
): 
  72             first_segment 
= self
.read_unsigned_int() 
  73             fragments_per_segment 
= self
.read_unsigned_int() 
  74             segments
.append((first_segment
, fragments_per_segment
)) 
  77             'segment_run': segments
, 
  82         self
.read_unsigned_char() 
  86         self
.read_unsigned_int() 
  88         quality_entry_count 
= self
.read_unsigned_char() 
  89         # QualitySegmentUrlModifiers 
  90         for i 
in range(quality_entry_count
): 
  93         fragments_count 
= self
.read_unsigned_int() 
  95         for i 
in range(fragments_count
): 
  96             first 
= self
.read_unsigned_int() 
  97             first_ts 
= self
.read_unsigned_long_long() 
  98             duration 
= self
.read_unsigned_int() 
 100                 discontinuity_indicator 
= self
.read_unsigned_char() 
 102                 discontinuity_indicator 
= None 
 106                 'duration': duration
, 
 107                 'discontinuity_indicator': discontinuity_indicator
, 
 111             'fragments': fragments
, 
 116         self
.read_unsigned_char() 
 120         self
.read_unsigned_int()  # BootstrapinfoVersion 
 121         # Profile,Live,Update,Reserved 
 124         self
.read_unsigned_int() 
 126         self
.read_unsigned_long_long() 
 127         # SmpteTimeCodeOffset 
 128         self
.read_unsigned_long_long() 
 130         self
.read_string()  # MovieIdentifier 
 131         server_count 
= self
.read_unsigned_char() 
 133         for i 
in range(server_count
): 
 135         quality_count 
= self
.read_unsigned_char() 
 137         for i 
in range(quality_count
): 
 144         segments_count 
= self
.read_unsigned_char() 
 146         for i 
in range(segments_count
): 
 147             box_size
, box_type
, box_data 
= self
.read_box_info() 
 148             assert box_type 
== b
'asrt' 
 149             segment 
= FlvReader(box_data
).read_asrt() 
 150             segments
.append(segment
) 
 151         fragments_run_count 
= self
.read_unsigned_char() 
 153         for i 
in range(fragments_run_count
): 
 154             box_size
, box_type
, box_data 
= self
.read_box_info() 
 155             assert box_type 
== b
'afrt' 
 156             fragments
.append(FlvReader(box_data
).read_afrt()) 
 159             'segments': segments
, 
 160             'fragments': fragments
, 
 def read_bootstrap_info(self):
     """Read one box from the stream and parse it as an 'abst' box.

     The box is fetched with read_box_info(); its type must be b'abst'.
     The payload is wrapped in a fresh FlvReader and the result of that
     reader's read_abst() is returned.

     Raises AssertionError when the box type is not b'abst'.
     """
     # The reported box size is not needed here, only the type and payload.
     _box_size, box_type, box_data = self.read_box_info()
     assert box_type == b'abst'
     return FlvReader(box_data).read_abst()
 def read_bootstrap_info(bootstrap_bytes):
     """Parse raw bootstrap bytes and return the decoded bootstrap info.

     Wraps *bootstrap_bytes* in a FlvReader and returns the result of its
     read_bootstrap_info() method.
     """
     return FlvReader(bootstrap_bytes).read_bootstrap_info()
 173 def build_fragments_list(boot_info
): 
 174     """ Return a list of (segment, fragment) for each fragment in the video """ 
 176     segment_run_table 
= boot_info
['segments'][0] 
 177     # I've only found videos with one segment 
 178     segment_run_entry 
= segment_run_table
['segment_run'][0] 
 179     n_frags 
= segment_run_entry
[1] 
 180     fragment_run_entry_table 
= boot_info
['fragments'][0]['fragments'] 
 181     first_frag_number 
= fragment_run_entry_table
[0]['first'] 
 182     for (i
, frag_number
) in zip(range(1, n_frags
+1), itertools
.count(first_frag_number
)): 
 183         res
.append((1, frag_number
)) 
 187 def write_flv_header(stream
, metadata
): 
 188     """Writes the FLV header and the metadata to stream""" 
 190     stream
.write(b
'FLV\x01') 
 191     stream
.write(b
'\x05') 
 192     stream
.write(b
'\x00\x00\x00\x09') 
 194     stream
.write(b
'\x00\x00\x00\x00') 
 197     stream
.write(b
'\x12') 
 198     # Size of the metadata with 3 bytes 
 199     stream
.write(struct_pack('!L', len(metadata
))[1:]) 
 200     stream
.write(b
'\x00\x00\x00\x00\x00\x00\x00') 
 201     stream
.write(metadata
) 
 202     # Magic numbers extracted from the output files produced by AdobeHDS.php 
 203     #(https://github.com/K-S-V/Scripts) 
 204     stream
.write(b
'\x00\x00\x01\x73') 
 208     return '{http://ns.adobe.com/f4m/1.0}%s' % prop
 
 211 class HttpQuietDownloader(HttpFD
): 
 212     def to_screen(self
, *args
, **kargs
): 
 216 class F4mFD(FileDownloader
): 
 218     A downloader for f4m manifests or AdobeHDS. 
 221     def real_download(self
, filename
, info_dict
): 
 222         man_url 
= info_dict
['url'] 
 223         self
.to_screen('[download] Downloading f4m manifest') 
 224         manifest 
= self
.ydl
.urlopen(man_url
).read() 
 225         self
.report_destination(filename
) 
 226         http_dl 
= HttpQuietDownloader(self
.ydl
, 
 231                 'test': self
.params
.get('test', False), 
 234         doc 
= etree
.fromstring(manifest
) 
 235         formats 
= [(int(f
.attrib
.get('bitrate', -1)), f
) for f 
in doc
.findall(_add_ns('media'))] 
 236         formats 
= sorted(formats
, key
=lambda f
: f
[0]) 
 237         rate
, media 
= formats
[-1] 
 238         base_url 
= compat_urlparse
.urljoin(man_url
, media
.attrib
['url']) 
 239         bootstrap 
= base64
.b64decode(doc
.find(_add_ns('bootstrapInfo')).text
) 
 240         metadata 
= base64
.b64decode(media
.find(_add_ns('metadata')).text
) 
 241         boot_info 
= read_bootstrap_info(bootstrap
) 
 242         fragments_list 
= build_fragments_list(boot_info
) 
 243         if self
.params
.get('test', False): 
 244             # We only download the first fragment 
 245             fragments_list 
= fragments_list
[:1] 
 246         total_frags 
= len(fragments_list
) 
 248         tmpfilename 
= self
.temp_name(filename
) 
 249         (dest_stream
, tmpfilename
) = sanitize_open(tmpfilename
, 'wb') 
 250         write_flv_header(dest_stream
, metadata
) 
 252         # This dict stores the download progress, it's updated by the progress 
 255             'downloaded_bytes': 0, 
 260         def frag_progress_hook(status
): 
 261             frag_total_bytes 
= status
.get('total_bytes', 0) 
 262             estimated_size 
= (state
['downloaded_bytes'] + 
 263                 (total_frags 
- state
['frag_counter']) * frag_total_bytes
) 
 264             if status
['status'] == 'finished': 
 265                 state
['downloaded_bytes'] += frag_total_bytes
 
 266                 state
['frag_counter'] += 1 
 267                 progress 
= self
.calc_percent(state
['frag_counter'], total_frags
) 
 268                 byte_counter 
= state
['downloaded_bytes'] 
 270                 frag_downloaded_bytes 
= status
['downloaded_bytes'] 
 271                 byte_counter 
= state
['downloaded_bytes'] + frag_downloaded_bytes
 
 272                 frag_progress 
= self
.calc_percent(frag_downloaded_bytes
, 
 274                 progress 
= self
.calc_percent(state
['frag_counter'], total_frags
) 
 275                 progress 
+= frag_progress 
/ float(total_frags
) 
 277             eta 
= self
.calc_eta(start
, time
.time(), estimated_size
, byte_counter
) 
 278             self
.report_progress(progress
, format_bytes(estimated_size
), 
 279                 status
.get('speed'), eta
) 
 280         http_dl
.add_progress_hook(frag_progress_hook
) 
 283         for (seg_i
, frag_i
) in fragments_list
: 
 284             name 
= 'Seg%d-Frag%d' % (seg_i
, frag_i
) 
 285             url 
= base_url 
+ name
 
 286             frag_filename 
= '%s-%s' % (tmpfilename
, name
) 
 287             success 
= http_dl
.download(frag_filename
, {'url': url
}) 
 290             with open(frag_filename
, 'rb') as down
: 
 291                 down_data 
= down
.read() 
 292                 reader 
= FlvReader(down_data
) 
 294                     _
, box_type
, box_data 
= reader
.read_box_info() 
 295                     if box_type 
== b
'mdat': 
 296                         dest_stream
.write(box_data
) 
 298             frags_filenames
.append(frag_filename
) 
 301         self
.report_finish(format_bytes(state
['downloaded_bytes']), time
.time() - start
) 
 303         self
.try_rename(tmpfilename
, filename
) 
 304         for frag_file 
in frags_filenames
: 
 307         fsize 
= os
.path
.getsize(encodeFilename(filename
)) 
 308         self
._hook
_progress
({ 
 309             'downloaded_bytes': fsize
, 
 310             'total_bytes': fsize
, 
 311             'filename': filename
, 
 312             'status': 'finished',