]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/downloader/f4m.py
2a870a758fa32dd475eaf725f69dedbf3c45903b
   1 from __future__ 
import unicode_literals
 
   8 import xml
.etree
.ElementTree 
as etree
 
  10 from .common 
import FileDownloader
 
  11 from .http 
import HttpFD
 
  15     compat_urllib_request
, 
  23 class FlvReader(io
.BytesIO
): 
  26     The file format is documented in https://www.adobe.com/devnet/f4v.html 
  29     # Utility functions for reading numbers and strings 
  def read_unsigned_long_long(self):
      """Read the next 8 bytes of the stream as one unsigned integer.

      The bytes are decoded with the ``'!Q'`` format (network byte order,
      unsigned long long in the ``struct`` format mini-language).
      NOTE(review): ``struct_unpack`` is a helper imported elsewhere in the
      file; presumably it mirrors ``struct.unpack`` -- confirm.

      Returns the first (and only) element of the unpacked tuple.
      """
      return struct_unpack('!Q', self.read(8))[0]
  def read_unsigned_int(self):
      """Read the next 4 bytes of the stream as one unsigned integer.

      The bytes are decoded with the ``'!I'`` format (network byte order,
      unsigned int in the ``struct`` format mini-language).
      NOTE(review): ``struct_unpack`` is a helper imported elsewhere in the
      file; presumably it mirrors ``struct.unpack`` -- confirm.

      Returns the first (and only) element of the unpacked tuple.
      """
      return struct_unpack('!I', self.read(4))[0]
  def read_unsigned_char(self):
      """Read the next byte of the stream as one unsigned integer.

      The byte is decoded with the ``'!B'`` format (unsigned char in the
      ``struct`` format mini-language).
      NOTE(review): ``struct_unpack`` is a helper imported elsewhere in the
      file; presumably it mirrors ``struct.unpack`` -- confirm.

      Returns the first (and only) element of the unpacked tuple.
      """
      return struct_unpack('!B', self.read(1))[0]
  39     def read_string(self
): 
  48     def read_box_info(self
): 
  50         Read a box and return the info as a tuple: (box_size, box_type, box_data) 
  52         real_size 
= size 
= self
.read_unsigned_int() 
  53         box_type 
= self
.read(4) 
  56             real_size 
= self
.read_unsigned_long_long() 
  58         return real_size
, box_type
, self
.read(real_size
-header_end
) 
  62         self
.read_unsigned_char() 
  65         quality_entry_count 
= self
.read_unsigned_char() 
  67         for i 
in range(quality_entry_count
): 
  70         segment_run_count 
= self
.read_unsigned_int() 
  72         for i 
in range(segment_run_count
): 
  73             first_segment 
= self
.read_unsigned_int() 
  74             fragments_per_segment 
= self
.read_unsigned_int() 
  75             segments
.append((first_segment
, fragments_per_segment
)) 
  78             'segment_run': segments
, 
  83         self
.read_unsigned_char() 
  87         self
.read_unsigned_int() 
  89         quality_entry_count 
= self
.read_unsigned_char() 
  90         # QualitySegmentUrlModifiers 
  91         for i 
in range(quality_entry_count
): 
  94         fragments_count 
= self
.read_unsigned_int() 
  96         for i 
in range(fragments_count
): 
  97             first 
= self
.read_unsigned_int() 
  98             first_ts 
= self
.read_unsigned_long_long() 
  99             duration 
= self
.read_unsigned_int() 
 101                 discontinuity_indicator 
= self
.read_unsigned_char() 
 103                 discontinuity_indicator 
= None 
 107                 'duration': duration
, 
 108                 'discontinuity_indicator': discontinuity_indicator
, 
 112             'fragments': fragments
, 
 117         self
.read_unsigned_char() 
 120         # BootstrapinfoVersion 
 121         bootstrap_info_version 
= self
.read_unsigned_int() 
 122         # Profile,Live,Update,Reserved 
 125         self
.read_unsigned_int() 
 127         self
.read_unsigned_long_long() 
 128         # SmpteTimeCodeOffset 
 129         self
.read_unsigned_long_long() 
 131         movie_identifier 
= self
.read_string() 
 132         server_count 
= self
.read_unsigned_char() 
 134         for i 
in range(server_count
): 
 136         quality_count 
= self
.read_unsigned_char() 
 138         for i 
in range(server_count
): 
 145         segments_count 
= self
.read_unsigned_char() 
 147         for i 
in range(segments_count
): 
 148             box_size
, box_type
, box_data 
= self
.read_box_info() 
 149             assert box_type 
== b
'asrt' 
 150             segment 
= FlvReader(box_data
).read_asrt() 
 151             segments
.append(segment
) 
 152         fragments_run_count 
= self
.read_unsigned_char() 
 154         for i 
in range(fragments_run_count
): 
 155             box_size
, box_type
, box_data 
= self
.read_box_info() 
 156             assert box_type 
== b
'afrt' 
 157             fragments
.append(FlvReader(box_data
).read_afrt()) 
 160             'segments': segments
, 
 161             'fragments': fragments
, 
 def read_bootstrap_info(self):
     """Read one box from the stream and parse it as an 'abst' box.

     The box is fetched with ``read_box_info()``; its payload is wrapped
     in a fresh ``FlvReader`` and handed to ``read_abst()``, whose result
     is returned unchanged.

     Raises AssertionError when the box type is not ``b'abst'``.
     """
     # The reported box size is not needed here, only the type and payload.
     _, box_type, box_data = self.read_box_info()
     assert box_type == b'abst'
     return FlvReader(box_data).read_abst()
 def read_bootstrap_info(bootstrap_bytes):
     """Parse raw bootstrap data and return the decoded bootstrap info.

     ``bootstrap_bytes`` is wrapped in a ``FlvReader`` and parsed with its
     ``read_bootstrap_info()`` method; that method's result is returned
     as-is.
     """
     return FlvReader(bootstrap_bytes).read_bootstrap_info()
 174 def build_fragments_list(boot_info
): 
 175     """ Return a list of (segment, fragment) for each fragment in the video """ 
 177     segment_run_table 
= boot_info
['segments'][0] 
 178     # I've only found videos with one segment 
 179     segment_run_entry 
= segment_run_table
['segment_run'][0] 
 180     n_frags 
= segment_run_entry
[1] 
 181     fragment_run_entry_table 
= boot_info
['fragments'][0]['fragments'] 
 182     first_frag_number 
= fragment_run_entry_table
[0]['first'] 
 183     for (i
, frag_number
) in zip(range(1, n_frags
+1), itertools
.count(first_frag_number
)): 
 184         res
.append((1, frag_number
)) 
 def write_flv_header(stream, metadata):
     """Writes the FLV header and the metadata to stream

     stream:   object exposing ``write()`` that accepts bytes.
     metadata: bytes written verbatim after the tag header; its length is
               stored in 3 bytes, so it must be smaller than 2**24 bytes
               (the most significant byte of the packed length is dropped).

     Nothing is returned; the function only writes to ``stream``.
     """
     # NOTE(review): the three writes below look like the FLV file header
     # (signature + version, flags, header length) -- confirm against the
     # FLV specification.
     stream.write(b'FLV\x01')
     stream.write(b'\x05')
     stream.write(b'\x00\x00\x00\x09')
     stream.write(b'\x00\x00\x00\x00')
     # NOTE(review): 0x12 is presumably the tag type of the metadata
     # (script data) tag -- confirm against the FLV specification.
     stream.write(b'\x12')
     # Size of the metadata with 3 bytes
     # (a 4-byte big-endian length with its first byte stripped)
     stream.write(struct_pack('!L', len(metadata))[1:])
     stream.write(b'\x00\x00\x00\x00\x00\x00\x00')
     stream.write(metadata)
     # Magic numbers extracted from the output files produced by AdobeHDS.php
     # (https://github.com/K-S-V/Scripts)
     stream.write(b'\x00\x00\x01\x73')
 209     return '{http://ns.adobe.com/f4m/1.0}%s' % prop
 
 212 class HttpQuietDownloader(HttpFD
): 
 213     def to_screen(self
, *args
, **kargs
): 
 217 class F4mFD(FileDownloader
): 
 219     A downloader for f4m manifests or AdobeHDS. 
 222     def real_download(self
, filename
, info_dict
): 
 223         man_url 
= info_dict
['url'] 
 224         self
.to_screen('[download] Downloading f4m manifest') 
 225         manifest 
= self
.ydl
.urlopen(man_url
).read() 
 226         self
.report_destination(filename
) 
 227         http_dl 
= HttpQuietDownloader(self
.ydl
, 
 232                 'test': self
.params
.get('test', False), 
 235         doc 
= etree
.fromstring(manifest
) 
 236         formats 
= [(int(f
.attrib
.get('bitrate', -1)), f
) for f 
in doc
.findall(_add_ns('media'))] 
 237         formats 
= sorted(formats
, key
=lambda f
: f
[0]) 
 238         rate
, media 
= formats
[-1] 
 239         base_url 
= compat_urlparse
.urljoin(man_url
, media
.attrib
['url']) 
 240         bootstrap 
= base64
.b64decode(doc
.find(_add_ns('bootstrapInfo')).text
) 
 241         metadata 
= base64
.b64decode(media
.find(_add_ns('metadata')).text
) 
 242         boot_info 
= read_bootstrap_info(bootstrap
) 
 243         fragments_list 
= build_fragments_list(boot_info
) 
 244         if self
.params
.get('test', False): 
 245             # We only download the first fragment 
 246             fragments_list 
= fragments_list
[:1] 
 247         total_frags 
= len(fragments_list
) 
 249         tmpfilename 
= self
.temp_name(filename
) 
 250         (dest_stream
, tmpfilename
) = sanitize_open(tmpfilename
, 'wb') 
 251         write_flv_header(dest_stream
, metadata
) 
 253         # This dict stores the download progress, it's updated by the progress 
 256             'downloaded_bytes': 0, 
 261         def frag_progress_hook(status
): 
 262             frag_total_bytes 
= status
.get('total_bytes', 0) 
 263             estimated_size 
= (state
['downloaded_bytes'] + 
 264                 (total_frags 
- state
['frag_counter']) * frag_total_bytes
) 
 265             if status
['status'] == 'finished': 
 266                 state
['downloaded_bytes'] += frag_total_bytes
 
 267                 state
['frag_counter'] += 1 
 268                 progress 
= self
.calc_percent(state
['frag_counter'], total_frags
) 
 269                 byte_counter 
= state
['downloaded_bytes'] 
 271                 frag_downloaded_bytes 
= status
['downloaded_bytes'] 
 272                 byte_counter 
= state
['downloaded_bytes'] + frag_downloaded_bytes
 
 273                 frag_progress 
= self
.calc_percent(frag_downloaded_bytes
, 
 275                 progress 
= self
.calc_percent(state
['frag_counter'], total_frags
) 
 276                 progress 
+= frag_progress 
/ float(total_frags
) 
 278             eta 
= self
.calc_eta(start
, time
.time(), estimated_size
, byte_counter
) 
 279             self
.report_progress(progress
, format_bytes(estimated_size
), 
 280                 status
.get('speed'), eta
) 
 281         http_dl
.add_progress_hook(frag_progress_hook
) 
 284         for (seg_i
, frag_i
) in fragments_list
: 
 285             name 
= 'Seg%d-Frag%d' % (seg_i
, frag_i
) 
 286             url 
= base_url 
+ name
 
 287             frag_filename 
= '%s-%s' % (tmpfilename
, name
) 
 288             success 
= http_dl
.download(frag_filename
, {'url': url
}) 
 291             with open(frag_filename
, 'rb') as down
: 
 292                 down_data 
= down
.read() 
 293                 reader 
= FlvReader(down_data
) 
 295                     _
, box_type
, box_data 
= reader
.read_box_info() 
 296                     if box_type 
== b
'mdat': 
 297                         dest_stream
.write(box_data
) 
 299             frags_filenames
.append(frag_filename
) 
 301         self
.report_finish(format_bytes(state
['downloaded_bytes']), time
.time() - start
) 
 303         self
.try_rename(tmpfilename
, filename
) 
 304         for frag_file 
in frags_filenames
: 
 307         fsize 
= os
.path
.getsize(encodeFilename(filename
)) 
 308         self
._hook
_progress
({ 
 309             'downloaded_bytes': fsize
, 
 310             'total_bytes': fsize
, 
 311             'filename': filename
, 
 312             'status': 'finished',