]>
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/YoutubeDL.py 
2a078adfbbc7f7aed7ca31a6aff85d0e6a9c19b2
   2  # -*- coding: utf-8 -*-    4  from  __future__ 
import  absolute_import
  28      compat_urllib_request
,   49      UnavailableVideoError
,   55  from  . extractor 
import  get_info_extractor
,  gen_extractors
  56  from  . FileDownloader 
import  FileDownloader
  57  from  . version 
import  __version__
  60  class  YoutubeDL ( object ):   63      YoutubeDL objects are the ones responsible of downloading the   64      actual video file and writing it to disk if the user has requested   65      it, among some other tasks. In most cases there should be one per   66      program. As, given a video URL, the downloader doesn't know how to   67      extract all the needed information, task that InfoExtractors do, it   68      has to pass the URL to one of them.   70      For this, YoutubeDL objects have a method that allows   71      InfoExtractors to be registered in a given order. When it is passed   72      a URL, the YoutubeDL object handles it to the first InfoExtractor it   73      finds that reports being able to handle it. The InfoExtractor extracts   74      all the information about the video or videos the URL refers to, and   75      YoutubeDL process the extracted information, possibly using a File   76      Downloader to download the video.   78      YoutubeDL objects accept a lot of parameters. In order not to saturate   79      the object constructor with arguments, it receives a dictionary of   80      options instead. These options are available through the params   81      attribute for the InfoExtractors to use. The YoutubeDL also   82      registers itself as the downloader in charge for the InfoExtractors   83      that are added to it, so this is a "mutual registration".   87      username:          Username for authentication purposes.   88      password:          Password for authentication purposes.   89      videopassword:     Password for acces a video.   90      usenetrc:          Use netrc for authentication instead.   91      verbose:           Print additional info to stdout.   92      quiet:             Do not print messages to stdout.   93      forceurl:          Force printing final URL.   94      forcetitle:        Force printing title.   95      forceid:           Force printing ID.   
96      forcethumbnail:    Force printing thumbnail URL.   97      forcedescription:  Force printing description.   98      forcefilename:     Force printing final filename.   99      forceduration:     Force printing duration.  100      forcejson:         Force printing info_dict as JSON.  101      simulate:          Do not download the video files.  102      format:            Video format code.  103      format_limit:      Highest quality format to try.  104      outtmpl:           Template for output names.  105      restrictfilenames: Do not allow "&" and spaces in file names  106      ignoreerrors:      Do not stop on download errors.  107      nooverwrites:      Prevent overwriting files.  108      playliststart:     Playlist item to start at.  109      playlistend:       Playlist item to end at.  110      matchtitle:        Download only matching titles.  111      rejecttitle:       Reject downloads for matching titles.  112      logger:            Log messages to a logging.Logger instance.  113      logtostderr:       Log messages to stderr instead of stdout.  
114      writedescription:  Write the video description to a .description file  115      writeinfojson:     Write the video description to a .info.json file  116      writeannotations:  Write the video annotations to a .annotations.xml file  117      writethumbnail:    Write the thumbnail image to a file  118      writesubtitles:    Write the video subtitles to a file  119      writeautomaticsub: Write the automatic subtitles to a file  120      allsubtitles:      Downloads all the subtitles of the video  121                         (requires writesubtitles or writeautomaticsub)  122      listsubtitles:     Lists all available subtitles for the video  123      subtitlesformat:   Subtitle format [srt/sbv/vtt] (default=srt)  124      subtitleslangs:    List of languages of the subtitles to download  125      keepvideo:         Keep the video file after post-processing  126      daterange:         A DateRange object, download only if the upload_date is in the range.  127      skip_download:     Skip the actual download of the video file  128      cachedir:          Location of the cache files in the filesystem.  129                         None to disable filesystem cache.  130      noplaylist:        Download single video instead of a playlist if in doubt.  131      age_limit:         An integer representing the user's age in years.  132                         Unsuitable videos for the given age are skipped.  133      min_views:         An integer representing the minimum view count the video  134                         must have in order to not be skipped.  135                         Videos without view count information are always  136                         downloaded. None for no limit.  137      max_views:         An integer representing the maximum view count.  138                         Videos that are more popular than that are not  140                         Videos without view count information are always  141                         downloaded. 
None for no limit.  142      download_archive:  File name of a file where all downloads are recorded.  143                         Videos already present in the file are not downloaded  145      cookiefile:        File name where cookies should be read from and dumped to.  146      nocheckcertificate:Do not verify SSL certificates  147      proxy:             URL of the proxy server to use  148      socket_timeout:    Time to wait for unresponsive hosts, in seconds  149      bidi_workaround:   Work around buggy terminals without bidirectional text  150                         support, using fridibi  152      The following parameters are not used by YoutubeDL itself, they are used by  154      nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,  155      noresizebuffer, retries, continuedl, noprogress, consoletitle  161      _download_retcode 
=  None  162      _num_downloads 
=  None  165      def  __init__ ( self
,  params
= None ):  166          """Create a FileDownloader object with the given options."""  168          self
._ ies
_ instances 
= {}  170          self
._ progress
_ hooks 
= []  171          self
._ download
_ retcode 
=  0  172          self
._ num
_ downloads 
=  0  173          self
._ screen
_ file 
= [ sys
. stdout
,  sys
. stderr
][ params
. get ( 'logtostderr' ,  False )]  174          self
._ err
_ file 
=  sys
. stderr
 175          self
. params 
= {}  if  params 
is None else  params
 177          if  params
. get ( 'bidi_workaround' ,  False ):  180                  master
,  slave 
=  pty
. openpty ()  181                  width 
=  get_term_width ()  185                      width_args 
= [ '-w' ,  str ( width
)]  186                  self
._ fribidi 
=  subprocess
. Popen (  187                      [ 'fribidi' ,  '-c' ,  'UTF-8' ] +  width_args
,  188                      stdin
= subprocess
. PIPE
,  190                      stderr
= self
._ err
_ file
)  191                  self
._ fribidi
_ channel 
=  os
. fdopen ( master
,  'rb' )  192              except  OSError  as  ose
:  194                      self
. report_warning ( u
'Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.' )  198          if  ( sys
. version_info 
>= ( 3 ,)  and  sys
. platform 
!=  'win32'  and  199                  sys
. getfilesystemencoding ()  in  [ 'ascii' ,  'ANSI_X3.4-1968' ]  200                  and not  params
[ 'restrictfilenames' ]):  201              # On Python 3, the Unicode filesystem API will throw errors (#1474)  203                  u
'Assuming --restrict-filenames since file system encoding '  204                  u
'cannot encode all charactes. '  205                  u
'Set the LC_ALL environment variable to fix this.' )  206              self
. params
[ 'restrictfilenames' ] =  True  208          self
. fd 
=  FileDownloader ( self
,  self
. params
)  210          if  ' %(stitle)s '  in  self
. params
. get ( 'outtmpl' ,  '' ):  211              self
. report_warning ( u
' %(stitle)s  is deprecated. Use the  %(title)s  and the --restrict-filenames flag(which also secures  %(uploader)s  et al) instead.' )  215      def  add_info_extractor ( self
,  ie
):  216          """Add an InfoExtractor object to the end of the list."""  218          self
._ ies
_ instances
[ ie
. ie_key ()] =  ie
 219          ie
. set_downloader ( self
)  221      def  get_info_extractor ( self
,  ie_key
):  223          Get an instance of an IE with name ie_key, it will try to get one from  224          the _ies list, if there's no instance it will create a new one and add  225          it to the extractor list.  227          ie 
=  self
._ ies
_ instances
. get ( ie_key
)  229              ie 
=  get_info_extractor ( ie_key
)()  230              self
. add_info_extractor ( ie
)  233      def  add_default_info_extractors ( self
):  235          Add the InfoExtractors returned by gen_extractors to the end of the list  237          for  ie 
in  gen_extractors ():  238              self
. add_info_extractor ( ie
)  240      def  add_post_processor ( self
,  pp
):  241          """Add a PostProcessor object to the end of the chain."""  243          pp
. set_downloader ( self
)  245      def  _bidi_workaround ( self
,  message
):  246          if not  hasattr ( self
,  '_fribidi_channel' ):  249          assert  type ( message
) ==  type ( u
'' )  250          line_count 
=  message
. count ( u
' \n ' ) +  1  251          self
._ fribidi
. stdin
. write (( message 
+  u
' \n ' ). encode ( 'utf-8' ))  252          self
._ fribidi
. stdin
. flush ()  253          res 
=  u
'' . join ( self
._ fribidi
_ channel
. readline (). decode ( 'utf-8' )  254                         for  _ 
in  range ( line_count
))  255          return  res
[:- len ( u
' \n ' )]  257      def  to_screen ( self
,  message
,  skip_eol
= False ):  258          """Print message to stdout if not in quiet mode."""  259          return  self
. to_stdout ( message
,  skip_eol
,  check_quiet
= True )  261      def  to_stdout ( self
,  message
,  skip_eol
= False ,  check_quiet
= False ):  262          """Print message to stdout if not in quiet mode."""  263          if  self
. params
. get ( 'logger' ):  264              self
. params
[ 'logger' ]. debug ( message
)  265          elif not  check_quiet 
or not  self
. params
. get ( 'quiet' ,  False ):  266              message 
=  self
._ bidi
_ workaround
( message
)  267              terminator 
= [ u
' \n ' ,  u
'' ][ skip_eol
]  268              output 
=  message 
+  terminator
 270              write_string ( output
,  self
._ screen
_ file
)  272      def  to_stderr ( self
,  message
):  273          """Print message to stderr."""  274          assert  type ( message
) ==  type ( u
'' )  275          if  self
. params
. get ( 'logger' ):  276              self
. params
[ 'logger' ]. error ( message
)  278              message 
=  self
._ bidi
_ workaround
( message
)  279              output 
=  message 
+  u
' \n '  280              write_string ( output
,  self
._ err
_ file
)  282      def  to_console_title ( self
,  message
):  283          if not  self
. params
. get ( 'consoletitle' ,  False ):  285          if  os
. name 
==  'nt'  and  ctypes
. windll
. kernel32
. GetConsoleWindow ():  286              # c_wchar_p() might not be necessary if `message` is  287              # already of type unicode()  288              ctypes
. windll
. kernel32
. SetConsoleTitleW ( ctypes
. c_wchar_p ( message
))  289          elif  'TERM'  in  os
. environ
:  290              write_string ( u
' \033 ]0; %s \007 '  %  message
,  self
._ screen
_ file
)  292      def  save_console_title ( self
):  293          if not  self
. params
. get ( 'consoletitle' ,  False ):  295          if  'TERM'  in  os
. environ
:  296              # Save the title on stack  297              write_string ( u
' \033 [22;0t' ,  self
._ screen
_ file
)  299      def  restore_console_title ( self
):  300          if not  self
. params
. get ( 'consoletitle' ,  False ):  302          if  'TERM'  in  os
. environ
:  303              # Restore the title from stack  304              write_string ( u
' \033 [23;0t' ,  self
._ screen
_ file
)  307          self
. save_console_title ()  310      def  __exit__ ( self
, * args
):  311          self
. restore_console_title ()  313          if  self
. params
. get ( 'cookiefile' )  is not None :  314              self
. cookiejar
. save ()  316      def  trouble ( self
,  message
= None ,  tb
= None ):  317          """Determine action to take when a download problem appears.  319          Depending on if the downloader has been configured to ignore  320          download errors or not, this method may throw an exception or  321          not when errors are found, after printing the message.  323          tb, if given, is additional traceback information.  325          if  message 
is not None :  326              self
. to_stderr ( message
)  327          if  self
. params
. get ( 'verbose' ):  329                  if  sys
. exc_info ()[ 0 ]:   # if .trouble has been called from an except block  331                      if  hasattr ( sys
. exc_info ()[ 1 ],  'exc_info' )  and  sys
. exc_info ()[ 1 ]. exc_info
[ 0 ]:  332                          tb 
+=  u
'' . join ( traceback
. format_exception (* sys
. exc_info ()[ 1 ]. exc_info
))  333                      tb 
+=  compat_str ( traceback
. format_exc ())  335                      tb_data 
=  traceback
. format_list ( traceback
. extract_stack ())  336                      tb 
=  u
'' . join ( tb_data
)  338          if not  self
. params
. get ( 'ignoreerrors' ,  False ):  339              if  sys
. exc_info ()[ 0 ]  and  hasattr ( sys
. exc_info ()[ 1 ],  'exc_info' )  and  sys
. exc_info ()[ 1 ]. exc_info
[ 0 ]:  340                  exc_info 
=  sys
. exc_info ()[ 1 ]. exc_info
 342                  exc_info 
=  sys
. exc_info ()  343              raise  DownloadError ( message
,  exc_info
)  344          self
._ download
_ retcode 
=  1  346      def  report_warning ( self
,  message
):  348          Print the message to stderr, it will be prefixed with 'WARNING:'  349          If stderr is a tty file the 'WARNING:' will be colored  351          if  self
._ err
_ file
. isatty ()  and  os
. name 
!=  'nt' :  352              _msg_header 
=  u
' \033 [0;33mWARNING: \033 [0m'  354              _msg_header 
=  u
'WARNING:'  355          warning_message 
=  u
' %s %s '  % ( _msg_header
,  message
)  356          self
. to_stderr ( warning_message
)  358      def  report_error ( self
,  message
,  tb
= None ):  360          Do the same as trouble, but prefixes the message with 'ERROR:', colored  361          in red if stderr is a tty file.  363          if  self
._ err
_ file
. isatty ()  and  os
. name 
!=  'nt' :  364              _msg_header 
=  u
' \033 [0;31mERROR: \033 [0m'  366              _msg_header 
=  u
'ERROR:'  367          error_message 
=  u
' %s %s '  % ( _msg_header
,  message
)  368          self
. trouble ( error_message
,  tb
)  370      def  report_file_already_downloaded ( self
,  file_name
):  371          """Report file has already been fully downloaded."""  373              self
. to_screen ( u
'[download]  %s  has already been downloaded'  %  file_name
)  374          except  UnicodeEncodeError :  375              self
. to_screen ( u
'[download] The file has already been downloaded' )  377      def  increment_downloads ( self
):  378          """Increment the ordinal that assigns a number to each file."""  379          self
._ num
_ downloads 
+=  1  381      def  prepare_filename ( self
,  info_dict
):  382          """Generate the output filename."""  384              template_dict 
=  dict ( info_dict
)  386              template_dict
[ 'epoch' ] =  int ( time
. time ())  387              autonumber_size 
=  self
. params
. get ( 'autonumber_size' )  388              if  autonumber_size 
is None :  390              autonumber_templ 
=  u
' %0 '  +  str ( autonumber_size
) +  u
'd'  391              template_dict
[ 'autonumber' ] =  autonumber_templ 
%  self
._ num
_ downloads
 392              if  template_dict
. get ( 'playlist_index' )  is not None :  393                  template_dict
[ 'playlist_index' ] =  u
' %0 5d'  %  template_dict
[ 'playlist_index' ]  395              sanitize 
=  lambda  k
,  v
:  sanitize_filename (  397                  restricted
= self
. params
. get ( 'restrictfilenames' ),  399              template_dict 
=  dict (( k
,  sanitize ( k
,  v
))  400                                   for  k
,  v 
in  template_dict
. items ()  402              template_dict 
=  collections
. defaultdict ( lambda :  u
'NA' ,  template_dict
)  404              tmpl 
=  os
. path
. expanduser ( self
. params
[ 'outtmpl' ])  405              filename 
=  tmpl 
%  template_dict
 407          except  ValueError  as  err
:  408              self
. report_error ( u
'Error in output template: '  +  str ( err
) +  u
' (encoding: '  +  repr ( preferredencoding ()) +  ')' )  411      def  _match_entry ( self
,  info_dict
):  412          """ Returns None iff the file should be downloaded """  414          video_title 
=  info_dict
. get ( 'title' ,  info_dict
. get ( 'id' ,  u
'video' ))  415          if  'title'  in  info_dict
:  416              # This can happen when we're just evaluating the playlist  417              title 
=  info_dict
[ 'title' ]  418              matchtitle 
=  self
. params
. get ( 'matchtitle' ,  False )  420                  if not  re
. search ( matchtitle
,  title
,  re
. IGNORECASE
):  421                      return  u
'"'  +  title 
+  '" title did not match pattern "'  +  matchtitle 
+  '"'  422              rejecttitle 
=  self
. params
. get ( 'rejecttitle' ,  False )  424                  if  re
. search ( rejecttitle
,  title
,  re
. IGNORECASE
):  425                      return  u
'"'  +  title 
+  '" title matched reject pattern "'  +  rejecttitle 
+  '"'  426          date 
=  info_dict
. get ( 'upload_date' ,  None )  428              dateRange 
=  self
. params
. get ( 'daterange' ,  DateRange ())  429              if  date 
not in  dateRange
:  430                  return  u
' %s  upload date is not in range  %s '  % ( date_from_str ( date
). isoformat (),  dateRange
)  431          view_count 
=  info_dict
. get ( 'view_count' ,  None )  432          if  view_count 
is not None :  433              min_views 
=  self
. params
. get ( 'min_views' )  434              if  min_views 
is not None and  view_count 
<  min_views
:  435                  return  u
'Skipping  %s , because it has not reached minimum view count ( %d / %d )'  % ( video_title
,  view_count
,  min_views
)  436              max_views 
=  self
. params
. get ( 'max_views' )  437              if  max_views 
is not None and  view_count 
>  max_views
:  438                  return  u
'Skipping  %s , because it has exceeded the maximum view count ( %d / %d )'  % ( video_title
,  view_count
,  max_views
)  439          age_limit 
=  self
. params
. get ( 'age_limit' )  440          if  age_limit 
is not None :  441              if  age_limit 
<  info_dict
. get ( 'age_limit' ,  0 ):  442                  return  u
'Skipping "'  +  title 
+  '" because it is age restricted'  443          if  self
. in_download_archive ( info_dict
):  444              return  u
' %s  has already been recorded in archive'  %  video_title
 448      def  add_extra_info ( info_dict
,  extra_info
):  449          '''Set the keys from extra_info in info dict if they are missing'''  450          for  key
,  value 
in  extra_info
. items ():  451              info_dict
. setdefault ( key
,  value
)  453      def  extract_info ( self
,  url
,  download
= True ,  ie_key
= None ,  extra_info
={},  456          Returns a list with a dictionary for each video we find.  457          If 'download', also downloads the videos.  458          extra_info is a dict containing the extra values to add to each result  462              ies 
= [ self
. get_info_extractor ( ie_key
)]  467              if not  ie
. suitable ( url
):  471                  self
. report_warning ( u
'The program functionality for this site has been marked as broken, '  472                                      u
'and will probably not work.' )  475                  ie_result 
=  ie
. extract ( url
)  476                  if  ie_result 
is None :  # Finished already (backwards compatibility; listformats and friends should be moved here)  478                  if  isinstance ( ie_result
,  list ):  479                      # Backwards compatibility: old IE result format  481                          '_type' :  'compat_list' ,  482                          'entries' :  ie_result
,  484                  self
. add_extra_info ( ie_result
,  486                          'extractor' :  ie
. IE_NAME
,  488                          'webpage_url_basename' :  url_basename ( url
),  489                          'extractor_key' :  ie
. ie_key (),  492                      return  self
. process_ie_result ( ie_result
,  download
,  extra_info
)  495              except  ExtractorError 
as  de
:  # An error we somewhat expected  496                  self
. report_error ( compat_str ( de
),  de
. format_traceback ())  498              except  Exception  as  e
:  499                  if  self
. params
. get ( 'ignoreerrors' ,  False ):  500                      self
. report_error ( compat_str ( e
),  tb
= compat_str ( traceback
. format_exc ()))  505              self
. report_error ( u
'no suitable InfoExtractor:  %s '  %  url
)  507      def  process_ie_result ( self
,  ie_result
,  download
= True ,  extra_info
={}):  509          Take the result of the ie(may be modified) and resolve all unresolved  510          references (URLs, playlist items).  512          It will also download the videos if 'download'.  513          Returns the resolved ie_result.  516          result_type 
=  ie_result
. get ( '_type' ,  'video' )  # If not given we suppose it's a video, support the default old system  517          if  result_type 
==  'video' :  518              self
. add_extra_info ( ie_result
,  extra_info
)  519              return  self
. process_video_result ( ie_result
,  download
= download
)  520          elif  result_type 
==  'url' :  521              # We have to add extra_info to the results because it may be  522              # contained in a playlist  523              return  self
. extract_info ( ie_result
[ 'url' ],  525                                       ie_key
= ie_result
. get ( 'ie_key' ),  526                                       extra_info
= extra_info
)  527          elif  result_type 
==  'url_transparent' :  528              # Use the information from the embedding page  529              info 
=  self
. extract_info (  530                  ie_result
[ 'url' ],  ie_key
= ie_result
. get ( 'ie_key' ),  531                  extra_info
= extra_info
,  download
= False ,  process
= False )  533              def  make_result ( embedded_info
):  534                  new_result 
=  ie_result
. copy ()  535                  for  f 
in  ( '_type' ,  'url' ,  'ext' ,  'player_url' ,  'formats' ,  536                            'entries' ,  'urlhandle' ,  'ie_key' ,  'duration' ,  537                            'subtitles' ,  'annotations' ,  'format' ,  538                            'thumbnail' ,  'thumbnails' ):  541                      if  f 
in  embedded_info
:  542                          new_result
[ f
] =  embedded_info
[ f
]  544              new_result 
=  make_result ( info
)  546              assert  new_result
. get ( '_type' ) !=  'url_transparent'  547              if  new_result
. get ( '_type' ) ==  'compat_list' :  548                  new_result
[ 'entries' ] = [  549                      make_result ( e
)  for  e 
in  new_result
[ 'entries' ]]  551              return  self
. process_ie_result (  552                  new_result
,  download
= download
,  extra_info
= extra_info
)  553          elif  result_type 
==  'playlist' :  554              # We process each entry in the playlist  555              playlist 
=  ie_result
. get ( 'title' ,  None )  or  ie_result
. get ( 'id' ,  None )  556              self
. to_screen ( u
'[download] Downloading playlist:  %s '  %  playlist
)  558              playlist_results 
= []  560              n_all_entries 
=  len ( ie_result
[ 'entries' ])  561              playliststart 
=  self
. params
. get ( 'playliststart' ,  1 ) -  1  562              playlistend 
=  self
. params
. get ( 'playlistend' ,  None )  563              # For backwards compatibility, interpret -1 as whole list  564              if  playlistend 
== - 1 :  567              entries 
=  ie_result
[ 'entries' ][ playliststart
: playlistend
]  568              n_entries 
=  len ( entries
)  571                  u
"[ %s ] playlist ' %s ': Collected  %d  video ids (downloading  %d  of them)"  %  572                  ( ie_result
[ 'extractor' ],  playlist
,  n_all_entries
,  n_entries
))  574              for  i
,  entry 
in  enumerate ( entries
,  1 ):  575                  self
. to_screen ( u
'[download] Downloading video # %s  of  %s '  % ( i
,  n_entries
))  577                      'playlist' :  playlist
,  578                      'playlist_index' :  i 
+  playliststart
,  579                      'extractor' :  ie_result
[ 'extractor' ],  580                      'webpage_url' :  ie_result
[ 'webpage_url' ],  581                      'webpage_url_basename' :  url_basename ( ie_result
[ 'webpage_url' ]),  582                      'extractor_key' :  ie_result
[ 'extractor_key' ],  585                  reason 
=  self
._ match
_ entry
( entry
)  586                  if  reason 
is not None :  587                      self
. to_screen ( u
'[download] '  +  reason
)  590                  entry_result 
=  self
. process_ie_result ( entry
,  593                  playlist_results
. append ( entry_result
)  594              ie_result
[ 'entries' ] =  playlist_results
 596          elif  result_type 
==  'compat_list' :  598                  self
. add_extra_info ( r
,  600                          'extractor' :  ie_result
[ 'extractor' ],  601                          'webpage_url' :  ie_result
[ 'webpage_url' ],  602                          'webpage_url_basename' :  url_basename ( ie_result
[ 'webpage_url' ]),  603                          'extractor_key' :  ie_result
[ 'extractor_key' ],  606              ie_result
[ 'entries' ] = [  607                  self
. process_ie_result ( _fixup ( r
),  download
,  extra_info
)  608                  for  r 
in  ie_result
[ 'entries' ]  612              raise  Exception ( 'Invalid result type:  %s '  %  result_type
)  614      def  select_format ( self
,  format_spec
,  available_formats
):  615          if  format_spec 
==  'best'  or  format_spec 
is None :  616              return  available_formats
[- 1 ]  617          elif  format_spec 
==  'worst' :  618              return  available_formats
[ 0 ]  620              extensions 
= [ u
'mp4' ,  u
'flv' ,  u
'webm' ,  u
'3gp' ]  621              if  format_spec 
in  extensions
:  622                  filter_f 
=  lambda  f
:  f
[ 'ext' ] ==  format_spec
 624                  filter_f 
=  lambda  f
:  f
[ 'format_id' ] ==  format_spec
 625              matches 
=  list ( filter ( filter_f
,  available_formats
))  630      def  process_video_result ( self
,  info_dict
,  download
= True ):  631          assert  info_dict
. get ( '_type' ,  'video' ) ==  'video'  633          if  'playlist'  not in  info_dict
:  634              # It isn't part of a playlist  635              info_dict
[ 'playlist' ] =  None  636              info_dict
[ 'playlist_index' ] =  None  638          # This extractors handle format selection themselves  639          if  info_dict
[ 'extractor' ]  in  [ u
'youtube' ,  u
'Youku' ]:  641                  self
. process_info ( info_dict
)  644          # We now pick which formats have to be downloaded  645          if  info_dict
. get ( 'formats' )  is None :  646              # There's only one format available  647              formats 
= [ info_dict
]  649              formats 
=  info_dict
[ 'formats' ]  651          # We check that all the formats have the format and format_id fields  652          for  ( i
,  format
)  in  enumerate ( formats
):  653              if  format
. get ( 'format_id' )  is None :  654                  format
[ 'format_id' ] =  compat_str ( i
)  655              if  format
. get ( 'format' )  is None :  656                  format
[ 'format' ] =  u
' {id}  -  {res}{note} ' . format (  657                      id = format
[ 'format_id' ],  658                      res
= self
. format_resolution ( format
),  659                      note
= u
' ( {0} )' . format ( format
[ 'format_note' ])  if  format
. get ( 'format_note' )  is not None else  '' ,  661              # Automatically determine file extension if missing  662              if  'ext'  not in  format
:  663                  format
[ 'ext' ] =  determine_ext ( format
[ 'url' ])  665          if  self
. params
. get ( 'listformats' ,  None ):  666              self
. list_formats ( info_dict
)  669          format_limit 
=  self
. params
. get ( 'format_limit' ,  None )  671              formats 
=  list ( takewhile_inclusive (  672                  lambda  f
:  f
[ 'format_id' ] !=  format_limit
,  formats
 674          if  self
. params
. get ( 'prefer_free_formats' ):  675              def  _free_formats_key ( f
):  677                      ext_ord 
= [ u
'flv' ,  u
'mp4' ,  u
'webm' ]. index ( f
[ 'ext' ])  680                  # We only compare the extension if they have the same height and width  681                  return  ( f
. get ( 'height' ),  f
. get ( 'width' ),  ext_ord
)  682              formats 
=  sorted ( formats
,  key
= _free_formats_key
)  684          req_format 
=  self
. params
. get ( 'format' ,  'best' )  685          if  req_format 
is None :  687          formats_to_download 
= []  688          # The -1 is for supporting YoutubeIE  689          if  req_format 
in  ( '-1' ,  'all' ):  690              formats_to_download 
=  formats
 692              # We can accept formats requestd in the format: 34/5/best, we pick  693              # the first that is available, starting from left  694              req_formats 
=  req_format
. split ( '/' )  695              for  rf 
in  req_formats
:  696                  selected_format 
=  self
. select_format ( rf
,  formats
)  697                  if  selected_format 
is not None :  698                      formats_to_download 
= [ selected_format
]  700          if not  formats_to_download
:  701              raise  ExtractorError ( u
'requested format not available' ,  705              if  len ( formats_to_download
) >  1 :  706                  self
. to_screen ( u
'[info]  %s : downloading video in  %s  formats'  % ( info_dict
[ 'id' ],  len ( formats_to_download
)))  707              for  format 
in  formats_to_download
:  708                  new_info 
=  dict ( info_dict
)  709                  new_info
. update ( format
)  710                  self
. process_info ( new_info
)  711          # We update the info dict with the best quality format (backwards compatibility)  712          info_dict
. update ( formats_to_download
def process_info(self, info_dict):
    """Process a single resolved IE result.

    Normalises the title/format fields of ``info_dict`` in place, prints
    whatever the ``force*`` options request and, unless ``simulate`` is
    set, writes the requested side files (description, annotations,
    subtitles, info JSON, thumbnail), downloads the video, runs the
    postprocessors and finally records the video in the download archive.

    Returns None.  Raises MaxDownloadsReached when the download counter
    exceeds the ``max_downloads`` option and UnavailableVideoError when
    the download itself fails with an OSError/IOError.
    """
    assert info_dict.get('_type', 'video') == 'video'
    # We increment the download count here to match the previous behaviour.
    self.increment_downloads()

    info_dict['fulltitle'] = info_dict['title']
    if len(info_dict['title']) > 200:
        info_dict['title'] = info_dict['title'][:197] + u'...'

    # Keep for backwards compatibility
    info_dict['stitle'] = info_dict['title']

    if 'format' not in info_dict:
        info_dict['format'] = info_dict['ext']

    reason = self._match_entry(info_dict)
    if reason is not None:
        self.to_screen(u'[download] ' + reason)
        return

    max_downloads = self.params.get('max_downloads')
    if max_downloads is not None:
        if self._num_downloads > int(max_downloads):
            raise MaxDownloadsReached()

    filename = self.prepare_filename(info_dict)

    # Forced printings
    if self.params.get('forcetitle', False):
        self.to_stdout(info_dict['fulltitle'])
    if self.params.get('forceid', False):
        self.to_stdout(info_dict['id'])
    if self.params.get('forceurl', False):
        # For RTMP URLs, also include the playpath
        self.to_stdout(info_dict['url'] + info_dict.get('play_path', u''))
    if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
        self.to_stdout(info_dict['thumbnail'])
    if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
        self.to_stdout(info_dict['description'])
    if self.params.get('forcefilename', False) and filename is not None:
        self.to_stdout(filename)
    if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
        self.to_stdout(formatSeconds(info_dict['duration']))
    if self.params.get('forceformat', False):
        self.to_stdout(info_dict['format'])
    if self.params.get('forcejson', False):
        info_dict['_filename'] = filename
        self.to_stdout(json.dumps(info_dict))

    # Do nothing else if in simulate mode
    if self.params.get('simulate', False):
        return

    if filename is None:
        return

    try:
        dn = os.path.dirname(encodeFilename(filename))
        if dn != '' and not os.path.exists(dn):
            os.makedirs(dn)
    except (OSError, IOError) as err:
        self.report_error(u'unable to create directory ' + compat_str(err))
        return

    if self.params.get('writedescription', False):
        descfn = filename + u'.description'
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
            self.to_screen(u'[info] Video description is already present')
        else:
            try:
                self.to_screen(u'[info] Writing video description to: ' + descfn)
                with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                    descfile.write(info_dict['description'])
            except (KeyError, TypeError):
                # Missing key or a None description: nothing to write.
                self.report_warning(u'There\'s no description to write.')
            except (OSError, IOError):
                self.report_error(u'Cannot write description file ' + descfn)
                return

    if self.params.get('writeannotations', False):
        annofn = filename + u'.annotations.xml'
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
            self.to_screen(u'[info] Video annotations are already present')
        else:
            try:
                self.to_screen(u'[info] Writing video annotations to: ' + annofn)
                with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                    annofile.write(info_dict['annotations'])
            except (KeyError, TypeError):
                self.report_warning(u'There are no annotations to write.')
            except (OSError, IOError):
                self.report_error(u'Cannot write annotations file: ' + annofn)
                return

    subtitles_are_requested = any([self.params.get('writesubtitles', False),
                                   self.params.get('writeautomaticsub')])

    if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
        # subtitles download errors are already managed as troubles in relevant IE
        # that way it will silently go on when used with unsupporting IE
        subtitles = info_dict['subtitles']
        sub_format = self.params.get('subtitlesformat', 'srt')
        for sub_lang, sub in subtitles.items():
            if sub is None:
                continue
            # Computed outside the try block so the error message below can
            # always name the subtitle file.  The previous code reported
            # `descfn`, which belongs to the description branch and is
            # undefined unless descriptions were requested as well.
            sub_filename = subtitles_filename(filename, sub_lang, sub_format)
            try:
                if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
                    self.to_screen(u'[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
                else:
                    self.to_screen(u'[info] Writing video subtitles to: ' + sub_filename)
                    with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
                        subfile.write(sub)
            except (OSError, IOError):
                self.report_error(u'Cannot write subtitles file ' + sub_filename)
                return

    if self.params.get('writeinfojson', False):
        infofn = os.path.splitext(filename)[0] + u'.info.json'
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
            self.to_screen(u'[info] Video description metadata is already present')
        else:
            self.to_screen(u'[info] Writing video description metadata as JSON to: ' + infofn)
            try:
                # 'urlhandle' is left out of the JSON dump.
                json_info_dict = dict((k, v) for k, v in info_dict.items() if k != 'urlhandle')
                write_json_file(json_info_dict, encodeFilename(infofn))
            except (OSError, IOError):
                self.report_error(u'Cannot write metadata to JSON file ' + infofn)
                return

    if self.params.get('writethumbnail', False):
        if info_dict.get('thumbnail') is not None:
            thumb_format = determine_ext(info_dict['thumbnail'], u'jpg')
            thumb_filename = os.path.splitext(filename)[0] + u'.' + thumb_format
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
                self.to_screen(u'[%s] %s: Thumbnail is already present' %
                               (info_dict['extractor'], info_dict['id']))
            else:
                self.to_screen(u'[%s] %s: Downloading thumbnail ...' %
                               (info_dict['extractor'], info_dict['id']))
                try:
                    uf = compat_urllib_request.urlopen(info_dict['thumbnail'])
                    # Encode the name like every other file written here.
                    with open(encodeFilename(thumb_filename), 'wb') as thumbf:
                        shutil.copyfileobj(uf, thumbf)
                    self.to_screen(u'[%s] %s: Writing thumbnail to: %s' %
                                   (info_dict['extractor'], info_dict['id'], thumb_filename))
                except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                    # A missing thumbnail is not fatal for the download.
                    self.report_warning(u'Unable to download thumbnail "%s": %s' %
                                        (info_dict['thumbnail'], compat_str(err)))

    if not self.params.get('skip_download', False):
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
            # The file is already there: treat it as downloaded.
            success = True
        else:
            try:
                success = self.fd._do_download(filename, info_dict)
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                self.report_error(u'unable to download video data: %s' % str(err))
                return
            except (OSError, IOError) as err:
                raise UnavailableVideoError(err)
            except (ContentTooShortError, ) as err:
                self.report_error(u'content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
                return

        if success:
            try:
                self.post_process(filename, info_dict)
            except (PostProcessingError) as err:
                self.report_error(u'postprocessing: %s' % str(err))
                return

    self.record_download_archive(info_dict)
def download(self, url_list):
    """Download a given list of URLs.

    Raises SameFileError when several URLs would be written to one fixed
    output name (no '%' in the template and max_downloads is not 1).
    MaxDownloadsReached is reported and then re-raised.  Returns the
    accumulated download return code.
    """
    several_urls = len(url_list) > 1
    if (several_urls
            and '%' not in self.params['outtmpl']
            and self.params.get('max_downloads') != 1):
        raise SameFileError(self.params['outtmpl'])

    for url in url_list:
        try:
            # extract_info also downloads the videos
            self.extract_info(url)
        except UnavailableVideoError:
            self.report_error(u'unable to download video')
        except MaxDownloadsReached:
            self.to_screen(u'[info] Maximum number of downloaded files reached.')
            raise

    return self._download_retcode
 911      def  download_with_info_file ( self
,  info_filename
):  912          with  io
. open ( info_filename
,  'r' ,  encoding
= 'utf-8' )  as  f
:  915              self
. process_ie_result ( info
,  download
= True )  916          except  DownloadError
:  917              webpage_url 
=  info
. get ( 'webpage_url' )  918              if  webpage_url 
is not None :  919                  self
. report_warning ( u
'The info failed to download, trying with " %s "'  %  webpage_url
)  920                  return  self
. download ([ webpage_url
])  923          return  self
._ download
_ retcode
 925      def  post_process ( self
,  filename
,  ie_info
):  926          """Run all the postprocessors on the given file."""  928          info
[ 'filepath' ] =  filename
 932                  keep_video_wish
,  new_info 
=  pp
. run ( info
)  933                  if  keep_video_wish 
is not None :  935                          keep_video 
=  keep_video_wish
 936                      elif  keep_video 
is None :  937                          # No clear decision yet, let IE decide  938                          keep_video 
=  keep_video_wish
 939              except  PostProcessingError 
as  e
:  940                  self
. report_error ( e
. msg
)  941          if  keep_video 
is False and not  self
. params
. get ( 'keepvideo' ,  False ):  943                  self
. to_screen ( u
'Deleting original file  %s  (pass -k to keep)'  %  filename
)  944                  os
. remove ( encodeFilename ( filename
))  945              except  ( IOError ,  OSError ):  946                  self
. report_warning ( u
def _make_archive_id(self, info_dict):
    """Return the download-archive identifier for info_dict.

    The identifier is '<extractor key, lower-cased> <video id>'.  None is
    returned when the information is incomplete, i.e. when the video id
    or the extractor key ('extractor_key', or 'ie_key' for playlist
    entries) is missing.
    """
    if 'id' not in info_dict:
        # Without an id no archive entry can be built.  Previously this
        # raised KeyError when 'extractor_key' was present on its own.
        return None
    # Future-proof against any change in case
    # and backwards compatibility with prior versions
    extractor = info_dict.get('extractor_key')
    if extractor is None:
        extractor = info_dict.get('ie_key')  # key in a playlist
    if extractor is None:
        return None  # Incomplete video information
    return extractor.lower() + u' ' + info_dict['id']
def in_download_archive(self, info_dict):
    """Tell whether the video in info_dict is listed in the download archive.

    Returns False when no 'download_archive' file is configured, when no
    archive id can be built for info_dict, or when the archive file does
    not exist yet.  Other I/O errors are re-raised.
    """
    archive_path = self.params.get('download_archive')
    if archive_path is None:
        return False

    wanted = self._make_archive_id(info_dict)
    if wanted is None:
        return False  # Incomplete video information

    try:
        with locked_file(archive_path, 'r', encoding='utf-8') as archive_file:
            if any(entry.strip() == wanted for entry in archive_file):
                return True
    except IOError as ioe:
        # A missing archive simply means nothing was recorded so far.
        if ioe.errno != errno.ENOENT:
            raise
    return False
def record_download_archive(self, info_dict):
    """Append the archive id of info_dict to the download archive file.

    Does nothing when the 'download_archive' option is not set.
    """
    archive_path = self.params.get('download_archive')
    if archive_path is None:
        return
    entry = self._make_archive_id(info_dict)
    # NOTE(review): this line was lost in the extract; a sanity check on
    # the id is assumed here -- confirm against the original file.
    assert entry
    with locked_file(archive_path, 'a', encoding='utf-8') as archive_file:
        archive_file.write(entry + u'\n')
# NOTE(review): list_formats calls this as self.format_resolution(fmt) with a
# single argument, so inside the class it has to be wrapped as a static
# method; no decorator line is visible in this extract -- confirm.
def format_resolution(format, default='unknown'):
    """Return a printable resolution for a format dict.

    'audio only' when the video codec is 'none'; otherwise the explicit
    '_resolution' entry, 'WIDTHxHEIGHT', 'HEIGHTp', or `default`, in that
    order of preference.
    """
    if format.get('vcodec') == 'none':
        return 'audio only'

    explicit = format.get('_resolution')
    if explicit is not None:
        return explicit

    height = format.get('height')
    if height is None:
        return default

    width = format.get('width')
    if width is None:
        return u'%sp' % height
    return u'%sx%s' % (width, height)
def list_formats(self, info_dict):
    """Print a table of the formats available for info_dict.

    One row per entry of info_dict['formats'] (or info_dict itself when
    that key is absent) with format code, extension, resolution and a
    note; with more than one format the first row is tagged '(worst)' and
    the last '(best)'.  The table goes to to_screen.
    """
    def format_note(fdict):
        # Video segment: free-form note, then codec (or the word 'video'
        # when only a bitrate is known), then the video bitrate.
        vcodec = fdict.get('vcodec')
        vbr = fdict.get('vbr')
        video = u''
        if fdict.get('format_note') is not None:
            video += fdict['format_note'] + u' '
        if vcodec is not None and vcodec != 'none':
            video += u'%-5s' % vcodec
        elif vbr is not None:
            video += u'video'
        if vbr is not None:
            video += u'@%4dk' % vbr

        # Audio segment, built the same way.
        acodec = fdict.get('acodec')
        abr = fdict.get('abr')
        audio = u''
        if acodec is not None:
            audio += u'%-5s' % acodec
        elif abr is not None:
            audio += 'audio'
        if abr is not None:
            audio += u'@%3dk' % abr

        segments = [segment for segment in (video, audio) if segment]
        if fdict.get('filesize') is not None:
            segments.append(format_bytes(fdict['filesize']))
        return u', '.join(segments)

    def line(format, idlen=20):
        # The id column is one character wider than the longest id.
        return u'%-*s%-10s%-12s%s' % (
            idlen + 1,
            format['format_id'],
            format['ext'],
            self.format_resolution(format),
            format_note(format),
        )

    formats = info_dict.get('formats', [info_dict])
    longest_id = max(len(f['format_id']) for f in formats)
    idlen = max(len(u'format code'), longest_id)
    formats_s = [line(f, idlen) for f in formats]
    if len(formats) > 1:
        # Separate the tag from the note with a space only if there is one.
        formats_s[0] += (' ' if format_note(formats[0]) else '') + '(worst)'
        formats_s[-1] += (' ' if format_note(formats[-1]) else '') + '(best)'

    header_line = line({
        'format_id': u'format code', 'ext': u'extension',
        '_resolution': u'resolution', 'format_note': u'note'}, idlen=idlen)
    table = u"\n".join(formats_s)
    self.to_screen(u'[info] Available formats for %s:\n%s\n%s' %
                   (info_dict['id'], header_line, table))
def urlopen(self, req):
    """Start an HTTP download: open `req` with this object's opener."""
    opener = self._opener
    return opener.open(req)
def print_debug_header(self):
    """Write version, git revision, Python and proxy details for debugging.

    Does nothing unless the 'verbose' option is set.  The git revision is
    only printed when `git rev-parse` can be run in the package directory
    and returns a plain hexadecimal hash.
    """
    if not self.params.get('verbose'):
        return
    write_string(u'[debug] youtube-dl version ' + __version__ + u'\n')
    try:
        sp = subprocess.Popen(
            ['git', 'rev-parse', '--short', 'HEAD'],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
            cwd=os.path.dirname(os.path.abspath(__file__)))
        out, err = sp.communicate()
        out = out.decode().strip()
        # Anchored so that only a complete hexadecimal hash is accepted,
        # not any output that merely starts with a hex digit.
        if re.match('[0-9a-f]+$', out):
            write_string(u'[debug] Git HEAD: ' + out + u'\n')
    except Exception:
        # NOTE(review): the handler lines were lost in this extract.  The
        # probe is treated as best-effort (git missing, not a checkout,
        # undecodable output), so failures are ignored -- confirm.
        pass
    write_string(u'[debug] Python version %s - %s' %
                 (platform.python_version(), platform_name()) + u'\n')

    # Merge the proxy tables of every handler that has one.
    proxy_map = {}
    for handler in self._opener.handlers:
        if hasattr(handler, 'proxies'):
            proxy_map.update(handler.proxies)
    write_string(u'[debug] Proxy map: ' + compat_str(proxy_map) + u'\n')
def _setup_opener(self):
    """Build the URL opener used for all requests and install it globally.

    Reads the 'socket_timeout', 'cookiefile', 'proxy' and
    'nocheckcertificate' options, sets self.cookiejar and self._opener,
    installs the opener process-wide and sets the default socket timeout
    (600 seconds unless 'socket_timeout' is given).
    """
    timeout_val = self.params.get('socket_timeout')
    if timeout_val is None:
        timeout = 600
    else:
        timeout = float(timeout_val)

    opts_cookiefile = self.params.get('cookiefile')
    opts_proxy = self.params.get('proxy')

    if opts_cookiefile is not None:
        self.cookiejar = compat_cookiejar.MozillaCookieJar(opts_cookiefile)
        # Only load the jar when the file is readable.
        if os.access(opts_cookiefile, os.R_OK):
            self.cookiejar.load()
    else:
        self.cookiejar = compat_cookiejar.CookieJar()

    cookie_processor = compat_urllib_request.HTTPCookieProcessor(self.cookiejar)

    if opts_proxy is None:
        proxies = compat_urllib_request.getproxies()
        # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
        if 'http' in proxies and 'https' not in proxies:
            proxies['https'] = proxies['http']
    elif opts_proxy == '':
        # An empty proxy option yields an empty proxy table.
        proxies = {}
    else:
        proxies = {'http': opts_proxy, 'https': opts_proxy}

    proxy_handler = compat_urllib_request.ProxyHandler(proxies)
    no_cert_check = self.params.get('nocheckcertificate', False)
    https_handler = make_HTTPS_handler(no_cert_check)
    opener = compat_urllib_request.build_opener(
        https_handler, proxy_handler, cookie_processor, YoutubeDLHandler())
    # Delete the default user-agent header, which would otherwise apply in
    # cases where our custom HTTP handler doesn't come into play
    # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
    opener.addheaders = []
    self._opener = opener

    # TODO remove this global modification
    compat_urllib_request.install_opener(opener)
    socket.setdefaulttimeout(timeout)