Update changelog.

[youtubedl] / youtube_dl / YoutubeDL.py
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py

index b4a966b7035d322173b8595b5a30141ae7299b34..44a272e7ecd9d1a6cf23783b651405faa0aa29cd 100644 (file)
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -13,7 +13,7 @@ import time
  import traceback
  
  from .utils import *
  import traceback
  
  from .utils import *
-from .extractor import get_info_extractor
+from .extractor import get_info_extractor, gen_extractors
  from .FileDownloader import FileDownloader
  
  
  from .FileDownloader import FileDownloader
  
  
@@ -72,13 +72,17 @@ class YoutubeDL(object):
      writeinfojson:     Write the video description to a .info.json file
      writethumbnail:    Write the thumbnail image to a file
      writesubtitles:    Write the video subtitles to a file
      writeinfojson:     Write the video description to a .info.json file
      writethumbnail:    Write the thumbnail image to a file
      writesubtitles:    Write the video subtitles to a file
+    writeautomaticsub: Write the automatic subtitles to a file
      allsubtitles:      Downloads all the subtitles of the video
      allsubtitles:      Downloads all the subtitles of the video
+                       (requires writesubtitles or writeautomaticsub)
      listsubtitles:     Lists all available subtitles for the video
      listsubtitles:     Lists all available subtitles for the video
-    subtitlesformat:   Subtitle format [sbv/srt] (default=srt)
-    subtitleslang:     Language of the subtitles to download
+    subtitlesformat:   Subtitle format [srt/sbv/vtt] (default=srt)
+    subtitleslangs:    List of languages of the subtitles to download
      keepvideo:         Keep the video file after post-processing
      daterange:         A DateRange object, download only if the upload_date is in the range.
      skip_download:     Skip the actual download of the video file
      keepvideo:         Keep the video file after post-processing
      daterange:         A DateRange object, download only if the upload_date is in the range.
      skip_download:     Skip the actual download of the video file
+    cachedir:          Location of the cache files in the filesystem.
+                       None to disable filesystem cache.
      
      The following parameters are not used by YoutubeDL itself, they are used by
      the FileDownloader:
      
      The following parameters are not used by YoutubeDL itself, they are used by
      the FileDownloader:
@@ -96,11 +100,23 @@ class YoutubeDL(object):
      def __init__(self, params):
          """Create a FileDownloader object with the given options."""
          self._ies = []
      def __init__(self, params):
          """Create a FileDownloader object with the given options."""
          self._ies = []
+        self._ies_instances = {}
          self._pps = []
          self._progress_hooks = []
          self._download_retcode = 0
          self._num_downloads = 0
          self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
          self._pps = []
          self._progress_hooks = []
          self._download_retcode = 0
          self._num_downloads = 0
          self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
+
+        if (sys.version_info >= (3,) and sys.platform != 'win32' and
+                sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
+                and not params['restrictfilenames']):
+            # On Python 3, the Unicode filesystem API will throw errors (#1474)
+            self.report_warning(
+                u'Assuming --restrict-filenames since file system encoding '
+                u'cannot encode all characters. '
+                u'Set the LC_ALL environment variable to fix this.')
+            params['restrictfilenames'] = True
+
          self.params = params
          self.fd = FileDownloader(self, self.params)
  
          self.params = params
          self.fd = FileDownloader(self, self.params)
  
@@ -110,8 +126,28 @@ class YoutubeDL(object):
      def add_info_extractor(self, ie):
          """Add an InfoExtractor object to the end of the list."""
          self._ies.append(ie)
      def add_info_extractor(self, ie):
          """Add an InfoExtractor object to the end of the list."""
          self._ies.append(ie)
+        self._ies_instances[ie.ie_key()] = ie
          ie.set_downloader(self)
  
          ie.set_downloader(self)
  
+    def get_info_extractor(self, ie_key):
+        """
+        Get an instance of the IE named ie_key. An instance already cached in
+        _ies_instances is reused; if there is none, a new one is created and
+        added to the extractor list.
+        """
+        ie = self._ies_instances.get(ie_key)
+        if ie is None:
+            ie = get_info_extractor(ie_key)()
+            self.add_info_extractor(ie)
+        return ie
+
+    def add_default_info_extractors(self):
+        """
+        Add the InfoExtractors returned by gen_extractors to the end of the list
+        """
+        for ie in gen_extractors():
+            self.add_info_extractor(ie)
+
      def add_post_processor(self, pp):
          """Add a PostProcessor object to the end of the chain."""
          self._pps.append(pp)
      def add_post_processor(self, pp):
          """Add a PostProcessor object to the end of the chain."""
          self._pps.append(pp)
@@ -119,14 +155,10 @@ class YoutubeDL(object):
  
      def to_screen(self, message, skip_eol=False):
          """Print message to stdout if not in quiet mode."""
  
      def to_screen(self, message, skip_eol=False):
          """Print message to stdout if not in quiet mode."""
-        assert type(message) == type(u'')
          if not self.params.get('quiet', False):
              terminator = [u'\n', u''][skip_eol]
              output = message + terminator
          if not self.params.get('quiet', False):
              terminator = [u'\n', u''][skip_eol]
              output = message + terminator
-            if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr
-                output = output.encode(preferredencoding(), 'ignore')
-            self._screen_file.write(output)
-            self._screen_file.flush()
+            write_string(output, self._screen_file)
  
      def to_stderr(self, message):
          """Print message to stderr."""
  
      def to_stderr(self, message):
          """Print message to stderr."""
@@ -256,7 +288,7 @@ class YoutubeDL(object):
              self.report_error(u'Erroneous output template')
              return None
          except ValueError as err:
              self.report_error(u'Erroneous output template')
              return None
          except ValueError as err:
-            self.report_error(u'Insufficient system charset ' + repr(preferredencoding()))
+            self.report_error(u'Error in output template: ' + str(err) + u' (encoding: ' + repr(preferredencoding()) + ')')
              return None
  
      def _match_entry(self, info_dict):
              return None
  
      def _match_entry(self, info_dict):
@@ -286,9 +318,7 @@ class YoutubeDL(object):
           '''
          
          if ie_key:
           '''
          
          if ie_key:
-            ie = get_info_extractor(ie_key)()
-            ie.set_downloader(self)
-            ies = [ie]
+            ies = [self.get_info_extractor(ie_key)]
          else:
              ies = self._ies
  
          else:
              ies = self._ies
  
@@ -340,6 +370,7 @@ class YoutubeDL(object):
  
          result_type = ie_result.get('_type', 'video') # If not given we suppose it's a video, support the default old system
          if result_type == 'video':
  
          result_type = ie_result.get('_type', 'video') # If not given we suppose it's a video, support the default old system
          if result_type == 'video':
+            ie_result.update(extra_info)
              if 'playlist' not in ie_result:
                  # It isn't part of a playlist
                  ie_result['playlist'] = None
              if 'playlist' not in ie_result:
                  # It isn't part of a playlist
                  ie_result['playlist'] = None
@@ -439,7 +470,8 @@ class YoutubeDL(object):
          if self.params.get('forceid', False):
              compat_print(info_dict['id'])
          if self.params.get('forceurl', False):
          if self.params.get('forceid', False):
              compat_print(info_dict['id'])
          if self.params.get('forceurl', False):
-            compat_print(info_dict['url'])
+            # For RTMP URLs, also include the playpath
+            compat_print(info_dict['url'] + info_dict.get('play_path', u''))
          if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict:
              compat_print(info_dict['thumbnail'])
          if self.params.get('forcedescription', False) and 'description' in info_dict:
          if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict:
              compat_print(info_dict['thumbnail'])
          if self.params.get('forcedescription', False) and 'description' in info_dict:
@@ -470,45 +502,33 @@ class YoutubeDL(object):
                  self.report_writedescription(descfn)
                  with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                      descfile.write(info_dict['description'])
                  self.report_writedescription(descfn)
                  with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                      descfile.write(info_dict['description'])
+            except (KeyError, TypeError):
+                self.report_warning(u'There\'s no description to write.')
              except (OSError, IOError):
                  self.report_error(u'Cannot write description file ' + descfn)
                  return
  
              except (OSError, IOError):
                  self.report_error(u'Cannot write description file ' + descfn)
                  return
  
-        if self.params.get('writesubtitles', False) and 'subtitles' in info_dict and info_dict['subtitles']:
+        subtitles_are_requested = any([self.params.get('writesubtitles', False),
+                                       self.params.get('writeautomaticsub')])
+
+        if  subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
              # subtitles download errors are already managed as troubles in relevant IE
              # that way it will silently go on when used with unsupporting IE
              # subtitles download errors are already managed as troubles in relevant IE
              # that way it will silently go on when used with unsupporting IE
-            subtitle = info_dict['subtitles'][0]
-            (sub_error, sub_lang, sub) = subtitle
+            subtitles = info_dict['subtitles']
              sub_format = self.params.get('subtitlesformat')
              sub_format = self.params.get('subtitlesformat')
-            if sub_error:
-                self.report_warning("Some error while getting the subtitles")
-            else:
+            for sub_lang in subtitles.keys():
+                sub = subtitles[sub_lang]
+                if sub is None:
+                    continue
                  try:
                  try:
-                    sub_filename = filename.rsplit('.', 1)[0] + u'.' + sub_lang + u'.' + sub_format
+                    sub_filename = subtitles_filename(filename, sub_lang, sub_format)
                      self.report_writesubtitles(sub_filename)
                      with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
                      self.report_writesubtitles(sub_filename)
                      with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
-                        subfile.write(sub)
+                            subfile.write(sub)
                  except (OSError, IOError):
                      self.report_error(u'Cannot write subtitles file ' + descfn)
                      return
  
                  except (OSError, IOError):
                      self.report_error(u'Cannot write subtitles file ' + descfn)
                      return
  
-        if self.params.get('allsubtitles', False) and 'subtitles' in info_dict and info_dict['subtitles']:
-            subtitles = info_dict['subtitles']
-            sub_format = self.params.get('subtitlesformat')
-            for subtitle in subtitles:
-                (sub_error, sub_lang, sub) = subtitle
-                if sub_error:
-                    self.report_warning("Some error while getting the subtitles")
-                else:
-                    try:
-                        sub_filename = filename.rsplit('.', 1)[0] + u'.' + sub_lang + u'.' + sub_format
-                        self.report_writesubtitles(sub_filename)
-                        with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
-                                subfile.write(sub)
-                    except (OSError, IOError):
-                        self.report_error(u'Cannot write subtitles file ' + descfn)
-                        return
-
          if self.params.get('writeinfojson', False):
              infofn = filename + u'.info.json'
              self.report_writeinfojson(infofn)
          if self.params.get('writeinfojson', False):
              infofn = filename + u'.info.json'
              self.report_writeinfojson(infofn)
@@ -520,18 +540,20 @@ class YoutubeDL(object):
                  return
  
          if self.params.get('writethumbnail', False):
                  return
  
          if self.params.get('writethumbnail', False):
-            if 'thumbnail' in info_dict:
-                thumb_format = info_dict['thumbnail'].rpartition(u'/')[2].rpartition(u'.')[2]
-                if not thumb_format:
-                    thumb_format = 'jpg'
+            if info_dict.get('thumbnail') is not None:
+                thumb_format = determine_ext(info_dict['thumbnail'], u'jpg')
                  thumb_filename = filename.rpartition('.')[0] + u'.' + thumb_format
                  self.to_screen(u'[%s] %s: Downloading thumbnail ...' %
                                 (info_dict['extractor'], info_dict['id']))
                  thumb_filename = filename.rpartition('.')[0] + u'.' + thumb_format
                  self.to_screen(u'[%s] %s: Downloading thumbnail ...' %
                                 (info_dict['extractor'], info_dict['id']))
-                uf = compat_urllib_request.urlopen(info_dict['thumbnail'])
-                with open(thumb_filename, 'wb') as thumbf:
-                    shutil.copyfileobj(uf, thumbf)
-                self.to_screen(u'[%s] %s: Writing thumbnail to: %s' %
-                               (info_dict['extractor'], info_dict['id'], thumb_filename))
+                try:
+                    uf = compat_urllib_request.urlopen(info_dict['thumbnail'])
+                    with open(thumb_filename, 'wb') as thumbf:
+                        shutil.copyfileobj(uf, thumbf)
+                    self.to_screen(u'[%s] %s: Writing thumbnail to: %s' %
+                        (info_dict['extractor'], info_dict['id'], thumb_filename))
+                except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
+                    self.report_warning(u'Unable to download thumbnail "%s": %s' %
+                        (info_dict['thumbnail'], compat_str(err)))
  
          if not self.params.get('skip_download', False):
              if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
  
          if not self.params.get('skip_download', False):
              if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
@@ -539,11 +561,11 @@ class YoutubeDL(object):
              else:
                  try:
                      success = self.fd._do_download(filename, info_dict)
              else:
                  try:
                      success = self.fd._do_download(filename, info_dict)
-                except (OSError, IOError) as err:
-                    raise UnavailableVideoError()
                  except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                      self.report_error(u'unable to download video data: %s' % str(err))
                      return
                  except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                      self.report_error(u'unable to download video data: %s' % str(err))
                      return
+                except (OSError, IOError) as err:
+                    raise UnavailableVideoError(err)
                  except (ContentTooShortError, ) as err:
                      self.report_error(u'content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
                      return
                  except (ContentTooShortError, ) as err:
                      self.report_error(u'content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
                      return
@@ -587,7 +609,7 @@ class YoutubeDL(object):
                          # No clear decision yet, let IE decide
                          keep_video = keep_video_wish
              except PostProcessingError as e:
                          # No clear decision yet, let IE decide
                          keep_video = keep_video_wish
              except PostProcessingError as e:
-                self.to_stderr(u'ERROR: ' + e.msg)
+                self.report_error(e.msg)
          if keep_video is False and not self.params.get('keepvideo', False):
              try:
                  self.to_screen(u'Deleting original file %s (pass -k to keep)' % filename)
          if keep_video is False and not self.params.get('keepvideo', False):
              try:
                  self.to_screen(u'Deleting original file %s (pass -k to keep)' % filename)