Imported Upstream version 2013.11.11

author Rogério Brito <rbrito@ime.usp.br>
Tue, 12 Nov 2013 00:27:50 +0000 (22:27 -0200)
committer Rogério Brito <rbrito@ime.usp.br>
Tue, 12 Nov 2013 00:27:50 +0000 (22:27 -0200)
diff --git a/README.md b/README.md

index 2b8db0cfc2c9f7f78be5f3fb69fdeb345562d1b5..6632e5865585fa45eed03b25121b4e4528628fbc 100644 (file)
--- a/README.md
+++ b/README.md
@@ -79,24 +79,27 @@ which means you can modify it, redistribute it or use it however you like.
                                 different, %(autonumber)s to get an automatically
                                 incremented number, %(ext)s for the filename
                                 extension, %(format)s for the format description
                                 different, %(autonumber)s to get an automatically
                                 incremented number, %(ext)s for the filename
                                 extension, %(format)s for the format description
-                               (like "22 - 1280x720" or "HD")%(upload_date)s for
-                               the upload date (YYYYMMDD), %(extractor)s for the
-                               provider (youtube, metacafe, etc), %(id)s for the
-                               video id , %(playlist)s for the playlist the
-                               video is in, %(playlist_index)s for the position
-                               in the playlist and %% for a literal percent. Use
-                               - to output to stdout. Can also be used to
-                               download to a different directory, for example
-                               with -o '/my/downloads/%(uploader)s/%(title)s-%(i
-                               d)s.%(ext)s' .
+                               (like "22 - 1280x720" or "HD"), %(format_id)s for
+                               the unique id of the format (like Youtube's
+                               itags: "137"), %(upload_date)s for the upload
+                               date (YYYYMMDD), %(extractor)s for the provider
+                               (youtube, metacafe, etc), %(id)s for the video
+                               id, %(playlist)s for the playlist the video is
+                               in, %(playlist_index)s for the position in the
+                               playlist and %% for a literal percent. Use - to
+                               output to stdout. Can also be used to download to
+                               a different directory, for example with -o '/my/d
+                               ownloads/%(uploader)s/%(title)s-%(id)s.%(ext)s' .
      --autonumber-size NUMBER   Specifies the number of digits in %(autonumber)s
                                 when it is present in output filename template or
      --autonumber-size NUMBER   Specifies the number of digits in %(autonumber)s
                                 when it is present in output filename template or
-                               --autonumber option is given
+                               --auto-number option is given
      --restrict-filenames       Restrict filenames to only ASCII characters, and
                                 avoid "&" and spaces in filenames
      -a, --batch-file FILE      file containing URLs to download ('-' for stdin)
      -w, --no-overwrites        do not overwrite files
      --restrict-filenames       Restrict filenames to only ASCII characters, and
                                 avoid "&" and spaces in filenames
      -a, --batch-file FILE      file containing URLs to download ('-' for stdin)
      -w, --no-overwrites        do not overwrite files
-    -c, --continue             resume partially downloaded files
+    -c, --continue             force resume of partially downloaded files. By
+                               default, youtube-dl will resume downloads if
+                               possible.
      --no-continue              do not resume partially downloaded files (restart
                                 from beginning)
      --cookies FILE             file to read cookies from and dump cookie jar in
      --no-continue              do not resume partially downloaded files (restart
                                 from beginning)
      --cookies FILE             file to read cookies from and dump cookie jar in
@@ -126,6 +129,8 @@ which means you can modify it, redistribute it or use it however you like.
      -v, --verbose              print various debugging information
      --dump-intermediate-pages  print downloaded pages to debug problems(very
                                 verbose)
      -v, --verbose              print various debugging information
      --dump-intermediate-pages  print downloaded pages to debug problems(very
                                 verbose)
+    --write-pages              Write downloaded pages to files in the current
+                               directory
  
  ## Video Format Options:
      -f, --format FORMAT        video format code, specifiy the order of
  
  ## Video Format Options:
      -f, --format FORMAT        video format code, specifiy the order of
diff --git a/README.txt b/README.txt

index b773576051a69521382eda4a3314d89334e04487..4b400fd9fe5ad8bba8e112190f34ee7f3d050a93 100644 (file)
--- a/README.txt
+++ b/README.txt
@@ -92,24 +92,27 @@ Filesystem Options:
                                 different, %(autonumber)s to get an automatically
                                 incremented number, %(ext)s for the filename
                                 extension, %(format)s for the format description
                                 different, %(autonumber)s to get an automatically
                                 incremented number, %(ext)s for the filename
                                 extension, %(format)s for the format description
-                               (like "22 - 1280x720" or "HD")%(upload_date)s for
-                               the upload date (YYYYMMDD), %(extractor)s for the
-                               provider (youtube, metacafe, etc), %(id)s for the
-                               video id , %(playlist)s for the playlist the
-                               video is in, %(playlist_index)s for the position
-                               in the playlist and %% for a literal percent. Use
-                               - to output to stdout. Can also be used to
-                               download to a different directory, for example
-                               with -o '/my/downloads/%(uploader)s/%(title)s-%(i
-                               d)s.%(ext)s' .
+                               (like "22 - 1280x720" or "HD"), %(format_id)s for
+                               the unique id of the format (like Youtube's
+                               itags: "137"), %(upload_date)s for the upload
+                               date (YYYYMMDD), %(extractor)s for the provider
+                               (youtube, metacafe, etc), %(id)s for the video
+                               id, %(playlist)s for the playlist the video is
+                               in, %(playlist_index)s for the position in the
+                               playlist and %% for a literal percent. Use - to
+                               output to stdout. Can also be used to download to
+                               a different directory, for example with -o '/my/d
+                               ownloads/%(uploader)s/%(title)s-%(id)s.%(ext)s' .
      --autonumber-size NUMBER   Specifies the number of digits in %(autonumber)s
                                 when it is present in output filename template or
      --autonumber-size NUMBER   Specifies the number of digits in %(autonumber)s
                                 when it is present in output filename template or
-                               --autonumber option is given
+                               --auto-number option is given
      --restrict-filenames       Restrict filenames to only ASCII characters, and
                                 avoid "&" and spaces in filenames
      -a, --batch-file FILE      file containing URLs to download ('-' for stdin)
      -w, --no-overwrites        do not overwrite files
      --restrict-filenames       Restrict filenames to only ASCII characters, and
                                 avoid "&" and spaces in filenames
      -a, --batch-file FILE      file containing URLs to download ('-' for stdin)
      -w, --no-overwrites        do not overwrite files
-    -c, --continue             resume partially downloaded files
+    -c, --continue             force resume of partially downloaded files. By
+                               default, youtube-dl will resume downloads if
+                               possible.
      --no-continue              do not resume partially downloaded files (restart
                                 from beginning)
      --cookies FILE             file to read cookies from and dump cookie jar in
      --no-continue              do not resume partially downloaded files (restart
                                 from beginning)
      --cookies FILE             file to read cookies from and dump cookie jar in
@@ -141,6 +144,8 @@ Verbosity / Simulation Options:
      -v, --verbose              print various debugging information
      --dump-intermediate-pages  print downloaded pages to debug problems(very
                                 verbose)
      -v, --verbose              print various debugging information
      --dump-intermediate-pages  print downloaded pages to debug problems(very
                                 verbose)
+    --write-pages              Write downloaded pages to files in the current
+                               directory
  
  Video Format Options:
  ---------------------
  
  Video Format Options:
  ---------------------
diff --git a/devscripts/check-porn.py b/devscripts/check-porn.py

new file mode 100644 (file)

index 0000000..63401fe
--- /dev/null
+++ b/devscripts/check-porn.py
@@ -0,0 +1,39 @@
+#!/usr/bin/env python
+
+"""
+This script employs a VERY basic heuristic ('porn' in webpage.lower()) to check
+if we are not 'age_limit' tagging some porn site
+"""
+
+# Allow direct execution
+import os
+import sys
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from test.helper import get_testcases
+from youtube_dl.utils import compat_urllib_request
+
+for test in get_testcases():
+    try:
+        webpage = compat_urllib_request.urlopen(test['url'], timeout=10).read()
+    except:
+        print('\nFail: {0}'.format(test['name']))
+        continue
+
+    webpage = webpage.decode('utf8', 'replace')
+
+    if 'porn' in webpage.lower() and ('info_dict' not in test
+                                      or 'age_limit' not in test['info_dict']
+                                      or test['info_dict']['age_limit'] != 18):
+        print('\nPotential missing age_limit check: {0}'.format(test['name']))
+
+    elif 'porn' not in webpage.lower() and ('info_dict' in test and
+                                            'age_limit' in test['info_dict'] and
+                                            test['info_dict']['age_limit'] == 18):
+        print('\nPotential false negative: {0}'.format(test['name']))
+
+    else:
+        sys.stdout.write('.')
+    sys.stdout.flush()
+
+print()
diff --git a/setup.py b/setup.py

index 2391c527298299a616551d77a3a7a07d764df38d..aa7cfca0862b1f4ba2cfd220fd570ca63bcfda7e 100644 (file)
--- a/setup.py
+++ b/setup.py
@@ -8,8 +8,10 @@ import sys
  
  try:
      from setuptools import setup
  
  try:
      from setuptools import setup
+    setuptools_available = True
  except ImportError:
      from distutils.core import setup
  except ImportError:
      from distutils.core import setup
+    setuptools_available = False
  
  try:
      # This will create an exe that needs Microsoft Visual C++ 2008
  
  try:
      # This will create an exe that needs Microsoft Visual C++ 2008
@@ -43,13 +45,16 @@ if len(sys.argv) >= 2 and sys.argv[1] == 'py2exe':
      params = py2exe_params
  else:
      params = {
      params = py2exe_params
  else:
      params = {
-        'scripts': ['bin/youtube-dl'],
          'data_files': [  # Installing system-wide would require sudo...
              ('etc/bash_completion.d', ['youtube-dl.bash-completion']),
              ('share/doc/youtube_dl', ['README.txt']),
              ('share/man/man1/', ['youtube-dl.1'])
          ]
      }
          'data_files': [  # Installing system-wide would require sudo...
              ('etc/bash_completion.d', ['youtube-dl.bash-completion']),
              ('share/doc/youtube_dl', ['README.txt']),
              ('share/man/man1/', ['youtube-dl.1'])
          ]
      }
+    if setuptools_available:
+        params['entry_points'] = {'console_scripts': ['youtube-dl = youtube_dl:main']}
+    else:
+        params['scripts'] = ['bin/youtube-dl']
  
  # Get the version from youtube_dl/version.py without importing the package
  exec(compile(open('youtube_dl/version.py').read(),
  
  # Get the version from youtube_dl/version.py without importing the package
  exec(compile(open('youtube_dl/version.py').read(),
diff --git a/test/helper.py b/test/helper.py

index 777119ea5fa6fe7b43ae5efe53e9c7685be347b8..d7bf7a82802e58f0a80d788de83146d3a9d3fadf 100644 (file)
--- a/test/helper.py
+++ b/test/helper.py
@@ -5,9 +5,11 @@ import json
  import os.path
  import re
  import types
  import os.path
  import re
  import types
+import sys
  
  import youtube_dl.extractor
  from youtube_dl import YoutubeDL
  
  import youtube_dl.extractor
  from youtube_dl import YoutubeDL
+from youtube_dl.utils import preferredencoding
  
  
  def global_setup():
  
  
  def global_setup():
@@ -33,6 +35,21 @@ def try_rm(filename):
              raise
  
  
              raise
  
  
+def report_warning(message):
+    '''
+    Print the message to stderr, it will be prefixed with 'WARNING:'
+    If stderr is a tty file the 'WARNING:' will be colored
+    '''
+    if sys.stderr.isatty() and os.name != 'nt':
+        _msg_header = u'\033[0;33mWARNING:\033[0m'
+    else:
+        _msg_header = u'WARNING:'
+    output = u'%s %s\n' % (_msg_header, message)
+    if 'b' in getattr(sys.stderr, 'mode', '') or sys.version_info[0] < 3:
+        output = output.encode(preferredencoding())
+    sys.stderr.write(output)
+
+
  class FakeYDL(YoutubeDL):
      def __init__(self, override=None):
          # Different instances of the downloader can't share the same dictionary
  class FakeYDL(YoutubeDL):
      def __init__(self, override=None):
          # Different instances of the downloader can't share the same dictionary
diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py

index f8cd1bdce9a64ac87ec83ab15130148f580a5291..58cf9c313607020d1493b420f8b93e18ccccd474 100644 (file)
--- a/test/test_YoutubeDL.py
+++ b/test/test_YoutubeDL.py
@@ -62,10 +62,10 @@ class TestFormatSelection(unittest.TestCase):
  
      def test_format_limit(self):
          formats = [
  
      def test_format_limit(self):
          formats = [
-            {u'format_id': u'meh'},
-            {u'format_id': u'good'},
-            {u'format_id': u'great'},
-            {u'format_id': u'excellent'},
+            {u'format_id': u'meh', u'url': u'http://example.com/meh'},
+            {u'format_id': u'good', u'url': u'http://example.com/good'},
+            {u'format_id': u'great', u'url': u'http://example.com/great'},
+            {u'format_id': u'excellent', u'url': u'http://example.com/exc'},
          ]
          info_dict = {
              u'formats': formats, u'extractor': u'test', 'id': 'testvid'}
          ]
          info_dict = {
              u'formats': formats, u'extractor': u'test', 'id': 'testvid'}
@@ -128,6 +128,18 @@ class TestFormatSelection(unittest.TestCase):
          downloaded = ydl.downloaded_info_dicts[0]
          self.assertEqual(downloaded['format_id'], u'35')
  
          downloaded = ydl.downloaded_info_dicts[0]
          self.assertEqual(downloaded['format_id'], u'35')
  
+    def test_add_extra_info(self):
+        test_dict = {
+            'extractor': 'Foo',
+        }
+        extra_info = {
+            'extractor': 'Bar',
+            'playlist': 'funny videos',
+        }
+        YDL.add_extra_info(test_dict, extra_info)
+        self.assertEqual(test_dict['extractor'], 'Foo')
+        self.assertEqual(test_dict['playlist'], 'funny videos')
+
  
  if __name__ == '__main__':
      unittest.main()
  
  if __name__ == '__main__':
      unittest.main()
diff --git a/test/test_dailymotion_subtitles.py b/test/test_dailymotion_subtitles.py

deleted file mode 100644 (file)

index c596415..0000000
--- a/test/test_dailymotion_subtitles.py
+++ /dev/null
@@ -1,70 +0,0 @@
-#!/usr/bin/env python
-
-# Allow direct execution
-import os
-import sys
-import unittest
-sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-
-from test.helper import FakeYDL, global_setup, md5
-global_setup()
-
-
-from youtube_dl.extractor import DailymotionIE
-
-class TestDailymotionSubtitles(unittest.TestCase):
-    def setUp(self):
-        self.DL = FakeYDL()
-        self.url = 'http://www.dailymotion.com/video/xczg00'
-    def getInfoDict(self):
-        IE = DailymotionIE(self.DL)
-        info_dict = IE.extract(self.url)
-        return info_dict
-    def getSubtitles(self):
-        info_dict = self.getInfoDict()
-        return info_dict[0]['subtitles']
-    def test_no_writesubtitles(self):
-        subtitles = self.getSubtitles()
-        self.assertEqual(subtitles, None)
-    def test_subtitles(self):
-        self.DL.params['writesubtitles'] = True
-        subtitles = self.getSubtitles()
-        self.assertEqual(md5(subtitles['en']), '976553874490cba125086bbfea3ff76f')
-    def test_subtitles_lang(self):
-        self.DL.params['writesubtitles'] = True
-        self.DL.params['subtitleslangs'] = ['fr']
-        subtitles = self.getSubtitles()
-        self.assertEqual(md5(subtitles['fr']), '594564ec7d588942e384e920e5341792')
-    def test_allsubtitles(self):
-        self.DL.params['writesubtitles'] = True
-        self.DL.params['allsubtitles'] = True
-        subtitles = self.getSubtitles()
-        self.assertEqual(len(subtitles.keys()), 5)
-    def test_list_subtitles(self):
-        self.DL.expect_warning(u'Automatic Captions not supported by this server')
-        self.DL.params['listsubtitles'] = True
-        info_dict = self.getInfoDict()
-        self.assertEqual(info_dict, None)
-    def test_automatic_captions(self):
-        self.DL.expect_warning(u'Automatic Captions not supported by this server')
-        self.DL.params['writeautomaticsub'] = True
-        self.DL.params['subtitleslang'] = ['en']
-        subtitles = self.getSubtitles()
-        self.assertTrue(len(subtitles.keys()) == 0)
-    def test_nosubtitles(self):
-        self.DL.expect_warning(u'video doesn\'t have subtitles')
-        self.url = 'http://www.dailymotion.com/video/x12u166_le-zapping-tele-star-du-08-aout-2013_tv'
-        self.DL.params['writesubtitles'] = True
-        self.DL.params['allsubtitles'] = True
-        subtitles = self.getSubtitles()
-        self.assertEqual(len(subtitles), 0)
-    def test_multiple_langs(self):
-        self.DL.params['writesubtitles'] = True
-        langs = ['es', 'fr', 'de']
-        self.DL.params['subtitleslangs'] = langs
-        subtitles = self.getSubtitles()
-        for lang in langs:
-            self.assertTrue(subtitles.get(lang) is not None, u'Subtitles for \'%s\' not extracted' % lang)
-
-if __name__ == '__main__':
-    unittest.main()
diff --git a/test/test_download.py b/test/test_download.py

index b9a9be11d9686243ed2a1d5b748db4bc04712c54..16f2008094fca751f69ee01a38f16a6fa0c3fb53 100644 (file)
--- a/test/test_download.py
+++ b/test/test_download.py
@@ -6,7 +6,14 @@ import sys
  import unittest
  sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
  
  import unittest
  sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
  
-from test.helper import get_params, get_testcases, global_setup, try_rm, md5
+from test.helper import (
+    get_params,
+    get_testcases,
+    global_setup,
+    try_rm,
+    md5,
+    report_warning
+)
  global_setup()
  
  
  global_setup()
  
  
@@ -19,10 +26,12 @@ import youtube_dl.YoutubeDL
  from youtube_dl.utils import (
      compat_str,
      compat_urllib_error,
  from youtube_dl.utils import (
      compat_str,
      compat_urllib_error,
+    compat_HTTPError,
      DownloadError,
      ExtractorError,
      UnavailableVideoError,
  )
      DownloadError,
      ExtractorError,
      UnavailableVideoError,
  )
+from youtube_dl.extractor import get_info_extractor
  
  RETRIES = 3
  
  
  RETRIES = 3
  
@@ -55,17 +64,25 @@ def generator(test_case):
  
      def test_template(self):
          ie = youtube_dl.extractor.get_info_extractor(test_case['name'])
  
      def test_template(self):
          ie = youtube_dl.extractor.get_info_extractor(test_case['name'])
+        other_ies = [get_info_extractor(ie_key) for ie_key in test_case.get('add_ie', [])]
          def print_skipping(reason):
              print('Skipping %s: %s' % (test_case['name'], reason))
          def print_skipping(reason):
              print('Skipping %s: %s' % (test_case['name'], reason))
-        if not ie._WORKING:
+        if not ie.working():
              print_skipping('IE marked as not _WORKING')
              return
              print_skipping('IE marked as not _WORKING')
              return
-        if 'playlist' not in test_case and not test_case['file']:
-            print_skipping('No output file specified')
-            return
+        if 'playlist' not in test_case:
+            info_dict = test_case.get('info_dict', {})
+            if not test_case.get('file') and not (info_dict.get('id') and info_dict.get('ext')):
+                print_skipping('The output file cannot be know, the "file" '
+                    'key is missing or the info_dict is incomplete')
+                return
          if 'skip' in test_case:
              print_skipping(test_case['skip'])
              return
          if 'skip' in test_case:
              print_skipping(test_case['skip'])
              return
+        for other_ie in other_ies:
+            if not other_ie.working():
+                print_skipping(u'test depends on %sIE, marked as not WORKING' % other_ie.ie_key())
+                return
  
          params = get_params(test_case.get('params', {}))
  
  
          params = get_params(test_case.get('params', {}))
  
@@ -77,35 +94,47 @@ def generator(test_case):
                  finished_hook_called.add(status['filename'])
          ydl.fd.add_progress_hook(_hook)
  
                  finished_hook_called.add(status['filename'])
          ydl.fd.add_progress_hook(_hook)
  
+        def get_tc_filename(tc):
+            return tc.get('file') or ydl.prepare_filename(tc.get('info_dict', {}))
+
          test_cases = test_case.get('playlist', [test_case])
          test_cases = test_case.get('playlist', [test_case])
-        for tc in test_cases:
-            try_rm(tc['file'])
-            try_rm(tc['file'] + '.part')
-            try_rm(tc['file'] + '.info.json')
+        def try_rm_tcs_files():
+            for tc in test_cases:
+                tc_filename = get_tc_filename(tc)
+                try_rm(tc_filename)
+                try_rm(tc_filename + '.part')
+                try_rm(tc_filename + '.info.json')
+        try_rm_tcs_files()
          try:
          try:
-            for retry in range(1, RETRIES + 1):
+            try_num = 1
+            while True:
                  try:
                      ydl.download([test_case['url']])
                  except (DownloadError, ExtractorError) as err:
                  try:
                      ydl.download([test_case['url']])
                  except (DownloadError, ExtractorError) as err:
-                    if retry == RETRIES: raise
-
                      # Check if the exception is not a network related one
                      # Check if the exception is not a network related one
-                    if not err.exc_info[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
+                    if not err.exc_info[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError) or (err.exc_info[0] == compat_HTTPError and err.exc_info[1].code == 503):
                          raise
  
                          raise
  
-                    print('Retrying: {0} failed tries\n\n##########\n\n'.format(retry))
+                    if try_num == RETRIES:
+                        report_warning(u'Failed due to network errors, skipping...')
+                        return
+
+                    print('Retrying: {0} failed tries\n\n##########\n\n'.format(try_num))
+
+                    try_num += 1
                  else:
                      break
  
              for tc in test_cases:
                  else:
                      break
  
              for tc in test_cases:
+                tc_filename = get_tc_filename(tc)
                  if not test_case.get('params', {}).get('skip_download', False):
                  if not test_case.get('params', {}).get('skip_download', False):
-                    self.assertTrue(os.path.exists(tc['file']), msg='Missing file ' + tc['file'])
-                    self.assertTrue(tc['file'] in finished_hook_called)
-                self.assertTrue(os.path.exists(tc['file'] + '.info.json'))
+                    self.assertTrue(os.path.exists(tc_filename), msg='Missing file ' + tc_filename)
+                    self.assertTrue(tc_filename in finished_hook_called)
+                self.assertTrue(os.path.exists(tc_filename + '.info.json'))
                  if 'md5' in tc:
                  if 'md5' in tc:
-                    md5_for_file = _file_md5(tc['file'])
+                    md5_for_file = _file_md5(tc_filename)
                      self.assertEqual(md5_for_file, tc['md5'])
                      self.assertEqual(md5_for_file, tc['md5'])
-                with io.open(tc['file'] + '.info.json', encoding='utf-8') as infof:
+                with io.open(tc_filename + '.info.json', encoding='utf-8') as infof:
                      info_dict = json.load(infof)
                  for (info_field, expected) in tc.get('info_dict', {}).items():
                      if isinstance(expected, compat_str) and expected.startswith('md5:'):
                      info_dict = json.load(infof)
                  for (info_field, expected) in tc.get('info_dict', {}).items():
                      if isinstance(expected, compat_str) and expected.startswith('md5:'):
@@ -125,11 +154,11 @@ def generator(test_case):
                  # Check for the presence of mandatory fields
                  for key in ('id', 'url', 'title', 'ext'):
                      self.assertTrue(key in info_dict.keys() and info_dict[key])
                  # Check for the presence of mandatory fields
                  for key in ('id', 'url', 'title', 'ext'):
                      self.assertTrue(key in info_dict.keys() and info_dict[key])
+                # Check for mandatory fields that are automatically set by YoutubeDL
+                for key in ['webpage_url', 'extractor', 'extractor_key']:
+                    self.assertTrue(info_dict.get(key), u'Missing field: %s' % key)
          finally:
          finally:
-            for tc in test_cases:
-                try_rm(tc['file'])
-                try_rm(tc['file'] + '.part')
-                try_rm(tc['file'] + '.info.json')
+            try_rm_tcs_files()
  
      return test_template
  
  
      return test_template
  
diff --git a/test/test_playlists.py b/test/test_playlists.py

index d6a8d56df99609e50ea5885d2f5a3eb48b72cf37..706b6bdca1399284263106b755fdf9278c5d17d5 100644 (file)
--- a/test/test_playlists.py
+++ b/test/test_playlists.py
@@ -17,9 +17,11 @@ from youtube_dl.extractor import (
      DailymotionUserIE,
      VimeoChannelIE,
      UstreamChannelIE,
      DailymotionUserIE,
      VimeoChannelIE,
      UstreamChannelIE,
+    SoundcloudSetIE,
      SoundcloudUserIE,
      LivestreamIE,
      NHLVideocenterIE,
      SoundcloudUserIE,
      LivestreamIE,
      NHLVideocenterIE,
+    BambuserChannelIE,
  )
  
  
  )
  
  
@@ -60,6 +62,14 @@ class TestPlaylists(unittest.TestCase):
          self.assertEqual(result['id'], u'5124905')
          self.assertTrue(len(result['entries']) >= 11)
  
          self.assertEqual(result['id'], u'5124905')
          self.assertTrue(len(result['entries']) >= 11)
  
+    def test_soundcloud_set(self):
+        dl = FakeYDL()
+        ie = SoundcloudSetIE(dl)
+        result = ie.extract('https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep')
+        self.assertIsPlaylist(result)
+        self.assertEqual(result['title'], u'The Royal Concept EP')
+        self.assertTrue(len(result['entries']) >= 6)
+
      def test_soundcloud_user(self):
          dl = FakeYDL()
          ie = SoundcloudUserIE(dl)
      def test_soundcloud_user(self):
          dl = FakeYDL()
          ie = SoundcloudUserIE(dl)
@@ -85,5 +95,13 @@ class TestPlaylists(unittest.TestCase):
          self.assertEqual(result['title'], u'Highlights')
          self.assertEqual(len(result['entries']), 12)
  
          self.assertEqual(result['title'], u'Highlights')
          self.assertEqual(len(result['entries']), 12)
  
+    def test_bambuser_channel(self):
+        dl = FakeYDL()
+        ie = BambuserChannelIE(dl)
+        result = ie.extract('http://bambuser.com/channel/pixelversity')
+        self.assertIsPlaylist(result)
+        self.assertEqual(result['title'], u'pixelversity')
+        self.assertTrue(len(result['entries']) >= 66)
+
  if __name__ == '__main__':
      unittest.main()
  if __name__ == '__main__':
      unittest.main()
diff --git a/test/test_subtitles.py b/test/test_subtitles.py

new file mode 100644 (file)

index 0000000..06a3048
--- /dev/null
+++ b/test/test_subtitles.py
@@ -0,0 +1,211 @@
+#!/usr/bin/env python
+
+# Allow direct execution
+import os
+import sys
+import unittest
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from test.helper import FakeYDL, global_setup, md5
+global_setup()
+
+
+from youtube_dl.extractor import (
+    YoutubeIE,
+    DailymotionIE,
+    TEDIE,
+)
+
+
+class BaseTestSubtitles(unittest.TestCase):
+    url = None
+    IE = None
+    def setUp(self):
+        self.DL = FakeYDL()
+        self.ie = self.IE(self.DL)
+
+    def getInfoDict(self):
+        info_dict = self.ie.extract(self.url)
+        return info_dict
+
+    def getSubtitles(self):
+        info_dict = self.getInfoDict()
+        return info_dict['subtitles']
+
+
+class TestYoutubeSubtitles(BaseTestSubtitles):
+    url = 'QRS8MkLhQmM'
+    IE = YoutubeIE
+
+    def getSubtitles(self):
+        info_dict = self.getInfoDict()
+        return info_dict[0]['subtitles']
+
+    def test_youtube_no_writesubtitles(self):
+        self.DL.params['writesubtitles'] = False
+        subtitles = self.getSubtitles()
+        self.assertEqual(subtitles, None)
+
+    def test_youtube_subtitles(self):
+        self.DL.params['writesubtitles'] = True
+        subtitles = self.getSubtitles()
+        self.assertEqual(md5(subtitles['en']), '4cd9278a35ba2305f47354ee13472260')
+
+    def test_youtube_subtitles_lang(self):
+        self.DL.params['writesubtitles'] = True
+        self.DL.params['subtitleslangs'] = ['it']
+        subtitles = self.getSubtitles()
+        self.assertEqual(md5(subtitles['it']), '164a51f16f260476a05b50fe4c2f161d')
+
+    def test_youtube_allsubtitles(self):
+        self.DL.params['writesubtitles'] = True
+        self.DL.params['allsubtitles'] = True
+        subtitles = self.getSubtitles()
+        self.assertEqual(len(subtitles.keys()), 13)
+
+    def test_youtube_subtitles_sbv_format(self):
+        self.DL.params['writesubtitles'] = True
+        self.DL.params['subtitlesformat'] = 'sbv'
+        subtitles = self.getSubtitles()
+        self.assertEqual(md5(subtitles['en']), '13aeaa0c245a8bed9a451cb643e3ad8b')
+
+    def test_youtube_subtitles_vtt_format(self):
+        self.DL.params['writesubtitles'] = True
+        self.DL.params['subtitlesformat'] = 'vtt'
+        subtitles = self.getSubtitles()
+        self.assertEqual(md5(subtitles['en']), '356cdc577fde0c6783b9b822e7206ff7')
+
+    def test_youtube_list_subtitles(self):
+        self.DL.expect_warning(u'Video doesn\'t have automatic captions')
+        self.DL.params['listsubtitles'] = True
+        info_dict = self.getInfoDict()
+        self.assertEqual(info_dict, None)
+
+    def test_youtube_automatic_captions(self):
+        self.url = '8YoUxe5ncPo'
+        self.DL.params['writeautomaticsub'] = True
+        self.DL.params['subtitleslangs'] = ['it']
+        subtitles = self.getSubtitles()
+        self.assertTrue(subtitles['it'] is not None)
+
+    def test_youtube_nosubtitles(self):
+        self.DL.expect_warning(u'video doesn\'t have subtitles')
+        self.url = 'sAjKT8FhjI8'
+        self.DL.params['writesubtitles'] = True
+        self.DL.params['allsubtitles'] = True
+        subtitles = self.getSubtitles()
+        self.assertEqual(len(subtitles), 0)
+
+    def test_youtube_multiple_langs(self):
+        self.url = 'QRS8MkLhQmM'
+        self.DL.params['writesubtitles'] = True
+        langs = ['it', 'fr', 'de']
+        self.DL.params['subtitleslangs'] = langs
+        subtitles = self.getSubtitles()
+        for lang in langs:
+            self.assertTrue(subtitles.get(lang) is not None, u'Subtitles for \'%s\' not extracted' % lang)
+
+
+class TestDailymotionSubtitles(BaseTestSubtitles):
+    url = 'http://www.dailymotion.com/video/xczg00'
+    IE = DailymotionIE
+
+    def test_no_writesubtitles(self):
+        subtitles = self.getSubtitles()
+        self.assertEqual(subtitles, None)
+
+    def test_subtitles(self):
+        self.DL.params['writesubtitles'] = True
+        subtitles = self.getSubtitles()
+        self.assertEqual(md5(subtitles['en']), '976553874490cba125086bbfea3ff76f')
+
+    def test_subtitles_lang(self):
+        self.DL.params['writesubtitles'] = True
+        self.DL.params['subtitleslangs'] = ['fr']
+        subtitles = self.getSubtitles()
+        self.assertEqual(md5(subtitles['fr']), '594564ec7d588942e384e920e5341792')
+
+    def test_allsubtitles(self):
+        self.DL.params['writesubtitles'] = True
+        self.DL.params['allsubtitles'] = True
+        subtitles = self.getSubtitles()
+        self.assertEqual(len(subtitles.keys()), 5)
+
+    def test_list_subtitles(self):
+        self.DL.expect_warning(u'Automatic Captions not supported by this server')
+        self.DL.params['listsubtitles'] = True
+        info_dict = self.getInfoDict()
+        self.assertEqual(info_dict, None)
+
+    def test_automatic_captions(self):
+        self.DL.expect_warning(u'Automatic Captions not supported by this server')
+        self.DL.params['writeautomaticsub'] = True
+        self.DL.params['subtitleslang'] = ['en']
+        subtitles = self.getSubtitles()
+        self.assertTrue(len(subtitles.keys()) == 0)
+
+    def test_nosubtitles(self):
+        self.DL.expect_warning(u'video doesn\'t have subtitles')
+        self.url = 'http://www.dailymotion.com/video/x12u166_le-zapping-tele-star-du-08-aout-2013_tv'
+        self.DL.params['writesubtitles'] = True
+        self.DL.params['allsubtitles'] = True
+        subtitles = self.getSubtitles()
+        self.assertEqual(len(subtitles), 0)
+
+    def test_multiple_langs(self):
+        self.DL.params['writesubtitles'] = True
+        langs = ['es', 'fr', 'de']
+        self.DL.params['subtitleslangs'] = langs
+        subtitles = self.getSubtitles()
+        for lang in langs:
+            self.assertTrue(subtitles.get(lang) is not None, u'Subtitles for \'%s\' not extracted' % lang)
+
+
+class TestTedSubtitles(BaseTestSubtitles):
+    url = 'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html'
+    IE = TEDIE
+
+    def test_no_writesubtitles(self):
+        subtitles = self.getSubtitles()
+        self.assertEqual(subtitles, None)
+
+    def test_subtitles(self):
+        self.DL.params['writesubtitles'] = True
+        subtitles = self.getSubtitles()
+        self.assertEqual(md5(subtitles['en']), '2154f31ff9b9f89a0aa671537559c21d')
+
+    def test_subtitles_lang(self):
+        self.DL.params['writesubtitles'] = True
+        self.DL.params['subtitleslangs'] = ['fr']
+        subtitles = self.getSubtitles()
+        self.assertEqual(md5(subtitles['fr']), '7616cbc6df20ec2c1204083c83871cf6')
+
+    def test_allsubtitles(self):
+        self.DL.params['writesubtitles'] = True
+        self.DL.params['allsubtitles'] = True
+        subtitles = self.getSubtitles()
+        self.assertEqual(len(subtitles.keys()), 28)
+
+    def test_list_subtitles(self):
+        self.DL.expect_warning(u'Automatic Captions not supported by this server')
+        self.DL.params['listsubtitles'] = True
+        info_dict = self.getInfoDict()
+        self.assertEqual(info_dict, None)
+
+    def test_automatic_captions(self):
+        self.DL.expect_warning(u'Automatic Captions not supported by this server')
+        self.DL.params['writeautomaticsub'] = True
+        self.DL.params['subtitleslang'] = ['en']
+        subtitles = self.getSubtitles()
+        self.assertTrue(len(subtitles.keys()) == 0)
+
+    def test_multiple_langs(self):
+        self.DL.params['writesubtitles'] = True
+        langs = ['es', 'fr', 'de']
+        self.DL.params['subtitleslangs'] = langs
+        subtitles = self.getSubtitles()
+        for lang in langs:
+            self.assertTrue(subtitles.get(lang) is not None, u'Subtitles for \'%s\' not extracted' % lang)
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/test/test_youtube_subtitles.py b/test/test_youtube_subtitles.py

deleted file mode 100644 (file)

index 00430a3..0000000
--- a/test/test_youtube_subtitles.py
+++ /dev/null
@@ -1,95 +0,0 @@
-#!/usr/bin/env python
-
-# Allow direct execution
-import os
-import sys
-import unittest
-sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-
-from test.helper import FakeYDL, global_setup, md5
-global_setup()
-
-
-from youtube_dl.extractor import YoutubeIE
-
-
-class TestYoutubeSubtitles(unittest.TestCase):
-    def setUp(self):
-        self.DL = FakeYDL()
-        self.url = 'QRS8MkLhQmM'
-
-    def getInfoDict(self):
-        IE = YoutubeIE(self.DL)
-        info_dict = IE.extract(self.url)
-        return info_dict
-
-    def getSubtitles(self):
-        info_dict = self.getInfoDict()
-        return info_dict[0]['subtitles']
-
-    def test_youtube_no_writesubtitles(self):
-        self.DL.params['writesubtitles'] = False
-        subtitles = self.getSubtitles()
-        self.assertEqual(subtitles, None)
-
-    def test_youtube_subtitles(self):
-        self.DL.params['writesubtitles'] = True
-        subtitles = self.getSubtitles()
-        self.assertEqual(md5(subtitles['en']), '4cd9278a35ba2305f47354ee13472260')
-
-    def test_youtube_subtitles_lang(self):
-        self.DL.params['writesubtitles'] = True
-        self.DL.params['subtitleslangs'] = ['it']
-        subtitles = self.getSubtitles()
-        self.assertEqual(md5(subtitles['it']), '164a51f16f260476a05b50fe4c2f161d')
-
-    def test_youtube_allsubtitles(self):
-        self.DL.params['writesubtitles'] = True
-        self.DL.params['allsubtitles'] = True
-        subtitles = self.getSubtitles()
-        self.assertEqual(len(subtitles.keys()), 13)
-
-    def test_youtube_subtitles_sbv_format(self):
-        self.DL.params['writesubtitles'] = True
-        self.DL.params['subtitlesformat'] = 'sbv'
-        subtitles = self.getSubtitles()
-        self.assertEqual(md5(subtitles['en']), '13aeaa0c245a8bed9a451cb643e3ad8b')
-
-    def test_youtube_subtitles_vtt_format(self):
-        self.DL.params['writesubtitles'] = True
-        self.DL.params['subtitlesformat'] = 'vtt'
-        subtitles = self.getSubtitles()
-        self.assertEqual(md5(subtitles['en']), '356cdc577fde0c6783b9b822e7206ff7')
-
-    def test_youtube_list_subtitles(self):
-        self.DL.expect_warning(u'Video doesn\'t have automatic captions')
-        self.DL.params['listsubtitles'] = True
-        info_dict = self.getInfoDict()
-        self.assertEqual(info_dict, None)
-
-    def test_youtube_automatic_captions(self):
-        self.url = '8YoUxe5ncPo'
-        self.DL.params['writeautomaticsub'] = True
-        self.DL.params['subtitleslangs'] = ['it']
-        subtitles = self.getSubtitles()
-        self.assertTrue(subtitles['it'] is not None)
-
-    def test_youtube_nosubtitles(self):
-        self.DL.expect_warning(u'video doesn\'t have subtitles')
-        self.url = 'sAjKT8FhjI8'
-        self.DL.params['writesubtitles'] = True
-        self.DL.params['allsubtitles'] = True
-        subtitles = self.getSubtitles()
-        self.assertEqual(len(subtitles), 0)
-
-    def test_youtube_multiple_langs(self):
-        self.url = 'QRS8MkLhQmM'
-        self.DL.params['writesubtitles'] = True
-        langs = ['it', 'fr', 'de']
-        self.DL.params['subtitleslangs'] = langs
-        subtitles = self.getSubtitles()
-        for lang in langs:
-            self.assertTrue(subtitles.get(lang) is not None, u'Subtitles for \'%s\' not extracted' % lang)
-
-if __name__ == '__main__':
-    unittest.main()
diff --git a/youtube-dl b/youtube-dl

index dc47dc0dba5b5a5945259c2dffeac63ca46b9ef0..0924a5d8a7fa976e2878e3607a5410a96559acf0 100755 (executable)

Binary files a/youtube-dl and b/youtube-dl differ
diff --git a/youtube-dl.1 b/youtube-dl.1

index 5dcfe85db89f99b5c8458cdb9b1b12f98930b6b0..bdd5a2da1d902677247977f812a46598db8b59af 100644 (file)
--- a/youtube-dl.1
+++ b/youtube-dl.1
@@ -95,24 +95,27 @@ redistribute it or use it however you like.
  \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ different,\ %(autonumber)s\ to\ get\ an\ automatically
  \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ incremented\ number,\ %(ext)s\ for\ the\ filename
  \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ extension,\ %(format)s\ for\ the\ format\ description
  \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ different,\ %(autonumber)s\ to\ get\ an\ automatically
  \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ incremented\ number,\ %(ext)s\ for\ the\ filename
  \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ extension,\ %(format)s\ for\ the\ format\ description
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ (like\ "22\ \-\ 1280x720"\ or\ "HD")%(upload_date)s\ for
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ the\ upload\ date\ (YYYYMMDD),\ %(extractor)s\ for\ the
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ provider\ (youtube,\ metacafe,\ etc),\ %(id)s\ for\ the
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ video\ id\ ,\ %(playlist)s\ for\ the\ playlist\ the
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ video\ is\ in,\ %(playlist_index)s\ for\ the\ position
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ in\ the\ playlist\ and\ %%\ for\ a\ literal\ percent.\ Use
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \-\ to\ output\ to\ stdout.\ Can\ also\ be\ used\ to
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ download\ to\ a\ different\ directory,\ for\ example
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ with\ \-o\ \[aq]/my/downloads/%(uploader)s/%(title)s\-%(i
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ d)s.%(ext)s\[aq]\ .
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ (like\ "22\ \-\ 1280x720"\ or\ "HD"),%(format_id)s\ for
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ the\ unique\ id\ of\ the\ format\ (like\ Youtube\[aq]s
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ itags:\ "137"),%(upload_date)s\ for\ the\ upload\ date
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ (YYYYMMDD),\ %(extractor)s\ for\ the\ provider
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ (youtube,\ metacafe,\ etc),\ %(id)s\ for\ the\ video\ id
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ ,\ %(playlist)s\ for\ the\ playlist\ the\ video\ is\ in,
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ %(playlist_index)s\ for\ the\ position\ in\ the
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ playlist\ and\ %%\ for\ a\ literal\ percent.\ Use\ \-\ to
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ output\ to\ stdout.\ Can\ also\ be\ used\ to\ download\ to
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ a\ different\ directory,\ for\ example\ with\ \-o\ \[aq]/my/d
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ ownloads/%(uploader)s/%(title)s\-%(id)s.%(ext)s\[aq]\ .
  \-\-autonumber\-size\ NUMBER\ \ \ Specifies\ the\ number\ of\ digits\ in\ %(autonumber)s
  \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ when\ it\ is\ present\ in\ output\ filename\ template\ or
  \-\-autonumber\-size\ NUMBER\ \ \ Specifies\ the\ number\ of\ digits\ in\ %(autonumber)s
  \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ when\ it\ is\ present\ in\ output\ filename\ template\ or
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \-\-autonumber\ option\ is\ given
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \-\-auto\-number\ option\ is\ given
  \-\-restrict\-filenames\ \ \ \ \ \ \ Restrict\ filenames\ to\ only\ ASCII\ characters,\ and
  \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ avoid\ "&"\ and\ spaces\ in\ filenames
  \-a,\ \-\-batch\-file\ FILE\ \ \ \ \ \ file\ containing\ URLs\ to\ download\ (\[aq]\-\[aq]\ for\ stdin)
  \-w,\ \-\-no\-overwrites\ \ \ \ \ \ \ \ do\ not\ overwrite\ files
  \-\-restrict\-filenames\ \ \ \ \ \ \ Restrict\ filenames\ to\ only\ ASCII\ characters,\ and
  \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ avoid\ "&"\ and\ spaces\ in\ filenames
  \-a,\ \-\-batch\-file\ FILE\ \ \ \ \ \ file\ containing\ URLs\ to\ download\ (\[aq]\-\[aq]\ for\ stdin)
  \-w,\ \-\-no\-overwrites\ \ \ \ \ \ \ \ do\ not\ overwrite\ files
-\-c,\ \-\-continue\ \ \ \ \ \ \ \ \ \ \ \ \ resume\ partially\ downloaded\ files
+\-c,\ \-\-continue\ \ \ \ \ \ \ \ \ \ \ \ \ force\ resume\ of\ partially\ downloaded\ files.\ By
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ default,\ youtube\-dl\ will\ resume\ downloads\ if
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ possible.
  \-\-no\-continue\ \ \ \ \ \ \ \ \ \ \ \ \ \ do\ not\ resume\ partially\ downloaded\ files\ (restart
  \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ from\ beginning)
  \-\-cookies\ FILE\ \ \ \ \ \ \ \ \ \ \ \ \ file\ to\ read\ cookies\ from\ and\ dump\ cookie\ jar\ in
  \-\-no\-continue\ \ \ \ \ \ \ \ \ \ \ \ \ \ do\ not\ resume\ partially\ downloaded\ files\ (restart
  \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ from\ beginning)
  \-\-cookies\ FILE\ \ \ \ \ \ \ \ \ \ \ \ \ file\ to\ read\ cookies\ from\ and\ dump\ cookie\ jar\ in
@@ -146,6 +149,8 @@ redistribute it or use it however you like.
  \-v,\ \-\-verbose\ \ \ \ \ \ \ \ \ \ \ \ \ \ print\ various\ debugging\ information
  \-\-dump\-intermediate\-pages\ \ print\ downloaded\ pages\ to\ debug\ problems(very
  \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ verbose)
  \-v,\ \-\-verbose\ \ \ \ \ \ \ \ \ \ \ \ \ \ print\ various\ debugging\ information
  \-\-dump\-intermediate\-pages\ \ print\ downloaded\ pages\ to\ debug\ problems(very
  \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ verbose)
+\-\-write\-pages\ \ \ \ \ \ \ \ \ \ \ \ \ \ Write\ downloaded\ pages\ to\ files\ in\ the\ current
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ directory
  \f[]
  .fi
  .SS Video Format Options:
  \f[]
  .fi
  .SS Video Format Options:
diff --git a/youtube-dl.bash-completion b/youtube-dl.bash-completion

index 5132eacd9ca6a47a80ca73643f5d48f66a279b43..0123996620626f55166ba05de7ba3e3e6885771e 100644 (file)
--- a/youtube-dl.bash-completion
+++ b/youtube-dl.bash-completion
@@ -3,7 +3,7 @@ __youtube_dl()
      local cur prev opts
      COMPREPLY=()
      cur="${COMP_WORDS[COMP_CWORD]}"
      local cur prev opts
      COMPREPLY=()
      cur="${COMP_WORDS[COMP_CWORD]}"
-    opts="--help --version --update --ignore-errors --abort-on-error --dump-user-agent --user-agent --referer --list-extractors --extractor-descriptions --proxy --no-check-certificate --cache-dir --no-cache-dir --playlist-start --playlist-end --match-title --reject-title --max-downloads --min-filesize --max-filesize --date --datebefore --dateafter --no-playlist --age-limit --download-archive --rate-limit --retries --buffer-size --no-resize-buffer --test --title --id --literal --auto-number --output --autonumber-size --restrict-filenames --batch-file --no-overwrites --continue --no-continue --cookies --no-part --no-mtime --write-description --write-info-json --write-annotations --write-thumbnail --quiet --simulate --skip-download --get-url --get-title --get-id --get-thumbnail --get-description --get-filename --get-format --newline --no-progress --console-title --verbose --dump-intermediate-pages --youtube-print-sig-code --format --all-formats --prefer-free-formats --max-quality --list-formats --write-sub --write-auto-sub --all-subs --list-subs --sub-format --sub-lang --username --password --netrc --video-password --extract-audio --audio-format --audio-quality --recode-video --keep-video --no-post-overwrites --embed-subs --add-metadata"
+    opts="--help --version --update --ignore-errors --abort-on-error --dump-user-agent --user-agent --referer --list-extractors --extractor-descriptions --proxy --no-check-certificate --cache-dir --no-cache-dir --playlist-start --playlist-end --match-title --reject-title --max-downloads --min-filesize --max-filesize --date --datebefore --dateafter --no-playlist --age-limit --download-archive --rate-limit --retries --buffer-size --no-resize-buffer --test --title --id --literal --auto-number --output --autonumber-size --restrict-filenames --batch-file --no-overwrites --continue --no-continue --cookies --no-part --no-mtime --write-description --write-info-json --write-annotations --write-thumbnail --quiet --simulate --skip-download --get-url --get-title --get-id --get-thumbnail --get-description --get-filename --get-format --newline --no-progress --console-title --verbose --dump-intermediate-pages --write-pages --youtube-print-sig-code --format --all-formats --prefer-free-formats --max-quality --list-formats --write-sub --write-auto-sub --all-subs --list-subs --sub-format --sub-lang --username --password --netrc --video-password --extract-audio --audio-format --audio-quality --recode-video --keep-video --no-post-overwrites --embed-subs --add-metadata"
      keywords=":ytfavorites :ytrecommended :ytsubscriptions :ytwatchlater"
  
      if [[ ${cur} =~ : ]]; then
      keywords=":ytfavorites :ytrecommended :ytsubscriptions :ytwatchlater"
  
      if [[ ${cur} =~ : ]]; then
diff --git a/youtube_dl/FileDownloader.py b/youtube_dl/FileDownloader.py

index 8ecabab1a517467c118dad0857c47291bcb2f929..088f595866372e360e425a3aeee196374f504b7c 100644 (file)
--- a/youtube_dl/FileDownloader.py
+++ b/youtube_dl/FileDownloader.py
@@ -4,12 +4,19 @@ import re
  import subprocess
  import sys
  import time
  import subprocess
  import sys
  import time
-import traceback
  
  if os.name == 'nt':
      import ctypes
  
  
  if os.name == 'nt':
      import ctypes
  
-from .utils import *
+from .utils import (
+    compat_urllib_error,
+    compat_urllib_request,
+    ContentTooShortError,
+    determine_ext,
+    encodeFilename,
+    sanitize_open,
+    timeconvert,
+)
  
  
  class FileDownloader(object):
  
  
  class FileDownloader(object):
@@ -194,7 +201,7 @@ class FileDownloader(object):
              if old_filename == new_filename:
                  return
              os.rename(encodeFilename(old_filename), encodeFilename(new_filename))
              if old_filename == new_filename:
                  return
              os.rename(encodeFilename(old_filename), encodeFilename(new_filename))
-        except (IOError, OSError) as err:
+        except (IOError, OSError):
              self.report_error(u'unable to rename file')
  
      def try_utime(self, filename, last_modified_hdr):
              self.report_error(u'unable to rename file')
  
      def try_utime(self, filename, last_modified_hdr):
@@ -227,8 +234,14 @@ class FileDownloader(object):
          if self.params.get('noprogress', False):
              return
          clear_line = (u'\x1b[K' if sys.stderr.isatty() and os.name != 'nt' else u'')
          if self.params.get('noprogress', False):
              return
          clear_line = (u'\x1b[K' if sys.stderr.isatty() and os.name != 'nt' else u'')
-        eta_str = self.format_eta(eta)
-        percent_str = self.format_percent(percent)
+        if eta is not None:
+            eta_str = self.format_eta(eta)
+        else:
+            eta_str = 'Unknown ETA'
+        if percent is not None:
+            percent_str = self.format_percent(percent)
+        else:
+            percent_str = 'Unknown %'
          speed_str = self.format_speed(speed)
          if self.params.get('progress_with_newline', False):
              self.to_screen(u'[download] %s of %s at %s ETA %s' %
          speed_str = self.format_speed(speed)
          if self.params.get('progress_with_newline', False):
              self.to_screen(u'[download] %s of %s at %s ETA %s' %
@@ -251,7 +264,7 @@ class FileDownloader(object):
          """Report file has already been fully downloaded."""
          try:
              self.to_screen(u'[download] %s has already been downloaded' % file_name)
          """Report file has already been fully downloaded."""
          try:
              self.to_screen(u'[download] %s has already been downloaded' % file_name)
-        except (UnicodeEncodeError) as err:
+        except UnicodeEncodeError:
              self.to_screen(u'[download] The file has already been downloaded')
  
      def report_unable_to_resume(self):
              self.to_screen(u'[download] The file has already been downloaded')
  
      def report_unable_to_resume(self):
@@ -267,7 +280,7 @@ class FileDownloader(object):
              self.to_screen(u'\r%s[download] 100%% of %s in %s' %
                  (clear_line, data_len_str, self.format_seconds(tot_time)))
  
              self.to_screen(u'\r%s[download] 100%% of %s in %s' %
                  (clear_line, data_len_str, self.format_seconds(tot_time)))
  
-    def _download_with_rtmpdump(self, filename, url, player_url, page_url, play_path, tc_url):
+    def _download_with_rtmpdump(self, filename, url, player_url, page_url, play_path, tc_url, live):
          self.report_destination(filename)
          tmpfilename = self.temp_name(filename)
          test = self.params.get('test', False)
          self.report_destination(filename)
          tmpfilename = self.temp_name(filename)
          test = self.params.get('test', False)
@@ -294,6 +307,8 @@ class FileDownloader(object):
              basic_args += ['--tcUrl', url]
          if test:
              basic_args += ['--stop', '1']
              basic_args += ['--tcUrl', url]
          if test:
              basic_args += ['--stop', '1']
+        if live:
+            basic_args += ['--live']
          args = basic_args + [[], ['--resume', '--skip', '1']][self.params.get('continuedl', False)]
          if self.params.get('verbose', False):
              try:
          args = basic_args + [[], ['--resume', '--skip', '1']][self.params.get('continuedl', False)]
          if self.params.get('verbose', False):
              try:
@@ -366,15 +381,20 @@ class FileDownloader(object):
          self.report_destination(filename)
          tmpfilename = self.temp_name(filename)
  
          self.report_destination(filename)
          tmpfilename = self.temp_name(filename)
  
-        args = ['ffmpeg', '-y', '-i', url, '-f', 'mp4', tmpfilename]
-        # Check for ffmpeg first
-        try:
-            subprocess.call(['ffmpeg', '-h'], stdout=(open(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
-        except (OSError, IOError):
-            self.report_error(u'm3u8 download detected but "%s" could not be run' % args[0] )
-            return False
+        args = ['-y', '-i', url, '-f', 'mp4', '-c', 'copy',
+            '-bsf:a', 'aac_adtstoasc', tmpfilename]
  
  
-        retval = subprocess.call(args)
+        for program in ['avconv', 'ffmpeg']:
+            try:
+                subprocess.call([program, '-version'], stdout=(open(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
+                break
+            except (OSError, IOError):
+                pass
+        else:
+            self.report_error(u'm3u8 download detected but ffmpeg or avconv could not be found')
+        cmd = [program] + args
+
+        retval = subprocess.call(cmd)
          if retval == 0:
              fsize = os.path.getsize(encodeFilename(tmpfilename))
              self.to_screen(u'\r[%s] %s bytes' % (args[0], fsize))
          if retval == 0:
              fsize = os.path.getsize(encodeFilename(tmpfilename))
              self.to_screen(u'\r[%s] %s bytes' % (args[0], fsize))
@@ -411,7 +431,8 @@ class FileDownloader(object):
                                                  info_dict.get('player_url', None),
                                                  info_dict.get('page_url', None),
                                                  info_dict.get('play_path', None),
                                                  info_dict.get('player_url', None),
                                                  info_dict.get('page_url', None),
                                                  info_dict.get('play_path', None),
-                                                info_dict.get('tc_url', None))
+                                                info_dict.get('tc_url', None),
+                                                info_dict.get('rtmp_live', False))
  
          # Attempt to download using mplayer
          if url.startswith('mms') or url.startswith('rtsp'):
  
          # Attempt to download using mplayer
          if url.startswith('mms') or url.startswith('rtsp'):
@@ -550,12 +571,11 @@ class FileDownloader(object):
              # Progress message
              speed = self.calc_speed(start, time.time(), byte_counter - resume_len)
              if data_len is None:
              # Progress message
              speed = self.calc_speed(start, time.time(), byte_counter - resume_len)
              if data_len is None:
-                self.report_progress('Unknown %', data_len_str, speed_str, 'Unknown ETA')
-                eta = None
+                eta = percent = None
              else:
                  percent = self.calc_percent(byte_counter, data_len)
                  eta = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
              else:
                  percent = self.calc_percent(byte_counter, data_len)
                  eta = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
-                self.report_progress(percent, data_len_str, speed, eta)
+            self.report_progress(percent, data_len_str, speed, eta)
  
              self._hook_progress({
                  'downloaded_bytes': byte_counter,
  
              self._hook_progress({
                  'downloaded_bytes': byte_counter,
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py

index e2332f9b84ef2c41d6cf986553d4f999bac946a6..5253c39e1c8c3fdfe083d62d69c6b7552ef303b6 100644 (file)
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -272,7 +272,7 @@ class YoutubeDL(object):
                  autonumber_size = 5
              autonumber_templ = u'%0' + str(autonumber_size) + u'd'
              template_dict['autonumber'] = autonumber_templ % self._num_downloads
                  autonumber_size = 5
              autonumber_templ = u'%0' + str(autonumber_size) + u'd'
              template_dict['autonumber'] = autonumber_templ % self._num_downloads
-            if template_dict['playlist_index'] is not None:
+            if template_dict.get('playlist_index') is not None:
                  template_dict['playlist_index'] = u'%05d' % template_dict['playlist_index']
  
              sanitize = lambda k, v: sanitize_filename(
                  template_dict['playlist_index'] = u'%05d' % template_dict['playlist_index']
  
              sanitize = lambda k, v: sanitize_filename(
@@ -318,6 +318,12 @@ class YoutubeDL(object):
                      % info_dict)
          return None
  
                      % info_dict)
          return None
  
+    @staticmethod
+    def add_extra_info(info_dict, extra_info):
+        '''Set the keys from extra_info in info dict if they are missing'''
+        for key, value in extra_info.items():
+            info_dict.setdefault(key, value)
+
      def extract_info(self, url, download=True, ie_key=None, extra_info={}):
          '''
          Returns a list with a dictionary for each video we find.
      def extract_info(self, url, download=True, ie_key=None, extra_info={}):
          '''
          Returns a list with a dictionary for each video we find.
@@ -344,17 +350,17 @@ class YoutubeDL(object):
                      break
                  if isinstance(ie_result, list):
                      # Backwards compatibility: old IE result format
                      break
                  if isinstance(ie_result, list):
                      # Backwards compatibility: old IE result format
-                    for result in ie_result:
-                        result.update(extra_info)
                      ie_result = {
                          '_type': 'compat_list',
                          'entries': ie_result,
                      }
                      ie_result = {
                          '_type': 'compat_list',
                          'entries': ie_result,
                      }
-                else:
-                    ie_result.update(extra_info)
-                if 'extractor' not in ie_result:
-                    ie_result['extractor'] = ie.IE_NAME
-                return self.process_ie_result(ie_result, download=download)
+                self.add_extra_info(ie_result,
+                    {
+                        'extractor': ie.IE_NAME,
+                        'webpage_url': url,
+                        'extractor_key': ie.ie_key(),
+                    })
+                return self.process_ie_result(ie_result, download, extra_info)
              except ExtractorError as de: # An error we somewhat expected
                  self.report_error(compat_str(de), de.format_traceback())
                  break
              except ExtractorError as de: # An error we somewhat expected
                  self.report_error(compat_str(de), de.format_traceback())
                  break
@@ -378,7 +384,7 @@ class YoutubeDL(object):
  
          result_type = ie_result.get('_type', 'video') # If not given we suppose it's a video, support the default old system
          if result_type == 'video':
  
          result_type = ie_result.get('_type', 'video') # If not given we suppose it's a video, support the default old system
          if result_type == 'video':
-            ie_result.update(extra_info)
+            self.add_extra_info(ie_result, extra_info)
              return self.process_video_result(ie_result)
          elif result_type == 'url':
              # We have to add extra_info to the results because it may be
              return self.process_video_result(ie_result)
          elif result_type == 'url':
              # We have to add extra_info to the results because it may be
@@ -388,6 +394,7 @@ class YoutubeDL(object):
                                       ie_key=ie_result.get('ie_key'),
                                       extra_info=extra_info)
          elif result_type == 'playlist':
                                       ie_key=ie_result.get('ie_key'),
                                       extra_info=extra_info)
          elif result_type == 'playlist':
+            self.add_extra_info(ie_result, extra_info)
              # We process each entry in the playlist
              playlist = ie_result.get('title', None) or ie_result.get('id', None)
              self.to_screen(u'[download] Downloading playlist: %s' % playlist)
              # We process each entry in the playlist
              playlist = ie_result.get('title', None) or ie_result.get('id', None)
              self.to_screen(u'[download] Downloading playlist: %s' % playlist)
@@ -413,12 +420,10 @@ class YoutubeDL(object):
                  extra = {
                      'playlist': playlist,
                      'playlist_index': i + playliststart,
                  extra = {
                      'playlist': playlist,
                      'playlist_index': i + playliststart,
+                    'extractor': ie_result['extractor'],
+                    'webpage_url': ie_result['webpage_url'],
+                    'extractor_key': ie_result['extractor_key'],
                  }
                  }
-                if not 'extractor' in entry:
-                    # We set the extractor, if it's an url it will be set then to
-                    # the new extractor, but if it's already a video we must make
-                    # sure it's present: see issue #877
-                    entry['extractor'] = ie_result['extractor']
                  entry_result = self.process_ie_result(entry,
                                                        download=download,
                                                        extra_info=extra)
                  entry_result = self.process_ie_result(entry,
                                                        download=download,
                                                        extra_info=extra)
@@ -427,10 +432,15 @@ class YoutubeDL(object):
              return ie_result
          elif result_type == 'compat_list':
              def _fixup(r):
              return ie_result
          elif result_type == 'compat_list':
              def _fixup(r):
-                r.setdefault('extractor', ie_result['extractor'])
+                self.add_extra_info(r,
+                    {
+                        'extractor': ie_result['extractor'],
+                        'webpage_url': ie_result['webpage_url'],
+                        'extractor_key': ie_result['extractor_key'],
+                    })
                  return r
              ie_result['entries'] = [
                  return r
              ie_result['entries'] = [
-                self.process_ie_result(_fixup(r), download=download)
+                self.process_ie_result(_fixup(r), download, extra_info)
                  for r in ie_result['entries']
              ]
              return ie_result
                  for r in ie_result['entries']
              ]
              return ie_result
@@ -462,7 +472,7 @@ class YoutubeDL(object):
              info_dict['playlist_index'] = None
  
          # This extractors handle format selection themselves
              info_dict['playlist_index'] = None
  
          # This extractors handle format selection themselves
-        if info_dict['extractor'] in [u'youtube', u'Youku', u'YouPorn', u'mixcloud']:
+        if info_dict['extractor'] in [u'youtube', u'Youku']:
              if download:
                  self.process_info(info_dict)
              return info_dict
              if download:
                  self.process_info(info_dict)
              return info_dict
@@ -482,8 +492,11 @@ class YoutubeDL(object):
                  format['format'] = u'{id} - {res}{note}'.format(
                      id=format['format_id'],
                      res=self.format_resolution(format),
                  format['format'] = u'{id} - {res}{note}'.format(
                      id=format['format_id'],
                      res=self.format_resolution(format),
-                    note=u' ({})'.format(format['format_note']) if format.get('format_note') is not None else '',
+                    note=u' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
                  )
                  )
+            # Automatically determine file extension if missing
+            if 'ext' not in format:
+                format['ext'] = determine_ext(format['url'])
  
          if self.params.get('listformats', None):
              self.list_formats(info_dict)
  
          if self.params.get('listformats', None):
              self.list_formats(info_dict)
@@ -521,7 +534,8 @@ class YoutubeDL(object):
                      formats_to_download = [selected_format]
                      break
          if not formats_to_download:
                      formats_to_download = [selected_format]
                      break
          if not formats_to_download:
-            raise ExtractorError(u'requested format not available')
+            raise ExtractorError(u'requested format not available',
+                                 expected=True)
  
          if download:
              if len(formats_to_download) > 1:
  
          if download:
              if len(formats_to_download) > 1:
@@ -571,9 +585,9 @@ class YoutubeDL(object):
          if self.params.get('forceurl', False):
              # For RTMP URLs, also include the playpath
              compat_print(info_dict['url'] + info_dict.get('play_path', u''))
          if self.params.get('forceurl', False):
              # For RTMP URLs, also include the playpath
              compat_print(info_dict['url'] + info_dict.get('play_path', u''))
-        if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict:
+        if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
              compat_print(info_dict['thumbnail'])
              compat_print(info_dict['thumbnail'])
-        if self.params.get('forcedescription', False) and 'description' in info_dict:
+        if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
              compat_print(info_dict['description'])
          if self.params.get('forcefilename', False) and filename is not None:
              compat_print(filename)
              compat_print(info_dict['description'])
          if self.params.get('forcefilename', False) and filename is not None:
              compat_print(filename)
@@ -754,30 +768,36 @@ class YoutubeDL(object):
              archive_file.write(vid_id + u'\n')
  
      @staticmethod
              archive_file.write(vid_id + u'\n')
  
      @staticmethod
-    def format_resolution(format):
+    def format_resolution(format, default='unknown'):
+        if format.get('_resolution') is not None:
+            return format['_resolution']
          if format.get('height') is not None:
              if format.get('width') is not None:
                  res = u'%sx%s' % (format['width'], format['height'])
              else:
                  res = u'%sp' % format['height']
          else:
          if format.get('height') is not None:
              if format.get('width') is not None:
                  res = u'%sx%s' % (format['width'], format['height'])
              else:
                  res = u'%sp' % format['height']
          else:
-            res = '???'
+            res = default
          return res
  
      def list_formats(self, info_dict):
          return res
  
      def list_formats(self, info_dict):
-        formats_s = []
-        for format in info_dict.get('formats', [info_dict]):
-            formats_s.append(u'%-15s: %-5s     %-15s[%s]' % (
+        def line(format):
+            return (u'%-20s%-10s%-12s%s' % (
                  format['format_id'],
                  format['ext'],
                  format['format_id'],
                  format['ext'],
-                format.get('format_note') or '-',
                  self.format_resolution(format),
                  self.format_resolution(format),
+                format.get('format_note', ''),
                  )
              )
                  )
              )
-        if len(formats_s) != 1:
-            formats_s[0] += ' (worst)'
-            formats_s[-1] += ' (best)'
-        formats_s = "\n".join(formats_s)
-        self.to_screen(u'[info] Available formats for %s:\n'
-            u'format code    extension   note           resolution\n%s' % (
-                info_dict['id'], formats_s))
+
+        formats = info_dict.get('formats', [info_dict])
+        formats_s = list(map(line, formats))
+        if len(formats) > 1:
+            formats_s[0] += (' ' if formats[0].get('format_note') else '') + '(worst)'
+            formats_s[-1] += (' ' if formats[-1].get('format_note') else '') + '(best)'
+
+        header_line = line({
+            'format_id': u'format code', 'ext': u'extension',
+            '_resolution': u'resolution', 'format_note': u'note'})
+        self.to_screen(u'[info] Available formats for %s:\n%s\n%s' %
+                       (info_dict['id'], header_line, u"\n".join(formats_s)))
diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py

index fce1adf0cffbf527841dfada34b931d93f67fd5a..1f1db9f676d55c909514bf3fde589550d6d6ca71 100644 (file)
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -133,7 +133,7 @@ def parseOpts(overrideArguments=None):
  
      def _hide_login_info(opts):
          opts = list(opts)
  
      def _hide_login_info(opts):
          opts = list(opts)
-        for private_opt in ['-p', '--password', '-u', '--username']:
+        for private_opt in ['-p', '--password', '-u', '--username', '--video-password']:
              try:
                  i = opts.index(private_opt)
                  opts[i+1] = '<PRIVATE>'
              try:
                  i = opts.index(private_opt)
                  opts[i+1] = '<PRIVATE>'
@@ -316,6 +316,9 @@ def parseOpts(overrideArguments=None):
      verbosity.add_option('--dump-intermediate-pages',
              action='store_true', dest='dump_intermediate_pages', default=False,
              help='print downloaded pages to debug problems(very verbose)')
      verbosity.add_option('--dump-intermediate-pages',
              action='store_true', dest='dump_intermediate_pages', default=False,
              help='print downloaded pages to debug problems(very verbose)')
+    verbosity.add_option('--write-pages',
+            action='store_true', dest='write_pages', default=False,
+            help='Write downloaded pages to files in the current directory')
      verbosity.add_option('--youtube-print-sig-code',
              action='store_true', dest='youtube_print_sig_code', default=False,
              help=optparse.SUPPRESS_HELP)
      verbosity.add_option('--youtube-print-sig-code',
              action='store_true', dest='youtube_print_sig_code', default=False,
              help=optparse.SUPPRESS_HELP)
@@ -336,7 +339,8 @@ def parseOpts(overrideArguments=None):
                    '%(uploader)s for the uploader name, %(uploader_id)s for the uploader nickname if different, '
                    '%(autonumber)s to get an automatically incremented number, '
                    '%(ext)s for the filename extension, '
                    '%(uploader)s for the uploader name, %(uploader_id)s for the uploader nickname if different, '
                    '%(autonumber)s to get an automatically incremented number, '
                    '%(ext)s for the filename extension, '
-                  '%(format)s for the format description (like "22 - 1280x720" or "HD")'
+                  '%(format)s for the format description (like "22 - 1280x720" or "HD"),'
+                  '%(format_id)s for the unique id of the format (like Youtube\'s itags: "137"),'
                    '%(upload_date)s for the upload date (YYYYMMDD), '
                    '%(extractor)s for the provider (youtube, metacafe, etc), '
                    '%(id)s for the video id , %(playlist)s for the playlist the video is in, '
                    '%(upload_date)s for the upload date (YYYYMMDD), '
                    '%(extractor)s for the provider (youtube, metacafe, etc), '
                    '%(id)s for the video id , %(playlist)s for the playlist the video is in, '
@@ -345,7 +349,7 @@ def parseOpts(overrideArguments=None):
                    'for example with -o \'/my/downloads/%(uploader)s/%(title)s-%(id)s.%(ext)s\' .'))
      filesystem.add_option('--autonumber-size',
              dest='autonumber_size', metavar='NUMBER',
                    'for example with -o \'/my/downloads/%(uploader)s/%(title)s-%(id)s.%(ext)s\' .'))
      filesystem.add_option('--autonumber-size',
              dest='autonumber_size', metavar='NUMBER',
-            help='Specifies the number of digits in %(autonumber)s when it is present in output filename template or --autonumber option is given')
+            help='Specifies the number of digits in %(autonumber)s when it is present in output filename template or --auto-number option is given')
      filesystem.add_option('--restrict-filenames',
              action='store_true', dest='restrictfilenames',
              help='Restrict filenames to only ASCII characters, and avoid "&" and spaces in filenames', default=False)
      filesystem.add_option('--restrict-filenames',
              action='store_true', dest='restrictfilenames',
              help='Restrict filenames to only ASCII characters, and avoid "&" and spaces in filenames', default=False)
@@ -354,7 +358,7 @@ def parseOpts(overrideArguments=None):
      filesystem.add_option('-w', '--no-overwrites',
              action='store_true', dest='nooverwrites', help='do not overwrite files', default=False)
      filesystem.add_option('-c', '--continue',
      filesystem.add_option('-w', '--no-overwrites',
              action='store_true', dest='nooverwrites', help='do not overwrite files', default=False)
      filesystem.add_option('-c', '--continue',
-            action='store_true', dest='continue_dl', help='resume partially downloaded files', default=True)
+            action='store_true', dest='continue_dl', help='force resume of partially downloaded files. By default, youtube-dl will resume downloads if possible.', default=True)
      filesystem.add_option('--no-continue',
              action='store_false', dest='continue_dl',
              help='do not resume partially downloaded files (restart from beginning)')
      filesystem.add_option('--no-continue',
              action='store_false', dest='continue_dl',
              help='do not resume partially downloaded files (restart from beginning)')
@@ -651,6 +655,7 @@ def _real_main(argv=None):
          'prefer_free_formats': opts.prefer_free_formats,
          'verbose': opts.verbose,
          'dump_intermediate_pages': opts.dump_intermediate_pages,
          'prefer_free_formats': opts.prefer_free_formats,
          'verbose': opts.verbose,
          'dump_intermediate_pages': opts.dump_intermediate_pages,
+        'write_pages': opts.write_pages,
          'test': opts.test,
          'keepvideo': opts.keepvideo,
          'min_filesize': opts.min_filesize,
          'test': opts.test,
          'keepvideo': opts.keepvideo,
          'min_filesize': opts.min_filesize,
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py

index db69af361929fd7ff726d1a1df980730cad3630c..f9caca4ef8c3658e5c25602c98742dd8c3f94fb6 100644 (file)
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -9,6 +9,7 @@ from .arte import (
      ArteTVFutureIE,
  )
  from .auengine import AUEngineIE
      ArteTVFutureIE,
  )
  from .auengine import AUEngineIE
+from .bambuser import BambuserIE, BambuserChannelIE
  from .bandcamp import BandcampIE
  from .bliptv import BlipTVIE, BlipTVUserIE
  from .bloomberg import BloombergIE
  from .bandcamp import BandcampIE
  from .bliptv import BlipTVIE, BlipTVUserIE
  from .bloomberg import BloombergIE
@@ -37,8 +38,10 @@ from .defense import DefenseGouvFrIE
  from .ebaumsworld import EbaumsWorldIE
  from .ehow import EHowIE
  from .eighttracks import EightTracksIE
  from .ebaumsworld import EbaumsWorldIE
  from .ehow import EHowIE
  from .eighttracks import EightTracksIE
+from .eitb import EitbIE
  from .escapist import EscapistIE
  from .exfm import ExfmIE
  from .escapist import EscapistIE
  from .exfm import ExfmIE
+from .extremetube import ExtremeTubeIE
  from .facebook import FacebookIE
  from .faz import FazIE
  from .fktv import (
  from .facebook import FacebookIE
  from .faz import FazIE
  from .fktv import (
@@ -72,6 +75,7 @@ from .jeuxvideo import JeuxVideoIE
  from .jukebox import JukeboxIE
  from .justintv import JustinTVIE
  from .kankan import KankanIE
  from .jukebox import JukeboxIE
  from .justintv import JustinTVIE
  from .kankan import KankanIE
+from .keezmovies import KeezMoviesIE
  from .kickstarter import KickStarterIE
  from .keek import KeekIE
  from .liveleak import LiveLeakIE
  from .kickstarter import KickStarterIE
  from .keek import KeekIE
  from .liveleak import LiveLeakIE
@@ -80,8 +84,10 @@ from .metacafe import MetacafeIE
  from .metacritic import MetacriticIE
  from .mit import TechTVMITIE, MITIE
  from .mixcloud import MixcloudIE
  from .metacritic import MetacriticIE
  from .mit import TechTVMITIE, MITIE
  from .mixcloud import MixcloudIE
+from .mofosex import MofosexIE
  from .mtv import MTVIE
  from .muzu import MuzuTVIE
  from .mtv import MTVIE
  from .muzu import MuzuTVIE
+from .myspace import MySpaceIE
  from .myspass import MySpassIE
  from .myvideo import MyVideoIE
  from .naver import NaverIE
  from .myspass import MySpassIE
  from .myvideo import MyVideoIE
  from .naver import NaverIE
@@ -94,6 +100,7 @@ from .ooyala import OoyalaIE
  from .orf import ORFIE
  from .pbs import PBSIE
  from .photobucket import PhotobucketIE
  from .orf import ORFIE
  from .pbs import PBSIE
  from .photobucket import PhotobucketIE
+from .pornhub import PornHubIE
  from .pornotube import PornotubeIE
  from .rbmaradio import RBMARadioIE
  from .redtube import RedTubeIE
  from .pornotube import PornotubeIE
  from .rbmaradio import RBMARadioIE
  from .redtube import RedTubeIE
@@ -109,6 +116,8 @@ from .slideshare import SlideshareIE
  from .sohu import SohuIE
  from .soundcloud import SoundcloudIE, SoundcloudSetIE, SoundcloudUserIE
  from .southparkstudios import SouthParkStudiosIE
  from .sohu import SohuIE
  from .soundcloud import SoundcloudIE, SoundcloudSetIE, SoundcloudUserIE
  from .southparkstudios import SouthParkStudiosIE
+from .space import SpaceIE
+from .spankwire import SpankwireIE
  from .spiegel import SpiegelIE
  from .stanfordoc import StanfordOpenClassroomIE
  from .statigram import StatigramIE
  from .spiegel import SpiegelIE
  from .stanfordoc import StanfordOpenClassroomIE
  from .statigram import StatigramIE
@@ -121,6 +130,7 @@ from .tf1 import TF1IE
  from .thisav import ThisAVIE
  from .traileraddict import TrailerAddictIE
  from .trilulilu import TriluliluIE
  from .thisav import ThisAVIE
  from .traileraddict import TrailerAddictIE
  from .trilulilu import TriluliluIE
+from .tube8 import Tube8IE
  from .tudou import TudouIE
  from .tumblr import TumblrIE
  from .tutv import TutvIE
  from .tudou import TudouIE
  from .tumblr import TumblrIE
  from .tutv import TutvIE
@@ -137,6 +147,7 @@ from .videofyme import VideofyMeIE
  from .videopremium import VideoPremiumIE
  from .vimeo import VimeoIE, VimeoChannelIE
  from .vine import VineIE
  from .videopremium import VideoPremiumIE
  from .vimeo import VimeoIE, VimeoChannelIE
  from .vine import VineIE
+from .vk import VKIE
  from .wat import WatIE
  from .websurg import WeBSurgIE
  from .weibo import WeiboIE
  from .wat import WatIE
  from .websurg import WeBSurgIE
  from .weibo import WeiboIE
@@ -145,6 +156,7 @@ from .worldstarhiphop import WorldStarHipHopIE
  from .xhamster import XHamsterIE
  from .xnxx import XNXXIE
  from .xvideos import XVideosIE
  from .xhamster import XHamsterIE
  from .xnxx import XNXXIE
  from .xvideos import XVideosIE
+from .xtube import XTubeIE
  from .yahoo import YahooIE, YahooSearchIE
  from .youjizz import YouJizzIE
  from .youku import YoukuIE
  from .yahoo import YahooIE, YahooSearchIE
  from .youjizz import YouJizzIE
  from .youku import YoukuIE
@@ -153,6 +165,7 @@ from .youtube import (
      YoutubeIE,
      YoutubePlaylistIE,
      YoutubeSearchIE,
      YoutubeIE,
      YoutubePlaylistIE,
      YoutubeSearchIE,
+    YoutubeSearchDateIE,
      YoutubeUserIE,
      YoutubeChannelIE,
      YoutubeShowIE,
      YoutubeUserIE,
      YoutubeChannelIE,
      YoutubeShowIE,
diff --git a/youtube_dl/extractor/addanime.py b/youtube_dl/extractor/addanime.py

index 82a785a19c34517c17da294ad64c1cbe7d22cba4..b99d4b96689c23a13379d4392484c3763ce0e36f 100644 (file)
--- a/youtube_dl/extractor/addanime.py
+++ b/youtube_dl/extractor/addanime.py
@@ -17,8 +17,8 @@ class AddAnimeIE(InfoExtractor):
      IE_NAME = u'AddAnime'
      _TEST = {
          u'url': u'http://www.add-anime.net/watch_video.php?v=24MR3YO5SAS9',
      IE_NAME = u'AddAnime'
      _TEST = {
          u'url': u'http://www.add-anime.net/watch_video.php?v=24MR3YO5SAS9',
-        u'file': u'24MR3YO5SAS9.flv',
-        u'md5': u'1036a0e0cd307b95bd8a8c3a5c8cfaf1',
+        u'file': u'24MR3YO5SAS9.mp4',
+        u'md5': u'72954ea10bc979ab5e2eb288b21425a0',
          u'info_dict': {
              u"description": u"One Piece 606",
              u"title": u"One Piece 606"
          u'info_dict': {
              u"description": u"One Piece 606",
              u"title": u"One Piece 606"
@@ -31,7 +31,8 @@ class AddAnimeIE(InfoExtractor):
              video_id = mobj.group('video_id')
              webpage = self._download_webpage(url, video_id)
          except ExtractorError as ee:
              video_id = mobj.group('video_id')
              webpage = self._download_webpage(url, video_id)
          except ExtractorError as ee:
-            if not isinstance(ee.cause, compat_HTTPError):
+            if not isinstance(ee.cause, compat_HTTPError) or \
+               ee.cause.code != 503:
                  raise
  
              redir_webpage = ee.cause.read().decode('utf-8')
                  raise
  
              redir_webpage = ee.cause.read().decode('utf-8')
@@ -60,16 +61,26 @@ class AddAnimeIE(InfoExtractor):
                  note=u'Confirming after redirect')
              webpage = self._download_webpage(url, video_id)
  
                  note=u'Confirming after redirect')
              webpage = self._download_webpage(url, video_id)
  
-        video_url = self._search_regex(r"var normal_video_file = '(.*?)';",
-                                       webpage, u'video file URL')
+        formats = []
+        for format_id in ('normal', 'hq'):
+            rex = r"var %s_video_file = '(.*?)';" % re.escape(format_id)
+            video_url = self._search_regex(rex, webpage, u'video file URLx',
+                                           fatal=False)
+            if not video_url:
+                continue
+            formats.append({
+                'format_id': format_id,
+                'url': video_url,
+            })
+        if not formats:
+            raise ExtractorError(u'Cannot find any video format!')
          video_title = self._og_search_title(webpage)
          video_description = self._og_search_description(webpage)
  
          return {
              '_type': 'video',
              'id':  video_id,
          video_title = self._og_search_title(webpage)
          video_description = self._og_search_description(webpage)
  
          return {
              '_type': 'video',
              'id':  video_id,
-            'url': video_url,
-            'ext': 'flv',
+            'formats': formats,
              'title': video_title,
              'description': video_description
          }
              'title': video_title,
              'description': video_description
          }
diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py

index d39b489518f7bc699854d9ddeb7cd4fa357a6c2b..b35a679e3b036d2c573a4f1fc85d53bd793f745b 100644 (file)
--- a/youtube_dl/extractor/arte.py
+++ b/youtube_dl/extractor/arte.py
@@ -10,6 +10,7 @@ from ..utils import (
      unified_strdate,
      determine_ext,
      get_element_by_id,
      unified_strdate,
      determine_ext,
      get_element_by_id,
+    compat_str,
  )
  
  # There are different sources of video in arte.tv, the extraction process 
  )
  
  # There are different sources of video in arte.tv, the extraction process 
@@ -158,7 +159,9 @@ class ArteTVPlus7IE(InfoExtractor):
              'thumbnail': player_info.get('programImage') or player_info.get('VTU', {}).get('IUR'),
          }
  
              'thumbnail': player_info.get('programImage') or player_info.get('VTU', {}).get('IUR'),
          }
  
-        formats = player_info['VSR'].values()
+        all_formats = player_info['VSR'].values()
+        # Some formats use the m3u8 protocol
+        all_formats = list(filter(lambda f: f.get('videoFormat') != 'M3U8', all_formats))
          def _match_lang(f):
              if f.get('versionCode') is None:
                  return True
          def _match_lang(f):
              if f.get('versionCode') is None:
                  return True
@@ -170,24 +173,39 @@ class ArteTVPlus7IE(InfoExtractor):
              regexes = [r'VO?%s' % l, r'VO?.-ST%s' % l]
              return any(re.match(r, f['versionCode']) for r in regexes)
          # Some formats may not be in the same language as the url
              regexes = [r'VO?%s' % l, r'VO?.-ST%s' % l]
              return any(re.match(r, f['versionCode']) for r in regexes)
          # Some formats may not be in the same language as the url
-        formats = filter(_match_lang, formats)
-        # Some formats use the m3u8 protocol
-        formats = filter(lambda f: f.get('videoFormat') != 'M3U8', formats)
-        # We order the formats by quality
+        formats = filter(_match_lang, all_formats)
          formats = list(formats) # in python3 filter returns an iterator
          formats = list(formats) # in python3 filter returns an iterator
+        if not formats:
+            # Some videos are only available in the 'Originalversion'
+            # they aren't tagged as being in French or German
+            if all(f['versionCode'] == 'VO' for f in all_formats):
+                formats = all_formats
+            else:
+                raise ExtractorError(u'The formats list is empty')
+
          if re.match(r'[A-Z]Q', formats[0]['quality']) is not None:
          if re.match(r'[A-Z]Q', formats[0]['quality']) is not None:
-            sort_key = lambda f: ['HQ', 'MQ', 'EQ', 'SQ'].index(f['quality'])
+            def sort_key(f):
+                return ['HQ', 'MQ', 'EQ', 'SQ'].index(f['quality'])
          else:
          else:
-            sort_key = lambda f: int(f.get('height',-1))
+            def sort_key(f):
+                return (
+                    # Sort first by quality
+                    int(f.get('height',-1)),
+                    int(f.get('bitrate',-1)),
+                    # The original version with subtitles has lower relevance
+                    re.match(r'VO-ST(F|A)', f.get('versionCode', '')) is None,
+                    # The version with sourds/mal subtitles has also lower relevance
+                    re.match(r'VO?(F|A)-STM\1', f.get('versionCode', '')) is None,
+                )
          formats = sorted(formats, key=sort_key)
          formats = sorted(formats, key=sort_key)
-        # Prefer videos without subtitles in the same language
-        formats = sorted(formats, key=lambda f: re.match(r'VO(F|A)-STM\1', f.get('versionCode', '')) is None)
-        # Pick the best quality
          def _format(format_info):
          def _format(format_info):
-            quality = format_info['quality']
-            m_quality = re.match(r'\w*? - (\d*)p', quality)
-            if m_quality is not None:
-                quality = m_quality.group(1)
+            quality = ''
+            height = format_info.get('height')
+            if height is not None:
+                quality = compat_str(height)
+            bitrate = format_info.get('bitrate')
+            if bitrate is not None:
+                quality += '-%d' % bitrate
              if format_info.get('versionCode') is not None:
                  format_id = u'%s-%s' % (quality, format_info['versionCode'])
              else:
              if format_info.get('versionCode') is not None:
                  format_id = u'%s-%s' % (quality, format_info['versionCode'])
              else:
@@ -196,7 +214,7 @@ class ArteTVPlus7IE(InfoExtractor):
                  'format_id': format_id,
                  'format_note': format_info.get('versionLibelle'),
                  'width': format_info.get('width'),
                  'format_id': format_id,
                  'format_note': format_info.get('versionLibelle'),
                  'width': format_info.get('width'),
-                'height': format_info.get('height'),
+                'height': height,
              }
              if format_info['mediaType'] == u'rtmp':
                  info['url'] = format_info['streamer']
              }
              if format_info['mediaType'] == u'rtmp':
                  info['url'] = format_info['streamer']
diff --git a/youtube_dl/extractor/bambuser.py b/youtube_dl/extractor/bambuser.py

new file mode 100644 (file)

index 0000000..f3b36f4
--- /dev/null
+++ b/youtube_dl/extractor/bambuser.py
@@ -0,0 +1,80 @@
+import re
+import json
+import itertools
+
+from .common import InfoExtractor
+from ..utils import (
+    compat_urllib_request,
+)
+
+
+class BambuserIE(InfoExtractor):
+    IE_NAME = u'bambuser'
+    _VALID_URL = r'https?://bambuser\.com/v/(?P<id>\d+)'
+    _API_KEY = '005f64509e19a868399060af746a00aa'
+
+    _TEST = {
+        u'url': u'http://bambuser.com/v/4050584',
+        u'md5': u'fba8f7693e48fd4e8641b3fd5539a641',
+        u'info_dict': {
+            u'id': u'4050584',
+            u'ext': u'flv',
+            u'title': u'Education engineering days - lightning talks',
+            u'duration': 3741,
+            u'uploader': u'pixelversity',
+            u'uploader_id': u'344706',
+        },
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        info_url = ('http://player-c.api.bambuser.com/getVideo.json?'
+            '&api_key=%s&vid=%s' % (self._API_KEY, video_id))
+        info_json = self._download_webpage(info_url, video_id)
+        info = json.loads(info_json)['result']
+
+        return {
+            'id': video_id,
+            'title': info['title'],
+            'url': info['url'],
+            'thumbnail': info.get('preview'),
+            'duration': int(info['length']),
+            'view_count': int(info['views_total']),
+            'uploader': info['username'],
+            'uploader_id': info['uid'],
+        }
+
+
+class BambuserChannelIE(InfoExtractor):
+    IE_NAME = u'bambuser:channel'
+    _VALID_URL = r'http://bambuser.com/channel/(?P<user>.*?)(?:/|#|\?|$)'
+    # The maximum number we can get with each request
+    _STEP = 50
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        user = mobj.group('user')
+        urls = []
+        last_id = ''
+        for i in itertools.count(1):
+            req_url = ('http://bambuser.com/xhr-api/index.php?username={user}'
+                '&sort=created&access_mode=0%2C1%2C2&limit={count}'
+                '&method=broadcast&format=json&vid_older_than={last}'
+                ).format(user=user, count=self._STEP, last=last_id)
+            req = compat_urllib_request.Request(req_url)
+            # Without setting this header, we wouldn't get any result
+            req.add_header('Referer', 'http://bambuser.com/channel/%s' % user)
+            info_json = self._download_webpage(req, user,
+                u'Downloading page %d' % i)
+            results = json.loads(info_json)['result']
+            if len(results) == 0:
+                break
+            last_id = results[-1]['vid']
+            urls.extend(self.url_result(v['page'], 'Bambuser') for v in results)
+
+        return {
+            '_type': 'playlist',
+            'title': user,
+            'entries': urls,
+        }
diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py

index 1392f382a24c273604f0c67db7afafefbcec85b8..d8c35465a34fa4c4d4ca822d499892504a51ce62 100644 (file)
--- a/youtube_dl/extractor/brightcove.py
+++ b/youtube_dl/extractor/brightcove.py
@@ -9,10 +9,13 @@ from ..utils import (
      compat_urllib_parse,
      find_xpath_attr,
      compat_urlparse,
      compat_urllib_parse,
      find_xpath_attr,
      compat_urlparse,
+    compat_str,
+    compat_urllib_request,
  
      ExtractorError,
  )
  
  
      ExtractorError,
  )
  
+
  class BrightcoveIE(InfoExtractor):
      _VALID_URL = r'https?://.*brightcove\.com/(services|viewer).*\?(?P<query>.*)'
      _FEDERATED_URL_TEMPLATE = 'http://c.brightcove.com/services/viewer/htmlFederated?%s'
  class BrightcoveIE(InfoExtractor):
      _VALID_URL = r'https?://.*brightcove\.com/(services|viewer).*\?(?P<query>.*)'
      _FEDERATED_URL_TEMPLATE = 'http://c.brightcove.com/services/viewer/htmlFederated?%s'
@@ -23,7 +26,7 @@ class BrightcoveIE(InfoExtractor):
              # From http://www.8tv.cat/8aldia/videos/xavier-sala-i-martin-aquesta-tarda-a-8-al-dia/
              u'url': u'http://c.brightcove.com/services/viewer/htmlFederated?playerID=1654948606001&flashID=myExperience&%40videoPlayer=2371591881001',
              u'file': u'2371591881001.mp4',
              # From http://www.8tv.cat/8aldia/videos/xavier-sala-i-martin-aquesta-tarda-a-8-al-dia/
              u'url': u'http://c.brightcove.com/services/viewer/htmlFederated?playerID=1654948606001&flashID=myExperience&%40videoPlayer=2371591881001',
              u'file': u'2371591881001.mp4',
-            u'md5': u'9e80619e0a94663f0bdc849b4566af19',
+            u'md5': u'8eccab865181d29ec2958f32a6a754f5',
              u'note': u'Test Brightcove downloads and detection in GenericIE',
              u'info_dict': {
                  u'title': u'Xavier Sala i Martín: “Un banc que no presta és un banc zombi que no serveix per a res”',
              u'note': u'Test Brightcove downloads and detection in GenericIE',
              u'info_dict': {
                  u'title': u'Xavier Sala i Martín: “Un banc que no presta és un banc zombi que no serveix per a res”',
@@ -41,6 +44,17 @@ class BrightcoveIE(InfoExtractor):
                  u'uploader': u'Oracle',
              },
          },
                  u'uploader': u'Oracle',
              },
          },
+        {
+            # From http://mashable.com/2013/10/26/thermoelectric-bracelet-lets-you-control-your-body-temperature/
+            u'url': u'http://c.brightcove.com/services/viewer/federated_f9?&playerID=1265504713001&publisherID=AQ%7E%7E%2CAAABBzUwv1E%7E%2CxP-xFHVUstiMFlNYfvF4G9yFnNaqCw_9&videoID=2750934548001',
+            u'info_dict': {
+                u'id': u'2750934548001',
+                u'ext': u'mp4',
+                u'title': u'This Bracelet Acts as a Personal Thermostat',
+                u'description': u'md5:547b78c64f4112766ccf4e151c20b6a0',
+                u'uploader': u'Mashable',
+            },
+        },
      ]
  
      @classmethod
      ]
  
      @classmethod
@@ -68,24 +82,48 @@ class BrightcoveIE(InfoExtractor):
          videoPlayer = find_xpath_attr(object_doc, './param', 'name', '@videoPlayer')
          if videoPlayer is not None:
              params['@videoPlayer'] = videoPlayer.attrib['value']
          videoPlayer = find_xpath_attr(object_doc, './param', 'name', '@videoPlayer')
          if videoPlayer is not None:
              params['@videoPlayer'] = videoPlayer.attrib['value']
+        linkBase = find_xpath_attr(object_doc, './param', 'name', 'linkBaseURL')
+        if linkBase is not None:
+            params['linkBaseURL'] = linkBase.attrib['value']
          data = compat_urllib_parse.urlencode(params)
          return cls._FEDERATED_URL_TEMPLATE % data
  
          data = compat_urllib_parse.urlencode(params)
          return cls._FEDERATED_URL_TEMPLATE % data
  
+    @classmethod
+    def _extract_brightcove_url(cls, webpage):
+        """Try to extract the brightcove url from the webpage, returns None
+        if it can't be found
+        """
+        m_brightcove = re.search(
+            r'<object[^>]+?class=([\'"])[^>]*?BrightcoveExperience.*?\1.+?</object>',
+            webpage, re.DOTALL)
+        if m_brightcove is not None:
+            return cls._build_brighcove_url(m_brightcove.group())
+        else:
+            return None
+
      def _real_extract(self, url):
      def _real_extract(self, url):
+        # Rename the 'videoId', 'videoID' and 'bctid' fields to '@videoPlayer'
+        url = re.sub(r'(?<=[?&])(videoI(d|D)|bctid)', '%40videoPlayer', url)
+        # Change bckey (used by bcove.me urls) to playerKey
+        url = re.sub(r'(?<=[?&])bckey', 'playerKey', url)
          mobj = re.match(self._VALID_URL, url)
          query_str = mobj.group('query')
          query = compat_urlparse.parse_qs(query_str)
  
          videoPlayer = query.get('@videoPlayer')
          if videoPlayer:
          mobj = re.match(self._VALID_URL, url)
          query_str = mobj.group('query')
          query = compat_urlparse.parse_qs(query_str)
  
          videoPlayer = query.get('@videoPlayer')
          if videoPlayer:
-            return self._get_video_info(videoPlayer[0], query_str)
+            return self._get_video_info(videoPlayer[0], query_str, query)
          else:
              player_key = query['playerKey']
              return self._get_playlist_info(player_key[0])
  
          else:
              player_key = query['playerKey']
              return self._get_playlist_info(player_key[0])
  
-    def _get_video_info(self, video_id, query):
-        request_url = self._FEDERATED_URL_TEMPLATE % query
-        webpage = self._download_webpage(request_url, video_id)
+    def _get_video_info(self, video_id, query_str, query):
+        request_url = self._FEDERATED_URL_TEMPLATE % query_str
+        req = compat_urllib_request.Request(request_url)
+        linkBase = query.get('linkBaseURL')
+        if linkBase is not None:
+            req.add_header('Referer', linkBase[0])
+        webpage = self._download_webpage(req, video_id)
  
          self.report_extraction(video_id)
          info = self._search_regex(r'var experienceJSON = ({.*?});', webpage, 'json')
  
          self.report_extraction(video_id)
          info = self._search_regex(r'var experienceJSON = ({.*?});', webpage, 'json')
@@ -109,7 +147,7 @@ class BrightcoveIE(InfoExtractor):
  
      def _extract_video_info(self, video_info):
          info = {
  
      def _extract_video_info(self, video_info):
          info = {
-            'id': video_info['id'],
+            'id': compat_str(video_info['id']),
              'title': video_info['displayName'],
              'description': video_info.get('shortDescription'),
              'thumbnail': video_info.get('videoStillURL') or video_info.get('thumbnailURL'),
              'title': video_info['displayName'],
              'description': video_info.get('shortDescription'),
              'thumbnail': video_info.get('videoStillURL') or video_info.get('thumbnailURL'),
@@ -119,15 +157,14 @@ class BrightcoveIE(InfoExtractor):
          renditions = video_info.get('renditions')
          if renditions:
              renditions = sorted(renditions, key=lambda r: r['size'])
          renditions = video_info.get('renditions')
          if renditions:
              renditions = sorted(renditions, key=lambda r: r['size'])
-            best_format = renditions[-1]
-            info.update({
-                'url': best_format['defaultURL'],
-                'ext': 'mp4',
-            })
+            info['formats'] = [{
+                'url': rend['defaultURL'],
+                'height': rend.get('frameHeight'),
+                'width': rend.get('frameWidth'),
+            } for rend in renditions]
          elif video_info.get('FLVFullLengthURL') is not None:
              info.update({
                  'url': video_info['FLVFullLengthURL'],
          elif video_info.get('FLVFullLengthURL') is not None:
              info.update({
                  'url': video_info['FLVFullLengthURL'],
-                'ext': 'flv',
              })
          else:
              raise ExtractorError(u'Unable to extract video url for %s' % info['id'])
              })
          else:
              raise ExtractorError(u'Unable to extract video url for %s' % info['id'])
diff --git a/youtube_dl/extractor/canalc2.py b/youtube_dl/extractor/canalc2.py

index e7f4fa9fdc569b9eb559fd3358169e81c3cdfa3a..3d8d7f9d2dee4713b467e47ab79bbb55edccf147 100644 (file)
--- a/youtube_dl/extractor/canalc2.py
+++ b/youtube_dl/extractor/canalc2.py
@@ -6,7 +6,7 @@ from .common import InfoExtractor
  
  class Canalc2IE(InfoExtractor):
      IE_NAME = 'canalc2.tv'
  
  class Canalc2IE(InfoExtractor):
      IE_NAME = 'canalc2.tv'
-    _VALID_URL = r'http://.*?\.canalc2\.tv/video\.asp\?idVideo=(\d+)&voir=oui'
+    _VALID_URL = r'http://.*?\.canalc2\.tv/video\.asp\?.*?idVideo=(?P<id>\d+)'
  
      _TEST = {
          u'url': u'http://www.canalc2.tv/video.asp?idVideo=12163&voir=oui',
  
      _TEST = {
          u'url': u'http://www.canalc2.tv/video.asp?idVideo=12163&voir=oui',
@@ -18,7 +18,9 @@ class Canalc2IE(InfoExtractor):
      }
  
      def _real_extract(self, url):
      }
  
      def _real_extract(self, url):
-        video_id = re.match(self._VALID_URL, url).group(1)
+        video_id = re.match(self._VALID_URL, url).group('id')
+        # We need to set the voir field for getting the file name
+        url = 'http://www.canalc2.tv/video.asp?idVideo=%s&voir=oui' % video_id
          webpage = self._download_webpage(url, video_id)
          file_name = self._search_regex(
              r"so\.addVariable\('file','(.*?)'\);",
          webpage = self._download_webpage(url, video_id)
          file_name = self._search_regex(
              r"so\.addVariable\('file','(.*?)'\);",
diff --git a/youtube_dl/extractor/cinemassacre.py b/youtube_dl/extractor/cinemassacre.py

index 6925b96c2ee1fd1e09624638805597259b068dcd..f0d08cebfce87b006b339508f655eba95a4bc1ef 100644 (file)
--- a/youtube_dl/extractor/cinemassacre.py
+++ b/youtube_dl/extractor/cinemassacre.py
@@ -41,7 +41,7 @@ class CinemassacreIE(InfoExtractor):
          webpage_url = u'http://' + mobj.group('url')
          webpage = self._download_webpage(webpage_url, None) # Don't know video id yet
          video_date = mobj.group('date_Y') + mobj.group('date_m') + mobj.group('date_d')
          webpage_url = u'http://' + mobj.group('url')
          webpage = self._download_webpage(webpage_url, None) # Don't know video id yet
          video_date = mobj.group('date_Y') + mobj.group('date_m') + mobj.group('date_d')
-        mobj = re.search(r'src="(?P<embed_url>http://player\.screenwavemedia\.com/play/(?:embed|player)\.php\?id=(?:Cinemassacre-)?(?P<video_id>.+?))"', webpage)
+        mobj = re.search(r'src="(?P<embed_url>http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?id=(?:Cinemassacre-)?(?P<video_id>.+?))"', webpage)
          if not mobj:
              raise ExtractorError(u'Can\'t extract embed url and video id')
          playerdata_url = mobj.group(u'embed_url')
          if not mobj:
              raise ExtractorError(u'Can\'t extract embed url and video id')
          playerdata_url = mobj.group(u'embed_url')
@@ -55,30 +55,32 @@ class CinemassacreIE(InfoExtractor):
              video_description = None
  
          playerdata = self._download_webpage(playerdata_url, video_id)
              video_description = None
  
          playerdata = self._download_webpage(playerdata_url, video_id)
-        base_url = self._html_search_regex(r'\'streamer\': \'(?P<base_url>rtmp://.*?)/(?:vod|Cinemassacre)\'',
-            playerdata, u'base_url')
-        base_url += '/Cinemassacre/'
-        # Important: The file names in playerdata are not used by the player and even wrong for some videos
-        sd_file = 'Cinemassacre-%s_high.mp4' % video_id
-        hd_file = 'Cinemassacre-%s.mp4' % video_id
-        video_thumbnail = 'http://image.screenwavemedia.com/Cinemassacre/Cinemassacre-%s_thumb_640x360.jpg' % video_id
+        url = self._html_search_regex(r'\'streamer\': \'(?P<url>[^\']+)\'', playerdata, u'url')
+
+        sd_file = self._html_search_regex(r'\'file\': \'(?P<sd_file>[^\']+)\'', playerdata, u'sd_file')
+        hd_file = self._html_search_regex(r'\'?file\'?: "(?P<hd_file>[^"]+)"', playerdata, u'hd_file')
+        video_thumbnail = self._html_search_regex(r'\'image\': \'(?P<thumbnail>[^\']+)\'', playerdata, u'thumbnail', fatal=False)
  
          formats = [
              {
  
          formats = [
              {
-                'url': base_url + sd_file,
+                'url': url,
+                'play_path': 'mp4:' + sd_file,
+                'rtmp_live': True, # workaround
                  'ext': 'flv',
                  'format': 'sd',
                  'format_id': 'sd',
              },
              {
                  'ext': 'flv',
                  'format': 'sd',
                  'format_id': 'sd',
              },
              {
-                'url': base_url + hd_file,
+                'url': url,
+                'play_path': 'mp4:' + hd_file,
+                'rtmp_live': True, # workaround
                  'ext': 'flv',
                  'format': 'hd',
                  'format_id': 'hd',
              },
          ]
  
                  'ext': 'flv',
                  'format': 'hd',
                  'format_id': 'hd',
              },
          ]
  
-        info = {
+        return {
              'id': video_id,
              'title': video_title,
              'formats': formats,
              'id': video_id,
              'title': video_title,
              'formats': formats,
@@ -86,6 +88,3 @@ class CinemassacreIE(InfoExtractor):
              'upload_date': video_date,
              'thumbnail': video_thumbnail,
          }
              'upload_date': video_date,
              'thumbnail': video_thumbnail,
          }
-        # TODO: Remove when #980 has been merged
-        info.update(formats[-1])
-        return info
diff --git a/youtube_dl/extractor/cnn.py b/youtube_dl/extractor/cnn.py

index a79f881cd9dbf54b8fdb3ff229de1d0e6b9c6aac..34adf6dda519a5ed2657fee3687d9e2e0f52ef73 100644 (file)
--- a/youtube_dl/extractor/cnn.py
+++ b/youtube_dl/extractor/cnn.py
@@ -6,7 +6,7 @@ from ..utils import determine_ext
  
  
  class CNNIE(InfoExtractor):
  
  
  class CNNIE(InfoExtractor):
-    _VALID_URL = r'''(?x)https?://(edition\.)?cnn\.com/video/(data/.+?|\?)/
+    _VALID_URL = r'''(?x)https?://((edition|www)\.)?cnn\.com/video/(data/.+?|\?)/
          (?P<path>.+?/(?P<title>[^/]+?)(?:\.cnn|(?=&)))'''
  
      _TESTS = [{
          (?P<path>.+?/(?P<title>[^/]+?)(?:\.cnn|(?=&)))'''
  
      _TESTS = [{
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py

index 7d7ce5d98a7ef21ccb1fe6897ebad9dd996cc649..fb2d50a098992f8088c41259b73d653c6f6f173d 100644 (file)
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -14,6 +14,8 @@ from ..utils import (
      clean_html,
      compiled_regex_type,
      ExtractorError,
      clean_html,
      compiled_regex_type,
      ExtractorError,
+    RegexNotFoundError,
+    sanitize_filename,
      unescapeHTML,
  )
  
      unescapeHTML,
  )
  
@@ -61,7 +63,7 @@ class InfoExtractor(object):
                      * ext       Will be calculated from url if missing
                      * format    A human-readable description of the format
                                  ("mp4 container with h264/opus").
                      * ext       Will be calculated from url if missing
                      * format    A human-readable description of the format
                                  ("mp4 container with h264/opus").
-                                Calculated from the format_id, width, height 
+                                Calculated from the format_id, width, height
                                  and format_note fields if missing.
                      * format_id A short description of the format
                                  ("mp4_h264_opus" or "19")
                                  and format_note fields if missing.
                      * format_id A short description of the format
                                  ("mp4_h264_opus" or "19")
@@ -69,6 +71,9 @@ class InfoExtractor(object):
                                  ("3D" or "DASH video")
                      * width     Width of the video, if known
                      * height    Height of the video, if known
                                  ("3D" or "DASH video")
                      * width     Width of the video, if known
                      * height    Height of the video, if known
+    webpage_url:    The url to the video webpage; when given to youtube-dl
+                    it should produce the same result again. (It will be
+                    set by YoutubeDL if it's missing)
  
      Unless mentioned otherwise, the fields should be Unicode strings.
  
  
      Unless mentioned otherwise, the fields should be Unicode strings.
  
@@ -181,6 +186,17 @@ class InfoExtractor(object):
              self.to_screen(u'Dumping request to ' + url)
              dump = base64.b64encode(webpage_bytes).decode('ascii')
              self._downloader.to_screen(dump)
              self.to_screen(u'Dumping request to ' + url)
              dump = base64.b64encode(webpage_bytes).decode('ascii')
              self._downloader.to_screen(dump)
+        if self._downloader.params.get('write_pages', False):
+            try:
+                url = url_or_request.get_full_url()
+            except AttributeError:
+                url = url_or_request
+            raw_filename = ('%s_%s.dump' % (video_id, url))
+            filename = sanitize_filename(raw_filename, restricted=True)
+            self.to_screen(u'Saving request to ' + filename)
+            with open(filename, 'wb') as outf:
+                outf.write(webpage_bytes)
+
          content = webpage_bytes.decode(encoding, 'replace')
          return (content, urlh)
  
          content = webpage_bytes.decode(encoding, 'replace')
          return (content, urlh)
  
@@ -231,7 +247,7 @@ class InfoExtractor(object):
          Perform a regex search on the given string, using a single or a list of
          patterns returning the first matching group.
          In case of failure return a default value or raise a WARNING or a
          Perform a regex search on the given string, using a single or a list of
          patterns returning the first matching group.
          In case of failure return a default value or raise a WARNING or a
-        ExtractorError, depending on fatal, specifying the field name.
+        RegexNotFoundError, depending on fatal, specifying the field name.
          """
          if isinstance(pattern, (str, compat_str, compiled_regex_type)):
              mobj = re.search(pattern, string, flags)
          """
          if isinstance(pattern, (str, compat_str, compiled_regex_type)):
              mobj = re.search(pattern, string, flags)
@@ -251,7 +267,7 @@ class InfoExtractor(object):
          elif default is not None:
              return default
          elif fatal:
          elif default is not None:
              return default
          elif fatal:
-            raise ExtractorError(u'Unable to extract %s' % _name)
+            raise RegexNotFoundError(u'Unable to extract %s' % _name)
          else:
              self._downloader.report_warning(u'unable to extract %s; '
                  u'please report this issue on http://yt-dl.org/bug' % _name)
          else:
              self._downloader.report_warning(u'unable to extract %s; '
                  u'please report this issue on http://yt-dl.org/bug' % _name)
@@ -306,7 +322,9 @@ class InfoExtractor(object):
          if name is None:
              name = 'OpenGraph %s' % prop
          escaped = self._search_regex(self._og_regex(prop), html, name, flags=re.DOTALL, **kargs)
          if name is None:
              name = 'OpenGraph %s' % prop
          escaped = self._search_regex(self._og_regex(prop), html, name, flags=re.DOTALL, **kargs)
-        return unescapeHTML(escaped)
+        if not escaped is None:
+            return unescapeHTML(escaped)
+        return None
  
      def _og_search_thumbnail(self, html, **kargs):
          return self._og_search_property('image', html, u'thumbnail url', fatal=False, **kargs)
  
      def _og_search_thumbnail(self, html, **kargs):
          return self._og_search_property('image', html, u'thumbnail url', fatal=False, **kargs)
@@ -317,10 +335,10 @@ class InfoExtractor(object):
      def _og_search_title(self, html, **kargs):
          return self._og_search_property('title', html, **kargs)
  
      def _og_search_title(self, html, **kargs):
          return self._og_search_property('title', html, **kargs)
  
-    def _og_search_video_url(self, html, name='video url', **kargs):
-        return self._html_search_regex([self._og_regex('video:secure_url'),
-                                        self._og_regex('video')],
-                                       html, name, **kargs)
+    def _og_search_video_url(self, html, name='video url', secure=True, **kargs):
+        regexes = [self._og_regex('video')]
+        if secure: regexes.insert(0, self._og_regex('video:secure_url'))
+        return self._html_search_regex(regexes, html, name, **kargs)
  
      def _rta_search(self, html):
          # See http://www.rtalabel.org/index.php?content=howtofaq#single
  
      def _rta_search(self, html):
          # See http://www.rtalabel.org/index.php?content=howtofaq#single
diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py

index 7d83539469d3d7ff120f916cc837a60bacfe8390..e87690f9d288103ea222e1c216786b42e89364de 100644 (file)
--- a/youtube_dl/extractor/dailymotion.py
+++ b/youtube_dl/extractor/dailymotion.py
@@ -21,6 +21,7 @@ class DailymotionBaseInfoExtractor(InfoExtractor):
          """Build a request with the family filter disabled"""
          request = compat_urllib_request.Request(url)
          request.add_header('Cookie', 'family_filter=off')
          """Build a request with the family filter disabled"""
          request = compat_urllib_request.Request(url)
          request.add_header('Cookie', 'family_filter=off')
+        request.add_header('Cookie', 'ff=off')
          return request
  
  class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
          return request
  
  class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
@@ -28,6 +29,15 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
  
      _VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/(?:embed/)?video/([^/]+)'
      IE_NAME = u'dailymotion'
  
      _VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/(?:embed/)?video/([^/]+)'
      IE_NAME = u'dailymotion'
+
+    _FORMATS = [
+        (u'stream_h264_ld_url', u'ld'),
+        (u'stream_h264_url', u'standard'),
+        (u'stream_h264_hq_url', u'hq'),
+        (u'stream_h264_hd_url', u'hd'),
+        (u'stream_h264_hd1080_url', u'hd180'),
+    ]
+
      _TESTS = [
          {
              u'url': u'http://www.dailymotion.com/video/x33vw9_tutoriel-de-youtubeur-dl-des-video_tech',
      _TESTS = [
          {
              u'url': u'http://www.dailymotion.com/video/x33vw9_tutoriel-de-youtubeur-dl-des-video_tech',
@@ -52,6 +62,18 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
              },
              u'skip': u'VEVO is only available in some countries',
          },
              },
              u'skip': u'VEVO is only available in some countries',
          },
+        # age-restricted video
+        {
+            u'url': u'http://www.dailymotion.com/video/xyh2zz_leanna-decker-cyber-girl-of-the-year-desires-nude-playboy-plus_redband',
+            u'file': u'xyh2zz.mp4',
+            u'md5': u'0d667a7b9cebecc3c89ee93099c4159d',
+            u'info_dict': {
+                u'title': 'Leanna Decker - Cyber Girl Of The Year Desires Nude [Playboy Plus]',
+                u'uploader': 'HotWaves1012',
+                u'age_limit': 18,
+            }
+
+        }
      ]
  
      def _real_extract(self, url):
      ]
  
      def _real_extract(self, url):
@@ -60,7 +82,6 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
  
          video_id = mobj.group(1).split('_')[0].split('?')[0]
  
  
          video_id = mobj.group(1).split('_')[0].split('?')[0]
  
-        video_extension = 'mp4'
          url = 'http://www.dailymotion.com/video/%s' % video_id
  
          # Retrieve video webpage to extract further information
          url = 'http://www.dailymotion.com/video/%s' % video_id
  
          # Retrieve video webpage to extract further information
@@ -82,7 +103,8 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
          video_uploader = self._search_regex([r'(?im)<span class="owner[^\"]+?">[^<]+?<a [^>]+?>([^<]+?)</a>',
                                               # Looking for official user
                                               r'<(?:span|a) .*?rel="author".*?>([^<]+?)</'],
          video_uploader = self._search_regex([r'(?im)<span class="owner[^\"]+?">[^<]+?<a [^>]+?>([^<]+?)</a>',
                                               # Looking for official user
                                               r'<(?:span|a) .*?rel="author".*?>([^<]+?)</'],
-                                            webpage, 'video uploader')
+                                            webpage, 'video uploader', fatal=False)
+        age_limit = self._rta_search(webpage)
  
          video_upload_date = None
          mobj = re.search(r'<div class="[^"]*uploaded_cont[^"]*" title="[^"]*">([0-9]{2})-([0-9]{2})-([0-9]{4})</div>', webpage)
  
          video_upload_date = None
          mobj = re.search(r'<div class="[^"]*uploaded_cont[^"]*" title="[^"]*">([0-9]{2})-([0-9]{2})-([0-9]{4})</div>', webpage)
@@ -99,37 +121,43 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
              msg = 'Couldn\'t get video, Dailymotion says: %s' % info['error']['title']
              raise ExtractorError(msg, expected=True)
  
              msg = 'Couldn\'t get video, Dailymotion says: %s' % info['error']['title']
              raise ExtractorError(msg, expected=True)
  
-        # TODO: support choosing qualities
-
-        for key in ['stream_h264_hd1080_url','stream_h264_hd_url',
-                    'stream_h264_hq_url','stream_h264_url',
-                    'stream_h264_ld_url']:
-            if info.get(key):#key in info and info[key]:
-                max_quality = key
-                self.to_screen(u'Using %s' % key)
-                break
-        else:
+        formats = []
+        for (key, format_id) in self._FORMATS:
+            video_url = info.get(key)
+            if video_url is not None:
+                m_size = re.search(r'H264-(\d+)x(\d+)', video_url)
+                if m_size is not None:
+                    width, height = m_size.group(1), m_size.group(2)
+                else:
+                    width, height = None, None
+                formats.append({
+                    'url': video_url,
+                    'ext': 'mp4',
+                    'format_id': format_id,
+                    'width': width,
+                    'height': height,
+                })
+        if not formats:
              raise ExtractorError(u'Unable to extract video URL')
              raise ExtractorError(u'Unable to extract video URL')
-        video_url = info[max_quality]
  
          # subtitles
  
          # subtitles
-        video_subtitles = self.extract_subtitles(video_id)
+        video_subtitles = self.extract_subtitles(video_id, webpage)
          if self._downloader.params.get('listsubtitles', False):
          if self._downloader.params.get('listsubtitles', False):
-            self._list_available_subtitles(video_id)
+            self._list_available_subtitles(video_id, webpage)
              return
  
              return
  
-        return [{
+        return {
              'id':       video_id,
              'id':       video_id,
-            'url':      video_url,
+            'formats': formats,
              'uploader': video_uploader,
              'upload_date':  video_upload_date,
              'title':    self._og_search_title(webpage),
              'uploader': video_uploader,
              'upload_date':  video_upload_date,
              'title':    self._og_search_title(webpage),
-            'ext':      video_extension,
              'subtitles':    video_subtitles,
              'subtitles':    video_subtitles,
-            'thumbnail': info['thumbnail_url']
-        }]
+            'thumbnail': info['thumbnail_url'],
+            'age_limit': age_limit,
+        }
  
  
-    def _get_available_subtitles(self, video_id):
+    def _get_available_subtitles(self, video_id, webpage):
          try:
              sub_list = self._download_webpage(
                  'https://api.dailymotion.com/video/%s/subtitles?fields=id,language,url' % video_id,
          try:
              sub_list = self._download_webpage(
                  'https://api.dailymotion.com/video/%s/subtitles?fields=id,language,url' % video_id,
diff --git a/youtube_dl/extractor/depositfiles.py b/youtube_dl/extractor/depositfiles.py

index d43348955f122edf17573660077dca407634e329..2c9fb5f2e08dd69e977ab5dc599d97a5ff26a917 100644 (file)
--- a/youtube_dl/extractor/depositfiles.py
+++ b/youtube_dl/extractor/depositfiles.py
@@ -25,7 +25,7 @@ class DepositFilesIE(InfoExtractor):
          url = 'http://depositfiles.com/en/files/' + file_id
  
          # Retrieve file webpage with 'Free download' button pressed
          url = 'http://depositfiles.com/en/files/' + file_id
  
          # Retrieve file webpage with 'Free download' button pressed
-        free_download_indication = { 'gateway_result' : '1' }
+        free_download_indication = {'gateway_result' : '1'}
          request = compat_urllib_request.Request(url, compat_urllib_parse.urlencode(free_download_indication))
          try:
              self.report_download_webpage(file_id)
          request = compat_urllib_request.Request(url, compat_urllib_parse.urlencode(free_download_indication))
          try:
              self.report_download_webpage(file_id)
diff --git a/youtube_dl/extractor/eighttracks.py b/youtube_dl/extractor/eighttracks.py

index cced0681171a3dbc818e62ee2551da1958eacae2..2cfbcd363c0db4f2505d8da7120d7c3161a7b0a9 100644 (file)
--- a/youtube_dl/extractor/eighttracks.py
+++ b/youtube_dl/extractor/eighttracks.py
@@ -101,7 +101,7 @@ class EightTracksIE(InfoExtractor):
          first_url = 'http://8tracks.com/sets/%s/play?player=sm&mix_id=%s&format=jsonh' % (session, mix_id)
          next_url = first_url
          res = []
          first_url = 'http://8tracks.com/sets/%s/play?player=sm&mix_id=%s&format=jsonh' % (session, mix_id)
          next_url = first_url
          res = []
-        for i in itertools.count():
+        for i in range(track_count):
              api_json = self._download_webpage(next_url, playlist_id,
                  note=u'Downloading song information %s/%s' % (str(i+1), track_count),
                  errnote=u'Failed to download song information')
              api_json = self._download_webpage(next_url, playlist_id,
                  note=u'Downloading song information %s/%s' % (str(i+1), track_count),
                  errnote=u'Failed to download song information')
@@ -116,7 +116,5 @@ class EightTracksIE(InfoExtractor):
                  'ext': 'm4a',
              }
              res.append(info)
                  'ext': 'm4a',
              }
              res.append(info)
-            if api_data['set']['at_last_track']:
-                break
              next_url = 'http://8tracks.com/sets/%s/next?player=sm&mix_id=%s&format=jsonh&track_id=%s' % (session, mix_id, track_data['id'])
          return res
              next_url = 'http://8tracks.com/sets/%s/next?player=sm&mix_id=%s&format=jsonh&track_id=%s' % (session, mix_id, track_data['id'])
          return res
diff --git a/youtube_dl/extractor/eitb.py b/youtube_dl/extractor/eitb.py

new file mode 100644 (file)

index 0000000..4ba3231
--- /dev/null
+++ b/youtube_dl/extractor/eitb.py
@@ -0,0 +1,37 @@
+# encoding: utf-8
+import re
+
+from .common import InfoExtractor
+from .brightcove import BrightcoveIE
+from ..utils import ExtractorError
+
+
+class EitbIE(InfoExtractor):
+    IE_NAME = u'eitb.tv'
+    _VALID_URL = r'https?://www\.eitb\.tv/(eu/bideoa|es/video)/[^/]+/(?P<playlist_id>\d+)/(?P<chapter_id>\d+)'
+
+    _TEST = {
+        u'add_ie': ['Brightcove'],
+        u'url': u'http://www.eitb.tv/es/video/60-minutos-60-minutos-2013-2014/2677100210001/2743577154001/lasa-y-zabala-30-anos/',
+        u'md5': u'edf4436247185adee3ea18ce64c47998',
+        u'info_dict': {
+            u'id': u'2743577154001',
+            u'ext': u'mp4',
+            u'title': u'60 minutos (Lasa y Zabala, 30 años)',
+            # All videos from eitb has this description in the brightcove info
+            u'description': u'.',
+            u'uploader': u'Euskal Telebista',
+        },
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        chapter_id = mobj.group('chapter_id')
+        webpage = self._download_webpage(url, chapter_id)
+        bc_url = BrightcoveIE._extract_brightcove_url(webpage)
+        if bc_url is None:
+            raise ExtractorError(u'Could not extract the Brightcove url')
+        # The BrightcoveExperience object doesn't contain the video id, we set
+        # it manually
+        bc_url += '&%40videoPlayer={0}'.format(chapter_id)
+        return self.url_result(bc_url, BrightcoveIE.ie_key())
diff --git a/youtube_dl/extractor/exfm.py b/youtube_dl/extractor/exfm.py

index 3443f19c5f9bb8e2853c95b4ca5e153b395a701f..a51d79b08c656144c3f67d853fcae8fe52bc6e1f 100644 (file)
--- a/youtube_dl/extractor/exfm.py
+++ b/youtube_dl/extractor/exfm.py
@@ -11,16 +11,17 @@ class ExfmIE(InfoExtractor):
      _SOUNDCLOUD_URL = r'(?:http://)?(?:www\.)?api\.soundcloud.com/tracks/([^/]+)/stream'
      _TESTS = [
          {
      _SOUNDCLOUD_URL = r'(?:http://)?(?:www\.)?api\.soundcloud.com/tracks/([^/]+)/stream'
      _TESTS = [
          {
-            u'url': u'http://ex.fm/song/1bgtzg',
-            u'file': u'95223130.mp3',
-            u'md5': u'8a7967a3fef10e59a1d6f86240fd41cf',
+            u'url': u'http://ex.fm/song/eh359',
+            u'file': u'44216187.mp3',
+            u'md5': u'e45513df5631e6d760970b14cc0c11e7',
              u'info_dict': {
              u'info_dict': {
-                u"title": u"We Can't Stop - Miley Cyrus",
-                u"uploader": u"Miley Cyrus",
-                u'upload_date': u'20130603',
-                u'description': u'Download "We Can\'t Stop" \r\niTunes: http://smarturl.it/WeCantStop?IQid=SC\r\nAmazon: http://smarturl.it/WeCantStopAMZ?IQid=SC',
+                u"title": u"Test House \"Love Is Not Enough\" (Extended Mix) DeadJournalist Exclusive",
+                u"uploader": u"deadjournalist",
+                u'upload_date': u'20120424',
+                u'description': u'Test House \"Love Is Not Enough\" (Extended Mix) DeadJournalist Exclusive',
              },
              u'note': u'Soundcloud song',
              },
              u'note': u'Soundcloud song',
+            u'skip': u'The site is down too often',
          },
          {
              u'url': u'http://ex.fm/song/wddt8',
          },
          {
              u'url': u'http://ex.fm/song/wddt8',
@@ -30,6 +31,7 @@ class ExfmIE(InfoExtractor):
                  u'title': u'Safe and Sound',
                  u'uploader': u'Capital Cities',
              },
                  u'title': u'Safe and Sound',
                  u'uploader': u'Capital Cities',
              },
+            u'skip': u'The site is down too often',
          },
      ]
  
          },
      ]
  
diff --git a/youtube_dl/extractor/extremetube.py b/youtube_dl/extractor/extremetube.py

new file mode 100644 (file)

index 0000000..1c20e43
--- /dev/null
+++ b/youtube_dl/extractor/extremetube.py
@@ -0,0 +1,50 @@
+import os
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    compat_urllib_parse_urlparse,
+    compat_urllib_request,
+    compat_urllib_parse,
+)
+
+class ExtremeTubeIE(InfoExtractor):
+    _VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>extremetube\.com/video/.+?(?P<videoid>[0-9]+))(?:[/?&]|$)'
+    _TEST = {
+        u'url': u'http://www.extremetube.com/video/music-video-14-british-euro-brit-european-cumshots-swallow-652431',
+        u'file': u'652431.mp4',
+        u'md5': u'1fb9228f5e3332ec8c057d6ac36f33e0',
+        u'info_dict': {
+            u"title": u"Music Video 14 british euro brit european cumshots swallow",
+            u"uploader": u"unknown",
+            u"age_limit": 18,
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('videoid')
+        url = 'http://www.' + mobj.group('url')
+
+        req = compat_urllib_request.Request(url)
+        req.add_header('Cookie', 'age_verified=1')
+        webpage = self._download_webpage(req, video_id)
+
+        video_title = self._html_search_regex(r'<h1 [^>]*?title="([^"]+)"[^>]*>\1<', webpage, u'title')
+        uploader = self._html_search_regex(r'>Posted by:(?=<)(?:\s|<[^>]*>)*(.+?)\|', webpage, u'uploader', fatal=False)
+        video_url = compat_urllib_parse.unquote(self._html_search_regex(r'video_url=(.+?)&amp;', webpage, u'video_url'))
+        path = compat_urllib_parse_urlparse(video_url).path
+        extension = os.path.splitext(path)[1][1:]
+        format = path.split('/')[5].split('_')[:2]
+        format = "-".join(format)
+
+        return {
+            'id': video_id,
+            'title': video_title,
+            'uploader': uploader,
+            'url': video_url,
+            'ext': extension,
+            'format': format,
+            'format_id': format,
+            'age_limit': 18,
+        }
diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py

index 9d1bc07510c3148b8ed8659d697c46017c6a36ff..f8bdfc2d33c9f00b9f902a4303eb7024f4646312 100644 (file)
--- a/youtube_dl/extractor/facebook.py
+++ b/youtube_dl/extractor/facebook.py
@@ -19,7 +19,8 @@ class FacebookIE(InfoExtractor):
      """Information Extractor for Facebook"""
  
      _VALID_URL = r'^(?:https?://)?(?:\w+\.)?facebook\.com/(?:video/video|photo)\.php\?(?:.*?)v=(?P<ID>\d+)(?:.*)'
      """Information Extractor for Facebook"""
  
      _VALID_URL = r'^(?:https?://)?(?:\w+\.)?facebook\.com/(?:video/video|photo)\.php\?(?:.*?)v=(?P<ID>\d+)(?:.*)'
-    _LOGIN_URL = 'https://login.facebook.com/login.php?m&next=http%3A%2F%2Fm.facebook.com%2Fhome.php&'
+    _LOGIN_URL = 'https://www.facebook.com/login.php?next=http%3A%2F%2Ffacebook.com%2Fhome.php&login_attempt=1'
+    _CHECKPOINT_URL = 'https://www.facebook.com/checkpoint/?next=http%3A%2F%2Ffacebook.com%2Fhome.php&_fb_noscript=1'
      _NETRC_MACHINE = 'facebook'
      IE_NAME = u'facebook'
      _TEST = {
      _NETRC_MACHINE = 'facebook'
      IE_NAME = u'facebook'
      _TEST = {
@@ -36,50 +37,56 @@ class FacebookIE(InfoExtractor):
          """Report attempt to log in."""
          self.to_screen(u'Logging in')
  
          """Report attempt to log in."""
          self.to_screen(u'Logging in')
  
-    def _real_initialize(self):
-        if self._downloader is None:
-            return
-
-        useremail = None
-        password = None
-        downloader_params = self._downloader.params
-
-        # Attempt to use provided username and password or .netrc data
-        if downloader_params.get('username', None) is not None:
-            useremail = downloader_params['username']
-            password = downloader_params['password']
-        elif downloader_params.get('usenetrc', False):
-            try:
-                info = netrc.netrc().authenticators(self._NETRC_MACHINE)
-                if info is not None:
-                    useremail = info[0]
-                    password = info[2]
-                else:
-                    raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
-            except (IOError, netrc.NetrcParseError) as err:
-                self._downloader.report_warning(u'parsing .netrc: %s' % compat_str(err))
-                return
-
+    def _login(self):
+        (useremail, password) = self._get_login_info()
          if useremail is None:
              return
  
          if useremail is None:
              return
  
-        # Log in
+        login_page_req = compat_urllib_request.Request(self._LOGIN_URL)
+        login_page_req.add_header('Cookie', 'locale=en_US')
+        self.report_login()
+        login_page = self._download_webpage(login_page_req, None, note=False,
+            errnote=u'Unable to download login page')
+        lsd = self._search_regex(r'"lsd":"(\w*?)"', login_page, u'lsd')
+        lgnrnd = self._search_regex(r'name="lgnrnd" value="([^"]*?)"', login_page, u'lgnrnd')
+
          login_form = {
              'email': useremail,
              'pass': password,
          login_form = {
              'email': useremail,
              'pass': password,
-            'login': 'Log+In'
+            'lsd': lsd,
+            'lgnrnd': lgnrnd,
+            'next': 'http://facebook.com/home.php',
+            'default_persistent': '0',
+            'legacy_return': '1',
+            'timezone': '-60',
+            'trynum': '1',
              }
          request = compat_urllib_request.Request(self._LOGIN_URL, compat_urllib_parse.urlencode(login_form))
              }
          request = compat_urllib_request.Request(self._LOGIN_URL, compat_urllib_parse.urlencode(login_form))
+        request.add_header('Content-Type', 'application/x-www-form-urlencoded')
          try:
          try:
-            self.report_login()
              login_results = compat_urllib_request.urlopen(request).read()
              if re.search(r'<form(.*)name="login"(.*)</form>', login_results) is not None:
                  self._downloader.report_warning(u'unable to log in: bad username/password, or exceded login rate limit (~3/min). Check credentials or wait.')
                  return
              login_results = compat_urllib_request.urlopen(request).read()
              if re.search(r'<form(.*)name="login"(.*)</form>', login_results) is not None:
                  self._downloader.report_warning(u'unable to log in: bad username/password, or exceded login rate limit (~3/min). Check credentials or wait.')
                  return
+
+            check_form = {
+                'fb_dtsg': self._search_regex(r'"fb_dtsg":"(.*?)"', login_results, u'fb_dtsg'),
+                'nh': self._search_regex(r'name="nh" value="(\w*?)"', login_results, u'nh'),
+                'name_action_selected': 'dont_save',
+                'submit[Continue]': self._search_regex(r'<input value="(.*?)" name="submit\[Continue\]"', login_results, u'continue'),
+            }
+            check_req = compat_urllib_request.Request(self._CHECKPOINT_URL, compat_urllib_parse.urlencode(check_form))
+            check_req.add_header('Content-Type', 'application/x-www-form-urlencoded')
+            check_response = compat_urllib_request.urlopen(check_req).read()
+            if re.search(r'id="checkpointSubmitButton"', check_response) is not None:
+                self._downloader.report_warning(u'Unable to confirm login, you have to login in your brower and authorize the login.')
          except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
              self._downloader.report_warning(u'unable to log in: %s' % compat_str(err))
              return
  
          except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
              self._downloader.report_warning(u'unable to log in: %s' % compat_str(err))
              return
  
+    def _real_initialize(self):
+        self._login()
+
      def _real_extract(self, url):
          mobj = re.match(self._VALID_URL, url)
          if mobj is None:
      def _real_extract(self, url):
          mobj = re.match(self._VALID_URL, url)
          if mobj is None:
@@ -93,7 +100,13 @@ class FacebookIE(InfoExtractor):
          AFTER = '.forEach(function(variable) {swf.addVariable(variable[0], variable[1]);});'
          m = re.search(re.escape(BEFORE) + '(.*?)' + re.escape(AFTER), webpage)
          if not m:
          AFTER = '.forEach(function(variable) {swf.addVariable(variable[0], variable[1]);});'
          m = re.search(re.escape(BEFORE) + '(.*?)' + re.escape(AFTER), webpage)
          if not m:
-            raise ExtractorError(u'Cannot parse data')
+            m_msg = re.search(r'class="[^"]*uiInterstitialContent[^"]*"><div>(.*?)</div>', webpage)
+            if m_msg is not None:
+                raise ExtractorError(
+                    u'The video is not available, Facebook said: "%s"' % m_msg.group(1),
+                    expected=True)
+            else:
+                raise ExtractorError(u'Cannot parse data')
          data = dict(json.loads(m.group(1)))
          params_raw = compat_urllib_parse.unquote(data['params'])
          params = json.loads(params_raw)
          data = dict(json.loads(m.group(1)))
          params_raw = compat_urllib_parse.unquote(data['params'])
          params = json.loads(params_raw)
diff --git a/youtube_dl/extractor/faz.py b/youtube_dl/extractor/faz.py

index deaa4ed2d9bc14406b6a7d3d6e8b015c6fcf915d..89ed08db4cbb99f9381013813fa03a19474c8e24 100644 (file)
--- a/youtube_dl/extractor/faz.py
+++ b/youtube_dl/extractor/faz.py
@@ -5,8 +5,6 @@ import xml.etree.ElementTree
  from .common import InfoExtractor
  from ..utils import (
      determine_ext,
  from .common import InfoExtractor
  from ..utils import (
      determine_ext,
-    clean_html,
-    get_element_by_attribute,
  )
  
  
  )
  
  
@@ -47,12 +45,12 @@ class FazIE(InfoExtractor):
                  'format_id': code.lower(),
              })
  
                  'format_id': code.lower(),
              })
  
-        descr_html = get_element_by_attribute('class', 'Content Copy', webpage)
+        descr = self._html_search_regex(r'<p class="Content Copy">(.*?)</p>', webpage, u'description')
          info = {
              'id': video_id,
              'title': self._og_search_title(webpage),
              'formats': formats,
          info = {
              'id': video_id,
              'title': self._og_search_title(webpage),
              'formats': formats,
-            'description': clean_html(descr_html),
+            'description': descr,
              'thumbnail': config.find('STILL/STILL_BIG').text,
          }
          # TODO: Remove when #980 has been merged
              'thumbnail': config.find('STILL/STILL_BIG').text,
          }
          # TODO: Remove when #980 has been merged
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py

index 69e0a7bd271dd0965f5c1f6f9c3a7cdce7a3da0b..c7552fddb587a60454bec6faa174c36bd4aa9a4a 100644 (file)
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -25,7 +25,7 @@ class GenericIE(InfoExtractor):
          {
              u'url': u'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html',
              u'file': u'13601338388002.mp4',
          {
              u'url': u'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html',
              u'file': u'13601338388002.mp4',
-            u'md5': u'85b90ccc9d73b4acd9138d3af4c27f89',
+            u'md5': u'6e15c93721d7ec9e9ca3fdbf07982cfd',
              u'info_dict': {
                  u"uploader": u"www.hodiho.fr",
                  u"title": u"R\u00e9gis plante sa Jeep"
              u'info_dict': {
                  u"uploader": u"www.hodiho.fr",
                  u"title": u"R\u00e9gis plante sa Jeep"
@@ -33,6 +33,7 @@ class GenericIE(InfoExtractor):
          },
          # embedded vimeo video
          {
          },
          # embedded vimeo video
          {
+            u'add_ie': ['Vimeo'],
              u'url': u'http://skillsmatter.com/podcast/home/move-semanticsperfect-forwarding-and-rvalue-references',
              u'file': u'22444065.mp4',
              u'md5': u'2903896e23df39722c33f015af0666e2',
              u'url': u'http://skillsmatter.com/podcast/home/move-semanticsperfect-forwarding-and-rvalue-references',
              u'file': u'22444065.mp4',
              u'md5': u'2903896e23df39722c33f015af0666e2',
@@ -41,7 +42,35 @@ class GenericIE(InfoExtractor):
                  u"uploader_id": u"skillsmatter",
                  u"uploader": u"Skills Matter",
              }
                  u"uploader_id": u"skillsmatter",
                  u"uploader": u"Skills Matter",
              }
-        }
+        },
+        # bandcamp page with custom domain
+        {
+            u'add_ie': ['Bandcamp'],
+            u'url': u'http://bronyrock.com/track/the-pony-mash',
+            u'file': u'3235767654.mp3',
+            u'info_dict': {
+                u'title': u'The Pony Mash',
+                u'uploader': u'M_Pallante',
+            },
+            u'skip': u'There is a limit of 200 free downloads / month for the test song',
+        },
+        # embedded brightcove video
+        # it also tests brightcove videos that need to set the 'Referer' in the
+        # http requests
+        {
+            u'add_ie': ['Brightcove'],
+            u'url': u'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/',
+            u'info_dict': {
+                u'id': u'2765128793001',
+                u'ext': u'mp4',
+                u'title': u'Le cours de bourse : l’analyse technique',
+                u'description': u'md5:7e9ad046e968cb2d1114004aba466fd9',
+                u'uploader': u'BFM BUSINESS',
+            },
+            u'params': {
+                u'skip_download': True,
+            },
+        },
      ]
  
      def report_download_webpage(self, video_id):
      ]
  
      def report_download_webpage(self, video_id):
@@ -134,10 +163,9 @@ class GenericIE(InfoExtractor):
  
          self.report_extraction(video_id)
          # Look for BrightCove:
  
          self.report_extraction(video_id)
          # Look for BrightCove:
-        m_brightcove = re.search(r'<object[^>]+?class=([\'"])[^>]*?BrightcoveExperience.*?\1.+?</object>', webpage, re.DOTALL)
-        if m_brightcove is not None:
+        bc_url = BrightcoveIE._extract_brightcove_url(webpage)
+        if bc_url is not None:
              self.to_screen(u'Brightcove video detected.')
              self.to_screen(u'Brightcove video detected.')
-            bc_url = BrightcoveIE._build_brighcove_url(m_brightcove.group())
              return self.url_result(bc_url, 'Brightcove')
  
          # Look for embedded Vimeo player
              return self.url_result(bc_url, 'Brightcove')
  
          # Look for embedded Vimeo player
@@ -150,11 +178,17 @@ class GenericIE(InfoExtractor):
  
          # Look for embedded YouTube player
          mobj = re.search(
  
          # Look for embedded YouTube player
          mobj = re.search(
-            r'<iframe[^>]+?src="(https?://(?:www\.)?youtube.com/embed/.+?)"', webpage)
+            r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?youtube.com/embed/.+?)\1', webpage)
          if mobj:
          if mobj:
-            surl = unescapeHTML(mobj.group(1))
+            surl = unescapeHTML(mobj.group(u'url'))
              return self.url_result(surl, 'Youtube')
  
              return self.url_result(surl, 'Youtube')
  
+        # Look for Bandcamp pages with custom domain
+        mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
+        if mobj is not None:
+            burl = unescapeHTML(mobj.group(1))
+            return self.url_result(burl, 'Bandcamp')
+
          # Start with something easy: JW Player in SWFObject
          mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
          if mobj is None:
          # Start with something easy: JW Player in SWFObject
          mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
          if mobj is None:
diff --git a/youtube_dl/extractor/hypem.py b/youtube_dl/extractor/hypem.py

index ab2b591036a6371fc31e7437c368858dfb708efa..9bd06e7c7913e9c7492f63417760012f1219c875 100644 (file)
--- a/youtube_dl/extractor/hypem.py
+++ b/youtube_dl/extractor/hypem.py
@@ -30,7 +30,7 @@ class HypemIE(InfoExtractor):
              raise ExtractorError(u'Invalid URL: %s' % url)
          track_id = mobj.group(1)
  
              raise ExtractorError(u'Invalid URL: %s' % url)
          track_id = mobj.group(1)
  
-        data = { 'ax': 1, 'ts': time.time() }
+        data = {'ax': 1, 'ts': time.time()}
          data_encoded = compat_urllib_parse.urlencode(data)
          complete_url = url + "?" + data_encoded
          request = compat_urllib_request.Request(complete_url)
          data_encoded = compat_urllib_parse.urlencode(data)
          complete_url = url + "?" + data_encoded
          request = compat_urllib_request.Request(complete_url)
@@ -68,4 +68,4 @@ class HypemIE(InfoExtractor):
              'ext':      "mp3",
              'title':    title,
              'artist':   artist,
              'ext':      "mp3",
              'title':    title,
              'artist':   artist,
-        }]
-\ No newline at end of file
+        }]
diff --git a/youtube_dl/extractor/instagram.py b/youtube_dl/extractor/instagram.py

index ddc42882a436a216cbd24b0b28d03da89ec27b0d..213aac428451bfcb860585b26de0e1c43abc732d 100644 (file)
--- a/youtube_dl/extractor/instagram.py
+++ b/youtube_dl/extractor/instagram.py
@@ -26,7 +26,7 @@ class InstagramIE(InfoExtractor):
  
          return [{
              'id':        video_id,
  
          return [{
              'id':        video_id,
-            'url':       self._og_search_video_url(webpage),
+            'url':       self._og_search_video_url(webpage, secure=False),
              'ext':       'mp4',
              'title':     u'Video by %s' % uploader_id,
              'thumbnail': self._og_search_thumbnail(webpage),
              'ext':       'mp4',
              'title':     u'Video by %s' % uploader_id,
              'thumbnail': self._og_search_thumbnail(webpage),
diff --git a/youtube_dl/extractor/kankan.py b/youtube_dl/extractor/kankan.py

index 445d465017f513b55839ed9323c95212e6de7fb7..50916f4a66c6227e1eb4dc531745c9d5a6ad85d5 100644 (file)
--- a/youtube_dl/extractor/kankan.py
+++ b/youtube_dl/extractor/kankan.py
@@ -1,8 +1,10 @@
  import re
  import re
+import hashlib
  
  from .common import InfoExtractor
  from ..utils import determine_ext
  
  
  from .common import InfoExtractor
  from ..utils import determine_ext
  
+_md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest()
  
  class KankanIE(InfoExtractor):
      _VALID_URL = r'https?://(?:.*?\.)?kankan\.com/.+?/(?P<id>\d+)\.shtml'
  
  class KankanIE(InfoExtractor):
      _VALID_URL = r'https?://(?:.*?\.)?kankan\.com/.+?/(?P<id>\d+)\.shtml'
@@ -30,7 +32,10 @@ class KankanIE(InfoExtractor):
                                                   video_id, u'Downloading video url info')
          ip = self._search_regex(r'ip:"(.+?)"', video_info_page, u'video url ip')
          path = self._search_regex(r'path:"(.+?)"', video_info_page, u'video url path')
                                                   video_id, u'Downloading video url info')
          ip = self._search_regex(r'ip:"(.+?)"', video_info_page, u'video url ip')
          path = self._search_regex(r'path:"(.+?)"', video_info_page, u'video url path')
-        video_url = 'http://%s%s' % (ip, path)
+        param1 = self._search_regex(r'param1:(\d+)', video_info_page, u'param1')
+        param2 = self._search_regex(r'param2:(\d+)', video_info_page, u'param2')
+        key = _md5('xl_mp43651' + param1 + param2)
+        video_url = 'http://%s%s?key=%s&key1=%s' % (ip, path, key, param2)
  
          return {'id': video_id,
                  'title': title,
  
          return {'id': video_id,
                  'title': title,
diff --git a/youtube_dl/extractor/keezmovies.py b/youtube_dl/extractor/keezmovies.py

new file mode 100644 (file)

index 0000000..29658a7
--- /dev/null
+++ b/youtube_dl/extractor/keezmovies.py
@@ -0,0 +1,61 @@
+import os
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    compat_urllib_parse_urlparse,
+    compat_urllib_request,
+    compat_urllib_parse,
+)
+from ..aes import (
+    aes_decrypt_text
+)
+
+class KeezMoviesIE(InfoExtractor):
+    _VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>keezmovies\.com/video/.+?(?P<videoid>[0-9]+))(?:[/?&]|$)'
+    _TEST = {
+        u'url': u'http://www.keezmovies.com/video/petite-asian-lady-mai-playing-in-bathtub-1214711',
+        u'file': u'1214711.mp4',
+        u'md5': u'6e297b7e789329923fcf83abb67c9289',
+        u'info_dict': {
+            u"title": u"Petite Asian Lady Mai Playing In Bathtub",
+            u"age_limit": 18,
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('videoid')
+        url = 'http://www.' + mobj.group('url')
+
+        req = compat_urllib_request.Request(url)
+        req.add_header('Cookie', 'age_verified=1')
+        webpage = self._download_webpage(req, video_id)
+
+        # embedded video
+        mobj = re.search(r'href="([^"]+)"></iframe>', webpage)
+        if mobj:
+            embedded_url = mobj.group(1)
+            return self.url_result(embedded_url)
+
+        video_title = self._html_search_regex(r'<h1 [^>]*>([^<]+)', webpage, u'title')
+        video_url = compat_urllib_parse.unquote(self._html_search_regex(r'video_url=(.+?)&amp;', webpage, u'video_url'))
+        if webpage.find('encrypted=true')!=-1:
+            password = self._html_search_regex(r'video_title=(.+?)&amp;', webpage, u'password')
+            video_url = aes_decrypt_text(video_url, password, 32).decode('utf-8')
+        path = compat_urllib_parse_urlparse(video_url).path
+        extension = os.path.splitext(path)[1][1:]
+        format = path.split('/')[4].split('_')[:2]
+        format = "-".join(format)
+
+        age_limit = self._rta_search(webpage)
+
+        return {
+            'id': video_id,
+            'title': video_title,
+            'url': video_url,
+            'ext': extension,
+            'format': format,
+            'format_id': format,
+            'age_limit': age_limit,
+        }
diff --git a/youtube_dl/extractor/livestream.py b/youtube_dl/extractor/livestream.py

index d04da98c89ed582e83e8bb905b15ff04c78d3018..4531fd6ab23a958d3d4dc4e38d52e7f330d85196 100644 (file)
--- a/youtube_dl/extractor/livestream.py
+++ b/youtube_dl/extractor/livestream.py
@@ -40,13 +40,9 @@ class LivestreamIE(InfoExtractor):
  
          if video_id is None:
              # This is an event page:
  
          if video_id is None:
              # This is an event page:
-            player = get_meta_content('twitter:player', webpage)
-            if player is None:
-                raise ExtractorError('Couldn\'t extract event api url')
-            api_url = player.replace('/player', '')
-            api_url = re.sub(r'^(https?://)(new\.)', r'\1api.\2', api_url)
-            info = json.loads(self._download_webpage(api_url, event_name,
-                                                     u'Downloading event info'))
+            config_json = self._search_regex(r'window.config = ({.*?});',
+                webpage, u'window config')
+            info = json.loads(config_json)['event']
              videos = [self._extract_video_info(video_data['data'])
                  for video_data in info['feed']['data'] if video_data['type'] == u'video']
              return self.playlist_result(videos, info['id'], info['full_name'])
              videos = [self._extract_video_info(video_data['data'])
                  for video_data in info['feed']['data'] if video_data['type'] == u'video']
              return self.playlist_result(videos, info['id'], info['full_name'])
diff --git a/youtube_dl/extractor/metacafe.py b/youtube_dl/extractor/metacafe.py

index e537648ffb83564e56f43f7e1e21a949cc609925..91480ba875d5fff781ce08a47c41a3824e94e910 100644 (file)
--- a/youtube_dl/extractor/metacafe.py
+++ b/youtube_dl/extractor/metacafe.py
@@ -20,10 +20,12 @@ class MetacafeIE(InfoExtractor):
      _DISCLAIMER = 'http://www.metacafe.com/family_filter/'
      _FILTER_POST = 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user'
      IE_NAME = u'metacafe'
      _DISCLAIMER = 'http://www.metacafe.com/family_filter/'
      _FILTER_POST = 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user'
      IE_NAME = u'metacafe'
-    _TESTS = [{
+    _TESTS = [
+    # Youtube video
+    {
          u"add_ie": ["Youtube"],
          u"url":  u"http://metacafe.com/watch/yt-_aUehQsCQtM/the_electric_company_short_i_pbs_kids_go/",
          u"add_ie": ["Youtube"],
          u"url":  u"http://metacafe.com/watch/yt-_aUehQsCQtM/the_electric_company_short_i_pbs_kids_go/",
-        u"file":  u"_aUehQsCQtM.flv",
+        u"file":  u"_aUehQsCQtM.mp4",
          u"info_dict": {
              u"upload_date": u"20090102",
              u"title": u"The Electric Company | \"Short I\" | PBS KIDS GO!",
          u"info_dict": {
              u"upload_date": u"20090102",
              u"title": u"The Electric Company | \"Short I\" | PBS KIDS GO!",
@@ -32,15 +34,42 @@ class MetacafeIE(InfoExtractor):
              u"uploader_id": u"PBS"
          }
      },
              u"uploader_id": u"PBS"
          }
      },
+    # Normal metacafe video
+    {
+        u'url': u'http://www.metacafe.com/watch/11121940/news_stuff_you_wont_do_with_your_playstation_4/',
+        u'md5': u'6e0bca200eaad2552e6915ed6fd4d9ad',
+        u'info_dict': {
+            u'id': u'11121940',
+            u'ext': u'mp4',
+            u'title': u'News: Stuff You Won\'t Do with Your PlayStation 4',
+            u'uploader': u'ign',
+            u'description': u'Sony released a massive FAQ on the PlayStation Blog detailing the PS4\'s capabilities and limitations.',
+        },
+    },
+    # AnyClip video
      {
          u"url": u"http://www.metacafe.com/watch/an-dVVXnuY7Jh77J/the_andromeda_strain_1971_stop_the_bomb_part_3/",
          u"file": u"an-dVVXnuY7Jh77J.mp4",
          u"info_dict": {
              u"title": u"The Andromeda Strain (1971): Stop the Bomb Part 3",
              u"uploader": u"anyclip",
      {
          u"url": u"http://www.metacafe.com/watch/an-dVVXnuY7Jh77J/the_andromeda_strain_1971_stop_the_bomb_part_3/",
          u"file": u"an-dVVXnuY7Jh77J.mp4",
          u"info_dict": {
              u"title": u"The Andromeda Strain (1971): Stop the Bomb Part 3",
              u"uploader": u"anyclip",
-            u"description": u"md5:38c711dd98f5bb87acf973d573442e67"
-        }
-    }]
+            u"description": u"md5:38c711dd98f5bb87acf973d573442e67",
+        },
+    },
+    # age-restricted video
+    {
+        u'url': u'http://www.metacafe.com/watch/5186653/bbc_internal_christmas_tape_79_uncensored_outtakes_etc/',
+        u'md5': u'98dde7c1a35d02178e8ab7560fe8bd09',
+        u'info_dict': {
+            u'id': u'5186653',
+            u'ext': u'mp4',
+            u'title': u'BBC INTERNAL Christmas Tape \'79 - UNCENSORED Outtakes, Etc.',
+            u'uploader': u'Dwayne Pipe',
+            u'description': u'md5:950bf4c581e2c059911fa3ffbe377e4b',
+            u'age_limit': 18,
+        },
+    },
+    ]
  
  
      def report_disclaimer(self):
  
  
      def report_disclaimer(self):
@@ -62,6 +91,7 @@ class MetacafeIE(InfoExtractor):
              'submit': "Continue - I'm over 18",
              }
          request = compat_urllib_request.Request(self._FILTER_POST, compat_urllib_parse.urlencode(disclaimer_form))
              'submit': "Continue - I'm over 18",
              }
          request = compat_urllib_request.Request(self._FILTER_POST, compat_urllib_parse.urlencode(disclaimer_form))
+        request.add_header('Content-Type', 'application/x-www-form-urlencoded')
          try:
              self.report_age_confirmation()
              compat_urllib_request.urlopen(request).read()
          try:
              self.report_age_confirmation()
              compat_urllib_request.urlopen(request).read()
@@ -83,7 +113,12 @@ class MetacafeIE(InfoExtractor):
  
          # Retrieve video webpage to extract further information
          req = compat_urllib_request.Request('http://www.metacafe.com/watch/%s/' % video_id)
  
          # Retrieve video webpage to extract further information
          req = compat_urllib_request.Request('http://www.metacafe.com/watch/%s/' % video_id)
-        req.headers['Cookie'] = 'flashVersion=0;'
+
+        # AnyClip videos require the flashversion cookie so that we get the link
+        # to the mp4 file
+        mobj_an = re.match(r'^an-(.*?)$', video_id)
+        if mobj_an:
+            req.headers['Cookie'] = 'flashVersion=0;'
          webpage = self._download_webpage(req, video_id)
  
          # Extract URL, uploader and title from webpage
          webpage = self._download_webpage(req, video_id)
  
          # Extract URL, uploader and title from webpage
@@ -125,6 +160,11 @@ class MetacafeIE(InfoExtractor):
                  r'submitter=(.*?);|googletag\.pubads\(\)\.setTargeting\("(?:channel|submiter)","([^"]+)"\);',
                  webpage, u'uploader nickname', fatal=False)
  
                  r'submitter=(.*?);|googletag\.pubads\(\)\.setTargeting\("(?:channel|submiter)","([^"]+)"\);',
                  webpage, u'uploader nickname', fatal=False)
  
+        if re.search(r'"contentRating":"restricted"', webpage) is not None:
+            age_limit = 18
+        else:
+            age_limit = 0
+
          return {
              '_type':    'video',
              'id':       video_id,
          return {
              '_type':    'video',
              'id':       video_id,
@@ -134,4 +174,5 @@ class MetacafeIE(InfoExtractor):
              'upload_date':  None,
              'title':    video_title,
              'ext':      video_ext,
              'upload_date':  None,
              'title':    video_title,
              'ext':      video_ext,
+            'age_limit': age_limit,
          }
          }
diff --git a/youtube_dl/extractor/mofosex.py b/youtube_dl/extractor/mofosex.py

new file mode 100644 (file)

index 0000000..b9430b0
--- /dev/null
+++ b/youtube_dl/extractor/mofosex.py
@@ -0,0 +1,49 @@
+import os
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    compat_urllib_parse_urlparse,
+    compat_urllib_request,
+    compat_urllib_parse,
+)
+
+class MofosexIE(InfoExtractor):
+    _VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>mofosex\.com/videos/(?P<videoid>[0-9]+)/.*?\.html)'
+    _TEST = {
+        u'url': u'http://www.mofosex.com/videos/5018/japanese-teen-music-video.html',
+        u'file': u'5018.mp4',
+        u'md5': u'1b2eb47ac33cc75d4a80e3026b613c5a',
+        u'info_dict': {
+            u"title": u"Japanese Teen Music Video",
+            u"age_limit": 18,
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('videoid')
+        url = 'http://www.' + mobj.group('url')
+
+        req = compat_urllib_request.Request(url)
+        req.add_header('Cookie', 'age_verified=1')
+        webpage = self._download_webpage(req, video_id)
+
+        video_title = self._html_search_regex(r'<h1>(.+?)<', webpage, u'title')
+        video_url = compat_urllib_parse.unquote(self._html_search_regex(r'flashvars.video_url = \'([^\']+)', webpage, u'video_url'))
+        path = compat_urllib_parse_urlparse(video_url).path
+        extension = os.path.splitext(path)[1][1:]
+        format = path.split('/')[5].split('_')[:2]
+        format = "-".join(format)
+
+        age_limit = self._rta_search(webpage)
+
+        return {
+            'id': video_id,
+            'title': video_title,
+            'url': video_url,
+            'ext': extension,
+            'format': format,
+            'format_id': format,
+            'age_limit': age_limit,
+        }
diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py

index e520e2bb491f2c55f3867ab214b2b949eca6e684..24a79ae130e7ca52983ae9846532f643eda60085 100644 (file)
--- a/youtube_dl/extractor/mtv.py
+++ b/youtube_dl/extractor/mtv.py
@@ -26,6 +26,7 @@ class MTVIE(InfoExtractor):
              },
          },
          {
              },
          },
          {
+            u'add_ie': ['Vevo'],
              u'url': u'http://www.mtv.com/videos/taylor-swift/916187/everything-has-changed-ft-ed-sheeran.jhtml',
              u'file': u'USCJY1331283.mp4',
              u'md5': u'73b4e7fcadd88929292fe52c3ced8caf',
              u'url': u'http://www.mtv.com/videos/taylor-swift/916187/everything-has-changed-ft-ed-sheeran.jhtml',
              u'file': u'USCJY1331283.mp4',
              u'md5': u'73b4e7fcadd88929292fe52c3ced8caf',
@@ -80,6 +81,8 @@ class MTVIE(InfoExtractor):
          video_id = self._id_from_uri(uri)
          self.report_extraction(video_id)
          mediagen_url = itemdoc.find('%s/%s' % (_media_xml_tag('group'), _media_xml_tag('content'))).attrib['url']
          video_id = self._id_from_uri(uri)
          self.report_extraction(video_id)
          mediagen_url = itemdoc.find('%s/%s' % (_media_xml_tag('group'), _media_xml_tag('content'))).attrib['url']
+        # Remove the templates, like &device={device}
+        mediagen_url = re.sub(r'&[^=]*?={.*?}(?=(&|$))', u'', mediagen_url)
          if 'acceptMethods' not in mediagen_url:
              mediagen_url += '&acceptMethods=fms'
          mediagen_page = self._download_webpage(mediagen_url, video_id,
          if 'acceptMethods' not in mediagen_url:
              mediagen_url += '&acceptMethods=fms'
          mediagen_page = self._download_webpage(mediagen_url, video_id,
diff --git a/youtube_dl/extractor/myspace.py b/youtube_dl/extractor/myspace.py

new file mode 100644 (file)

index 0000000..050f54a
--- /dev/null
+++ b/youtube_dl/extractor/myspace.py
@@ -0,0 +1,48 @@
+import re
+import json
+
+from .common import InfoExtractor
+from ..utils import (
+    compat_str,
+)
+
+
+class MySpaceIE(InfoExtractor):
+    _VALID_URL = r'https?://myspace\.com/([^/]+)/video/[^/]+/(?P<id>\d+)'
+
+    _TEST = {
+        u'url': u'https://myspace.com/coldplay/video/viva-la-vida/100008689',
+        u'info_dict': {
+            u'id': u'100008689',
+            u'ext': u'flv',
+            u'title': u'Viva La Vida',
+            u'description': u'The official Viva La Vida video, directed by Hype Williams',
+            u'uploader': u'Coldplay',
+            u'uploader_id': u'coldplay',
+        },
+        u'params': {
+            # rtmp download
+            u'skip_download': True,
+        },
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        webpage = self._download_webpage(url, video_id)
+        context = json.loads(self._search_regex(r'context = ({.*?});', webpage,
+            u'context'))
+        video = context['video']
+        rtmp_url, play_path = video['streamUrl'].split(';', 1)
+
+        return {
+            'id': compat_str(video['mediaId']),
+            'title': video['title'],
+            'url': rtmp_url,
+            'play_path': play_path,
+            'ext': 'flv',
+            'description': video['description'],
+            'thumbnail': video['imageUrl'],
+            'uploader': video['artistName'],
+            'uploader_id': video['artistUsername'],
+        }
diff --git a/youtube_dl/extractor/nowvideo.py b/youtube_dl/extractor/nowvideo.py

index ab52ad4011851405e9a6b17f73720a8cd646860c..241cc160b9ca58bfc6b88bf9c12fe134df3b3d66 100644 (file)
--- a/youtube_dl/extractor/nowvideo.py
+++ b/youtube_dl/extractor/nowvideo.py
@@ -20,7 +20,10 @@ class NowVideoIE(InfoExtractor):
  
          video_id = mobj.group('id')
          webpage_url = 'http://www.nowvideo.ch/video/' + video_id
  
          video_id = mobj.group('id')
          webpage_url = 'http://www.nowvideo.ch/video/' + video_id
+        embed_url = 'http://embed.nowvideo.ch/embed.php?v=' + video_id
          webpage = self._download_webpage(webpage_url, video_id)
          webpage = self._download_webpage(webpage_url, video_id)
+        embed_page = self._download_webpage(embed_url, video_id,
+            u'Downloading embed page')
  
          self.report_extraction(video_id)
  
  
          self.report_extraction(video_id)
  
@@ -28,7 +31,7 @@ class NowVideoIE(InfoExtractor):
              webpage, u'video title')
  
          video_key = self._search_regex(r'var fkzd="(.*)";',
              webpage, u'video title')
  
          video_key = self._search_regex(r'var fkzd="(.*)";',
-            webpage, u'video key')
+            embed_page, u'video key')
  
          api_call = "http://www.nowvideo.ch/api/player.api.php?file={0}&numOfErrors=0&cid=1&key={1}".format(video_id, video_key)
          api_response = self._download_webpage(api_call, video_id,
  
          api_call = "http://www.nowvideo.ch/api/player.api.php?file={0}&numOfErrors=0&cid=1&key={1}".format(video_id, video_key)
          api_response = self._download_webpage(api_call, video_id,
diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py

new file mode 100644 (file)

index 0000000..75cf4bb
--- /dev/null
+++ b/youtube_dl/extractor/pornhub.py
@@ -0,0 +1,69 @@
+import os
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    compat_urllib_parse_urlparse,
+    compat_urllib_request,
+    compat_urllib_parse,
+    unescapeHTML,
+)
+from ..aes import (
+    aes_decrypt_text
+)
+
+class PornHubIE(InfoExtractor):
+    _VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>pornhub\.com/view_video\.php\?viewkey=(?P<videoid>[0-9]+))'
+    _TEST = {
+        u'url': u'http://www.pornhub.com/view_video.php?viewkey=648719015',
+        u'file': u'648719015.mp4',
+        u'md5': u'882f488fa1f0026f023f33576004a2ed',
+        u'info_dict': {
+            u"uploader": u"BABES-COM", 
+            u"title": u"Seductive Indian beauty strips down and fingers her pink pussy",
+            u"age_limit": 18
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('videoid')
+        url = 'http://www.' + mobj.group('url')
+
+        req = compat_urllib_request.Request(url)
+        req.add_header('Cookie', 'age_verified=1')
+        webpage = self._download_webpage(req, video_id)
+
+        video_title = self._html_search_regex(r'<h1 [^>]+>([^<]+)', webpage, u'title')
+        video_uploader = self._html_search_regex(r'<b>From: </b>(?:\s|<[^>]*>)*(.+?)<', webpage, u'uploader', fatal=False)
+        thumbnail = self._html_search_regex(r'"image_url":"([^"]+)', webpage, u'thumbnail', fatal=False)
+        if thumbnail:
+            thumbnail = compat_urllib_parse.unquote(thumbnail)
+
+        video_urls = list(map(compat_urllib_parse.unquote , re.findall(r'"quality_[0-9]{3}p":"([^"]+)', webpage)))
+        if webpage.find('"encrypted":true') != -1:
+            password = self._html_search_regex(r'"video_title":"([^"]+)', webpage, u'password').replace('+', ' ')
+            video_urls = list(map(lambda s: aes_decrypt_text(s, password, 32).decode('utf-8'), video_urls))
+
+        formats = []
+        for video_url in video_urls:
+            path = compat_urllib_parse_urlparse(video_url).path
+            extension = os.path.splitext(path)[1][1:]
+            format = path.split('/')[5].split('_')[:2]
+            format = "-".join(format)
+            formats.append({
+                'url': video_url,
+                'ext': extension,
+                'format': format,
+                'format_id': format,
+            })
+        formats.sort(key=lambda format: list(map(lambda s: s.zfill(6), format['format'].split('-'))))
+
+        return {
+            'id': video_id,
+            'uploader': video_uploader,
+            'title': video_title,
+            'thumbnail': thumbnail,
+            'formats': formats,
+            'age_limit': 18,
+        }
diff --git a/youtube_dl/extractor/pornotube.py b/youtube_dl/extractor/pornotube.py

index 5d770ec285c3d1e3dcad04cfe49ca7780a9dd2b4..35dc5a9ffafb32d36e30f51988291dded6a6d18c 100644 (file)
--- a/youtube_dl/extractor/pornotube.py
+++ b/youtube_dl/extractor/pornotube.py
@@ -16,7 +16,8 @@ class PornotubeIE(InfoExtractor):
          u'md5': u'374dd6dcedd24234453b295209aa69b6',
          u'info_dict': {
              u"upload_date": u"20090708", 
          u'md5': u'374dd6dcedd24234453b295209aa69b6',
          u'info_dict': {
              u"upload_date": u"20090708", 
-            u"title": u"Marilyn-Monroe-Bathing"
+            u"title": u"Marilyn-Monroe-Bathing",
+            u"age_limit": 18
          }
      }
  
          }
      }
  
diff --git a/youtube_dl/extractor/rtlnow.py b/youtube_dl/extractor/rtlnow.py

index d1b08c9bc050b3639ca252f2e84a373a8e4fa5f9..9ac7c3be8c8f1b97f46c944f08124eafbe8f1a5a 100644 (file)
--- a/youtube_dl/extractor/rtlnow.py
+++ b/youtube_dl/extractor/rtlnow.py
@@ -63,13 +63,12 @@ class RTLnowIE(InfoExtractor):
          },
      },
      {
          },
      },
      {
-        u'url': u'http://www.rtlnitronow.de/recht-ordnung/lebensmittelkontrolle-erlangenordnungsamt-berlin.php?film_id=127367&player=1&season=1',
-        u'file': u'127367.flv',
+        u'url': u'http://www.rtlnitronow.de/recht-ordnung/stadtpolizei-frankfurt-gerichtsvollzieher-leipzig.php?film_id=129679&player=1&season=1',
+        u'file': u'129679.flv',
          u'info_dict': {
          u'info_dict': {
-            u'upload_date': u'20130926', 
-            u'title': u'Recht & Ordnung - Lebensmittelkontrolle Erlangen/Ordnungsamt...',
-            u'description': u'Lebensmittelkontrolle Erlangen/Ordnungsamt Berlin',
-            u'thumbnail': u'http://autoimg.static-fra.de/nitronow/344787/1500x1500/image2.jpg',
+            u'upload_date': u'20131016', 
+            u'title': u'Recht & Ordnung - Stadtpolizei Frankfurt/ Gerichtsvollzieher...',
+            u'description': u'Stadtpolizei Frankfurt/ Gerichtsvollzieher Leipzig',
          },
          u'params': {
              u'skip_download': True,
          },
          u'params': {
              u'skip_download': True,
diff --git a/youtube_dl/extractor/slashdot.py b/youtube_dl/extractor/slashdot.py

index 2cba530766a2b898967aefe1c3f7761ebe986d45..f5003c7f91bc78d10a63d25604537e5e77f9fdb8 100644 (file)
--- a/youtube_dl/extractor/slashdot.py
+++ b/youtube_dl/extractor/slashdot.py
@@ -7,6 +7,7 @@ class SlashdotIE(InfoExtractor):
      _VALID_URL = r'https?://tv.slashdot.org/video/\?embed=(?P<id>.*?)(&|$)'
  
      _TEST = {
      _VALID_URL = r'https?://tv.slashdot.org/video/\?embed=(?P<id>.*?)(&|$)'
  
      _TEST = {
+        u'add_ie': ['Ooyala'],
          u'url': u'http://tv.slashdot.org/video/?embed=JscHMzZDplD0p-yNLOzTfzC3Q3xzJaUz',
          u'file': u'JscHMzZDplD0p-yNLOzTfzC3Q3xzJaUz.mp4',
          u'md5': u'd2222e7a4a4c1541b3e0cf732fb26735',
          u'url': u'http://tv.slashdot.org/video/?embed=JscHMzZDplD0p-yNLOzTfzC3Q3xzJaUz',
          u'file': u'JscHMzZDplD0p-yNLOzTfzC3Q3xzJaUz.mp4',
          u'md5': u'd2222e7a4a4c1541b3e0cf732fb26735',
diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py

index 29cd5617c7d1919fa95e0b48e7ff35585106b800..4717fbb77e0ec21a26147b25d9cc3be2f83d9f94 100644 (file)
--- a/youtube_dl/extractor/soundcloud.py
+++ b/youtube_dl/extractor/soundcloud.py
@@ -29,17 +29,34 @@ class SoundcloudIE(InfoExtractor):
                      )
                      '''
      IE_NAME = u'soundcloud'
                      )
                      '''
      IE_NAME = u'soundcloud'
-    _TEST = {
-        u'url': u'http://soundcloud.com/ethmusic/lostin-powers-she-so-heavy',
-        u'file': u'62986583.mp3',
-        u'md5': u'ebef0a451b909710ed1d7787dddbf0d7',
-        u'info_dict': {
-            u"upload_date": u"20121011", 
-            u"description": u"No Downloads untill we record the finished version this weekend, i was too pumped n i had to post it , earl is prolly gonna b hella p.o'd", 
-            u"uploader": u"E.T. ExTerrestrial Music", 
-            u"title": u"Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1"
-        }
-    }
+    _TESTS = [
+        {
+            u'url': u'http://soundcloud.com/ethmusic/lostin-powers-she-so-heavy',
+            u'file': u'62986583.mp3',
+            u'md5': u'ebef0a451b909710ed1d7787dddbf0d7',
+            u'info_dict': {
+                u"upload_date": u"20121011", 
+                u"description": u"No Downloads untill we record the finished version this weekend, i was too pumped n i had to post it , earl is prolly gonna b hella p.o'd", 
+                u"uploader": u"E.T. ExTerrestrial Music", 
+                u"title": u"Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1"
+            }
+        },
+        # not streamable song
+        {
+            u'url': u'https://soundcloud.com/the-concept-band/goldrushed-mastered?in=the-concept-band/sets/the-royal-concept-ep',
+            u'info_dict': {
+                u'id': u'47127627',
+                u'ext': u'mp3',
+                u'title': u'Goldrushed',
+                u'uploader': u'The Royal Concept',
+                u'upload_date': u'20120521',
+            },
+            u'params': {
+                # rtmp
+                u'skip_download': True,
+            },
+        },
+    ]
  
      _CLIENT_ID = 'b45b1aa10f1ac2941910a7f0d10f8e28'
  
  
      _CLIENT_ID = 'b45b1aa10f1ac2941910a7f0d10f8e28'
  
@@ -56,16 +73,16 @@ class SoundcloudIE(InfoExtractor):
          return 'http://api.soundcloud.com/resolve.json?url=' + url + '&client_id=' + cls._CLIENT_ID
  
      def _extract_info_dict(self, info, full_title=None, quiet=False):
          return 'http://api.soundcloud.com/resolve.json?url=' + url + '&client_id=' + cls._CLIENT_ID
  
      def _extract_info_dict(self, info, full_title=None, quiet=False):
-        video_id = info['id']
-        name = full_title or video_id
+        track_id = compat_str(info['id'])
+        name = full_title or track_id
          if quiet == False:
              self.report_extraction(name)
  
          thumbnail = info['artwork_url']
          if thumbnail is not None:
              thumbnail = thumbnail.replace('-large', '-t500x500')
          if quiet == False:
              self.report_extraction(name)
  
          thumbnail = info['artwork_url']
          if thumbnail is not None:
              thumbnail = thumbnail.replace('-large', '-t500x500')
-        return {
-            'id':       info['id'],
+        result = {
+            'id':       track_id,
              'url':      info['stream_url'] + '?client_id=' + self._CLIENT_ID,
              'uploader': info['user']['username'],
              'upload_date': unified_strdate(info['created_at']),
              'url':      info['stream_url'] + '?client_id=' + self._CLIENT_ID,
              'uploader': info['user']['username'],
              'upload_date': unified_strdate(info['created_at']),
@@ -74,6 +91,21 @@ class SoundcloudIE(InfoExtractor):
              'description': info['description'],
              'thumbnail': thumbnail,
          }
              'description': info['description'],
              'thumbnail': thumbnail,
          }
+        if info.get('downloadable', False):
+            result['url'] = 'https://api.soundcloud.com/tracks/{0}/download?client_id={1}'.format(track_id, self._CLIENT_ID)
+        if not info.get('streamable', False):
+            # We have to get the rtmp url
+            stream_json = self._download_webpage(
+                'http://api.soundcloud.com/i1/tracks/{0}/streams?client_id={1}'.format(track_id, self._CLIENT_ID),
+                track_id, u'Downloading track url')
+            rtmp_url = json.loads(stream_json)['rtmp_mp3_128_url']
+            # The url doesn't have an rtmp app, we have to extract the playpath
+            url, path = rtmp_url.split('mp3:', 1)
+            result.update({
+                'url': url,
+                'play_path': 'mp3:' + path,
+            })
+        return result
  
      def _real_extract(self, url):
          mobj = re.match(self._VALID_URL, url, flags=re.VERBOSE)
  
      def _real_extract(self, url):
          mobj = re.match(self._VALID_URL, url, flags=re.VERBOSE)
@@ -106,70 +138,8 @@ class SoundcloudIE(InfoExtractor):
  class SoundcloudSetIE(SoundcloudIE):
      _VALID_URL = r'^(?:https?://)?(?:www\.)?soundcloud\.com/([\w\d-]+)/sets/([\w\d-]+)(?:[?].*)?$'
      IE_NAME = u'soundcloud:set'
  class SoundcloudSetIE(SoundcloudIE):
      _VALID_URL = r'^(?:https?://)?(?:www\.)?soundcloud\.com/([\w\d-]+)/sets/([\w\d-]+)(?:[?].*)?$'
      IE_NAME = u'soundcloud:set'
-    _TEST = {
-        u"url":"https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep",
-        u"playlist": [
-            {
-                u"file":"30510138.mp3",
-                u"md5":"f9136bf103901728f29e419d2c70f55d",
-                u"info_dict": {
-                    u"upload_date": u"20111213",
-                    u"description": u"The Royal Concept from Stockholm\r\nFilip / Povel / David / Magnus\r\nwww.royalconceptband.com",
-                    u"uploader": u"The Royal Concept",
-                    u"title": u"D-D-Dance"
-                }
-            },
-            {
-                u"file":"47127625.mp3",
-                u"md5":"09b6758a018470570f8fd423c9453dd8",
-                u"info_dict": {
-                    u"upload_date": u"20120521",
-                    u"description": u"The Royal Concept from Stockholm\r\nFilip / Povel / David / Magnus\r\nwww.royalconceptband.com",
-                    u"uploader": u"The Royal Concept",
-                    u"title": u"The Royal Concept - Gimme Twice"
-                }
-            },
-            {
-                u"file":"47127627.mp3",
-                u"md5":"154abd4e418cea19c3b901f1e1306d9c",
-                u"info_dict": {
-                    u"upload_date": u"20120521",
-                    u"uploader": u"The Royal Concept",
-                    u"title": u"Goldrushed"
-                }
-            },
-            {
-                u"file":"47127629.mp3",
-                u"md5":"2f5471edc79ad3f33a683153e96a79c1",
-                u"info_dict": {
-                    u"upload_date": u"20120521",
-                    u"description": u"The Royal Concept from Stockholm\r\nFilip / Povel / David / Magnus\r\nwww.royalconceptband.com",
-                    u"uploader": u"The Royal Concept",
-                    u"title": u"In the End"
-                }
-            },
-            {
-                u"file":"47127631.mp3",
-                u"md5":"f9ba87aa940af7213f98949254f1c6e2",
-                u"info_dict": {
-                    u"upload_date": u"20120521",
-                    u"description": u"The Royal Concept from Stockholm\r\nFilip / David / Povel / Magnus\r\nwww.theroyalconceptband.com",
-                    u"uploader": u"The Royal Concept",
-                    u"title": u"Knocked Up"
-                }
-            },
-            {
-                u"file":"75206121.mp3",
-                u"md5":"f9d1fe9406717e302980c30de4af9353",
-                u"info_dict": {
-                    u"upload_date": u"20130116",
-                    u"description": u"The unreleased track World on Fire premiered on the CW's hit show Arrow (8pm/7pm central).  \r\nAs a gift to our fans we would like to offer you a free download of the track!  ",
-                    u"uploader": u"The Royal Concept",
-                    u"title": u"World On Fire"
-                }
-            }
-        ]
-    }
+    # it's in tests/test_playlists.py
+    _TESTS = []
  
      def _real_extract(self, url):
          mobj = re.match(self._VALID_URL, url)
  
      def _real_extract(self, url):
          mobj = re.match(self._VALID_URL, url)
@@ -208,7 +178,7 @@ class SoundcloudUserIE(SoundcloudIE):
      IE_NAME = u'soundcloud:user'
  
      # it's in tests/test_playlists.py
      IE_NAME = u'soundcloud:user'
  
      # it's in tests/test_playlists.py
-    _TEST = None
+    _TESTS = []
  
      def _real_extract(self, url):
          mobj = re.match(self._VALID_URL, url)
  
      def _real_extract(self, url):
          mobj = re.match(self._VALID_URL, url)
diff --git a/youtube_dl/extractor/space.py b/youtube_dl/extractor/space.py

new file mode 100644 (file)

index 0000000..0d32a06
--- /dev/null
+++ b/youtube_dl/extractor/space.py
@@ -0,0 +1,35 @@
+import re
+
+from .common import InfoExtractor
+from .brightcove import BrightcoveIE
+from ..utils import RegexNotFoundError, ExtractorError
+
+
+class SpaceIE(InfoExtractor):
+    _VALID_URL = r'https?://www\.space\.com/\d+-(?P<title>[^/\.\?]*?)-video.html'
+    _TEST = {
+        u'add_ie': ['Brightcove'],
+        u'url': u'http://www.space.com/23373-huge-martian-landforms-detail-revealed-by-european-probe-video.html',
+        u'info_dict': {
+            u'id': u'2780937028001',
+            u'ext': u'mp4',
+            u'title': u'Huge Martian Landforms\' Detail Revealed By European Probe | Video',
+            u'description': u'md5:db81cf7f3122f95ed234b631a6ea1e61',
+            u'uploader': u'TechMedia Networks',
+        },
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        title = mobj.group('title')
+        webpage = self._download_webpage(url, title)
+        try:
+            # Some videos require the playerKey field, which isn't define in
+            # the BrightcoveExperience object
+            brightcove_url = self._og_search_video_url(webpage)
+        except RegexNotFoundError:
+            # Other videos works fine with the info from the object
+            brightcove_url = BrightcoveIE._extract_brightcove_url(webpage)
+        if brightcove_url is None:
+            raise ExtractorError(u'The webpage does not contain a video', expected=True)
+        return self.url_result(brightcove_url, BrightcoveIE.ie_key())
diff --git a/youtube_dl/extractor/spankwire.py b/youtube_dl/extractor/spankwire.py

new file mode 100644 (file)

index 0000000..97f9c26
--- /dev/null
+++ b/youtube_dl/extractor/spankwire.py
@@ -0,0 +1,74 @@
+import os
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    compat_urllib_parse_urlparse,
+    compat_urllib_request,
+    compat_urllib_parse,
+    unescapeHTML,
+)
+from ..aes import (
+    aes_decrypt_text
+)
+
+class SpankwireIE(InfoExtractor):
+    _VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>spankwire\.com/[^/]*/video(?P<videoid>[0-9]+)/?)'
+    _TEST = {
+        u'url': u'http://www.spankwire.com/Buckcherry-s-X-Rated-Music-Video-Crazy-Bitch/video103545/',
+        u'file': u'103545.mp4',
+        u'md5': u'1b3f55e345500552dbc252a3e9c1af43',
+        u'info_dict': {
+            u"uploader": u"oreusz", 
+            u"title": u"Buckcherry`s X Rated Music Video Crazy Bitch",
+            u"description": u"Crazy Bitch X rated music video.",
+            u"age_limit": 18,
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('videoid')
+        url = 'http://www.' + mobj.group('url')
+
+        req = compat_urllib_request.Request(url)
+        req.add_header('Cookie', 'age_verified=1')
+        webpage = self._download_webpage(req, video_id)
+
+        video_title = self._html_search_regex(r'<h1>([^<]+)', webpage, u'title')
+        video_uploader = self._html_search_regex(r'by:\s*<a [^>]*>(.+?)</a>', webpage, u'uploader', fatal=False)
+        thumbnail = self._html_search_regex(r'flashvars\.image_url = "([^"]+)', webpage, u'thumbnail', fatal=False)
+        description = self._html_search_regex(r'>\s*Description:</div>\s*<[^>]*>([^<]+)', webpage, u'description', fatal=False)
+        if len(description) == 0:
+            description = None
+
+        video_urls = list(map(compat_urllib_parse.unquote , re.findall(r'flashvars\.quality_[0-9]{3}p = "([^"]+)', webpage)))
+        if webpage.find('flashvars\.encrypted = "true"') != -1:
+            password = self._html_search_regex(r'flashvars\.video_title = "([^"]+)', webpage, u'password').replace('+', ' ')
+            video_urls = list(map(lambda s: aes_decrypt_text(s, password, 32).decode('utf-8'), video_urls))
+
+        formats = []
+        for video_url in video_urls:
+            path = compat_urllib_parse_urlparse(video_url).path
+            extension = os.path.splitext(path)[1][1:]
+            format = path.split('/')[4].split('_')[:2]
+            format = "-".join(format)
+            formats.append({
+                'url': video_url,
+                'ext': extension,
+                'format': format,
+                'format_id': format,
+            })
+        formats.sort(key=lambda format: list(map(lambda s: s.zfill(6), format['format'].split('-'))))
+
+        age_limit = self._rta_search(webpage)
+
+        return {
+            'id': video_id,
+            'uploader': video_uploader,
+            'title': video_title,
+            'thumbnail': thumbnail,
+            'description': description,
+            'formats': formats,
+            'age_limit': age_limit,
+        }
diff --git a/youtube_dl/extractor/subtitles.py b/youtube_dl/extractor/subtitles.py

index 90de7de3a709d4385b29e62d44ae1e82349d883e..4b4c5235d09ea8a6f75b7d182abf87bfc573557f 100644 (file)
--- a/youtube_dl/extractor/subtitles.py
+++ b/youtube_dl/extractor/subtitles.py
@@ -12,9 +12,9 @@ class SubtitlesInfoExtractor(InfoExtractor):
          return any([self._downloader.params.get('writesubtitles', False),
                      self._downloader.params.get('writeautomaticsub')])
  
          return any([self._downloader.params.get('writesubtitles', False),
                      self._downloader.params.get('writeautomaticsub')])
  
-    def _list_available_subtitles(self, video_id, webpage=None):
+    def _list_available_subtitles(self, video_id, webpage):
          """ outputs the available subtitles for the video """
          """ outputs the available subtitles for the video """
-        sub_lang_list = self._get_available_subtitles(video_id)
+        sub_lang_list = self._get_available_subtitles(video_id, webpage)
          auto_captions_list = self._get_available_automatic_caption(video_id, webpage)
          sub_lang = ",".join(list(sub_lang_list.keys()))
          self.to_screen(u'%s: Available subtitles for video: %s' %
          auto_captions_list = self._get_available_automatic_caption(video_id, webpage)
          sub_lang = ",".join(list(sub_lang_list.keys()))
          self.to_screen(u'%s: Available subtitles for video: %s' %
@@ -23,7 +23,7 @@ class SubtitlesInfoExtractor(InfoExtractor):
          self.to_screen(u'%s: Available automatic captions for video: %s' %
                         (video_id, auto_lang))
  
          self.to_screen(u'%s: Available automatic captions for video: %s' %
                         (video_id, auto_lang))
  
-    def extract_subtitles(self, video_id, video_webpage=None):
+    def extract_subtitles(self, video_id, webpage):
          """
          returns {sub_lang: sub} ,{} if subtitles not found or None if the
          subtitles aren't requested.
          """
          returns {sub_lang: sub} ,{} if subtitles not found or None if the
          subtitles aren't requested.
@@ -32,9 +32,9 @@ class SubtitlesInfoExtractor(InfoExtractor):
              return None
          available_subs_list = {}
          if self._downloader.params.get('writeautomaticsub', False):
              return None
          available_subs_list = {}
          if self._downloader.params.get('writeautomaticsub', False):
-            available_subs_list.update(self._get_available_automatic_caption(video_id, video_webpage))
+            available_subs_list.update(self._get_available_automatic_caption(video_id, webpage))
          if self._downloader.params.get('writesubtitles', False):
          if self._downloader.params.get('writesubtitles', False):
-            available_subs_list.update(self._get_available_subtitles(video_id))
+            available_subs_list.update(self._get_available_subtitles(video_id, webpage))
  
          if not available_subs_list:  # error, it didn't get the available subtitles
              return {}
  
          if not available_subs_list:  # error, it didn't get the available subtitles
              return {}
@@ -74,7 +74,7 @@ class SubtitlesInfoExtractor(InfoExtractor):
              return
          return sub
  
              return
          return sub
  
-    def _get_available_subtitles(self, video_id):
+    def _get_available_subtitles(self, video_id, webpage):
          """
          returns {sub_lang: url} or {} if not available
          Must be redefined by the subclasses
          """
          returns {sub_lang: url} or {} if not available
          Must be redefined by the subclasses
diff --git a/youtube_dl/extractor/teamcoco.py b/youtube_dl/extractor/teamcoco.py

index c910110ca9775d9ad03011238aacdc3c9ef4dae1..bc48620f0b992366e866181b1dad22aeb2e5d0a6 100644 (file)
--- a/youtube_dl/extractor/teamcoco.py
+++ b/youtube_dl/extractor/teamcoco.py
@@ -1,4 +1,5 @@
  import re
  import re
+import xml.etree.ElementTree
  
  from .common import InfoExtractor
  from ..utils import (
  
  from .common import InfoExtractor
  from ..utils import (
@@ -11,7 +12,7 @@ class TeamcocoIE(InfoExtractor):
      _TEST = {
          u'url': u'http://teamcoco.com/video/louis-ck-interview-george-w-bush',
          u'file': u'19705.mp4',
      _TEST = {
          u'url': u'http://teamcoco.com/video/louis-ck-interview-george-w-bush',
          u'file': u'19705.mp4',
-        u'md5': u'27b6f7527da5acf534b15f21b032656e',
+        u'md5': u'cde9ba0fa3506f5f017ce11ead928f9a',
          u'info_dict': {
              u"description": u"Louis C.K. got starstruck by George W. Bush, so what? Part one.", 
              u"title": u"Louis C.K. Interview Pt. 1 11/3/11"
          u'info_dict': {
              u"description": u"Louis C.K. got starstruck by George W. Bush, so what? Part one.", 
              u"title": u"Louis C.K. Interview Pt. 1 11/3/11"
@@ -31,16 +32,40 @@ class TeamcocoIE(InfoExtractor):
          self.report_extraction(video_id)
  
          data_url = 'http://teamcoco.com/cvp/2.0/%s.xml' % video_id
          self.report_extraction(video_id)
  
          data_url = 'http://teamcoco.com/cvp/2.0/%s.xml' % video_id
-        data = self._download_webpage(data_url, video_id, 'Downloading data webpage')
+        data_xml = self._download_webpage(data_url, video_id, 'Downloading data webpage')
+        data = xml.etree.ElementTree.fromstring(data_xml.encode('utf-8'))
  
  
-        video_url = self._html_search_regex(r'<file [^>]*type="high".*?>(.*?)</file>',
-            data, u'video URL')
  
  
-        return [{
+        qualities = ['500k', '480p', '1000k', '720p', '1080p']
+        formats = []
+        for file in data.findall('files/file'):
+            if file.attrib.get('playmode') == 'all':
+                # it just duplicates one of the entries
+                break
+            file_url = file.text
+            m_format = re.search(r'(\d+(k|p))\.mp4', file_url)
+            if m_format is not None:
+                format_id = m_format.group(1)
+            else:
+                format_id = file.attrib['bitrate']
+            formats.append({
+                'url': file_url,
+                'ext': 'mp4',
+                'format_id': format_id,
+            })
+        def sort_key(f):
+            # Rank a format by the position of its 'format_id' in the
+            # enclosing `qualities` list; ids that are not listed there get
+            # -1 and therefore sort ahead of every listed one.
+            format_id = f['format_id']
+            if format_id in qualities:
+                return qualities.index(format_id)
+            return -1
+        formats.sort(key=sort_key)
+        if not formats:
+            raise RegexNotFoundError(u'Unable to extract video URL')
+
+        return {
              'id':          video_id,
              'id':          video_id,
-            'url':         video_url,
-            'ext':         'mp4',
+            'formats': formats,
              'title':       self._og_search_title(webpage),
              'thumbnail':   self._og_search_thumbnail(webpage),
              'description': self._og_search_description(webpage),
              'title':       self._og_search_title(webpage),
              'thumbnail':   self._og_search_thumbnail(webpage),
              'description': self._og_search_description(webpage),
-        }]
+        }
diff --git a/youtube_dl/extractor/ted.py b/youtube_dl/extractor/ted.py

index dfa1176a3e4e4eef333dcb829773c189bf9916ba..76cfdfb90c886a94e95cb60b144e93b3f0acbb1a 100644 (file)
--- a/youtube_dl/extractor/ted.py
+++ b/youtube_dl/extractor/ted.py
@@ -1,10 +1,14 @@
  import json
  import re
  
  import json
  import re
  
-from .common import InfoExtractor
+from .subtitles import SubtitlesInfoExtractor
  
  
+from ..utils import (
+    compat_str,
+    RegexNotFoundError,
+)
  
  
-class TEDIE(InfoExtractor):
+class TEDIE(SubtitlesInfoExtractor):
      _VALID_URL=r'''http://www\.ted\.com/
                     (
                          ((?P<type_playlist>playlists)/(?P<playlist_id>\d+)) # We have a playlist
      _VALID_URL=r'''http://www\.ted\.com/
                     (
                          ((?P<type_playlist>playlists)/(?P<playlist_id>\d+)) # We have a playlist
@@ -32,7 +36,7 @@ class TEDIE(InfoExtractor):
      def _real_extract(self, url):
          m=re.match(self._VALID_URL, url, re.VERBOSE)
          if m.group('type_talk'):
      def _real_extract(self, url):
          m=re.match(self._VALID_URL, url, re.VERBOSE)
          if m.group('type_talk'):
-            return [self._talk_info(url)]
+            return self._talk_info(url)
          else :
              playlist_id=m.group('playlist_id')
              name=m.group('name')
          else :
              playlist_id=m.group('playlist_id')
              name=m.group('name')
@@ -82,11 +86,21 @@ class TEDIE(InfoExtractor):
              'url': stream['file'],
              'format': stream['id']
              } for stream in info['htmlStreams']]
              'url': stream['file'],
              'format': stream['id']
              } for stream in info['htmlStreams']]
+
+        video_id = info['id']
+
+        # subtitles
+        video_subtitles = self.extract_subtitles(video_id, webpage)
+        if self._downloader.params.get('listsubtitles', False):
+            self._list_available_subtitles(video_id, webpage)
+            return
+
          info = {
          info = {
-            'id': info['id'],
+            'id': video_id,
              'title': title,
              'thumbnail': thumbnail,
              'description': desc,
              'title': title,
              'thumbnail': thumbnail,
              'description': desc,
+            'subtitles': video_subtitles,
              'formats': formats,
          }
  
              'formats': formats,
          }
  
@@ -94,3 +108,17 @@ class TEDIE(InfoExtractor):
          info.update(info['formats'][-1])
  
          return info
          info.update(info['formats'][-1])
  
          return info
+
+    def _get_available_subtitles(self, video_id, webpage):
+        """
+        returns {sub_lang: url} built from the language <option> values of
+        the "subtitles_language_select" element found in webpage.
+        Returns {} (after reporting a warning) when _search_regex raises
+        RegexNotFoundError, and {} when no <option> value is found.
+        """
+        try:
+            select_html = self._search_regex(
+                r'(?:<select name="subtitles_language_select" id="subtitles_language_select">)(.*?)(?:</select>)',
+                webpage, 'subtitles_language_select', flags=re.DOTALL)
+        except RegexNotFoundError:
+            self._downloader.report_warning(u'video doesn\'t have subtitles')
+            return {}
+
+        # Everything below is plain string work, so it stays outside the try.
+        lang_codes = re.findall(r'(?:<option value=")(\S+)"', select_html)
+        subtitle_tpl = 'http://www.ted.com/talks/subtitles/id/%s/lang/%s/format/srt'
+        return dict(
+            (code, subtitle_tpl % (video_id, code)) for code in lang_codes)
diff --git a/youtube_dl/extractor/tube8.py b/youtube_dl/extractor/tube8.py

new file mode 100644 (file)

index 0000000..d4b7603
--- /dev/null
+++ b/youtube_dl/extractor/tube8.py
@@ -0,0 +1,65 @@
+import os
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    compat_urllib_parse_urlparse,
+    compat_urllib_request,
+    compat_urllib_parse,
+    unescapeHTML,
+)
+from ..aes import (
+    aes_decrypt_text
+)
+
+class Tube8IE(InfoExtractor):
+    """Information extractor for tube8.com video pages."""
+
+    _VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>tube8\.com/[^/]+/[^/]+/(?P<videoid>[0-9]+)/?)'
+    _TEST = {
+        u'url': u'http://www.tube8.com/teen/kasia-music-video/229795/',
+        u'file': u'229795.mp4',
+        u'md5': u'e9e0b0c86734e5e3766e653509475db0',
+        u'info_dict': {
+            u"description": u"hot teen Kasia grinding",
+            u"uploader": u"unknown",
+            u"title": u"Kasia music video",
+            u"age_limit": 18,
+        }
+    }
+
+    def _real_extract(self, url):
+        """
+        Download the video page and return a single info dict.
+
+        Title and video URL are mandatory; description, uploader and
+        thumbnail are searched with fatal=False.  When the page contains
+        '"encrypted":true' the video URL is passed through
+        aes_decrypt_text, using the page's "video_title" value as password.
+        """
+        url_match = re.match(self._VALID_URL, url)
+        video_id = url_match.group('videoid')
+        page_url = 'http://www.' + url_match.group('url')
+
+        # Request the page with the age_verified cookie already set
+        # (presumably this skips the site's age confirmation -- unverified).
+        request = compat_urllib_request.Request(page_url)
+        request.add_header('Cookie', 'age_verified=1')
+        webpage = self._download_webpage(request, video_id)
+
+        title = self._html_search_regex(
+            r'videotitle     ="([^"]+)', webpage, u'title')
+        description = self._html_search_regex(
+            r'>Description:</strong>(.+?)<', webpage, u'description',
+            fatal=False)
+        uploader = self._html_search_regex(
+            r'>Submitted by:</strong>(?:\s|<[^>]*>)*(.+?)<', webpage,
+            u'uploader', fatal=False)
+        thumbnail = self._html_search_regex(
+            r'"image_url":"([^"]+)', webpage, u'thumbnail', fatal=False)
+        if thumbnail:
+            # The value is captured with its slashes still escaped as "\/".
+            thumbnail = thumbnail.replace('\\/', '/')
+
+        video_url = self._html_search_regex(
+            r'"video_url":"([^"]+)', webpage, u'video_url')
+        if '"encrypted":true' in webpage:
+            password = self._html_search_regex(
+                r'"video_title":"([^"]+)', webpage, u'password')
+            decrypted = aes_decrypt_text(video_url, password, 32)
+            video_url = decrypted.decode('utf-8')
+
+        url_path = compat_urllib_parse_urlparse(video_url).path
+        # splitext keeps the leading dot on the suffix; strip it.
+        extension = os.path.splitext(url_path)[1][1:]
+        # The format label is the first two '_'-separated tokens of the
+        # fifth '/'-separated element of the URL path, joined with '-'.
+        format_id = "-".join(url_path.split('/')[4].split('_')[:2])
+
+        return {
+            'id': video_id,
+            'uploader': uploader,
+            'title': title,
+            'thumbnail': thumbnail,
+            'description': description,
+            'url': video_url,
+            'ext': extension,
+            'format': format_id,
+            'format_id': format_id,
+            'age_limit': 18,
+        }
diff --git a/youtube_dl/extractor/vevo.py b/youtube_dl/extractor/vevo.py

index 1c1cc418d29a8897e2a2825492ed7becab75af6b..3f6020f74ec9eeefbddafc184d3f48cf5e436adb 100644 (file)
--- a/youtube_dl/extractor/vevo.py
+++ b/youtube_dl/extractor/vevo.py
@@ -5,7 +5,7 @@ import datetime
  
  from .common import InfoExtractor
  from ..utils import (
  
  from .common import InfoExtractor
  from ..utils import (
-    determine_ext,
+    compat_HTTPError,
      ExtractorError,
  )
  
      ExtractorError,
  )
  
@@ -16,26 +16,22 @@ class VevoIE(InfoExtractor):
      (currently used by MTVIE)
      """
      _VALID_URL = r'((http://www.vevo.com/watch/.*?/.*?/)|(vevo:))(?P<id>.*?)(\?|$)'
      (currently used by MTVIE)
      """
      _VALID_URL = r'((http://www.vevo.com/watch/.*?/.*?/)|(vevo:))(?P<id>.*?)(\?|$)'
-    _TEST = {
+    _TESTS = [{
          u'url': u'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280',
          u'file': u'GB1101300280.mp4',
          u'url': u'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280',
          u'file': u'GB1101300280.mp4',
+        u"md5": u"06bea460acb744eab74a9d7dcb4bfd61",
          u'info_dict': {
              u"upload_date": u"20130624",
              u"uploader": u"Hurts",
              u"title": u"Somebody to Die For",
          u'info_dict': {
              u"upload_date": u"20130624",
              u"uploader": u"Hurts",
              u"title": u"Somebody to Die For",
-            u'duration': 230,
+            u"duration": 230,
+            u"width": 1920,
+            u"height": 1080,
          }
          }
-    }
+    }]
+    _SMIL_BASE_URL = 'http://smil.lvl3.vevo.com/'
  
  
-    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-
-        json_url = 'http://videoplayer.vevo.com/VideoService/AuthenticateVideo?isrc=%s' % video_id
-        info_json = self._download_webpage(json_url, video_id, u'Downloading json info')
-
-        self.report_extraction(video_id)
-        video_info = json.loads(info_json)['video']
+    def _formats_from_json(self, video_info):
          last_version = {'version': -1}
          for version in video_info['videoVersions']:
              # These are the HTTP downloads, other types are for different manifests
          last_version = {'version': -1}
          for version in video_info['videoVersions']:
              # These are the HTTP downloads, other types are for different manifests
@@ -50,17 +46,74 @@ class VevoIE(InfoExtractor):
          # Already sorted from worst to best quality
          for rend in renditions.findall('rendition'):
              attr = rend.attrib
          # Already sorted from worst to best quality
          for rend in renditions.findall('rendition'):
              attr = rend.attrib
-            f_url = attr['url']
+            format_note = '%(videoCodec)s@%(videoBitrate)4sk, %(audioCodec)s@%(audioBitrate)3sk' % attr
              formats.append({
              formats.append({
-                'url': f_url,
-                'ext': determine_ext(f_url),
+                'url': attr['url'],
+                'format_id': attr['name'],
+                'format_note': format_note,
                  'height': int(attr['frameheight']),
                  'width': int(attr['frameWidth']),
              })
                  'height': int(attr['frameheight']),
                  'width': int(attr['frameWidth']),
              })
+        return formats
+
+    def _formats_from_smil(self, smil_xml):
+        formats = []
+        smil_doc = xml.etree.ElementTree.fromstring(smil_xml.encode('utf-8'))
+        els = smil_doc.findall('.//{http://www.w3.org/2001/SMIL20/Language}video')
+        for el in els:
+            src = el.attrib['src']
+            m = re.match(r'''(?xi)
+                (?P<ext>[a-z0-9]+):
+                (?P<path>
+                    [/a-z0-9]+     # The directory and main part of the URL
+                    _(?P<cbr>[0-9]+)k
+                    _(?P<width>[0-9]+)x(?P<height>[0-9]+)
+                    _(?P<vcodec>[a-z0-9]+)
+                    _(?P<vbr>[0-9]+)
+                    _(?P<acodec>[a-z0-9]+)
+                    _(?P<abr>[0-9]+)
+                    \.[a-z0-9]+  # File extension
+                )''', src)
+            if not m:
+                continue
  
  
-        date_epoch = int(self._search_regex(
-            r'/Date\((\d+)\)/', video_info['launchDate'], u'launch date'))/1000
-        upload_date = datetime.datetime.fromtimestamp(date_epoch)
+            format_url = self._SMIL_BASE_URL + m.group('path')
+            format_note = ('%(vcodec)s@%(vbr)4sk, %(acodec)s@%(abr)3sk' %
+                           m.groupdict())
+            formats.append({
+                'url': format_url,
+                'format_id': u'SMIL_' + m.group('cbr'),
+                'format_note': format_note,
+                'ext': m.group('ext'),
+                'width': int(m.group('width')),
+                'height': int(m.group('height')),
+            })
+        return formats
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        json_url = 'http://videoplayer.vevo.com/VideoService/AuthenticateVideo?isrc=%s' % video_id
+        info_json = self._download_webpage(json_url, video_id, u'Downloading json info')
+        video_info = json.loads(info_json)['video']
+
+        formats = self._formats_from_json(video_info)
+        try:
+            smil_url = '%s/Video/V2/VFILE/%s/%sr.smil' % (
+                self._SMIL_BASE_URL, video_id, video_id.lower())
+            smil_xml = self._download_webpage(smil_url, video_id,
+                                              u'Downloading SMIL info')
+            formats.extend(self._formats_from_smil(smil_xml))
+        except ExtractorError as ee:
+            if not isinstance(ee.cause, compat_HTTPError):
+                raise
+            self._downloader.report_warning(
+                u'Cannot download SMIL information, falling back to JSON ..')
+
+        timestamp_ms = int(self._search_regex(
+            r'/Date\((\d+)\)/', video_info['launchDate'], u'launch date'))
+        upload_date = datetime.datetime.fromtimestamp(timestamp_ms // 1000)
          info = {
              'id': video_id,
              'title': video_info['title'],
          info = {
              'id': video_id,
              'title': video_info['title'],
@@ -71,7 +124,4 @@ class VevoIE(InfoExtractor):
              'duration': video_info['duration'],
          }
  
              'duration': video_info['duration'],
          }
  
-        # TODO: Remove when #980 has been merged
-        info.update(formats[-1])
-
          return info
          return info
diff --git a/youtube_dl/extractor/viddler.py b/youtube_dl/extractor/viddler.py

index 12c84a985cc8a2ee49b592d504e60cdbe8500eb8..826804af37af54e308f90349e909d3e0e3aa5126 100644 (file)
--- a/youtube_dl/extractor/viddler.py
+++ b/youtube_dl/extractor/viddler.py
@@ -8,7 +8,7 @@ from ..utils import (
  
  
  class ViddlerIE(InfoExtractor):
  
  
  class ViddlerIE(InfoExtractor):
-    _VALID_URL = r'(?P<domain>https?://(?:www\.)?viddler.com)/(?:v|embed|player)/(?P<id>[0-9]+)'
+    _VALID_URL = r'(?P<domain>https?://(?:www\.)?viddler.com)/(?:v|embed|player)/(?P<id>[a-z0-9]+)'
      _TEST = {
          u"url": u"http://www.viddler.com/v/43903784",
          u'file': u'43903784.mp4',
      _TEST = {
          u"url": u"http://www.viddler.com/v/43903784",
          u'file': u'43903784.mp4',
diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py

index 1125513c7b0cd443becb2847acb9db9032303dfb..d465bf20b6d65b7b5cd3a0545af676c2c710a07d 100644 (file)
--- a/youtube_dl/extractor/vimeo.py
+++ b/youtube_dl/extractor/vimeo.py
@@ -1,3 +1,4 @@
+# encoding: utf-8
  import json
  import re
  import itertools
  import json
  import re
  import itertools
@@ -10,6 +11,7 @@ from ..utils import (
      clean_html,
      get_element_by_attribute,
      ExtractorError,
      clean_html,
      get_element_by_attribute,
      ExtractorError,
+    RegexNotFoundError,
      std_headers,
      unsmuggle_url,
  )
      std_headers,
      unsmuggle_url,
  )
@@ -18,12 +20,12 @@ class VimeoIE(InfoExtractor):
      """Information extractor for vimeo.com."""
  
      # _VALID_URL matches Vimeo URLs
      """Information extractor for vimeo.com."""
  
      # _VALID_URL matches Vimeo URLs
-    _VALID_URL = r'(?P<proto>https?://)?(?:(?:www|player)\.)?vimeo(?P<pro>pro)?\.com/(?:(?:(?:groups|album)/[^/]+)|(?:.*?)/)?(?P<direct_link>play_redirect_hls\?clip_id=)?(?:videos?/)?(?P<id>[0-9]+)/?(?:[?].*)?$'
+    _VALID_URL = r'(?P<proto>https?://)?(?:(?:www|(?P<player>player))\.)?vimeo(?P<pro>pro)?\.com/(?:(?:(?:groups|album)/[^/]+)|(?:.*?)/)?(?P<direct_link>play_redirect_hls\?clip_id=)?(?:videos?/)?(?P<id>[0-9]+)/?(?:[?].*)?(?:#.*)?$'
      _NETRC_MACHINE = 'vimeo'
      IE_NAME = u'vimeo'
      _TESTS = [
          {
      _NETRC_MACHINE = 'vimeo'
      IE_NAME = u'vimeo'
      _TESTS = [
          {
-            u'url': u'http://vimeo.com/56015672',
+            u'url': u'http://vimeo.com/56015672#at=0',
              u'file': u'56015672.mp4',
              u'md5': u'8879b6cc097e987f02484baf890129e5',
              u'info_dict': {
              u'file': u'56015672.mp4',
              u'md5': u'8879b6cc097e987f02484baf890129e5',
              u'info_dict': {
@@ -54,7 +56,22 @@ class VimeoIE(InfoExtractor):
                  u'title': u'Kathy Sierra: Building the minimum Badass User, Business of Software',
                  u'uploader': u'The BLN & Business of Software',
              },
                  u'title': u'Kathy Sierra: Building the minimum Badass User, Business of Software',
                  u'uploader': u'The BLN & Business of Software',
              },
-        }
+        },
+        {
+            u'url': u'http://vimeo.com/68375962',
+            u'file': u'68375962.mp4',
+            u'md5': u'aaf896bdb7ddd6476df50007a0ac0ae7',
+            u'note': u'Video protected with password',
+            u'info_dict': {
+                u'title': u'youtube-dl password protected test video',
+                u'upload_date': u'20130614',
+                u'uploader_id': u'user18948128',
+                u'uploader': u'Jaime Marquínez Ferrándiz',
+            },
+            u'params': {
+                u'videopassword': u'youtube-dl',
+            },
+        },
      ]
  
      def _login(self):
      ]
  
      def _login(self):
@@ -111,11 +128,9 @@ class VimeoIE(InfoExtractor):
              raise ExtractorError(u'Invalid URL: %s' % url)
  
          video_id = mobj.group('id')
              raise ExtractorError(u'Invalid URL: %s' % url)
  
          video_id = mobj.group('id')
-        if not mobj.group('proto'):
-            url = 'https://' + url
-        elif mobj.group('pro'):
+        if mobj.group('pro') or mobj.group('player'):
              url = 'http://player.vimeo.com/video/' + video_id
              url = 'http://player.vimeo.com/video/' + video_id
-        elif mobj.group('direct_link'):
+        else:
              url = 'https://vimeo.com/' + video_id
  
          # Retrieve video webpage to extract further information
              url = 'https://vimeo.com/' + video_id
  
          # Retrieve video webpage to extract further information
@@ -129,18 +144,26 @@ class VimeoIE(InfoExtractor):
  
          # Extract the config JSON
          try:
  
          # Extract the config JSON
          try:
-            config = self._search_regex([r' = {config:({.+?}),assets:', r'c=({.+?);'],
-                webpage, u'info section', flags=re.DOTALL)
-            config = json.loads(config)
-        except:
+            try:
+                config_url = self._html_search_regex(
+                    r' data-config-url="(.+?)"', webpage, u'config URL')
+                config_json = self._download_webpage(config_url, video_id)
+                config = json.loads(config_json)
+            except RegexNotFoundError:
+                # For pro videos or player.vimeo.com urls
+                config = self._search_regex([r' = {config:({.+?}),assets:', r'c=({.+?);'],
+                    webpage, u'info section', flags=re.DOTALL)
+                config = json.loads(config)
+        except Exception as e:
              if re.search('The creator of this video has not given you permission to embed it on this domain.', webpage):
                  raise ExtractorError(u'The author has restricted the access to this video, try with the "--referer" option')
  
              if re.search('The creator of this video has not given you permission to embed it on this domain.', webpage):
                  raise ExtractorError(u'The author has restricted the access to this video, try with the "--referer" option')
  
-            if re.search('If so please provide the correct password.', webpage):
+            if re.search('<form[^>]+?id="pw_form"', webpage) is not None:
                  self._verify_video_password(url, video_id, webpage)
                  return self._real_extract(url)
              else:
                  self._verify_video_password(url, video_id, webpage)
                  return self._real_extract(url)
              else:
-                raise ExtractorError(u'Unable to extract info section')
+                raise ExtractorError(u'Unable to extract info section',
+                                     cause=e)
  
          # Extract title
          video_title = config["video"]["title"]
  
          # Extract title
          video_title = config["video"]["title"]
@@ -180,7 +203,7 @@ class VimeoIE(InfoExtractor):
          # Vimeo specific: extract video codec and quality information
          # First consider quality, then codecs, then take everything
          codecs = [('vp6', 'flv'), ('vp8', 'flv'), ('h264', 'mp4')]
          # Vimeo specific: extract video codec and quality information
          # First consider quality, then codecs, then take everything
          codecs = [('vp6', 'flv'), ('vp8', 'flv'), ('h264', 'mp4')]
-        files = { 'hd': [], 'sd': [], 'other': []}
+        files = {'hd': [], 'sd': [], 'other': []}
          config_files = config["video"].get("files") or config["request"].get("files")
          for codec_name, codec_extension in codecs:
              for quality in config_files.get(codec_name, []):
          config_files = config["video"].get("files") or config["request"].get("files")
          for codec_name, codec_extension in codecs:
              for quality in config_files.get(codec_name, []):
@@ -209,7 +232,7 @@ class VimeoIE(InfoExtractor):
          if len(formats) == 0:
              raise ExtractorError(u'No known codec found')
  
          if len(formats) == 0:
              raise ExtractorError(u'No known codec found')
  
-        return [{
+        return {
              'id':       video_id,
              'uploader': video_uploader,
              'uploader_id': video_uploader_id,
              'id':       video_id,
              'uploader': video_uploader,
              'uploader_id': video_uploader_id,
@@ -218,7 +241,8 @@ class VimeoIE(InfoExtractor):
              'thumbnail':    video_thumbnail,
              'description':  video_description,
              'formats': formats,
              'thumbnail':    video_thumbnail,
              'description':  video_description,
              'formats': formats,
-        }]
+            'webpage_url': url,
+        }
  
  
  class VimeoChannelIE(InfoExtractor):
  
  
  class VimeoChannelIE(InfoExtractor):
diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py

new file mode 100644 (file)

index 0000000..90d8a6d
--- /dev/null
+++ b/youtube_dl/extractor/vk.py
@@ -0,0 +1,45 @@
+# encoding: utf-8
+import re
+import json
+
+from .common import InfoExtractor
+from ..utils import (
+    compat_str,
+    unescapeHTML,
+)
+
+
+class VKIE(InfoExtractor):
+    """Information extractor for vk.com video pages."""
+
+    IE_NAME = u'vk.com'
+    _VALID_URL = r'https?://vk\.com/(?:videos.*?\?.*?z=)?video(?P<id>.*?)(?:\?|%2F|$)'
+
+    _TEST = {
+        u'url': u'http://vk.com/videos-77521?z=video-77521_162222515%2Fclub77521',
+        u'md5': u'0deae91935c54e00003c2a00646315f0',
+        u'info_dict': {
+            u'id': u'162222515',
+            u'ext': u'flv',
+            u'title': u'ProtivoGunz - Хуёвая песня',
+            u'uploader': u'Noize MC',
+        },
+    }
+
+    def _real_extract(self, url):
+        """
+        Fetch the al_video.php info page for the video id in url.
+
+        If that page embeds a www.youtube.com src, the embedded URL is
+        handed over via url_result(..., 'Youtube').  Otherwise the JSON
+        object assigned to "var vars" is parsed and an info dict is built
+        from it, using its 'url240' entry as the video URL.
+        """
+        video_id = re.match(self._VALID_URL, url).group('id')
+        info_page = self._download_webpage(
+            'http://vk.com/al_video.php?act=show&al=1&video=%s' % video_id,
+            video_id)
+
+        youtube_embed = re.search(
+            r'src="(http://www.youtube.com/.*?)"', info_page)
+        if youtube_embed is not None:
+            self.to_screen(u'Youtube video detected')
+            return self.url_result(youtube_embed.group(1), 'Youtube')
+
+        player_vars = json.loads(self._search_regex(
+            r'var vars = ({.*?});', info_page, u'vars'))
+
+        return {
+            'id': compat_str(player_vars['vid']),
+            'url': player_vars['url240'],
+            'title': unescapeHTML(player_vars['md_title']),
+            'thumbnail': player_vars['jpg'],
+            'uploader': player_vars['md_author'],
+        }
diff --git a/youtube_dl/extractor/weibo.py b/youtube_dl/extractor/weibo.py

index 0757495bd8a11e22c98c9307d734f9e83529a631..fa784ab994d2b8acede7e4b4496b12779a787de6 100644 (file)
--- a/youtube_dl/extractor/weibo.py
+++ b/youtube_dl/extractor/weibo.py
@@ -13,6 +13,7 @@ class WeiboIE(InfoExtractor):
      _VALID_URL = r'https?://video\.weibo\.com/v/weishipin/t_(?P<id>.+?)\.htm'
  
      _TEST = {
      _VALID_URL = r'https?://video\.weibo\.com/v/weishipin/t_(?P<id>.+?)\.htm'
  
      _TEST = {
+        u'add_ie': ['Sina'],
          u'url': u'http://video.weibo.com/v/weishipin/t_zjUw2kZ.htm',
          u'file': u'98322879.flv',
          u'info_dict': {
          u'url': u'http://video.weibo.com/v/weishipin/t_zjUw2kZ.htm',
          u'file': u'98322879.flv',
          u'info_dict': {
diff --git a/youtube_dl/extractor/xhamster.py b/youtube_dl/extractor/xhamster.py

index 81c4be3269150aafc72f6ff0980923119c299373..7444d3393a25f8a49778a5bd589aa839591bd9d8 100644 (file)
--- a/youtube_dl/extractor/xhamster.py
+++ b/youtube_dl/extractor/xhamster.py
@@ -36,21 +36,25 @@ class XHamsterIE(InfoExtractor):
      }]
  
      def _real_extract(self,url):
      }]
  
      def _real_extract(self,url):
+        def extract_video_url(webpage):
+            mobj = re.search(r'\'srv\': \'(?P<server>[^\']*)\',\s*\'file\': \'(?P<file>[^\']+)\',', webpage)
+            if mobj is None:
+                raise ExtractorError(u'Unable to extract media URL')
+            if len(mobj.group('server')) == 0:
+                return compat_urllib_parse.unquote(mobj.group('file'))
+            else:
+                return mobj.group('server')+'/key='+mobj.group('file')
+
+        def is_hd(webpage):
+            return webpage.find('<div class=\'icon iconHD\'>') != -1
+
          mobj = re.match(self._VALID_URL, url)
  
          video_id = mobj.group('id')
          seo = mobj.group('seo')
          mobj = re.match(self._VALID_URL, url)
  
          video_id = mobj.group('id')
          seo = mobj.group('seo')
-        mrss_url = 'http://xhamster.com/movies/%s/%s.html?hd' % (video_id, seo)
+        mrss_url = 'http://xhamster.com/movies/%s/%s.html' % (video_id, seo)
          webpage = self._download_webpage(mrss_url, video_id)
  
          webpage = self._download_webpage(mrss_url, video_id)
  
-        mobj = re.search(r'\'srv\': \'(?P<server>[^\']*)\',\s*\'file\': \'(?P<file>[^\']+)\',', webpage)
-        if mobj is None:
-            raise ExtractorError(u'Unable to extract media URL')
-        if len(mobj.group('server')) == 0:
-            video_url = compat_urllib_parse.unquote(mobj.group('file'))
-        else:
-            video_url = mobj.group('server')+'/key='+mobj.group('file')
-
          video_title = self._html_search_regex(r'<title>(?P<title>.+?) - xHamster\.com</title>',
              webpage, u'title')
  
          video_title = self._html_search_regex(r'<title>(?P<title>.+?) - xHamster\.com</title>',
              webpage, u'title')
  
@@ -76,14 +80,32 @@ class XHamsterIE(InfoExtractor):
  
          age_limit = self._rta_search(webpage)
  
  
          age_limit = self._rta_search(webpage)
  
-        return [{
-            'id':       video_id,
-            'url':      video_url,
-            'ext':      determine_ext(video_url),
-            'title':    video_title,
+        video_url = extract_video_url(webpage)
+        hd = is_hd(webpage)
+        formats = [{
+            'url': video_url,
+            'ext': determine_ext(video_url),
+            'format': 'hd' if hd else 'sd',
+            'format_id': 'hd' if hd else 'sd',
+        }]
+        if not hd:
+            webpage = self._download_webpage(mrss_url+'?hd', video_id)
+            if is_hd(webpage):
+                video_url = extract_video_url(webpage)
+                formats.append({
+                    'url': video_url,
+                    'ext': determine_ext(video_url),
+                    'format': 'hd',
+                    'format_id': 'hd',
+                })
+
+        return {
+            'id': video_id,
+            'title': video_title,
+            'formats': formats,
              'description': video_description,
              'upload_date': video_upload_date,
              'uploader_id': video_uploader_id,
              'thumbnail': video_thumbnail,
              'age_limit': age_limit,
              'description': video_description,
              'upload_date': video_upload_date,
              'uploader_id': video_uploader_id,
              'thumbnail': video_thumbnail,
              'age_limit': age_limit,
-        }]
+        }
diff --git a/youtube_dl/extractor/xnxx.py b/youtube_dl/extractor/xnxx.py

index 8a0eb1afdacc4cbe1cbb441b939cff3d7697cf4e..1177a4b14ec04748bebb5ab17db2f0a29c68ca5c 100644 (file)
--- a/youtube_dl/extractor/xnxx.py
+++ b/youtube_dl/extractor/xnxx.py
@@ -9,7 +9,7 @@ from ..utils import (
  
  
  class XNXXIE(InfoExtractor):
  
  
  class XNXXIE(InfoExtractor):
-    _VALID_URL = r'^(?:https?://)?video\.xnxx\.com/video([0-9]+)/(.*)'
+    _VALID_URL = r'^(?:https?://)?(?:video|www)\.xnxx\.com/video([0-9]+)/(.*)'
      VIDEO_URL_RE = r'flv_url=(.*?)&amp;'
      VIDEO_TITLE_RE = r'<title>(.*?)\s+-\s+XNXX.COM'
      VIDEO_THUMB_RE = r'url_bigthumb=(.*?)&amp;'
      VIDEO_URL_RE = r'flv_url=(.*?)&amp;'
      VIDEO_TITLE_RE = r'<title>(.*?)\s+-\s+XNXX.COM'
      VIDEO_THUMB_RE = r'url_bigthumb=(.*?)&amp;'
diff --git a/youtube_dl/extractor/xtube.py b/youtube_dl/extractor/xtube.py

new file mode 100644 (file)

index 0000000..03ad88b
--- /dev/null
+++ b/youtube_dl/extractor/xtube.py
@@ -0,0 +1,55 @@
+import os
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    compat_urllib_parse_urlparse,
+    compat_urllib_request,
+    compat_urllib_parse,
+)
+
+class XTubeIE(InfoExtractor):
+    _VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>xtube\.com/watch\.php\?v=(?P<videoid>[^/?&]+))'
+    _TEST = {
+        u'url': u'http://www.xtube.com/watch.php?v=kVTUy_G222_',
+        u'file': u'kVTUy_G222_.mp4',
+        u'md5': u'092fbdd3cbe292c920ef6fc6a8a9cdab',
+        u'info_dict': {
+            u"title": u"strange erotica",
+            u"description": u"surreal gay themed erotica...almost an ET kind of thing",
+            u"uploader": u"greenshowers",
+            u"age_limit": 18,
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('videoid')
+        url = 'http://www.' + mobj.group('url')
+
+        req = compat_urllib_request.Request(url)
+        req.add_header('Cookie', 'age_verified=1')
+        webpage = self._download_webpage(req, video_id)
+
+        video_title = self._html_search_regex(r'<div class="p_5px[^>]*>([^<]+)', webpage, u'title')
+        video_uploader = self._html_search_regex(r'so_s\.addVariable\("owner_u", "([^"]+)', webpage, u'uploader', fatal=False)
+        video_description = self._html_search_regex(r'<p class="video_description">([^<]+)', webpage, u'description', default=None)
+        video_url= self._html_search_regex(r'var videoMp4 = "([^"]+)', webpage, u'video_url').replace('\\/', '/')
+        path = compat_urllib_parse_urlparse(video_url).path
+        extension = os.path.splitext(path)[1][1:]
+        format = path.split('/')[5].split('_')[:2]
+        format[0] += 'p'
+        format[1] += 'k'
+        format = "-".join(format)
+
+        return {
+            'id': video_id,
+            'title': video_title,
+            'uploader': video_uploader,
+            'description': video_description,
+            'url': video_url,
+            'ext': extension,
+            'format': format,
+            'format_id': format,
+            'age_limit': 18,
+        }
diff --git a/youtube_dl/extractor/yahoo.py b/youtube_dl/extractor/yahoo.py

index 464b498f584c3e42b613a79589b52a4d32fec413..34e6afb20fb6833ab21501785deb54cf5f0a0e24 100644 (file)
--- a/youtube_dl/extractor/yahoo.py
+++ b/youtube_dl/extractor/yahoo.py
@@ -132,7 +132,7 @@ class YahooSearchIE(SearchInfoExtractor):
                  mobj = re.search(r'(?P<url>screen\.yahoo\.com/.*?-\d*?\.html)"', r)
                  e = self.url_result('http://' + mobj.group('url'), 'Yahoo')
                  res['entries'].append(e)
                  mobj = re.search(r'(?P<url>screen\.yahoo\.com/.*?-\d*?\.html)"', r)
                  e = self.url_result('http://' + mobj.group('url'), 'Yahoo')
                  res['entries'].append(e)
-            if (pagenum * 30 +i >= n) or (m[u'last'] >= (m[u'total'] -1 )):
+            if (pagenum * 30 +i >= n) or (m[u'last'] >= (m[u'total'] -1)):
                  break
  
          return res
                  break
  
          return res
diff --git a/youtube_dl/extractor/youjizz.py b/youtube_dl/extractor/youjizz.py

index 1265639e821bd873b74aeea08811f8c22e966ba1..1fcc518acde9dbb08fef1ccb42a9ee7ae550967a 100644 (file)
--- a/youtube_dl/extractor/youjizz.py
+++ b/youtube_dl/extractor/youjizz.py
@@ -13,7 +13,8 @@ class YouJizzIE(InfoExtractor):
          u'file': u'2189178.flv',
          u'md5': u'07e15fa469ba384c7693fd246905547c',
          u'info_dict': {
          u'file': u'2189178.flv',
          u'md5': u'07e15fa469ba384c7693fd246905547c',
          u'info_dict': {
-            u"title": u"Zeichentrick 1"
+            u"title": u"Zeichentrick 1",
+            u"age_limit": 18,
          }
      }
  
          }
      }
  
@@ -25,6 +26,8 @@ class YouJizzIE(InfoExtractor):
          # Get webpage content
          webpage = self._download_webpage(url, video_id)
  
          # Get webpage content
          webpage = self._download_webpage(url, video_id)
  
+        age_limit = self._rta_search(webpage)
+
          # Get the video title
          video_title = self._html_search_regex(r'<title>(?P<title>.*)</title>',
              webpage, u'title').strip()
          # Get the video title
          video_title = self._html_search_regex(r'<title>(?P<title>.*)</title>',
              webpage, u'title').strip()
@@ -60,6 +63,7 @@ class YouJizzIE(InfoExtractor):
                  'title': video_title,
                  'ext': 'flv',
                  'format': 'flv',
                  'title': video_title,
                  'ext': 'flv',
                  'format': 'flv',
-                'player_url': embed_page_url}
+                'player_url': embed_page_url,
+                'age_limit': age_limit}
  
          return [info]
  
          return [info]
diff --git a/youtube_dl/extractor/youku.py b/youtube_dl/extractor/youku.py

index 9d88c17f52a25091ea045d2ea0dd6f819da93473..a8fd40c833fb7707eb1cd8760c288da5f2299025 100644 (file)
--- a/youtube_dl/extractor/youku.py
+++ b/youtube_dl/extractor/youku.py
@@ -18,7 +18,7 @@ class YoukuIE(InfoExtractor):
          u"url": u"http://v.youku.com/v_show/id_XNDgyMDQ2NTQw.html",
          u"file": u"XNDgyMDQ2NTQw_part00.flv",
          u"md5": u"ffe3f2e435663dc2d1eea34faeff5b5b",
          u"url": u"http://v.youku.com/v_show/id_XNDgyMDQ2NTQw.html",
          u"file": u"XNDgyMDQ2NTQw_part00.flv",
          u"md5": u"ffe3f2e435663dc2d1eea34faeff5b5b",
-        u"params": { u"test": False },
+        u"params": {u"test": False},
          u"info_dict": {
              u"title": u"youtube-dl test video \"'/\\ä↭𝕐"
          }
          u"info_dict": {
              u"title": u"youtube-dl test video \"'/\\ä↭𝕐"
          }
@@ -37,8 +37,8 @@ class YoukuIE(InfoExtractor):
          source = list("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ/\:._-1234567890")
          seed = float(seed)
          for i in range(len(source)):
          source = list("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ/\:._-1234567890")
          seed = float(seed)
          for i in range(len(source)):
-            seed  =  (seed * 211 + 30031 ) % 65536
-            index  =  math.floor(seed / 65536 * len(source) )
+            seed  =  (seed * 211 + 30031) % 65536
+            index  =  math.floor(seed / 65536 * len(source))
              mixed.append(source[int(index)])
              source.remove(source[int(index)])
          #return ''.join(mixed)
              mixed.append(source[int(index)])
              source.remove(source[int(index)])
          #return ''.join(mixed)
diff --git a/youtube_dl/extractor/youporn.py b/youtube_dl/extractor/youporn.py

index e3b56cece6bd9d8bebfe3cfffe7a1aefe2614131..bd0f2cae0298dec0d78f812153976ec6a8434bb0 100644 (file)
--- a/youtube_dl/extractor/youporn.py
+++ b/youtube_dl/extractor/youporn.py
@@ -17,7 +17,7 @@ from ..aes import (
  )
  
  class YouPornIE(InfoExtractor):
  )
  
  class YouPornIE(InfoExtractor):
-    _VALID_URL = r'^(?:https?://)?(?:\w+\.)?youporn\.com/watch/(?P<videoid>[0-9]+)/(?P<title>[^/]+)'
+    _VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>youporn\.com/watch/(?P<videoid>[0-9]+)/(?P<title>[^/]+))'
      _TEST = {
          u'url': u'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/',
          u'file': u'505835.mp4',
      _TEST = {
          u'url': u'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/',
          u'file': u'505835.mp4',
@@ -31,23 +31,10 @@ class YouPornIE(InfoExtractor):
          }
      }
  
          }
      }
  
-    def _print_formats(self, formats):
-        """Print all available formats"""
-        print(u'Available formats:')
-        print(u'ext\t\tformat')
-        print(u'---------------------------------')
-        for format in formats:
-            print(u'%s\t\t%s'  % (format['ext'], format['format']))
-
-    def _specific(self, req_format, formats):
-        for x in formats:
-            if x["format"] == req_format:
-                return x
-        return None
-
      def _real_extract(self, url):
          mobj = re.match(self._VALID_URL, url)
          video_id = mobj.group('videoid')
      def _real_extract(self, url):
          mobj = re.match(self._VALID_URL, url)
          video_id = mobj.group('videoid')
+        url = 'http://www.' + mobj.group('url')
  
          req = compat_urllib_request.Request(url)
          req.add_header('Cookie', 'age_verified=1')
  
          req = compat_urllib_request.Request(url)
          req.add_header('Cookie', 'age_verified=1')
@@ -71,27 +58,22 @@ class YouPornIE(InfoExtractor):
          except KeyError:
              raise ExtractorError('Missing JSON parameter: ' + sys.exc_info()[1])
  
          except KeyError:
              raise ExtractorError('Missing JSON parameter: ' + sys.exc_info()[1])
  
-        # Get all of the formats available
+        # Get all of the links from the page
          DOWNLOAD_LIST_RE = r'(?s)<ul class="downloadList">(?P<download_list>.*?)</ul>'
          download_list_html = self._search_regex(DOWNLOAD_LIST_RE,
              webpage, u'download list').strip()
          DOWNLOAD_LIST_RE = r'(?s)<ul class="downloadList">(?P<download_list>.*?)</ul>'
          download_list_html = self._search_regex(DOWNLOAD_LIST_RE,
              webpage, u'download list').strip()
-
-        # Get all of the links from the page
-        LINK_RE = r'(?s)<a href="(?P<url>[^"]+)">'
+        LINK_RE = r'<a href="([^"]+)">'
          links = re.findall(LINK_RE, download_list_html)
          links = re.findall(LINK_RE, download_list_html)
-        
-        # Get link of hd video if available
-        mobj = re.search(r'var encryptedQuality720URL = \'(?P<encrypted_video_url>[a-zA-Z0-9+/]+={0,2})\';', webpage)
-        if mobj != None:
-            encrypted_video_url = mobj.group(u'encrypted_video_url')
-            video_url = aes_decrypt_text(encrypted_video_url, video_title, 32).decode('utf-8')
-            links = [video_url] + links
+
+        # Get all encrypted links
+        encrypted_links = re.findall(r'var encryptedQuality[0-9]{3}URL = \'([a-zA-Z0-9+/]+={0,2})\';', webpage)
+        for encrypted_link in encrypted_links:
+            link = aes_decrypt_text(encrypted_link, video_title, 32).decode('utf-8')
+            links.append(link)
          
          if not links:
              raise ExtractorError(u'ERROR: no known formats available for video')
  
          
          if not links:
              raise ExtractorError(u'ERROR: no known formats available for video')
  
-        self.to_screen(u'Links found: %d' % len(links))
-
          formats = []
          for link in links:
  
          formats = []
          for link in links:
  
@@ -99,43 +81,36 @@ class YouPornIE(InfoExtractor):
              # http://cdn1.download.youporn.phncdn.com/201210/31/8004515/480p_370k_8004515/YouPorn%20-%20Nubile%20Films%20The%20Pillow%20Fight.mp4?nvb=20121113051249&nva=20121114051249&ir=1200&sr=1200&hash=014b882080310e95fb6a0
              # A path looks like this:
              # /201210/31/8004515/480p_370k_8004515/YouPorn%20-%20Nubile%20Films%20The%20Pillow%20Fight.mp4
              # http://cdn1.download.youporn.phncdn.com/201210/31/8004515/480p_370k_8004515/YouPorn%20-%20Nubile%20Films%20The%20Pillow%20Fight.mp4?nvb=20121113051249&nva=20121114051249&ir=1200&sr=1200&hash=014b882080310e95fb6a0
              # A path looks like this:
              # /201210/31/8004515/480p_370k_8004515/YouPorn%20-%20Nubile%20Films%20The%20Pillow%20Fight.mp4
-            video_url = unescapeHTML( link )
-            path = compat_urllib_parse_urlparse( video_url ).path
-            extension = os.path.splitext( path )[1][1:]
+            video_url = unescapeHTML(link)
+            path = compat_urllib_parse_urlparse(video_url).path
+            extension = os.path.splitext(path)[1][1:]
              format = path.split('/')[4].split('_')[:2]
              format = path.split('/')[4].split('_')[:2]
+
              # size = format[0]
              # bitrate = format[1]
              # size = format[0]
              # bitrate = format[1]
-            format = "-".join( format )
+            format = "-".join(format)
              # title = u'%s-%s-%s' % (video_title, size, bitrate)
  
              formats.append({
              # title = u'%s-%s-%s' % (video_title, size, bitrate)
  
              formats.append({
-                'id': video_id,
                  'url': video_url,
                  'url': video_url,
-                'uploader': video_uploader,
-                'upload_date': upload_date,
-                'title': video_title,
                  'ext': extension,
                  'format': format,
                  'ext': extension,
                  'format': format,
-                'thumbnail': thumbnail,
-                'description': video_description,
-                'age_limit': age_limit,
+                'format_id': format,
              })
  
              })
  
-        if self._downloader.params.get('listformats', None):
-            self._print_formats(formats)
-            return
-
-        req_format = self._downloader.params.get('format', 'best')
-        self.to_screen(u'Format: %s' % req_format)
-
-        if req_format is None or req_format == 'best':
-            return [formats[0]]
-        elif req_format == 'worst':
-            return [formats[-1]]
-        elif req_format in ('-1', 'all'):
-            return formats
-        else:
-            format = self._specific( req_format, formats )
-            if format is None:
-                raise ExtractorError(u'Requested format not available')
-            return [format]
+        # Sort and remove doubles
+        formats.sort(key=lambda format: list(map(lambda s: s.zfill(6), format['format'].split('-'))))
+        for i in range(len(formats)-1,0,-1):
+            if formats[i]['format_id'] == formats[i-1]['format_id']:
+                del formats[i]
+        
+        return {
+            'id': video_id,
+            'uploader': video_uploader,
+            'upload_date': upload_date,
+            'title': video_title,
+            'thumbnail': thumbnail,
+            'description': video_description,
+            'age_limit': age_limit,
+            'formats': formats,
+        }
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py

index a88cba2b4d74d2f57d0b781ab8e608e693c1caa2..c992cba978441081a50e5f134a4a694c83a6685c 100644 (file)
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -74,14 +74,8 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
              self._downloader.report_warning(u'unable to fetch login page: %s' % compat_str(err))
              return False
  
              self._downloader.report_warning(u'unable to fetch login page: %s' % compat_str(err))
              return False
  
-        galx = None
-        dsh = None
-        match = re.search(re.compile(r'<input.+?name="GALX".+?value="(.+?)"', re.DOTALL), login_page)
-        if match:
-          galx = match.group(1)
-        match = re.search(re.compile(r'<input.+?name="dsh".+?value="(.+?)"', re.DOTALL), login_page)
-        if match:
-          dsh = match.group(1)
+        galx = self._search_regex(r'(?s)<input.+?name="GALX".+?value="(.+?)"',
+                                  login_page, u'Login GALX parameter')
  
          # Log in
          login_form_strs = {
  
          # Log in
          login_form_strs = {
@@ -95,7 +89,6 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
                  u'checkConnection': u'',
                  u'checkedDomains': u'youtube',
                  u'dnConn': u'',
                  u'checkConnection': u'',
                  u'checkedDomains': u'youtube',
                  u'dnConn': u'',
-                u'dsh': dsh,
                  u'pstMsg': u'0',
                  u'rmShown': u'1',
                  u'secTok': u'',
                  u'pstMsg': u'0',
                  u'rmShown': u'1',
                  u'secTok': u'',
@@ -236,11 +229,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
          '136': 'mp4',
          '137': 'mp4',
          '138': 'mp4',
          '136': 'mp4',
          '137': 'mp4',
          '138': 'mp4',
-        '139': 'mp4',
-        '140': 'mp4',
-        '141': 'mp4',
          '160': 'mp4',
  
          '160': 'mp4',
  
+        # Dash mp4 audio
+        '139': 'm4a',
+        '140': 'm4a',
+        '141': 'm4a',
+
          # Dash webm
          '171': 'webm',
          '172': 'webm',
          # Dash webm
          '171': 'webm',
          '172': 'webm',
@@ -344,18 +339,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                  u"description": u"test chars:  \"'/\\ä↭𝕐\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de ."
              }
          },
                  u"description": u"test chars:  \"'/\\ä↭𝕐\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de ."
              }
          },
-        {
-            u"url":  u"http://www.youtube.com/watch?v=1ltcDfZMA3U",
-            u"file":  u"1ltcDfZMA3U.flv",
-            u"note": u"Test VEVO video (#897)",
-            u"info_dict": {
-                u"upload_date": u"20070518",
-                u"title": u"Maps - It Will Find You",
-                u"description": u"Music video by Maps performing It Will Find You.",
-                u"uploader": u"MuteUSA",
-                u"uploader_id": u"MuteUSA"
-            }
-        },
          {
              u"url":  u"http://www.youtube.com/watch?v=UxxajLWwzqY",
              u"file":  u"UxxajLWwzqY.mp4",
          {
              u"url":  u"http://www.youtube.com/watch?v=UxxajLWwzqY",
              u"file":  u"UxxajLWwzqY.mp4",
@@ -1099,7 +1082,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
          else:
              raise ExtractorError(u'Unable to decrypt signature, key length %d not supported; retrying might work' % (len(s)))
  
          else:
              raise ExtractorError(u'Unable to decrypt signature, key length %d not supported; retrying might work' % (len(s)))
  
-    def _get_available_subtitles(self, video_id):
+    def _get_available_subtitles(self, video_id, webpage):
          try:
              sub_list = self._download_webpage(
                  'http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
          try:
              sub_list = self._download_webpage(
                  'http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
@@ -1116,7 +1099,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                  'lang': lang,
                  'v': video_id,
                  'fmt': self._downloader.params.get('subtitlesformat'),
                  'lang': lang,
                  'v': video_id,
                  'fmt': self._downloader.params.get('subtitlesformat'),
-                'name': l[0],
+                'name': l[0].encode('utf-8'),
              })
              url = u'http://www.youtube.com/api/timedtext?' + params
              sub_lang_list[lang] = url
              })
              url = u'http://www.youtube.com/api/timedtext?' + params
              sub_lang_list[lang] = url
@@ -1403,32 +1386,29 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
              # this signatures are encrypted
              if 'url_encoded_fmt_stream_map' not in args:
                  raise ValueError(u'No stream_map present')  # caught below
              # this signatures are encrypted
              if 'url_encoded_fmt_stream_map' not in args:
                  raise ValueError(u'No stream_map present')  # caught below
-            m_s = re.search(r'[&,]s=', args['url_encoded_fmt_stream_map'])
+            re_signature = re.compile(r'[&,]s=')
+            m_s = re_signature.search(args['url_encoded_fmt_stream_map'])
              if m_s is not None:
                  self.to_screen(u'%s: Encrypted signatures detected.' % video_id)
                  video_info['url_encoded_fmt_stream_map'] = [args['url_encoded_fmt_stream_map']]
              if m_s is not None:
                  self.to_screen(u'%s: Encrypted signatures detected.' % video_id)
                  video_info['url_encoded_fmt_stream_map'] = [args['url_encoded_fmt_stream_map']]
-            m_s = re.search(r'[&,]s=', args.get('adaptive_fmts', u''))
+            m_s = re_signature.search(args.get('adaptive_fmts', u''))
              if m_s is not None:
              if m_s is not None:
-                if 'url_encoded_fmt_stream_map' in video_info:
-                    video_info['url_encoded_fmt_stream_map'][0] += ',' + args['adaptive_fmts']
-                else:
-                    video_info['url_encoded_fmt_stream_map'] = [args['adaptive_fmts']]
-            elif 'adaptive_fmts' in video_info:
-                if 'url_encoded_fmt_stream_map' in video_info:
-                    video_info['url_encoded_fmt_stream_map'][0] += ',' + video_info['adaptive_fmts'][0]
+                if 'adaptive_fmts' in video_info:
+                    video_info['adaptive_fmts'][0] += ',' + args['adaptive_fmts']
                  else:
                  else:
-                    video_info['url_encoded_fmt_stream_map'] = video_info['adaptive_fmts']
+                    video_info['adaptive_fmts'] = [args['adaptive_fmts']]
          except ValueError:
              pass
  
          if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
              self.report_rtmp_download()
              video_url_list = [(None, video_info['conn'][0])]
          except ValueError:
              pass
  
          if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
              self.report_rtmp_download()
              video_url_list = [(None, video_info['conn'][0])]
-        elif 'url_encoded_fmt_stream_map' in video_info and len(video_info['url_encoded_fmt_stream_map']) >= 1:
-            if 'rtmpe%3Dyes' in video_info['url_encoded_fmt_stream_map'][0]:
+        elif len(video_info.get('url_encoded_fmt_stream_map', [])) >= 1 or len(video_info.get('adaptive_fmts', [])) >= 1:
+            encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts',[''])[0]
+            if 'rtmpe%3Dyes' in encoded_url_map:
                  raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
              url_map = {}
                  raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
              url_map = {}
-            for url_data_str in video_info['url_encoded_fmt_stream_map'][0].split(','):
+            for url_data_str in encoded_url_map.split(','):
                  url_data = compat_parse_qs(url_data_str)
                  if 'itag' in url_data and 'url' in url_data:
                      url = url_data['url'][0]
                  url_data = compat_parse_qs(url_data_str)
                  if 'itag' in url_data and 'url' in url_data:
                      url = url_data['url'][0]
@@ -1481,13 +1461,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
              raise ExtractorError(u'no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')
  
          results = []
              raise ExtractorError(u'no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')
  
          results = []
-        for format_param, video_real_url in video_url_list:
+        for itag, video_real_url in video_url_list:
              # Extension
              # Extension
-            video_extension = self._video_extensions.get(format_param, 'flv')
+            video_extension = self._video_extensions.get(itag, 'flv')
  
  
-            video_format = '{0} - {1}{2}'.format(format_param if format_param else video_extension,
-                                              self._video_dimensions.get(format_param, '???'),
-                                              ' ('+self._special_itags[format_param]+')' if format_param in self._special_itags else '')
+            video_format = '{0} - {1}{2}'.format(itag if itag else video_extension,
+                                              self._video_dimensions.get(itag, '???'),
+                                              ' ('+self._special_itags[itag]+')' if itag in self._special_itags else '')
  
              results.append({
                  'id':       video_id,
  
              results.append({
                  'id':       video_id,
@@ -1498,13 +1478,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                  'title':    video_title,
                  'ext':      video_extension,
                  'format':   video_format,
                  'title':    video_title,
                  'ext':      video_extension,
                  'format':   video_format,
+                'format_id': itag,
                  'thumbnail':    video_thumbnail,
                  'description':  video_description,
                  'player_url':   player_url,
                  'subtitles':    video_subtitles,
                  'duration':     video_duration,
                  'age_limit':    18 if age_gate else 0,
                  'thumbnail':    video_thumbnail,
                  'description':  video_description,
                  'player_url':   player_url,
                  'subtitles':    video_subtitles,
                  'duration':     video_duration,
                  'age_limit':    18 if age_gate else 0,
-                'annotations':  video_annotations
+                'annotations':  video_annotations,
+                'webpage_url': 'https://www.youtube.com/watch?v=%s' % video_id,
              })
          return results
  
              })
          return results
  
@@ -1590,7 +1572,6 @@ class YoutubePlaylistIE(InfoExtractor):
  class YoutubeChannelIE(InfoExtractor):
      IE_DESC = u'YouTube.com channels'
      _VALID_URL = r"^(?:https?://)?(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/([0-9A-Za-z_-]+)"
  class YoutubeChannelIE(InfoExtractor):
      IE_DESC = u'YouTube.com channels'
      _VALID_URL = r"^(?:https?://)?(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/([0-9A-Za-z_-]+)"
-    _TEMPLATE_URL = 'http://www.youtube.com/channel/%s/videos?sort=da&flow=list&view=0&page=%s&gl=US&hl=en'
      _MORE_PAGES_INDICATOR = 'yt-uix-load-more'
      _MORE_PAGES_URL = 'http://www.youtube.com/c4_browse_ajax?action_load_more_videos=1&flow=list&paging=%s&view=0&sort=da&channel_id=%s'
      IE_NAME = u'youtube:channel'
      _MORE_PAGES_INDICATOR = 'yt-uix-load-more'
      _MORE_PAGES_URL = 'http://www.youtube.com/c4_browse_ajax?action_load_more_videos=1&flow=list&paging=%s&view=0&sort=da&channel_id=%s'
      IE_NAME = u'youtube:channel'
@@ -1611,30 +1592,20 @@ class YoutubeChannelIE(InfoExtractor):
          # Download channel page
          channel_id = mobj.group(1)
          video_ids = []
          # Download channel page
          channel_id = mobj.group(1)
          video_ids = []
-        pagenum = 1
  
  
-        url = self._TEMPLATE_URL % (channel_id, pagenum)
-        page = self._download_webpage(url, channel_id,
-                                      u'Downloading page #%s' % pagenum)
+        # Download all channel pages using the json-based channel_ajax query
+        for pagenum in itertools.count(1):
+            url = self._MORE_PAGES_URL % (pagenum, channel_id)
+            page = self._download_webpage(url, channel_id,
+                                          u'Downloading page #%s' % pagenum)
  
  
-        # Extract video identifiers
-        ids_in_page = self.extract_videos_from_page(page)
-        video_ids.extend(ids_in_page)
+            page = json.loads(page)
  
  
-        # Download any subsequent channel pages using the json-based channel_ajax query
-        if self._MORE_PAGES_INDICATOR in page:
-            for pagenum in itertools.count(1):
-                url = self._MORE_PAGES_URL % (pagenum, channel_id)
-                page = self._download_webpage(url, channel_id,
-                                              u'Downloading page #%s' % pagenum)
-
-                page = json.loads(page)
-
-                ids_in_page = self.extract_videos_from_page(page['content_html'])
-                video_ids.extend(ids_in_page)
+            ids_in_page = self.extract_videos_from_page(page['content_html'])
+            video_ids.extend(ids_in_page)
  
  
-                if self._MORE_PAGES_INDICATOR  not in page['load_more_widget_html']:
-                    break
+            if self._MORE_PAGES_INDICATOR not in page['load_more_widget_html']:
+                break
  
          self._downloader.to_screen(u'[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids)))
  
  
          self._downloader.to_screen(u'[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids)))
  
@@ -1750,6 +1721,10 @@ class YoutubeSearchIE(SearchInfoExtractor):
          videos = [self.url_result('http://www.youtube.com/watch?v=%s' % id, 'Youtube') for id in video_ids]
          return self.playlist_result(videos, query)
  
          videos = [self.url_result('http://www.youtube.com/watch?v=%s' % id, 'Youtube') for id in video_ids]
          return self.playlist_result(videos, query)
  
+class YoutubeSearchDateIE(YoutubeSearchIE):
+    _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc&orderby=published'
+    _SEARCH_KEY = 'ytsearchdate'
+    IE_DESC = u'YouTube.com searches, newest videos first'
  
  class YoutubeShowIE(InfoExtractor):
      IE_DESC = u'YouTube.com (multi-season) shows'
  
  class YoutubeShowIE(InfoExtractor):
      IE_DESC = u'YouTube.com (multi-season) shows'
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py

index bfb8f6bcd971dad03d5236c8e607b59ff81c667a..1d9785341ec685071ea8fcc4846029a3e889bc72 100644 (file)
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -572,6 +572,11 @@ class ExtractorError(Exception):
          return u''.join(traceback.format_tb(self.traceback))
  
  
          return u''.join(traceback.format_tb(self.traceback))
  
  
+class RegexNotFoundError(ExtractorError):
+    """Error when a regex didn't match"""
+    pass
+
+
  class DownloadError(Exception):
      """Download Error exception.
  
  class DownloadError(Exception):
      """Download Error exception.
  
diff --git a/youtube_dl/version.py b/youtube_dl/version.py

index a5b56d894093a772b31a3bd89c5058dd9f2725de..338e7ba1ff83fc5375b91c54f2119588a72df9a6 100644 (file)
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,2 +1,2 @@
  
  
-__version__ = '2013.10.23'
+__version__ = '2013.11.11'
author	Rogério Brito <rbrito@ime.usp.br>
	Tue, 12 Nov 2013 00:27:50 +0000 (22:27 -0200)
committer	Rogério Brito <rbrito@ime.usp.br>
	Tue, 12 Nov 2013 00:27:50 +0000 (22:27 -0200)
README.md		patch \| blob \| history
README.txt		patch \| blob \| history
devscripts/check-porn.py	[new file with mode: 0644]	patch \| blob
setup.py		patch \| blob \| history
test/helper.py		patch \| blob \| history
test/test_YoutubeDL.py		patch \| blob \| history
test/test_dailymotion_subtitles.py	[deleted file]	patch \| blob \| history
test/test_download.py		patch \| blob \| history
test/test_playlists.py		patch \| blob \| history
test/test_subtitles.py	[new file with mode: 0644]	patch \| blob
test/test_youtube_subtitles.py	[deleted file]	patch \| blob \| history
youtube-dl		patch \| blob \| history
youtube-dl.1		patch \| blob \| history
youtube-dl.bash-completion		patch \| blob \| history
youtube_dl/FileDownloader.py		patch \| blob \| history
youtube_dl/YoutubeDL.py		patch \| blob \| history
youtube_dl/__init__.py		patch \| blob \| history
youtube_dl/extractor/__init__.py		patch \| blob \| history
youtube_dl/extractor/addanime.py		patch \| blob \| history
youtube_dl/extractor/arte.py		patch \| blob \| history
youtube_dl/extractor/bambuser.py	[new file with mode: 0644]	patch \| blob
youtube_dl/extractor/brightcove.py		patch \| blob \| history
youtube_dl/extractor/canalc2.py		patch \| blob \| history
youtube_dl/extractor/cinemassacre.py		patch \| blob \| history
youtube_dl/extractor/cnn.py		patch \| blob \| history
youtube_dl/extractor/common.py		patch \| blob \| history
youtube_dl/extractor/dailymotion.py		patch \| blob \| history
youtube_dl/extractor/depositfiles.py		patch \| blob \| history
youtube_dl/extractor/eighttracks.py		patch \| blob \| history
youtube_dl/extractor/eitb.py	[new file with mode: 0644]	patch \| blob
youtube_dl/extractor/exfm.py		patch \| blob \| history
youtube_dl/extractor/extremetube.py	[new file with mode: 0644]	patch \| blob
youtube_dl/extractor/facebook.py		patch \| blob \| history
youtube_dl/extractor/faz.py		patch \| blob \| history
youtube_dl/extractor/generic.py		patch \| blob \| history
youtube_dl/extractor/hypem.py		patch \| blob \| history
youtube_dl/extractor/instagram.py		patch \| blob \| history
youtube_dl/extractor/kankan.py		patch \| blob \| history
youtube_dl/extractor/keezmovies.py	[new file with mode: 0644]	patch \| blob
youtube_dl/extractor/livestream.py		patch \| blob \| history
youtube_dl/extractor/metacafe.py		patch \| blob \| history
youtube_dl/extractor/mofosex.py	[new file with mode: 0644]	patch \| blob
youtube_dl/extractor/mtv.py		patch \| blob \| history
youtube_dl/extractor/myspace.py	[new file with mode: 0644]	patch \| blob
youtube_dl/extractor/nowvideo.py		patch \| blob \| history
youtube_dl/extractor/pornhub.py	[new file with mode: 0644]	patch \| blob
youtube_dl/extractor/pornotube.py		patch \| blob \| history
youtube_dl/extractor/rtlnow.py		patch \| blob \| history
youtube_dl/extractor/slashdot.py		patch \| blob \| history
youtube_dl/extractor/soundcloud.py		patch \| blob \| history
youtube_dl/extractor/space.py	[new file with mode: 0644]	patch \| blob
youtube_dl/extractor/spankwire.py	[new file with mode: 0644]	patch \| blob
youtube_dl/extractor/subtitles.py		patch \| blob \| history
youtube_dl/extractor/teamcoco.py		patch \| blob \| history
youtube_dl/extractor/ted.py		patch \| blob \| history
youtube_dl/extractor/tube8.py	[new file with mode: 0644]	patch \| blob
youtube_dl/extractor/vevo.py		patch \| blob \| history
youtube_dl/extractor/viddler.py		patch \| blob \| history
youtube_dl/extractor/vimeo.py		patch \| blob \| history
youtube_dl/extractor/vk.py	[new file with mode: 0644]	patch \| blob
youtube_dl/extractor/weibo.py		patch \| blob \| history
youtube_dl/extractor/xhamster.py		patch \| blob \| history
youtube_dl/extractor/xnxx.py		patch \| blob \| history
youtube_dl/extractor/xtube.py	[new file with mode: 0644]	patch \| blob
youtube_dl/extractor/yahoo.py		patch \| blob \| history
youtube_dl/extractor/youjizz.py		patch \| blob \| history
youtube_dl/extractor/youku.py		patch \| blob \| history
youtube_dl/extractor/youporn.py		patch \| blob \| history
youtube_dl/extractor/youtube.py		patch \| blob \| history
youtube_dl/utils.py		patch \| blob \| history
youtube_dl/version.py		patch \| blob \| history