Imported Upstream version 2013.08.29

author Rogério Brito <rbrito@ime.usp.br>
Thu, 29 Aug 2013 22:09:34 +0000 (19:09 -0300)
committer Rogério Brito <rbrito@ime.usp.br>
Thu, 29 Aug 2013 22:09:34 +0000 (19:09 -0300)
diff --git a/README.md b/README.md

index 560bcdca185494cc096c7e1ec7f5a55cf0c34732..75068fe56a2681176a9862d44373f47f11318931 100644 (file)
--- a/README.md
+++ b/README.md
@@ -120,18 +120,20 @@ which means you can modify it, redistribute it or use it however you like.
      --max-quality FORMAT       highest quality format to download
      -F, --list-formats         list all available formats (currently youtube
                                 only)
      --max-quality FORMAT       highest quality format to download
      -F, --list-formats         list all available formats (currently youtube
                                 only)
+
+## Subtitle Options:
      --write-sub                write subtitle file (currently youtube only)
      --write-auto-sub           write automatic subtitle file (currently youtube
                                 only)
      --only-sub                 [deprecated] alias of --skip-download
      --all-subs                 downloads all the available subtitles of the
      --write-sub                write subtitle file (currently youtube only)
      --write-auto-sub           write automatic subtitle file (currently youtube
                                 only)
      --only-sub                 [deprecated] alias of --skip-download
      --all-subs                 downloads all the available subtitles of the
-                               video (currently youtube only)
+                               video
      --list-subs                lists all available subtitles for the video
      --list-subs                lists all available subtitles for the video
-                               (currently youtube only)
-    --sub-format FORMAT        subtitle format [srt/sbv/vtt] (default=srt)
-                               (currently youtube only)
-    --sub-lang LANG            language of the subtitles to download (optional)
-                               use IETF language tags like 'en'
+    --sub-format FORMAT        subtitle format (default=srt) ([sbv/vtt] youtube
+                               only)
+    --sub-lang LANGS           languages of the subtitles to download (optional)
+                               separated by commas, use IETF language tags like
+                               'en,pt'
  
  ## Authentication Options:
      -u, --username USERNAME    account username
  
  ## Authentication Options:
      -u, --username USERNAME    account username
@@ -153,6 +155,8 @@ which means you can modify it, redistribute it or use it however you like.
                                 processing; the video is erased by default
      --no-post-overwrites       do not overwrite post-processed files; the post-
                                 processed files are overwritten by default
                                 processing; the video is erased by default
      --no-post-overwrites       do not overwrite post-processed files; the post-
                                 processed files are overwritten by default
+    --embed-subs               embed subtitles in the video (only for mp4
+                               videos)
  
  # CONFIGURATION
  
  
  # CONFIGURATION
  
diff --git a/README.txt b/README.txt

index b13711a0f6c1ccc9aa2918147e31f16209e34353..3baa06253385950db6ad751a1086a86ed2477605 100644 (file)
--- a/README.txt
+++ b/README.txt
@@ -137,18 +137,22 @@ Video Format Options:
      --max-quality FORMAT       highest quality format to download
      -F, --list-formats         list all available formats (currently youtube
                                 only)
      --max-quality FORMAT       highest quality format to download
      -F, --list-formats         list all available formats (currently youtube
                                 only)
+
+Subtitle Options:
+-----------------
+
      --write-sub                write subtitle file (currently youtube only)
      --write-auto-sub           write automatic subtitle file (currently youtube
                                 only)
      --only-sub                 [deprecated] alias of --skip-download
      --all-subs                 downloads all the available subtitles of the
      --write-sub                write subtitle file (currently youtube only)
      --write-auto-sub           write automatic subtitle file (currently youtube
                                 only)
      --only-sub                 [deprecated] alias of --skip-download
      --all-subs                 downloads all the available subtitles of the
-                               video (currently youtube only)
+                               video
      --list-subs                lists all available subtitles for the video
      --list-subs                lists all available subtitles for the video
-                               (currently youtube only)
-    --sub-format FORMAT        subtitle format [srt/sbv/vtt] (default=srt)
-                               (currently youtube only)
-    --sub-lang LANG            language of the subtitles to download (optional)
-                               use IETF language tags like 'en'
+    --sub-format FORMAT        subtitle format (default=srt) ([sbv/vtt] youtube
+                               only)
+    --sub-lang LANGS           languages of the subtitles to download (optional)
+                               separated by commas, use IETF language tags like
+                               'en,pt'
  
  Authentication Options:
  -----------------------
  
  Authentication Options:
  -----------------------
@@ -174,6 +178,8 @@ Post-processing Options:
                                 processing; the video is erased by default
      --no-post-overwrites       do not overwrite post-processed files; the post-
                                 processed files are overwritten by default
                                 processing; the video is erased by default
      --no-post-overwrites       do not overwrite post-processed files; the post-
                                 processed files are overwritten by default
+    --embed-subs               embed subtitles in the video (only for mp4
+                               videos)
  
  CONFIGURATION
  =============
  
  CONFIGURATION
  =============
diff --git a/devscripts/gh-pages/add-version.py b/devscripts/gh-pages/add-version.py

index 6af8bb9d84196b92a9b5162b52173741ffd31b0c..116420ef2f0e4561a07ce558590088b418872179 100755 (executable)
--- a/devscripts/gh-pages/add-version.py
+++ b/devscripts/gh-pages/add-version.py
@@ -6,28 +6,32 @@ import hashlib
  import urllib.request
  
  if len(sys.argv) <= 1:
  import urllib.request
  
  if len(sys.argv) <= 1:
-       print('Specify the version number as parameter')
-       sys.exit()
+    print('Specify the version number as parameter')
+    sys.exit()
  version = sys.argv[1]
  
  with open('update/LATEST_VERSION', 'w') as f:
  version = sys.argv[1]
  
  with open('update/LATEST_VERSION', 'w') as f:
-       f.write(version)
+    f.write(version)
  
  versions_info = json.load(open('update/versions.json'))
  if 'signature' in versions_info:
  
  versions_info = json.load(open('update/versions.json'))
  if 'signature' in versions_info:
-       del versions_info['signature']
+    del versions_info['signature']
  
  new_version = {}
  
  
  new_version = {}
  
-filenames = {'bin': 'youtube-dl', 'exe': 'youtube-dl.exe', 'tar': 'youtube-dl-%s.tar.gz' % version}
+filenames = {
+    'bin': 'youtube-dl',
+    'exe': 'youtube-dl.exe',
+    'tar': 'youtube-dl-%s.tar.gz' % version}
  for key, filename in filenames.items():
  for key, filename in filenames.items():
-       print('Downloading and checksumming %s...' %filename)
-       url = 'http://youtube-dl.org/downloads/%s/%s' % (version, filename)
-       data = urllib.request.urlopen(url).read()
-       sha256sum = hashlib.sha256(data).hexdigest()
-       new_version[key] = (url, sha256sum)
+    print('Downloading and checksumming %s...' % filename)
+    url = 'https://yt-dl.org/downloads/%s/%s' % (version, filename)
+    data = urllib.request.urlopen(url).read()
+    sha256sum = hashlib.sha256(data).hexdigest()
+    new_version[key] = (url, sha256sum)
  
  versions_info['versions'][version] = new_version
  versions_info['latest'] = version
  
  
  versions_info['versions'][version] = new_version
  versions_info['latest'] = version
  
-json.dump(versions_info, open('update/versions.json', 'w'), indent=4, sort_keys=True)
-\ No newline at end of file
+with open('update/versions.json', 'w') as jsonf:
+    json.dump(versions_info, jsonf, indent=4, sort_keys=True)
diff --git a/devscripts/gh-pages/update-feed.py b/devscripts/gh-pages/update-feed.py

index cfff05fc8f017cac11bc0293ca60734040127c39..16571a924c132b8ba7849ea9ad81a4d63c6ed208 100755 (executable)
--- a/devscripts/gh-pages/update-feed.py
+++ b/devscripts/gh-pages/update-feed.py
@@ -22,7 +22,7 @@ entry_template=textwrap.dedent("""
                                                                         <atom:link href="http://rg3.github.io/youtube-dl" />
                                                                         <atom:content type="xhtml">
                                                                                 <div xmlns="http://www.w3.org/1999/xhtml">
                                                                         <atom:link href="http://rg3.github.io/youtube-dl" />
                                                                         <atom:content type="xhtml">
                                                                                 <div xmlns="http://www.w3.org/1999/xhtml">
-                                                                                       Downloads available at <a href="http://youtube-dl.org/downloads/@VERSION@/">http://youtube-dl.org/downloads/@VERSION@/</a>
+                                                                                       Downloads available at <a href="https://yt-dl.org/downloads/@VERSION@/">https://yt-dl.org/downloads/@VERSION@/</a>
                                                                                 </div>
                                                                         </atom:content>
                                                                         <atom:author>
                                                                                 </div>
                                                                         </atom:content>
                                                                         <atom:author>
@@ -54,4 +54,3 @@ atom_template = atom_template.replace('@ENTRIES@', entries_str)
  with open('update/releases.atom','w',encoding='utf-8') as atom_file:
         atom_file.write(atom_template)
  
  with open('update/releases.atom','w',encoding='utf-8') as atom_file:
         atom_file.write(atom_template)
  
-
diff --git a/devscripts/release.sh b/devscripts/release.sh

index 46c31e437558659d734d4f59009eadcd94ca62c0..24c9ad8d889808ac5e2d25b7ab2c8b5ff3db7ef5 100755 (executable)
--- a/devscripts/release.sh
+++ b/devscripts/release.sh
@@ -67,7 +67,7 @@ RELEASE_FILES="youtube-dl youtube-dl.exe youtube-dl-$version.tar.gz"
  (cd build/$version/ && sha512sum $RELEASE_FILES > SHA2-512SUMS)
  git checkout HEAD -- youtube-dl youtube-dl.exe
  
  (cd build/$version/ && sha512sum $RELEASE_FILES > SHA2-512SUMS)
  git checkout HEAD -- youtube-dl youtube-dl.exe
  
-/bin/echo -e "\n### Signing and uploading the new binaries to youtube-dl.org..."
+/bin/echo -e "\n### Signing and uploading the new binaries to yt-dl.org ..."
  for f in $RELEASE_FILES; do gpg --detach-sig "build/$version/$f"; done
  scp -r "build/$version" ytdl@yt-dl.org:html/tmp/
  ssh ytdl@yt-dl.org "mv html/tmp/$version html/downloads/"
  for f in $RELEASE_FILES; do gpg --detach-sig "build/$version/$f"; done
  scp -r "build/$version" ytdl@yt-dl.org:html/tmp/
  ssh ytdl@yt-dl.org "mv html/tmp/$version html/downloads/"
diff --git a/devscripts/youtube_genalgo.py b/devscripts/youtube_genalgo.py

index 31d6ec95295f97a1382edc0fd866348a3456fca8..13df535c772a569790fb3ebeee9e977172d08aa1 100644 (file)
--- a/devscripts/youtube_genalgo.py
+++ b/devscripts/youtube_genalgo.py
@@ -11,30 +11,36 @@ tests = [
      # 90
      ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<'`",
       "mrtyuioplkjhgfdsazxcvbne1234567890QWER[YUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={`]}|"),
      # 90
      ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<'`",
       "mrtyuioplkjhgfdsazxcvbne1234567890QWER[YUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={`]}|"),
-    # 88
+    # 89 
+    ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<'",
+     "/?;:|}<[{=+-_)(*&^%$#@!MqBVCXZASDFGHJKLPOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuyt"),
+    # 88 - vflapUV9V 2013/08/28
      ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<",
      ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<",
-     "J:|}][{=+-_)(*&;%$#@>MNBVCXZASDFGH^KLPOIUYTREWQ0987654321mnbvcxzasdfghrklpoiuytej"),
-    # 87 - vflART1Nf 2013/07/24
+     "ioplkjhgfdsazxcvbnm12<4567890QWERTYUIOZLKJHGFDSAeXCVBNM!@#$%^&*()_-+={[]}|:;?/>.3"),
+    # 87
      ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>.<",
      ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>.<",
-     "tyuioplkjhgfdsazxcv<nm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>"),
-    # 86 - vflm_D8eE 2013/07/31
+     "uioplkjhgfdsazxcvbnm1t34567890QWE2TYUIOPLKJHGFDSAZXCVeNM!@#$^&*()_-+={[]}|:;?/>.<"),
+    # 86 - vflg0g8PQ 2013/08/29
      ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<",
      ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<",
-     ">.1}|[{=+-_)(*&^%$#@!MNBVCXZASDFGHJK<POIUYTREW509876L432/mnbvcxzasdfghjklpoiuytre"),
-    # 85 - vflSAFCP9 2013/07/19
+     ">/?;}|[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWq0987654321mnbvcxzasdfghjklpoiuytr"),
+    # 85
      ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?/>.<",
      ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?/>.<",
-     "ertyuiqplkjhgfdsazx$vbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#<%^&*()_-+={[};?/c"),
-    # 84
+     ".>/?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWQ0q876543r1mnbvcx9asdfghjklpoiuyt2"),
+    # 84 - vflg0g8PQ 2013/08/29 (sporadic)
      ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<",
      ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<",
-     "<.>?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWe098765432rmnbvcxzasdfghjklpoiuyt1"),
-    # 83 - vflTWC9KW 2013/08/01
+     ">?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWq0987654321mnbvcxzasdfghjklpoiuytr"),
+    # 83
      ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/>.<",
      ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/>.<",
-     "qwertyuioplkjhg>dsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/f"),
-    # 82
+     ".>/?;}[{=+_)(*&^%<#!MNBVCXZASPFGHJKLwOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuytreq"),
+    # 82 - vflZK4ZYR 2013/08/23
      ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>.<",
      ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>.<",
-     "Q>/?;}[{=+-(*<^%$#@!MNBVCXZASDFGHKLPOIUY8REWT0q&7654321mnbvcxzasdfghjklpoiuytrew9"),
+     "wertyuioplkjhgfdsaqxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&z(-+={[};?/>.<"),
      # 81 - vflLC8JvQ 2013/07/25
      ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>.",
       "C>/?;}[{=+-(*&^%$#@!MNBVYXZASDFGHKLPOIU.TREWQ0q87659321mnbvcxzasdfghjkl4oiuytrewp"),
      # 81 - vflLC8JvQ 2013/07/25
      ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>.",
       "C>/?;}[{=+-(*&^%$#@!MNBVYXZASDFGHKLPOIU.TREWQ0q87659321mnbvcxzasdfghjkl4oiuytrewp"),
+    # 80 - vflZK4ZYR 2013/08/23 (sporadic)
+    ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>",
+     "wertyuioplkjhgfdsaqxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&z(-+={[};?/>"),
      # 79 - vflLC8JvQ 2013/07/25 (sporadic)
      ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/",
       "Z?;}[{=+-(*&^%$#@!MNBVCXRASDFGHKLPOIUYT/EWQ0q87659321mnbvcxzasdfghjkl4oiuytrewp"),
      # 79 - vflLC8JvQ 2013/07/25 (sporadic)
      ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/",
       "Z?;}[{=+-(*&^%$#@!MNBVCXRASDFGHKLPOIUYT/EWQ0q87659321mnbvcxzasdfghjkl4oiuytrewp"),
diff --git a/test/test_all_urls.py b/test/test_all_urls.py

index c73d0e4679853b3e80bd9832a57b0d804265895a..c54faa380e44a57969563109d3a7baaf11e835c7 100644 (file)
--- a/test/test_all_urls.py
+++ b/test/test_all_urls.py
@@ -50,6 +50,7 @@ class TestAllURLsMatching(unittest.TestCase):
          self.assertEqual(YoutubeIE()._extract_id('http://www.youtube.com/watch?&v=BaW_jenozKc'), 'BaW_jenozKc')
          self.assertEqual(YoutubeIE()._extract_id('https://www.youtube.com/watch?&v=BaW_jenozKc'), 'BaW_jenozKc')
          self.assertEqual(YoutubeIE()._extract_id('https://www.youtube.com/watch?feature=player_embedded&v=BaW_jenozKc'), 'BaW_jenozKc')
          self.assertEqual(YoutubeIE()._extract_id('http://www.youtube.com/watch?&v=BaW_jenozKc'), 'BaW_jenozKc')
          self.assertEqual(YoutubeIE()._extract_id('https://www.youtube.com/watch?&v=BaW_jenozKc'), 'BaW_jenozKc')
          self.assertEqual(YoutubeIE()._extract_id('https://www.youtube.com/watch?feature=player_embedded&v=BaW_jenozKc'), 'BaW_jenozKc')
+        self.assertEqual(YoutubeIE()._extract_id('https://www.youtube.com/watch_popup?v=BaW_jenozKc'), 'BaW_jenozKc')
  
      def test_no_duplicates(self):
          ies = gen_extractors()
  
      def test_no_duplicates(self):
          ies = gen_extractors()
diff --git a/test/test_download.py b/test/test_download.py

index 21cb2e6941cd8c6cae4c220535b9193f004999c7..23a66254d86ed2a68ee3ea54339838fda7d5dc71 100644 (file)
--- a/test/test_download.py
+++ b/test/test_download.py
@@ -127,12 +127,11 @@ def generator(test_case):
                      info_dict = json.load(infof)
                  for (info_field, expected) in tc.get('info_dict', {}).items():
                      if isinstance(expected, compat_str) and expected.startswith('md5:'):
                      info_dict = json.load(infof)
                  for (info_field, expected) in tc.get('info_dict', {}).items():
                      if isinstance(expected, compat_str) and expected.startswith('md5:'):
-                        self.assertEqual(expected, 'md5:' + md5(info_dict.get(info_field)))
+                        got = 'md5:' + md5(info_dict.get(info_field))
                      else:
                          got = info_dict.get(info_field)
                      else:
                          got = info_dict.get(info_field)
-                        self.assertEqual(
-                            expected, got,
-                            u'invalid value for field %s, expected %r, got %r' % (info_field, expected, got))
+                    self.assertEqual(expected, got,
+                        u'invalid value for field %s, expected %r, got %r' % (info_field, expected, got))
  
                  # If checkable fields are missing from the test case, print the info_dict
                  test_info_dict = dict((key, value if not isinstance(value, compat_str) or len(value) < 250 else 'md5:' + md5(value))
  
                  # If checkable fields are missing from the test case, print the info_dict
                  test_info_dict = dict((key, value if not isinstance(value, compat_str) or len(value) < 250 else 'md5:' + md5(value))
diff --git a/test/test_youtube_subtitles.py b/test/test_youtube_subtitles.py

index 86e09c9b1b397187acc0f28d6d03a1191fa7846f..641206277bbeec22d11339bb3b143df5adff834e 100644 (file)
--- a/test/test_youtube_subtitles.py
+++ b/test/test_youtube_subtitles.py
@@ -35,47 +35,47 @@ class TestYoutubeSubtitles(unittest.TestCase):
          DL.params['writesubtitles'] = True
          IE = YoutubeIE(DL)
          info_dict = IE.extract('QRS8MkLhQmM')
          DL.params['writesubtitles'] = True
          IE = YoutubeIE(DL)
          info_dict = IE.extract('QRS8MkLhQmM')
-        sub = info_dict[0]['subtitles'][0]
-        self.assertEqual(md5(sub[2]), '4cd9278a35ba2305f47354ee13472260')
+        sub = info_dict[0]['subtitles']['en']
+        self.assertEqual(md5(sub), '4cd9278a35ba2305f47354ee13472260')
      def test_youtube_subtitles_it(self):
          DL = FakeYDL()
          DL.params['writesubtitles'] = True
      def test_youtube_subtitles_it(self):
          DL = FakeYDL()
          DL.params['writesubtitles'] = True
-        DL.params['subtitleslang'] = 'it'
+        DL.params['subtitleslangs'] = ['it']
          IE = YoutubeIE(DL)
          info_dict = IE.extract('QRS8MkLhQmM')
          IE = YoutubeIE(DL)
          info_dict = IE.extract('QRS8MkLhQmM')
-        sub = info_dict[0]['subtitles'][0]
-        self.assertEqual(md5(sub[2]), '164a51f16f260476a05b50fe4c2f161d')
+        sub = info_dict[0]['subtitles']['it']
+        self.assertEqual(md5(sub), '164a51f16f260476a05b50fe4c2f161d')
      def test_youtube_onlysubtitles(self):
          DL = FakeYDL()
          DL.params['writesubtitles'] = True
          DL.params['onlysubtitles'] = True
          IE = YoutubeIE(DL)
          info_dict = IE.extract('QRS8MkLhQmM')
      def test_youtube_onlysubtitles(self):
          DL = FakeYDL()
          DL.params['writesubtitles'] = True
          DL.params['onlysubtitles'] = True
          IE = YoutubeIE(DL)
          info_dict = IE.extract('QRS8MkLhQmM')
-        sub = info_dict[0]['subtitles'][0]
-        self.assertEqual(md5(sub[2]), '4cd9278a35ba2305f47354ee13472260')
+        sub = info_dict[0]['subtitles']['en']
+        self.assertEqual(md5(sub), '4cd9278a35ba2305f47354ee13472260')
      def test_youtube_allsubtitles(self):
          DL = FakeYDL()
          DL.params['allsubtitles'] = True
          IE = YoutubeIE(DL)
          info_dict = IE.extract('QRS8MkLhQmM')
          subtitles = info_dict[0]['subtitles']
      def test_youtube_allsubtitles(self):
          DL = FakeYDL()
          DL.params['allsubtitles'] = True
          IE = YoutubeIE(DL)
          info_dict = IE.extract('QRS8MkLhQmM')
          subtitles = info_dict[0]['subtitles']
-        self.assertEqual(len(subtitles), 13)
+        self.assertEqual(len(subtitles.keys()), 13)
      def test_youtube_subtitles_sbv_format(self):
          DL = FakeYDL()
          DL.params['writesubtitles'] = True
          DL.params['subtitlesformat'] = 'sbv'
          IE = YoutubeIE(DL)
          info_dict = IE.extract('QRS8MkLhQmM')
      def test_youtube_subtitles_sbv_format(self):
          DL = FakeYDL()
          DL.params['writesubtitles'] = True
          DL.params['subtitlesformat'] = 'sbv'
          IE = YoutubeIE(DL)
          info_dict = IE.extract('QRS8MkLhQmM')
-        sub = info_dict[0]['subtitles'][0]
-        self.assertEqual(md5(sub[2]), '13aeaa0c245a8bed9a451cb643e3ad8b')
+        sub = info_dict[0]['subtitles']['en']
+        self.assertEqual(md5(sub), '13aeaa0c245a8bed9a451cb643e3ad8b')
      def test_youtube_subtitles_vtt_format(self):
          DL = FakeYDL()
          DL.params['writesubtitles'] = True
          DL.params['subtitlesformat'] = 'vtt'
          IE = YoutubeIE(DL)
          info_dict = IE.extract('QRS8MkLhQmM')
      def test_youtube_subtitles_vtt_format(self):
          DL = FakeYDL()
          DL.params['writesubtitles'] = True
          DL.params['subtitlesformat'] = 'vtt'
          IE = YoutubeIE(DL)
          info_dict = IE.extract('QRS8MkLhQmM')
-        sub = info_dict[0]['subtitles'][0]
-        self.assertEqual(md5(sub[2]), '356cdc577fde0c6783b9b822e7206ff7')
+        sub = info_dict[0]['subtitles']['en']
+        self.assertEqual(md5(sub), '356cdc577fde0c6783b9b822e7206ff7')
      def test_youtube_list_subtitles(self):
          DL = FakeYDL()
          DL.params['listsubtitles'] = True
      def test_youtube_list_subtitles(self):
          DL = FakeYDL()
          DL.params['listsubtitles'] = True
@@ -85,11 +85,20 @@ class TestYoutubeSubtitles(unittest.TestCase):
      def test_youtube_automatic_captions(self):
          DL = FakeYDL()
          DL.params['writeautomaticsub'] = True
      def test_youtube_automatic_captions(self):
          DL = FakeYDL()
          DL.params['writeautomaticsub'] = True
-        DL.params['subtitleslang'] = 'it'
+        DL.params['subtitleslangs'] = ['it']
          IE = YoutubeIE(DL)
          info_dict = IE.extract('8YoUxe5ncPo')
          IE = YoutubeIE(DL)
          info_dict = IE.extract('8YoUxe5ncPo')
-        sub = info_dict[0]['subtitles'][0]
-        self.assertTrue(sub[2] is not None)
+        sub = info_dict[0]['subtitles']['it']
+        self.assertTrue(sub is not None)
+    def test_youtube_multiple_langs(self):
+        DL = FakeYDL()
+        DL.params['writesubtitles'] = True
+        langs = ['it', 'fr', 'de']
+        DL.params['subtitleslangs'] = langs
+        IE = YoutubeIE(DL)
+        subtitles = IE.extract('QRS8MkLhQmM')[0]['subtitles']
+        for lang in langs:
+            self.assertTrue(subtitles.get(lang) is not None, u'Subtitles for \'%s\' not extracted' % lang)
  
  if __name__ == '__main__':
      unittest.main()
  
  if __name__ == '__main__':
      unittest.main()
diff --git a/youtube-dl b/youtube-dl

index 1b01c0bbc4ad4e443341ab0f51e2b0dc7d0a28df..4b283a55d901e35f47b09152573debe76b153c31 100755 (executable)

Binary files a/youtube-dl and b/youtube-dl differ
diff --git a/youtube-dl.1 b/youtube-dl.1

index e2ea5b8c6304983868a61453d047c2865a3be4ad..00fb1c1c2a1b0880c6b15fb4aea4555a76aec362 100644 (file)
--- a/youtube-dl.1
+++ b/youtube-dl.1
@@ -144,18 +144,24 @@ redistribute it or use it however you like.
  \-\-max\-quality\ FORMAT\ \ \ \ \ \ \ highest\ quality\ format\ to\ download
  \-F,\ \-\-list\-formats\ \ \ \ \ \ \ \ \ list\ all\ available\ formats\ (currently\ youtube
  \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ only)
  \-\-max\-quality\ FORMAT\ \ \ \ \ \ \ highest\ quality\ format\ to\ download
  \-F,\ \-\-list\-formats\ \ \ \ \ \ \ \ \ list\ all\ available\ formats\ (currently\ youtube
  \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ only)
+\f[]
+.fi
+.SS Subtitle Options:
+.IP
+.nf
+\f[C]
  \-\-write\-sub\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ write\ subtitle\ file\ (currently\ youtube\ only)
  \-\-write\-auto\-sub\ \ \ \ \ \ \ \ \ \ \ write\ automatic\ subtitle\ file\ (currently\ youtube
  \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ only)
  \-\-only\-sub\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ [deprecated]\ alias\ of\ \-\-skip\-download
  \-\-all\-subs\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ downloads\ all\ the\ available\ subtitles\ of\ the
  \-\-write\-sub\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ write\ subtitle\ file\ (currently\ youtube\ only)
  \-\-write\-auto\-sub\ \ \ \ \ \ \ \ \ \ \ write\ automatic\ subtitle\ file\ (currently\ youtube
  \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ only)
  \-\-only\-sub\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ [deprecated]\ alias\ of\ \-\-skip\-download
  \-\-all\-subs\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ downloads\ all\ the\ available\ subtitles\ of\ the
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ video\ (currently\ youtube\ only)
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ video
  \-\-list\-subs\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ lists\ all\ available\ subtitles\ for\ the\ video
  \-\-list\-subs\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ lists\ all\ available\ subtitles\ for\ the\ video
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ (currently\ youtube\ only)
-\-\-sub\-format\ FORMAT\ \ \ \ \ \ \ \ subtitle\ format\ [srt/sbv/vtt]\ (default=srt)
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ (currently\ youtube\ only)
-\-\-sub\-lang\ LANG\ \ \ \ \ \ \ \ \ \ \ \ language\ of\ the\ subtitles\ to\ download\ (optional)
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ use\ IETF\ language\ tags\ like\ \[aq]en\[aq]
+\-\-sub\-format\ FORMAT\ \ \ \ \ \ \ \ subtitle\ format\ (default=srt)\ ([sbv/vtt]\ youtube
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ only)
+\-\-sub\-lang\ LANGS\ \ \ \ \ \ \ \ \ \ \ languages\ of\ the\ subtitles\ to\ download\ (optional)
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ separated\ by\ commas,\ use\ IETF\ language\ tags\ like
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \[aq]en,pt\[aq]
  \f[]
  .fi
  .SS Authentication Options:
  \f[]
  .fi
  .SS Authentication Options:
@@ -185,6 +191,8 @@ redistribute it or use it however you like.
  \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ processing;\ the\ video\ is\ erased\ by\ default
  \-\-no\-post\-overwrites\ \ \ \ \ \ \ do\ not\ overwrite\ post\-processed\ files;\ the\ post\-
  \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ processed\ files\ are\ overwritten\ by\ default
  \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ processing;\ the\ video\ is\ erased\ by\ default
  \-\-no\-post\-overwrites\ \ \ \ \ \ \ do\ not\ overwrite\ post\-processed\ files;\ the\ post\-
  \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ processed\ files\ are\ overwritten\ by\ default
+\-\-embed\-subs\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ embed\ subtitles\ in\ the\ video\ (only\ for\ mp4
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ videos)
  \f[]
  .fi
  .SH CONFIGURATION
  \f[]
  .fi
  .SH CONFIGURATION
diff --git a/youtube-dl.bash-completion b/youtube-dl.bash-completion

index fd12ce8d94fe37de5288379587fb335b7bf70690..25ee51ad835198275aacad1ff3db5dc5e8be9572 100644 (file)
--- a/youtube-dl.bash-completion
+++ b/youtube-dl.bash-completion
@@ -3,7 +3,7 @@ __youtube-dl()
      local cur prev opts
      COMPREPLY=()
      cur="${COMP_WORDS[COMP_CWORD]}"
      local cur prev opts
      COMPREPLY=()
      cur="${COMP_WORDS[COMP_CWORD]}"
-    opts="--help --version --update --ignore-errors --dump-user-agent --user-agent --referer --list-extractors --extractor-descriptions --proxy --no-check-certificate --playlist-start --playlist-end --match-title --reject-title --max-downloads --min-filesize --max-filesize --date --datebefore --dateafter --rate-limit --retries --buffer-size --no-resize-buffer --test --title --id --literal --auto-number --output --autonumber-size --restrict-filenames --batch-file --no-overwrites --continue --no-continue --cookies --no-part --no-mtime --write-description --write-info-json --write-thumbnail --quiet --simulate --skip-download --get-url --get-title --get-id --get-thumbnail --get-description --get-filename --get-format --newline --no-progress --console-title --verbose --dump-intermediate-pages --format --all-formats --prefer-free-formats --max-quality --list-formats --write-sub --write-auto-sub --only-sub --all-subs --list-subs --sub-format --sub-lang --username --password --netrc --video-password --extract-audio --audio-format --audio-quality --recode-video --keep-video --no-post-overwrites"
+    opts="--help --version --update --ignore-errors --dump-user-agent --user-agent --referer --list-extractors --extractor-descriptions --proxy --no-check-certificate --playlist-start --playlist-end --match-title --reject-title --max-downloads --min-filesize --max-filesize --date --datebefore --dateafter --rate-limit --retries --buffer-size --no-resize-buffer --test --title --id --literal --auto-number --output --autonumber-size --restrict-filenames --batch-file --no-overwrites --continue --no-continue --cookies --no-part --no-mtime --write-description --write-info-json --write-thumbnail --quiet --simulate --skip-download --get-url --get-title --get-id --get-thumbnail --get-description --get-filename --get-format --newline --no-progress --console-title --verbose --dump-intermediate-pages --format --all-formats --prefer-free-formats --max-quality --list-formats --write-sub --write-auto-sub --only-sub --all-subs --list-subs --sub-format --sub-lang --username --password --netrc --video-password --extract-audio --audio-format --audio-quality --recode-video --keep-video --no-post-overwrites --embed-subs"
  
      if [[ ${cur} == * ]] ; then
          COMPREPLY=( $(compgen -W "${opts}" -- ${cur}) )
  
      if [[ ${cur} == * ]] ; then
          COMPREPLY=( $(compgen -W "${opts}" -- ${cur}) )
diff --git a/youtube_dl/FileDownloader.py b/youtube_dl/FileDownloader.py

index ea6b9d626efa7a18eafe20afa8c473d1afee315b..7c5ac4bc2ecae6d3440266a98b2034ac5f56867a 100644 (file)
--- a/youtube_dl/FileDownloader.py
+++ b/youtube_dl/FileDownloader.py
@@ -63,6 +63,17 @@ class FileDownloader(object):
          converted = float(bytes) / float(1024 ** exponent)
          return '%.2f%s' % (converted, suffix)
  
          converted = float(bytes) / float(1024 ** exponent)
          return '%.2f%s' % (converted, suffix)
  
+    @staticmethod
+    def format_seconds(seconds):
+        """Format a duration given in seconds as 'MM:SS' or 'HH:MM:SS'.
+
+        The short form is used when the duration is below one hour.
+        Durations above 99 whole hours are rendered as '--:--:--'.
+        """
+        (mins, secs) = divmod(seconds, 60)
+        # Keep only the minutes left over after the whole hours, so the
+        # long form never shows something like '01:61:01'.
+        (hours, mins) = divmod(mins, 60)
+        if hours > 99:
+            return '--:--:--'
+        if hours == 0:
+            return '%02d:%02d' % (mins, secs)
+        else:
+            return '%02d:%02d:%02d' % (hours, mins, secs)
+
      @staticmethod
      def calc_percent(byte_counter, data_len):
          if data_len is None:
      @staticmethod
      def calc_percent(byte_counter, data_len):
          if data_len is None:
@@ -78,10 +89,7 @@ class FileDownloader(object):
              return '--:--'
          rate = float(current) / dif
          eta = int((float(total) - float(current)) / rate)
              return '--:--'
          rate = float(current) / dif
          eta = int((float(total) - float(current)) / rate)
-        (eta_mins, eta_secs) = divmod(eta, 60)
-        if eta_mins > 99:
-            return '--:--'
-        return '%02d:%02d' % (eta_mins, eta_secs)
+        return FileDownloader.format_seconds(eta)
  
      @staticmethod
      def calc_speed(start, now, bytes):
  
      @staticmethod
      def calc_speed(start, now, bytes):
@@ -230,12 +238,14 @@ class FileDownloader(object):
          """Report it was impossible to resume download."""
          self.to_screen(u'[download] Unable to resume')
  
          """Report it was impossible to resume download."""
          self.to_screen(u'[download] Unable to resume')
  
-    def report_finish(self):
+    def report_finish(self, data_len_str, tot_time):
          """Report download finished."""
          if self.params.get('noprogress', False):
              self.to_screen(u'[download] Download completed')
          else:
          """Report download finished."""
          if self.params.get('noprogress', False):
              self.to_screen(u'[download] Download completed')
          else:
-            self.to_screen(u'')
+            clear_line = (u'\x1b[K' if sys.stderr.isatty() and os.name != 'nt' else u'')
+            self.to_screen(u'\r%s[download] 100%% of %s in %s' %
+                (clear_line, data_len_str, self.format_seconds(tot_time)))
  
      def _download_with_rtmpdump(self, filename, url, player_url, page_url, play_path, tc_url):
          self.report_destination(filename)
  
      def _download_with_rtmpdump(self, filename, url, player_url, page_url, play_path, tc_url):
          self.report_destination(filename)
@@ -538,7 +548,7 @@ class FileDownloader(object):
              self.report_error(u'Did not get any data blocks')
              return False
          stream.close()
              self.report_error(u'Did not get any data blocks')
              return False
          stream.close()
-        self.report_finish()
+        self.report_finish(data_len_str, (time.time() - start))
          if data_len is not None and byte_counter != data_len:
              raise ContentTooShortError(byte_counter, int(data_len))
          self.try_rename(tmpfilename, filename)
          if data_len is not None and byte_counter != data_len:
              raise ContentTooShortError(byte_counter, int(data_len))
          self.try_rename(tmpfilename, filename)
diff --git a/youtube_dl/PostProcessor.py b/youtube_dl/PostProcessor.py

index fddf58606015b92cc21a9f89818c90852c365e83..ae56d2082dec2b152a53b63b1629420e8e18cfec 100644 (file)
--- a/youtube_dl/PostProcessor.py
+++ b/youtube_dl/PostProcessor.py
@@ -71,12 +71,17 @@ class FFmpegPostProcessor(PostProcessor):
          programs = ['avprobe', 'avconv', 'ffmpeg', 'ffprobe']
          return dict((program, executable(program)) for program in programs)
  
          programs = ['avprobe', 'avconv', 'ffmpeg', 'ffprobe']
          return dict((program, executable(program)) for program in programs)
  
-    def run_ffmpeg(self, path, out_path, opts):
+    def run_ffmpeg_multiple_files(self, input_paths, out_path, opts):
          if not self._exes['ffmpeg'] and not self._exes['avconv']:
              raise FFmpegPostProcessorError(u'ffmpeg or avconv not found. Please install one.')
          if not self._exes['ffmpeg'] and not self._exes['avconv']:
              raise FFmpegPostProcessorError(u'ffmpeg or avconv not found. Please install one.')
-        cmd = ([self._exes['avconv'] or self._exes['ffmpeg'], '-y', '-i', encodeFilename(path)]
+
+        files_cmd = []
+        for path in input_paths:
+            files_cmd.extend(['-i', encodeFilename(path)])
+        cmd = ([self._exes['avconv'] or self._exes['ffmpeg'], '-y'] + files_cmd
                 + opts +
                 [encodeFilename(self._ffmpeg_filename_argument(out_path))])
                 + opts +
                 [encodeFilename(self._ffmpeg_filename_argument(out_path))])
+
          p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
          stdout,stderr = p.communicate()
          if p.returncode != 0:
          p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
          stdout,stderr = p.communicate()
          if p.returncode != 0:
@@ -84,6 +89,9 @@ class FFmpegPostProcessor(PostProcessor):
              msg = stderr.strip().split('\n')[-1]
              raise FFmpegPostProcessorError(msg)
  
              msg = stderr.strip().split('\n')[-1]
              raise FFmpegPostProcessorError(msg)
  
+    def run_ffmpeg(self, path, out_path, opts):
+        """Run the converter on a single input file.
+
+        Thin wrapper: wraps `path` in a one-element list and forwards it,
+        together with `out_path` and `opts`, to run_ffmpeg_multiple_files.
+        """
+        single_input = [path]
+        self.run_ffmpeg_multiple_files(single_input, out_path, opts)
+
      def _ffmpeg_filename_argument(self, fn):
          # ffmpeg broke --, see https://ffmpeg.org/trac/ffmpeg/ticket/2127 for details
          if fn.startswith(u'-'):
      def _ffmpeg_filename_argument(self, fn):
          # ffmpeg broke --, see https://ffmpeg.org/trac/ffmpeg/ticket/2127 for details
          if fn.startswith(u'-'):
@@ -129,7 +137,7 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
          try:
              FFmpegPostProcessor.run_ffmpeg(self, path, out_path, opts)
          except FFmpegPostProcessorError as err:
          try:
              FFmpegPostProcessor.run_ffmpeg(self, path, out_path, opts)
          except FFmpegPostProcessorError as err:
-            raise AudioConversionError(err.message)
+            raise AudioConversionError(err.msg)
  
      def run(self, information):
          path = information['filepath']
  
      def run(self, information):
          path = information['filepath']
@@ -199,7 +207,7 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
          except:
              etype,e,tb = sys.exc_info()
              if isinstance(e, AudioConversionError):
          except:
              etype,e,tb = sys.exc_info()
              if isinstance(e, AudioConversionError):
-                msg = u'audio conversion failed: ' + e.message
+                msg = u'audio conversion failed: ' + e.msg
              else:
                  msg = u'error running ' + (self._exes['avconv'] and 'avconv' or 'ffmpeg')
              raise PostProcessingError(msg)
              else:
                  msg = u'error running ' + (self._exes['avconv'] and 'avconv' or 'ffmpeg')
              raise PostProcessingError(msg)
@@ -232,3 +240,227 @@ class FFmpegVideoConvertor(FFmpegPostProcessor):
          information['format'] = self._preferedformat
          information['ext'] = self._preferedformat
          return False,information
          information['format'] = self._preferedformat
          information['ext'] = self._preferedformat
          return False,information
+
+
+class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
+    """Post-processor that embeds downloaded subtitle files into an mp4 video."""
+
+    # Two-letter (ISO 639-1) to three-letter (ISO 639-2/T) language codes.
+    # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
+    _lang_map = {
+        'aa': 'aar',
+        'ab': 'abk',
+        'ae': 'ave',
+        'af': 'afr',
+        'ak': 'aka',
+        'am': 'amh',
+        'an': 'arg',
+        'ar': 'ara',
+        'as': 'asm',
+        'av': 'ava',
+        'ay': 'aym',
+        'az': 'aze',
+        'ba': 'bak',
+        'be': 'bel',
+        'bg': 'bul',
+        'bh': 'bih',
+        'bi': 'bis',
+        'bm': 'bam',
+        'bn': 'ben',
+        'bo': 'bod',
+        'br': 'bre',
+        'bs': 'bos',
+        'ca': 'cat',
+        'ce': 'che',
+        'ch': 'cha',
+        'co': 'cos',
+        'cr': 'cre',
+        'cs': 'ces',
+        'cu': 'chu',
+        'cv': 'chv',
+        'cy': 'cym',
+        'da': 'dan',
+        'de': 'deu',
+        'dv': 'div',
+        'dz': 'dzo',
+        'ee': 'ewe',
+        'el': 'ell',
+        'en': 'eng',
+        'eo': 'epo',
+        'es': 'spa',
+        'et': 'est',
+        'eu': 'eus',
+        'fa': 'fas',
+        'ff': 'ful',
+        'fi': 'fin',
+        'fj': 'fij',
+        'fo': 'fao',
+        'fr': 'fra',
+        'fy': 'fry',
+        'ga': 'gle',
+        'gd': 'gla',
+        'gl': 'glg',
+        'gn': 'grn',
+        'gu': 'guj',
+        'gv': 'glv',
+        'ha': 'hau',
+        'he': 'heb',
+        'hi': 'hin',
+        'ho': 'hmo',
+        'hr': 'hrv',
+        'ht': 'hat',
+        'hu': 'hun',
+        'hy': 'hye',
+        'hz': 'her',
+        'ia': 'ina',
+        'id': 'ind',
+        'ie': 'ile',
+        'ig': 'ibo',
+        'ii': 'iii',
+        'ik': 'ipk',
+        'io': 'ido',
+        'is': 'isl',
+        'it': 'ita',
+        'iu': 'iku',
+        'ja': 'jpn',
+        'jv': 'jav',
+        'ka': 'kat',
+        'kg': 'kon',
+        'ki': 'kik',
+        'kj': 'kua',
+        'kk': 'kaz',
+        'kl': 'kal',
+        'km': 'khm',
+        'kn': 'kan',
+        'ko': 'kor',
+        'kr': 'kau',
+        'ks': 'kas',
+        'ku': 'kur',
+        'kv': 'kom',
+        'kw': 'cor',
+        'ky': 'kir',
+        'la': 'lat',
+        'lb': 'ltz',
+        'lg': 'lug',
+        'li': 'lim',
+        'ln': 'lin',
+        'lo': 'lao',
+        'lt': 'lit',
+        'lu': 'lub',
+        'lv': 'lav',
+        'mg': 'mlg',
+        'mh': 'mah',
+        'mi': 'mri',
+        'mk': 'mkd',
+        'ml': 'mal',
+        'mn': 'mon',
+        'mr': 'mar',
+        'ms': 'msa',
+        'mt': 'mlt',
+        'my': 'mya',
+        'na': 'nau',
+        'nb': 'nob',
+        'nd': 'nde',
+        'ne': 'nep',
+        'ng': 'ndo',
+        'nl': 'nld',
+        'nn': 'nno',
+        'no': 'nor',
+        'nr': 'nbl',
+        'nv': 'nav',
+        'ny': 'nya',
+        'oc': 'oci',
+        'oj': 'oji',
+        'om': 'orm',
+        'or': 'ori',
+        'os': 'oss',
+        'pa': 'pan',
+        'pi': 'pli',
+        'pl': 'pol',
+        'ps': 'pus',
+        'pt': 'por',
+        'qu': 'que',
+        'rm': 'roh',
+        'rn': 'run',
+        'ro': 'ron',
+        'ru': 'rus',
+        'rw': 'kin',
+        'sa': 'san',
+        'sc': 'srd',
+        'sd': 'snd',
+        'se': 'sme',
+        'sg': 'sag',
+        'si': 'sin',
+        'sk': 'slk',
+        'sl': 'slv',
+        'sm': 'smo',
+        'sn': 'sna',
+        'so': 'som',
+        'sq': 'sqi',
+        'sr': 'srp',
+        'ss': 'ssw',
+        'st': 'sot',
+        'su': 'sun',
+        'sv': 'swe',
+        'sw': 'swa',
+        'ta': 'tam',
+        'te': 'tel',
+        'tg': 'tgk',
+        'th': 'tha',
+        'ti': 'tir',
+        'tk': 'tuk',
+        'tl': 'tgl',
+        'tn': 'tsn',
+        'to': 'ton',
+        'tr': 'tur',
+        'ts': 'tso',
+        'tt': 'tat',
+        'tw': 'twi',
+        'ty': 'tah',
+        'ug': 'uig',
+        'uk': 'ukr',
+        'ur': 'urd',
+        'uz': 'uzb',
+        've': 'ven',
+        'vi': 'vie',
+        'vo': 'vol',
+        'wa': 'wln',
+        'wo': 'wol',
+        'xh': 'xho',
+        'yi': 'yid',
+        'yo': 'yor',
+        'za': 'zha',
+        'zh': 'zho',
+        'zu': 'zul',
+    }
+
+    def __init__(self, downloader=None, subtitlesformat='srt'):
+        """Remember the subtitle format used to build the subtitle file names."""
+        super(FFmpegEmbedSubtitlePP, self).__init__(downloader)
+        self._subformat = subtitlesformat
+
+    @classmethod
+    def _conver_lang_code(cls, code):
+        """Convert language code from ISO 639-1 to ISO 639-2/T
+
+        Only the first two characters of `code` are looked up, so a tag such
+        as 'pt-BR' is treated as 'pt'.  Returns None for unknown codes.
+        """
+        return cls._lang_map.get(code[:2])
+
+    def run(self, information):
+        """Embed the subtitle files of the video into the mp4 file itself.
+
+        Reads 'ext', 'filepath' and 'subtitles' from `information`.  Nothing
+        is done (apart from a screen message) when the file is not an mp4 or
+        when there are no subtitles to embed.  Otherwise the converter writes
+        a temporary file that then replaces the original video.
+
+        Always returns the tuple (True, information).
+        """
+        if information['ext'] != u'mp4':
+            self._downloader.to_screen(u'[ffmpeg] Subtitles can only be embedded in mp4 files')
+            return True, information
+
+        # 'subtitles' maps language -> subtitle data.  The key may be missing,
+        # and a None value means no data was obtained for that language, in
+        # which case no subtitle file was written for it either.
+        subtitles = information.get('subtitles') or {}
+        sub_langs = [lang for lang in subtitles if subtitles[lang] is not None]
+        if not sub_langs:
+            self._downloader.to_screen(u'[ffmpeg] There aren\'t any subtitles to embed')
+            return True, information
+
+        filename = information['filepath']
+        input_files = [filename] + [subtitles_filename(filename, lang, self._subformat) for lang in sub_langs]
+
+        # Input 0 is the video file: keep its first two streams untouched
+        # (presumably the video and audio streams).
+        opts = ['-map', '0:0', '-map', '0:1', '-c:v', 'copy', '-c:a', 'copy']
+        for (i, lang) in enumerate(sub_langs):
+            # Subtitle file number i is input i+1; store it as mov_text.
+            opts.extend(['-map', '%d:0' % (i+1), '-c:s:%d' % i, 'mov_text'])
+            lang_code = self._conver_lang_code(lang)
+            if lang_code is not None:
+                opts.extend(['-metadata:s:s:%d' % i, 'language=%s' % lang_code])
+        # The temporary output name does not end in '.mp4', so the container
+        # format is given explicitly.
+        opts.extend(['-f', 'mp4'])
+
+        temp_filename = filename + u'.temp'
+        self._downloader.to_screen(u'[ffmpeg] Embedding subtitles in \'%s\'' % filename)
+        self.run_ffmpeg_multiple_files(input_files, temp_filename, opts)
+        os.remove(encodeFilename(filename))
+        os.rename(encodeFilename(temp_filename), encodeFilename(filename))
+
+        return True, information
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py

index d1618da7914b08ccad26a9ff9a9035d2efc48ea0..b289bd9e26bbc9993e6f1295a31d20b3275f5f48 100644 (file)
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -76,7 +76,7 @@ class YoutubeDL(object):
      allsubtitles:      Downloads all the subtitles of the video
      listsubtitles:     Lists all available subtitles for the video
      subtitlesformat:   Subtitle format [srt/sbv/vtt] (default=srt)
      allsubtitles:      Downloads all the subtitles of the video
      listsubtitles:     Lists all available subtitles for the video
      subtitlesformat:   Subtitle format [srt/sbv/vtt] (default=srt)
-    subtitleslang:     Language of the subtitles to download
+    subtitleslangs:    List of languages of the subtitles to download
      keepvideo:         Keep the video file after post-processing
      daterange:         A DateRange object, download only if the upload_date is in the range.
      skip_download:     Skip the actual download of the video file
      keepvideo:         Keep the video file after post-processing
      daterange:         A DateRange object, download only if the upload_date is in the range.
      skip_download:     Skip the actual download of the video file
@@ -97,6 +97,7 @@ class YoutubeDL(object):
      def __init__(self, params):
          """Create a FileDownloader object with the given options."""
          self._ies = []
      def __init__(self, params):
          """Create a FileDownloader object with the given options."""
          self._ies = []
+        self._ies_instances = {}
          self._pps = []
          self._progress_hooks = []
          self._download_retcode = 0
          self._pps = []
          self._progress_hooks = []
          self._download_retcode = 0
@@ -111,8 +112,21 @@ class YoutubeDL(object):
      def add_info_extractor(self, ie):
          """Add an InfoExtractor object to the end of the list."""
          self._ies.append(ie)
      def add_info_extractor(self, ie):
          """Add an InfoExtractor object to the end of the list."""
          self._ies.append(ie)
+        self._ies_instances[ie.ie_key()] = ie
          ie.set_downloader(self)
  
          ie.set_downloader(self)
  
+    def get_info_extractor(self, ie_key):
+        """
+        Return the InfoExtractor instance registered under ie_key.
+
+        The key is looked up in self._ies_instances; when nothing is stored
+        there, a new instance is created, registered through
+        add_info_extractor and returned.
+        """
+        known = self._ies_instances.get(ie_key)
+        if known is not None:
+            return known
+        # The bare name resolves to the module-level get_info_extractor(),
+        # not to this method.
+        created = get_info_extractor(ie_key)()
+        self.add_info_extractor(created)
+        return created
+
      def add_default_info_extractors(self):
          """
          Add the InfoExtractors returned by gen_extractors to the end of the list
      def add_default_info_extractors(self):
          """
          Add the InfoExtractors returned by gen_extractors to the end of the list
@@ -294,9 +308,7 @@ class YoutubeDL(object):
           '''
          
          if ie_key:
           '''
          
          if ie_key:
-            ie = get_info_extractor(ie_key)()
-            ie.set_downloader(self)
-            ies = [ie]
+            ies = [self.get_info_extractor(ie_key)]
          else:
              ies = self._ies
  
          else:
              ies = self._ies
  
@@ -448,7 +460,8 @@ class YoutubeDL(object):
          if self.params.get('forceid', False):
              compat_print(info_dict['id'])
          if self.params.get('forceurl', False):
          if self.params.get('forceid', False):
              compat_print(info_dict['id'])
          if self.params.get('forceurl', False):
-            compat_print(info_dict['url'])
+            # For RTMP URLs, also include the playpath
+            compat_print(info_dict['url'] + info_dict.get('play_path', u''))
          if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict:
              compat_print(info_dict['thumbnail'])
          if self.params.get('forcedescription', False) and 'description' in info_dict:
          if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict:
              compat_print(info_dict['thumbnail'])
          if self.params.get('forcedescription', False) and 'description' in info_dict:
@@ -483,41 +496,28 @@ class YoutubeDL(object):
                  self.report_error(u'Cannot write description file ' + descfn)
                  return
  
                  self.report_error(u'Cannot write description file ' + descfn)
                  return
  
-        if (self.params.get('writesubtitles', False) or self.params.get('writeautomaticsub')) and 'subtitles' in info_dict and info_dict['subtitles']:
+        subtitles_are_requested = any([self.params.get('writesubtitles', False),
+                                       self.params.get('writeautomaticsub'),
+                                       self.params.get('allsubtitles', False)])
+
+        if  subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
              # subtitles download errors are already managed as troubles in relevant IE
              # that way it will silently go on when used with unsupporting IE
              # subtitles download errors are already managed as troubles in relevant IE
              # that way it will silently go on when used with unsupporting IE
-            subtitle = info_dict['subtitles'][0]
-            (sub_error, sub_lang, sub) = subtitle
+            subtitles = info_dict['subtitles']
              sub_format = self.params.get('subtitlesformat')
              sub_format = self.params.get('subtitlesformat')
-            if sub_error:
-                self.report_warning("Some error while getting the subtitles")
-            else:
+            for sub_lang in subtitles.keys():
+                sub = subtitles[sub_lang]
+                if sub is None:
+                    continue
                  try:
                  try:
-                    sub_filename = filename.rsplit('.', 1)[0] + u'.' + sub_lang + u'.' + sub_format
+                    sub_filename = subtitles_filename(filename, sub_lang, sub_format)
                      self.report_writesubtitles(sub_filename)
                      with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
                      self.report_writesubtitles(sub_filename)
                      with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
-                        subfile.write(sub)
+                            subfile.write(sub)
                  except (OSError, IOError):
                      self.report_error(u'Cannot write subtitles file ' + descfn)
                      return
  
                  except (OSError, IOError):
                      self.report_error(u'Cannot write subtitles file ' + descfn)
                      return
  
-        if self.params.get('allsubtitles', False) and 'subtitles' in info_dict and info_dict['subtitles']:
-            subtitles = info_dict['subtitles']
-            sub_format = self.params.get('subtitlesformat')
-            for subtitle in subtitles:
-                (sub_error, sub_lang, sub) = subtitle
-                if sub_error:
-                    self.report_warning("Some error while getting the subtitles")
-                else:
-                    try:
-                        sub_filename = filename.rsplit('.', 1)[0] + u'.' + sub_lang + u'.' + sub_format
-                        self.report_writesubtitles(sub_filename)
-                        with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
-                                subfile.write(sub)
-                    except (OSError, IOError):
-                        self.report_error(u'Cannot write subtitles file ' + descfn)
-                        return
-
          if self.params.get('writeinfojson', False):
              infofn = filename + u'.info.json'
              self.report_writeinfojson(infofn)
          if self.params.get('writeinfojson', False):
              infofn = filename + u'.info.json'
              self.report_writeinfojson(infofn)
diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py

index bf040aacd79fb16a19c4d4e688c1d628faf65a9f..431460c5790fadd46e6880004465c1eda87e290d 100644 (file)
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -27,6 +27,7 @@ __authors__  = (
      'Johny Mo Swag',
      'Axel Noack',
      'Albert Kim',
      'Johny Mo Swag',
      'Axel Noack',
      'Albert Kim',
+    'Pierre Rudloff',
  )
  
  __license__ = 'Public Domain'
  )
  
  __license__ = 'Public Domain'
@@ -44,6 +45,7 @@ import sys
  import warnings
  import platform
  
  import warnings
  import platform
  
+
  from .utils import *
  from .update import update_self
  from .version import __version__
  from .utils import *
  from .update import update_self
  from .version import __version__
@@ -82,6 +84,9 @@ def parseOpts(overrideArguments=None):
  
          return "".join(opts)
  
  
          return "".join(opts)
  
+    def _comma_separated_values_options_callback(option, opt_str, value, parser):
+        """optparse callback: split `value` on commas and store the resulting
+        list on parser.values under the option's dest."""
+        items = value.split(',')
+        setattr(parser.values, option.dest, items)
+
      def _find_term_columns():
          columns = os.environ.get('COLUMNS', None)
          if columns:
      def _find_term_columns():
          columns = os.environ.get('COLUMNS', None)
          if columns:
@@ -95,6 +100,16 @@ def parseOpts(overrideArguments=None):
              pass
          return None
  
              pass
          return None
  
+    def _hide_login_info(opts):
+        """Return a copy of `opts` with user name and password values masked.
+
+        Both the separate form ('--password', 'x') and the joined long form
+        ('--password=x') are handled, and every occurrence is masked.  A
+        private option given as the very last argument has no value to mask
+        and is left alone.  The sequence passed in is not modified.
+        """
+        private_opts = ['-p', '--password', '-u', '--username']
+        joined_prefixes = tuple(o + '=' for o in private_opts if o.startswith('--'))
+        opts = list(opts)
+        for i, opt in enumerate(opts):
+            if opt.startswith(joined_prefixes):
+                opts[i] = opt.split('=', 1)[0] + '=<PRIVATE>'
+            elif opt in private_opts and i + 1 < len(opts):
+                # The masked value is what the loop sees next, so a value that
+                # itself looks like an option is not mistaken for one.
+                opts[i + 1] = '<PRIVATE>'
+        return opts
+
      max_width = 80
      max_help_position = 80
  
      max_width = 80
      max_help_position = 80
  
@@ -119,6 +134,7 @@ def parseOpts(overrideArguments=None):
      selection      = optparse.OptionGroup(parser, 'Video Selection')
      authentication = optparse.OptionGroup(parser, 'Authentication Options')
      video_format   = optparse.OptionGroup(parser, 'Video Format Options')
      selection      = optparse.OptionGroup(parser, 'Video Selection')
      authentication = optparse.OptionGroup(parser, 'Authentication Options')
      video_format   = optparse.OptionGroup(parser, 'Video Format Options')
+    subtitles      = optparse.OptionGroup(parser, 'Subtitle Options')
      downloader     = optparse.OptionGroup(parser, 'Download Options')
      postproc       = optparse.OptionGroup(parser, 'Post-processing Options')
      filesystem     = optparse.OptionGroup(parser, 'Filesystem Options')
      downloader     = optparse.OptionGroup(parser, 'Download Options')
      postproc       = optparse.OptionGroup(parser, 'Post-processing Options')
      filesystem     = optparse.OptionGroup(parser, 'Filesystem Options')
@@ -185,27 +201,29 @@ def parseOpts(overrideArguments=None):
              action='store', dest='format_limit', metavar='FORMAT', help='highest quality format to download')
      video_format.add_option('-F', '--list-formats',
              action='store_true', dest='listformats', help='list all available formats (currently youtube only)')
              action='store', dest='format_limit', metavar='FORMAT', help='highest quality format to download')
      video_format.add_option('-F', '--list-formats',
              action='store_true', dest='listformats', help='list all available formats (currently youtube only)')
-    video_format.add_option('--write-sub', '--write-srt',
+
+    subtitles.add_option('--write-sub', '--write-srt',
              action='store_true', dest='writesubtitles',
              help='write subtitle file (currently youtube only)', default=False)
              action='store_true', dest='writesubtitles',
              help='write subtitle file (currently youtube only)', default=False)
-    video_format.add_option('--write-auto-sub', '--write-automatic-sub',
+    subtitles.add_option('--write-auto-sub', '--write-automatic-sub',
              action='store_true', dest='writeautomaticsub',
              help='write automatic subtitle file (currently youtube only)', default=False)
              action='store_true', dest='writeautomaticsub',
              help='write automatic subtitle file (currently youtube only)', default=False)
-    video_format.add_option('--only-sub',
+    subtitles.add_option('--only-sub',
              action='store_true', dest='skip_download',
              help='[deprecated] alias of --skip-download', default=False)
              action='store_true', dest='skip_download',
              help='[deprecated] alias of --skip-download', default=False)
-    video_format.add_option('--all-subs',
+    subtitles.add_option('--all-subs',
              action='store_true', dest='allsubtitles',
              action='store_true', dest='allsubtitles',
-            help='downloads all the available subtitles of the video (currently youtube only)', default=False)
-    video_format.add_option('--list-subs',
+            help='downloads all the available subtitles of the video', default=False)
+    subtitles.add_option('--list-subs',
              action='store_true', dest='listsubtitles',
              action='store_true', dest='listsubtitles',
-            help='lists all available subtitles for the video (currently youtube only)', default=False)
-    video_format.add_option('--sub-format',
+            help='lists all available subtitles for the video', default=False)
+    subtitles.add_option('--sub-format',
              action='store', dest='subtitlesformat', metavar='FORMAT',
              action='store', dest='subtitlesformat', metavar='FORMAT',
-            help='subtitle format [srt/sbv/vtt] (default=srt) (currently youtube only)', default='srt')
-    video_format.add_option('--sub-lang', '--srt-lang',
-            action='store', dest='subtitleslang', metavar='LANG',
-            help='language of the subtitles to download (optional) use IETF language tags like \'en\'')
+            help='subtitle format (default=srt) ([sbv/vtt] youtube only)', default='srt')
+    subtitles.add_option('--sub-lang', '--sub-langs', '--srt-lang',
+            action='callback', dest='subtitleslang', metavar='LANGS', type='str',
+            default=[], callback=_comma_separated_values_options_callback,
+            help='languages of the subtitles to download (optional) separated by commas, use IETF language tags like \'en,pt\'')
  
      downloader.add_option('-r', '--rate-limit',
              dest='ratelimit', metavar='LIMIT', help='maximum download rate (e.g. 50k or 44.6m)')
  
      downloader.add_option('-r', '--rate-limit',
              dest='ratelimit', metavar='LIMIT', help='maximum download rate (e.g. 50k or 44.6m)')
@@ -320,6 +338,8 @@ def parseOpts(overrideArguments=None):
              help='keeps the video file on disk after the post-processing; the video is erased by default')
      postproc.add_option('--no-post-overwrites', action='store_true', dest='nopostoverwrites', default=False,
              help='do not overwrite post-processed files; the post-processed files are overwritten by default')
              help='keeps the video file on disk after the post-processing; the video is erased by default')
      postproc.add_option('--no-post-overwrites', action='store_true', dest='nopostoverwrites', default=False,
              help='do not overwrite post-processed files; the post-processed files are overwritten by default')
+    postproc.add_option('--embed-subs', action='store_true', dest='embedsubtitles', default=False,
+            help='embed subtitles in the video (only for mp4 videos)')
  
  
      parser.add_option_group(general)
  
  
      parser.add_option_group(general)
@@ -328,6 +348,7 @@ def parseOpts(overrideArguments=None):
      parser.add_option_group(filesystem)
      parser.add_option_group(verbosity)
      parser.add_option_group(video_format)
      parser.add_option_group(filesystem)
      parser.add_option_group(verbosity)
      parser.add_option_group(video_format)
+    parser.add_option_group(subtitles)
      parser.add_option_group(authentication)
      parser.add_option_group(postproc)
  
      parser.add_option_group(authentication)
      parser.add_option_group(postproc)
  
@@ -343,13 +364,13 @@ def parseOpts(overrideArguments=None):
              userConfFile = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl.conf')
          systemConf = _readOptions('/etc/youtube-dl.conf')
          userConf = _readOptions(userConfFile)
              userConfFile = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl.conf')
          systemConf = _readOptions('/etc/youtube-dl.conf')
          userConf = _readOptions(userConfFile)
-        commandLineConf = sys.argv[1:] 
+        commandLineConf = sys.argv[1:]
          argv = systemConf + userConf + commandLineConf
          opts, args = parser.parse_args(argv)
          if opts.verbose:
          argv = systemConf + userConf + commandLineConf
          opts, args = parser.parse_args(argv)
          if opts.verbose:
-            sys.stderr.write(u'[debug] System config: ' + repr(systemConf) + '\n')
-            sys.stderr.write(u'[debug] User config: ' + repr(userConf) + '\n')
-            sys.stderr.write(u'[debug] Command-line args: ' + repr(commandLineConf) + '\n')
+            sys.stderr.write(u'[debug] System config: ' + repr(_hide_login_info(systemConf)) + '\n')
+            sys.stderr.write(u'[debug] User config: ' + repr(_hide_login_info(userConf)) + '\n')
+            sys.stderr.write(u'[debug] Command-line args: ' + repr(_hide_login_info(commandLineConf)) + '\n')
  
      return parser, opts, args
  
  
      return parser, opts, args
  
@@ -377,7 +398,7 @@ def _real_main(argv=None):
      # Set user agent
      if opts.user_agent is not None:
          std_headers['User-Agent'] = opts.user_agent
      # Set user agent
      if opts.user_agent is not None:
          std_headers['User-Agent'] = opts.user_agent
-    
+
      # Set referer
      if opts.referer is not None:
          std_headers['Referer'] = opts.referer
      # Set referer
      if opts.referer is not None:
          std_headers['Referer'] = opts.referer
@@ -420,6 +441,10 @@ def _real_main(argv=None):
      proxy_handler = compat_urllib_request.ProxyHandler(proxies)
      https_handler = make_HTTPS_handler(opts)
      opener = compat_urllib_request.build_opener(https_handler, proxy_handler, cookie_processor, YoutubeDLHandler())
      proxy_handler = compat_urllib_request.ProxyHandler(proxies)
      https_handler = make_HTTPS_handler(opts)
      opener = compat_urllib_request.build_opener(https_handler, proxy_handler, cookie_processor, YoutubeDLHandler())
+    # Delete the default user-agent header, which would otherwise apply in
+    # cases where our custom HTTP handler doesn't come into play
+    # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
+    opener.addheaders =[]
      compat_urllib_request.install_opener(opener)
      socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words)
  
      compat_urllib_request.install_opener(opener)
      socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words)
  
@@ -567,7 +592,7 @@ def _real_main(argv=None):
          'allsubtitles': opts.allsubtitles,
          'listsubtitles': opts.listsubtitles,
          'subtitlesformat': opts.subtitlesformat,
          'allsubtitles': opts.allsubtitles,
          'listsubtitles': opts.listsubtitles,
          'subtitlesformat': opts.subtitlesformat,
-        'subtitleslang': opts.subtitleslang,
+        'subtitleslangs': opts.subtitleslang,
          'matchtitle': decodeOption(opts.matchtitle),
          'rejecttitle': decodeOption(opts.rejecttitle),
          'max_downloads': opts.max_downloads,
          'matchtitle': decodeOption(opts.matchtitle),
          'rejecttitle': decodeOption(opts.rejecttitle),
          'max_downloads': opts.max_downloads,
@@ -597,7 +622,7 @@ def _real_main(argv=None):
                  sys.exc_clear()
              except:
                  pass
                  sys.exc_clear()
              except:
                  pass
-        sys.stderr.write(u'[debug] Python version %s - %s' %(platform.python_version(), platform.platform()) + u'\n')
+        sys.stderr.write(u'[debug] Python version %s - %s' %(platform.python_version(), platform_name()) + u'\n')
          sys.stderr.write(u'[debug] Proxy map: ' + str(proxy_handler.proxies) + u'\n')
  
      ydl.add_default_info_extractors()
          sys.stderr.write(u'[debug] Proxy map: ' + str(proxy_handler.proxies) + u'\n')
  
      ydl.add_default_info_extractors()
@@ -607,6 +632,8 @@ def _real_main(argv=None):
          ydl.add_post_processor(FFmpegExtractAudioPP(preferredcodec=opts.audioformat, preferredquality=opts.audioquality, nopostoverwrites=opts.nopostoverwrites))
      if opts.recodevideo:
          ydl.add_post_processor(FFmpegVideoConvertor(preferedformat=opts.recodevideo))
          ydl.add_post_processor(FFmpegExtractAudioPP(preferredcodec=opts.audioformat, preferredquality=opts.audioquality, nopostoverwrites=opts.nopostoverwrites))
      if opts.recodevideo:
          ydl.add_post_processor(FFmpegVideoConvertor(preferedformat=opts.recodevideo))
+    if opts.embedsubtitles:
+        ydl.add_post_processor(FFmpegEmbedSubtitlePP(subtitlesformat=opts.subtitlesformat))
  
      # Update version
      if opts.update_self:
  
      # Update version
      if opts.update_self:
diff --git a/youtube_dl/aes.py b/youtube_dl/aes.py

new file mode 100644 (file)

index 0000000..9a0c93f
--- /dev/null
+++ b/youtube_dl/aes.py
@@ -0,0 +1,202 @@
+__all__ = ['aes_encrypt', 'key_expansion', 'aes_ctr_decrypt', 'aes_decrypt_text']
+
+import base64
+from math import ceil
+
+from .utils import bytes_to_intlist, intlist_to_bytes
+
+BLOCK_SIZE_BYTES = 16
+
+def aes_ctr_decrypt(data, key, counter):
+    """
+    Decrypt with aes in counter mode
+
+    For every 16-Byte slice of 'data' one counter block is requested from
+    'counter', encrypted with aes_encrypt and XORed onto the slice.
+
+    @param {int[]} data        cipher
+    @param {int[]} key         16/24/32-Byte cipher key
+    @param {instance} counter  Instance whose next_value function (@returns {int[]}  16-Byte block)
+                               returns the next counter block
+    @returns {int[]}           decrypted data
+    """
+    expanded_key = key_expansion(key)
+    total_length = len(data)
+    block_count = int(ceil(float(total_length) / BLOCK_SIZE_BYTES))
+
+    plain = []
+    for start in range(0, block_count * BLOCK_SIZE_BYTES, BLOCK_SIZE_BYTES):
+        keystream = aes_encrypt(counter.next_value(), expanded_key)
+        chunk = data[start:start + BLOCK_SIZE_BYTES]
+        # A short final slice is zero-padded up to a full block
+        chunk += [0] * (BLOCK_SIZE_BYTES - len(chunk))
+        plain.extend(xor(chunk, keystream))
+
+    # Cut off whatever was produced from the zero padding
+    return plain[:total_length]
+
+def key_expansion(data):
+    """
+    Generate key schedule
+
+    The key is extended by len(data) Bytes per loop pass until it holds
+    (len(data) // 4 + 7) blocks of 16 Bytes, then cut to exactly that length.
+    The input list is copied, not modified.
+
+    @param {int[]} data  16/24/32-Byte cipher key
+    @returns {int[]}     176/208/240-Byte expanded key
+    """
+    data = data[:] # copy
+    rcon_iteration = 1
+    key_size_bytes = len(data)
+    # 11/13/15 blocks of 16 Bytes for a 16/24/32-Byte key
+    expanded_key_size_bytes = (key_size_bytes // 4 + 7) * BLOCK_SIZE_BYTES
+    
+    while len(data) < expanded_key_size_bytes:
+        # 1st word: last word run through key_schedule_core, XORed with the
+        # word that starts key_size_bytes Bytes before the current end
+        temp = data[-4:]
+        temp = key_schedule_core(temp, rcon_iteration)
+        rcon_iteration += 1
+        data += xor(temp, data[-key_size_bytes : 4-key_size_bytes])
+        
+        # next 3 words: previous word XORed with the word key_size_bytes back
+        for _ in range(3):
+            temp = data[-4:]
+            data += xor(temp, data[-key_size_bytes : 4-key_size_bytes])
+        
+        # 32-Byte keys only: one extra word that goes through sub_bytes first
+        if key_size_bytes == 32:
+            temp = data[-4:]
+            temp = sub_bytes(temp)
+            data += xor(temp, data[-key_size_bytes : 4-key_size_bytes])
+        
+        # remaining plain XOR words so that each pass adds key_size_bytes Bytes
+        # (3 more for 32-Byte keys, 2 more for 24-Byte keys, none for 16-Byte keys)
+        for _ in range(3 if key_size_bytes == 32  else 2 if key_size_bytes == 24 else 0):
+            temp = data[-4:]
+            data += xor(temp, data[-key_size_bytes : 4-key_size_bytes])
+    # the last pass may overshoot, drop the surplus
+    data = data[:expanded_key_size_bytes]
+    
+    return data
+
+def aes_encrypt(data, expanded_key):
+    """
+    Encrypt one block with aes
+
+    The number of rounds is derived from the length of 'expanded_key';
+    mix_columns is left out in the last round.
+
+    @param {int[]} data          16-Byte state
+    @param {int[]} expanded_key  176/208/240-Byte expanded key
+    @returns {int[]}             16-Byte cipher
+    """
+    rounds = len(expanded_key) // BLOCK_SIZE_BYTES - 1
+
+    def round_key(index):
+        # 16-Byte slice of the expanded key that belongs to round 'index'
+        offset = index * BLOCK_SIZE_BYTES
+        return expanded_key[offset:offset + BLOCK_SIZE_BYTES]
+
+    state = xor(data, round_key(0))
+    for round_number in range(1, rounds + 1):
+        state = shift_rows(sub_bytes(state))
+        if round_number < rounds:
+            state = mix_columns(state)
+        state = xor(state, round_key(round_number))
+
+    return state
+
+def aes_decrypt_text(data, password, key_size_bytes):
+    """
+    Decrypt text
+    - The first 8 Bytes of decoded 'data' are the 8 high Bytes of the counter
+    - The cipher key is retrieved by encrypting the first 16 Byte of 'password'
+      with the first 'key_size_bytes' Bytes from 'password' (if necessary filled with 0's)
+    - Mode of operation is 'counter'
+
+    @param {str} data                    Base64 encoded string
+    @param {str,unicode} password        Password (will be encoded with utf-8)
+    @param {int} key_size_bytes          Possible values: 16 for 128-Bit, 24 for 192-Bit or 32 for 256-Bit
+    @returns {str}                       Decrypted data
+    """
+    NONCE_LENGTH_BYTES = 8
+
+    raw = bytes_to_intlist(base64.b64decode(data))
+    password_bytes = bytes_to_intlist(password.encode('utf-8'))
+
+    # Password cut or zero-filled to exactly key_size_bytes Bytes
+    padded_password = password_bytes[:key_size_bytes] + [0] * (key_size_bytes - len(password_bytes))
+    derived_block = aes_encrypt(padded_password[:BLOCK_SIZE_BYTES], key_expansion(padded_password))
+    # NOTE(review): key_size_bytes // BLOCK_SIZE_BYTES is 1 for key_size_bytes == 24,
+    # so a 16-Byte key is used in that case - confirm this is intended
+    key = derived_block * (key_size_bytes // BLOCK_SIZE_BYTES)
+
+    nonce, cipher = raw[:NONCE_LENGTH_BYTES], raw[NONCE_LENGTH_BYTES:]
+
+    class Counter(object):
+        """Hands out the current counter block and advances it by one."""
+        def __init__(self, initial_block):
+            self._block = initial_block
+
+        def next_value(self):
+            current = self._block
+            self._block = inc(current)
+            return current
+
+    # The nonce fills the high Bytes, the low Bytes start at zero
+    counter = Counter(nonce + [0] * (BLOCK_SIZE_BYTES - NONCE_LENGTH_BYTES))
+
+    return intlist_to_bytes(aes_ctr_decrypt(cipher, key, counter))
+
+# Round constants, indexed by the rcon_iteration passed to key_schedule_core.
+# key_expansion starts counting at 1, so the entry at index 0 is not read by it.
+RCON = (0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36)
+# Byte substitution table used by sub_bytes (256 entries, indexed by the input byte)
+SBOX = (0x63, 0x7C, 0x77, 0x7B, 0xF2, 0x6B, 0x6F, 0xC5, 0x30, 0x01, 0x67, 0x2B, 0xFE, 0xD7, 0xAB, 0x76,
+        0xCA, 0x82, 0xC9, 0x7D, 0xFA, 0x59, 0x47, 0xF0, 0xAD, 0xD4, 0xA2, 0xAF, 0x9C, 0xA4, 0x72, 0xC0,
+        0xB7, 0xFD, 0x93, 0x26, 0x36, 0x3F, 0xF7, 0xCC, 0x34, 0xA5, 0xE5, 0xF1, 0x71, 0xD8, 0x31, 0x15,
+        0x04, 0xC7, 0x23, 0xC3, 0x18, 0x96, 0x05, 0x9A, 0x07, 0x12, 0x80, 0xE2, 0xEB, 0x27, 0xB2, 0x75,
+        0x09, 0x83, 0x2C, 0x1A, 0x1B, 0x6E, 0x5A, 0xA0, 0x52, 0x3B, 0xD6, 0xB3, 0x29, 0xE3, 0x2F, 0x84,
+        0x53, 0xD1, 0x00, 0xED, 0x20, 0xFC, 0xB1, 0x5B, 0x6A, 0xCB, 0xBE, 0x39, 0x4A, 0x4C, 0x58, 0xCF,
+        0xD0, 0xEF, 0xAA, 0xFB, 0x43, 0x4D, 0x33, 0x85, 0x45, 0xF9, 0x02, 0x7F, 0x50, 0x3C, 0x9F, 0xA8,
+        0x51, 0xA3, 0x40, 0x8F, 0x92, 0x9D, 0x38, 0xF5, 0xBC, 0xB6, 0xDA, 0x21, 0x10, 0xFF, 0xF3, 0xD2,
+        0xCD, 0x0C, 0x13, 0xEC, 0x5F, 0x97, 0x44, 0x17, 0xC4, 0xA7, 0x7E, 0x3D, 0x64, 0x5D, 0x19, 0x73,
+        0x60, 0x81, 0x4F, 0xDC, 0x22, 0x2A, 0x90, 0x88, 0x46, 0xEE, 0xB8, 0x14, 0xDE, 0x5E, 0x0B, 0xDB,
+        0xE0, 0x32, 0x3A, 0x0A, 0x49, 0x06, 0x24, 0x5C, 0xC2, 0xD3, 0xAC, 0x62, 0x91, 0x95, 0xE4, 0x79,
+        0xE7, 0xC8, 0x37, 0x6D, 0x8D, 0xD5, 0x4E, 0xA9, 0x6C, 0x56, 0xF4, 0xEA, 0x65, 0x7A, 0xAE, 0x08,
+        0xBA, 0x78, 0x25, 0x2E, 0x1C, 0xA6, 0xB4, 0xC6, 0xE8, 0xDD, 0x74, 0x1F, 0x4B, 0xBD, 0x8B, 0x8A,
+        0x70, 0x3E, 0xB5, 0x66, 0x48, 0x03, 0xF6, 0x0E, 0x61, 0x35, 0x57, 0xB9, 0x86, 0xC1, 0x1D, 0x9E,
+        0xE1, 0xF8, 0x98, 0x11, 0x69, 0xD9, 0x8E, 0x94, 0x9B, 0x1E, 0x87, 0xE9, 0xCE, 0x55, 0x28, 0xDF,
+        0x8C, 0xA1, 0x89, 0x0D, 0xBF, 0xE6, 0x42, 0x68, 0x41, 0x99, 0x2D, 0x0F, 0xB0, 0x54, 0xBB, 0x16)
+# Coefficients read by mix_column: entry [row][column] is the factor (1, 2 or 3)
+# applied to input byte 'column' when computing output byte 'row'
+MIX_COLUMN_MATRIX = ((2,3,1,1),
+                     (1,2,3,1),
+                     (1,1,2,3),
+                     (3,1,1,2))
+
+def sub_bytes(data):
+    """Return a new list in which every byte of 'data' is replaced by its SBOX entry."""
+    substituted = []
+    for byte in data:
+        substituted.append(SBOX[byte])
+    return substituted
+
+def rotate(data):
+    """Return a new list with the first element of 'data' moved to the end."""
+    first = data[0]
+    return data[1:] + [first]
+
+def key_schedule_core(data, rcon_iteration):
+    """
+    Rotate 'data', substitute its bytes and XOR the first byte with a round constant
+
+    @param {int[]} data          word to transform (not modified)
+    @param {int} rcon_iteration  index into RCON
+    @returns {int[]}             transformed word
+    """
+    word = sub_bytes(rotate(data))
+    word[0] ^= RCON[rcon_iteration]
+
+    return word
+
+def xor(data1, data2):
+    """Return the element-wise XOR of both lists, as long as the shorter one."""
+    combined = []
+    for left, right in zip(data1, data2):
+        combined.append(left ^ right)
+    return combined
+
+def mix_column(data):
+    """
+    Combine the 4 bytes of one column according to MIX_COLUMN_MATRIX
+
+    A factor of 2 doubles the byte (shift left, then XOR with 0x1b when the
+    result leaves the byte range); a factor of 3 additionally XORs in the
+    original byte. The weighted bytes of a row are XORed together.
+
+    @param {int[]} data  4-Byte column
+    @returns {int[]}     4-Byte mixed column
+    """
+    mixed_column = []
+    for coefficients in MIX_COLUMN_MATRIX:
+        accumulator = 0
+        for column, coefficient in enumerate(coefficients):
+            term = data[column]
+            if coefficient >= 2:
+                term <<= 1
+                if term > 0xff:
+                    term = (term & 0xff) ^ 0x1b
+                if coefficient == 3:
+                    term ^= data[column]
+            accumulator ^= term & 0xff
+        mixed_column.append(accumulator)
+    return mixed_column
+
+def mix_columns(data):
+    """Apply mix_column to each of the four consecutive 4-Byte slices of 'data'."""
+    mixed_state = []
+    for offset in range(0, 16, 4):
+        mixed_state.extend(mix_column(data[offset:offset + 4]))
+    return mixed_state
+
+def shift_rows(data):
+    """
+    Reorder the 16 bytes of the state
+
+    Output position column*4 + row takes the byte found 'row' columns further
+    on (wrapping around after the fourth column) in the same row.
+    """
+    return [
+        data[((column + row) & 0b11) * 4 + row]
+        for column in range(4)
+        for row in range(4)
+    ]
+
+def inc(data):
+    """
+    Return a copy of 'data' incremented by one
+
+    The list is treated as a number whose last element is the lowest byte;
+    a value of 255 wraps to 0 and carries into the element before it.
+    """
+    counter = data[:] # copy
+    position = len(counter) - 1
+    while position >= 0:
+        if counter[position] != 255:
+            counter[position] += 1
+            break
+        counter[position] = 0
+        position -= 1
+    return counter
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py

index 84c02c2ed99967832264e642fa084367cd4eff05..90f1a4418b1267cf630a60dca3cd23266133f59c 100644 (file)
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -1,3 +1,5 @@
+from .appletrailers import AppleTrailersIE
+from .addanime import AddAnimeIE
  from .archiveorg import ArchiveOrgIE
  from .ard import ARDIE
  from .arte import ArteTvIE
  from .archiveorg import ArchiveOrgIE
  from .ard import ARDIE
  from .arte import ArteTvIE
@@ -6,7 +8,10 @@ from .bandcamp import BandcampIE
  from .bliptv import BlipTVIE, BlipTVUserIE
  from .breakcom import BreakIE
  from .brightcove import BrightcoveIE
  from .bliptv import BlipTVIE, BlipTVUserIE
  from .breakcom import BreakIE
  from .brightcove import BrightcoveIE
+from .c56 import C56IE
  from .canalplus import CanalplusIE
  from .canalplus import CanalplusIE
+from .canalc2 import Canalc2IE
+from .cnn import CNNIE
  from .collegehumor import CollegeHumorIE
  from .comedycentral import ComedyCentralIE
  from .condenast import CondeNastIE
  from .collegehumor import CollegeHumorIE
  from .comedycentral import ComedyCentralIE
  from .condenast import CondeNastIE
@@ -29,6 +34,7 @@ from .gametrailers import GametrailersIE
  from .generic import GenericIE
  from .googleplus import GooglePlusIE
  from .googlesearch import GoogleSearchIE
  from .generic import GenericIE
  from .googleplus import GooglePlusIE
  from .googlesearch import GoogleSearchIE
+from .hark import HarkIE
  from .hotnewhiphop import HotNewHipHopIE
  from .howcast import HowcastIE
  from .hypem import HypemIE
  from .hotnewhiphop import HotNewHipHopIE
  from .howcast import HowcastIE
  from .hypem import HypemIE
@@ -36,6 +42,7 @@ from .ign import IGNIE, OneUPIE
  from .ina import InaIE
  from .infoq import InfoQIE
  from .instagram import InstagramIE
  from .ina import InaIE
  from .infoq import InfoQIE
  from .instagram import InstagramIE
+from .jeuxvideo import JeuxVideoIE
  from .jukebox import JukeboxIE
  from .justintv import JustinTVIE
  from .kankan import KankanIE
  from .jukebox import JukeboxIE
  from .justintv import JustinTVIE
  from .kankan import KankanIE
@@ -43,20 +50,28 @@ from .keek import KeekIE
  from .liveleak import LiveLeakIE
  from .livestream import LivestreamIE
  from .metacafe import MetacafeIE
  from .liveleak import LiveLeakIE
  from .livestream import LivestreamIE
  from .metacafe import MetacafeIE
+from .mit import TechTVMITIE, MITIE
  from .mixcloud import MixcloudIE
  from .mtv import MTVIE
  from .muzu import MuzuTVIE
  from .myspass import MySpassIE
  from .myvideo import MyVideoIE
  from .nba import NBAIE
  from .mixcloud import MixcloudIE
  from .mtv import MTVIE
  from .muzu import MuzuTVIE
  from .myspass import MySpassIE
  from .myvideo import MyVideoIE
  from .nba import NBAIE
+from .nbc import NBCNewsIE
  from .ooyala import OoyalaIE
  from .ooyala import OoyalaIE
+from .orf import ORFIE
+from .pbs import PBSIE
  from .photobucket import PhotobucketIE
  from .pornotube import PornotubeIE
  from .rbmaradio import RBMARadioIE
  from .redtube import RedTubeIE
  from .ringtv import RingTVIE
  from .photobucket import PhotobucketIE
  from .pornotube import PornotubeIE
  from .rbmaradio import RBMARadioIE
  from .redtube import RedTubeIE
  from .ringtv import RingTVIE
+from .ro220 import Ro220IE
  from .roxwel import RoxwelIE
  from .roxwel import RoxwelIE
+from .rtlnow import RTLnowIE
  from .sina import SinaIE
  from .sina import SinaIE
+from .slashdot import SlashdotIE
+from .sohu import SohuIE
  from .soundcloud import SoundcloudIE, SoundcloudSetIE
  from .spiegel import SpiegelIE
  from .stanfordoc import StanfordOpenClassroomIE
  from .soundcloud import SoundcloudIE, SoundcloudSetIE
  from .spiegel import SpiegelIE
  from .stanfordoc import StanfordOpenClassroomIE
@@ -67,9 +82,11 @@ from .ted import TEDIE
  from .tf1 import TF1IE
  from .thisav import ThisAVIE
  from .traileraddict import TrailerAddictIE
  from .tf1 import TF1IE
  from .thisav import ThisAVIE
  from .traileraddict import TrailerAddictIE
+from .trilulilu import TriluliluIE
  from .tudou import TudouIE
  from .tumblr import TumblrIE
  from .tutv import TutvIE
  from .tudou import TudouIE
  from .tumblr import TumblrIE
  from .tutv import TutvIE
+from .unistra import UnistraIE
  from .ustream import UstreamIE
  from .vbox7 import Vbox7IE
  from .veoh import VeohIE
  from .ustream import UstreamIE
  from .vbox7 import Vbox7IE
  from .veoh import VeohIE
@@ -77,7 +94,6 @@ from .vevo import VevoIE
  from .videofyme import VideofyMeIE
  from .vimeo import VimeoIE, VimeoChannelIE
  from .vine import VineIE
  from .videofyme import VideofyMeIE
  from .vimeo import VimeoIE, VimeoChannelIE
  from .vine import VineIE
-from .c56 import C56IE
  from .wat import WatIE
  from .weibo import WeiboIE
  from .wimp import WimpIE
  from .wat import WatIE
  from .weibo import WeiboIE
  from .wimp import WimpIE
@@ -111,12 +127,14 @@ _ALL_CLASSES = [
  ]
  _ALL_CLASSES.append(GenericIE)
  
  ]
  _ALL_CLASSES.append(GenericIE)
  
+
  def gen_extractors():
      """ Return a list of an instance of every supported extractor.
      The order does matter; the first extractor matched is the one handling the URL.
      """
      return [klass() for klass in _ALL_CLASSES]
  
  def gen_extractors():
      """ Return a list of an instance of every supported extractor.
      The order does matter; the first extractor matched is the one handling the URL.
      """
      return [klass() for klass in _ALL_CLASSES]
  
+
  def get_info_extractor(ie_name):
      """Returns the info extractor class with the given ie_name"""
      return globals()[ie_name+'IE']
  def get_info_extractor(ie_name):
      """Returns the info extractor class with the given ie_name"""
      return globals()[ie_name+'IE']
diff --git a/youtube_dl/extractor/addanime.py b/youtube_dl/extractor/addanime.py

new file mode 100644 (file)

index 0000000..82a785a
--- /dev/null
+++ b/youtube_dl/extractor/addanime.py
@@ -0,0 +1,75 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    compat_HTTPError,
+    compat_str,
+    compat_urllib_parse,
+    compat_urllib_parse_urlparse,
+
+    ExtractorError,
+)
+
+
+class AddAnimeIE(InfoExtractor):
+    """Information extractor for add-anime.net watch_video.php pages."""
+
+    # The dot of "watch_video.php" is escaped so that only a literal dot matches
+    _VALID_URL = r'^http://(?:\w+\.)?add-anime\.net/watch_video\.php\?(?:.*?)v=(?P<video_id>[\w_]+)(?:.*)'
+    IE_NAME = u'AddAnime'
+    _TEST = {
+        u'url': u'http://www.add-anime.net/watch_video.php?v=24MR3YO5SAS9',
+        u'file': u'24MR3YO5SAS9.flv',
+        u'md5': u'1036a0e0cd307b95bd8a8c3a5c8cfaf1',
+        u'info_dict': {
+            u"description": u"One Piece 606",
+            u"title": u"One Piece 606"
+        }
+    }
+
+    def _real_extract(self, url):
+        """
+        Download the video page and return the info dictionary of the video.
+
+        If the download fails with an HTTP error, the body of the error
+        response is searched for a challenge form; the form is answered and
+        the page is requested again.
+        """
+        try:
+            mobj = re.match(self._VALID_URL, url)
+            video_id = mobj.group('video_id')
+            webpage = self._download_webpage(url, video_id)
+        except ExtractorError as ee:
+            # Only HTTP errors carry a response body that can be inspected
+            if not isinstance(ee.cause, compat_HTTPError):
+                raise
+
+            redir_webpage = ee.cause.read().decode('utf-8')
+            action = self._search_regex(
+                r'<form id="challenge-form" action="([^"]+)"',
+                redir_webpage, u'Redirect form')
+            vc = self._search_regex(
+                r'<input type="hidden" name="jschl_vc" value="([^"]+)"/>',
+                redir_webpage, u'redirect vc value')
+            # The page contains a task of the form "a.value = X+Y*Z;"
+            av = re.search(
+                r'a\.value = ([0-9]+)[+]([0-9]+)[*]([0-9]+);',
+                redir_webpage)
+            if av is None:
+                raise ExtractorError(u'Cannot find redirect math task')
+            av_res = int(av.group(1)) + int(av.group(2)) * int(av.group(3))
+
+            # The submitted answer is the task result plus the length of the host name
+            parsed_url = compat_urllib_parse_urlparse(url)
+            av_val = av_res + len(parsed_url.netloc)
+            confirm_url = (
+                parsed_url.scheme + u'://' + parsed_url.netloc +
+                action + '?' +
+                compat_urllib_parse.urlencode({
+                    'jschl_vc': vc, 'jschl_answer': compat_str(av_val)}))
+            self._download_webpage(
+                confirm_url, video_id,
+                note=u'Confirming after redirect')
+            webpage = self._download_webpage(url, video_id)
+
+        video_url = self._search_regex(r"var normal_video_file = '(.*?)';",
+                                       webpage, u'video file URL')
+        video_title = self._og_search_title(webpage)
+        video_description = self._og_search_description(webpage)
+
+        return {
+            '_type': 'video',
+            'id':  video_id,
+            'url': video_url,
+            'ext': 'flv',
+            'title': video_title,
+            'description': video_description
+        }
diff --git a/youtube_dl/extractor/appletrailers.py b/youtube_dl/extractor/appletrailers.py

new file mode 100644 (file)

index 0000000..8b191c1
--- /dev/null
+++ b/youtube_dl/extractor/appletrailers.py
@@ -0,0 +1,166 @@
+import re
+import xml.etree.ElementTree
+
+from .common import InfoExtractor
+from ..utils import (
+    determine_ext,
+)
+
+
+class AppleTrailersIE(InfoExtractor):
+    """Information extractor for movie pages on trailers.apple.com (returns a playlist)."""
+
+    # The dots of the host name are escaped so that only literal dots match
+    _VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/trailers/(?P<company>[^/]+)/(?P<movie>[^/]+)'
+    _TEST = {
+        u"url": u"http://trailers.apple.com/trailers/wb/manofsteel/",
+        u"playlist": [
+            {
+                u"file": u"manofsteel-trailer4.mov",
+                u"md5": u"11874af099d480cc09e103b189805d5f",
+                u"info_dict": {
+                    u"duration": 111,
+                    u"thumbnail": u"http://trailers.apple.com/trailers/wb/manofsteel/images/thumbnail_11624.jpg",
+                    u"title": u"Trailer 4",
+                    u"upload_date": u"20130523",
+                    u"uploader_id": u"wb",
+                },
+            },
+            {
+                u"file": u"manofsteel-trailer3.mov",
+                u"md5": u"07a0a262aae5afe68120eed61137ab34",
+                u"info_dict": {
+                    u"duration": 182,
+                    u"thumbnail": u"http://trailers.apple.com/trailers/wb/manofsteel/images/thumbnail_10793.jpg",
+                    u"title": u"Trailer 3",
+                    u"upload_date": u"20130417",
+                    u"uploader_id": u"wb",
+                },
+            },
+            {
+                u"file": u"manofsteel-trailer.mov",
+                u"md5": u"e401fde0813008e3307e54b6f384cff1",
+                u"info_dict": {
+                    u"duration": 148,
+                    u"thumbnail": u"http://trailers.apple.com/trailers/wb/manofsteel/images/thumbnail_8703.jpg",
+                    u"title": u"Trailer",
+                    u"upload_date": u"20121212",
+                    u"uploader_id": u"wb",
+                },
+            },
+            {
+                u"file": u"manofsteel-teaser.mov",
+                u"md5": u"76b392f2ae9e7c98b22913c10a639c97",
+                u"info_dict": {
+                    u"duration": 93,
+                    u"thumbnail": u"http://trailers.apple.com/trailers/wb/manofsteel/images/thumbnail_6899.jpg",
+                    u"title": u"Teaser",
+                    u"upload_date": u"20120721",
+                    u"uploader_id": u"wb",
+                },
+            }
+        ]
+    }
+
+    def _real_extract(self, url):
+        """
+        Return a playlist with one entry per <li> of the movie's web.inc playlist.
+
+        For every entry the title, thumbnail, upload date and duration are
+        read from the playlist snippet; the available formats are collected
+        from the size pages and detail pages linked from it.
+        """
+        mobj = re.match(self._VALID_URL, url)
+        movie = mobj.group('movie')
+        uploader_id = mobj.group('company')
+
+        playlist_url = url.partition(u'?')[0] + u'/includes/playlists/web.inc'
+        playlist_snippet = self._download_webpage(playlist_url, movie)
+        # <script> elements are removed and the snippet is wrapped in a single
+        # root element before it is handed to the XML parser
+        playlist_cleaned = re.sub(r'(?s)<script>.*?</script>', u'', playlist_snippet)
+        playlist_html = u'<html>' + playlist_cleaned + u'</html>'
+
+        # Parsed size pages by size id, so that each one is downloaded only once
+        size_cache = {}
+
+        doc = xml.etree.ElementTree.fromstring(playlist_html)
+        playlist = []
+        for li in doc.findall('./div/ul/li'):
+            title = li.find('.//h3').text
+            video_id = movie + '-' + re.sub(r'[^a-zA-Z0-9]', '', title).lower()
+            thumbnail = li.find('.//img').attrib['src']
+
+            # The first <p> holds a MM/DD/YY date, the text after its <br> a M:SS runtime
+            date_el = li.find('.//p')
+            upload_date = None
+            m = re.search(r':\s?(?P<month>[0-9]{2})/(?P<day>[0-9]{2})/(?P<year>[0-9]{2})', date_el.text)
+            if m:
+                upload_date = u'20' + m.group('year') + m.group('month') + m.group('day')
+            runtime_el = date_el.find('./br')
+            m = re.search(r':\s?(?P<minutes>[0-9]+):(?P<seconds>[0-9]{1,2})', runtime_el.tail)
+            duration = None
+            if m:
+                duration = 60 * int(m.group('minutes')) + int(m.group('seconds'))
+
+            formats = []
+            for formats_el in li.findall('.//a'):
+                if formats_el.attrib['class'] != 'OverlayPanel':
+                    continue
+                target = formats_el.attrib['target']
+
+                format_code = formats_el.text
+                if 'Automatic' in format_code:
+                    continue
+
+                size_q = formats_el.attrib['href']
+                size_id = size_q.rpartition('#videos-')[2]
+                if size_id not in size_cache:
+                    size_url = url + size_q
+                    sizepage_html = self._download_webpage(
+                        size_url, movie,
+                        note=u'Downloading size info %s' % size_id,
+                        errnote=u'Error while downloading size info %s' % size_id,
+                    )
+                    _doc = xml.etree.ElementTree.fromstring(sizepage_html)
+                    size_cache[size_id] = _doc
+
+                sizepage_doc = size_cache[size_id]
+                links = sizepage_doc.findall('.//{http://www.w3.org/1999/xhtml}ul/{http://www.w3.org/1999/xhtml}li/{http://www.w3.org/1999/xhtml}a')
+                for vid_a in links:
+                    href = vid_a.get('href')
+                    # Only the link whose href ends with this format's target is followed
+                    if not href.endswith(target):
+                        continue
+                    detail_q = href.partition('#')[0]
+                    detail_url = url + '/' + detail_q
+
+                    m = re.match(r'includes/(?P<detail_id>[^/]+)/', detail_q)
+                    detail_id = m.group('detail_id')
+
+                    detail_html = self._download_webpage(
+                        detail_url, movie,
+                        note=u'Downloading detail %s %s' % (detail_id, size_id),
+                        errnote=u'Error while downloading detail %s %s' % (detail_id, size_id)
+                    )
+                    detail_doc = xml.etree.ElementTree.fromstring(detail_html)
+                    movie_link_el = detail_doc.find('.//{http://www.w3.org/1999/xhtml}a')
+                    assert movie_link_el.get('class') == 'movieLink'
+                    # The query string is dropped and an 'h' is inserted after every
+                    # underscore; presumably this yields the downloadable file - TODO confirm
+                    movie_link = movie_link_el.get('href').partition('?')[0].replace('_', '_h')
+                    ext = determine_ext(movie_link)
+                    assert ext == 'mov'
+
+                    formats.append({
+                        'format': format_code,
+                        'ext': ext,
+                        'url': movie_link,
+                    })
+
+            # 'title' used to be listed twice in this literal; one entry is enough
+            info = {
+                '_type': 'video',
+                'id': video_id,
+                'title': title,
+                'formats': formats,
+                'duration': duration,
+                'thumbnail': thumbnail,
+                'upload_date': upload_date,
+                'uploader_id': uploader_id,
+                'user_agent': 'QuickTime compatible (youtube-dl)',
+            }
+            # TODO: Remove when #980 has been merged
+            # NOTE(review): raises IndexError when no format was found for this entry
+            info['url'] = formats[-1]['url']
+            info['ext'] = formats[-1]['ext']
+
+            playlist.append(info)
+
+        return {
+            '_type': 'playlist',
+            'id': movie,
+            'entries': playlist,
+        }
diff --git a/youtube_dl/extractor/c56.py b/youtube_dl/extractor/c56.py

index 4c8a8af0917ba4d26e213ad01e6ed148d7a79e42..dc3a8d47d164912590d88d42c0fc072a99faeee4 100644 (file)
--- a/youtube_dl/extractor/c56.py
+++ b/youtube_dl/extractor/c56.py
@@ -12,8 +12,8 @@ class C56IE(InfoExtractor):
  
      _TEST ={
          u'url': u'http://www.56.com/u39/v_OTM0NDA3MTY.html',
  
      _TEST ={
          u'url': u'http://www.56.com/u39/v_OTM0NDA3MTY.html',
-        u'file': u'93440716.mp4',
-        u'md5': u'9dc07b5c8e978112a6441f9e75d2b59e',
+        u'file': u'93440716.flv',
+        u'md5': u'e59995ac63d0457783ea05f93f12a866',
          u'info_dict': {
              u'title': u'网事知多少 第32期：车怒',
          },
          u'info_dict': {
              u'title': u'网事知多少 第32期：车怒',
          },
diff --git a/youtube_dl/extractor/canalc2.py b/youtube_dl/extractor/canalc2.py

new file mode 100644 (file)

index 0000000..5083221
--- /dev/null
+++ b/youtube_dl/extractor/canalc2.py
@@ -0,0 +1,35 @@
+# coding: utf-8
+import re
+
+from .common import InfoExtractor
+
+
+class Canalc2IE(InfoExtractor):
+    """Information extractor for videos hosted on canalc2.tv."""
+
+    # The base class exposes the extractor name as ``IE_NAME`` and the other
+    # extractors override that attribute; an ``_IE_NAME`` attribute would
+    # leave the default (class-name derived) name in place.
+    IE_NAME = 'canalc2.tv'
+    _VALID_URL = r'http://.*?\.canalc2\.tv/video\.asp\?idVideo=(\d+)&voir=oui'
+
+    _TEST = {
+        u'url': u'http://www.canalc2.tv/video.asp?idVideo=12163&voir=oui',
+        u'file': u'12163.mp4',
+        u'md5': u'060158428b650f896c542dfbb3d6487f',
+        u'info_dict': {
+            u'title': u'Terrasses du Numérique'
+        }
+    }
+
+    def _real_extract(self, url):
+        """Return the info dictionary (id, ext, url, title) for *url*."""
+        # The numeric idVideo query parameter is the video id.
+        video_id = re.match(self._VALID_URL, url).group(1)
+        webpage = self._download_webpage(url, video_id)
+        # The page hands the media path to its player through
+        # so.addVariable('file', ...); the path is appended to the
+        # streaming server's base URL below.
+        file_name = self._search_regex(
+            r"so\.addVariable\('file','(.*?)'\);",
+            webpage, 'file name')
+        video_url = 'http://vod-flash.u-strasbg.fr:8080/' + file_name
+
+        title = self._html_search_regex(
+            r'class="evenement8">(.*?)</a>', webpage, u'title')
+
+        return {'id': video_id,
+                'ext': 'mp4',
+                'url': video_url,
+                'title': title,
+                }
diff --git a/youtube_dl/extractor/canalplus.py b/youtube_dl/extractor/canalplus.py

index 3b1c8887621c8e48a51db1945b9ee93b9c2ab68d..1f02519a0149ad85aad3886bf0a01e5c986064c0 100644 (file)
--- a/youtube_dl/extractor/canalplus.py
+++ b/youtube_dl/extractor/canalplus.py
@@ -5,7 +5,7 @@ from .common import InfoExtractor
  from ..utils import unified_strdate
  
  class CanalplusIE(InfoExtractor):
  from ..utils import unified_strdate
  
  class CanalplusIE(InfoExtractor):
-    _VALID_URL = r'https?://www\.canalplus\.fr/.*?\?vid=(?P<id>\d+)'
+    _VALID_URL = r'https?://(www\.canalplus\.fr/.*?\?vid=|player\.canalplus\.fr/#/)(?P<id>\d+)'
      _VIDEO_INFO_TEMPLATE = 'http://service.canal-plus.com/video/rest/getVideosLiees/cplus/%s'
      IE_NAME = u'canalplus.fr'
  
      _VIDEO_INFO_TEMPLATE = 'http://service.canal-plus.com/video/rest/getVideosLiees/cplus/%s'
      IE_NAME = u'canalplus.fr'
  
diff --git a/youtube_dl/extractor/cnn.py b/youtube_dl/extractor/cnn.py

new file mode 100644 (file)

index 0000000..a79f881
--- /dev/null
+++ b/youtube_dl/extractor/cnn.py
@@ -0,0 +1,58 @@
+import re
+import xml.etree.ElementTree
+
+from .common import InfoExtractor
+from ..utils import determine_ext
+
+
+class CNNIE(InfoExtractor):
+    """Information extractor for videos on cnn.com and edition.cnn.com."""
+
+    # 'path' is the part of the URL used to locate the metadata XML;
+    # 'title' is its last component (without the '.cnn' suffix when present).
+    _VALID_URL = r'''(?x)https?://(edition\.)?cnn\.com/video/(data/.+?|\?)/
+        (?P<path>.+?/(?P<title>[^/]+?)(?:\.cnn|(?=&)))'''
+
+    _TESTS = [{
+        u'url': u'http://edition.cnn.com/video/?/video/sports/2013/06/09/nadal-1-on-1.cnn',
+        u'file': u'sports_2013_06_09_nadal-1-on-1.cnn.mp4',
+        u'md5': u'3e6121ea48df7e2259fe73a0628605c4',
+        u'info_dict': {
+            u'title': u'Nadal wins 8th French Open title',
+            u'description': u'World Sport\'s Amanda Davies chats with 2013 French Open champion Rafael Nadal.',
+        },
+    },
+    {
+        u"url": u"http://edition.cnn.com/video/?/video/us/2013/08/21/sot-student-gives-epic-speech.georgia-institute-of-technology&utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+rss%2Fcnn_topstories+%28RSS%3A+Top+Stories%29",
+        u"file": u"us_2013_08_21_sot-student-gives-epic-speech.georgia-institute-of-technology.mp4",
+        u"md5": u"b5cc60c60a3477d185af8f19a2a26f4e",
+        u"info_dict": {
+            u"title": "Student's epic speech stuns new freshmen",
+            u"description": "A Georgia Tech student welcomes the incoming freshmen with an epic speech backed by music from \"2001: A Space Odyssey.\""
+        }
+    }]
+
+    def _real_extract(self, url):
+        """Return the info dictionary for the video at *url*.
+
+        The metadata XML for the video is downloaded, the entry that sorts
+        highest among the listed files is chosen as the download URL, and
+        the listed images (if any) are returned as thumbnails.
+        """
+        mobj = re.match(self._VALID_URL, url)
+        path = mobj.group('path')
+        page_title = mobj.group('title')
+        info_url = u'http://cnn.com/video/data/3.0/%s/index.xml' % path
+        info_xml = self._download_webpage(info_url, page_title)
+        info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8'))
+
+        # Each tuple holds the two numbers of the 'AxB' bitrate label
+        # (presumably width and height), the optional '_<n>k' bitrate and
+        # the file path, so a plain sort puts the best entry last.
+        formats = []
+        for f in info.findall('files/file'):
+            mf = re.match(r'(\d+)x(\d+)(?:_(.*)k)?',f.attrib['bitrate'])
+            if mf is not None:
+                formats.append((int(mf.group(1)), int(mf.group(2)), int(mf.group(3) or 0), f.text))
+        formats = sorted(formats)
+        # NOTE(review): this still raises IndexError when no file entry has
+        # a recognised bitrate label.
+        (_,_,_, video_path) = formats[-1]
+        video_url = 'http://ht.cdn.turner.com/cnn/big%s' % video_path
+
+        # Sorted by (height, width), so the largest image comes last.
+        thumbnails = sorted([((int(t.attrib['height']),int(t.attrib['width'])), t.text) for t in info.findall('images/image')])
+        thumbs_dict = [{'resolution': res, 'url': t_url} for (res, t_url) in thumbnails]
+
+        return {'id': info.attrib['id'],
+                'title': info.find('headline').text,
+                'url': video_url,
+                'ext': determine_ext(video_url),
+                # A thumbnail is optional metadata: do not abort the
+                # extraction when the XML lists no images.
+                'thumbnail': thumbnails[-1][1] if thumbnails else None,
+                'thumbnails': thumbs_dict,
+                'description': info.find('description').text,
+                }
diff --git a/youtube_dl/extractor/collegehumor.py b/youtube_dl/extractor/collegehumor.py

index 30b9c7549f76c8d65dd4f18bcc5023b0c86160d9..8d4c93d6da91f4470c9809bf32dd0fbbe886c92b 100644 (file)
--- a/youtube_dl/extractor/collegehumor.py
+++ b/youtube_dl/extractor/collegehumor.py
@@ -4,6 +4,7 @@ import xml.etree.ElementTree
  from .common import InfoExtractor
  from ..utils import (
      compat_urllib_parse_urlparse,
  from .common import InfoExtractor
  from ..utils import (
      compat_urllib_parse_urlparse,
+    determine_ext,
  
      ExtractorError,
  )
  
      ExtractorError,
  )
@@ -12,7 +13,7 @@ from ..utils import (
  class CollegeHumorIE(InfoExtractor):
      _VALID_URL = r'^(?:https?://)?(?:www\.)?collegehumor\.com/(video|embed|e)/(?P<videoid>[0-9]+)/?(?P<shorttitle>.*)$'
  
  class CollegeHumorIE(InfoExtractor):
      _VALID_URL = r'^(?:https?://)?(?:www\.)?collegehumor\.com/(video|embed|e)/(?P<videoid>[0-9]+)/?(?P<shorttitle>.*)$'
  
-    _TEST = {
+    _TESTS = [{
          u'url': u'http://www.collegehumor.com/video/6902724/comic-con-cosplay-catastrophe',
          u'file': u'6902724.mp4',
          u'md5': u'1264c12ad95dca142a9f0bf7968105a0',
          u'url': u'http://www.collegehumor.com/video/6902724/comic-con-cosplay-catastrophe',
          u'file': u'6902724.mp4',
          u'md5': u'1264c12ad95dca142a9f0bf7968105a0',
@@ -20,7 +21,16 @@ class CollegeHumorIE(InfoExtractor):
              u'title': u'Comic-Con Cosplay Catastrophe',
              u'description': u'Fans get creative this year at San Diego.  Too creative.  And yes, that\'s really Joss Whedon.',
          },
              u'title': u'Comic-Con Cosplay Catastrophe',
              u'description': u'Fans get creative this year at San Diego.  Too creative.  And yes, that\'s really Joss Whedon.',
          },
-    }
+    },
+    {
+        u'url': u'http://www.collegehumor.com/video/3505939/font-conference',
+        u'file': u'3505939.mp4',
+        u'md5': u'c51ca16b82bb456a4397987791a835f5',
+        u'info_dict': {
+            u'title': u'Font Conference',
+            u'description': u'This video wasn\'t long enough, so we made it double-spaced.',
+        },
+    }]
  
      def _real_extract(self, url):
          mobj = re.match(self._VALID_URL, url)
  
      def _real_extract(self, url):
          mobj = re.match(self._VALID_URL, url)
@@ -49,25 +59,29 @@ class CollegeHumorIE(InfoExtractor):
              info['description'] = videoNode.findall('./description')[0].text
              info['title'] = videoNode.findall('./caption')[0].text
              info['thumbnail'] = videoNode.findall('./thumbnail')[0].text
              info['description'] = videoNode.findall('./description')[0].text
              info['title'] = videoNode.findall('./caption')[0].text
              info['thumbnail'] = videoNode.findall('./thumbnail')[0].text
-            manifest_url = videoNode.findall('./file')[0].text
+            next_url = videoNode.findall('./file')[0].text
          except IndexError:
              raise ExtractorError(u'Invalid metadata XML file')
  
          except IndexError:
              raise ExtractorError(u'Invalid metadata XML file')
  
-        manifest_url += '?hdcore=2.10.3'
-        manifestXml = self._download_webpage(manifest_url, video_id,
-                                             u'Downloading XML manifest',
-                                             u'Unable to download video info XML')
-
-        adoc = xml.etree.ElementTree.fromstring(manifestXml)
-        try:
-            media_node = adoc.findall('./{http://ns.adobe.com/f4m/1.0}media')[0]
-            node_id = media_node.attrib['url']
-            video_id = adoc.findall('./{http://ns.adobe.com/f4m/1.0}id')[0].text
-        except IndexError as err:
-            raise ExtractorError(u'Invalid manifest file')
+        if next_url.endswith(u'manifest.f4m'):
+            manifest_url = next_url + '?hdcore=2.10.3'
+            manifestXml = self._download_webpage(manifest_url, video_id,
+                                         u'Downloading XML manifest',
+                                         u'Unable to download video info XML')
  
  
-        url_pr = compat_urllib_parse_urlparse(info['thumbnail'])
+            adoc = xml.etree.ElementTree.fromstring(manifestXml)
+            try:
+                media_node = adoc.findall('./{http://ns.adobe.com/f4m/1.0}media')[0]
+                node_id = media_node.attrib['url']
+                video_id = adoc.findall('./{http://ns.adobe.com/f4m/1.0}id')[0].text
+            except IndexError as err:
+                raise ExtractorError(u'Invalid manifest file')
+            url_pr = compat_urllib_parse_urlparse(info['thumbnail'])
+            info['url'] = url_pr.scheme + '://' + url_pr.netloc + video_id[:-2].replace('.csmil','').replace(',','')
+            info['ext'] = 'mp4'
+        else:
+            # Old-style direct links
+            info['url'] = next_url
+            info['ext'] = determine_ext(info['url'])
  
  
-        info['url'] = url_pr.scheme + '://' + url_pr.netloc + video_id[:-2].replace('.csmil','').replace(',','')
-        info['ext'] = 'mp4'
-        return [info]
+        return info
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py

index da50abfc1cd492b8d360ef601b44841a938c055b..77726ee2432fc2bcd6df6ce89dcc560419524051 100644 (file)
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -47,7 +47,8 @@ class InfoExtractor(object):
      uploader_id:    Nickname or id of the video uploader.
      location:       Physical location of the video.
      player_url:     SWF Player URL (used for rtmpdump).
      uploader_id:    Nickname or id of the video uploader.
      location:       Physical location of the video.
      player_url:     SWF Player URL (used for rtmpdump).
-    subtitles:      The subtitle file contents.
+    subtitles:      The subtitle file contents as a dictionary in the format
+                    {language: subtitles}.
      view_count:     How many users have watched the video on the platform.
      urlhandle:      [internal] The urlHandle to be used to download the file,
                      like returned by urllib.request.urlopen
      view_count:     How many users have watched the video on the platform.
      urlhandle:      [internal] The urlHandle to be used to download the file,
                      like returned by urllib.request.urlopen
@@ -77,7 +78,13 @@ class InfoExtractor(object):
      @classmethod
      def suitable(cls, url):
          """Receives a URL and returns True if suitable for this IE."""
      @classmethod
      def suitable(cls, url):
          """Receives a URL and returns True if suitable for this IE."""
-        return re.match(cls._VALID_URL, url) is not None
+
+        # This does not use has/getattr intentionally - we want to know whether
+        # we have cached the regexp for *this* class, whereas getattr would also
+        # match the superclass
+        if '_VALID_URL_RE' not in cls.__dict__:
+            cls._VALID_URL_RE = re.compile(cls._VALID_URL)
+        return cls._VALID_URL_RE.match(url) is not None
  
      @classmethod
      def working(cls):
  
      @classmethod
      def working(cls):
@@ -107,6 +114,11 @@ class InfoExtractor(object):
          """Real extraction process. Redefine in subclasses."""
          pass
  
          """Real extraction process. Redefine in subclasses."""
          pass
  
+    @classmethod
+    def ie_key(cls):
+        """A string for getting the InfoExtractor with get_info_extractor"""
+        # The key is the class name minus its last two characters, i.e.
+        # without the "IE" suffix the extractor classes carry
+        # (for example CNNIE -> CNN).
+        return cls.__name__[:-2]
+
      @property
      def IE_NAME(self):
          return type(self).__name__[:-2]
      @property
      def IE_NAME(self):
          return type(self).__name__[:-2]
@@ -122,7 +134,7 @@ class InfoExtractor(object):
          except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
              if errnote is None:
                  errnote = u'Unable to download webpage'
          except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
              if errnote is None:
                  errnote = u'Unable to download webpage'
-            raise ExtractorError(u'%s: %s' % (errnote, compat_str(err)), sys.exc_info()[2])
+            raise ExtractorError(u'%s: %s' % (errnote, compat_str(err)), sys.exc_info()[2], cause=err)
  
      def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None):
          """ Returns a tuple (page content as string, URL handle) """
  
      def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None):
          """ Returns a tuple (page content as string, URL handle) """
@@ -133,12 +145,17 @@ class InfoExtractor(object):
  
          urlh = self._request_webpage(url_or_request, video_id, note, errnote)
          content_type = urlh.headers.get('Content-Type', '')
  
          urlh = self._request_webpage(url_or_request, video_id, note, errnote)
          content_type = urlh.headers.get('Content-Type', '')
+        webpage_bytes = urlh.read()
          m = re.match(r'[a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+\s*;\s*charset=(.+)', content_type)
          if m:
              encoding = m.group(1)
          else:
          m = re.match(r'[a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+\s*;\s*charset=(.+)', content_type)
          if m:
              encoding = m.group(1)
          else:
-            encoding = 'utf-8'
-        webpage_bytes = urlh.read()
+            m = re.search(br'<meta[^>]+charset=[\'"]?([^\'")]+)[ /\'">]',
+                          webpage_bytes[:1024])
+            if m:
+                encoding = m.group(1).decode('ascii')
+            else:
+                encoding = 'utf-8'
          if self._downloader.params.get('dump_intermediate_pages', False):
              try:
                  url = url_or_request.get_full_url()
          if self._downloader.params.get('dump_intermediate_pages', False):
              try:
                  url = url_or_request.get_full_url()
diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py

index fa8c630d053168bf30d835952debd67536555c0c..1ea449ca824bbf100edf9dc851a3cd74d1dcd266 100644 (file)
--- a/youtube_dl/extractor/dailymotion.py
+++ b/youtube_dl/extractor/dailymotion.py
@@ -21,7 +21,7 @@ class DailymotionIE(InfoExtractor):
          u'file': u'x33vw9.mp4',
          u'md5': u'392c4b85a60a90dc4792da41ce3144eb',
          u'info_dict': {
          u'file': u'x33vw9.mp4',
          u'md5': u'392c4b85a60a90dc4792da41ce3144eb',
          u'info_dict': {
-            u"uploader": u"Alex and Van .", 
+            u"uploader": u"Amphora Alex and Van .", 
              u"title": u"Tutoriel de Youtubeur\"DL DES VIDEO DE YOUTUBE\""
          }
      }
              u"title": u"Tutoriel de Youtubeur\"DL DES VIDEO DE YOUTUBE\""
          }
      }
diff --git a/youtube_dl/extractor/funnyordie.py b/youtube_dl/extractor/funnyordie.py

index 67a7e5f76fc604ae058a8e05842b02feb7fecffe..4508f0dfac29a85d86533c2f781414b9b17d10cb 100644 (file)
--- a/youtube_dl/extractor/funnyordie.py
+++ b/youtube_dl/extractor/funnyordie.py
@@ -21,17 +21,14 @@ class FunnyOrDieIE(InfoExtractor):
          video_id = mobj.group('id')
          webpage = self._download_webpage(url, video_id)
  
          video_id = mobj.group('id')
          webpage = self._download_webpage(url, video_id)
  
-        video_url = self._html_search_regex(r'<video[^>]*>\s*<source[^>]*>\s*<source src="(?P<url>[^"]+)"',
+        video_url = self._search_regex(r'type: "video/mp4", src: "(.*?)"',
              webpage, u'video URL', flags=re.DOTALL)
  
              webpage, u'video URL', flags=re.DOTALL)
  
-        title = self._html_search_regex((r"<h1 class='player_page_h1'.*?>(?P<title>.*?)</h1>",
-            r'<title>(?P<title>[^<]+?)</title>'), webpage, 'title', flags=re.DOTALL)
-
          info = {
              'id': video_id,
              'url': video_url,
              'ext': 'mp4',
          info = {
              'id': video_id,
              'url': video_url,
              'ext': 'mp4',
-            'title': title,
+            'title': self._og_search_title(webpage),
              'description': self._og_search_description(webpage),
          }
          return [info]
              'description': self._og_search_description(webpage),
          }
          return [info]
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py

index b633e896c6eb3d5b2158585d01ffba9615fe8f31..dc4dea4adf63937722a1bf81ead5e10fe09f34e3 100644 (file)
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -8,11 +8,13 @@ from ..utils import (
      compat_urllib_error,
      compat_urllib_parse,
      compat_urllib_request,
      compat_urllib_error,
      compat_urllib_parse,
      compat_urllib_request,
+    compat_urlparse,
  
      ExtractorError,
  )
  from .brightcove import BrightcoveIE
  
  
      ExtractorError,
  )
  from .brightcove import BrightcoveIE
  
+
  class GenericIE(InfoExtractor):
      IE_DESC = u'Generic downloader that works on some sites'
      _VALID_URL = r'.*'
  class GenericIE(InfoExtractor):
      IE_DESC = u'Generic downloader that works on some sites'
      _VALID_URL = r'.*'
@@ -23,7 +25,7 @@ class GenericIE(InfoExtractor):
              u'file': u'13601338388002.mp4',
              u'md5': u'85b90ccc9d73b4acd9138d3af4c27f89',
              u'info_dict': {
              u'file': u'13601338388002.mp4',
              u'md5': u'85b90ccc9d73b4acd9138d3af4c27f89',
              u'info_dict': {
-                u"uploader": u"www.hodiho.fr", 
+                u"uploader": u"www.hodiho.fr",
                  u"title": u"R\u00e9gis plante sa Jeep"
              }
          },
                  u"title": u"R\u00e9gis plante sa Jeep"
              }
          },
@@ -107,8 +109,13 @@ class GenericIE(InfoExtractor):
          return new_url
  
      def _real_extract(self, url):
          return new_url
  
      def _real_extract(self, url):
-        new_url = self._test_redirect(url)
-        if new_url: return [self.url_result(new_url)]
+        try:
+            new_url = self._test_redirect(url)
+            if new_url:
+                return [self.url_result(new_url)]
+        except compat_urllib_error.HTTPError:
+            # This may be a stupid server that doesn't like HEAD, our UA, or so
+            pass
  
          video_id = url.split('/')[-1]
          try:
  
          video_id = url.split('/')[-1]
          try:
@@ -119,7 +126,7 @@ class GenericIE(InfoExtractor):
              raise ExtractorError(u'Invalid URL: %s' % url)
  
          self.report_extraction(video_id)
              raise ExtractorError(u'Invalid URL: %s' % url)
  
          self.report_extraction(video_id)
-        # Look for BrigthCove:
+        # Look for BrightCove:
          m_brightcove = re.search(r'<object.+?class=([\'"]).*?BrightcoveExperience.*?\1.+?</object>', webpage, re.DOTALL)
          if m_brightcove is not None:
              self.to_screen(u'Brightcove video detected.')
          m_brightcove = re.search(r'<object.+?class=([\'"]).*?BrightcoveExperience.*?\1.+?</object>', webpage, re.DOTALL)
          if m_brightcove is not None:
              self.to_screen(u'Brightcove video detected.')
@@ -144,6 +151,9 @@ class GenericIE(InfoExtractor):
              # We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
              if m_video_type is not None:
                  mobj = re.search(r'<meta.*?property="og:video".*?content="(.*?)"', webpage)
              # We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
              if m_video_type is not None:
                  mobj = re.search(r'<meta.*?property="og:video".*?content="(.*?)"', webpage)
+        if mobj is None:
+            # HTML5 video
+            mobj = re.search(r'<video[^<]*>.*?<source .*?src="([^"]+)"', webpage, flags=re.DOTALL)
          if mobj is None:
              raise ExtractorError(u'Invalid URL: %s' % url)
  
          if mobj is None:
              raise ExtractorError(u'Invalid URL: %s' % url)
  
@@ -153,6 +163,7 @@ class GenericIE(InfoExtractor):
              raise ExtractorError(u'Invalid URL: %s' % url)
  
          video_url = compat_urllib_parse.unquote(mobj.group(1))
              raise ExtractorError(u'Invalid URL: %s' % url)
  
          video_url = compat_urllib_parse.unquote(mobj.group(1))
+        video_url = compat_urlparse.urljoin(url, video_url)
          video_id = os.path.basename(video_url)
  
          # here's a fun little line of code for you:
          video_id = os.path.basename(video_url)
  
          # here's a fun little line of code for you:
diff --git a/youtube_dl/extractor/googleplus.py b/youtube_dl/extractor/googleplus.py

index 9f7fc19a4e663f422b41a4fff620152bbe0b6e64..f1cd889834dc712d8b3c38478f85e30f2f92e44f 100644 (file)
--- a/youtube_dl/extractor/googleplus.py
+++ b/youtube_dl/extractor/googleplus.py
@@ -57,8 +57,8 @@ class GooglePlusIE(InfoExtractor):
              webpage, 'title', default=u'NA')
  
          # Step 2, Simulate clicking the image box to launch video
              webpage, 'title', default=u'NA')
  
          # Step 2, Simulate clicking the image box to launch video
-        DOMAIN = 'https://plus.google.com'
-        video_page = self._search_regex(r'<a href="((?:%s)?/photos/.*?)"' % re.escape(DOMAIN),
+        DOMAIN = 'https://plus.google.com/'
+        video_page = self._search_regex(r'<a href="((?:%s)?photos/.*?)"' % re.escape(DOMAIN),
              webpage, u'video page URL')
          if not video_page.startswith(DOMAIN):
              video_page = DOMAIN + video_page
              webpage, u'video page URL')
          if not video_page.startswith(DOMAIN):
              video_page = DOMAIN + video_page
diff --git a/youtube_dl/extractor/hark.py b/youtube_dl/extractor/hark.py

new file mode 100644 (file)

index 0000000..5bdd08a
--- /dev/null
+++ b/youtube_dl/extractor/hark.py
@@ -0,0 +1,37 @@
+# -*- coding: utf-8 -*-
+
+import re
+import json
+
+from .common import InfoExtractor
+from ..utils import determine_ext
+
+class HarkIE(InfoExtractor):
+    """Information extractor for clips published on hark.com."""
+
+    _VALID_URL = r'https?://www\.hark\.com/clips/(.+?)-.+'
+    _TEST = {
+        u'url': u'http://www.hark.com/clips/mmbzyhkgny-obama-beyond-the-afghan-theater-we-only-target-al-qaeda-on-may-23-2013',
+        u'file': u'mmbzyhkgny.mp3',
+        u'md5': u'6783a58491b47b92c7c1af5a77d4cbee',
+        u'info_dict': {
+            u'title': u"Obama: 'Beyond The Afghan Theater, We Only Target Al Qaeda' on May 23, 2013",
+            u'description': u'President Barack Obama addressed the nation live on May 23, 2013 in a speech aimed at addressing counter-terrorism policies including the use of drone strikes, detainees at Guantanamo Bay prison facility, and American citizens who are terrorists.',
+            u'duration': 11,
+        }
+    }
+
+    def _real_extract(self, url):
+        """Build the info dictionary from the clip's JSON description."""
+        # The clip id is the part of the URL slug before the first dash.
+        clip_id = re.match(self._VALID_URL, url).group(1)
+        # The metadata is fetched from "<clip id>.json" under /clips/.
+        metadata_url = "http://www.hark.com/clips/%s.json" % clip_id
+        metadata = json.loads(self._download_webpage(metadata_url, clip_id))
+        media_url = metadata['url']
+
+        return {
+            'id': clip_id,
+            'url': media_url,
+            'title': metadata['name'],
+            'ext': determine_ext(media_url),
+            'description': metadata['description'],
+            'thumbnail': metadata['image_original'],
+            'duration': metadata['duration'],
+        }
diff --git a/youtube_dl/extractor/ign.py b/youtube_dl/extractor/ign.py

index 62abab65552dffdce1eec252f6860dd847d36b91..b1c84278a5784ed2028a986f15be49dd99b1ef9d 100644 (file)
--- a/youtube_dl/extractor/ign.py
+++ b/youtube_dl/extractor/ign.py
@@ -13,7 +13,7 @@ class IGNIE(InfoExtractor):
      Some videos of it.ign.com are also supported
      """
  
      Some videos of it.ign.com are also supported
      """
  
-    _VALID_URL = r'https?://.+?\.ign\.com/(?:videos|show_videos)(/.+)?/(?P<name_or_id>.+)'
+    _VALID_URL = r'https?://.+?\.ign\.com/(?P<type>videos|show_videos|articles)(/.+)?/(?P<name_or_id>.+)'
      IE_NAME = u'ign.com'
  
      _CONFIG_URL_TEMPLATE = 'http://www.ign.com/videos/configs/id/%s.config'
      IE_NAME = u'ign.com'
  
      _CONFIG_URL_TEMPLATE = 'http://www.ign.com/videos/configs/id/%s.config'
@@ -41,7 +41,11 @@ class IGNIE(InfoExtractor):
      def _real_extract(self, url):
          mobj = re.match(self._VALID_URL, url)
          name_or_id = mobj.group('name_or_id')
      def _real_extract(self, url):
          mobj = re.match(self._VALID_URL, url)
          name_or_id = mobj.group('name_or_id')
+        page_type = mobj.group('type')
          webpage = self._download_webpage(url, name_or_id)
          webpage = self._download_webpage(url, name_or_id)
+        if page_type == 'articles':
+            video_url = self._search_regex(r'var videoUrl = "(.+?)"', webpage, u'video url')
+            return self.url_result(video_url, ie='IGN')
          video_id = self._find_video_id(webpage)
          result = self._get_video_info(video_id)
          description = self._html_search_regex(self._DESCRIPTION_RE,
          video_id = self._find_video_id(webpage)
          result = self._get_video_info(video_id)
          description = self._html_search_regex(self._DESCRIPTION_RE,
@@ -68,7 +72,7 @@ class IGNIE(InfoExtractor):
  class OneUPIE(IGNIE):
      """Extractor for 1up.com, it uses the ign videos system."""
  
  class OneUPIE(IGNIE):
      """Extractor for 1up.com, it uses the ign videos system."""
  
-    _VALID_URL = r'https?://gamevideos.1up.com/video/id/(?P<name_or_id>.+)'
+    _VALID_URL = r'https?://gamevideos.1up.com/(?P<type>video)/id/(?P<name_or_id>.+)'
      IE_NAME = '1up.com'
  
      _DESCRIPTION_RE = r'<div id="vid_summary">(.+?)</div>'
      IE_NAME = '1up.com'
  
      _DESCRIPTION_RE = r'<div id="vid_summary">(.+?)</div>'
diff --git a/youtube_dl/extractor/jeuxvideo.py b/youtube_dl/extractor/jeuxvideo.py

new file mode 100644 (file)

index 0000000..4327bc1
--- /dev/null
+++ b/youtube_dl/extractor/jeuxvideo.py
@@ -0,0 +1,47 @@
+# coding: utf-8
+
+import json
+import re
+import xml.etree.ElementTree
+
+from .common import InfoExtractor
+
+class JeuxVideoIE(InfoExtractor):
+    """Information extractor for videos on jeuxvideo.com."""
+
+    _VALID_URL = r'http://.*?\.jeuxvideo\.com/.*/(.*?)-\d+\.htm'
+
+    _TEST = {
+        u'url': u'http://www.jeuxvideo.com/reportages-videos-jeux/0004/00046170/tearaway-playstation-vita-gc-2013-tearaway-nous-presente-ses-papiers-d-identite-00115182.htm',
+        u'file': u'5182.mp4',
+        u'md5': u'e0fdb0cd3ce98713ef9c1e1e025779d0',
+        u'info_dict': {
+            u'title': u'GC 2013 : Tearaway nous présente ses papiers d\'identité',
+            u'description': u'Lorsque les développeurs de LittleBigPlanet proposent un nouveau titre, on ne peut que s\'attendre à un résultat original et fort attrayant.\n',
+        },
+    }
+
+    def _real_extract(self, url):
+        """Return the info dictionary for the video at *url*.
+
+        The page embeds the URL of an XML player configuration; that XML
+        carries the video title, the thumbnail and a JSON description of
+        the available versions, of which the first one is downloaded.
+        """
+        mobj = re.match(self._VALID_URL, url)
+        # The slug from the page URL is only used to label the downloads;
+        # the real title is read from the XML configuration.
+        title = mobj.group(1)
+        webpage = self._download_webpage(url, title)
+
+        # The lookups go through self._search_regex rather than a bare
+        # re.search(...).group(1), so a missing match no longer surfaces as
+        # an AttributeError on None.
+        # NOTE(review): presumably _search_regex raises the usual
+        # "unable to extract" error on failure - confirm in common.py.
+        xml_link = self._search_regex(
+            r'<param name="flashvars" value="config=(.*?)" />',
+            webpage, u'config URL')
+
+        video_id = self._search_regex(
+            r'http://www.jeuxvideo.com/config/\w+/0011/(.*?)/\d+_player\.xml',
+            xml_link, u'video ID')
+
+        xml_config = self._download_webpage(xml_link, title,
+                                            'Downloading XML config')
+        config = xml.etree.ElementTree.fromstring(xml_config.encode('utf-8'))
+        # The JSON payload is taken from the raw XML text rather than from
+        # the parsed tree.
+        info_json = self._search_regex(
+            r'<format\.json>(.*?)</format\.json>',
+            xml_config, u'format JSON', flags=re.MULTILINE|re.DOTALL)
+        info = json.loads(info_json)['versions'][0]
+
+        video_url = 'http://video720.jeuxvideo.com/' + info['file']
+
+        return {'id': video_id,
+                'title' : config.find('titre_video').text,
+                'ext' : 'mp4',
+                'url' : video_url,
+                'description': self._og_search_description(webpage),
+                'thumbnail': config.find('image').text,
+                }
diff --git a/youtube_dl/extractor/kankan.py b/youtube_dl/extractor/kankan.py

index 8537ba5843221ba5b7575359e2f78357ded787bf..445d465017f513b55839ed9323c95212e6de7fb7 100644 (file)
--- a/youtube_dl/extractor/kankan.py
+++ b/youtube_dl/extractor/kankan.py
@@ -21,8 +21,10 @@ class KankanIE(InfoExtractor):
          video_id = mobj.group('id')
          webpage = self._download_webpage(url, video_id)
  
          video_id = mobj.group('id')
          webpage = self._download_webpage(url, video_id)
  
-        title = self._search_regex(r'G_TITLE=[\'"](.+?)[\'"]', webpage, u'video title')
-        gcid = self._search_regex(r'lurl:[\'"]http://.+?/.+?/(.+?)/', webpage, u'gcid')
+        title = self._search_regex(r'(?:G_TITLE=|G_MOVIE_TITLE = )[\'"](.+?)[\'"]', webpage, u'video title')
+        surls = re.search(r'surls:\[\'.+?\'\]|lurl:\'.+?\.flv\'', webpage).group(0)
+        gcids = re.findall(r"http://.+?/.+?/(.+?)/", surls)
+        gcid = gcids[-1]
  
          video_info_page = self._download_webpage('http://p2s.cl.kankan.com/getCdnresource_flv?gcid=%s' % gcid,
                                                   video_id, u'Downloading video url info')
  
          video_info_page = self._download_webpage('http://p2s.cl.kankan.com/getCdnresource_flv?gcid=%s' % gcid,
                                                   video_id, u'Downloading video url info')
diff --git a/youtube_dl/extractor/mit.py b/youtube_dl/extractor/mit.py

new file mode 100644 (file)

index 0000000..52be923
--- /dev/null
+++ b/youtube_dl/extractor/mit.py
@@ -0,0 +1,74 @@
+import re
+import json
+
+from .common import InfoExtractor
+from ..utils import (
+    clean_html,
+    get_element_by_id,
+)
+
+
+class TechTVMITIE(InfoExtractor):
+    """Extractor for videos hosted on techtv.mit.edu.
+
+    The video page is always requested from the ``/videos/<id>`` address,
+    even when the URL that was given is an ``/embeds/<id>`` one.
+    """
+    IE_NAME = u'techtv.mit.edu'
+    _VALID_URL = r'https?://techtv\.mit\.edu/(videos|embeds)/(?P<id>\d+)'
+
+    _TEST = {
+        u'url': u'http://techtv.mit.edu/videos/25418-mit-dna-learning-center-set',
+        u'file': u'25418.mp4',
+        u'md5': u'1f8cb3e170d41fd74add04d3c9330e5f',
+        u'info_dict': {
+            u'title': u'MIT DNA Learning Center Set',
+            u'description': u'md5:82313335e8a8a3f243351ba55bc1b474',
+        },
+    }
+
+    def _real_extract(self, url):
+        """Return the info dict of the video that ``url`` points to.
+
+        The download URL is the base URL found in the page joined with the
+        path of the format that has the largest ``bitrate`` value.
+        """
+        video_id = re.match(self._VALID_URL, url).group('id')
+        page_url = 'http://techtv.mit.edu/videos/%s' % video_id
+        raw_page = self._download_webpage(page_url, video_id)
+        # HTML comments are removed before elements are looked up by id.
+        comment_re = re.compile(u'<!--.*?-->', re.S)
+        clean_page = comment_re.sub(u'', raw_page)
+
+        base_url = self._search_regex(
+            r'ipadUrl: \'(.+?cloudfront.net/)', raw_page, u'base url')
+        bitrates_json = self._search_regex(
+            r'bitrates: (\[.+?\])', raw_page, u'video formats')
+        # Ascending order: the last entry is the one that gets downloaded.
+        formats = json.loads(bitrates_json)
+        formats.sort(key=lambda fmt: fmt['bitrate'])
+
+        title = get_element_by_id('edit-title', clean_page)
+        raw_description = get_element_by_id('edit-description', clean_page)
+        description = clean_html(raw_description)
+        thumbnail = self._search_regex(
+            r'playlist:.*?url: \'(.+?)\'', raw_page, u'thumbnail',
+            flags=re.DOTALL)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'url': base_url + formats[-1]['url'].replace('mp4:', ''),
+            'ext': 'mp4',
+            'description': description,
+            'thumbnail': thumbnail,
+        }
+
+
+class MITIE(TechTVMITIE):
+    """Extractor for video.mit.edu pages.
+
+    These pages only embed a player in an iframe, so the iframe address is
+    handed over to the 'TechTVMIT' extractor.
+    """
+    IE_NAME = u'video.mit.edu'
+    _VALID_URL = r'https?://video\.mit\.edu/watch/(?P<title>[^/]+)'
+
+    _TEST = {
+        u'url': u'http://video.mit.edu/watch/the-government-is-profiling-you-13222/',
+        u'file': u'21783.mp4',
+        u'md5': u'7db01d5ccc1895fc5010e9c9e13648da',
+        u'info_dict': {
+            u'title': u'The Government is Profiling You',
+            u'description': u'md5:ad5795fe1e1623b73620dbfd47df9afd',
+        },
+    }
+
+    def _real_extract(self, url):
+        """Return a url result pointing at the first iframe of the page."""
+        # The title part of the URL doubles as the id shown in messages.
+        page_title = re.match(self._VALID_URL, url).group('title')
+        webpage = self._download_webpage(url, page_title)
+        notice = '%s: Extracting %s url' % (page_title, TechTVMITIE.IE_NAME)
+        self.to_screen(notice)
+        embed_url = self._search_regex(
+            r'<iframe .*?src="(.+?)"', webpage, u'embed url')
+        return self.url_result(embed_url, ie='TechTVMIT')
diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py

new file mode 100644 (file)

index 0000000..3bc9dae
--- /dev/null
+++ b/youtube_dl/extractor/nbc.py
@@ -0,0 +1,33 @@
+import re
+import xml.etree.ElementTree
+
+from .common import InfoExtractor
+from ..utils import find_xpath_attr, compat_str
+
+
+class NBCNewsIE(InfoExtractor):
+    """Extractor for nbcnews.com videos, based on the site's XML video info."""
+    _VALID_URL = r'https?://www\.nbcnews\.com/video/.+?/(?P<id>\d+)'
+
+    _TEST = {
+        u'url': u'http://www.nbcnews.com/video/nbc-news/52753292',
+        u'file': u'52753292.flv',
+        u'md5': u'47abaac93c6eaf9ad37ee6c4463a5179',
+        u'info_dict': {
+            u'title': u'Crew emerges after four-month Mars food study',
+            u'description': u'md5:24e632ffac72b35f8b67a12d1b6ddfc1',
+        },
+    }
+
+    def _real_extract(self, url):
+        """Return the info dict built from the <video> node of the info XML."""
+        video_id = re.match(self._VALID_URL, url).group('id')
+        info_url = 'http://www.nbcnews.com/id/%s/displaymode/1219' % video_id
+        info_xml = self._download_webpage(info_url, video_id)
+        # The parser is fed bytes, hence the explicit encoding step.
+        root = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8'))
+        info = root.find('video')
+
+        def media_text(media_type):
+            # Text of the <media> child whose 'type' attribute matches.
+            return find_xpath_attr(info, 'media', 'type', media_type).text
+
+        return {
+            'id': video_id,
+            'title': info.find('headline').text,
+            'ext': 'flv',
+            'url': media_text('flashVideo'),
+            'description': compat_str(info.find('caption').text),
+            'thumbnail': media_text('thumbnail'),
+        }
diff --git a/youtube_dl/extractor/orf.py b/youtube_dl/extractor/orf.py

new file mode 100644 (file)

index 0000000..41ef8e9
--- /dev/null
+++ b/youtube_dl/extractor/orf.py
@@ -0,0 +1,67 @@
+# coding: utf-8
+
+import re
+import xml.etree.ElementTree
+import json
+
+from .common import InfoExtractor
+from ..utils import (
+    compat_urlparse,
+    ExtractorError,
+    find_xpath_attr,
+)
+
+class ORFIE(InfoExtractor):
+    """Extractor for episode and topic pages of tvthek.orf.at.
+
+    A page can hold several clips; one info dict is returned per clip.
+    """
+    # The dots of the host name are escaped so that they match a literal '.'
+    # only, instead of any character.
+    _VALID_URL = r'https?://tvthek\.orf\.at/(programs/.+?/episodes|topics/.+?)/(?P<id>\d+)'
+
+    _TEST = {
+        u'url': u'http://tvthek.orf.at/programs/1171769-Wetter-ZIB/episodes/6557323-Wetter',
+        u'file': u'6566957.flv',
+        u'info_dict': {
+            u'title': u'Wetter',
+            u'description': u'Christa Kummer, Marcus Wadsak und Kollegen  präsentieren abwechselnd ihre täglichen Wetterprognosen für Österreich.\r \r Mehr Wetter unter wetter.ORF.at',
+        },
+        u'params': {
+            # It uses rtmp
+            u'skip_download': True,
+        }
+    }
+
+    def _real_extract(self, url):
+        """Return a list with one info dict for every clip of the page.
+
+        The XML flash configuration supplies the stream URLs and the JSON
+        playlist supplies ids and titles; both are walked in parallel.
+
+        Raises ExtractorError when a clip offers none of the known qualities.
+        """
+        mobj = re.match(self._VALID_URL, url)
+        playlist_id = mobj.group('id')
+        webpage = self._download_webpage(url, playlist_id)
+
+        # The configuration is embedded in the page as a quoted string;
+        # parse_qs is presumably used here just to unquote it.
+        flash_xml = self._search_regex(r'ORF\.flashXML = \'(.+?)\'', webpage, u'flash xml')
+        flash_xml = compat_urlparse.parse_qs('xml='+flash_xml)['xml'][0]
+        flash_config = xml.etree.ElementTree.fromstring(flash_xml.encode('utf-8'))
+        # The playlist is JSON with backslash-escaped double quotes.
+        playlist_json = self._search_regex(r'playlist\': \'(\[.*?\])\'', webpage, u'playlist').replace(r'\"','"')
+        playlist = json.loads(playlist_json)
+
+        videos = []
+        ns = '{http://tempuri.org/XMLSchema.xsd}'
+        xpath = '%(ns)sPlaylist/%(ns)sItems/%(ns)sItem' % {'ns': ns}
+        # Used for every clip that has no description of its own.
+        webpage_description = self._og_search_description(webpage)
+        for (i, (item, info)) in enumerate(zip(flash_config.findall(xpath), playlist), 1):
+            # Get best quality url: the first quality of this list that is
+            # present in the item wins.
+            rtmp_url = None
+            for q in ['Q6A', 'Q4A', 'Q1A']:
+                video_url = find_xpath_attr(item, '%sVideoUrl' % ns, 'quality', q)
+                if video_url is not None:
+                    rtmp_url = video_url.text
+                    break
+            if rtmp_url is None:
+                raise ExtractorError(u'Couldn\'t get video url: %s' % info['id'])
+            description = self._html_search_regex(
+                r'id="playlist_entry_%s".*?<p>(.*?)</p>' % i, webpage,
+                u'description', default=webpage_description, flags=re.DOTALL)
+            videos.append({
+                '_type': 'video',
+                'id': info['id'],
+                'title': info['title'],
+                'url': rtmp_url,
+                'ext': 'flv',
+                'description': description,
+                })
+
+        return videos
diff --git a/youtube_dl/extractor/pbs.py b/youtube_dl/extractor/pbs.py

new file mode 100644 (file)

index 0000000..65462d8
--- /dev/null
+++ b/youtube_dl/extractor/pbs.py
@@ -0,0 +1,34 @@
+import re
+import json
+
+from .common import InfoExtractor
+
+
+class PBSIE(InfoExtractor):
+    """Extractor for video.pbs.org, based on the site's JSON video info."""
+    # The dots of the host name are escaped so that they match a literal '.'
+    # only, instead of any character.
+    _VALID_URL = r'https?://video\.pbs\.org/video/(?P<id>\d+)/?'
+
+    _TEST = {
+        u'url': u'http://video.pbs.org/video/2365006249/',
+        u'file': u'2365006249.mp4',
+        u'md5': u'ce1888486f0908d555a8093cac9a7362',
+        u'info_dict': {
+            u'title': u'A More Perfect Union',
+            u'description': u'md5:ba0c207295339c8d6eced00b7c363c6a',
+            u'duration': 3190,
+        },
+    }
+
+    def _real_extract(self, url):
+        """Return the info dict built from the videoInfo JSON of the video.
+
+        'title', 'alternate_encoding' and 'program' must be present in the
+        JSON (a KeyError is raised otherwise); the description, thumbnail and
+        duration are optional and default to None.
+        """
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        info_url = 'http://video.pbs.org/videoInfo/%s?format=json' % video_id
+        info_page = self._download_webpage(info_url, video_id)
+        info = json.loads(info_page)
+        return {'id': video_id,
+                'title': info['title'],
+                'url': info['alternate_encoding']['url'],
+                'ext': 'mp4',
+                'description': info['program'].get('description'),
+                'thumbnail': info.get('image_url'),
+                'duration': info.get('duration'),
+                }
diff --git a/youtube_dl/extractor/ro220.py b/youtube_dl/extractor/ro220.py

new file mode 100644 (file)

index 0000000..c32f64d
--- /dev/null
+++ b/youtube_dl/extractor/ro220.py
@@ -0,0 +1,42 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    clean_html,
+    compat_parse_qs,
+)
+
+
+class Ro220IE(InfoExtractor):
+    """Extractor for 220.ro, reading everything from the player's flashVars."""
+    IE_NAME = '220.ro'
+    _VALID_URL = r'(?x)(?:https?://)?(?:www\.)?220\.ro/(?P<category>[^/]+)/(?P<shorttitle>[^/]+)/(?P<video_id>[^/]+)'
+    _TEST = {
+        u"url": u"http://www.220.ro/sport/Luati-Le-Banii-Sez-4-Ep-1/LYV6doKo7f/",
+        u'file': u'LYV6doKo7f.mp4',
+        u'md5': u'03af18b73a07b4088753930db7a34add',
+        u'info_dict': {
+            u"title": u"Luati-le Banii sez 4 ep 1",
+            u"description": u"Iata-ne reveniti dupa o binemeritata vacanta. Va astept si pe Facebook cu pareri si comentarii.",
+        }
+    }
+
+    def _real_extract(self, url):
+        """Return the info dict taken from the flashVars <param> of the page."""
+        video_id = re.match(self._VALID_URL, url).group('video_id')
+        webpage = self._download_webpage(url, video_id)
+
+        # flashVars is a query string; every value is parsed into a list.
+        flash_vars = compat_parse_qs(self._search_regex(
+            r'<param name="flashVars" value="([^"]+)"',
+            webpage, u'flashVars'))
+
+        def first(name):
+            # First value of a flashVars key; KeyError if the key is absent.
+            return flash_vars[name][0]
+
+        return {
+            '_type': 'video',
+            'id': video_id,
+            'ext': 'mp4',
+            'url': first('videoURL'),
+            'title': first('title'),
+            'description': clean_html(first('desc')),
+            'thumbnail': first('preview'),
+        }
diff --git a/youtube_dl/extractor/rtlnow.py b/youtube_dl/extractor/rtlnow.py

new file mode 100644 (file)

index 0000000..7bb236c
--- /dev/null
+++ b/youtube_dl/extractor/rtlnow.py
@@ -0,0 +1,126 @@
+# encoding: utf-8
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    clean_html,
+    ExtractorError,
+)
+
+class RTLnowIE(InfoExtractor):
+    """Information Extractor for RTL NOW, RTL2 NOW, SUPER RTL NOW and VOX NOW"""
+    # Named groups of the pattern:
+    #   url       - the whole address without the optional 'http://' prefix
+    #   base_url  - the host part of the address, including the trailing '/'
+    #   video_id  - the number given as container_id or film_id
+    _VALID_URL = r'(?:http://)?(?P<url>(?P<base_url>rtl-now\.rtl\.de/|rtl2now\.rtl2\.de/|(?:www\.)?voxnow\.de/|(?:www\.)?superrtlnow\.de/)[a-zA-Z0-9-]+/[a-zA-Z0-9-]+\.php\?(?:container_id|film_id)=(?P<video_id>[0-9]+)&player=1(?:&season=[0-9]+)?(?:&.*)?)'
+    # One test per supported site.
+    _TESTS = [{
+        u'url': u'http://rtl-now.rtl.de/ahornallee/folge-1.php?film_id=90419&player=1&season=1',
+        u'file': u'90419.flv',
+        u'info_dict': {
+            u'upload_date': u'20070416', 
+            u'title': u'Ahornallee - Folge 1 - Der Einzug',
+            u'description': u'Folge 1 - Der Einzug',
+        },
+        u'params': {
+            u'skip_download': True,
+        },
+        u'skip': u'Only works from Germany',
+    },
+    {
+        u'url': u'http://rtl2now.rtl2.de/aerger-im-revier/episode-15-teil-1.php?film_id=69756&player=1&season=2&index=5',
+        u'file': u'69756.flv',
+        u'info_dict': {
+            u'upload_date': u'20120519', 
+            u'title': u'Ärger im Revier - Ein junger Ladendieb, ein handfester Streit...',
+            u'description': u'Ärger im Revier - Ein junger Ladendieb, ein handfester Streit u.a.',
+            u'thumbnail': u'http://autoimg.static-fra.de/rtl2now/219850/1500x1500/image2.jpg',
+        },
+        u'params': {
+            u'skip_download': True,
+        },
+        u'skip': u'Only works from Germany',
+    },
+    {
+        u'url': u'www.voxnow.de/voxtours/suedafrika-reporter-ii.php?film_id=13883&player=1&season=17',
+        u'file': u'13883.flv',
+        u'info_dict': {
+            u'upload_date': u'20090627', 
+            u'title': u'Voxtours - Südafrika-Reporter II',
+            u'description': u'Südafrika-Reporter II',
+        },
+        u'params': {
+            u'skip_download': True,
+        },
+    },
+    {
+        u'url': u'http://superrtlnow.de/medicopter-117/angst.php?film_id=99205&player=1',
+        u'file': u'99205.flv',
+        u'info_dict': {
+            u'upload_date': u'20080928', 
+            u'title': u'Medicopter 117 - Angst!',
+            u'description': u'Angst!',
+            u'thumbnail': u'http://autoimg.static-fra.de/superrtlnow/287529/1500x1500/image2.jpg'
+        },
+        u'params': {
+            u'skip_download': True,
+        },
+    }]
+
+    def _real_extract(self,url):
+        """Return a one-element list with the info dict of the video.
+
+        The title and thumbnail are read from the video page, the
+        description, upload date and stream address from the 'playerdata'
+        document that the page refers to.
+
+        Raises ExtractorError when the page shows a notice in place of the
+        player, or when no media URL is found in the player data.
+        """
+        mobj = re.match(self._VALID_URL, url)
+
+        # The scheme is optional in _VALID_URL, so it is always added here.
+        webpage_url = u'http://' + mobj.group('url')
+        video_page_url = u'http://' + mobj.group('base_url')
+        video_id = mobj.group(u'video_id')
+
+        webpage = self._download_webpage(webpage_url, video_id)
+
+        # A styled <div> right before the player teaser carries a notice;
+        # when it is present its text is reported as the error.
+        note_m = re.search(r'''(?sx)
+            <div[ ]style="margin-left:[ ]20px;[ ]font-size:[ ]13px;">(.*?)
+            <div[ ]id="playerteaser">''', webpage)
+        if note_m:
+            msg = clean_html(note_m.group(1))
+            raise ExtractorError(msg)
+
+        video_title = self._html_search_regex(r'<title>(?P<title>[^<]+)</title>',
+            webpage, u'title')
+        playerdata_url = self._html_search_regex(r'\'playerdata\': \'(?P<playerdata_url>[^\']+)\'',
+            webpage, u'playerdata_url')
+
+        playerdata = self._download_webpage(playerdata_url, video_id)
+        # The <title> of the player data has the form
+        # '<description> - [Sendung ]vom DD.MM.YYYY HH:MM Uhr'; the year may
+        # also be given with two digits only.
+        mobj = re.search(r'<title><!\[CDATA\[(?P<description>.+?)\s+- (?:Sendung )?vom (?P<upload_date_d>[0-9]{2})\.(?P<upload_date_m>[0-9]{2})\.(?:(?P<upload_date_Y>[0-9]{4})|(?P<upload_date_y>[0-9]{2})) [0-9]{2}:[0-9]{2} Uhr\]\]></title>', playerdata)
+        if mobj:
+            video_description = mobj.group(u'description')
+            if mobj.group('upload_date_Y'):
+                video_upload_date = mobj.group('upload_date_Y')
+            else:
+                # A two-digit year is taken to be in the 2000s.
+                video_upload_date = u'20' + mobj.group('upload_date_y')
+            # Year, month and day are concatenated to YYYYMMDD.
+            video_upload_date += mobj.group('upload_date_m')+mobj.group('upload_date_d')
+        else:
+            # Both values are optional: warn and carry on without them.
+            video_description = None
+            video_upload_date = None
+            self._downloader.report_warning(u'Unable to extract description and upload date')
+
+        # Thumbnail: not every video has a thumbnail
+        mobj = re.search(r'<meta property="og:image" content="(?P<thumbnail>[^"]+)">', webpage)
+        if mobj:
+            video_thumbnail = mobj.group(u'thumbnail')
+        else:
+            video_thumbnail = None
+
+        # The rtmpe address is split after its second path component: the
+        # first part becomes the URL, the rest the play path.
+        mobj = re.search(r'<filename [^>]+><!\[CDATA\[(?P<url>rtmpe://(?:[^/]+/){2})(?P<play_path>[^\]]+)\]\]></filename>', playerdata)
+        if mobj is None:
+            raise ExtractorError(u'Unable to extract media URL')
+        video_url = mobj.group(u'url')
+        video_play_path = u'mp4:' + mobj.group(u'play_path')
+        video_player_url = video_page_url + u'includes/vodplayer.swf'
+
+        return [{
+            'id':          video_id,
+            'url':         video_url,
+            'play_path':   video_play_path,
+            'page_url':    video_page_url,
+            'player_url':  video_player_url,
+            'ext':         'flv',
+            'title':       video_title,
+            'description': video_description,
+            'upload_date': video_upload_date,
+            'thumbnail':   video_thumbnail,
+        }]
diff --git a/youtube_dl/extractor/slashdot.py b/youtube_dl/extractor/slashdot.py

new file mode 100644 (file)

index 0000000..2cba530
--- /dev/null
+++ b/youtube_dl/extractor/slashdot.py
@@ -0,0 +1,23 @@
+import re
+
+from .common import InfoExtractor
+
+
+class SlashdotIE(InfoExtractor):
+    """Extractor for tv.slashdot.org embeds, which are hosted by Ooyala."""
+    # The dots of the host name are escaped so that they match a literal '.'
+    # only, instead of any character.
+    _VALID_URL = r'https?://tv\.slashdot\.org/video/\?embed=(?P<id>.*?)(&|$)'
+
+    _TEST = {
+        u'url': u'http://tv.slashdot.org/video/?embed=JscHMzZDplD0p-yNLOzTfzC3Q3xzJaUz',
+        u'file': u'JscHMzZDplD0p-yNLOzTfzC3Q3xzJaUz.mp4',
+        u'md5': u'd2222e7a4a4c1541b3e0cf732fb26735',
+        u'info_dict': {
+            u'title': u' Meet the Stampede Supercomputing Cluster\'s Administrator',
+        },
+    }
+
+    def _real_extract(self, url):
+        """Return a url result for the first <script src="..."> of the page.
+
+        That address is handed over to the 'Ooyala' extractor.
+        """
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        webpage = self._download_webpage(url, video_id)
+        ooyala_url = self._search_regex(r'<script src="(.*?)"', webpage, 'ooyala url')
+        return self.url_result(ooyala_url, 'Ooyala')
diff --git a/youtube_dl/extractor/sohu.py b/youtube_dl/extractor/sohu.py

new file mode 100644 (file)

index 0000000..77bb0a8
--- /dev/null
+++ b/youtube_dl/extractor/sohu.py
@@ -0,0 +1,90 @@
+# encoding: utf-8
+
+import json
+import re
+
+from .common import InfoExtractor
+from ..utils import ExtractorError
+
+
+class SohuIE(InfoExtractor):
+    """Extractor for tv.sohu.com videos, which are delivered in parts."""
+    _VALID_URL = r'https?://tv\.sohu\.com/\d+?/n(?P<id>\d+)\.shtml.*?'
+
+    _TEST = {
+        u'url': u'http://tv.sohu.com/20130724/n382479172.shtml#super',
+        u'file': u'382479172.mp4',
+        u'md5': u'bde8d9a6ffd82c63a1eefaef4eeefec7',
+        u'info_dict': {
+            u'title': u'MV：Far East Movement《The Illest》',
+        },
+    }
+
+    def _real_extract(self, url):
+        """Return the info dict of the video, or a playlist of its parts.
+
+        A video that consists of a single part is returned as a plain video
+        whose id is the page id; otherwise a playlist is returned with one
+        '<id>_partNN' entry per part.
+
+        Raises ExtractorError when the JSON data lists no format at all.
+        """
+
+        def _fetch_data(vid_id):
+            # Download and decode the JSON description of one format id.
+            # ``video_id`` is bound below, before the first call.
+            base_data_url = u'http://hot.vrs.sohu.com/vrs_flash.action?vid='
+            data_url = base_data_url + str(vid_id)
+            data_json = self._download_webpage(
+                data_url, video_id,
+                note=u'Downloading JSON data for ' + str(vid_id))
+            return json.loads(data_json)
+
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        webpage = self._download_webpage(url, video_id)
+        raw_title = self._html_search_regex(r'(?s)<title>(.+?)</title>',
+                                            webpage, u'video title')
+        # Only the text in front of the first '-' of the page title is kept.
+        title = raw_title.partition('-')[0].strip()
+
+        vid = self._html_search_regex(r'var vid="(\d+)"', webpage,
+                                      u'video path')
+        data = _fetch_data(vid)
+
+        # A value of 0 means that the quality is not available.
+        QUALITIES = ('ori', 'super', 'high', 'nor')
+        vid_ids = [data['data'][q + 'Vid']
+                   for q in QUALITIES
+                   if data['data'][q + 'Vid'] != 0]
+        if not vid_ids:
+            raise ExtractorError(u'No formats available for this video')
+
+        # For now, we just pick the highest available quality
+        # NOTE(review): this takes the LAST available entry in QUALITIES
+        # order; confirm that this is indeed the highest quality.
+        vid_id = vid_ids[-1]
+
+        # ``vid`` is a string (regex capture) while ``vid_id`` comes from the
+        # JSON data, so both are compared as strings. A direct comparison
+        # cannot be true for numeric ids, which made the same JSON document
+        # be downloaded a second time.
+        format_data = data if str(vid_id) == vid else _fetch_data(vid_id)
+        part_count = format_data['data']['totalBlocks']
+        allot = format_data['allot']
+        prot = format_data['prot']
+        clipsURL = format_data['data']['clipsURL']
+        su = format_data['data']['su']
+
+        playlist = []
+        for i in range(part_count):
+            part_url = ('http://%s/?prot=%s&file=%s&new=%s' %
+                        (allot, prot, clipsURL[i], su[i]))
+            part_str = self._download_webpage(
+                part_url, video_id,
+                note=u'Downloading part %d of %d' % (i+1, part_count))
+
+            # The answer is a '|' separated record: field 0 is the prefix of
+            # the download URL and field 3 the value of its 'key' parameter.
+            part_info = part_str.split('|')
+            video_url = '%s%s?key=%s' % (part_info[0], su[i], part_info[3])
+
+            video_info = {
+                'id': '%s_part%02d' % (video_id, i + 1),
+                'title': title,
+                'url': video_url,
+                'ext': 'mp4',
+            }
+            playlist.append(video_info)
+
+        if len(playlist) == 1:
+            info = playlist[0]
+            info['id'] = video_id
+        else:
+            info = {
+                '_type': 'playlist',
+                'entries': playlist,
+                'id': video_id,
+            }
+
+        return info
diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py

index 7c9f1c6b65998d57515b65dea5e9120772e0b019..5f3a5540d2775ae1952d31ab86447ed5151e952f 100644 (file)
--- a/youtube_dl/extractor/soundcloud.py
+++ b/youtube_dl/extractor/soundcloud.py
@@ -4,6 +4,7 @@ import re
  from .common import InfoExtractor
  from ..utils import (
      compat_str,
  from .common import InfoExtractor
  from ..utils import (
      compat_str,
+    compat_urlparse,
  
      ExtractorError,
      unified_strdate,
  
      ExtractorError,
      unified_strdate,
@@ -22,6 +23,7 @@ class SoundcloudIE(InfoExtractor):
      _VALID_URL = r'''^(?:https?://)?
                      (?:(?:(?:www\.)?soundcloud\.com/([\w\d-]+)/([\w\d-]+)/?(?:[?].*)?$)
                         |(?:api\.soundcloud\.com/tracks/(?P<track_id>\d+))
      _VALID_URL = r'''^(?:https?://)?
                      (?:(?:(?:www\.)?soundcloud\.com/([\w\d-]+)/([\w\d-]+)/?(?:[?].*)?$)
                         |(?:api\.soundcloud\.com/tracks/(?P<track_id>\d+))
+                       |(?P<widget>w.soundcloud.com/player/?.*?url=.*)
                      )
                      '''
      IE_NAME = u'soundcloud'
                      )
                      '''
      IE_NAME = u'soundcloud'
@@ -79,6 +81,9 @@ class SoundcloudIE(InfoExtractor):
          if track_id is not None:
              info_json_url = 'http://api.soundcloud.com/tracks/' + track_id + '.json?client_id=' + self._CLIENT_ID
              full_title = track_id
          if track_id is not None:
              info_json_url = 'http://api.soundcloud.com/tracks/' + track_id + '.json?client_id=' + self._CLIENT_ID
              full_title = track_id
+        elif mobj.group('widget'):
+            query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
+            return self.url_result(query['url'][0], ie='Soundcloud')
          else:
              # extract uploader (which is in the url)
              uploader = mobj.group(1)
          else:
              # extract uploader (which is in the url)
              uploader = mobj.group(1)
diff --git a/youtube_dl/extractor/statigram.py b/youtube_dl/extractor/statigram.py

index b8e6b3bf91a05a793db631db9325bb2bc605b8b9..1ea4a9f2f82edce03340d28414d1a77b695d8e52 100644 (file)
--- a/youtube_dl/extractor/statigram.py
+++ b/youtube_dl/extractor/statigram.py
@@ -5,13 +5,13 @@ from .common import InfoExtractor
  class StatigramIE(InfoExtractor):
      _VALID_URL = r'(?:http://)?(?:www\.)?statigr\.am/p/([^/]+)'
      _TEST = {
  class StatigramIE(InfoExtractor):
      _VALID_URL = r'(?:http://)?(?:www\.)?statigr\.am/p/([^/]+)'
      _TEST = {
-        u'url': u'http://statigr.am/p/484091715184808010_284179915',
-        u'file': u'484091715184808010_284179915.mp4',
-        u'md5': u'deda4ff333abe2e118740321e992605b',
+        u'url': u'http://statigr.am/p/522207370455279102_24101272',
+        u'file': u'522207370455279102_24101272.mp4',
+        u'md5': u'6eb93b882a3ded7c378ee1d6884b1814',
          u'info_dict': {
          u'info_dict': {
-            u"uploader_id": u"videoseconds", 
-            u"title": u"Instagram photo by @videoseconds"
-        }
+            u'uploader_id': u'aguynamedpatrick',
+            u'title': u'Instagram photo by @aguynamedpatrick (Patrick Janelle)',
+        },
      }
  
      def _real_extract(self, url):
      }
  
      def _real_extract(self, url):
diff --git a/youtube_dl/extractor/trilulilu.py b/youtube_dl/extractor/trilulilu.py

new file mode 100644 (file)

index 0000000..f278951
--- /dev/null
+++ b/youtube_dl/extractor/trilulilu.py
@@ -0,0 +1,73 @@
+import json
+import re
+import xml.etree.ElementTree
+
+from .common import InfoExtractor
+
+
+class TriluliluIE(InfoExtractor):
+    """Extractor for the video pages of trilulilu.ro."""
+    _VALID_URL = r'(?x)(?:https?://)?(?:www\.)?trilulilu\.ro/video-(?P<category>[^/]+)/(?P<video_id>[^/]+)'
+    _TEST = {
+        u"url": u"http://www.trilulilu.ro/video-animatie/big-buck-bunny-1",
+        u'file': u"big-buck-bunny-1.mp4",
+        u'info_dict': {
+            u"title": u"Big Buck Bunny",
+            u"description": u":) pentru copilul din noi",
+        },
+        # Server ignores Range headers (--test)
+        u"params": {
+            u"skip_download": True
+        }
+    }
+
+    def _real_extract(self, url):
+        """Return the info dict of the video, including all listed formats.
+
+        Title, thumbnail and description come from the Open Graph tags of
+        the page; the formats come from a separate XML document whose
+        address is built from the 'block_flash_vars' object of the page.
+        """
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('video_id')
+
+        webpage = self._download_webpage(url, video_id)
+
+        title = self._og_search_title(webpage)
+        thumbnail = self._og_search_thumbnail(webpage)
+        description = self._og_search_description(webpage)
+
+        # JSON object holding the 'server', 'hash' and 'userid' values that
+        # are substituted into the URLs below.
+        log_str = self._search_regex(
+            r'block_flash_vars[ ]=[ ]({[^}]+})', webpage, u'log info')
+        log = json.loads(log_str)
+
+        format_url = (u'http://fs%(server)s.trilulilu.ro/%(hash)s/'
+                      u'video-formats2' % log)
+        format_str = self._download_webpage(
+            format_url, video_id,
+            note=u'Downloading formats',
+            errnote=u'Error while downloading formats')
+
+        # The parser is given bytes, as in the other extractors: parsing the
+        # decoded text directly fails on Python 2 when it holds non-ASCII
+        # characters.
+        format_doc = xml.etree.ElementTree.fromstring(
+            format_str.encode('utf-8'))
+
+        # '%%s' is left in the template as '%s' and filled in per format.
+        video_url_template = (
+            u'http://fs%(server)s.trilulilu.ro/stream.php?type=video'
+            u'&source=site&hash=%(hash)s&username=%(userid)s&'
+            u'key=ministhebest&format=%%s&sig=&exp=' %
+            log)
+        formats = [
+            {
+                'format': fnode.text,
+                'url': video_url_template % fnode.text,
+            }
+
+            for fnode in format_doc.findall('./formats/format')
+        ]
+
+        info = {
+            '_type': 'video',
+            'id': video_id,
+            'formats': formats,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+        }
+
+        # TODO: Remove when #980 has been merged
+        # Until then the last listed format is used; its extension is the
+        # part of the format name in front of the first '-'.
+        info['url'] = formats[-1]['url']
+        info['ext'] = formats[-1]['format'].partition('-')[0]
+
+        return info
diff --git a/youtube_dl/extractor/unistra.py b/youtube_dl/extractor/unistra.py

new file mode 100644 (file)

index 0000000..516e189
--- /dev/null
+++ b/youtube_dl/extractor/unistra.py
@@ -0,0 +1,32 @@
+import re
+
+from .common import InfoExtractor
+
+class UnistraIE(InfoExtractor):
+    """Extractor for the videos of utv.unistra.fr."""
+    # Dots are escaped so that they match a literal '.' only, instead of any
+    # character.
+    _VALID_URL = r'http://utv\.unistra\.fr/(?:index|video)\.php\?id_video=(\d+)'
+
+    _TEST = {
+        u'url': u'http://utv.unistra.fr/video.php?id_video=154',
+        u'file': u'154.mp4',
+        u'md5': u'736f605cfdc96724d55bb543ab3ced24',
+        u'info_dict': {
+            u'title': u'M!ss Yella',
+            u'description': u'md5:104892c71bd48e55d70b902736b81bbf',
+        },
+    }
+
+    def _real_extract(self, url):
+        """Return the info dict read from the player settings of the page.
+
+        Every field is looked up with the extractor's own regex helpers, so
+        a page without the expected player settings is reported by them
+        rather than failing with an AttributeError on a missing match.
+        """
+        # Local names avoid shadowing the builtins ``id`` and ``file``.
+        video_id = re.match(self._VALID_URL, url).group(1)
+        webpage = self._download_webpage(url, video_id)
+        file_path = self._search_regex(r'file: "(.*?)",', webpage, u'file')
+        title = self._html_search_regex(r'<title>UTV - (.*?)</', webpage, u'title')
+
+        video_url = 'http://vod-flash.u-strasbg.fr:8080/' + file_path
+
+        return {'id': video_id,
+                'title': title,
+                'ext': 'mp4',
+                'url': video_url,
+                'description': self._html_search_regex(r'<meta name="Description" content="(.*?)"', webpage, u'description', flags=re.DOTALL),
+                'thumbnail': self._search_regex(r'image: "(.*?)"', webpage, u'thumbnail'),
+                }
diff --git a/youtube_dl/extractor/vevo.py b/youtube_dl/extractor/vevo.py

index 14abd58e8384ea4c99c75e571279d790cdf36707..70408c4f0edc2ba5b00a9e793cf1e1c2e0ba30ed 100644 (file)
--- a/youtube_dl/extractor/vevo.py
+++ b/youtube_dl/extractor/vevo.py
@@ -11,14 +11,14 @@ class VevoIE(InfoExtractor):
      Accepts urls from vevo.com or in the format 'vevo:{id}'
      (currently used by MTVIE)
      """
      Accepts urls from vevo.com or in the format 'vevo:{id}'
      (currently used by MTVIE)
      """
-    _VALID_URL = r'((http://www.vevo.com/watch/.*?/.*?/)|(vevo:))(?P<id>.*)$'
+    _VALID_URL = r'((http://www.vevo.com/watch/.*?/.*?/)|(vevo:))(?P<id>.*?)(\?|$)'
      _TEST = {
          u'url': u'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280',
          u'file': u'GB1101300280.mp4',
          u'md5': u'06bea460acb744eab74a9d7dcb4bfd61',
          u'info_dict': {
      _TEST = {
          u'url': u'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280',
          u'file': u'GB1101300280.mp4',
          u'md5': u'06bea460acb744eab74a9d7dcb4bfd61',
          u'info_dict': {
-            u"upload_date": u"20130624", 
-            u"uploader": u"Hurts", 
+            u"upload_date": u"20130624",
+            u"uploader": u"Hurts",
              u"title": u"Somebody to Die For"
          }
      }
              u"title": u"Somebody to Die For"
          }
      }
diff --git a/youtube_dl/extractor/videofyme.py b/youtube_dl/extractor/videofyme.py

index 04106672b583443fb3785a60a283e555a7f9fdfc..94f64ffa5aaba21ec4ca4470d6034352acbf411e 100644 (file)
--- a/youtube_dl/extractor/videofyme.py
+++ b/youtube_dl/extractor/videofyme.py
@@ -14,7 +14,7 @@ class VideofyMeIE(InfoExtractor):
      _TEST = {
          u'url': u'http://www.videofy.me/thisisvideofyme/1100701',
          u'file':  u'1100701.mp4',
      _TEST = {
          u'url': u'http://www.videofy.me/thisisvideofyme/1100701',
          u'file':  u'1100701.mp4',
-        u'md5': u'2046dd5758541d630bfa93e741e2fd79',
+        u'md5': u'c77d700bdc16ae2e9f3c26019bd96143',
          u'info_dict': {
              u'title': u'This is VideofyMe',
              u'description': None,
          u'info_dict': {
              u'title': u'This is VideofyMe',
              u'description': None,
@@ -32,9 +32,8 @@ class VideofyMeIE(InfoExtractor):
          config = xml.etree.ElementTree.fromstring(config_xml.encode('utf-8'))
          video = config.find('video')
          sources = video.find('sources')
          config = xml.etree.ElementTree.fromstring(config_xml.encode('utf-8'))
          video = config.find('video')
          sources = video.find('sources')
-        url_node = find_xpath_attr(sources, 'source', 'id', 'HQ on')
-        if url_node is None:
-            url_node = find_xpath_attr(sources, 'source', 'id', 'HQ off')
+        url_node = next(node for node in [find_xpath_attr(sources, 'source', 'id', 'HQ %s' % key) 
+            for key in ['on', 'av', 'off']] if node is not None)
          video_url = url_node.find('url').text
  
          return {'id': video_id,
          video_url = url_node.find('url').text
  
          return {'id': video_id,
diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py

index cc9c8d0188749761b79b90652d3c0c843a24eda3..512e06e2a620161704c8b1d02854c1279276179e 100644 (file)
--- a/youtube_dl/extractor/vimeo.py
+++ b/youtube_dl/extractor/vimeo.py
@@ -20,18 +20,31 @@ class VimeoIE(InfoExtractor):
      _VALID_URL = r'(?P<proto>https?://)?(?:(?:www|player)\.)?vimeo(?P<pro>pro)?\.com/(?:(?:(?:groups|album)/[^/]+)|(?:.*?)/)?(?P<direct_link>play_redirect_hls\?clip_id=)?(?:videos?/)?(?P<id>[0-9]+)(?:[?].*)?$'
      _NETRC_MACHINE = 'vimeo'
      IE_NAME = u'vimeo'
      _VALID_URL = r'(?P<proto>https?://)?(?:(?:www|player)\.)?vimeo(?P<pro>pro)?\.com/(?:(?:(?:groups|album)/[^/]+)|(?:.*?)/)?(?P<direct_link>play_redirect_hls\?clip_id=)?(?:videos?/)?(?P<id>[0-9]+)(?:[?].*)?$'
      _NETRC_MACHINE = 'vimeo'
      IE_NAME = u'vimeo'
-    _TEST = {
-        u'url': u'http://vimeo.com/56015672',
-        u'file': u'56015672.mp4',
-        u'md5': u'8879b6cc097e987f02484baf890129e5',
-        u'info_dict': {
-            u"upload_date": u"20121220", 
-            u"description": u"This is a test case for youtube-dl.\nFor more information, see github.com/rg3/youtube-dl\nTest chars: \u2605 \" ' \u5e78 / \\ \u00e4 \u21ad \U0001d550", 
-            u"uploader_id": u"user7108434", 
-            u"uploader": u"Filippo Valsorda", 
-            u"title": u"youtube-dl test video - \u2605 \" ' \u5e78 / \\ \u00e4 \u21ad \U0001d550"
-        }
-    }
+    _TESTS = [
+        {
+            u'url': u'http://vimeo.com/56015672',
+            u'file': u'56015672.mp4',
+            u'md5': u'8879b6cc097e987f02484baf890129e5',
+            u'info_dict': {
+                u"upload_date": u"20121220", 
+                u"description": u"This is a test case for youtube-dl.\nFor more information, see github.com/rg3/youtube-dl\nTest chars: \u2605 \" ' \u5e78 / \\ \u00e4 \u21ad \U0001d550", 
+                u"uploader_id": u"user7108434", 
+                u"uploader": u"Filippo Valsorda", 
+                u"title": u"youtube-dl test video - \u2605 \" ' \u5e78 / \\ \u00e4 \u21ad \U0001d550",
+            },
+        },
+        {
+            u'url': u'http://vimeopro.com/openstreetmapus/state-of-the-map-us-2013/video/68093876',
+            u'file': u'68093876.mp4',
+            u'md5': u'3b5ca6aa22b60dfeeadf50b72e44ed82',
+            u'note': u'Vimeo Pro video (#1197)',
+            u'info_dict': {
+                u'uploader_id': u'openstreetmapus', 
+                u'uploader': u'OpenStreetMap US', 
+                u'title': u'Andy Allan - Putting the Carto into OpenStreetMap Cartography',
+            },
+        },
+    ]
  
      def _login(self):
          (username, password) = self._get_login_info()
  
      def _login(self):
          (username, password) = self._get_login_info()
@@ -83,7 +96,9 @@ class VimeoIE(InfoExtractor):
          video_id = mobj.group('id')
          if not mobj.group('proto'):
              url = 'https://' + url
          video_id = mobj.group('id')
          if not mobj.group('proto'):
              url = 'https://' + url
-        if mobj.group('direct_link') or mobj.group('pro'):
+        elif mobj.group('pro'):
+            url = 'http://player.vimeo.com/video/' + video_id
+        elif mobj.group('direct_link'):
              url = 'https://vimeo.com/' + video_id
  
          # Retrieve video webpage to extract further information
              url = 'https://vimeo.com/' + video_id
  
          # Retrieve video webpage to extract further information
diff --git a/youtube_dl/extractor/wat.py b/youtube_dl/extractor/wat.py

index 7d228edac1fb8b291189487482d0c718b3707e08..29c25f0e309c7d4179d1226ed0a079a0d17fcba6 100644 (file)
--- a/youtube_dl/extractor/wat.py
+++ b/youtube_dl/extractor/wat.py
@@ -6,7 +6,6 @@ import re
  from .common import InfoExtractor
  
  from ..utils import (
  from .common import InfoExtractor
  
  from ..utils import (
-    compat_urllib_parse,
      unified_strdate,
  )
  
      unified_strdate,
  )
  
diff --git a/youtube_dl/extractor/xhamster.py b/youtube_dl/extractor/xhamster.py

index 0f1feeffd777ccc152b1ea1c4bb15dc00dc6b494..88b8b6be09f7a8f892db8266b3e68df14e22bfe7 100644 (file)
--- a/youtube_dl/extractor/xhamster.py
+++ b/youtube_dl/extractor/xhamster.py
@@ -3,7 +3,8 @@ import re
  from .common import InfoExtractor
  from ..utils import (
      compat_urllib_parse,
  from .common import InfoExtractor
  from ..utils import (
      compat_urllib_parse,
-
+    unescapeHTML,
+    determine_ext,
      ExtractorError,
  )
  
      ExtractorError,
  )
  
@@ -36,15 +37,16 @@ class XHamsterIE(InfoExtractor):
              video_url = compat_urllib_parse.unquote(mobj.group('file'))
          else:
              video_url = mobj.group('server')+'/key='+mobj.group('file')
              video_url = compat_urllib_parse.unquote(mobj.group('file'))
          else:
              video_url = mobj.group('server')+'/key='+mobj.group('file')
-        video_extension = video_url.split('.')[-1]
  
          video_title = self._html_search_regex(r'<title>(?P<title>.+?) - xHamster\.com</title>',
              webpage, u'title')
  
  
          video_title = self._html_search_regex(r'<title>(?P<title>.+?) - xHamster\.com</title>',
              webpage, u'title')
  
-        # Can't see the description anywhere in the UI
-        # video_description = self._html_search_regex(r'<span>Description: </span>(?P<description>[^<]+)',
-        #     webpage, u'description', fatal=False)
-        # if video_description: video_description = unescapeHTML(video_description)
+        # Only a few videos have a description
+        mobj = re.search('<span>Description: </span>(?P<description>[^<]+)', webpage)
+        if mobj:
+            video_description = unescapeHTML(mobj.group('description'))
+        else:
+            video_description = None
  
          mobj = re.search(r'hint=\'(?P<upload_date_Y>[0-9]{4})-(?P<upload_date_m>[0-9]{2})-(?P<upload_date_d>[0-9]{2}) [0-9]{2}:[0-9]{2}:[0-9]{2} [A-Z]{3,4}\'', webpage)
          if mobj:
  
          mobj = re.search(r'hint=\'(?P<upload_date_Y>[0-9]{4})-(?P<upload_date_m>[0-9]{2})-(?P<upload_date_d>[0-9]{2}) [0-9]{2}:[0-9]{2}:[0-9]{2} [A-Z]{3,4}\'', webpage)
          if mobj:
@@ -62,9 +64,9 @@ class XHamsterIE(InfoExtractor):
          return [{
              'id':       video_id,
              'url':      video_url,
          return [{
              'id':       video_id,
              'url':      video_url,
-            'ext':      video_extension,
+            'ext':      determine_ext(video_url),
              'title':    video_title,
              'title':    video_title,
-            # 'description': video_description,
+            'description': video_description,
              'upload_date': video_upload_date,
              'uploader_id': video_uploader_id,
              'thumbnail': video_thumbnail
              'upload_date': video_upload_date,
              'uploader_id': video_uploader_id,
              'thumbnail': video_thumbnail
diff --git a/youtube_dl/extractor/youporn.py b/youtube_dl/extractor/youporn.py

index d1156bf425ea3f1b5522bb528a042758d30a2075..c85fd4b5af0ccdd3f259bd403ddd4311f2de5fdb 100644 (file)
--- a/youtube_dl/extractor/youporn.py
+++ b/youtube_dl/extractor/youporn.py
@@ -12,14 +12,16 @@ from ..utils import (
      unescapeHTML,
      unified_strdate,
  )
      unescapeHTML,
      unified_strdate,
  )
-
+from ..aes import (
+    aes_decrypt_text
+)
  
  class YouPornIE(InfoExtractor):
      _VALID_URL = r'^(?:https?://)?(?:\w+\.)?youporn\.com/watch/(?P<videoid>[0-9]+)/(?P<title>[^/]+)'
      _TEST = {
          u'url': u'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/',
          u'file': u'505835.mp4',
  
  class YouPornIE(InfoExtractor):
      _VALID_URL = r'^(?:https?://)?(?:\w+\.)?youporn\.com/watch/(?P<videoid>[0-9]+)/(?P<title>[^/]+)'
      _TEST = {
          u'url': u'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/',
          u'file': u'505835.mp4',
-        u'md5': u'c37ddbaaa39058c76a7e86c6813423c1',
+        u'md5': u'71ec5fcfddacf80f495efa8b6a8d9a89',
          u'info_dict': {
              u"upload_date": u"20101221", 
              u"description": u"Love & Sex Answers: http://bit.ly/DanAndJenn -- Is It Unhealthy To Masturbate Daily?", 
          u'info_dict': {
              u"upload_date": u"20101221", 
              u"description": u"Love & Sex Answers: http://bit.ly/DanAndJenn -- Is It Unhealthy To Masturbate Daily?", 
@@ -75,7 +77,15 @@ class YouPornIE(InfoExtractor):
          # Get all of the links from the page
          LINK_RE = r'(?s)<a href="(?P<url>[^"]+)">'
          links = re.findall(LINK_RE, download_list_html)
          # Get all of the links from the page
          LINK_RE = r'(?s)<a href="(?P<url>[^"]+)">'
          links = re.findall(LINK_RE, download_list_html)
-        if(len(links) == 0):
+        
+        # Get link of hd video if available
+        mobj = re.search(r'var encryptedQuality720URL = \'(?P<encrypted_video_url>[a-zA-Z0-9+/]+={0,2})\';', webpage)
+        if mobj != None:
+            encrypted_video_url = mobj.group(u'encrypted_video_url')
+            video_url = aes_decrypt_text(encrypted_video_url, video_title, 32).decode('utf-8')
+            links = [video_url] + links
+        
+        if not links:
              raise ExtractorError(u'ERROR: no known formats available for video')
  
          self.to_screen(u'Links found: %d' % len(links))
              raise ExtractorError(u'ERROR: no known formats available for video')
  
          self.to_screen(u'Links found: %d' % len(links))
@@ -112,7 +122,7 @@ class YouPornIE(InfoExtractor):
              self._print_formats(formats)
              return
  
              self._print_formats(formats)
              return
  
-        req_format = self._downloader.params.get('format', None)
+        req_format = self._downloader.params.get('format', 'best')
          self.to_screen(u'Format: %s' % req_format)
  
          if req_format is None or req_format == 'best':
          self.to_screen(u'Format: %s' % req_format)
  
          if req_format is None or req_format == 'best':
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py

index f7471895052ed328465b888ba402eeebaa1351d6..9e2373bd5473c8a23d5177ee6d4450a9fe86bcdb 100644 (file)
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -141,7 +141,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                           (?:                                                  # the various things that can precede the ID:
                               (?:(?:v|embed|e)/)                               # v/ or embed/ or e/
                               |(?:                                             # or the v= param in all its forms
                           (?:                                                  # the various things that can precede the ID:
                               (?:(?:v|embed|e)/)                               # v/ or embed/ or e/
                               |(?:                                             # or the v= param in all its forms
-                                 (?:watch|movie(?:_popup)?(?:\.php)?)?              # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
+                                 (?:(?:watch|movie)(?:_popup)?(?:\.php)?)?    # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
                                   (?:\?|\#!?)                                  # the params delimiter ? or # or #!
                                   (?:.*?&)?                                    # any other preceding param (like /?s=tuff&v=xxxx)
                                   v=
                                   (?:\?|\#!?)                                  # the params delimiter ? or # or #!
                                   (?:.*?&)?                                    # any other preceding param (like /?s=tuff&v=xxxx)
                                   v=
@@ -155,11 +155,22 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
      # Listed in order of quality
      _available_formats = ['38', '37', '46', '22', '45', '35', '44', '34', '18', '43', '6', '5', '17', '13',
                            '95', '94', '93', '92', '132', '151',
      # Listed in order of quality
      _available_formats = ['38', '37', '46', '22', '45', '35', '44', '34', '18', '43', '6', '5', '17', '13',
                            '95', '94', '93', '92', '132', '151',
+                          # 3D
                            '85', '84', '102', '83', '101', '82', '100',
                            '85', '84', '102', '83', '101', '82', '100',
+                          # Dash video
+                          '138', '137', '248', '136', '247', '135', '246',
+                          '245', '244', '134', '243', '133', '242', '160',
+                          # Dash audio
+                          '141', '172', '140', '171', '139',
                            ]
      _available_formats_prefer_free = ['38', '46', '37', '45', '22', '44', '35', '43', '34', '18', '6', '5', '17', '13',
                                        '95', '94', '93', '92', '132', '151',
                                        '85', '102', '84', '101', '83', '100', '82',
                            ]
      _available_formats_prefer_free = ['38', '46', '37', '45', '22', '44', '35', '43', '34', '18', '6', '5', '17', '13',
                                        '95', '94', '93', '92', '132', '151',
                                        '85', '102', '84', '101', '83', '100', '82',
+                                      # Dash video
+                                      '138', '248', '137', '247', '136', '246', '245',
+                                      '244', '135', '243', '134', '242', '133', '160',
+                                      # Dash audio
+                                      '172', '141', '171', '140', '139',
                                        ]
      _video_extensions = {
          '13': '3gp',
                                        ]
      _video_extensions = {
          '13': '3gp',
@@ -181,7 +192,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
          '100': 'webm',
          '101': 'webm',
          '102': 'webm',
          '100': 'webm',
          '101': 'webm',
          '102': 'webm',
-        
+
          # videos that use m3u8
          '92': 'mp4',
          '93': 'mp4',
          # videos that use m3u8
          '92': 'mp4',
          '93': 'mp4',
@@ -190,6 +201,29 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
          '96': 'mp4',
          '132': 'mp4',
          '151': 'mp4',
          '96': 'mp4',
          '132': 'mp4',
          '151': 'mp4',
+
+        # Dash mp4
+        '133': 'mp4',
+        '134': 'mp4',
+        '135': 'mp4',
+        '136': 'mp4',
+        '137': 'mp4',
+        '138': 'mp4',
+        '139': 'mp4',
+        '140': 'mp4',
+        '141': 'mp4',
+        '160': 'mp4',
+
+        # Dash webm
+        '171': 'webm',
+        '172': 'webm',
+        '242': 'webm',
+        '243': 'webm',
+        '244': 'webm',
+        '245': 'webm',
+        '246': 'webm',
+        '247': 'webm',
+        '248': 'webm',
      }
      _video_dimensions = {
          '5': '240x400',
      }
      _video_dimensions = {
          '5': '240x400',
@@ -217,11 +251,58 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
          '96': '1080p',
          '100': '360p',
          '101': '480p',
          '96': '1080p',
          '100': '360p',
          '101': '480p',
-        '102': '720p',        
+        '102': '720p',
          '132': '240p',
          '151': '72p',
          '132': '240p',
          '151': '72p',
+        '133': '240p',
+        '134': '360p',
+        '135': '480p',
+        '136': '720p',
+        '137': '1080p',
+        '138': '>1080p',
+        '139': '48k',
+        '140': '128k',
+        '141': '256k',
+        '160': '192p',
+        '171': '128k',
+        '172': '256k',
+        '242': '240p',
+        '243': '360p',
+        '244': '480p',
+        '245': '480p',
+        '246': '480p',
+        '247': '720p',
+        '248': '1080p',
+    }
+    _special_itags = {
+        '82': '3D',
+        '83': '3D',
+        '84': '3D',
+        '85': '3D',
+        '100': '3D',
+        '101': '3D',
+        '102': '3D',
+        '133': 'DASH Video',
+        '134': 'DASH Video',
+        '135': 'DASH Video',
+        '136': 'DASH Video',
+        '137': 'DASH Video',
+        '138': 'DASH Video',
+        '139': 'DASH Audio',
+        '140': 'DASH Audio',
+        '141': 'DASH Audio',
+        '160': 'DASH Video',
+        '171': 'DASH Audio',
+        '172': 'DASH Audio',
+        '242': 'DASH Video',
+        '243': 'DASH Video',
+        '244': 'DASH Video',
+        '245': 'DASH Video',
+        '246': 'DASH Video',
+        '247': 'DASH Video',
+        '248': 'DASH Video',
      }
      }
-    _3d_itags = ['85', '84', '102', '83', '101', '82', '100']
+
      IE_NAME = u'youtube'
      _TESTS = [
          {
      IE_NAME = u'youtube'
      _TESTS = [
          {
@@ -254,8 +335,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
              u"info_dict": {
                  u"upload_date": u"20120506",
                  u"title": u"Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]",
              u"info_dict": {
                  u"upload_date": u"20120506",
                  u"title": u"Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]",
-                u"description": u"md5:b085c9804f5ab69f4adea963a2dceb3c",
-                u"uploader": u"IconaPop",
+                u"description": u"md5:3e2666e0a55044490499ea45fe9037b7",
+                u"uploader": u"Icona Pop",
                  u"uploader_id": u"IconaPop"
              }
          },
                  u"uploader_id": u"IconaPop"
              }
          },
@@ -338,21 +419,23 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
          elif len(s) == 89:
              return s[84:78:-1] + s[87] + s[77:60:-1] + s[0] + s[59:3:-1]
          elif len(s) == 88:
          elif len(s) == 89:
              return s[84:78:-1] + s[87] + s[77:60:-1] + s[0] + s[59:3:-1]
          elif len(s) == 88:
-            return s[48] + s[81:67:-1] + s[82] + s[66:62:-1] + s[85] + s[61:48:-1] + s[67] + s[47:12:-1] + s[3] + s[11:3:-1] + s[2] + s[12]
+            return s[7:28] + s[87] + s[29:45] + s[55] + s[46:55] + s[2] + s[56:87] + s[28]
          elif len(s) == 87:
              return s[6:27] + s[4] + s[28:39] + s[27] + s[40:59] + s[2] + s[60:]
          elif len(s) == 86:
          elif len(s) == 87:
              return s[6:27] + s[4] + s[28:39] + s[27] + s[40:59] + s[2] + s[60:]
          elif len(s) == 86:
-            return s[5:20] + s[2] + s[21:]
+            return s[83:36:-1] + s[0] + s[35:2:-1]
          elif len(s) == 85:
              return s[83:34:-1] + s[0] + s[33:27:-1] + s[3] + s[26:19:-1] + s[34] + s[18:3:-1] + s[27]
          elif len(s) == 84:
          elif len(s) == 85:
              return s[83:34:-1] + s[0] + s[33:27:-1] + s[3] + s[26:19:-1] + s[34] + s[18:3:-1] + s[27]
          elif len(s) == 84:
-            return s[83:27:-1] + s[0] + s[26:5:-1] + s[2:0:-1] + s[27]
+            return s[81:36:-1] + s[0] + s[35:2:-1]
          elif len(s) == 83:
              return s[81:64:-1] + s[82] + s[63:52:-1] + s[45] + s[51:45:-1] + s[1] + s[44:1:-1] + s[0]
          elif len(s) == 82:
          elif len(s) == 83:
              return s[81:64:-1] + s[82] + s[63:52:-1] + s[45] + s[51:45:-1] + s[1] + s[44:1:-1] + s[0]
          elif len(s) == 82:
-            return s[36] + s[79:67:-1] + s[81] + s[66:40:-1] + s[33] + s[39:36:-1] + s[40] + s[35] + s[0] + s[67] + s[32:0:-1] + s[34]
+            return s[1:19] + s[0] + s[20:68] + s[19] + s[69:82]
          elif len(s) == 81:
              return s[56] + s[79:56:-1] + s[41] + s[55:41:-1] + s[80] + s[40:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
          elif len(s) == 81:
              return s[56] + s[79:56:-1] + s[41] + s[55:41:-1] + s[80] + s[40:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
+        elif len(s) == 80:
+            return s[1:19] + s[0] + s[20:68] + s[19] + s[69:80]
          elif len(s) == 79:
              return s[54] + s[77:54:-1] + s[39] + s[53:39:-1] + s[78] + s[38:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
  
          elif len(s) == 79:
              return s[54] + s[77:54:-1] + s[39] + s[53:39:-1] + s[78] + s[38:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
  
@@ -375,11 +458,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
          try:
              sub_list = compat_urllib_request.urlopen(request).read().decode('utf-8')
          except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
          try:
              sub_list = compat_urllib_request.urlopen(request).read().decode('utf-8')
          except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
-            return (u'unable to download video subtitles: %s' % compat_str(err), None)
+            self._downloader.report_warning(u'unable to download video subtitles: %s' % compat_str(err))
+            return {}
          sub_lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', sub_list)
          sub_lang_list = dict((l[1], l[0]) for l in sub_lang_list)
          if not sub_lang_list:
          sub_lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', sub_list)
          sub_lang_list = dict((l[1], l[0]) for l in sub_lang_list)
          if not sub_lang_list:
-            return (u'video doesn\'t have subtitles', None)
+            self._downloader.report_warning(u'video doesn\'t have subtitles')
+            return {}
          return sub_lang_list
  
      def _list_available_subtitles(self, video_id):
          return sub_lang_list
  
      def _list_available_subtitles(self, video_id):
@@ -388,8 +473,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
  
      def _request_subtitle(self, sub_lang, sub_name, video_id, format):
          """
  
      def _request_subtitle(self, sub_lang, sub_name, video_id, format):
          """
-        Return tuple:
-        (error_message, sub_lang, sub)
+        Return the subtitle as a string, or None if it is not found
          """
          self.report_video_subtitles_request(video_id, sub_lang, format)
          params = compat_urllib_parse.urlencode({
          """
          self.report_video_subtitles_request(video_id, sub_lang, format)
          params = compat_urllib_parse.urlencode({
@@ -402,21 +486,24 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
          try:
              sub = compat_urllib_request.urlopen(url).read().decode('utf-8')
          except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
          try:
              sub = compat_urllib_request.urlopen(url).read().decode('utf-8')
          except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
-            return (u'unable to download video subtitles: %s' % compat_str(err), None, None)
+            self._downloader.report_warning(u'unable to download video subtitles for %s: %s' % (sub_lang, compat_str(err)))
+            return
          if not sub:
          if not sub:
-            return (u'Did not fetch video subtitles', None, None)
-        return (None, sub_lang, sub)
+            self._downloader.report_warning(u'Did not fetch video subtitles')
+            return
+        return sub
  
      def _request_automatic_caption(self, video_id, webpage):
          """We need the webpage for getting the captions url, pass it as an
             argument to speed up the process."""
  
      def _request_automatic_caption(self, video_id, webpage):
          """We need the webpage for getting the captions url, pass it as an
             argument to speed up the process."""
-        sub_lang = self._downloader.params.get('subtitleslang') or 'en'
+        sub_lang = (self._downloader.params.get('subtitleslangs') or ['en'])[0]
          sub_format = self._downloader.params.get('subtitlesformat')
          self.to_screen(u'%s: Looking for automatic captions' % video_id)
          mobj = re.search(r';ytplayer.config = ({.*?});', webpage)
          err_msg = u'Couldn\'t find automatic captions for "%s"' % sub_lang
          if mobj is None:
          sub_format = self._downloader.params.get('subtitlesformat')
          self.to_screen(u'%s: Looking for automatic captions' % video_id)
          mobj = re.search(r';ytplayer.config = ({.*?});', webpage)
          err_msg = u'Couldn\'t find automatic captions for "%s"' % sub_lang
          if mobj is None:
-            return [(err_msg, None, None)]
+            self._downloader.report_warning(err_msg)
+            return {}
          player_config = json.loads(mobj.group(1))
          try:
              args = player_config[u'args']
          player_config = json.loads(mobj.group(1))
          try:
              args = player_config[u'args']
@@ -431,40 +518,43 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
              })
              subtitles_url = caption_url + '&' + params
              sub = self._download_webpage(subtitles_url, video_id, u'Downloading automatic captions')
              })
              subtitles_url = caption_url + '&' + params
              sub = self._download_webpage(subtitles_url, video_id, u'Downloading automatic captions')
-            return [(None, sub_lang, sub)]
-        except KeyError:
-            return [(err_msg, None, None)]
-
-    def _extract_subtitle(self, video_id):
+            return {sub_lang: sub}
+            # An extractor error can be raised by the download process if there are
+        # no automatic captions but there are subtitles
+        except (KeyError, ExtractorError):
+            self._downloader.report_warning(err_msg)
+            return {}
+    
+    def _extract_subtitles(self, video_id):
          """
          """
-        Return a list with a tuple:
-        [(error_message, sub_lang, sub)]
+        Return a dictionary: {language: subtitles} or {} if the subtitles
+        couldn't be found
          """
          """
-        sub_lang_list = self._get_available_subtitles(video_id)
+        available_subs_list = self._get_available_subtitles(video_id)
          sub_format = self._downloader.params.get('subtitlesformat')
          sub_format = self._downloader.params.get('subtitlesformat')
-        if  isinstance(sub_lang_list,tuple): #There was some error, it didn't get the available subtitles
-            return [(sub_lang_list[0], None, None)]
-        if self._downloader.params.get('subtitleslang', False):
-            sub_lang = self._downloader.params.get('subtitleslang')
-        elif 'en' in sub_lang_list:
-            sub_lang = 'en'
+        if  not available_subs_list: #There was some error, it didn't get the available subtitles
+            return {}
+        if self._downloader.params.get('allsubtitles', False):
+            sub_lang_list = available_subs_list
          else:
          else:
-            sub_lang = list(sub_lang_list.keys())[0]
-        if not sub_lang in sub_lang_list:
-            return [(u'no closed captions found in the specified language "%s"' % sub_lang, None, None)]
-
-        subtitle = self._request_subtitle(sub_lang, sub_lang_list[sub_lang].encode('utf-8'), video_id, sub_format)
-        return [subtitle]
-
-    def _extract_all_subtitles(self, video_id):
-        sub_lang_list = self._get_available_subtitles(video_id)
-        sub_format = self._downloader.params.get('subtitlesformat')
-        if  isinstance(sub_lang_list,tuple): #There was some error, it didn't get the available subtitles
-            return [(sub_lang_list[0], None, None)]
-        subtitles = []
+            if self._downloader.params.get('subtitleslangs', False):
+                reqested_langs = self._downloader.params.get('subtitleslangs')
+            elif 'en' in available_subs_list:
+                reqested_langs = ['en']
+            else:
+                reqested_langs = [list(available_subs_list.keys())[0]]
+
+            sub_lang_list = {}
+            for sub_lang in reqested_langs:
+                if not sub_lang in available_subs_list:
+                    self._downloader.report_warning(u'no closed captions found in the specified language "%s"' % sub_lang)
+                    continue
+                sub_lang_list[sub_lang] = available_subs_list[sub_lang]
+        subtitles = {}
          for sub_lang in sub_lang_list:
              subtitle = self._request_subtitle(sub_lang, sub_lang_list[sub_lang].encode('utf-8'), video_id, sub_format)
          for sub_lang in sub_lang_list:
              subtitle = self._request_subtitle(sub_lang, sub_lang_list[sub_lang].encode('utf-8'), video_id, sub_format)
-            subtitles.append(subtitle)
+            if subtitle:
+                subtitles[sub_lang] = subtitle
          return subtitles
  
      def _print_formats(self, formats):
          return subtitles
  
      def _print_formats(self, formats):
@@ -472,7 +562,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
          for x in formats:
              print('%s\t:\t%s\t[%s]%s' %(x, self._video_extensions.get(x, 'flv'),
                                          self._video_dimensions.get(x, '???'),
          for x in formats:
              print('%s\t:\t%s\t[%s]%s' %(x, self._video_extensions.get(x, 'flv'),
                                          self._video_dimensions.get(x, '???'),
-                                        ' (3D)' if x in self._3d_itags else ''))
+                                        ' ('+self._special_itags[x]+')' if x in self._special_itags else ''))
  
      def _extract_id(self, url):
          mobj = re.match(self._VALID_URL, url, re.VERBOSE)
  
      def _extract_id(self, url):
          mobj = re.match(self._VALID_URL, url, re.VERBOSE)
@@ -655,25 +745,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
          # subtitles
          video_subtitles = None
  
          # subtitles
          video_subtitles = None
  
-        if self._downloader.params.get('writesubtitles', False):
-            video_subtitles = self._extract_subtitle(video_id)
-            if video_subtitles:
-                (sub_error, sub_lang, sub) = video_subtitles[0]
-                if sub_error:
-                    self._downloader.report_warning(sub_error)
-        
-        if self._downloader.params.get('writeautomaticsub', False):
+        if self._downloader.params.get('writesubtitles', False) or self._downloader.params.get('allsubtitles', False):
+            video_subtitles = self._extract_subtitles(video_id)
+        elif self._downloader.params.get('writeautomaticsub', False):
              video_subtitles = self._request_automatic_caption(video_id, video_webpage)
              video_subtitles = self._request_automatic_caption(video_id, video_webpage)
-            (sub_error, sub_lang, sub) = video_subtitles[0]
-            if sub_error:
-                self._downloader.report_warning(sub_error)
-
-        if self._downloader.params.get('allsubtitles', False):
-            video_subtitles = self._extract_all_subtitles(video_id)
-            for video_subtitle in video_subtitles:
-                (sub_error, sub_lang, sub) = video_subtitle
-                if sub_error:
-                    self._downloader.report_warning(sub_error)
  
          if self._downloader.params.get('listsubtitles', False):
              self._list_available_subtitles(video_id)
  
          if self._downloader.params.get('listsubtitles', False):
              self._list_available_subtitles(video_id)
@@ -699,6 +774,17 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
              if m_s is not None:
                  self.to_screen(u'%s: Encrypted signatures detected.' % video_id)
                  video_info['url_encoded_fmt_stream_map'] = [args['url_encoded_fmt_stream_map']]
              if m_s is not None:
                  self.to_screen(u'%s: Encrypted signatures detected.' % video_id)
                  video_info['url_encoded_fmt_stream_map'] = [args['url_encoded_fmt_stream_map']]
+            m_s = re.search(r'[&,]s=', args.get('adaptive_fmts', u''))
+            if m_s is not None:
+                if 'url_encoded_fmt_stream_map' in video_info:
+                    video_info['url_encoded_fmt_stream_map'][0] += ',' + args['adaptive_fmts']
+                else:
+                    video_info['url_encoded_fmt_stream_map'] = [args['adaptive_fmts']]
+            elif 'adaptive_fmts' in video_info:
+                if 'url_encoded_fmt_stream_map' in video_info:
+                    video_info['url_encoded_fmt_stream_map'][0] += ',' + video_info['adaptive_fmts'][0]
+                else:
+                    video_info['url_encoded_fmt_stream_map'] = video_info['adaptive_fmts']
          except ValueError:
              pass
  
          except ValueError:
              pass
  
@@ -758,7 +844,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
  
              video_format = '{0} - {1}{2}'.format(format_param if format_param else video_extension,
                                                self._video_dimensions.get(format_param, '???'),
  
              video_format = '{0} - {1}{2}'.format(format_param if format_param else video_extension,
                                                self._video_dimensions.get(format_param, '???'),
-                                              ' (3D)' if format_param in self._3d_itags else '')
+                                              ' ('+self._special_itags[format_param]+')' if format_param in self._special_itags else '')
  
              results.append({
                  'id':       video_id,
  
              results.append({
                  'id':       video_id,
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py

index 5dd5b2923d2a773a526006d71769d10486fe8730..201802cee6e56cbfffeed573c7fc42592a33fdab 100644 (file)
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -1,19 +1,20 @@
  #!/usr/bin/env python
  # -*- coding: utf-8 -*-
  
  #!/usr/bin/env python
  # -*- coding: utf-8 -*-
  
+import datetime
+import email.utils
  import errno
  import gzip
  import io
  import json
  import locale
  import os
  import errno
  import gzip
  import io
  import json
  import locale
  import os
+import platform
  import re
  import re
+import socket
  import sys
  import traceback
  import zlib
  import sys
  import traceback
  import zlib
-import email.utils
-import socket
-import datetime
  
  try:
      import urllib.request as compat_urllib_request
  
  try:
      import urllib.request as compat_urllib_request
@@ -60,6 +61,11 @@ try:
  except ImportError: # Python 2
      import httplib as compat_http_client
  
  except ImportError: # Python 2
      import httplib as compat_http_client
  
+try:
+    from urllib.error import HTTPError as compat_HTTPError
+except ImportError:  # Python 2
+    from urllib2 import HTTPError as compat_HTTPError
+
  try:
      from subprocess import DEVNULL
      compat_subprocess_get_DEVNULL = lambda: DEVNULL
  try:
      from subprocess import DEVNULL
      compat_subprocess_get_DEVNULL = lambda: DEVNULL
@@ -207,7 +213,7 @@ if sys.version_info >= (2,7):
      def find_xpath_attr(node, xpath, key, val):
          """ Find the xpath xpath[@key=val] """
          assert re.match(r'^[a-zA-Z]+$', key)
      def find_xpath_attr(node, xpath, key, val):
          """ Find the xpath xpath[@key=val] """
          assert re.match(r'^[a-zA-Z]+$', key)
-        assert re.match(r'^[a-zA-Z@\s]*$', val)
+        assert re.match(r'^[a-zA-Z0-9@\s]*$', val)
          expr = xpath + u"[@%s='%s']" % (key, val)
          return node.find(expr)
  else:
          expr = xpath + u"[@%s='%s']" % (key, val)
          return node.find(expr)
  else:
@@ -489,7 +495,7 @@ def make_HTTPS_handler(opts):
  
  class ExtractorError(Exception):
      """Error during info extraction."""
  
  class ExtractorError(Exception):
      """Error during info extraction."""
-    def __init__(self, msg, tb=None, expected=False):
+    def __init__(self, msg, tb=None, expected=False, cause=None):
          """ tb, if given, is the original traceback (so that it can be printed out).
          If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
          """
          """ tb, if given, is the original traceback (so that it can be printed out).
          If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
          """
@@ -502,6 +508,7 @@ class ExtractorError(Exception):
  
          self.traceback = tb
          self.exc_info = sys.exc_info()  # preserve original exception
  
          self.traceback = tb
          self.exc_info = sys.exc_info()  # preserve original exception
+        self.cause = cause
  
      def format_traceback(self):
          if self.traceback is None:
  
      def format_traceback(self):
          if self.traceback is None:
@@ -622,8 +629,23 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
          old_resp = resp
          # gzip
          if resp.headers.get('Content-encoding', '') == 'gzip':
          old_resp = resp
          # gzip
          if resp.headers.get('Content-encoding', '') == 'gzip':
-            gz = gzip.GzipFile(fileobj=io.BytesIO(resp.read()), mode='r')
-            resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
+            content = resp.read()
+            gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
+            try:
+                uncompressed = io.BytesIO(gz.read())
+            except IOError as original_ioerror:
+                # There may be junk at the end of the file
+                # See http://stackoverflow.com/q/4928560/35070 for details
+                for i in range(1, 1024):
+                    try:
+                        gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
+                        uncompressed = io.BytesIO(gz.read())
+                    except IOError:
+                        continue
+                    break
+                else:
+                    raise original_ioerror
+            resp = self.addinfourl_wrapper(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
              resp.msg = old_resp.msg
          # deflate
          if resp.headers.get('Content-encoding', '') == 'deflate':
              resp.msg = old_resp.msg
          # deflate
          if resp.headers.get('Content-encoding', '') == 'deflate':
@@ -657,6 +679,9 @@ def determine_ext(url, default_ext=u'unknown_video'):
      else:
          return default_ext
  
      else:
          return default_ext
  
+def subtitles_filename(filename, sub_lang, sub_format):
+    return filename.rsplit('.', 1)[0] + u'.' + sub_lang + u'.' + sub_format
+
  def date_from_str(date_str):
      """
      Return a datetime object from a string in the format YYYYMMDD or
  def date_from_str(date_str):
      """
      Return a datetime object from a string in the format YYYYMMDD or
@@ -708,3 +733,31 @@ class DateRange(object):
          return self.start <= date <= self.end
      def __str__(self):
          return '%s - %s' % ( self.start.isoformat(), self.end.isoformat())
          return self.start <= date <= self.end
      def __str__(self):
          return '%s - %s' % ( self.start.isoformat(), self.end.isoformat())
+
+
+def platform_name():
+    """ Returns the platform name as a compat_str """
+    res = platform.platform()
+    if isinstance(res, bytes):
+        res = res.decode(preferredencoding())
+
+    assert isinstance(res, compat_str)
+    return res
+
+
+def bytes_to_intlist(bs):
+    if not bs:
+        return []
+    if isinstance(bs[0], int):  # Python 3
+        return list(bs)
+    else:
+        return [ord(c) for c in bs]
+
+
+def intlist_to_bytes(xs):
+    if not xs:
+        return b''
+    if isinstance(chr(0), bytes):  # Python 2
+        return ''.join([chr(x) for x in xs])
+    else:
+        return bytes(xs)
diff --git a/youtube_dl/version.py b/youtube_dl/version.py

index 8c93a275c1ca3b7ad344571e1499498b8dbc529a..c2832018188fb3a395481a5468c9f8489f10e46f 100644 (file)
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,2 +1,2 @@
  
  
-__version__ = '2013.08.17'
+__version__ = '2013.08.29'
author	Rogério Brito <rbrito@ime.usp.br>
	Thu, 29 Aug 2013 22:09:34 +0000 (19:09 -0300)
committer	Rogério Brito <rbrito@ime.usp.br>
	Thu, 29 Aug 2013 22:09:34 +0000 (19:09 -0300)
README.md		patch \| blob \| history
README.txt		patch \| blob \| history
devscripts/gh-pages/add-version.py		patch \| blob \| history
devscripts/gh-pages/update-feed.py		patch \| blob \| history
devscripts/release.sh		patch \| blob \| history
devscripts/youtube_genalgo.py		patch \| blob \| history
test/test_all_urls.py		patch \| blob \| history
test/test_download.py		patch \| blob \| history
test/test_youtube_subtitles.py		patch \| blob \| history
youtube-dl		patch \| blob \| history
youtube-dl.1		patch \| blob \| history
youtube-dl.bash-completion		patch \| blob \| history
youtube_dl/FileDownloader.py		patch \| blob \| history
youtube_dl/PostProcessor.py		patch \| blob \| history
youtube_dl/YoutubeDL.py		patch \| blob \| history
youtube_dl/__init__.py		patch \| blob \| history
youtube_dl/aes.py	[new file with mode: 0644]	patch \| blob
youtube_dl/extractor/__init__.py		patch \| blob \| history
youtube_dl/extractor/addanime.py	[new file with mode: 0644]	patch \| blob
youtube_dl/extractor/appletrailers.py	[new file with mode: 0644]	patch \| blob
youtube_dl/extractor/c56.py		patch \| blob \| history
youtube_dl/extractor/canalc2.py	[new file with mode: 0644]	patch \| blob
youtube_dl/extractor/canalplus.py		patch \| blob \| history
youtube_dl/extractor/cnn.py	[new file with mode: 0644]	patch \| blob
youtube_dl/extractor/collegehumor.py		patch \| blob \| history
youtube_dl/extractor/common.py		patch \| blob \| history
youtube_dl/extractor/dailymotion.py		patch \| blob \| history
youtube_dl/extractor/funnyordie.py		patch \| blob \| history
youtube_dl/extractor/generic.py		patch \| blob \| history
youtube_dl/extractor/googleplus.py		patch \| blob \| history
youtube_dl/extractor/hark.py	[new file with mode: 0644]	patch \| blob
youtube_dl/extractor/ign.py		patch \| blob \| history
youtube_dl/extractor/jeuxvideo.py	[new file with mode: 0644]	patch \| blob
youtube_dl/extractor/kankan.py		patch \| blob \| history
youtube_dl/extractor/mit.py	[new file with mode: 0644]	patch \| blob
youtube_dl/extractor/nbc.py	[new file with mode: 0644]	patch \| blob
youtube_dl/extractor/orf.py	[new file with mode: 0644]	patch \| blob
youtube_dl/extractor/pbs.py	[new file with mode: 0644]	patch \| blob
youtube_dl/extractor/ro220.py	[new file with mode: 0644]	patch \| blob
youtube_dl/extractor/rtlnow.py	[new file with mode: 0644]	patch \| blob
youtube_dl/extractor/slashdot.py	[new file with mode: 0644]	patch \| blob
youtube_dl/extractor/sohu.py	[new file with mode: 0644]	patch \| blob
youtube_dl/extractor/soundcloud.py		patch \| blob \| history
youtube_dl/extractor/statigram.py		patch \| blob \| history
youtube_dl/extractor/trilulilu.py	[new file with mode: 0644]	patch \| blob
youtube_dl/extractor/unistra.py	[new file with mode: 0644]	patch \| blob
youtube_dl/extractor/vevo.py		patch \| blob \| history
youtube_dl/extractor/videofyme.py		patch \| blob \| history
youtube_dl/extractor/vimeo.py		patch \| blob \| history
youtube_dl/extractor/wat.py		patch \| blob \| history
youtube_dl/extractor/xhamster.py		patch \| blob \| history
youtube_dl/extractor/youporn.py		patch \| blob \| history
youtube_dl/extractor/youtube.py		patch \| blob \| history
youtube_dl/utils.py		patch \| blob \| history
youtube_dl/version.py		patch \| blob \| history