Raphaël G. Git Repositories - youtubedl/commitdiff
Imported Upstream version 2013.06.34
author Rogério Brito <rbrito@ime.usp.br>
Tue, 2 Jul 2013 00:43:32 +0000 (21:43 -0300)
committer Rogério Brito <rbrito@ime.usp.br>
Tue, 2 Jul 2013 00:43:32 +0000 (21:43 -0300)
22 files changed:
README.md
README.txt
devscripts/youtube_genalgo.py [new file with mode: 0644]
test/helper.py [new file with mode: 0644]
test/test_youtube_lists.py
test/test_youtube_sig.py [new file with mode: 0755]
test/test_youtube_subtitles.py
test/tests.json
youtube-dl
youtube-dl.1
youtube-dl.bash-completion
youtube_dl/FileDownloader.py
youtube_dl/YoutubeDL.py
youtube_dl/__init__.py
youtube_dl/extractor/__init__.py
youtube_dl/extractor/arte.py
youtube_dl/extractor/comedycentral.py
youtube_dl/extractor/cspan.py [new file with mode: 0644]
youtube_dl/extractor/vevo.py
youtube_dl/extractor/wimp.py [new file with mode: 0644]
youtube_dl/extractor/youtube.py
youtube_dl/version.py

index 7d190249df4e36b95f476bfaef26e86cb397e922..81b86e264c117954bfddae70f8ba4feb2e16a198 100644 (file)
--- a/README.md
+++ b/README.md
@@ -116,12 +116,14 @@ which means you can modify it, redistribute it or use it however you like.
     -F, --list-formats         list all available formats (currently youtube
                                only)
     --write-sub                write subtitle file (currently youtube only)
+    --write-auto-sub           write automatic subtitle file (currently youtube
+                               only)
     --only-sub                 [deprecated] alias of --skip-download
     --all-subs                 downloads all the available subtitles of the
                                video (currently youtube only)
     --list-subs                lists all available subtitles for the video
                                (currently youtube only)
-    --sub-format FORMAT        subtitle format [srt/sbv] (default=srt)
+    --sub-format FORMAT        subtitle format [srt/sbv/vtt] (default=srt)
                                (currently youtube only)
     --sub-lang LANG            language of the subtitles to download (optional)
                                use IETF language tags like 'en'
index 56e25126ff4d977b1a69b524f8e6544b9d4bcaa1..239709b72c295eee0827fc5edc0bc97fe60d9663 100644 (file)
@@ -131,12 +131,14 @@ Video Format Options:
     -F, --list-formats         list all available formats (currently youtube
                                only)
     --write-sub                write subtitle file (currently youtube only)
+    --write-auto-sub           write automatic subtitle file (currently youtube
+                               only)
     --only-sub                 [deprecated] alias of --skip-download
     --all-subs                 downloads all the available subtitles of the
                                video (currently youtube only)
     --list-subs                lists all available subtitles for the video
                                (currently youtube only)
-    --sub-format FORMAT        subtitle format [srt/sbv] (default=srt)
+    --sub-format FORMAT        subtitle format [srt/sbv/vtt] (default=srt)
                                (currently youtube only)
     --sub-lang LANG            language of the subtitles to download (optional)
                                use IETF language tags like 'en'
diff --git a/devscripts/youtube_genalgo.py b/devscripts/youtube_genalgo.py
new file mode 100644 (file)
index 0000000..b168cea
--- /dev/null
@@ -0,0 +1,76 @@
+#!/usr/bin/env python
+
+# Generate youtube signature algorithm from test cases
+
+import sys
+
+tests = [
+    ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<",
+     "J:|}][{=+-_)(*&;%$#@>MNBVCXZASDFGH^KLPOIUYTREWQ0987654321mnbvcxzasdfghrklpoiuytej"),
+    ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>.<",
+     "!?;:|}][{=+-_)(*&^$#@/MNBVCXZASqFGHJKLPOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuytr"),
+    ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<",
+     "ertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!/#$%^&*()_-+={[|};?@"),
+    ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?/>.<",
+     "{>/?;}[.=+-_)(*&^%$#@!MqBVCXZASDFwHJKLPOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuytr"),
+    ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<",
+     "<.>?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWe098765432rmnbvcxzasdfghjklpoiuyt1"),
+    ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/>.<",
+     "D.>/?;}[{=+_)(*&^%$#!MNBVCXeAS<FGHJKLPOIUYTREWZ0987654321mnbvcxzasdfghjklpoiuytrQ"),
+    ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>.<",
+     "Q>/?;}[{=+-(*<^%$#@!MNBVCXZASDFGHKLPOIUY8REWT0q&7654321mnbvcxzasdfghjklpoiuytrew9"),
+]
+
+def find_matching(wrong, right):
+    """Return the expressions on `s` that rebuild `right` out of `wrong`.
+
+    Every character of `right` is looked up in `wrong` with str.index (so
+    the first occurrence is used) and the index list is passed to
+    compress(). Raises ValueError if a character of `right` is not in `wrong`.
+    """
+    idxs = [wrong.index(c) for c in right]
+    return compress(idxs)
+
+def compress(idxs):
+    """Yield expressions on `s` that select the indices in `idxs`, in order.
+
+    Runs of consecutive indices (step +1 or -1) are merged into a single
+    slice expression such as 's[:3]' or 's[7:5:-1]'; any other index is
+    emitted on its own as 's[4]'. An empty `idxs` yields nothing.
+    """
+    def _genslice(start, end, step):
+        # `end` is the last index of the run, so the slice stop is end+step.
+        starts = '' if start == 0 else str(start)
+        # A descending run ending at index 0 would get the stop -1, which
+        # Python counts from the end of the string (giving an empty slice);
+        # leave the stop out so the slice runs down through index 0.
+        ends = (':%d' % (end+step)) if end + step >= 0 else ':'
+        steps = '' if step == 1 else (':%d' % step)
+        return 's[%s%s%s]' % (starts, ends, steps)
+
+    if not idxs:
+        return
+
+    step = None
+    for i, prev in zip(idxs[1:], idxs[:-1]):
+        if step is not None:
+            if i - prev == step:
+                continue
+            # The run ended at `prev`; `i` is handled as `prev` next round.
+            yield _genslice(start, prev, step)
+            step = None
+            continue
+        if i - prev in [-1, 1]:
+            step = i - prev
+            start = prev
+            continue
+        else:
+            yield 's[%d]' % prev
+    # Flush the final element or run. idxs[-1] is used rather than the loop
+    # variable so that a single-element list (loop never entered) works.
+    last = idxs[-1]
+    if step is None:
+        yield 's[%d]' % last
+    else:
+        yield _genslice(start, last, step)
+
+def _assert_compress(inp, exp):
+    """Self-check: fail if list(compress(inp)) differs from exp."""
+    res = list(compress(inp))
+    if res != exp:
+        # Print both values first so the mismatch is visible when the assert fires.
+        print('Got %r, expected %r' % (res, exp))
+        assert res == exp
+# Sanity checks; they run whenever this script is executed or imported.
+_assert_compress([0,2,4,6], ['s[0]', 's[2]', 's[4]', 's[6]'])
+_assert_compress([0,1,2,4,6,7], ['s[:3]', 's[4]', 's[6:8]'])
+_assert_compress([8,0,1,2,4,7,6,9], ['s[8]', 's[:3]', 's[4]', 's[7:5:-1]', 's[9]'])
+
+def gen(wrong, right, indent):
+    """Return the `if len(s) == N:` branch that rebuilds `right` from `wrong`.
+
+    The `if` line is emitted without leading indentation (the caller
+    prepends it); the `return` line is prefixed with `indent` plus four spaces.
+    """
+    code = ' + '.join(find_matching(wrong, right))
+    return 'if len(s) == %d:\n%s    return %s\n' % (len(wrong), indent, code)
+
+def genall(tests):
+    """Return one if/elif chain with a branch per (wrong, right) pair in `tests`."""
+    indent = ' ' * 8
+    # Joining with indent + 'el' turns every branch after the first into an 'elif'.
+    return indent + (indent + 'el').join(gen(wrong, right, indent) for wrong,right in tests)
+
+def main():
+    """Print the code generated from the module-level `tests` table."""
+    print(genall(tests))
+
+if __name__ == '__main__':
+    main()
diff --git a/test/helper.py b/test/helper.py
new file mode 100644 (file)
index 0000000..842ffc2
--- /dev/null
@@ -0,0 +1,33 @@
+import io
+import json
+import os.path
+
+from youtube_dl import YoutubeDL, YoutubeDLHandler
+from youtube_dl.utils import (
+    compat_cookiejar,
+    compat_urllib_request,
+)
+
+# General configuration (from __init__, not very elegant...)
+jar = compat_cookiejar.CookieJar()
+cookie_processor = compat_urllib_request.HTTPCookieProcessor(jar)
+proxy_handler = compat_urllib_request.ProxyHandler()
+opener = compat_urllib_request.build_opener(proxy_handler, cookie_processor, YoutubeDLHandler())
+compat_urllib_request.install_opener(opener)
+
+PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), "parameters.json")
+with io.open(PARAMETERS_FILE, encoding='utf-8') as pf:
+    parameters = json.load(pf)
+
+class FakeYDL(YoutubeDL):
+    """YoutubeDL stand-in for tests: prints messages, raises on trouble, records downloads."""
+    def __init__(self):
+        # YoutubeDL.__init__ is not called; only the two attributes below are set.
+        self.result = []
+        # Different instances of the downloader can't share the same dictionary:
+        # some tests set the "sublang" parameter, which would break the md5 checks.
+        self.params = dict(parameters)
+    def to_screen(self, s):
+        print(s)
+    def trouble(self, s, tb=None):
+        # Raise instead of reporting, so the calling test fails immediately.
+        raise Exception(s)
+    def download(self, x):
+        # Record the argument instead of downloading anything.
+        self.result.append(x)
\ No newline at end of file
index 320b44082f4a0ca7cbf115a5b057cd04e88a5290..4486b7eb0fdcad7f7e6658b3126df734e0574bf5 100644 (file)
@@ -10,30 +10,8 @@ sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
 from youtube_dl.extractor import YoutubeUserIE, YoutubePlaylistIE, YoutubeIE, YoutubeChannelIE
 from youtube_dl.utils import *
-from youtube_dl import YoutubeDL
 
-PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), "parameters.json")
-with io.open(PARAMETERS_FILE, encoding='utf-8') as pf:
-    parameters = json.load(pf)
-
-# General configuration (from __init__, not very elegant...)
-jar = compat_cookiejar.CookieJar()
-cookie_processor = compat_urllib_request.HTTPCookieProcessor(jar)
-proxy_handler = compat_urllib_request.ProxyHandler()
-opener = compat_urllib_request.build_opener(proxy_handler, cookie_processor, YoutubeDLHandler())
-compat_urllib_request.install_opener(opener)
-
-class FakeYDL(YoutubeDL):
-    def __init__(self):
-        self.result = []
-        self.params = parameters
-    def to_screen(self, s):
-        print(s)
-    def trouble(self, s, tb=None):
-        raise Exception(s)
-    def extract_info(self, url):
-        self.result.append(url)
-        return url
+from helper import FakeYDL
 
 class TestYoutubeLists(unittest.TestCase):
     def assertIsPlaylist(self,info):
diff --git a/test/test_youtube_sig.py b/test/test_youtube_sig.py
new file mode 100755 (executable)
index 0000000..e87b625
--- /dev/null
@@ -0,0 +1,57 @@
+#!/usr/bin/env python
+
+import unittest
+import sys
+
+# Allow direct execution
+import os
+sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from youtube_dl.extractor.youtube import YoutubeIE
+from helper import FakeYDL
+
+sig = YoutubeIE(FakeYDL())._decrypt_signature
+
+class TestYoutubeSig(unittest.TestCase):
+    """Check YoutubeIE._decrypt_signature (bound as `sig`) on fixed input/output pairs.
+
+    NOTE(review): the test names appear to carry the length of the input
+    signature (test_88 uses an 88-character input) -- confirm.
+    """
+    def test_43_43(self):
+        wrong = '5AEEAE0EC39677BC65FD9021CCD115F1F2DBD5A59E4.C0B243A3E2DED6769199AF3461781E75122AE135135'
+        right = '931EA22157E1871643FA9519676DED253A342B0C.4E95A5DBD2F1F511DCC1209DF56CB77693CE0EAE'
+        self.assertEqual(sig(wrong), right)
+
+    def test_88(self):
+        wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<"
+        right = "J:|}][{=+-_)(*&;%$#@>MNBVCXZASDFGH^KLPOIUYTREWQ0987654321mnbvcxzasdfghrklpoiuytej"
+        self.assertEqual(sig(wrong), right)
+
+    def test_87(self):
+        wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>.<"
+        right = "!?;:|}][{=+-_)(*&^$#@/MNBVCXZASqFGHJKLPOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuytr"
+        self.assertEqual(sig(wrong), right)
+
+    def test_86(self):
+        wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<"
+        right = "ertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!/#$%^&*()_-+={[|};?@"
+        self.assertEqual(sig(wrong), right)
+
+    def test_85(self):
+        wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?/>.<"
+        right = "{>/?;}[.=+-_)(*&^%$#@!MqBVCXZASDFwHJKLPOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuytr"
+        self.assertEqual(sig(wrong), right)
+
+    def test_84(self):
+        wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<"
+        right = "<.>?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWe098765432rmnbvcxzasdfghjklpoiuyt1"
+        self.assertEqual(sig(wrong), right)
+
+    def test_83(self):
+        wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/>.<"
+        right = "D.>/?;}[{=+_)(*&^%$#!MNBVCXeAS<FGHJKLPOIUYTREWZ0987654321mnbvcxzasdfghjklpoiuytrQ"
+        self.assertEqual(sig(wrong), right)
+
+    def test_82(self):
+        wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>.<"
+        right = "Q>/?;}[{=+-(*<^%$#@!MNBVCXZASDFGHKLPOIUY8REWT0q&7654321mnbvcxzasdfghjklpoiuytrew9"
+        self.assertEqual(sig(wrong), right)
+
+if __name__ == '__main__':
+    unittest.main()
index e8f5e4ae70a85e29c363dafe6417c614a409849e..86e09c9b1b397187acc0f28d6d03a1191fa7846f 100644 (file)
@@ -12,31 +12,7 @@ sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
 from youtube_dl.extractor import YoutubeIE
 from youtube_dl.utils import *
-from youtube_dl import YoutubeDL
-
-PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), "parameters.json")
-with io.open(PARAMETERS_FILE, encoding='utf-8') as pf:
-    parameters = json.load(pf)
-
-# General configuration (from __init__, not very elegant...)
-jar = compat_cookiejar.CookieJar()
-cookie_processor = compat_urllib_request.HTTPCookieProcessor(jar)
-proxy_handler = compat_urllib_request.ProxyHandler()
-opener = compat_urllib_request.build_opener(proxy_handler, cookie_processor, YoutubeDLHandler())
-compat_urllib_request.install_opener(opener)
-
-class FakeYDL(YoutubeDL):
-    def __init__(self):
-        self.result = []
-        # Different instances of the downloader can't share the same dictionary
-        # some test set the "sublang" parameter, which would break the md5 checks.
-        self.params = dict(parameters)
-    def to_screen(self, s):
-        print(s)
-    def trouble(self, s, tb=None):
-        raise Exception(s)
-    def download(self, x):
-        self.result.append(x)
+from helper import FakeYDL
 
 md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest()
 
@@ -84,7 +60,7 @@ class TestYoutubeSubtitles(unittest.TestCase):
         info_dict = IE.extract('QRS8MkLhQmM')
         subtitles = info_dict[0]['subtitles']
         self.assertEqual(len(subtitles), 13)
-    def test_youtube_subtitles_format(self):
+    def test_youtube_subtitles_sbv_format(self):
         DL = FakeYDL()
         DL.params['writesubtitles'] = True
         DL.params['subtitlesformat'] = 'sbv'
@@ -92,6 +68,14 @@ class TestYoutubeSubtitles(unittest.TestCase):
         info_dict = IE.extract('QRS8MkLhQmM')
         sub = info_dict[0]['subtitles'][0]
         self.assertEqual(md5(sub[2]), '13aeaa0c245a8bed9a451cb643e3ad8b')
+    def test_youtube_subtitles_vtt_format(self):
+        DL = FakeYDL()
+        DL.params['writesubtitles'] = True
+        DL.params['subtitlesformat'] = 'vtt'
+        IE = YoutubeIE(DL)
+        info_dict = IE.extract('QRS8MkLhQmM')
+        sub = info_dict[0]['subtitles'][0]
+        self.assertEqual(md5(sub[2]), '356cdc577fde0c6783b9b822e7206ff7')
     def test_youtube_list_subtitles(self):
         DL = FakeYDL()
         DL.params['listsubtitles'] = True
@@ -100,7 +84,7 @@ class TestYoutubeSubtitles(unittest.TestCase):
         self.assertEqual(info_dict, None)
     def test_youtube_automatic_captions(self):
         DL = FakeYDL()
-        DL.params['writesubtitles'] = True
+        DL.params['writeautomaticsub'] = True
         DL.params['subtitleslang'] = 'it'
         IE = YoutubeIE(DL)
         info_dict = IE.extract('8YoUxe5ncPo')
index 5f4f642e8ad5036831f7548b1ebdec45d94a6988..ebc7a123c1de30a00941abad2f7e916d6e0991fb 100644 (file)
     "info_dict": {
         "title": "卡马乔国足开大脚长传冲吊集锦"
     }
+  },
+  {
+    "name": "CSpan",
+    "url": "http://www.c-spanvideo.org/program/HolderonV",
+    "file": "315139.flv",
+    "md5": "74a623266956f69e4df0068ab6c80fe4",
+    "info_dict": {
+        "title": "Attorney General Eric Holder on Voting Rights Act Decision"
+    },
+    "skip": "Requires rtmpdump"
+  },
+  {
+    "name": "Wimp",
+    "url": "http://www.wimp.com/deerfence/",
+    "file": "deerfence.flv",
+    "md5": "8b215e2e0168c6081a1cf84b2846a2b5",
+    "info_dict": {
+        "title": "Watch Till End: Herd of deer jump over a fence."
+    }
   }
 ]
index 20ef4314130f781ffb810b483139b2f6f65e6420..876dea94ca6600f774ed42e2755cc662a9d1afc8 100755 (executable)
Binary files a/youtube-dl and b/youtube-dl differ
index 9707195572534b16e6a0af74a6e64a83998c8673..0ac019f43d8a64d8cc1b48c2df7ce7e7c5766885 100644 (file)
@@ -136,12 +136,14 @@ redistribute it or use it however you like.
 \-F,\ \-\-list\-formats\ \ \ \ \ \ \ \ \ list\ all\ available\ formats\ (currently\ youtube
 \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ only)
 \-\-write\-sub\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ write\ subtitle\ file\ (currently\ youtube\ only)
+\-\-write\-auto\-sub\ \ \ \ \ \ \ \ \ \ \ write\ automatic\ subtitle\ file\ (currently\ youtube
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ only)
 \-\-only\-sub\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ [deprecated]\ alias\ of\ \-\-skip\-download
 \-\-all\-subs\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ downloads\ all\ the\ available\ subtitles\ of\ the
 \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ video\ (currently\ youtube\ only)
 \-\-list\-subs\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ lists\ all\ available\ subtitles\ for\ the\ video
 \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ (currently\ youtube\ only)
-\-\-sub\-format\ FORMAT\ \ \ \ \ \ \ \ subtitle\ format\ [srt/sbv]\ (default=srt)
+\-\-sub\-format\ FORMAT\ \ \ \ \ \ \ \ subtitle\ format\ [srt/sbv/vtt]\ (default=srt)
 \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ (currently\ youtube\ only)
 \-\-sub\-lang\ LANG\ \ \ \ \ \ \ \ \ \ \ \ language\ of\ the\ subtitles\ to\ download\ (optional)
 \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ use\ IETF\ language\ tags\ like\ \[aq]en\[aq]
index 67409bc79f95f9e1f1cd361d3d80c5269ebaad60..a3e9bdfd1e90555adfb9e1b44452123fec8cbba9 100644 (file)
@@ -3,7 +3,7 @@ __youtube-dl()
     local cur prev opts
     COMPREPLY=()
     cur="${COMP_WORDS[COMP_CWORD]}"
-    opts="--help --version --update --ignore-errors --rate-limit --retries --buffer-size --no-resize-buffer --dump-user-agent --user-agent --referer --list-extractors --proxy --no-check-certificate --test --playlist-start --playlist-end --match-title --reject-title --max-downloads --min-filesize --max-filesize --date --datebefore --dateafter --title --id --literal --auto-number --output --autonumber-size --restrict-filenames --batch-file --no-overwrites --continue --no-continue --cookies --no-part --no-mtime --write-description --write-info-json --write-thumbnail --quiet --simulate --skip-download --get-url --get-title --get-id --get-thumbnail --get-description --get-filename --get-format --newline --no-progress --console-title --verbose --dump-intermediate-pages --format --all-formats --prefer-free-formats --max-quality --list-formats --write-sub --only-sub --all-subs --list-subs --sub-format --sub-lang --username --password --netrc --video-password --extract-audio --audio-format --audio-quality --recode-video --keep-video --no-post-overwrites"
+    opts="--help --version --update --ignore-errors --rate-limit --retries --buffer-size --no-resize-buffer --dump-user-agent --user-agent --referer --list-extractors --proxy --no-check-certificate --test --playlist-start --playlist-end --match-title --reject-title --max-downloads --min-filesize --max-filesize --date --datebefore --dateafter --title --id --literal --auto-number --output --autonumber-size --restrict-filenames --batch-file --no-overwrites --continue --no-continue --cookies --no-part --no-mtime --write-description --write-info-json --write-thumbnail --quiet --simulate --skip-download --get-url --get-title --get-id --get-thumbnail --get-description --get-filename --get-format --newline --no-progress --console-title --verbose --dump-intermediate-pages --format --all-formats --prefer-free-formats --max-quality --list-formats --write-sub --write-auto-sub --only-sub --all-subs --list-subs --sub-format --sub-lang --username --password --netrc --video-password --extract-audio --audio-format --audio-quality --recode-video --keep-video --no-post-overwrites"
 
     if [[ ${cur} == * ]] ; then
         COMPREPLY=( $(compgen -W "${opts}" -- ${cur}) )
index 445f3e85e6813fe82fc20bc74d3e33fc83d997b2..155895fe26bb13c11d0e5ae5cec5379a911460df 100644 (file)
@@ -137,7 +137,7 @@ class FileDownloader(object):
         self.ydl.report_warning(*args, **kargs)
 
     def report_error(self, *args, **kargs):
-        self.ydl.error(*args, **kargs)
+        self.ydl.report_error(*args, **kargs)
 
     def slow_down(self, start_time, byte_counter):
         """Sleep if the download speed is over the rate limit."""
index b4a966b7035d322173b8595b5a30141ae7299b34..9931c98e9fe02dc3fbcc90df7a01eaf9aa6e7fb9 100644 (file)
@@ -72,9 +72,10 @@ class YoutubeDL(object):
     writeinfojson:     Write the video description to a .info.json file
     writethumbnail:    Write the thumbnail image to a file
     writesubtitles:    Write the video subtitles to a file
+    writeautomaticsub: Write the automatic subtitles to a file
     allsubtitles:      Downloads all the subtitles of the video
     listsubtitles:     Lists all available subtitles for the video
-    subtitlesformat:   Subtitle format [sbv/srt] (default=srt)
+    subtitlesformat:   Subtitle format [srt/sbv/vtt] (default=srt)
     subtitleslang:     Language of the subtitles to download
     keepvideo:         Keep the video file after post-processing
     daterange:         A DateRange object, download only if the upload_date is in the range.
@@ -474,7 +475,7 @@ class YoutubeDL(object):
                 self.report_error(u'Cannot write description file ' + descfn)
                 return
 
-        if self.params.get('writesubtitles', False) and 'subtitles' in info_dict and info_dict['subtitles']:
+        if (self.params.get('writesubtitles', False) or self.params.get('writeautomaticsub')) and 'subtitles' in info_dict and info_dict['subtitles']:
             # subtitles download errors are already managed as troubles in relevant IE
             # that way it will silently go on when used with unsupporting IE
             subtitle = info_dict['subtitles'][0]
index 2acaab66859c3a268083ee738fbf6ea9d8c518f0..6a8fc5e96036bfec41414022055c8e4663eb4097 100644 (file)
@@ -191,6 +191,9 @@ def parseOpts(overrideArguments=None):
     video_format.add_option('--write-sub', '--write-srt',
             action='store_true', dest='writesubtitles',
             help='write subtitle file (currently youtube only)', default=False)
+    video_format.add_option('--write-auto-sub', '--write-automatic-sub',
+            action='store_true', dest='writeautomaticsub',
+            help='write automatic subtitle file (currently youtube only)', default=False)
     video_format.add_option('--only-sub',
             action='store_true', dest='skip_download',
             help='[deprecated] alias of --skip-download', default=False)
@@ -202,7 +205,7 @@ def parseOpts(overrideArguments=None):
             help='lists all available subtitles for the video (currently youtube only)', default=False)
     video_format.add_option('--sub-format',
             action='store', dest='subtitlesformat', metavar='FORMAT',
-            help='subtitle format [srt/sbv] (default=srt) (currently youtube only)', default='srt')
+            help='subtitle format [srt/sbv/vtt] (default=srt) (currently youtube only)', default='srt')
     video_format.add_option('--sub-lang', '--srt-lang',
             action='store', dest='subtitleslang', metavar='LANG',
             help='language of the subtitles to download (optional) use IETF language tags like \'en\'')
@@ -537,6 +540,7 @@ def _real_main(argv=None):
         'writeinfojson': opts.writeinfojson,
         'writethumbnail': opts.writethumbnail,
         'writesubtitles': opts.writesubtitles,
+        'writeautomaticsub': opts.writeautomaticsub,
         'allsubtitles': opts.allsubtitles,
         'listsubtitles': opts.listsubtitles,
         'subtitlesformat': opts.subtitlesformat,
index 0ea99086044c78857310d27d22c14510433171a6..2750fc8f90e51bc0a84afd56d9555a70947ceb76 100644 (file)
@@ -6,6 +6,7 @@ from .bliptv import BlipTVIE, BlipTVUserIE
 from .breakcom import BreakIE
 from .collegehumor import CollegeHumorIE
 from .comedycentral import ComedyCentralIE
+from .cspan import CSpanIE
 from .dailymotion import DailymotionIE
 from .depositfiles import DepositFilesIE
 from .eighttracks import EightTracksIE
@@ -49,6 +50,7 @@ from .vbox7 import Vbox7IE
 from .vevo import VevoIE
 from .vimeo import VimeoIE
 from .vine import VineIE
+from .wimp import WimpIE
 from .worldstarhiphop import WorldStarHipHopIE
 from .xhamster import XHamsterIE
 from .xnxx import XNXXIE
@@ -132,6 +134,8 @@ def gen_extractors():
         VevoIE(),
         JukeboxIE(),
         TudouIE(),
+        CSpanIE(),
+        WimpIE(),
         GenericIE()
     ]
 
index 82e3ffe04312d27abb5865a48ccdc69075afbc6f..b061b9566168758465ad56f43b1f74b89b2cce10 100644 (file)
@@ -1,53 +1,21 @@
 import re
-import socket
+import json
 
 from .common import InfoExtractor
 from ..utils import (
-    compat_http_client,
-    compat_str,
-    compat_urllib_error,
+    # This is used by the not implemented extractLiveStream method
     compat_urllib_parse,
-    compat_urllib_request,
 
     ExtractorError,
     unified_strdate,
 )
 
 class ArteTvIE(InfoExtractor):
-    """arte.tv information extractor."""
-
-    _VALID_URL = r'(?:http://)?videos\.arte\.tv/(?:fr|de)/videos/.*'
+    _VALID_URL = r'(?:http://)?www\.arte.tv/guide/(?:fr|de)/(?:(?:sendungen|emissions)/)?(?P<id>.*?)/(?P<name>.*?)(\?.*)?'
     _LIVE_URL = r'index-[0-9]+\.html$'
 
     IE_NAME = u'arte.tv'
 
-    def fetch_webpage(self, url):
-        request = compat_urllib_request.Request(url)
-        try:
-            self.report_download_webpage(url)
-            webpage = compat_urllib_request.urlopen(request).read()
-        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
-            raise ExtractorError(u'Unable to retrieve video webpage: %s' % compat_str(err))
-        except ValueError as err:
-            raise ExtractorError(u'Invalid URL: %s' % url)
-        return webpage
-
-    def grep_webpage(self, url, regex, regexFlags, matchTuples):
-        page = self.fetch_webpage(url)
-        mobj = re.search(regex, page, regexFlags)
-        info = {}
-
-        if mobj is None:
-            raise ExtractorError(u'Invalid URL: %s' % url)
-
-        for (i, key, err) in matchTuples:
-            if mobj.group(i) is None:
-                raise ExtractorError(err)
-            else:
-                info[key] = mobj.group(i)
-
-        return info
-
     # TODO implement Live Stream
     # def extractLiveStream(self, url):
     #     video_lang = url.split('/')[-4]
@@ -75,62 +43,44 @@ class ArteTvIE(InfoExtractor):
     #     )
     #     video_url = u'%s/%s' % (info.get('url'), info.get('path'))
 
-    def extractPlus7Stream(self, url):
-        video_lang = url.split('/')[-3]
-        info = self.grep_webpage(
-            url,
-            r'param name="movie".*?videorefFileUrl=(http[^\'"&]*)',
-            0,
-            [
-                (1, 'url', u'Invalid URL: %s' % url)
-            ]
-        )
-        next_url = compat_urllib_parse.unquote(info.get('url'))
-        info = self.grep_webpage(
-            next_url,
-            r'<video lang="%s" ref="(http[^\'"&]*)' % video_lang,
-            0,
-            [
-                (1, 'url', u'Could not find <video> tag: %s' % url)
-            ]
-        )
-        next_url = compat_urllib_parse.unquote(info.get('url'))
-
-        info = self.grep_webpage(
-            next_url,
-            r'<video id="(.*?)".*?>.*?' +
-                '<name>(.*?)</name>.*?' +
-                '<dateVideo>(.*?)</dateVideo>.*?' +
-                '<url quality="hd">(.*?)</url>',
-            re.DOTALL,
-            [
-                (1, 'id',    u'could not extract video id: %s' % url),
-                (2, 'title', u'could not extract video title: %s' % url),
-                (3, 'date',  u'could not extract video date: %s' % url),
-                (4, 'url',   u'could not extract video url: %s' % url)
-            ]
-        )
-
-        return {
-            'id':           info.get('id'),
-            'url':          compat_urllib_parse.unquote(info.get('url')),
-            'uploader':     u'arte.tv',
-            'upload_date':  unified_strdate(info.get('date')),
-            'title':        info.get('title').decode('utf-8'),
-            'ext':          u'mp4',
-            'format':       u'NA',
-            'player_url':   None,
-        }
-
     def _real_extract(self, url):
-        video_id = url.split('/')[-1]
-        self.report_extraction(video_id)
+        mobj = re.match(self._VALID_URL, url)
+        name = mobj.group('name')
+        # This is not a real id, it can be for example AJT for the news
+        # http://www.arte.tv/guide/fr/emissions/AJT/arte-journal
+        video_id = mobj.group('id')
 
         if re.search(self._LIVE_URL, video_id) is not None:
             raise ExtractorError(u'Arte live streams are not yet supported, sorry')
             # self.extractLiveStream(url)
             # return
+
+        webpage = self._download_webpage(url, video_id)
+        json_url = self._html_search_regex(r'arte_vp_url="(.*?)"', webpage, 'json url')
+
+        json_info = self._download_webpage(json_url, video_id, 'Downloading info json')
+        self.report_extraction(video_id)
+        info = json.loads(json_info)
+        player_info = info['videoJsonPlayer']
+
+        info_dict = {'id': player_info['VID'],
+                     'title': player_info['VTI'],
+                     'description': player_info['VDE'],
+                     'upload_date': unified_strdate(player_info['VDA'].split(' ')[0]),
+                     'thumbnail': player_info['programImage'],
+                     }
+
+        formats = player_info['VSR'].values()
+        # We order the formats by quality
+        formats = sorted(formats, key=lambda f: int(f['height']))
+        # Pick the best quality
+        format_info = formats[-1]
+        if format_info['mediaType'] == u'rtmp':
+            info_dict['url'] = format_info['streamer']
+            info_dict['play_path'] = 'mp4:' + format_info['url']
+            info_dict['ext'] = 'mp4'
         else:
-            info = self.extractPlus7Stream(url)
+            info_dict['url'] = format_info['url']
+            info_dict['ext'] = 'mp4'
 
-        return [info]
+        return info_dict
index 1bb359046b958b5554371ca4aa602696efe8fe8f..6985e88f0a474fc48806304d03f5553bb01d315b 100644 (file)
@@ -172,7 +172,7 @@ class ComedyCentralIE(InfoExtractor):
                 'ext': 'mp4',
                 'format': format,
                 'thumbnail': None,
-                'description': officialTitle,
+                'description': compat_str(officialTitle),
             }
             results.append(info)
 
diff --git a/youtube_dl/extractor/cspan.py b/youtube_dl/extractor/cspan.py
new file mode 100644 (file)
index 0000000..2246515
--- /dev/null
@@ -0,0 +1,44 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    compat_urllib_parse,
+)
+
+class CSpanIE(InfoExtractor):
+    """Information extractor for c-spanvideo.org program pages."""
+
+    # Dots in the host name are escaped so that they only match a literal '.'.
+    _VALID_URL = r'http://www\.c-spanvideo\.org/program/(.*)'
+
+    def _real_extract(self, url):
+        """Return the info dict (rtmp url and play path included) for `url`."""
+        mobj = re.match(self._VALID_URL, url)
+        prog_name = mobj.group(1)
+        webpage = self._download_webpage(url, prog_name)
+        video_id = self._search_regex(r'programid=(.*?)&', webpage, 'video id')
+        data = compat_urllib_parse.urlencode({'programid': video_id,
+                                              'dynamic':'1'})
+        # Title, stream URL and play path are read from this XML document;
+        # description and thumbnail are read from the program page itself.
+        info_url = 'http://www.c-spanvideo.org/common/services/flashXml.php?' + data
+        video_info = self._download_webpage(info_url, video_id, u'Downloading video info')
+
+        self.report_extraction(video_id)
+
+        title = self._html_search_regex(r'<string name="title">(.*?)</string>',
+                                        video_info, 'title')
+        description = self._html_search_regex(r'<meta (?:property="og:|name=")description" content="(.*?)"',
+                                              webpage, 'description',
+                                              flags=re.MULTILINE|re.DOTALL)
+        thumbnail = self._html_search_regex(r'<meta property="og:image" content="(.*?)"',
+                                            webpage, 'thumbnail')
+
+        url = self._search_regex(r'<string name="URL">(.*?)</string>',
+                                 video_info, 'video url')
+        # The extracted URL is a template; fill in its placeholders.
+        url = url.replace('$(protocol)', 'rtmp').replace('$(port)', '443')
+        path = self._search_regex(r'<string name="path">(.*?)</string>',
+                            video_info, 'rtmp play path')
+
+        return {'id': video_id,
+                'title': title,
+                'ext': 'flv',
+                'url': url,
+                'play_path': path,
+                'description': description,
+                'thumbnail': thumbnail,
+                }
index aa88e1a92e587a9d68075ac6a5196e35ce7857af..49a249ae32a901ea3bc5be431b2a510a09fc95bc 100644 (file)
@@ -3,7 +3,6 @@ import json
 
 from .common import InfoExtractor
 from ..utils import (
-    unified_strdate,
     ExtractorError,
 )
 
diff --git a/youtube_dl/extractor/wimp.py b/youtube_dl/extractor/wimp.py
new file mode 100644 (file)
index 0000000..84f065a
--- /dev/null
@@ -0,0 +1,28 @@
+import re
+import base64
+
+from .common import InfoExtractor
+
+
+class WimpIE(InfoExtractor):  # Info extractor for wimp.com video pages.
+    _VALID_URL = r'(?:http://)?(?:www\.)?wimp\.com/([^/]+)/'  # scheme and www. are optional; group 1 = first path segment
+
+    def _real_extract(self, url):  # Return a one-element list holding the info dict for the page at `url`.
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group(1)
+        webpage = self._download_webpage(url, video_id)
+        title = self._search_regex(r'<meta name="description" content="(.+?)" />',webpage, 'video title')  # title is taken from the description meta tag
+        thumbnail_url = self._search_regex(r'<meta property="og\:image" content="(.+?)" />', webpage,'video thumbnail')
+        googleString = self._search_regex("googleCode = '(.*?)'", webpage, 'file url')  # quoted value assigned to googleCode in the page
+        googleString = base64.b64decode(googleString).decode('ascii')  # base64 -> bytes -> text; non-ASCII bytes would raise here
+        final_url = self._search_regex('","(.*?)"', googleString,'final video url')  # text between the first '","' and the next '"'
+        ext = final_url.rpartition(u'.')[2]  # everything after the last '.' in the URL
+
+        return [{
+            'id':        video_id,
+            'url':       final_url,
+            'ext':       ext,
+            'title':     title,
+            'thumbnail': thumbnail_url,
+        }]
+
index de653cb3d24e6318f157d5177b87c6b09b913bef..c7922c533343ca162c922659d34dc867d6839864 100644 (file)
@@ -131,15 +131,24 @@ class YoutubeIE(InfoExtractor):
 
     def _decrypt_signature(self, s):
         """Decrypt the key the two subkeys must have a length of 43"""
-        (a,b) = s.split('.')
-        if len(a) != 43 or len(b) != 43:
-            raise ExtractorError(u'Unable to decrypt signature, subkeys lengths %d.%d not supported; retrying might work' % (len(a), len(b)))
-        if self._downloader.params.get('verbose'):
-            self.to_screen('encrypted signature length %d.%d' % (len(a), len(b)))
-        b = ''.join([b[:8],a[0],b[9:18],b[-4],b[19:39], b[18]])[0:40]
-        a = a[-40:]
-        s_dec = '.'.join((a,b))[::-1]
-        return s_dec
+
+        if len(s) == 88:  # NOTE: the docstring above predates this change; s is no longer split on '.', the branch is chosen by total length (82-88)
+            return s[48] + s[81:67:-1] + s[82] + s[66:62:-1] + s[85] + s[61:48:-1] + s[67] + s[47:12:-1] + s[3] + s[11:3:-1] + s[2] + s[12]  # each branch is a fixed reordering of characters of s; slices with step -1 read backwards
+        elif len(s) == 87:
+            return s[62] + s[82:62:-1] + s[83] + s[61:52:-1] + s[0] + s[51:2:-1]
+        elif len(s) == 86:
+            return s[2:63] + s[82] + s[64:82] + s[63]  # the only branch without reversal: drops s[0:2] and s[83:], swaps s[63] with s[82]
+        elif len(s) == 85:
+            return s[76] + s[82:76:-1] + s[83] + s[75:60:-1] + s[0] + s[59:50:-1] + s[1] + s[49:2:-1]
+        elif len(s) == 84:
+            return s[83:36:-1] + s[2] + s[35:26:-1] + s[3] + s[25:3:-1] + s[26]
+        elif len(s) == 83:
+            return s[52] + s[81:55:-1] + s[2] + s[54:52:-1] + s[82] + s[51:36:-1] + s[55] + s[35:2:-1] + s[36]
+        elif len(s) == 82:
+            return s[36] + s[79:67:-1] + s[81] + s[66:40:-1] + s[33] + s[39:36:-1] + s[40] + s[35] + s[0] + s[67] + s[32:0:-1] + s[34]
+
+        else:
+            raise ExtractorError(u'Unable to decrypt signature, subkeys length %d not supported; retrying might work' % (len(s)))  # any length outside 82-88 is rejected
 
     def _get_available_subtitles(self, video_id):
         self.report_video_subtitles_download(video_id)
@@ -454,14 +463,13 @@ class YoutubeIE(InfoExtractor):
             if video_subtitles:
                 (sub_error, sub_lang, sub) = video_subtitles[0]
                 if sub_error:
-                    # We try with the automatic captions
-                    video_subtitles = self._request_automatic_caption(video_id, video_webpage)
-                    (sub_error_auto, sub_lang, sub) = video_subtitles[0]
-                    if sub is not None:
-                        pass
-                    else:
-                        # We report the original error
-                        self._downloader.report_warning(sub_error)
+                    self._downloader.report_warning(sub_error)
+        
+        if self._downloader.params.get('writeautomaticsub', False):
+            video_subtitles = self._request_automatic_caption(video_id, video_webpage)
+            (sub_error, sub_lang, sub) = video_subtitles[0]
+            if sub_error:
+                self._downloader.report_warning(sub_error)
 
         if self._downloader.params.get('allsubtitles', False):
             video_subtitles = self._extract_all_subtitles(video_id)
@@ -510,6 +518,12 @@ class YoutubeIE(InfoExtractor):
                     if 'sig' in url_data:
                         url += '&signature=' + url_data['sig'][0]
                     elif 's' in url_data:
+                        if self._downloader.params.get('verbose'):
+                            s = url_data['s'][0]
+                            player = self._search_regex(r'html5player-(.+?)\.js', video_webpage,
+                                'html5 player', fatal=False)
+                            self.to_screen('encrypted signature length %d (%d.%d), itag %s, html5 player %s' %
+                                (len(s), len(s.split('.')[0]), len(s.split('.')[1]), url_data['itag'][0], player))
                         signature = self._decrypt_signature(url_data['s'][0])
                         url += '&signature=' + signature
                     if 'ratebypass' not in url:
index 3b456e934c9fb83a987c787f076bc577c3926bad..d1e848284e7b779637ecb663242d5d8d6cb88990 100644 (file)
@@ -1,2 +1,2 @@
 
-__version__ = '2013.06.33'
+__version__ = '2013.06.34'