From: Rogério Brito
Date: Tue, 2 Jul 2013 00:43:32 +0000 (-0300)
Subject: Imported Upstream version 2013.06.34
X-Git-Url: https://git.rapsys.eu/youtubedl/commitdiff_plain/961c212c4f97846f00004b37e8dbd94b124a2f99?hp=-c

Imported Upstream version 2013.06.34
---
961c212c4f97846f00004b37e8dbd94b124a2f99
diff --git a/README.md b/README.md
index 7d19024..81b86e2 100644
--- a/README.md
+++ b/README.md
@@ -116,12 +116,14 @@ which means you can modify it, redistribute it or use it however you like.
     -F, --list-formats       list all available formats (currently youtube
                              only)
     --write-sub              write subtitle file (currently youtube only)
+    --write-auto-sub         write automatic subtitle file (currently youtube
+                             only)
     --only-sub               [deprecated] alias of --skip-download
     --all-subs               downloads all the available subtitles of the
                              video (currently youtube only)
     --list-subs              lists all available subtitles for the video
                              (currently youtube only)
-    --sub-format FORMAT      subtitle format [srt/sbv] (default=srt)
+    --sub-format FORMAT      subtitle format [srt/sbv/vtt] (default=srt)
                              (currently youtube only)
     --sub-lang LANG          language of the subtitles to download (optional)
                              use IETF language tags like 'en'
diff --git a/README.txt b/README.txt
index 56e2512..239709b 100644
--- a/README.txt
+++ b/README.txt
@@ -131,12 +131,14 @@ Video Format Options:
     -F, --list-formats       list all available formats (currently youtube
                              only)
     --write-sub              write subtitle file (currently youtube only)
+    --write-auto-sub         write automatic subtitle file (currently youtube
+                             only)
     --only-sub               [deprecated] alias of --skip-download
     --all-subs               downloads all the available subtitles of the
                              video (currently youtube only)
     --list-subs              lists all available subtitles for the video
                              (currently youtube only)
-    --sub-format FORMAT      subtitle format [srt/sbv] (default=srt)
+    --sub-format FORMAT      subtitle format [srt/sbv/vtt] (default=srt)
                              (currently youtube only)
     --sub-lang LANG          language of the subtitles to download (optional)
                              use IETF language tags like 'en'
diff --git a/devscripts/youtube_genalgo.py b/devscripts/youtube_genalgo.py
new file mode 100644
index 0000000..b168cea
--- /dev/null
+++ b/devscripts/youtube_genalgo.py
@@ -0,0 +1,76 @@
+#!/usr/bin/env python
+
+# Generate youtube signature algorithm from test cases
+
+import sys
+
+tests = [
+    ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<",
+     "J:|}][{=+-_)(*&;%$#@>MNBVCXZASDFGH^KLPOIUYTREWQ0987654321mnbvcxzasdfghrklpoiuytej"),
+    ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>.<",
+     "!?;:|}][{=+-_)(*&^$#@/MNBVCXZASqFGHJKLPOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuytr"),
+    ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<",
+     "ertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!/#$%^&*()_-+={[|};?@"),
+    ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?/>.<",
+     "{>/?;}[.=+-_)(*&^%$#@!MqBVCXZASDFwHJKLPOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuytr"),
+    ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<",
+     "<.>?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWe098765432rmnbvcxzasdfghjklpoiuyt1"),
+    ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/>.<",
+     "D.>/?;}[{=+_)(*&^%$#!MNBVCXeAS.<",
+     "Q>/?;}[{=+-(*<^%$#@!MNBVCXZASDFGHKLPOIUY8REWT0q&7654321mnbvcxzasdfghjklpoiuytrew9"),
+]
+
+def find_matching(wrong, right):
+    idxs = [wrong.index(c) for c in right]
+    return compress(idxs)
+    return ('s[%d]' % i for i in idxs)
+
+def compress(idxs):
+    def _genslice(start, end, step):
+        starts = '' if start == 0 else str(start)
+        ends = ':%d' % (end+step)
+        steps = '' if step == 1 else (':%d' % step)
+        return 's[%s%s%s]' % (starts, ends, steps)
+
+    step = None
+    for i, prev in zip(idxs[1:], idxs[:-1]):
+        if step is not None:
+            if i - prev == step:
+                continue
+            yield _genslice(start, prev, step)
+            step = None
+            continue
+        if i - prev in [-1, 1]:
+            step = i - prev
+            start = prev
+            continue
+        else:
+            yield 's[%d]' % prev
+    if step is None:
+        yield 's[%d]' % i
+    else:
+        yield _genslice(start, i, step)
+
+def _assert_compress(inp, exp):
+    res = list(compress(inp))
+    if res != exp:
+        print('Got %r, expected %r' % (res, exp))
+    assert res == exp
+_assert_compress([0,2,4,6], ['s[0]', 's[2]', 's[4]', 's[6]'])
+_assert_compress([0,1,2,4,6,7], ['s[:3]', 's[4]', 's[6:8]'])
+_assert_compress([8,0,1,2,4,7,6,9], ['s[8]', 's[:3]', 's[4]', 's[7:5:-1]', 's[9]'])
+
+def gen(wrong, right, indent):
+    code = ' + '.join(find_matching(wrong, right))
+    return 'if len(s) == %d:\n%s    return %s\n' % (len(wrong), indent, code)
+
+def genall(tests):
+    indent = ' ' * 8
+    return indent + (indent + 'el').join(gen(wrong, right, indent) for wrong,right in tests)
+
+def main():
+    print(genall(tests))
+
+if __name__ == '__main__':
+    main()
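
For context on what the generator above emits: genall() turns each (wrong, right) pair into an "if len(s) == N:" branch whose body rebuilds the unscrambled signature by concatenating single characters and slices of the scrambled string s, as produced by compress(). A minimal sketch of that shape, using invented slice expressions rather than ones taken from any real generated branch:

    # Sketch only: the slices below are made up for illustration; real branches
    # come from running devscripts/youtube_genalgo.py against current test cases.
    def decrypt_signature_sketch(s):
        if len(s) == 10:
            # glue together characters and slices of the scrambled string
            return s[9] + s[2:8] + s[0]
        elif len(s) == 8:
            # a reversed run, the kind compress() emits for descending indices
            return s[7:2:-1] + s[1]
        raise ValueError('Unsupported signature length %d' % len(s))

    assert decrypt_signature_sketch('0123456789') == '92345670'
    assert decrypt_signature_sketch('abcdefgh') == 'hgfedb'
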
diff --git a/test/helper.py b/test/helper.py
new file mode 100644
index 0000000..842ffc2
--- /dev/null
+++ b/test/helper.py
@@ -0,0 +1,33 @@
+import io
+import json
+import os.path
+
+from youtube_dl import YoutubeDL, YoutubeDLHandler
+from youtube_dl.utils import (
+    compat_cookiejar,
+    compat_urllib_request,
+)
+
+# General configuration (from __init__, not very elegant...)
+jar = compat_cookiejar.CookieJar()
+cookie_processor = compat_urllib_request.HTTPCookieProcessor(jar)
+proxy_handler = compat_urllib_request.ProxyHandler()
+opener = compat_urllib_request.build_opener(proxy_handler, cookie_processor, YoutubeDLHandler())
+compat_urllib_request.install_opener(opener)
+
+PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), "parameters.json")
+with io.open(PARAMETERS_FILE, encoding='utf-8') as pf:
+    parameters = json.load(pf)
+
+class FakeYDL(YoutubeDL):
+    def __init__(self):
+        self.result = []
+        # Different instances of the downloader can't share the same dictionary
+        # some test set the "sublang" parameter, which would break the md5 checks.
+        self.params = dict(parameters)
+    def to_screen(self, s):
+        print(s)
+    def trouble(self, s, tb=None):
+        raise Exception(s)
+    def download(self, x):
+        self.result.append(x)
\ No newline at end of file
diff --git a/test/test_youtube_lists.py b/test/test_youtube_lists.py
index 320b440..4486b7e 100644
--- a/test/test_youtube_lists.py
+++ b/test/test_youtube_lists.py
@@ -10,30 +10,8 @@ sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 from youtube_dl.extractor import YoutubeUserIE, YoutubePlaylistIE, YoutubeIE, YoutubeChannelIE
 from youtube_dl.utils import *
-from youtube_dl import YoutubeDL
-PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), "parameters.json")
-with io.open(PARAMETERS_FILE, encoding='utf-8') as pf:
-    parameters = json.load(pf)
-
-# General configuration (from __init__, not very elegant...)
-jar = compat_cookiejar.CookieJar()
-cookie_processor = compat_urllib_request.HTTPCookieProcessor(jar)
-proxy_handler = compat_urllib_request.ProxyHandler()
-opener = compat_urllib_request.build_opener(proxy_handler, cookie_processor, YoutubeDLHandler())
-compat_urllib_request.install_opener(opener)
-
-class FakeYDL(YoutubeDL):
-    def __init__(self):
-        self.result = []
-        self.params = parameters
-    def to_screen(self, s):
-        print(s)
-    def trouble(self, s, tb=None):
-        raise Exception(s)
-    def extract_info(self, url):
-        self.result.append(url)
-        return url
+from helper import FakeYDL
 
 class TestYoutubeLists(unittest.TestCase):
     def assertIsPlaylist(self,info):
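
The hunk above is the point of the new test/helper.py: test modules import one shared FakeYDL stub instead of each defining their own. A rough usage sketch, not part of the patch (placeholder URL, module assumed to live in test/ like the others): FakeYDL records download() calls in its result list and turns any trouble() report into an exception, so extractor errors fail a test loudly.

    # Illustrative test module, not included in this commit.
    import os
    import sys
    import unittest

    # Allow direct execution from test/, mirroring the other test modules
    sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

    from helper import FakeYDL

    class TestFakeYDL(unittest.TestCase):
        def test_records_downloads(self):
            ydl = FakeYDL()
            ydl.download(['http://example.com/video'])  # placeholder URL
            self.assertEqual(len(ydl.result), 1)

        def test_trouble_raises(self):
            self.assertRaises(Exception, FakeYDL().trouble, 'test error')

    if __name__ == '__main__':
        unittest.main()
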
diff --git a/test/test_youtube_sig.py b/test/test_youtube_sig.py
new file mode 100755
index 0000000..e87b625
--- /dev/null
+++ b/test/test_youtube_sig.py
@@ -0,0 +1,57 @@
+#!/usr/bin/env python
+
+import unittest
+import sys
+
+# Allow direct execution
+import os
+sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from youtube_dl.extractor.youtube import YoutubeIE
+from helper import FakeYDL
+
+sig = YoutubeIE(FakeYDL())._decrypt_signature
+
+class TestYoutubeSig(unittest.TestCase):
+    def test_43_43(self):
+        wrong = '5AEEAE0EC39677BC65FD9021CCD115F1F2DBD5A59E4.C0B243A3E2DED6769199AF3461781E75122AE135135'
+        right = '931EA22157E1871643FA9519676DED253A342B0C.4E95A5DBD2F1F511DCC1209DF56CB77693CE0EAE'
+        self.assertEqual(sig(wrong), right)
+
+    def test_88(self):
+        wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<"
+        right = "J:|}][{=+-_)(*&;%$#@>MNBVCXZASDFGH^KLPOIUYTREWQ0987654321mnbvcxzasdfghrklpoiuytej"
+        self.assertEqual(sig(wrong), right)
+
+    def test_87(self):
+        wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>.<"
+        right = "!?;:|}][{=+-_)(*&^$#@/MNBVCXZASqFGHJKLPOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuytr"
+        self.assertEqual(sig(wrong), right)
+
+    def test_86(self):
+        wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<"
+        right = "ertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!/#$%^&*()_-+={[|};?@"
+        self.assertEqual(sig(wrong), right)
+
+    def test_85(self):
+        wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?/>.<"
+        right = "{>/?;}[.=+-_)(*&^%$#@!MqBVCXZASDFwHJKLPOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuytr"
+        self.assertEqual(sig(wrong), right)
+
+    def test_84(self):
+        wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<"
+        right = "<.>?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWe098765432rmnbvcxzasdfghjklpoiuyt1"
+        self.assertEqual(sig(wrong), right)
+
+    def test_83(self):
+        wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/>.<"
+        right = "D.>/?;}[{=+_)(*&^%$#!MNBVCXeAS.*?)/(?P.*?)(\?.*)?'
     _LIVE_URL = r'index-[0-9]+\.html$'
     IE_NAME = u'arte.tv'
 
-    def fetch_webpage(self, url):
-        request = compat_urllib_request.Request(url)
-        try:
-            self.report_download_webpage(url)
-            webpage = compat_urllib_request.urlopen(request).read()
-        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
-            raise ExtractorError(u'Unable to retrieve video webpage: %s' % compat_str(err))
-        except ValueError as err:
-            raise ExtractorError(u'Invalid URL: %s' % url)
-        return webpage
-
-    def grep_webpage(self, url, regex, regexFlags, matchTuples):
-        page = self.fetch_webpage(url)
-        mobj = re.search(regex, page, regexFlags)
-        info = {}
-
-        if mobj is None:
-            raise ExtractorError(u'Invalid URL: %s' % url)
-
-        for (i, key, err) in matchTuples:
-            if mobj.group(i) is None:
-                raise ExtractorError(err)
-            else:
-                info[key] = mobj.group(i)
-
-        return info
-
     # TODO implement Live Stream
     # def extractLiveStream(self, url):
     #     video_lang = url.split('/')[-4]
@@ -75,62 +43,44 @@ class ArteTvIE(InfoExtractor):
     #     )
     #     video_url = u'%s/%s' % (info.get('url'), info.get('path'))
 
-    def extractPlus7Stream(self, url):
-        video_lang = url.split('/')[-3]
-        info = self.grep_webpage(
-            url,
-            r'param name="movie".*?videorefFileUrl=(http[^\'"&]*)',
-            0,
-            [
-                (1, 'url', u'Invalid URL: %s' % url)
-            ]
-        )
-        next_url = compat_urllib_parse.unquote(info.get('url'))
-        info = self.grep_webpage(
-            next_url,
-            r'