From: Rogério Brito Date: Thu, 4 Jul 2013 11:48:28 +0000 (-0300) Subject: Imported Upstream version 2013.07.02 X-Git-Url: https://git.rapsys.eu/youtubedl/commitdiff_plain/9a117f94b4bfe84cfe1d904d5132aefcf41511c9?hp=--cc Imported Upstream version 2013.07.02 --- 9a117f94b4bfe84cfe1d904d5132aefcf41511c9 diff --git a/README.md b/README.md index 81b86e2..b246d3c 100644 --- a/README.md +++ b/README.md @@ -18,19 +18,13 @@ which means you can modify it, redistribute it or use it however you like. --version print program version and exit -U, --update update this program to latest version -i, --ignore-errors continue on download errors - -r, --rate-limit LIMIT maximum download rate (e.g. 50k or 44.6m) - -R, --retries RETRIES number of retries (default is 10) - --buffer-size SIZE size of download buffer (e.g. 1024 or 16k) - (default is 1024) - --no-resize-buffer do not automatically adjust the buffer size. By - default, the buffer size is automatically resized - from an initial value of SIZE. --dump-user-agent display the current browser identification --user-agent UA specify a custom user agent --referer REF specify a custom referer, use if the video access is restricted to one domain --list-extractors List all supported extractors and the URLs they would handle + --extractor-descriptions Output descriptions of all supported extractors --proxy URL Use the specified HTTP/HTTPS proxy --no-check-certificate Suppress HTTPS certificate validation. @@ -50,6 +44,15 @@ which means you can modify it, redistribute it or use it however you like. --datebefore DATE download only videos uploaded before this date --dateafter DATE download only videos uploaded after this date +## Download Options: + -r, --rate-limit LIMIT maximum download rate (e.g. 50k or 44.6m) + -R, --retries RETRIES number of retries (default is 10) + --buffer-size SIZE size of download buffer (e.g. 1024 or 16k) + (default is 1024) + --no-resize-buffer do not automatically adjust the buffer size. By + default, the buffer size is automatically resized + from an initial value of SIZE. + ## Filesystem Options: -t, --title use title in file name (default) --id use only video ID in file name @@ -168,7 +171,7 @@ The `-o` option allows users to indicate a template for the output file names. T - `playlist`: The name or the id of the playlist that contains the video. - `playlist_index`: The index of the video in the playlist, a five-digit number. -The current default template is `%(id)s.%(ext)s`, but that will be switchted to `%(title)s-%(id)s.%(ext)s` (which can be requested with `-t` at the moment). +The current default template is `%(title)s-%(id)s.%(ext)s`. In some cases, you don't want special characters such as 中, spaces, or &, such as when transferring the downloaded filename to a Windows system or the filename through an 8bit-unsafe channel. In these cases, add the `--restrict-filenames` flag to get a shorter title: @@ -194,11 +197,11 @@ Examples: ### Can you please put the -b option back? -Most people asking this question are not aware that youtube-dl now defaults to downloading the highest available quality as reported by YouTube, which will be 1080p or 720p in some cases, so you no longer need the -b option. For some specific videos, maybe YouTube does not report them to be available in a specific high quality format you''re interested in. In that case, simply request it with the -f option and youtube-dl will try to download it. +Most people asking this question are not aware that youtube-dl now defaults to downloading the highest available quality as reported by YouTube, which will be 1080p or 720p in some cases, so you no longer need the `-b` option. For some specific videos, maybe YouTube does not report them to be available in a specific high quality format you're interested in. In that case, simply request it with the `-f` option and youtube-dl will try to download it. ### I get HTTP error 402 when trying to download a video. What's this? -Apparently YouTube requires you to pass a CAPTCHA test if you download too much. We''re [considering to provide a way to let you solve the CAPTCHA](https://github.com/rg3/youtube-dl/issues/154), but at the moment, your best course of action is pointing a webbrowser to the youtube URL, solving the CAPTCHA, and restart youtube-dl. +Apparently YouTube requires you to pass a CAPTCHA test if you download too much. We're [considering to provide a way to let you solve the CAPTCHA](https://github.com/rg3/youtube-dl/issues/154), but at the moment, your best course of action is pointing a webbrowser to the youtube URL, solving the CAPTCHA, and restart youtube-dl. ### I have downloaded a video but how can I play it? diff --git a/README.txt b/README.txt index 239709b..8f08dd2 100644 --- a/README.txt +++ b/README.txt @@ -25,19 +25,13 @@ OPTIONS --version print program version and exit -U, --update update this program to latest version -i, --ignore-errors continue on download errors - -r, --rate-limit LIMIT maximum download rate (e.g. 50k or 44.6m) - -R, --retries RETRIES number of retries (default is 10) - --buffer-size SIZE size of download buffer (e.g. 1024 or 16k) - (default is 1024) - --no-resize-buffer do not automatically adjust the buffer size. By - default, the buffer size is automatically resized - from an initial value of SIZE. --dump-user-agent display the current browser identification --user-agent UA specify a custom user agent --referer REF specify a custom referer, use if the video access is restricted to one domain --list-extractors List all supported extractors and the URLs they would handle + --extractor-descriptions Output descriptions of all supported extractors --proxy URL Use the specified HTTP/HTTPS proxy --no-check-certificate Suppress HTTPS certificate validation. @@ -59,6 +53,17 @@ Video Selection: --datebefore DATE download only videos uploaded before this date --dateafter DATE download only videos uploaded after this date +Download Options: +----------------- + + -r, --rate-limit LIMIT maximum download rate (e.g. 50k or 44.6m) + -R, --retries RETRIES number of retries (default is 10) + --buffer-size SIZE size of download buffer (e.g. 1024 or 16k) + (default is 1024) + --no-resize-buffer do not automatically adjust the buffer size. By + default, the buffer size is automatically resized + from an initial value of SIZE. + Filesystem Options: ------------------- @@ -205,9 +210,7 @@ lowercase S. Allowed names are: - playlist_index: The index of the video in the playlist, a five-digit number. -The current default template is %(id)s.%(ext)s, but that will be -switchted to %(title)s-%(id)s.%(ext)s (which can be requested with -t at -the moment). +The current default template is %(title)s-%(id)s.%(ext)s. In some cases, you don't want special characters such as 中, spaces, or &, such as when transferring the downloaded filename to a Windows system @@ -244,14 +247,14 @@ Most people asking this question are not aware that youtube-dl now defaults to downloading the highest available quality as reported by YouTube, which will be 1080p or 720p in some cases, so you no longer need the -b option. For some specific videos, maybe YouTube does not -report them to be available in a specific high quality format you''re +report them to be available in a specific high quality format you're interested in. In that case, simply request it with the -f option and youtube-dl will try to download it. I get HTTP error 402 when trying to download a video. What's this? Apparently YouTube requires you to pass a CAPTCHA test if you download -too much. We''re considering to provide a way to let you solve the +too much. We're considering to provide a way to let you solve the CAPTCHA, but at the moment, your best course of action is pointing a webbrowser to the youtube URL, solving the CAPTCHA, and restart youtube-dl. diff --git a/devscripts/youtube_genalgo.py b/devscripts/youtube_genalgo.py index b168cea..c3d69e6 100644 --- a/devscripts/youtube_genalgo.py +++ b/devscripts/youtube_genalgo.py @@ -5,18 +5,25 @@ import sys tests = [ + # 88 ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<", "J:|}][{=+-_)(*&;%$#@>MNBVCXZASDFGH^KLPOIUYTREWQ0987654321mnbvcxzasdfghrklpoiuytej"), + # 87 ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>.<", "!?;:|}][{=+-_)(*&^$#@/MNBVCXZASqFGHJKLPOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuytr"), + # 86 - vfl_ymO4Z 2013/06/27 ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<", "ertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!/#$%^&*()_-+={[|};?@"), + # 85 ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?/>.<", "{>/?;}[.=+-_)(*&^%$#@!MqBVCXZASDFwHJKLPOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuytr"), + # 84 ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<", "<.>?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWe098765432rmnbvcxzasdfghjklpoiuyt1"), + # 83 ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/>.<", "D.>/?;}[{=+_)(*&^%$#!MNBVCXeAS.<", "Q>/?;}[{=+-(*<^%$#@!MNBVCXZASDFGHKLPOIUY8REWT0q&7654321mnbvcxzasdfghjklpoiuytrew9"), ] diff --git a/setup.py b/setup.py index 61435fc..3b6dc2d 100644 --- a/setup.py +++ b/setup.py @@ -12,8 +12,9 @@ except ImportError: from distutils.core import setup try: + # This will create an exe that needs Microsoft Visual C++ 2008 + # Redistributable Package import py2exe - """This will create an exe that needs Microsoft Visual C++ 2008 Redistributable Package""" except ImportError: if len(sys.argv) >= 2 and sys.argv[1] == 'py2exe': print("Cannot import py2exe", file=sys.stderr) @@ -26,13 +27,15 @@ py2exe_options = { "dist_dir": '.', "dll_excludes": ['w9xpopen.exe'], } + py2exe_console = [{ "script": "./youtube_dl/__main__.py", "dest_base": "youtube-dl", }] + py2exe_params = { 'console': py2exe_console, - 'options': { "py2exe": py2exe_options }, + 'options': {"py2exe": py2exe_options}, 'zipfile': None } @@ -41,30 +44,34 @@ if len(sys.argv) >= 2 and sys.argv[1] == 'py2exe': else: params = { 'scripts': ['bin/youtube-dl'], - 'data_files': [('etc/bash_completion.d', ['youtube-dl.bash-completion']), # Installing system-wide would require sudo... - ('share/doc/youtube_dl', ['README.txt']), - ('share/man/man1/', ['youtube-dl.1'])] + 'data_files': [ # Installing system-wide would require sudo... + ('etc/bash_completion.d', ['youtube-dl.bash-completion']), + ('share/doc/youtube_dl', ['README.txt']), + ('share/man/man1/', ['youtube-dl.1']) + ] } # Get the version from youtube_dl/version.py without importing the package -exec(compile(open('youtube_dl/version.py').read(), 'youtube_dl/version.py', 'exec')) +exec(compile(open('youtube_dl/version.py').read(), + 'youtube_dl/version.py', 'exec')) setup( - name = 'youtube_dl', - version = __version__, - description = 'YouTube video downloader', - long_description = 'Small command-line program to download videos from YouTube.com and other video sites.', - url = 'https://github.com/rg3/youtube-dl', - author = 'Ricardo Garcia', - maintainer = 'Philipp Hagemeister', - maintainer_email = 'phihag@phihag.de', - packages = ['youtube_dl', 'youtube_dl.extractor'], + name='youtube_dl', + version=__version__, + description='YouTube video downloader', + long_description='Small command-line program to download videos from' + ' YouTube.com and other video sites.', + url='https://github.com/rg3/youtube-dl', + author='Ricardo Garcia', + maintainer='Philipp Hagemeister', + maintainer_email='phihag@phihag.de', + packages=['youtube_dl', 'youtube_dl.extractor'], # Provokes warning on most systems (why?!) - #test_suite = 'nose.collector', - #test_requires = ['nosetest'], + # test_suite = 'nose.collector', + # test_requires = ['nosetest'], - classifiers = [ + classifiers=[ "Topic :: Multimedia :: Video", "Development Status :: 5 - Production/Stable", "Environment :: Console", diff --git a/test/helper.py b/test/helper.py index 842ffc2..a2b468b 100644 --- a/test/helper.py +++ b/test/helper.py @@ -2,6 +2,7 @@ import io import json import os.path +import youtube_dl.extractor from youtube_dl import YoutubeDL, YoutubeDLHandler from youtube_dl.utils import ( compat_cookiejar, @@ -30,4 +31,14 @@ class FakeYDL(YoutubeDL): def trouble(self, s, tb=None): raise Exception(s) def download(self, x): - self.result.append(x) \ No newline at end of file + self.result.append(x) + +def get_testcases(): + for ie in youtube_dl.extractor.gen_extractors(): + t = getattr(ie, '_TEST', None) + if t: + t['name'] = type(ie).__name__[:-len('IE')] + yield t + for t in getattr(ie, '_TESTS', []): + t['name'] = type(ie).__name__[:-len('IE')] + yield t diff --git a/test/test_all_urls.py b/test/test_all_urls.py index d3ee296..39a5ee3 100644 --- a/test/test_all_urls.py +++ b/test/test_all_urls.py @@ -7,7 +7,8 @@ import unittest import os sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -from youtube_dl.extractor import YoutubeIE, YoutubePlaylistIE, YoutubeChannelIE, JustinTVIE +from youtube_dl.extractor import YoutubeIE, YoutubePlaylistIE, YoutubeChannelIE, JustinTVIE, gen_extractors +from helper import get_testcases class TestAllURLsMatching(unittest.TestCase): def test_youtube_playlist_matching(self): @@ -50,5 +51,16 @@ class TestAllURLsMatching(unittest.TestCase): self.assertEqual(YoutubeIE()._extract_id('https://www.youtube.com/watch?&v=BaW_jenozKc'), 'BaW_jenozKc') self.assertEqual(YoutubeIE()._extract_id('https://www.youtube.com/watch?feature=player_embedded&v=BaW_jenozKc'), 'BaW_jenozKc') + def test_no_duplicates(self): + ies = gen_extractors() + for tc in get_testcases(): + url = tc['url'] + for ie in ies: + if type(ie).__name__ in ['GenericIE', tc['name'] + 'IE']: + self.assertTrue(ie.suitable(url), '%s should match URL %r' % (type(ie).__name__, url)) + else: + self.assertFalse(ie.suitable(url), '%s should not match URL %r' % (type(ie).__name__, url)) + + if __name__ == '__main__': unittest.main() diff --git a/test/test_download.py b/test/test_download.py index 067bde4..21cb2e6 100644 --- a/test/test_download.py +++ b/test/test_download.py @@ -14,10 +14,8 @@ import binascii sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) import youtube_dl.YoutubeDL -import youtube_dl.extractor from youtube_dl.utils import * -DEF_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'tests.json') PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), "parameters.json") RETRIES = 3 @@ -56,8 +54,9 @@ def _file_md5(fn): with open(fn, 'rb') as f: return hashlib.md5(f.read()).hexdigest() -with io.open(DEF_FILE, encoding='utf-8') as deff: - defs = json.load(deff) +from helper import get_testcases +defs = get_testcases() + with io.open(PARAMETERS_FILE, encoding='utf-8') as pf: parameters = json.load(pf) @@ -73,22 +72,23 @@ def generator(test_case): def test_template(self): ie = youtube_dl.extractor.get_info_extractor(test_case['name']) + def print_skipping(reason): + print('Skipping %s: %s' % (test_case['name'], reason)) if not ie._WORKING: - print('Skipping: IE marked as not _WORKING') + print_skipping('IE marked as not _WORKING') return if 'playlist' not in test_case and not test_case['file']: - print('Skipping: No output file specified') + print_skipping('No output file specified') return if 'skip' in test_case: - print('Skipping: {0}'.format(test_case['skip'])) + print_skipping(test_case['skip']) return params = self.parameters.copy() params.update(test_case.get('params', {})) ydl = YoutubeDL(params) - for ie in youtube_dl.extractor.gen_extractors(): - ydl.add_info_extractor(ie) + ydl.add_default_info_extractors() finished_hook_called = set() def _hook(status): if status['status'] == 'finished': @@ -155,9 +155,12 @@ def generator(test_case): ### And add them to TestDownload for n, test_case in enumerate(defs): test_method = generator(test_case) - test_method.__name__ = "test_{0}".format(test_case["name"]) - if getattr(TestDownload, test_method.__name__, False): - test_method.__name__ = "test_{0}_{1}".format(test_case["name"], n) + tname = 'test_' + str(test_case['name']) + i = 1 + while hasattr(TestDownload, tname): + tname = 'test_' + str(test_case['name']) + '_' + str(i) + i += 1 + test_method.__name__ = tname setattr(TestDownload, test_method.__name__, test_method) del test_method diff --git a/test/test_youtube_lists.py b/test/test_youtube_lists.py index 4486b7e..dd9e292 100644 --- a/test/test_youtube_lists.py +++ b/test/test_youtube_lists.py @@ -8,7 +8,7 @@ import json import os sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -from youtube_dl.extractor import YoutubeUserIE, YoutubePlaylistIE, YoutubeIE, YoutubeChannelIE +from youtube_dl.extractor import YoutubeUserIE, YoutubePlaylistIE, YoutubeIE, YoutubeChannelIE, YoutubeShowIE from youtube_dl.utils import * from helper import FakeYDL @@ -88,5 +88,11 @@ class TestYoutubeLists(unittest.TestCase): result = ie.extract('PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl')[0] self.assertEqual(len(result['entries']), 2) + def test_youtube_show(self): + dl = FakeYDL() + ie = YoutubeShowIE(dl) + result = ie.extract('http://www.youtube.com/show/airdisasters') + self.assertTrue(len(result) >= 4) + if __name__ == '__main__': unittest.main() diff --git a/test/tests.json b/test/tests.json deleted file mode 100644 index ebc7a12..0000000 --- a/test/tests.json +++ /dev/null @@ -1,718 +0,0 @@ -[ - { - "name": "Youtube", - "url": "http://www.youtube.com/watch?v=BaW_jenozKc", - "file": "BaW_jenozKc.mp4", - "info_dict": { - "title": "youtube-dl test video \"'/\\ä↭𝕐", - "uploader": "Philipp Hagemeister", - "uploader_id": "phihag", - "upload_date": "20121002", - "description": "test chars: \"'/\\ä↭𝕐\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de ." - } - }, - { - "name": "Youtube", - "url": "http://www.youtube.com/watch?v=1ltcDfZMA3U", - "file": "1ltcDfZMA3U.flv", - "note": "Test VEVO video (#897)", - "info_dict": { - "upload_date": "20070518", - "title": "Maps - It Will Find You", - "description": "Music video by Maps performing It Will Find You.", - "uploader": "MuteUSA", - "uploader_id": "MuteUSA" - } - }, - { - "name": "Youtube", - "url": "http://www.youtube.com/watch?v=UxxajLWwzqY", - "file": "UxxajLWwzqY.mp4", - "note": "Test generic use_cipher_signature video (#897)", - "info_dict": { - "upload_date": "20120506", - "title": "Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]", - "description": "md5:b085c9804f5ab69f4adea963a2dceb3c", - "uploader": "IconaPop", - "uploader_id": "IconaPop" - } - }, - { - "name": "Dailymotion", - "md5": "392c4b85a60a90dc4792da41ce3144eb", - "url": "http://www.dailymotion.com/video/x33vw9_tutoriel-de-youtubeur-dl-des-video_tech", - "file": "x33vw9.mp4", - "info_dict": { - "uploader": "Alex and Van .", - "title": "Tutoriel de Youtubeur\"DL DES VIDEO DE YOUTUBE\"" - } - }, - { - "name": "Metacafe", - "add_ie": ["Youtube"], - "url": "http://metacafe.com/watch/yt-_aUehQsCQtM/the_electric_company_short_i_pbs_kids_go/", - "file": "_aUehQsCQtM.flv", - "info_dict": { - "upload_date": "20090102", - "title": "The Electric Company | \"Short I\" | PBS KIDS GO!", - "description": "md5:2439a8ef6d5a70e380c22f5ad323e5a8", - "uploader": "PBS", - "uploader_id": "PBS" - } - }, - { - "name": "BlipTV", - "md5": "b2d849efcf7ee18917e4b4d9ff37cafe", - "url": "http://blip.tv/cbr/cbr-exclusive-gotham-city-imposters-bats-vs-jokerz-short-3-5796352", - "file": "5779306.m4v", - "info_dict": { - "upload_date": "20111205", - "description": "md5:9bc31f227219cde65e47eeec8d2dc596", - "uploader": "Comic Book Resources - CBR TV", - "title": "CBR EXCLUSIVE: \"Gotham City Imposters\" Bats VS Jokerz Short 3" - } - }, - { - "name": "XVideos", - "md5": "1d0c835822f0a71a7bf011855db929d0", - "url": "http://www.xvideos.com/video939581/funny_porns_by_s_-1", - "file": "939581.flv", - "info_dict": { - "title": "Funny Porns By >>>>S<<<<<< -1" - } - }, - { - "name": "YouPorn", - "md5": "c37ddbaaa39058c76a7e86c6813423c1", - "url": "http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/", - "file": "505835.mp4", - "info_dict": { - "upload_date": "20101221", - "description": "Love & Sex Answers: http://bit.ly/DanAndJenn -- Is It Unhealthy To Masturbate Daily?", - "uploader": "Ask Dan And Jennifer", - "title": "Sex Ed: Is It Safe To Masturbate Daily?" - } - }, - { - "name": "Pornotube", - "md5": "374dd6dcedd24234453b295209aa69b6", - "url": "http://pornotube.com/c/173/m/1689755/Marilyn-Monroe-Bathing", - "file": "1689755.flv", - "info_dict": { - "upload_date": "20090708", - "title": "Marilyn-Monroe-Bathing" - } - }, - { - "name": "YouJizz", - "md5": "07e15fa469ba384c7693fd246905547c", - "url": "http://www.youjizz.com/videos/zeichentrick-1-2189178.html", - "file": "2189178.flv", - "info_dict": { - "title": "Zeichentrick 1" - } - }, - { - "name": "Vimeo", - "md5": "8879b6cc097e987f02484baf890129e5", - "url": "http://vimeo.com/56015672", - "file": "56015672.mp4", - "info_dict": { - "title": "youtube-dl test video - ★ \" ' 幸 / \\ ä ↭ 𝕐", - "uploader": "Filippo Valsorda", - "uploader_id": "user7108434", - "upload_date": "20121220", - "description": "This is a test case for youtube-dl.\nFor more information, see github.com/rg3/youtube-dl\nTest chars: ★ \" ' 幸 / \\ ä ↭ 𝕐" - } - }, - { - "name": "Soundcloud", - "md5": "ebef0a451b909710ed1d7787dddbf0d7", - "url": "http://soundcloud.com/ethmusic/lostin-powers-she-so-heavy", - "file": "62986583.mp3", - "info_dict": { - "upload_date": "20121011", - "description": "No Downloads untill we record the finished version this weekend, i was too pumped n i had to post it , earl is prolly gonna b hella p.o'd", - "uploader": "E.T. ExTerrestrial Music", - "title": "Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1" - } - }, - { - "name": "StanfordOpenClassroom", - "md5": "544a9468546059d4e80d76265b0443b8", - "url": "http://openclassroom.stanford.edu/MainFolder/VideoPage.php?course=PracticalUnix&video=intro-environment&speed=100", - "file": "PracticalUnix_intro-environment.mp4", - "info_dict": { - "title": "Intro Environment" - } - }, - { - "name": "XNXX", - "md5": "0831677e2b4761795f68d417e0b7b445", - "url": "http://video.xnxx.com/video1135332/lida_naked_funny_actress_5_", - "file": "1135332.flv", - "info_dict": { - "title": "lida » Naked Funny Actress (5)" - } - }, - { - "name": "Youku", - "url": "http://v.youku.com/v_show/id_XNDgyMDQ2NTQw.html", - "file": "XNDgyMDQ2NTQw_part00.flv", - "md5": "ffe3f2e435663dc2d1eea34faeff5b5b", - "params": { "test": false }, - "info_dict": { - "title": "youtube-dl test video \"'/\\ä↭𝕐" - } - }, - { - "name": "NBA", - "url": "http://www.nba.com/video/games/nets/2012/12/04/0021200253-okc-bkn-recap.nba/index.html", - "file": "0021200253-okc-bkn-recap.nba.mp4", - "md5": "c0edcfc37607344e2ff8f13c378c88a4", - "info_dict": { - "description": "Kevin Durant scores 32 points and dishes out six assists as the Thunder beat the Nets in Brooklyn.", - "title": "Thunder vs. Nets" - } - }, - { - "name": "JustinTV", - "url": "http://www.twitch.tv/thegamedevhub/b/296128360", - "file": "296128360.flv", - "md5": "ecaa8a790c22a40770901460af191c9a", - "info_dict": { - "upload_date": "20110927", - "uploader_id": 25114803, - "uploader": "thegamedevhub", - "title": "Beginner Series - Scripting With Python Pt.1" - } - }, - { - "name": "MyVideo", - "url": "http://www.myvideo.de/watch/8229274/bowling_fail_or_win", - "file": "8229274.flv", - "md5": "2d2753e8130479ba2cb7e0a37002053e", - "info_dict": { - "title": "bowling-fail-or-win" - } - }, - { - "name": "Escapist", - "url": "http://www.escapistmagazine.com/videos/view/the-escapist-presents/6618-Breaking-Down-Baldurs-Gate", - "file": "6618-Breaking-Down-Baldurs-Gate.mp4", - "md5": "c6793dbda81388f4264c1ba18684a74d", - "info_dict": { - "description": "Baldur's Gate: Original, Modded or Enhanced Edition? I'll break down what you can expect from the new Baldur's Gate: Enhanced Edition.", - "uploader": "the-escapist-presents", - "title": "Breaking Down Baldur's Gate" - } - }, - { - "name": "GooglePlus", - "url": "https://plus.google.com/u/0/108897254135232129896/posts/ZButuJc6CtH", - "file": "ZButuJc6CtH.flv", - "info_dict": { - "upload_date": "20120613", - "uploader": "井上ヨシマサ", - "title": "嘆きの天使 降臨" - } - }, - { - "name": "FunnyOrDie", - "url": "http://www.funnyordie.com/videos/0732f586d7/heart-shaped-box-literal-video-version", - "file": "0732f586d7.mp4", - "md5": "f647e9e90064b53b6e046e75d0241fbd", - "info_dict": { - "description": "Lyrics changed to match the video. Spoken cameo by Obscurus Lupa (from ThatGuyWithTheGlasses.com). Based on a concept by Dustin McLean (DustFilms.com). Performed, edited, and written by David A. Scott.", - "title": "Heart-Shaped Box: Literal Video Version" - } - }, - { - "name": "Steam", - "url": "http://store.steampowered.com/video/105600/", - "playlist": [ - { - "file": "81300.flv", - "md5": "f870007cee7065d7c76b88f0a45ecc07", - "info_dict": { - "title": "Terraria 1.1 Trailer" - } - }, - { - "file": "80859.flv", - "md5": "61aaf31a5c5c3041afb58fb83cbb5751", - "info_dict": { - "title": "Terraria Trailer" - } - } - ] - }, - { - "name": "Ustream", - "url": "http://www.ustream.tv/recorded/20274954", - "file": "20274954.flv", - "md5": "088f151799e8f572f84eb62f17d73e5c", - "info_dict": { - "title": "Young Americans for Liberty February 7, 2012 2:28 AM", - "uploader": "Young Americans for Liberty" - } - }, - { - "name": "InfoQ", - "url": "http://www.infoq.com/presentations/A-Few-of-My-Favorite-Python-Things", - "file": "12-jan-pythonthings.mp4", - "info_dict": { - "description": "Mike Pirnat presents some tips and tricks, standard libraries and third party packages that make programming in Python a richer experience.", - "title": "A Few of My Favorite [Python] Things" - }, - "params": { - "skip_download": true - } - }, - { - "name": "ComedyCentral", - "url": "http://www.thedailyshow.com/watch/thu-december-13-2012/kristen-stewart", - "file": "422212.mp4", - "md5": "4e2f5cb088a83cd8cdb7756132f9739d", - "info_dict": { - "upload_date": "20121214", - "description": "Kristen Stewart", - "uploader": "thedailyshow", - "title": "thedailyshow-kristen-stewart part 1" - } - }, - { - "name": "RBMARadio", - "url": "http://www.rbmaradio.com/shows/ford-lopatin-live-at-primavera-sound-2011", - "file": "ford-lopatin-live-at-primavera-sound-2011.mp3", - "md5": "6bc6f9bcb18994b4c983bc3bf4384d95", - "info_dict": { - "title": "Live at Primavera Sound 2011", - "description": "Joel Ford and Daniel \u2019Oneohtrix Point Never\u2019 Lopatin fly their midified pop extravaganza to Spain. Live at Primavera Sound 2011.", - "uploader": "Ford & Lopatin", - "uploader_id": "ford-lopatin", - "location": "Spain" - } - }, - { - "name": "Facebook", - "url": "https://www.facebook.com/photo.php?v=120708114770723", - "file": "120708114770723.mp4", - "md5": "48975a41ccc4b7a581abd68651c1a5a8", - "info_dict": { - "title": "PEOPLE ARE AWESOME 2013", - "duration": 279 - } - }, - { - "name": "EightTracks", - "url": "http://8tracks.com/ytdl/youtube-dl-test-tracks-a", - "playlist": [ - { - "file": "11885610.m4a", - "md5": "96ce57f24389fc8734ce47f4c1abcc55", - "info_dict": { - "title": "youtue-dl project<>\"' - youtube-dl test track 1 \"'/\\\u00e4\u21ad", - "uploader_id": "ytdl" - } - }, - { - "file": "11885608.m4a", - "md5": "4ab26f05c1f7291ea460a3920be8021f", - "info_dict": { - "title": "youtube-dl project - youtube-dl test track 2 \"'/\\\u00e4\u21ad", - "uploader_id": "ytdl" - - } - }, - { - "file": "11885679.m4a", - "md5": "d30b5b5f74217410f4689605c35d1fd7", - "info_dict": { - "title": "youtube-dl project as well - youtube-dl test track 3 \"'/\\\u00e4\u21ad", - "uploader_id": "ytdl" - } - }, - { - "file": "11885680.m4a", - "md5": "4eb0a669317cd725f6bbd336a29f923a", - "info_dict": { - "title": "youtube-dl project as well - youtube-dl test track 4 \"'/\\\u00e4\u21ad", - "uploader_id": "ytdl" - } - }, - { - "file": "11885682.m4a", - "md5": "1893e872e263a2705558d1d319ad19e8", - "info_dict": { - "title": "PH - youtube-dl test track 5 \"'/\\\u00e4\u21ad", - "uploader_id": "ytdl" - } - }, - { - "file": "11885683.m4a", - "md5": "b673c46f47a216ab1741ae8836af5899", - "info_dict": { - "title": "PH - youtube-dl test track 6 \"'/\\\u00e4\u21ad", - "uploader_id": "ytdl" - } - }, - { - "file": "11885684.m4a", - "md5": "1d74534e95df54986da7f5abf7d842b7", - "info_dict": { - "title": "phihag - youtube-dl test track 7 \"'/\\\u00e4\u21ad", - "uploader_id": "ytdl" - } - }, - { - "file": "11885685.m4a", - "md5": "f081f47af8f6ae782ed131d38b9cd1c0", - "info_dict": { - "title": "phihag - youtube-dl test track 8 \"'/\\\u00e4\u21ad", - "uploader_id": "ytdl" - } - } - ] - }, - { - "name": "Keek", - "url": "http://www.keek.com/ytdl/keeks/NODfbab", - "file": "NODfbab.mp4", - "md5": "9b0636f8c0f7614afa4ea5e4c6e57e83", - "info_dict": { - "uploader": "ytdl", - "title": "test chars: \"'/\\ä<>This is a test video for youtube-dl.For more information, contact phihag@phihag.de ." - } - }, - { - "name": "TED", - "url": "http://www.ted.com/talks/dan_dennett_on_our_consciousness.html", - "file": "102.mp4", - "md5": "8cd9dfa41ee000ce658fd48fb5d89a61", - "info_dict": { - "title": "Dan Dennett: The illusion of consciousness", - "description": "md5:c6fa72e6eedbd938c9caf6b2702f5922" - } - }, - { - "name": "MySpass", - "url": "http://www.myspass.de/myspass/shows/tvshows/absolute-mehrheit/Absolute-Mehrheit-vom-17022013-Die-Highlights-Teil-2--/11741/", - "file": "11741.mp4", - "md5": "0b49f4844a068f8b33f4b7c88405862b", - "info_dict": { - "description": "Wer kann in die Fußstapfen von Wolfgang Kubicki treten und die Mehrheit der Zuschauer hinter sich versammeln? Wird vielleicht sogar die Absolute Mehrheit geknackt und der Jackpot von 200.000 Euro mit nach Hause genommen?", - "title": "Absolute Mehrheit vom 17.02.2013 - Die Highlights, Teil 2" - } - }, - { - "name": "Generic", - "url": "http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html", - "file": "13601338388002.mp4", - "md5": "85b90ccc9d73b4acd9138d3af4c27f89", - "info_dict": { - "uploader": "www.hodiho.fr", - "title": "Régis plante sa Jeep" - } - }, - { - "name": "Spiegel", - "url": "http://www.spiegel.de/video/vulkan-tungurahua-in-ecuador-ist-wieder-aktiv-video-1259285.html", - "file": "1259285.mp4", - "md5": "2c2754212136f35fb4b19767d242f66e", - "info_dict": { - "title": "Vulkanausbruch in Ecuador: Der \"Feuerschlund\" ist wieder aktiv" - } - }, - { - "name": "LiveLeak", - "md5": "0813c2430bea7a46bf13acf3406992f4", - "url": "http://www.liveleak.com/view?i=757_1364311680", - "file": "757_1364311680.mp4", - "info_dict": { - "title": "Most unlucky car accident", - "description": "extremely bad day for this guy..!", - "uploader": "ljfriel2" - } - }, - { - "name": "WorldStarHipHop", - "url": "http://www.worldstarhiphop.com/videos/video.php?v=wshh6a7q1ny0G34ZwuIO", - "file": "wshh6a7q1ny0G34ZwuIO.mp4", - "md5": "9d04de741161603bf7071bbf4e883186", - "info_dict": { - "title": "Video: KO Of The Week: MMA Fighter Gets Knocked Out By Swift Head Kick!" - } - }, - { - "name": "ARD", - "url": "http://www.ardmediathek.de/das-erste/tagesschau-in-100-sek?documentId=14077640", - "file": "14077640.mp4", - "md5": "6ca8824255460c787376353f9e20bbd8", - "info_dict": { - "title": "11.04.2013 09:23 Uhr - Tagesschau in 100 Sekunden" - }, - "skip": "Requires rtmpdump" - }, - { - "name": "Tumblr", - "url": "http://resigno.tumblr.com/post/53364321212/e-de-extrema-importancia-que-esse-video-seja", - "file": "53364321212.mp4", - "md5": "0716d3dd51baf68a28b40fdf1251494e", - "info_dict": { - "title": "Rafael Lemos | Tumblr" - } - }, - { - "name": "SoundcloudSet", - "url":"https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep", - "playlist":[ - { - "file":"30510138.mp3", - "md5":"f9136bf103901728f29e419d2c70f55d", - "info_dict": { - "upload_date": "20111213", - "description": "The Royal Concept from Stockholm\r\nFilip / Povel / David / Magnus\r\nwww.royalconceptband.com", - "uploader": "The Royal Concept", - "title": "D-D-Dance" - } - }, - { - "file":"47127625.mp3", - "md5":"09b6758a018470570f8fd423c9453dd8", - "info_dict": { - "upload_date": "20120521", - "description": "The Royal Concept from Stockholm\r\nFilip / Povel / David / Magnus\r\nwww.royalconceptband.com", - "uploader": "The Royal Concept", - "title": "The Royal Concept - Gimme Twice" - } - }, - { - "file":"47127627.mp3", - "md5":"154abd4e418cea19c3b901f1e1306d9c", - "info_dict": { - "upload_date": "20120521", - "uploader": "The Royal Concept", - "title": "Goldrushed" - } - }, - { - "file":"47127629.mp3", - "md5":"2f5471edc79ad3f33a683153e96a79c1", - "info_dict": { - "upload_date": "20120521", - "description": "The Royal Concept from Stockholm\r\nFilip / Povel / David / Magnus\r\nwww.royalconceptband.com", - "uploader": "The Royal Concept", - "title": "In the End" - } - }, - { - "file":"47127631.mp3", - "md5":"f9ba87aa940af7213f98949254f1c6e2", - "info_dict": { - "upload_date": "20120521", - "description": "The Royal Concept from Stockholm\r\nFilip / David / Povel / Magnus\r\nwww.theroyalconceptband.com", - "uploader": "The Royal Concept", - "title": "Knocked Up" - } - }, - { - "file":"75206121.mp3", - "md5":"f9d1fe9406717e302980c30de4af9353", - "info_dict": { - "upload_date": "20130116", - "description": "The unreleased track World on Fire premiered on the CW's hit show Arrow (8pm/7pm central). \r\nAs a gift to our fans we would like to offer you a free download of the track! ", - "uploader": "The Royal Concept", - "title": "World On Fire" - } - } - ] - }, - { - "name":"Bandcamp", - "url":"http://youtube-dl.bandcamp.com/track/youtube-dl-test-song", - "file":"1812978515.mp3", - "md5":"cdeb30cdae1921719a3cbcab696ef53c", - "info_dict": { - "title":"youtube-dl test song \"'/\\ä↭" - }, - "skip": "There is a limit of 200 free downloads / month for the test song" - }, - { - "name": "RedTube", - "url": "http://www.redtube.com/66418", - "file": "66418.mp4", - "md5": "7b8c22b5e7098a3e1c09709df1126d2d", - "info_dict":{ - "title":"Sucked on a toilet" - } - }, - { - "name": "Photobucket", - "url": "http://media.photobucket.com/user/rachaneronas/media/TiredofLinkBuildingTryBacklinkMyDomaincom_zpsc0c3b9fa.mp4.html?filters[term]=search&filters[primary]=videos&filters[secondary]=images&sort=1&o=0", - "file": "zpsc0c3b9fa.mp4", - "md5": "7dabfb92b0a31f6c16cebc0f8e60ff99", - "info_dict": { - "upload_date": "20130504", - "uploader": "rachaneronas", - "title": "Tired of Link Building? Try BacklinkMyDomain.com!" - } - }, - { - "name": "Ina", - "url": "www.ina.fr/video/I12055569/francois-hollande-je-crois-que-c-est-clair-video.html", - "file": "I12055569.mp4", - "md5": "a667021bf2b41f8dc6049479d9bb38a3", - "info_dict":{ - "title":"François Hollande \"Je crois que c'est clair\"" - } - }, - { - "name": "Yahoo", - "url": "http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html", - "file": "214727115.flv", - "md5": "2e717f169c1be93d84d3794a00d4a325", - "info_dict": { - "title": "Julian Smith & Travis Legg Watch Julian Smith" - }, - "skip": "Requires rtmpdump" - }, - { - "name": "Howcast", - "url": "http://www.howcast.com/videos/390161-How-to-Tie-a-Square-Knot-Properly", - "file": "390161.mp4", - "md5": "1d7ba54e2c9d7dc6935ef39e00529138", - "info_dict":{ - "title":"How to Tie a Square Knot Properly", - "description":"The square knot, also known as the reef knot, is one of the oldest, most basic knots to tie, and can be used in many different ways. Here's the proper way to tie a square knot." - } - }, - { - "name": "Vine", - "url": "https://vine.co/v/b9KOOWX7HUx", - "file": "b9KOOWX7HUx.mp4", - "md5": "2f36fed6235b16da96ce9b4dc890940d", - "info_dict":{ - "title": "Chicken.", - "uploader": "Jack Dorsey" - } - }, - { - "name": "Flickr", - "url": "http://www.flickr.com/photos/forestwander-nature-pictures/5645318632/in/photostream/", - "file": "5645318632.mp4", - "md5": "6fdc01adbc89d72fc9c4f15b4a4ba87b", - "info_dict":{ - "title": "Dark Hollow Waterfalls", - "uploader_id": "forestwander-nature-pictures", - "description": "Waterfalls in the Springtime at Dark Hollow Waterfalls. These are located just off of Skyline Drive in Virginia. They are only about 6/10 of a mile hike but it is a pretty steep hill and a good climb back up." - } - }, - { - "name": "Teamcoco", - "url": "http://teamcoco.com/video/louis-ck-interview-george-w-bush", - "file": "19705.mp4", - "md5": "27b6f7527da5acf534b15f21b032656e", - "info_dict":{ - "title": "Louis C.K. Interview Pt. 1 11/3/11", - "description": "Louis C.K. got starstruck by George W. Bush, so what? Part one." - } - }, - { - "name": "XHamster", - "url": "http://xhamster.com/movies/1509445/femaleagent_shy_beauty_takes_the_bait.html", - "file": "1509445.flv", - "md5": "9f48e0e8d58e3076bb236ff412ab62fa", - "info_dict": { - "upload_date": "20121014", - "uploader_id": "Ruseful2011", - "title": "FemaleAgent Shy beauty takes the bait" - } - }, - { - "name": "Hypem", - "url": "http://hypem.com/track/1v6ga/BODYWORK+-+TAME", - "file": "1v6ga.mp3", - "md5": "b9cc91b5af8995e9f0c1cee04c575828", - "info_dict":{ - "title":"Tame" - } - }, - { - "name": "Vbox7", - "url": "http://vbox7.com/play:249bb972c2", - "file": "249bb972c2.flv", - "md5": "9c70d6d956f888bdc08c124acc120cfe", - "info_dict":{ - "title":"Смях! Чудо - чист за секунди - Скрита камера" - } - }, - { - "name": "Gametrailers", - "url": "http://www.gametrailers.com/videos/zbvr8i/mirror-s-edge-2-e3-2013--debut-trailer", - "file": "zbvr8i.flv", - "md5": "c3edbc995ab4081976e16779bd96a878", - "info_dict": { - "title": "E3 2013: Debut Trailer" - }, - "skip": "Requires rtmpdump" - }, - { - "name": "Statigram", - "url": "http://statigr.am/p/484091715184808010_284179915", - "file": "484091715184808010_284179915.mp4", - "md5": "deda4ff333abe2e118740321e992605b", - "info_dict": { - "uploader_id": "videoseconds", - "title": "Instagram photo by @videoseconds (Videos)" - } - }, - { - "name": "Break", - "url": "http://www.break.com/video/when-girls-act-like-guys-2468056", - "file": "2468056.mp4", - "md5": "a3513fb1547fba4fb6cfac1bffc6c46b", - "info_dict": { - "title": "When Girls Act Like D-Bags" - } - }, - { - "name": "Vevo", - "url": "http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280", - "file": "GB1101300280.mp4", - "md5": "06bea460acb744eab74a9d7dcb4bfd61", - "info_dict": { - "title": "Somebody To Die For", - "upload_date": "20130624", - "uploader": "Hurts" - } - }, - { - "name": "Tudou", - "url": "http://www.tudou.com/listplay/zzdE77v6Mmo/2xN2duXMxmw.html", - "file": "159447792.f4v", - "md5": "ad7c358a01541e926a1e413612c6b10a", - "info_dict": { - "title": "卡马乔国足开大脚长传冲吊集锦" - } - }, - { - "name": "CSpan", - "url": "http://www.c-spanvideo.org/program/HolderonV", - "file": "315139.flv", - "md5": "74a623266956f69e4df0068ab6c80fe4", - "info_dict": { - "title": "Attorney General Eric Holder on Voting Rights Act Decision" - }, - "skip": "Requires rtmpdump" - }, - { - "name": "Wimp", - "url": "http://www.wimp.com/deerfence/", - "file": "deerfence.flv", - "md5": "8b215e2e0168c6081a1cf84b2846a2b5", - "info_dict": { - "title": "Watch Till End: Herd of deer jump over a fence." - } - } -] diff --git a/youtube-dl b/youtube-dl index 876dea9..4ddbecc 100755 Binary files a/youtube-dl and b/youtube-dl differ diff --git a/youtube-dl.1 b/youtube-dl.1 index 0ac019f..001c05e 100644 --- a/youtube-dl.1 +++ b/youtube-dl.1 @@ -22,19 +22,13 @@ redistribute it or use it however you like. \-\-version\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ print\ program\ version\ and\ exit \-U,\ \-\-update\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ update\ this\ program\ to\ latest\ version \-i,\ \-\-ignore\-errors\ \ \ \ \ \ \ \ continue\ on\ download\ errors -\-r,\ \-\-rate\-limit\ LIMIT\ \ \ \ \ maximum\ download\ rate\ (e.g.\ 50k\ or\ 44.6m) -\-R,\ \-\-retries\ RETRIES\ \ \ \ \ \ number\ of\ retries\ (default\ is\ 10) -\-\-buffer\-size\ SIZE\ \ \ \ \ \ \ \ \ size\ of\ download\ buffer\ (e.g.\ 1024\ or\ 16k) -\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ (default\ is\ 1024) -\-\-no\-resize\-buffer\ \ \ \ \ \ \ \ \ do\ not\ automatically\ adjust\ the\ buffer\ size.\ By -\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ default,\ the\ buffer\ size\ is\ automatically\ resized -\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ from\ an\ initial\ value\ of\ SIZE. \-\-dump\-user\-agent\ \ \ \ \ \ \ \ \ \ display\ the\ current\ browser\ identification \-\-user\-agent\ UA\ \ \ \ \ \ \ \ \ \ \ \ specify\ a\ custom\ user\ agent \-\-referer\ REF\ \ \ \ \ \ \ \ \ \ \ \ \ \ specify\ a\ custom\ referer,\ use\ if\ the\ video\ access \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ is\ restricted\ to\ one\ domain \-\-list\-extractors\ \ \ \ \ \ \ \ \ \ List\ all\ supported\ extractors\ and\ the\ URLs\ they \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ would\ handle +\-\-extractor\-descriptions\ \ \ Output\ descriptions\ of\ all\ supported\ extractors \-\-proxy\ URL\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Use\ the\ specified\ HTTP/HTTPS\ proxy \-\-no\-check\-certificate\ \ \ \ \ Suppress\ HTTPS\ certificate\ validation. \f[] @@ -59,6 +53,19 @@ redistribute it or use it however you like. \-\-dateafter\ DATE\ \ \ \ \ \ \ \ \ \ \ download\ only\ videos\ uploaded\ after\ this\ date \f[] .fi +.SS Download Options: +.IP +.nf +\f[C] +\-r,\ \-\-rate\-limit\ LIMIT\ \ \ \ \ maximum\ download\ rate\ (e.g.\ 50k\ or\ 44.6m) +\-R,\ \-\-retries\ RETRIES\ \ \ \ \ \ number\ of\ retries\ (default\ is\ 10) +\-\-buffer\-size\ SIZE\ \ \ \ \ \ \ \ \ size\ of\ download\ buffer\ (e.g.\ 1024\ or\ 16k) +\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ (default\ is\ 1024) +\-\-no\-resize\-buffer\ \ \ \ \ \ \ \ \ do\ not\ automatically\ adjust\ the\ buffer\ size.\ By +\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ default,\ the\ buffer\ size\ is\ automatically\ resized +\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ from\ an\ initial\ value\ of\ SIZE. +\f[] +.fi .SS Filesystem Options: .IP .nf @@ -225,9 +232,7 @@ video. \f[C]playlist_index\f[]: The index of the video in the playlist, a five\-digit number. .PP -The current default template is \f[C]%(id)s.%(ext)s\f[], but that will -be switchted to \f[C]%(title)s\-%(id)s.%(ext)s\f[] (which can be -requested with \f[C]\-t\f[] at the moment). +The current default template is \f[C]%(title)s\-%(id)s.%(ext)s\f[]. .PP In some cases, you don\[aq]t want special characters such as 中, spaces, or &, such as when transferring the downloaded filename to a Windows @@ -269,18 +274,17 @@ $\ youtube\-dl\ \-\-dateafter\ 20000101\ \-\-datebefore\ 20100101\ #will\ only\ Most people asking this question are not aware that youtube\-dl now defaults to downloading the highest available quality as reported by YouTube, which will be 1080p or 720p in some cases, so you no longer -need the \-b option. +need the \f[C]\-b\f[] option. For some specific videos, maybe YouTube does not report them to be -available in a specific high quality format you\[aq]\[aq]re interested -in. -In that case, simply request it with the \-f option and youtube\-dl will -try to download it. +available in a specific high quality format you\[aq]re interested in. +In that case, simply request it with the \f[C]\-f\f[] option and +youtube\-dl will try to download it. .SS I get HTTP error 402 when trying to download a video. What\[aq]s this? .PP Apparently YouTube requires you to pass a CAPTCHA test if you download too much. -We\[aq]\[aq]re considering to provide a way to let you solve the +We\[aq]re considering to provide a way to let you solve the CAPTCHA (https://github.com/rg3/youtube-dl/issues/154), but at the moment, your best course of action is pointing a webbrowser to the youtube URL, solving the CAPTCHA, and restart youtube\-dl. diff --git a/youtube-dl.bash-completion b/youtube-dl.bash-completion index a3e9bdf..fd12ce8 100644 --- a/youtube-dl.bash-completion +++ b/youtube-dl.bash-completion @@ -3,7 +3,7 @@ __youtube-dl() local cur prev opts COMPREPLY=() cur="${COMP_WORDS[COMP_CWORD]}" - opts="--help --version --update --ignore-errors --rate-limit --retries --buffer-size --no-resize-buffer --dump-user-agent --user-agent --referer --list-extractors --proxy --no-check-certificate --test --playlist-start --playlist-end --match-title --reject-title --max-downloads --min-filesize --max-filesize --date --datebefore --dateafter --title --id --literal --auto-number --output --autonumber-size --restrict-filenames --batch-file --no-overwrites --continue --no-continue --cookies --no-part --no-mtime --write-description --write-info-json --write-thumbnail --quiet --simulate --skip-download --get-url --get-title --get-id --get-thumbnail --get-description --get-filename --get-format --newline --no-progress --console-title --verbose --dump-intermediate-pages --format --all-formats --prefer-free-formats --max-quality --list-formats --write-sub --write-auto-sub --only-sub --all-subs --list-subs --sub-format --sub-lang --username --password --netrc --video-password --extract-audio --audio-format --audio-quality --recode-video --keep-video --no-post-overwrites" + opts="--help --version --update --ignore-errors --dump-user-agent --user-agent --referer --list-extractors --extractor-descriptions --proxy --no-check-certificate --playlist-start --playlist-end --match-title --reject-title --max-downloads --min-filesize --max-filesize --date --datebefore --dateafter --rate-limit --retries --buffer-size --no-resize-buffer --test --title --id --literal --auto-number --output --autonumber-size --restrict-filenames --batch-file --no-overwrites --continue --no-continue --cookies --no-part --no-mtime --write-description --write-info-json --write-thumbnail --quiet --simulate --skip-download --get-url --get-title --get-id --get-thumbnail --get-description --get-filename --get-format --newline --no-progress --console-title --verbose --dump-intermediate-pages --format --all-formats --prefer-free-formats --max-quality --list-formats --write-sub --write-auto-sub --only-sub --all-subs --list-subs --sub-format --sub-lang --username --password --netrc --video-password --extract-audio --audio-format --audio-quality --recode-video --keep-video --no-post-overwrites" if [[ ${cur} == * ]] ; then COMPREPLY=( $(compgen -W "${opts}" -- ${cur}) ) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 9931c98..d3281fe 100644 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -13,7 +13,7 @@ import time import traceback from .utils import * -from .extractor import get_info_extractor +from .extractor import get_info_extractor, gen_extractors from .FileDownloader import FileDownloader @@ -113,6 +113,13 @@ class YoutubeDL(object): self._ies.append(ie) ie.set_downloader(self) + def add_default_info_extractors(self): + """ + Add the InfoExtractors returned by gen_extractors to the end of the list + """ + for ie in gen_extractors(): + self.add_info_extractor(ie) + def add_post_processor(self, pp): """Add a PostProcessor object to the end of the chain.""" self._pps.append(pp) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 6a8fc5e..db63d0a 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -26,7 +26,8 @@ __authors__ = ( 'Julien Fraichard', 'Johny Mo Swag', 'Axel Noack', - ) + 'Albert Kim', +) __license__ = 'Public Domain' @@ -34,6 +35,7 @@ import codecs import getpass import optparse import os +import random import re import shlex import socket @@ -117,6 +119,7 @@ def parseOpts(overrideArguments=None): selection = optparse.OptionGroup(parser, 'Video Selection') authentication = optparse.OptionGroup(parser, 'Authentication Options') video_format = optparse.OptionGroup(parser, 'Video Format Options') + downloader = optparse.OptionGroup(parser, 'Download Options') postproc = optparse.OptionGroup(parser, 'Post-processing Options') filesystem = optparse.OptionGroup(parser, 'Filesystem Options') verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options') @@ -129,15 +132,6 @@ def parseOpts(overrideArguments=None): action='store_true', dest='update_self', help='update this program to latest version') general.add_option('-i', '--ignore-errors', action='store_true', dest='ignoreerrors', help='continue on download errors', default=False) - general.add_option('-r', '--rate-limit', - dest='ratelimit', metavar='LIMIT', help='maximum download rate (e.g. 50k or 44.6m)') - general.add_option('-R', '--retries', - dest='retries', metavar='RETRIES', help='number of retries (default is %default)', default=10) - general.add_option('--buffer-size', - dest='buffersize', metavar='SIZE', help='size of download buffer (e.g. 1024 or 16k) (default is %default)', default="1024") - general.add_option('--no-resize-buffer', - action='store_true', dest='noresizebuffer', - help='do not automatically adjust the buffer size. By default, the buffer size is automatically resized from an initial value of SIZE.', default=False) general.add_option('--dump-user-agent', action='store_true', dest='dump_user_agent', help='display the current browser identification', default=False) @@ -149,9 +143,12 @@ def parseOpts(overrideArguments=None): general.add_option('--list-extractors', action='store_true', dest='list_extractors', help='List all supported extractors and the URLs they would handle', default=False) + general.add_option('--extractor-descriptions', + action='store_true', dest='list_extractor_descriptions', + help='Output descriptions of all supported extractors', default=False) general.add_option('--proxy', dest='proxy', default=None, help='Use the specified HTTP/HTTPS proxy', metavar='URL') general.add_option('--no-check-certificate', action='store_true', dest='no_check_certificate', default=False, help='Suppress HTTPS certificate validation.') - general.add_option('--test', action='store_true', dest='test', default=False, help=optparse.SUPPRESS_HELP) + selection.add_option('--playlist-start', dest='playliststart', metavar='NUMBER', help='playlist video to start at (default is %default)', default=1) @@ -210,6 +207,17 @@ def parseOpts(overrideArguments=None): action='store', dest='subtitleslang', metavar='LANG', help='language of the subtitles to download (optional) use IETF language tags like \'en\'') + downloader.add_option('-r', '--rate-limit', + dest='ratelimit', metavar='LIMIT', help='maximum download rate (e.g. 50k or 44.6m)') + downloader.add_option('-R', '--retries', + dest='retries', metavar='RETRIES', help='number of retries (default is %default)', default=10) + downloader.add_option('--buffer-size', + dest='buffersize', metavar='SIZE', help='size of download buffer (e.g. 1024 or 16k) (default is %default)', default="1024") + downloader.add_option('--no-resize-buffer', + action='store_true', dest='noresizebuffer', + help='do not automatically adjust the buffer size. By default, the buffer size is automatically resized from an initial value of SIZE.', default=False) + downloader.add_option('--test', action='store_true', dest='test', default=False, help=optparse.SUPPRESS_HELP) + verbosity.add_option('-q', '--quiet', action='store_true', dest='quiet', help='activates quiet mode', default=False) verbosity.add_option('-s', '--simulate', @@ -316,6 +324,7 @@ def parseOpts(overrideArguments=None): parser.add_option_group(general) parser.add_option_group(selection) + parser.add_option_group(downloader) parser.add_option_group(filesystem) parser.add_option_group(verbosity) parser.add_option_group(video_format) @@ -415,13 +424,25 @@ def _real_main(argv=None): extractors = gen_extractors() if opts.list_extractors: - for ie in extractors: + for ie in sorted(extractors, key=lambda ie: ie.IE_NAME.lower()): compat_print(ie.IE_NAME + (' (CURRENTLY BROKEN)' if not ie._WORKING else '')) matchedUrls = [url for url in all_urls if ie.suitable(url)] all_urls = [url for url in all_urls if url not in matchedUrls] for mu in matchedUrls: compat_print(u' ' + mu) sys.exit(0) + if opts.list_extractor_descriptions: + for ie in sorted(extractors, key=lambda ie: ie.IE_NAME.lower()): + if not ie._WORKING: + continue + desc = getattr(ie, 'IE_DESC', ie.IE_NAME) + if hasattr(ie, 'SEARCH_KEY'): + _SEARCHES = (u'cute kittens', u'slithering pythons', u'falling cat', u'angry poodle', u'purple fish', u'running tortoise') + _COUNTS = (u'', u'5', u'10', u'all') + desc += u' (Example: "%s%s:%s" )' % (ie.SEARCH_KEY, random.choice(_COUNTS), random.choice(_SEARCHES)) + compat_print(desc) + sys.exit(0) + # Conflicting, missing and erroneous options if opts.usenetrc and (opts.username is not None or opts.password is not None): @@ -561,19 +582,20 @@ def _real_main(argv=None): if opts.verbose: ydl.to_screen(u'[debug] youtube-dl version ' + __version__) try: - sp = subprocess.Popen(['git', 'rev-parse', '--short', 'HEAD'], stdout=subprocess.PIPE, stderr=subprocess.PIPE, - cwd=os.path.dirname(os.path.abspath(__file__))) + sp = subprocess.Popen( + ['git', 'rev-parse', '--short', 'HEAD'], + stdout=subprocess.PIPE, stderr=subprocess.PIPE, + cwd=os.path.dirname(os.path.abspath(__file__))) out, err = sp.communicate() out = out.decode().strip() if re.match('[0-9a-f]+', out): ydl.to_screen(u'[debug] Git HEAD: ' + out) except: - pass + sys.exc_clear() ydl.to_screen(u'[debug] Python version %s - %s' %(platform.python_version(), platform.platform())) ydl.to_screen(u'[debug] Proxy map: ' + str(proxy_handler.proxies)) - for extractor in extractors: - ydl.add_info_extractor(extractor) + ydl.add_default_info_extractors() # PostProcessors if opts.extractaudio: diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 2750fc8..41efc57 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -1,6 +1,7 @@ from .ard import ARDIE from .arte import ArteTvIE +from .auengine import AUEngineIE from .bandcamp import BandcampIE from .bliptv import BlipTVIE, BlipTVUserIE from .breakcom import BreakIE @@ -14,14 +15,17 @@ from .escapist import EscapistIE from .facebook import FacebookIE from .flickr import FlickrIE from .funnyordie import FunnyOrDieIE +from .gamespot import GameSpotIE from .gametrailers import GametrailersIE from .generic import GenericIE from .googleplus import GooglePlusIE from .googlesearch import GoogleSearchIE +from .hotnewhiphop import HotNewHipHopIE from .howcast import HowcastIE from .hypem import HypemIE from .ina import InaIE from .infoq import InfoQIE +from .instagram import InstagramIE from .jukebox import JukeboxIE from .justintv import JustinTVIE from .keek import KeekIE @@ -36,6 +40,7 @@ from .photobucket import PhotobucketIE from .pornotube import PornotubeIE from .rbmaradio import RBMARadioIE from .redtube import RedTubeIE +from .ringtv import RingTVIE from .soundcloud import SoundcloudIE, SoundcloudSetIE from .spiegel import SpiegelIE from .stanfordoc import StanfordOpenClassroomIE @@ -43,13 +48,17 @@ from .statigram import StatigramIE from .steam import SteamIE from .teamcoco import TeamcocoIE from .ted import TEDIE +from .tf1 import TF1IE +from .traileraddict import TrailerAddictIE from .tudou import TudouIE from .tumblr import TumblrIE +from .tutv import TutvIE from .ustream import UstreamIE from .vbox7 import Vbox7IE from .vevo import VevoIE from .vimeo import VimeoIE from .vine import VineIE +from .wat import WatIE from .wimp import WimpIE from .worldstarhiphop import WorldStarHipHopIE from .xhamster import XHamsterIE @@ -59,85 +68,22 @@ from .yahoo import YahooIE, YahooSearchIE from .youjizz import YouJizzIE from .youku import YoukuIE from .youporn import YouPornIE -from .youtube import YoutubeIE, YoutubePlaylistIE, YoutubeSearchIE, YoutubeUserIE, YoutubeChannelIE +from .youtube import YoutubeIE, YoutubePlaylistIE, YoutubeSearchIE, YoutubeUserIE, YoutubeChannelIE, YoutubeShowIE from .zdf import ZDFIE +_ALL_CLASSES = [ + klass + for name, klass in globals().items() + if name.endswith('IE') and name != 'GenericIE' +] +_ALL_CLASSES.append(GenericIE) + def gen_extractors(): """ Return a list of an instance of every supported extractor. The order does matter; the first extractor matched is the one handling the URL. """ - return [ - YoutubePlaylistIE(), - YoutubeChannelIE(), - YoutubeUserIE(), - YoutubeSearchIE(), - YoutubeIE(), - MetacafeIE(), - DailymotionIE(), - GoogleSearchIE(), - PhotobucketIE(), - YahooIE(), - YahooSearchIE(), - DepositFilesIE(), - FacebookIE(), - BlipTVIE(), - BlipTVUserIE(), - VimeoIE(), - MyVideoIE(), - ComedyCentralIE(), - EscapistIE(), - CollegeHumorIE(), - XVideosIE(), - SoundcloudSetIE(), - SoundcloudIE(), - InfoQIE(), - MixcloudIE(), - StanfordOpenClassroomIE(), - MTVIE(), - YoukuIE(), - XNXXIE(), - YouJizzIE(), - PornotubeIE(), - YouPornIE(), - GooglePlusIE(), - ArteTvIE(), - NBAIE(), - WorldStarHipHopIE(), - JustinTVIE(), - FunnyOrDieIE(), - SteamIE(), - UstreamIE(), - RBMARadioIE(), - EightTracksIE(), - KeekIE(), - TEDIE(), - MySpassIE(), - SpiegelIE(), - LiveLeakIE(), - ARDIE(), - ZDFIE(), - TumblrIE(), - BandcampIE(), - RedTubeIE(), - InaIE(), - HowcastIE(), - VineIE(), - FlickrIE(), - TeamcocoIE(), - XHamsterIE(), - HypemIE(), - Vbox7IE(), - GametrailersIE(), - StatigramIE(), - BreakIE(), - VevoIE(), - JukeboxIE(), - TudouIE(), - CSpanIE(), - WimpIE(), - GenericIE() - ] + return [klass() for klass in _ALL_CLASSES] def get_info_extractor(ie_name): """Returns the info extractor class with the given ie_name""" diff --git a/youtube_dl/extractor/ard.py b/youtube_dl/extractor/ard.py index e1ecdf4..5793a41 100644 --- a/youtube_dl/extractor/ard.py +++ b/youtube_dl/extractor/ard.py @@ -9,6 +9,15 @@ class ARDIE(InfoExtractor): _VALID_URL = r'^(?:https?://)?(?:(?:www\.)?ardmediathek\.de|mediathek\.daserste\.de)/(?:.*/)(?P[^/\?]+)(?:\?.*)?' _TITLE = r'(?P.*)</h1>' _MEDIA_STREAM = r'mediaCollection\.addMediaStream\((?P<media_type>\d+), (?P<quality>\d+), "(?P<rtmp_url>[^"]*)", "(?P<video_url>[^"]*)", "[^"]*"\)' + _TEST = { + u'url': u'http://www.ardmediathek.de/das-erste/tagesschau-in-100-sek?documentId=14077640', + u'file': u'14077640.mp4', + u'md5': u'6ca8824255460c787376353f9e20bbd8', + u'info_dict': { + u"title": u"11.04.2013 09:23 Uhr - Tagesschau in 100 Sekunden" + }, + u'skip': u'Requires rtmpdump' + } def _real_extract(self, url): # determine video id from url diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py index b061b95..183274e 100644 --- a/youtube_dl/extractor/arte.py +++ b/youtube_dl/extractor/arte.py @@ -11,11 +11,21 @@ from ..utils import ( ) class ArteTvIE(InfoExtractor): - _VALID_URL = r'(?:http://)?www\.arte.tv/guide/(?:fr|de)/(?:(?:sendungen|emissions)/)?(?P<id>.*?)/(?P<name>.*?)(\?.*)?' + """ + There are two sources of video in arte.tv: videos.arte.tv and + www.arte.tv/guide, the extraction process is different for each one. + The videos expire in 7 days, so we can't add tests. + """ + _EMISSION_URL = r'(?:http://)?www\.arte.tv/guide/(?:fr|de)/(?:(?:sendungen|emissions)/)?(?P<id>.*?)/(?P<name>.*?)(\?.*)?' + _VIDEOS_URL = r'(?:http://)?videos.arte.tv/(?:fr|de)/.*-(?P<id>.*?).html' _LIVE_URL = r'index-[0-9]+\.html$' IE_NAME = u'arte.tv' + @classmethod + def suitable(cls, url): + return any(re.match(regex, url) for regex in (cls._EMISSION_URL, cls._VIDEOS_URL)) + # TODO implement Live Stream # def extractLiveStream(self, url): # video_lang = url.split('/')[-4] @@ -44,17 +54,26 @@ class ArteTvIE(InfoExtractor): # video_url = u'%s/%s' % (info.get('url'), info.get('path')) def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - name = mobj.group('name') - # This is not a real id, it can be for example AJT for the news - # http://www.arte.tv/guide/fr/emissions/AJT/arte-journal - video_id = mobj.group('id') + mobj = re.match(self._EMISSION_URL, url) + if mobj is not None: + name = mobj.group('name') + # This is not a real id, it can be for example AJT for the news + # http://www.arte.tv/guide/fr/emissions/AJT/arte-journal + video_id = mobj.group('id') + return self._extract_emission(url, video_id) + + mobj = re.match(self._VIDEOS_URL, url) + if mobj is not None: + id = mobj.group('id') + return self._extract_video(url, id) if re.search(self._LIVE_URL, video_id) is not None: raise ExtractorError(u'Arte live streams are not yet supported, sorry') # self.extractLiveStream(url) # return + def _extract_emission(self, url, video_id): + """Extract from www.arte.tv/guide""" webpage = self._download_webpage(url, video_id) json_url = self._html_search_regex(r'arte_vp_url="(.*?)"', webpage, 'json url') @@ -68,6 +87,7 @@ class ArteTvIE(InfoExtractor): 'description': player_info['VDE'], 'upload_date': unified_strdate(player_info['VDA'].split(' ')[0]), 'thumbnail': player_info['programImage'], + 'ext': 'flv', } formats = player_info['VSR'].values() @@ -78,9 +98,36 @@ class ArteTvIE(InfoExtractor): if format_info['mediaType'] == u'rtmp': info_dict['url'] = format_info['streamer'] info_dict['play_path'] = 'mp4:' + format_info['url'] - info_dict['ext'] = 'mp4' else: info_dict['url'] = format_info['url'] - info_dict['ext'] = 'mp4' return info_dict + + def _extract_video(self, url, video_id): + """Extract from videos.arte.tv""" + config_xml_url = url.replace('/videos/', '/do_delegate/videos/') + config_xml_url = config_xml_url.replace('.html', ',view,asPlayerXml.xml') + config_xml = self._download_webpage(config_xml_url, video_id) + config_xml_url = self._html_search_regex(r'<video lang=".*?" ref="(.*?)"', config_xml, 'config xml url') + config_xml = self._download_webpage(config_xml_url, video_id) + + video_urls = list(re.finditer(r'<url quality="(?P<quality>.*?)">(?P<url>.*?)</url>', config_xml)) + def _key(m): + quality = m.group('quality') + if quality == 'hd': + return 2 + else: + return 1 + # We pick the best quality + video_urls = sorted(video_urls, key=_key) + video_url = list(video_urls)[-1].group('url') + + title = self._html_search_regex(r'<name>(.*?)</name>', config_xml, 'title') + thumbnail = self._html_search_regex(r'<firstThumbnailUrl>(.*?)</firstThumbnailUrl>', + config_xml, 'thumbnail') + return {'id': video_id, + 'title': title, + 'thumbnail': thumbnail, + 'url': video_url, + 'ext': 'flv', + } diff --git a/youtube_dl/extractor/auengine.py b/youtube_dl/extractor/auengine.py new file mode 100644 index 0000000..3b4ade3 --- /dev/null +++ b/youtube_dl/extractor/auengine.py @@ -0,0 +1,38 @@ +import os.path +import re + +from .common import InfoExtractor +from ..utils import ( + compat_urllib_parse, + compat_urllib_parse_urlparse, +) + +class AUEngineIE(InfoExtractor): + _VALID_URL = r'(?:http://)?(?:www\.)?auengine\.com/embed.php\?.*?file=([^&]+).*?' + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group(1) + webpage = self._download_webpage(url, video_id) + title = self._html_search_regex(r'<title>(?P<title>.+?)', + webpage, u'title') + title = title.strip() + links = re.findall(r'[^A-Za-z0-9]?(?:file|url):\s*["\'](http[^\'"&]*)', webpage) + links = [compat_urllib_parse.unquote(l) for l in links] + for link in links: + root, pathext = os.path.splitext(compat_urllib_parse_urlparse(link).path) + if pathext == '.png': + thumbnail = link + elif pathext == '.mp4': + url = link + ext = pathext + if ext == title[-len(ext):]: + title = title[:-len(ext)] + ext = ext[1:] + return [{ + 'id': video_id, + 'url': url, + 'ext': ext, + 'title': title, + 'thumbnail': thumbnail, + }] diff --git a/youtube_dl/extractor/bandcamp.py b/youtube_dl/extractor/bandcamp.py index dcf6721..129a20f 100644 --- a/youtube_dl/extractor/bandcamp.py +++ b/youtube_dl/extractor/bandcamp.py @@ -9,6 +9,15 @@ from ..utils import ( class BandcampIE(InfoExtractor): _VALID_URL = r'http://.*?\.bandcamp\.com/track/(?P.*)' + _TEST = { + u'url': u'http://youtube-dl.bandcamp.com/track/youtube-dl-test-song', + u'file': u'1812978515.mp3', + u'md5': u'cdeb30cdae1921719a3cbcab696ef53c', + u'info_dict': { + u"title": u"youtube-dl test song \"'/\\\u00e4\u21ad" + }, + u'skip': u'There is a limit of 200 free downloads / month for the test song' + } def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) diff --git a/youtube_dl/extractor/bliptv.py b/youtube_dl/extractor/bliptv.py index df2ad4b..37141e6 100644 --- a/youtube_dl/extractor/bliptv.py +++ b/youtube_dl/extractor/bliptv.py @@ -24,6 +24,17 @@ class BlipTVIE(InfoExtractor): _VALID_URL = r'^(?:https?://)?(?:\w+\.)?blip\.tv/((.+/)|(play/)|(api\.swf#))(.+)$' _URL_EXT = r'^.*\.([a-z0-9]+)$' IE_NAME = u'blip.tv' + _TEST = { + u'url': u'http://blip.tv/cbr/cbr-exclusive-gotham-city-imposters-bats-vs-jokerz-short-3-5796352', + u'file': u'5779306.m4v', + u'md5': u'b2d849efcf7ee18917e4b4d9ff37cafe', + u'info_dict': { + u"upload_date": u"20111205", + u"description": u"md5:9bc31f227219cde65e47eeec8d2dc596", + u"uploader": u"Comic Book Resources - CBR TV", + u"title": u"CBR EXCLUSIVE: \"Gotham City Imposters\" Bats VS Jokerz Short 3" + } + } def report_direct_download(self, title): """Report information extraction.""" diff --git a/youtube_dl/extractor/breakcom.py b/youtube_dl/extractor/breakcom.py index 1f6620d..34f555e 100644 --- a/youtube_dl/extractor/breakcom.py +++ b/youtube_dl/extractor/breakcom.py @@ -5,6 +5,14 @@ from .common import InfoExtractor class BreakIE(InfoExtractor): _VALID_URL = r'(?:http://)?(?:www\.)?break\.com/video/([^/]+)' + _TEST = { + u'url': u'http://www.break.com/video/when-girls-act-like-guys-2468056', + u'file': u'2468056.mp4', + u'md5': u'a3513fb1547fba4fb6cfac1bffc6c46b', + u'info_dict': { + u"title": u"When Girls Act Like D-Bags" + } + } def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) diff --git a/youtube_dl/extractor/comedycentral.py b/youtube_dl/extractor/comedycentral.py index 6985e88..93d9e3d 100644 --- a/youtube_dl/extractor/comedycentral.py +++ b/youtube_dl/extractor/comedycentral.py @@ -12,8 +12,7 @@ from ..utils import ( class ComedyCentralIE(InfoExtractor): - """Information extractor for The Daily Show and Colbert Report """ - + IE_DESC = u'The Daily Show / Colbert Report' # urls can be abbreviations like :thedailyshow or :colbert # urls for episodes like: # or urls for clips like: http://www.thedailyshow.com/watch/mon-december-10-2012/any-given-gun-day @@ -27,6 +26,17 @@ class ComedyCentralIE(InfoExtractor): (the-colbert-report-(videos|collections)/(?P<clipID>[0-9]+)/[^/]*/(?P<cntitle>.*?)) |(watch/(?P<date>[^/]*)/(?P<tdstitle>.*))))) $""" + _TEST = { + u'url': u'http://www.thedailyshow.com/watch/thu-december-13-2012/kristen-stewart', + u'file': u'422212.mp4', + u'md5': u'4e2f5cb088a83cd8cdb7756132f9739d', + u'info_dict': { + u"upload_date": u"20121214", + u"description": u"Kristen Stewart", + u"uploader": u"thedailyshow", + u"title": u"thedailyshow-kristen-stewart part 1" + } + } _available_formats = ['3500', '2200', '1700', '1200', '750', '400'] diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 64d63e1..655836f 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -44,6 +44,7 @@ class InfoExtractor(object): location: Physical location of the video. player_url: SWF Player URL (used for rtmpdump). subtitles: The subtitle file contents. + view_count: How many users have watched the video on the platform. urlhandle: [internal] The urlHandle to be used to download the file, like returned by urllib.request.urlopen @@ -262,3 +263,7 @@ class SearchInfoExtractor(InfoExtractor): def _get_n_results(self, query, n): """Get a specified number of results for a query""" raise NotImplementedError("This method must be implemented by sublclasses") + + @property + def SEARCH_KEY(self): + return self._SEARCH_KEY diff --git a/youtube_dl/extractor/cspan.py b/youtube_dl/extractor/cspan.py index 2246515..a485327 100644 --- a/youtube_dl/extractor/cspan.py +++ b/youtube_dl/extractor/cspan.py @@ -7,6 +7,15 @@ from ..utils import ( class CSpanIE(InfoExtractor): _VALID_URL = r'http://www.c-spanvideo.org/program/(.*)' + _TEST = { + u'url': u'http://www.c-spanvideo.org/program/HolderonV', + u'file': u'315139.flv', + u'md5': u'74a623266956f69e4df0068ab6c80fe4', + u'info_dict': { + u"title": u"Attorney General Eric Holder on Voting Rights Act Decision" + }, + u'skip': u'Requires rtmpdump' + } def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py index 34306b0..3297a85 100644 --- a/youtube_dl/extractor/dailymotion.py +++ b/youtube_dl/extractor/dailymotion.py @@ -14,6 +14,15 @@ class DailymotionIE(InfoExtractor): _VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/video/([^/]+)' IE_NAME = u'dailymotion' + _TEST = { + u'url': u'http://www.dailymotion.com/video/x33vw9_tutoriel-de-youtubeur-dl-des-video_tech', + u'file': u'x33vw9.mp4', + u'md5': u'392c4b85a60a90dc4792da41ce3144eb', + u'info_dict': { + u"uploader": u"Alex and Van .", + u"title": u"Tutoriel de Youtubeur\"DL DES VIDEO DE YOUTUBE\"" + } + } def _real_extract(self, url): # Extract id and simplified title from URL diff --git a/youtube_dl/extractor/eighttracks.py b/youtube_dl/extractor/eighttracks.py index c3d4343..cced068 100644 --- a/youtube_dl/extractor/eighttracks.py +++ b/youtube_dl/extractor/eighttracks.py @@ -12,6 +12,77 @@ from ..utils import ( class EightTracksIE(InfoExtractor): IE_NAME = '8tracks' _VALID_URL = r'https?://8tracks.com/(?P<user>[^/]+)/(?P<id>[^/#]+)(?:#.*)?$' + _TEST = { + u"name": u"EightTracks", + u"url": u"http://8tracks.com/ytdl/youtube-dl-test-tracks-a", + u"playlist": [ + { + u"file": u"11885610.m4a", + u"md5": u"96ce57f24389fc8734ce47f4c1abcc55", + u"info_dict": { + u"title": u"youtue-dl project<>\"' - youtube-dl test track 1 \"'/\\\u00e4\u21ad", + u"uploader_id": u"ytdl" + } + }, + { + u"file": u"11885608.m4a", + u"md5": u"4ab26f05c1f7291ea460a3920be8021f", + u"info_dict": { + u"title": u"youtube-dl project - youtube-dl test track 2 \"'/\\\u00e4\u21ad", + u"uploader_id": u"ytdl" + } + }, + { + u"file": u"11885679.m4a", + u"md5": u"d30b5b5f74217410f4689605c35d1fd7", + u"info_dict": { + u"title": u"youtube-dl project as well - youtube-dl test track 3 \"'/\\\u00e4\u21ad", + u"uploader_id": u"ytdl" + } + }, + { + u"file": u"11885680.m4a", + u"md5": u"4eb0a669317cd725f6bbd336a29f923a", + u"info_dict": { + u"title": u"youtube-dl project as well - youtube-dl test track 4 \"'/\\\u00e4\u21ad", + u"uploader_id": u"ytdl" + } + }, + { + u"file": u"11885682.m4a", + u"md5": u"1893e872e263a2705558d1d319ad19e8", + u"info_dict": { + u"title": u"PH - youtube-dl test track 5 \"'/\\\u00e4\u21ad", + u"uploader_id": u"ytdl" + } + }, + { + u"file": u"11885683.m4a", + u"md5": u"b673c46f47a216ab1741ae8836af5899", + u"info_dict": { + u"title": u"PH - youtube-dl test track 6 \"'/\\\u00e4\u21ad", + u"uploader_id": u"ytdl" + } + }, + { + u"file": u"11885684.m4a", + u"md5": u"1d74534e95df54986da7f5abf7d842b7", + u"info_dict": { + u"title": u"phihag - youtube-dl test track 7 \"'/\\\u00e4\u21ad", + u"uploader_id": u"ytdl" + } + }, + { + u"file": u"11885685.m4a", + u"md5": u"f081f47af8f6ae782ed131d38b9cd1c0", + u"info_dict": { + u"title": u"phihag - youtube-dl test track 8 \"'/\\\u00e4\u21ad", + u"uploader_id": u"ytdl" + } + } + ] + } + def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) diff --git a/youtube_dl/extractor/escapist.py b/youtube_dl/extractor/escapist.py index 86b145b..794460e 100644 --- a/youtube_dl/extractor/escapist.py +++ b/youtube_dl/extractor/escapist.py @@ -12,6 +12,16 @@ from ..utils import ( class EscapistIE(InfoExtractor): _VALID_URL = r'^(https?://)?(www\.)?escapistmagazine\.com/videos/view/(?P<showname>[^/]+)/(?P<episode>[^/?]+)[/?]?.*$' + _TEST = { + u'url': u'http://www.escapistmagazine.com/videos/view/the-escapist-presents/6618-Breaking-Down-Baldurs-Gate', + u'file': u'6618-Breaking-Down-Baldurs-Gate.mp4', + u'md5': u'c6793dbda81388f4264c1ba18684a74d', + u'info_dict': { + u"description": u"Baldur's Gate: Original, Modded or Enhanced Edition? I'll break down what you can expect from the new Baldur's Gate: Enhanced Edition.", + u"uploader": u"the-escapist-presents", + u"title": u"Breaking Down Baldur's Gate" + } + } def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index c694f9a..beaa5b4 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -22,6 +22,15 @@ class FacebookIE(InfoExtractor): _LOGIN_URL = 'https://login.facebook.com/login.php?m&next=http%3A%2F%2Fm.facebook.com%2Fhome.php&' _NETRC_MACHINE = 'facebook' IE_NAME = u'facebook' + _TEST = { + u'url': u'https://www.facebook.com/photo.php?v=120708114770723', + u'file': u'120708114770723.mp4', + u'md5': u'48975a41ccc4b7a581abd68651c1a5a8', + u'info_dict': { + u"duration": 279, + u"title": u"PEOPLE ARE AWESOME 2013" + } + } def report_login(self): """Report attempt to log in.""" diff --git a/youtube_dl/extractor/flickr.py b/youtube_dl/extractor/flickr.py index 791d5b6..bd97bff 100644 --- a/youtube_dl/extractor/flickr.py +++ b/youtube_dl/extractor/flickr.py @@ -10,6 +10,16 @@ from ..utils import ( class FlickrIE(InfoExtractor): """Information Extractor for Flickr videos""" _VALID_URL = r'(?:https?://)?(?:www\.)?flickr\.com/photos/(?P<uploader_id>[\w\-_@]+)/(?P<id>\d+).*' + _TEST = { + u'url': u'http://www.flickr.com/photos/forestwander-nature-pictures/5645318632/in/photostream/', + u'file': u'5645318632.mp4', + u'md5': u'6fdc01adbc89d72fc9c4f15b4a4ba87b', + u'info_dict': { + u"description": u"Waterfalls in the Springtime at Dark Hollow Waterfalls. These are located just off of Skyline Drive in Virginia. They are only about 6/10 of a mile hike but it is a pretty steep hill and a good climb back up.", + u"uploader_id": u"forestwander-nature-pictures", + u"title": u"Dark Hollow Waterfalls" + } + } def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) diff --git a/youtube_dl/extractor/funnyordie.py b/youtube_dl/extractor/funnyordie.py index 3045978..388aacf 100644 --- a/youtube_dl/extractor/funnyordie.py +++ b/youtube_dl/extractor/funnyordie.py @@ -5,6 +5,15 @@ from .common import InfoExtractor class FunnyOrDieIE(InfoExtractor): _VALID_URL = r'^(?:https?://)?(?:www\.)?funnyordie\.com/videos/(?P<id>[0-9a-f]+)/.*$' + _TEST = { + u'url': u'http://www.funnyordie.com/videos/0732f586d7/heart-shaped-box-literal-video-version', + u'file': u'0732f586d7.mp4', + u'md5': u'f647e9e90064b53b6e046e75d0241fbd', + u'info_dict': { + u"description": u"Lyrics changed to match the video. Spoken cameo by Obscurus Lupa (from ThatGuyWithTheGlasses.com). Based on a concept by Dustin McLean (DustFilms.com). Performed, edited, and written by David A. Scott.", + u"title": u"Heart-Shaped Box: Literal Video Version" + } + } def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) diff --git a/youtube_dl/extractor/gamespot.py b/youtube_dl/extractor/gamespot.py new file mode 100644 index 0000000..cec3b7a --- /dev/null +++ b/youtube_dl/extractor/gamespot.py @@ -0,0 +1,45 @@ +import re +import xml.etree.ElementTree + +from .common import InfoExtractor +from ..utils import ( + unified_strdate, +) + +class GameSpotIE(InfoExtractor): + _VALID_URL = r'(?:http://)?(?:www\.)?gamespot\.com/([^/]+)/videos/([^/]+)-([^/d]+)/' + _TEST = { + u"url": u"http://www.gamespot.com/arma-iii/videos/arma-iii-community-guide-sitrep-i-6410818/", + u"file": u"6410818.mp4", + u"md5": u"5569d64ca98db01f0177c934fe8c1e9b", + u"info_dict": { + u"title": u"Arma III - Community Guide: SITREP I", + u"upload_date": u"20130627", + } + } + + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group(3).split("-")[-1] + info_url = "http://www.gamespot.com/pages/video_player/xml.php?id="+str(video_id) + info_xml = self._download_webpage(info_url, video_id) + doc = xml.etree.ElementTree.fromstring(info_xml) + clip_el = doc.find('./playList/clip') + + video_url = clip_el.find('./URI').text + title = clip_el.find('./title').text + ext = video_url.rpartition('.')[2] + thumbnail_url = clip_el.find('./screenGrabURI').text + view_count = int(clip_el.find('./views').text) + upload_date = unified_strdate(clip_el.find('./postDate').text) + + return [{ + 'id' : video_id, + 'url' : video_url, + 'ext' : ext, + 'title' : title, + 'thumbnail' : thumbnail_url, + 'upload_date' : upload_date, + 'view_count' : view_count, + }] diff --git a/youtube_dl/extractor/gametrailers.py b/youtube_dl/extractor/gametrailers.py index 33e59e8..3ce93b4 100644 --- a/youtube_dl/extractor/gametrailers.py +++ b/youtube_dl/extractor/gametrailers.py @@ -9,6 +9,15 @@ from ..utils import ( class GametrailersIE(InfoExtractor): _VALID_URL = r'http://www.gametrailers.com/(?P<type>videos|reviews|full-episodes)/(?P<id>.*?)/(?P<title>.*)' + _TEST = { + u'url': u'http://www.gametrailers.com/videos/zbvr8i/mirror-s-edge-2-e3-2013--debut-trailer', + u'file': u'zbvr8i.flv', + u'md5': u'c3edbc995ab4081976e16779bd96a878', + u'info_dict': { + u"title": u"E3 2013: Debut Trailer" + }, + u'skip': u'Requires rtmpdump' + } def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 7a877b3..20bc533 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -11,10 +11,18 @@ from ..utils import ( ) class GenericIE(InfoExtractor): - """Generic last-resort information extractor.""" - + IE_DESC = u'Generic downloader that works on some sites' _VALID_URL = r'.*' IE_NAME = u'generic' + _TEST = { + u'url': u'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html', + u'file': u'13601338388002.mp4', + u'md5': u'85b90ccc9d73b4acd9138d3af4c27f89', + u'info_dict': { + u"uploader": u"www.hodiho.fr", + u"title": u"R\u00e9gis plante sa Jeep" + } + } def report_download_webpage(self, video_id): """Report webpage download.""" @@ -102,7 +110,7 @@ class GenericIE(InfoExtractor): mobj = re.search(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage) if mobj is None: # Broaden the search a little bit: JWPlayer JS loader - mobj = re.search(r'[^A-Za-z0-9]?file:\s*["\'](http[^\'"&]*)', webpage) + mobj = re.search(r'[^A-Za-z0-9]?file["\']?:\s*["\'](http[^\'"&]*)', webpage) if mobj is None: # Try to find twitter cards info mobj = re.search(r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage) @@ -135,7 +143,7 @@ class GenericIE(InfoExtractor): # Video Title - Tagline | Site Name # and so on and so forth; it's just not practical video_title = self._html_search_regex(r'<title>(.*)', - webpage, u'video title') + webpage, u'video title', default=u'video', flags=re.DOTALL) # video uploader is domain name video_uploader = self._search_regex(r'(?:https?://)?([^/]*)/.*', diff --git a/youtube_dl/extractor/googleplus.py b/youtube_dl/extractor/googleplus.py index ff2cdee..9f7fc19 100644 --- a/youtube_dl/extractor/googleplus.py +++ b/youtube_dl/extractor/googleplus.py @@ -1,3 +1,5 @@ +# coding: utf-8 + import datetime import re @@ -8,10 +10,18 @@ from ..utils import ( class GooglePlusIE(InfoExtractor): - """Information extractor for plus.google.com.""" - + IE_DESC = u'Google Plus' _VALID_URL = r'(?:https://)?plus\.google\.com/(?:[^/]+/)*?posts/(\w+)' IE_NAME = u'plus.google' + _TEST = { + u"url": u"https://plus.google.com/u/0/108897254135232129896/posts/ZButuJc6CtH", + u"file": u"ZButuJc6CtH.flv", + u"info_dict": { + u"upload_date": u"20120613", + u"uploader": u"井上ヨシマサ", + u"title": u"嘆きの天使 降臨" + } + } def _real_extract(self, url): # Extract id from URL diff --git a/youtube_dl/extractor/googlesearch.py b/youtube_dl/extractor/googlesearch.py index 21c240e..f9c88e9 100644 --- a/youtube_dl/extractor/googlesearch.py +++ b/youtube_dl/extractor/googlesearch.py @@ -8,7 +8,7 @@ from ..utils import ( class GoogleSearchIE(SearchInfoExtractor): - """Information Extractor for Google Video search queries.""" + IE_DESC = u'Google Video search' _MORE_PAGES_INDICATOR = r'id="pnnext" class="pn"' _MAX_RESULTS = 1000 IE_NAME = u'video.google:search' diff --git a/youtube_dl/extractor/hotnewhiphop.py b/youtube_dl/extractor/hotnewhiphop.py new file mode 100644 index 0000000..ca3abb7 --- /dev/null +++ b/youtube_dl/extractor/hotnewhiphop.py @@ -0,0 +1,48 @@ +import re +import base64 + +from .common import InfoExtractor + + +class HotNewHipHopIE(InfoExtractor): + _VALID_URL = r'http://www\.hotnewhiphop.com/.*\.(?P.*)\.html' + _TEST = { + u'url': u"http://www.hotnewhiphop.com/freddie-gibbs-lay-it-down-song.1435540.html'", + u'file': u'1435540.mp3', + u'md5': u'2c2cd2f76ef11a9b3b581e8b232f3d96', + u'info_dict': { + u"title": u"Freddie Gibbs Songs - Lay It Down" + } + } + + def _real_extract(self, url): + m = re.match(self._VALID_URL, url) + video_id = m.group('id') + + webpage_src = self._download_webpage(url, video_id) + + video_url_base64 = self._search_regex(r'data-path="(.*?)"', + webpage_src, u'video URL', fatal=False) + + if video_url_base64 == None: + video_url = self._search_regex(r'"contentUrl" content="(.*?)"', webpage_src, + u'video URL') + return self.url_result(video_url, ie='Youtube') + + video_url = base64.b64decode(video_url_base64).decode('utf-8') + + video_title = self._html_search_regex(r"(.*)", + webpage_src, u'title') + + # Getting thumbnail and if not thumbnail sets correct title for WSHH candy video. + thumbnail = self._html_search_regex(r'"og:image" content="(.*)"', + webpage_src, u'thumbnail', fatal=False) + + results = [{ + 'id': video_id, + 'url' : video_url, + 'title' : video_title, + 'thumbnail' : thumbnail, + 'ext' : 'mp3', + }] + return results \ No newline at end of file diff --git a/youtube_dl/extractor/howcast.py b/youtube_dl/extractor/howcast.py index 7b94f85..6104c4b 100644 --- a/youtube_dl/extractor/howcast.py +++ b/youtube_dl/extractor/howcast.py @@ -5,6 +5,15 @@ from .common import InfoExtractor class HowcastIE(InfoExtractor): _VALID_URL = r'(?:https?://)?(?:www\.)?howcast\.com/videos/(?P\d+)' + _TEST = { + u'url': u'http://www.howcast.com/videos/390161-How-to-Tie-a-Square-Knot-Properly', + u'file': u'390161.mp4', + u'md5': u'1d7ba54e2c9d7dc6935ef39e00529138', + u'info_dict': { + u"description": u"The square knot, also known as the reef knot, is one of the oldest, most basic knots to tie, and can be used in many different ways. Here's the proper way to tie a square knot.", + u"title": u"How to Tie a Square Knot Properly" + } + } def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) diff --git a/youtube_dl/extractor/hypem.py b/youtube_dl/extractor/hypem.py index ceec4f6..ab2b591 100644 --- a/youtube_dl/extractor/hypem.py +++ b/youtube_dl/extractor/hypem.py @@ -15,6 +15,14 @@ from ..utils import ( class HypemIE(InfoExtractor): """Information Extractor for hypem""" _VALID_URL = r'(?:http://)?(?:www\.)?hypem\.com/track/([^/]+)/([^/]+)' + _TEST = { + u'url': u'http://hypem.com/track/1v6ga/BODYWORK+-+TAME', + u'file': u'1v6ga.mp3', + u'md5': u'b9cc91b5af8995e9f0c1cee04c575828', + u'info_dict': { + u"title": u"Tame" + } + } def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) diff --git a/youtube_dl/extractor/ina.py b/youtube_dl/extractor/ina.py index c19b956..962c592 100644 --- a/youtube_dl/extractor/ina.py +++ b/youtube_dl/extractor/ina.py @@ -6,6 +6,14 @@ from .common import InfoExtractor class InaIE(InfoExtractor): """Information Extractor for Ina.fr""" _VALID_URL = r'(?:http://)?(?:www\.)?ina\.fr/video/(?PI[0-9]+)/.*' + _TEST = { + u'url': u'www.ina.fr/video/I12055569/francois-hollande-je-crois-que-c-est-clair-video.html', + u'file': u'I12055569.mp4', + u'md5': u'a667021bf2b41f8dc6049479d9bb38a3', + u'info_dict': { + u"title": u"Fran\u00e7ois Hollande \"Je crois que c'est clair\"" + } + } def _real_extract(self,url): mobj = re.match(self._VALID_URL, url) diff --git a/youtube_dl/extractor/infoq.py b/youtube_dl/extractor/infoq.py index 9056742..c79c589 100644 --- a/youtube_dl/extractor/infoq.py +++ b/youtube_dl/extractor/infoq.py @@ -11,6 +11,18 @@ from ..utils import ( class InfoQIE(InfoExtractor): _VALID_URL = r'^(?:https?://)?(?:www\.)?infoq\.com/[^/]+/[^/]+$' + _TEST = { + u"name": u"InfoQ", + u"url": u"http://www.infoq.com/presentations/A-Few-of-My-Favorite-Python-Things", + u"file": u"12-jan-pythonthings.mp4", + u"info_dict": { + u"description": u"Mike Pirnat presents some tips and tricks, standard libraries and third party packages that make programming in Python a richer experience.", + u"title": u"A Few of My Favorite [Python] Things" + }, + u"params": { + u"skip_download": True + } + } def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) diff --git a/youtube_dl/extractor/instagram.py b/youtube_dl/extractor/instagram.py new file mode 100644 index 0000000..6ae704e --- /dev/null +++ b/youtube_dl/extractor/instagram.py @@ -0,0 +1,42 @@ +import re + +from .common import InfoExtractor + +class InstagramIE(InfoExtractor): + _VALID_URL = r'(?:http://)?instagram.com/p/(.*?)/' + _TEST = { + u'url': u'http://instagram.com/p/aye83DjauH/#', + u'file': u'aye83DjauH.mp4', + u'md5': u'0d2da106a9d2631273e192b372806516', + u'info_dict': { + u"uploader_id": u"naomipq", + u"title": u"Video by naomipq" + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group(1) + webpage = self._download_webpage(url, video_id) + video_url = self._html_search_regex( + r'', + webpage, u'thumbnail URL', fatal=False) + html_title = self._html_search_regex( + r'(.+?)', + webpage, u'title', flags=re.DOTALL) + title = re.sub(u'(?: *\(Videos?\))? \u2022 Instagram$', '', html_title).strip() + uploader_id = self._html_search_regex(r'content="(.*?)\'s video on Instagram', + webpage, u'uploader name', fatal=False) + ext = 'mp4' + + return [{ + 'id': video_id, + 'url': video_url, + 'ext': ext, + 'title': title, + 'thumbnail': thumbnail_url, + 'uploader_id' : uploader_id + }] diff --git a/youtube_dl/extractor/justintv.py b/youtube_dl/extractor/justintv.py index b2006e3..f600179 100644 --- a/youtube_dl/extractor/justintv.py +++ b/youtube_dl/extractor/justintv.py @@ -26,6 +26,17 @@ class JustinTVIE(InfoExtractor): """ _JUSTIN_PAGE_LIMIT = 100 IE_NAME = u'justin.tv' + _TEST = { + u'url': u'http://www.twitch.tv/thegamedevhub/b/296128360', + u'file': u'296128360.flv', + u'md5': u'ecaa8a790c22a40770901460af191c9a', + u'info_dict': { + u"upload_date": u"20110927", + u"uploader_id": 25114803, + u"uploader": u"thegamedevhub", + u"title": u"Beginner Series - Scripting With Python Pt.1" + } + } def report_download_page(self, channel, offset): """Report attempt to download a single page of videos.""" diff --git a/youtube_dl/extractor/keek.py b/youtube_dl/extractor/keek.py index e2093a0..72ad6a3 100644 --- a/youtube_dl/extractor/keek.py +++ b/youtube_dl/extractor/keek.py @@ -6,6 +6,15 @@ from .common import InfoExtractor class KeekIE(InfoExtractor): _VALID_URL = r'http://(?:www\.)?keek\.com/(?:!|\w+/keeks/)(?P\w+)' IE_NAME = u'keek' + _TEST = { + u'url': u'http://www.keek.com/ytdl/keeks/NODfbab', + u'file': u'NODfbab.mp4', + u'md5': u'9b0636f8c0f7614afa4ea5e4c6e57e83', + u'info_dict': { + u"uploader": u"ytdl", + u"title": u"test chars: \"'/\\\u00e4<>This is a test video for youtube-dl.For more information, contact phihag@phihag.de ." + } + } def _real_extract(self, url): m = re.match(self._VALID_URL, url) diff --git a/youtube_dl/extractor/liveleak.py b/youtube_dl/extractor/liveleak.py index d4b142e..cf8a2c9 100644 --- a/youtube_dl/extractor/liveleak.py +++ b/youtube_dl/extractor/liveleak.py @@ -10,6 +10,16 @@ class LiveLeakIE(InfoExtractor): _VALID_URL = r'^(?:http?://)?(?:\w+\.)?liveleak\.com/view\?(?:.*?)i=(?P[\w_]+)(?:.*)' IE_NAME = u'liveleak' + _TEST = { + u'url': u'http://www.liveleak.com/view?i=757_1364311680', + u'file': u'757_1364311680.mp4', + u'md5': u'0813c2430bea7a46bf13acf3406992f4', + u'info_dict': { + u"description": u"extremely bad day for this guy..!", + u"uploader": u"ljfriel2", + u"title": u"Most unlucky car accident" + } + } def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) diff --git a/youtube_dl/extractor/metacafe.py b/youtube_dl/extractor/metacafe.py index 66d6554..4c3f81b 100644 --- a/youtube_dl/extractor/metacafe.py +++ b/youtube_dl/extractor/metacafe.py @@ -20,6 +20,19 @@ class MetacafeIE(InfoExtractor): _DISCLAIMER = 'http://www.metacafe.com/family_filter/' _FILTER_POST = 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user' IE_NAME = u'metacafe' + _TEST = { + u"add_ie": ["Youtube"], + u"url": u"http://metacafe.com/watch/yt-_aUehQsCQtM/the_electric_company_short_i_pbs_kids_go/", + u"file": u"_aUehQsCQtM.flv", + u"info_dict": { + u"upload_date": u"20090102", + u"title": u"The Electric Company | \"Short I\" | PBS KIDS GO!", + u"description": u"md5:2439a8ef6d5a70e380c22f5ad323e5a8", + u"uploader": u"PBS", + u"uploader_id": u"PBS" + } + } + def report_disclaimer(self): """Report disclaimer retrieval.""" diff --git a/youtube_dl/extractor/myspass.py b/youtube_dl/extractor/myspass.py index 7b016bb..107665d 100644 --- a/youtube_dl/extractor/myspass.py +++ b/youtube_dl/extractor/myspass.py @@ -11,6 +11,15 @@ from ..utils import ( class MySpassIE(InfoExtractor): _VALID_URL = r'http://www.myspass.de/.*' + _TEST = { + u'url': u'http://www.myspass.de/myspass/shows/tvshows/absolute-mehrheit/Absolute-Mehrheit-vom-17022013-Die-Highlights-Teil-2--/11741/', + u'file': u'11741.mp4', + u'md5': u'0b49f4844a068f8b33f4b7c88405862b', + u'info_dict': { + u"description": u"Wer kann in die Fu\u00dfstapfen von Wolfgang Kubicki treten und die Mehrheit der Zuschauer hinter sich versammeln? Wird vielleicht sogar die Absolute Mehrheit geknackt und der Jackpot von 200.000 Euro mit nach Hause genommen?", + u"title": u"Absolute Mehrheit vom 17.02.2013 - Die Highlights, Teil 2" + } + } def _real_extract(self, url): META_DATA_URL_TEMPLATE = 'http://www.myspass.de/myspass/includes/apps/video/getvideometadataxml.php?id=%s' diff --git a/youtube_dl/extractor/myvideo.py b/youtube_dl/extractor/myvideo.py index 47a44e3..b2a7b1d 100644 --- a/youtube_dl/extractor/myvideo.py +++ b/youtube_dl/extractor/myvideo.py @@ -18,6 +18,14 @@ class MyVideoIE(InfoExtractor): _VALID_URL = r'(?:http://)?(?:www\.)?myvideo\.de/watch/([0-9]+)/([^?/]+).*' IE_NAME = u'myvideo' + _TEST = { + u'url': u'http://www.myvideo.de/watch/8229274/bowling_fail_or_win', + u'file': u'8229274.flv', + u'md5': u'2d2753e8130479ba2cb7e0a37002053e', + u'info_dict': { + u"title": u"bowling-fail-or-win" + } + } # Original Code from: https://github.com/dersphere/plugin.video.myvideo_de.git # Released into the Public Domain by Tristan Fischer on 2013-05-19 diff --git a/youtube_dl/extractor/nba.py b/youtube_dl/extractor/nba.py index 296d4cd..122b7dd 100644 --- a/youtube_dl/extractor/nba.py +++ b/youtube_dl/extractor/nba.py @@ -8,6 +8,15 @@ from ..utils import ( class NBAIE(InfoExtractor): _VALID_URL = r'^(?:https?://)?(?:watch\.|www\.)?nba\.com/(?:nba/)?video(/[^?]*?)(?:/index\.html)?(?:\?.*)?$' + _TEST = { + u'url': u'http://www.nba.com/video/games/nets/2012/12/04/0021200253-okc-bkn-recap.nba/index.html', + u'file': u'0021200253-okc-bkn-recap.nba.mp4', + u'md5': u'c0edcfc37607344e2ff8f13c378c88a4', + u'info_dict': { + u"description": u"Kevin Durant scores 32 points and dishes out six assists as the Thunder beat the Nets in Brooklyn.", + u"title": u"Thunder vs. Nets" + } + } def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) diff --git a/youtube_dl/extractor/photobucket.py b/youtube_dl/extractor/photobucket.py index cd7fe6f..305b797 100644 --- a/youtube_dl/extractor/photobucket.py +++ b/youtube_dl/extractor/photobucket.py @@ -16,6 +16,16 @@ class PhotobucketIE(InfoExtractor): # Check if it's necessary to keep the old extracion process _VALID_URL = r'(?:http://)?(?:[a-z0-9]+\.)?photobucket\.com/.*(([\?\&]current=)|_)(?P.*)\.(?P(flv)|(mp4))' IE_NAME = u'photobucket' + _TEST = { + u'url': u'http://media.photobucket.com/user/rachaneronas/media/TiredofLinkBuildingTryBacklinkMyDomaincom_zpsc0c3b9fa.mp4.html?filters[term]=search&filters[primary]=videos&filters[secondary]=images&sort=1&o=0', + u'file': u'zpsc0c3b9fa.mp4', + u'md5': u'7dabfb92b0a31f6c16cebc0f8e60ff99', + u'info_dict': { + u"upload_date": u"20130504", + u"uploader": u"rachaneronas", + u"title": u"Tired of Link Building? Try BacklinkMyDomain.com!" + } + } def _real_extract(self, url): # Extract id from URL diff --git a/youtube_dl/extractor/pornotube.py b/youtube_dl/extractor/pornotube.py index 0adb40d..add76a1 100644 --- a/youtube_dl/extractor/pornotube.py +++ b/youtube_dl/extractor/pornotube.py @@ -10,6 +10,15 @@ from ..utils import ( class PornotubeIE(InfoExtractor): _VALID_URL = r'^(?:https?://)?(?:\w+\.)?pornotube\.com(/c/(?P[0-9]+))?(/m/(?P[0-9]+))(/(?P.+))$' + _TEST = { + u'url': u'http://pornotube.com/c/173/m/1689755/Marilyn-Monroe-Bathing', + u'file': u'1689755.flv', + u'md5': u'374dd6dcedd24234453b295209aa69b6', + u'info_dict': { + u"upload_date": u"20090708", + u"title": u"Marilyn-Monroe-Bathing" + } + } def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) diff --git a/youtube_dl/extractor/rbmaradio.py b/youtube_dl/extractor/rbmaradio.py index 0c75eee..4b6147a 100644 --- a/youtube_dl/extractor/rbmaradio.py +++ b/youtube_dl/extractor/rbmaradio.py @@ -11,6 +11,18 @@ from ..utils import ( class RBMARadioIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?rbmaradio\.com/shows/(?P<videoID>[^/]+)$' + _TEST = { + u'url': u'http://www.rbmaradio.com/shows/ford-lopatin-live-at-primavera-sound-2011', + u'file': u'ford-lopatin-live-at-primavera-sound-2011.mp3', + u'md5': u'6bc6f9bcb18994b4c983bc3bf4384d95', + u'info_dict': { + u"uploader_id": u"ford-lopatin", + u"location": u"Spain", + u"description": u"Joel Ford and Daniel \u2019Oneohtrix Point Never\u2019 Lopatin fly their midified pop extravaganza to Spain. Live at Primavera Sound 2011.", + u"uploader": u"Ford & Lopatin", + u"title": u"Live at Primavera Sound 2011" + } + } def _real_extract(self, url): m = re.match(self._VALID_URL, url) diff --git a/youtube_dl/extractor/redtube.py b/youtube_dl/extractor/redtube.py index ebc4e23..1d2cf1f 100644 --- a/youtube_dl/extractor/redtube.py +++ b/youtube_dl/extractor/redtube.py @@ -5,6 +5,14 @@ from .common import InfoExtractor class RedTubeIE(InfoExtractor): _VALID_URL = r'(?:http://)?(?:www\.)?redtube\.com/(?P<id>[0-9]+)' + _TEST = { + u'url': u'http://www.redtube.com/66418', + u'file': u'66418.mp4', + u'md5': u'7b8c22b5e7098a3e1c09709df1126d2d', + u'info_dict': { + u"title": u"Sucked on a toilet" + } + } def _real_extract(self,url): mobj = re.match(self._VALID_URL, url) diff --git a/youtube_dl/extractor/ringtv.py b/youtube_dl/extractor/ringtv.py new file mode 100644 index 0000000..1b08c31 --- /dev/null +++ b/youtube_dl/extractor/ringtv.py @@ -0,0 +1,37 @@ +import re + +from .common import InfoExtractor + + +class RingTVIE(InfoExtractor): + _VALID_URL = r'(?:http://)?(?:www\.)?ringtv\.craveonline\.com/videos/video/([^/]+)' + _TEST = { + u"url": u"http://ringtv.craveonline.com/videos/video/746619-canelo-alvarez-talks-about-mayweather-showdown", + u"file": u"746619.mp4", + u"md5": u"7c46b4057d22de32e0a539f017e64ad3", + u"info_dict": { + u"title": u"Canelo Alvarez talks about Mayweather showdown", + u"description": u"Saul \\\"Canelo\\\" Alvarez spoke to the media about his Sept. 14 showdown with Floyd Mayweather after their kick-off presser in NYC. Canelo is motivated and confident that he will have the speed and gameplan to beat the pound-for-pound king." + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group(1).split('-')[0] + webpage = self._download_webpage(url, video_id) + title = self._search_regex(r'<title>(.+?)', + webpage, 'video title').replace(' | RingTV','') + description = self._search_regex(r'
(.+?)
', + webpage, 'Description') + final_url = "http://ringtv.craveonline.springboardplatform.com/storage/ringtv.craveonline.com/conversion/%s.mp4" %(str(video_id)) + thumbnail_url = "http://ringtv.craveonline.springboardplatform.com/storage/ringtv.craveonline.com/snapshots/%s.jpg" %(str(video_id)) + ext = final_url.split('.')[-1] + return [{ + 'id' : video_id, + 'url' : final_url, + 'ext' : ext, + 'title' : title, + 'thumbnail' : thumbnail_url, + 'description' : description, + }] + diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index 80d7e1b..d47c49c 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -19,8 +19,19 @@ class SoundcloudIE(InfoExtractor): of the stream token and uid """ - _VALID_URL = r'^(?:https?://)?(?:www\.)?soundcloud\.com/([\w\d-]+)/([\w\d-]+)' + _VALID_URL = r'^(?:https?://)?(?:www\.)?soundcloud\.com/([\w\d-]+)/([\w\d-]+)(?:[?].*)?$' IE_NAME = u'soundcloud' + _TEST = { + u'url': u'http://soundcloud.com/ethmusic/lostin-powers-she-so-heavy', + u'file': u'62986583.mp3', + u'md5': u'ebef0a451b909710ed1d7787dddbf0d7', + u'info_dict': { + u"upload_date": u"20121011", + u"description": u"No Downloads untill we record the finished version this weekend, i was too pumped n i had to post it , earl is prolly gonna b hella p.o'd", + u"uploader": u"E.T. ExTerrestrial Music", + u"title": u"Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1" + } + } def report_resolve(self, video_id): """Report information extraction.""" @@ -75,8 +86,72 @@ class SoundcloudSetIE(InfoExtractor): of the stream token and uid """ - _VALID_URL = r'^(?:https?://)?(?:www\.)?soundcloud\.com/([\w\d-]+)/sets/([\w\d-]+)' + _VALID_URL = r'^(?:https?://)?(?:www\.)?soundcloud\.com/([\w\d-]+)/sets/([\w\d-]+)(?:[?].*)?$' IE_NAME = u'soundcloud:set' + _TEST = { + u"url":"https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep", + u"playlist": [ + { + u"file":"30510138.mp3", + u"md5":"f9136bf103901728f29e419d2c70f55d", + u"info_dict": { + u"upload_date": u"20111213", + u"description": u"The Royal Concept from Stockholm\r\nFilip / Povel / David / Magnus\r\nwww.royalconceptband.com", + u"uploader": u"The Royal Concept", + u"title": u"D-D-Dance" + } + }, + { + u"file":"47127625.mp3", + u"md5":"09b6758a018470570f8fd423c9453dd8", + u"info_dict": { + u"upload_date": u"20120521", + u"description": u"The Royal Concept from Stockholm\r\nFilip / Povel / David / Magnus\r\nwww.royalconceptband.com", + u"uploader": u"The Royal Concept", + u"title": u"The Royal Concept - Gimme Twice" + } + }, + { + u"file":"47127627.mp3", + u"md5":"154abd4e418cea19c3b901f1e1306d9c", + u"info_dict": { + u"upload_date": u"20120521", + u"uploader": u"The Royal Concept", + u"title": u"Goldrushed" + } + }, + { + u"file":"47127629.mp3", + u"md5":"2f5471edc79ad3f33a683153e96a79c1", + u"info_dict": { + u"upload_date": u"20120521", + u"description": u"The Royal Concept from Stockholm\r\nFilip / Povel / David / Magnus\r\nwww.royalconceptband.com", + u"uploader": u"The Royal Concept", + u"title": u"In the End" + } + }, + { + u"file":"47127631.mp3", + u"md5":"f9ba87aa940af7213f98949254f1c6e2", + u"info_dict": { + u"upload_date": u"20120521", + u"description": u"The Royal Concept from Stockholm\r\nFilip / David / Povel / Magnus\r\nwww.theroyalconceptband.com", + u"uploader": u"The Royal Concept", + u"title": u"Knocked Up" + } + }, + { + u"file":"75206121.mp3", + u"md5":"f9d1fe9406717e302980c30de4af9353", + u"info_dict": { + u"upload_date": u"20130116", + u"description": u"The unreleased track World on Fire premiered on the CW's hit show Arrow (8pm/7pm central). \r\nAs a gift to our fans we would like to offer you a free download of the track! ", + u"uploader": u"The Royal Concept", + u"title": u"World On Fire" + } + } + ] + } def report_resolve(self, video_id): """Report information extraction.""" diff --git a/youtube_dl/extractor/spiegel.py b/youtube_dl/extractor/spiegel.py index 98a65b7..13c8640 100644 --- a/youtube_dl/extractor/spiegel.py +++ b/youtube_dl/extractor/spiegel.py @@ -6,6 +6,14 @@ from .common import InfoExtractor class SpiegelIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?spiegel\.de/video/[^/]*-(?P[0-9]+)(?:\.html)?(?:#.*)?$' + _TEST = { + u'url': u'http://www.spiegel.de/video/vulkan-tungurahua-in-ecuador-ist-wieder-aktiv-video-1259285.html', + u'file': u'1259285.mp4', + u'md5': u'2c2754212136f35fb4b19767d242f66e', + u'info_dict': { + u"title": u"Vulkanausbruch in Ecuador: Der \"Feuerschlund\" ist wieder aktiv" + } + } def _real_extract(self, url): m = re.match(self._VALID_URL, url) diff --git a/youtube_dl/extractor/stanfordoc.py b/youtube_dl/extractor/stanfordoc.py index 8d3e32a..b27838b 100644 --- a/youtube_dl/extractor/stanfordoc.py +++ b/youtube_dl/extractor/stanfordoc.py @@ -16,10 +16,17 @@ from ..utils import ( class StanfordOpenClassroomIE(InfoExtractor): - """Information extractor for Stanford's Open ClassRoom""" - - _VALID_URL = r'^(?:https?://)?openclassroom.stanford.edu(?P/?|(/MainFolder/(?:HomePage|CoursePage|VideoPage)\.php([?]course=(?P[^&]+)(&video=(?P