From: Rogério Brito Date: Sun, 1 Mar 2015 05:04:05 +0000 (-0300) Subject: Imported Upstream version 2015.02.28 X-Git-Url: https://git.rapsys.eu/youtubedl/commitdiff_plain/f46044c66663049e286c20ee015db99d47d9dd8a Imported Upstream version 2015.02.28 --- diff --git a/Makefile b/Makefile index 0636fc4..c6c7627 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,8 @@ all: youtube-dl README.md CONTRIBUTING.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish supportedsites clean: - rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish *.dump *.part *.info.json *.mp4 *.flv *.mp3 CONTRIBUTING.md.tmp youtube-dl youtube-dl.exe + rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish *.dump *.part *.info.json *.mp4 *.flv *.mp3 *.avi CONTRIBUTING.md.tmp youtube-dl youtube-dl.exe + find -name "*.pyc" -delete PREFIX ?= /usr/local BINDIR ?= $(PREFIX)/bin @@ -43,7 +44,7 @@ test: ot: offlinetest offlinetest: codetest - nosetests --verbose test --exclude test_download --exclude test_age_restriction --exclude test_subtitles --exclude test_write_annotations --exclude test_youtube_lists + nosetests --verbose test --exclude test_download.py --exclude test_age_restriction.py --exclude test_subtitles.py --exclude test_write_annotations.py --exclude test_youtube_lists.py tar: youtube-dl.tar.gz diff --git a/README.md b/README.md index 06dea40..04f664c 100644 --- a/README.md +++ b/README.md @@ -77,6 +77,7 @@ which means you can modify it, redistribute it or use it however you like. on Windows) --flat-playlist Do not extract the videos of a playlist, only list them. + --no-color Do not emit color codes in output. ## Network Options: --proxy URL Use the specified HTTP/HTTPS proxy. Pass in @@ -119,8 +120,27 @@ which means you can modify it, redistribute it or use it however you like. COUNT views --max-views COUNT Do not download any videos with more than COUNT views + --match-filter FILTER (Experimental) Generic video filter. + Specify any key (see help for -o for a list + of available keys) to match if the key is + present, !key to check if the key is not + present,key > NUMBER (like "comment_count > + 12", also works with >=, <, <=, !=, =) to + compare against a number, and & to require + multiple matches. Values which are not + known are excluded unless you put a + question mark (?) after the operator.For + example, to only match videos that have + been liked more than 100 times and disliked + less than 50 times (or the dislike + functionality is not available at the given + service), but who also have a description, + use --match-filter "like_count > 100 & + dislike_count 10M]"). This works for - filesize, height, width, tbr, abr, vbr, and - fps and the comparisons <, <=, >, >=, =, != - . Formats for which the value is not known - are excluded unless you put a question mark - (?) after the operator. You can combine - format filters, so -f "[height <=? - 720][tbr>500]" selects up to 720p videos - (or videos where the height is not known) - with a bitrate of at least 500 KBit/s. By - default, youtube-dl will pick the best - quality. Use commas to download multiple - audio formats, such as -f + filesize, height, width, tbr, abr, vbr, + asr, and fps and the comparisons <, <=, >, + >=, =, != and for ext, acodec, vcodec, + container, and protocol and the comparisons + =, != . Formats for which the value is not + known are excluded unless you put a + question mark (?) after the operator. You + can combine format filters, so -f "[height + <=? 720][tbr>500]" selects up to 720p + videos (or videos where the height is not + known) with a bitrate of at least 500 + KBit/s. By default, youtube-dl will pick + the best quality. Use commas to download + multiple audio formats, such as -f 136/137/mp4/bestvideo,140/m4a/bestaudio. You can merge the video and audio of two formats into a single file using -f NUMBER (like "comment_count > + 12", also works with >=, <, <=, !=, =) to + compare against a number, and & to require + multiple matches. Values which are not + known are excluded unless you put a + question mark (?) after the operator.For + example, to only match videos that have + been liked more than 100 times and disliked + less than 50 times (or the dislike + functionality is not available at the given + service), but who also have a description, + use --match-filter "like_count > 100 & + dislike_count 10M]"). This works for - filesize, height, width, tbr, abr, vbr, and - fps and the comparisons <, <=, >, >=, =, != - . Formats for which the value is not known - are excluded unless you put a question mark - (?) after the operator. You can combine - format filters, so -f "[height <=? - 720][tbr>500]" selects up to 720p videos - (or videos where the height is not known) - with a bitrate of at least 500 KBit/s. By - default, youtube-dl will pick the best - quality. Use commas to download multiple - audio formats, such as -f + filesize, height, width, tbr, abr, vbr, + asr, and fps and the comparisons <, <=, >, + >=, =, != and for ext, acodec, vcodec, + container, and protocol and the comparisons + =, != . Formats for which the value is not + known are excluded unless you put a + question mark (?) after the operator. You + can combine format filters, so -f "[height + <=? 720][tbr>500]" selects up to 720p + videos (or videos where the height is not + known) with a bitrate of at least 500 + KBit/s. By default, youtube-dl will pick + the best quality. Use commas to download + multiple audio formats, such as -f 136/137/mp4/bestvideo,140/m4a/bestaudio. You can merge the video and audio of two formats into a single file using -f = 6) + self.assertEqual(md5(subtitles['en']), '976553874490cba125086bbfea3ff76f') + self.assertEqual(md5(subtitles['fr']), '594564ec7d588942e384e920e5341792') + for lang in ['es', 'fr', 'de']: + self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang) def test_nosubtitles(self): self.DL.expect_warning('video doesn\'t have subtitles') @@ -159,61 +124,21 @@ class TestDailymotionSubtitles(BaseTestSubtitles): self.DL.params['writesubtitles'] = True self.DL.params['allsubtitles'] = True subtitles = self.getSubtitles() - self.assertEqual(len(subtitles), 0) - - def test_multiple_langs(self): - self.DL.params['writesubtitles'] = True - langs = ['es', 'fr', 'de'] - self.DL.params['subtitleslangs'] = langs - subtitles = self.getSubtitles() - for lang in langs: - self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang) + self.assertFalse(subtitles) class TestTedSubtitles(BaseTestSubtitles): url = 'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html' IE = TEDIE - def test_no_writesubtitles(self): - subtitles = self.getSubtitles() - self.assertEqual(subtitles, None) - - def test_subtitles(self): - self.DL.params['writesubtitles'] = True - subtitles = self.getSubtitles() - self.assertEqual(md5(subtitles['en']), '4262c1665ff928a2dada178f62cb8d14') - - def test_subtitles_lang(self): - self.DL.params['writesubtitles'] = True - self.DL.params['subtitleslangs'] = ['fr'] - subtitles = self.getSubtitles() - self.assertEqual(md5(subtitles['fr']), '66a63f7f42c97a50f8c0e90bc7797bb5') - def test_allsubtitles(self): self.DL.params['writesubtitles'] = True self.DL.params['allsubtitles'] = True subtitles = self.getSubtitles() self.assertTrue(len(subtitles.keys()) >= 28) - - def test_list_subtitles(self): - self.DL.expect_warning('Automatic Captions not supported by this server') - self.DL.params['listsubtitles'] = True - info_dict = self.getInfoDict() - self.assertEqual(info_dict, None) - - def test_automatic_captions(self): - self.DL.expect_warning('Automatic Captions not supported by this server') - self.DL.params['writeautomaticsub'] = True - self.DL.params['subtitleslang'] = ['en'] - subtitles = self.getSubtitles() - self.assertTrue(len(subtitles.keys()) == 0) - - def test_multiple_langs(self): - self.DL.params['writesubtitles'] = True - langs = ['es', 'fr', 'de'] - self.DL.params['subtitleslangs'] = langs - subtitles = self.getSubtitles() - for lang in langs: + self.assertEqual(md5(subtitles['en']), '4262c1665ff928a2dada178f62cb8d14') + self.assertEqual(md5(subtitles['fr']), '66a63f7f42c97a50f8c0e90bc7797bb5') + for lang in ['es', 'fr', 'de']: self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang) @@ -221,14 +146,7 @@ class TestBlipTVSubtitles(BaseTestSubtitles): url = 'http://blip.tv/a/a-6603250' IE = BlipTVIE - def test_list_subtitles(self): - self.DL.expect_warning('Automatic Captions not supported by this server') - self.DL.params['listsubtitles'] = True - info_dict = self.getInfoDict() - self.assertEqual(info_dict, None) - def test_allsubtitles(self): - self.DL.expect_warning('Automatic Captions not supported by this server') self.DL.params['writesubtitles'] = True self.DL.params['allsubtitles'] = True subtitles = self.getSubtitles() @@ -240,39 +158,13 @@ class TestVimeoSubtitles(BaseTestSubtitles): url = 'http://vimeo.com/76979871' IE = VimeoIE - def test_no_writesubtitles(self): - subtitles = self.getSubtitles() - self.assertEqual(subtitles, None) - - def test_subtitles(self): - self.DL.params['writesubtitles'] = True - subtitles = self.getSubtitles() - self.assertEqual(md5(subtitles['en']), '26399116d23ae3cf2c087cea94bc43b4') - - def test_subtitles_lang(self): - self.DL.params['writesubtitles'] = True - self.DL.params['subtitleslangs'] = ['fr'] - subtitles = self.getSubtitles() - self.assertEqual(md5(subtitles['fr']), 'b6191146a6c5d3a452244d853fde6dc8') - def test_allsubtitles(self): self.DL.params['writesubtitles'] = True self.DL.params['allsubtitles'] = True subtitles = self.getSubtitles() self.assertEqual(set(subtitles.keys()), set(['de', 'en', 'es', 'fr'])) - - def test_list_subtitles(self): - self.DL.expect_warning('Automatic Captions not supported by this server') - self.DL.params['listsubtitles'] = True - info_dict = self.getInfoDict() - self.assertEqual(info_dict, None) - - def test_automatic_captions(self): - self.DL.expect_warning('Automatic Captions not supported by this server') - self.DL.params['writeautomaticsub'] = True - self.DL.params['subtitleslang'] = ['en'] - subtitles = self.getSubtitles() - self.assertTrue(len(subtitles.keys()) == 0) + self.assertEqual(md5(subtitles['en']), '8062383cf4dec168fc40a088aa6d5888') + self.assertEqual(md5(subtitles['fr']), 'b6191146a6c5d3a452244d853fde6dc8') def test_nosubtitles(self): self.DL.expect_warning('video doesn\'t have subtitles') @@ -280,27 +172,13 @@ class TestVimeoSubtitles(BaseTestSubtitles): self.DL.params['writesubtitles'] = True self.DL.params['allsubtitles'] = True subtitles = self.getSubtitles() - self.assertEqual(len(subtitles), 0) - - def test_multiple_langs(self): - self.DL.params['writesubtitles'] = True - langs = ['es', 'fr', 'de'] - self.DL.params['subtitleslangs'] = langs - subtitles = self.getSubtitles() - for lang in langs: - self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang) + self.assertFalse(subtitles) class TestWallaSubtitles(BaseTestSubtitles): url = 'http://vod.walla.co.il/movie/2705958/the-yes-men' IE = WallaIE - def test_list_subtitles(self): - self.DL.expect_warning('Automatic Captions not supported by this server') - self.DL.params['listsubtitles'] = True - info_dict = self.getInfoDict() - self.assertEqual(info_dict, None) - def test_allsubtitles(self): self.DL.expect_warning('Automatic Captions not supported by this server') self.DL.params['writesubtitles'] = True @@ -315,26 +193,20 @@ class TestWallaSubtitles(BaseTestSubtitles): self.DL.params['writesubtitles'] = True self.DL.params['allsubtitles'] = True subtitles = self.getSubtitles() - self.assertEqual(len(subtitles), 0) + self.assertFalse(subtitles) class TestCeskaTelevizeSubtitles(BaseTestSubtitles): url = 'http://www.ceskatelevize.cz/ivysilani/10600540290-u6-uzasny-svet-techniky' IE = CeskaTelevizeIE - def test_list_subtitles(self): - self.DL.expect_warning('Automatic Captions not supported by this server') - self.DL.params['listsubtitles'] = True - info_dict = self.getInfoDict() - self.assertEqual(info_dict, None) - def test_allsubtitles(self): self.DL.expect_warning('Automatic Captions not supported by this server') self.DL.params['writesubtitles'] = True self.DL.params['allsubtitles'] = True subtitles = self.getSubtitles() self.assertEqual(set(subtitles.keys()), set(['cs'])) - self.assertEqual(md5(subtitles['cs']), '9bf52d9549533c32c427e264bf0847d4') + self.assertTrue(len(subtitles['cs']) > 20000) def test_nosubtitles(self): self.DL.expect_warning('video doesn\'t have subtitles') @@ -342,7 +214,110 @@ class TestCeskaTelevizeSubtitles(BaseTestSubtitles): self.DL.params['writesubtitles'] = True self.DL.params['allsubtitles'] = True subtitles = self.getSubtitles() - self.assertEqual(len(subtitles), 0) + self.assertFalse(subtitles) + + +class TestLyndaSubtitles(BaseTestSubtitles): + url = 'http://www.lynda.com/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html' + IE = LyndaIE + + def test_allsubtitles(self): + self.DL.params['writesubtitles'] = True + self.DL.params['allsubtitles'] = True + subtitles = self.getSubtitles() + self.assertEqual(set(subtitles.keys()), set(['en'])) + self.assertEqual(md5(subtitles['en']), '09bbe67222259bed60deaa26997d73a7') + + +class TestNPOSubtitles(BaseTestSubtitles): + url = 'http://www.npo.nl/nos-journaal/28-08-2014/POW_00722860' + IE = NPOIE + + def test_allsubtitles(self): + self.DL.params['writesubtitles'] = True + self.DL.params['allsubtitles'] = True + subtitles = self.getSubtitles() + self.assertEqual(set(subtitles.keys()), set(['nl'])) + self.assertEqual(md5(subtitles['nl']), 'fc6435027572b63fb4ab143abd5ad3f4') + + +class TestMTVSubtitles(BaseTestSubtitles): + url = 'http://www.cc.com/video-clips/kllhuv/stand-up-greg-fitzsimmons--uncensored---too-good-of-a-mother' + IE = ComedyCentralIE + + def getInfoDict(self): + return super(TestMTVSubtitles, self).getInfoDict()['entries'][0] + + def test_allsubtitles(self): + self.DL.params['writesubtitles'] = True + self.DL.params['allsubtitles'] = True + subtitles = self.getSubtitles() + self.assertEqual(set(subtitles.keys()), set(['en'])) + self.assertEqual(md5(subtitles['en']), 'b9f6ca22a6acf597ec76f61749765e65') + + +class TestNRKSubtitles(BaseTestSubtitles): + url = 'http://tv.nrk.no/serie/ikke-gjoer-dette-hjemme/DMPV73000411/sesong-2/episode-1' + IE = NRKTVIE + + def test_allsubtitles(self): + self.DL.params['writesubtitles'] = True + self.DL.params['allsubtitles'] = True + subtitles = self.getSubtitles() + self.assertEqual(set(subtitles.keys()), set(['no'])) + self.assertEqual(md5(subtitles['no']), '1d221e6458c95c5494dcd38e6a1f129a') + + +class TestRaiSubtitles(BaseTestSubtitles): + url = 'http://www.rai.tv/dl/RaiTV/programmi/media/ContentItem-cb27157f-9dd0-4aee-b788-b1f67643a391.html' + IE = RaiIE + + def test_allsubtitles(self): + self.DL.params['writesubtitles'] = True + self.DL.params['allsubtitles'] = True + subtitles = self.getSubtitles() + self.assertEqual(set(subtitles.keys()), set(['it'])) + self.assertEqual(md5(subtitles['it']), 'b1d90a98755126b61e667567a1f6680a') + + +class TestVikiSubtitles(BaseTestSubtitles): + url = 'http://www.viki.com/videos/1060846v-punch-episode-18' + IE = VikiIE + + def test_allsubtitles(self): + self.DL.params['writesubtitles'] = True + self.DL.params['allsubtitles'] = True + subtitles = self.getSubtitles() + self.assertEqual(set(subtitles.keys()), set(['en'])) + self.assertEqual(md5(subtitles['en']), '53cb083a5914b2d84ef1ab67b880d18a') + + +class TestThePlatformSubtitles(BaseTestSubtitles): + # from http://www.3playmedia.com/services-features/tools/integrations/theplatform/ + # (see http://theplatform.com/about/partners/type/subtitles-closed-captioning/) + url = 'theplatform:JFUjUE1_ehvq' + IE = ThePlatformIE + + def test_allsubtitles(self): + self.DL.params['writesubtitles'] = True + self.DL.params['allsubtitles'] = True + subtitles = self.getSubtitles() + self.assertEqual(set(subtitles.keys()), set(['en'])) + self.assertEqual(md5(subtitles['en']), '97e7670cbae3c4d26ae8bcc7fdd78d4b') + + +class TestRtveSubtitles(BaseTestSubtitles): + url = 'http://www.rtve.es/alacarta/videos/los-misterios-de-laura/misterios-laura-capitulo-32-misterio-del-numero-17-2-parte/2428621/' + IE = RTVEALaCartaIE + + def test_allsubtitles(self): + print('Skipping, only available from Spain') + return + self.DL.params['writesubtitles'] = True + self.DL.params['allsubtitles'] = True + subtitles = self.getSubtitles() + self.assertEqual(set(subtitles.keys()), set(['es'])) + self.assertEqual(md5(subtitles['es']), '69e70cae2d40574fb7316f31d6eb7fca') if __name__ == '__main__': diff --git a/test/test_swfinterp.py b/test/test_swfinterp.py index 9f18055..f1e8998 100644 --- a/test/test_swfinterp.py +++ b/test/test_swfinterp.py @@ -34,8 +34,8 @@ def _make_testfunc(testfile): def test_func(self): as_file = os.path.join(TEST_DIR, testfile) swf_file = os.path.join(TEST_DIR, test_id + '.swf') - if ((not os.path.exists(swf_file)) - or os.path.getmtime(swf_file) < os.path.getmtime(as_file)): + if ((not os.path.exists(swf_file)) or + os.path.getmtime(swf_file) < os.path.getmtime(as_file)): # Recompile try: subprocess.check_call([ diff --git a/test/test_utils.py b/test/test_utils.py index 80c765b..3fba8ae 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -53,6 +53,7 @@ from youtube_dl.utils import ( version_tuple, xpath_with_ns, render_table, + match_str, ) @@ -84,6 +85,8 @@ class TestUtil(unittest.TestCase): self.assertEqual( sanitize_filename('New World record at 0:12:34'), 'New World record at 0_12_34') + self.assertEqual(sanitize_filename('--gasdgf'), '_-gasdgf') + self.assertEqual(sanitize_filename('--gasdgf', is_id=True), '--gasdgf') forbidden = '"\0\\/' for fc in forbidden: @@ -243,6 +246,7 @@ class TestUtil(unittest.TestCase): self.assertEqual(parse_duration('2.5 hours'), 9000) self.assertEqual(parse_duration('02:03:04'), 7384) self.assertEqual(parse_duration('01:02:03:04'), 93784) + self.assertEqual(parse_duration('1 hour 3 minutes'), 3780) def test_fix_xml_ampersands(self): self.assertEqual( @@ -369,6 +373,10 @@ class TestUtil(unittest.TestCase): "playlist":[{"controls":{"all":null}}] }''') + inp = '"SAND Number: SAND 2013-7800P\\nPresenter: Tom Russo\\nHabanero Software Training - Xyce Software\\nXyce, Sandia\\u0027s"' + json_code = js_to_json(inp) + self.assertEqual(json.loads(json_code), json.loads(inp)) + def test_js_to_json_edgecases(self): on = js_to_json("{abc_def:'1\\'\\\\2\\\\\\'3\"4'}") self.assertEqual(json.loads(on), {"abc_def": "1'\\2\\'3\"4"}) @@ -459,6 +467,37 @@ ffmpeg version 2.4.4 Copyright (c) 2000-2014 the FFmpeg ...'''), '2.4.4') '123 4\n' '9999 51') + def test_match_str(self): + self.assertRaises(ValueError, match_str, 'xy>foobar', {}) + self.assertFalse(match_str('xy', {'x': 1200})) + self.assertTrue(match_str('!xy', {'x': 1200})) + self.assertTrue(match_str('x', {'x': 1200})) + self.assertFalse(match_str('!x', {'x': 1200})) + self.assertTrue(match_str('x', {'x': 0})) + self.assertFalse(match_str('x>0', {'x': 0})) + self.assertFalse(match_str('x>0', {})) + self.assertTrue(match_str('x>?0', {})) + self.assertTrue(match_str('x>1K', {'x': 1200})) + self.assertFalse(match_str('x>2K', {'x': 1200})) + self.assertTrue(match_str('x>=1200 & x < 1300', {'x': 1200})) + self.assertFalse(match_str('x>=1100 & x < 1200', {'x': 1200})) + self.assertFalse(match_str('y=a212', {'y': 'foobar42'})) + self.assertTrue(match_str('y=foobar42', {'y': 'foobar42'})) + self.assertFalse(match_str('y!=foobar42', {'y': 'foobar42'})) + self.assertTrue(match_str('y!=foobar2', {'y': 'foobar42'})) + self.assertFalse(match_str( + 'like_count > 100 & dislike_count 100 & dislike_count 100 & dislike_count 100 & dislike_count \ NUMBER\ (like\ "comment_count\ > +\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ 12",\ also\ works\ with\ >=,\ <,\ <=,\ !=,\ =)\ to +\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ compare\ against\ a\ number,\ and\ &\ to\ require +\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ multiple\ matches.\ Values\ which\ are\ not +\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ known\ are\ excluded\ unless\ you\ put\ a +\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ question\ mark\ (?)\ after\ the\ operator.For +\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ example,\ to\ only\ match\ videos\ that\ have +\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ been\ liked\ more\ than\ 100\ times\ and\ disliked +\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ less\ than\ 50\ times\ (or\ the\ dislike +\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ functionality\ is\ not\ available\ at\ the\ given +\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ service),\ but\ who\ also\ have\ a\ description, +\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ use\ \ \-\-match\-filter\ "like_count\ >\ 100\ & +\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ dislike_count\ 10M]").\ \ This\ works\ for -\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ filesize,\ height,\ width,\ tbr,\ abr,\ vbr,\ and -\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ fps\ and\ the\ comparisons\ <,\ <=,\ >,\ >=,\ =,\ != -\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ .\ Formats\ for\ which\ the\ value\ is\ not\ known -\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ are\ excluded\ unless\ you\ put\ a\ question\ mark -\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ (?)\ after\ the\ operator.\ You\ can\ combine -\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ format\ filters,\ so\ \ \-f\ "[height\ <=? -\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ 720][tbr>500]"\ selects\ up\ to\ 720p\ videos -\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ (or\ videos\ where\ the\ height\ is\ not\ known) -\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ with\ a\ bitrate\ of\ at\ least\ 500\ KBit/s.\ By -\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ default,\ youtube\-dl\ will\ pick\ the\ best -\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ quality.\ Use\ commas\ to\ download\ multiple -\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ audio\ formats,\ such\ as\ \-f +\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ filesize,\ height,\ width,\ tbr,\ abr,\ vbr, +\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ asr,\ and\ fps\ and\ the\ comparisons\ <,\ <=,\ >, +\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ >=,\ =,\ !=\ and\ for\ ext,\ acodec,\ vcodec, +\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ container,\ and\ protocol\ and\ the\ comparisons +\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ =,\ !=\ .\ Formats\ for\ which\ the\ value\ is\ not +\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ known\ are\ excluded\ unless\ you\ put\ a +\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ question\ mark\ (?)\ after\ the\ operator.\ You +\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ can\ combine\ format\ filters,\ so\ \ \-f\ "[height +\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ <=?\ 720][tbr>500]"\ selects\ up\ to\ 720p +\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ videos\ (or\ videos\ where\ the\ height\ is\ not +\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ known)\ with\ a\ bitrate\ of\ at\ least\ 500 +\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ KBit/s.\ By\ default,\ youtube\-dl\ will\ pick +\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ the\ best\ quality.\ Use\ commas\ to\ download +\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ multiple\ audio\ formats,\ such\ as\ \-f \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ 136/137/mp4/bestvideo,140/m4a/bestaudio. \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ You\ can\ merge\ the\ video\ and\ audio\ of\ two \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ formats\ into\ a\ single\ file\ using\ \-f\ NUMBER (like "comment_count > 12", also works with >=, <, <=, !=, =) to compare against a number, and & to require multiple matches. Values which are not known are excluded unless you put a question mark (?) after the operator.For example, to only match videos that have been liked more than 100 times and disliked less than 50 times (or the dislike functionality is not available at the given service), but who also have a description, use --match-filter "like_count > 100 & dislike_count 10M]"). This works for filesize, height, width, tbr, abr, vbr, and fps and the comparisons <, <=, >, >=, =, != . Formats for which the value is not known are excluded unless you put a question mark (?) after the operator. You can combine format filters, so -f "[height <=? 720][tbr>500]" selects up to 720p videos (or videos where the height is not known) with a bitrate of at least 500 KBit/s. By default, youtube-dl will pick the best quality. Use commas to download multiple audio formats, such as -f 136/137/mp4/bestvideo,140/m4a/bestaudio. You can merge the video and audio of two formats into a single file using -f + (requires ffmpeg or avconv), for example -f bestvideo+bestaudio.' +complete --command youtube-dl --long-option format --short-option f --description 'video format code, specify the order of preference using slashes, as in -f 22/17/18 . Instead of format codes, you can select by extension for the extensions aac, m4a, mp3, mp4, ogg, wav, webm. You can also use the special names "best", "bestvideo", "bestaudio", "worst". You can filter the video results by putting a condition in brackets, as in -f "best[height=720]" (or -f "[filesize>10M]"). This works for filesize, height, width, tbr, abr, vbr, asr, and fps and the comparisons <, <=, >, >=, =, != and for ext, acodec, vcodec, container, and protocol and the comparisons =, != . Formats for which the value is not known are excluded unless you put a question mark (?) after the operator. You can combine format filters, so -f "[height <=? 720][tbr>500]" selects up to 720p videos (or videos where the height is not known) with a bitrate of at least 500 KBit/s. By default, youtube-dl will pick the best quality. Use commas to download multiple audio formats, such as -f 136/137/mp4/bestvideo,140/m4a/bestaudio. You can merge the video and audio of two formats into a single file using -f + (requires ffmpeg or avconv), for example -f bestvideo+bestaudio.' complete --command youtube-dl --long-option all-formats --description 'download all available video formats' complete --command youtube-dl --long-option prefer-free-formats --description 'prefer free video formats unless a specific one is requested' complete --command youtube-dl --long-option max-quality --description 'highest quality format to download' @@ -109,7 +113,7 @@ complete --command youtube-dl --long-option write-sub --description 'write subti complete --command youtube-dl --long-option write-auto-sub --description 'write automatic subtitle file (youtube only)' complete --command youtube-dl --long-option all-subs --description 'downloads all the available subtitles of the video' complete --command youtube-dl --long-option list-subs --description 'lists all available subtitles for the video' -complete --command youtube-dl --long-option sub-format --description 'subtitle format (default=srt) ([sbv/vtt] youtube only)' +complete --command youtube-dl --long-option sub-format --description 'subtitle format, accepts formats preference, for example: "ass/srt/best"' complete --command youtube-dl --long-option sub-lang --description 'languages of the subtitles to download (optional) separated by commas, use IETF language tags like '"'"'en,pt'"'"'' complete --command youtube-dl --long-option username --short-option u --description 'login with this account ID' complete --command youtube-dl --long-option password --short-option p --description 'account password. If this option is left out, youtube-dl will ask interactively.' @@ -129,7 +133,9 @@ complete --command youtube-dl --long-option xattrs --description 'write metadata complete --command youtube-dl --long-option fixup --description 'Automatically correct known faults of the file. One of never (do nothing), warn (only emit a warning), detect_or_warn(the default; fix file if we can, warn otherwise)' complete --command youtube-dl --long-option prefer-avconv --description 'Prefer avconv over ffmpeg for running the postprocessors (default)' complete --command youtube-dl --long-option prefer-ffmpeg --description 'Prefer ffmpeg over avconv for running the postprocessors' +complete --command youtube-dl --long-option ffmpeg-location --description 'Location of the ffmpeg/avconv binary; either the path to the binary or its containing directory.' complete --command youtube-dl --long-option exec --description 'Execute a command on the file after downloading, similar to find'"'"'s -exec syntax. Example: --exec '"'"'adb push {} /sdcard/Music/ && rm {}'"'"'' +complete --command youtube-dl --long-option convert-subtitles --description 'Convert the subtitles to other format (currently supported: srt|ass|vtt)' complete --command youtube-dl --arguments ":ytfavorites :ytrecommended :ytsubscriptions :ytwatchlater :ythistory" diff --git a/youtube-dl.zsh b/youtube-dl.zsh index 71d1423..5071f16 100644 --- a/youtube-dl.zsh +++ b/youtube-dl.zsh @@ -19,7 +19,7 @@ __youtube_dl() { elif [[ ${prev} == "--recode-video" ]]; then _arguments '*: :(mp4 flv ogg webm mkv)' else - _arguments '*: :(--help --version --update --ignore-errors --abort-on-error --dump-user-agent --list-extractors --extractor-descriptions --default-search --ignore-config --flat-playlist --proxy --socket-timeout --source-address --force-ipv4 --force-ipv6 --playlist-start --playlist-end --playlist-items --match-title --reject-title --max-downloads --min-filesize --max-filesize --date --datebefore --dateafter --min-views --max-views --no-playlist --age-limit --download-archive --include-ads --rate-limit --retries --buffer-size --no-resize-buffer --test --playlist-reverse --xattr-set-filesize --external-downloader --batch-file --id --output --autonumber-size --restrict-filenames --auto-number --title --literal --no-overwrites --continue --no-continue --no-part --no-mtime --write-description --write-info-json --write-annotations --load-info --cookies --cache-dir --no-cache-dir --rm-cache-dir --write-thumbnail --write-all-thumbnails --list-thumbnails --quiet --no-warnings --simulate --skip-download --get-url --get-title --get-id --get-thumbnail --get-description --get-duration --get-filename --get-format --dump-json --dump-single-json --print-json --newline --no-progress --console-title --verbose --dump-intermediate-pages --write-pages --youtube-print-sig-code --print-traffic --call-home --no-call-home --encoding --no-check-certificate --prefer-insecure --user-agent --referer --add-header --bidi-workaround --sleep-interval --format --all-formats --prefer-free-formats --max-quality --list-formats --youtube-include-dash-manifest --youtube-skip-dash-manifest --merge-output-format --write-sub --write-auto-sub --all-subs --list-subs --sub-format --sub-lang --username --password --twofactor --netrc --video-password --extract-audio --audio-format --audio-quality --recode-video --keep-video --no-post-overwrites --embed-subs --embed-thumbnail --add-metadata --xattrs --fixup --prefer-avconv --prefer-ffmpeg --exec)' + _arguments '*: :(--help --version --update --ignore-errors --abort-on-error --dump-user-agent --list-extractors --extractor-descriptions --default-search --ignore-config --flat-playlist --no-color --proxy --socket-timeout --source-address --force-ipv4 --force-ipv6 --playlist-start --playlist-end --playlist-items --match-title --reject-title --max-downloads --min-filesize --max-filesize --date --datebefore --dateafter --min-views --max-views --match-filter --no-playlist --yes-playlist --age-limit --download-archive --include-ads --rate-limit --retries --buffer-size --no-resize-buffer --test --playlist-reverse --xattr-set-filesize --hls-prefer-native --external-downloader --batch-file --id --output --autonumber-size --restrict-filenames --auto-number --title --literal --no-overwrites --continue --no-continue --no-part --no-mtime --write-description --write-info-json --write-annotations --load-info --cookies --cache-dir --no-cache-dir --rm-cache-dir --write-thumbnail --write-all-thumbnails --list-thumbnails --quiet --no-warnings --simulate --skip-download --get-url --get-title --get-id --get-thumbnail --get-description --get-duration --get-filename --get-format --dump-json --dump-single-json --print-json --newline --no-progress --console-title --verbose --dump-intermediate-pages --write-pages --youtube-print-sig-code --print-traffic --call-home --no-call-home --encoding --no-check-certificate --prefer-insecure --user-agent --referer --add-header --bidi-workaround --sleep-interval --format --all-formats --prefer-free-formats --max-quality --list-formats --youtube-include-dash-manifest --youtube-skip-dash-manifest --merge-output-format --write-sub --write-auto-sub --all-subs --list-subs --sub-format --sub-lang --username --password --twofactor --netrc --video-password --extract-audio --audio-format --audio-quality --recode-video --keep-video --no-post-overwrites --embed-subs --embed-thumbnail --add-metadata --xattrs --fixup --prefer-avconv --prefer-ffmpeg --ffmpeg-location --exec --convert-subtitles)' fi ;; esac diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 9605f8f..74e4261 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -154,7 +154,7 @@ class YoutubeDL(object): allsubtitles: Downloads all the subtitles of the video (requires writesubtitles or writeautomaticsub) listsubtitles: Lists all available subtitles for the video - subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt) + subtitlesformat: The format code for subtitles subtitleslangs: List of languages of the subtitles to download keepvideo: Keep the video file after post-processing daterange: A DateRange object, download only if the upload_date is in the range. @@ -199,18 +199,25 @@ class YoutubeDL(object): postprocessor. progress_hooks: A list of functions that get called on download progress, with a dictionary with the entries - * status: One of "downloading" and "finished". + * status: One of "downloading", "error", or "finished". Check this first and ignore unknown values. - If status is one of "downloading" or "finished", the + If status is one of "downloading", or "finished", the following properties may also be present: * filename: The final filename (always present) + * tmpfilename: The filename we're currently writing to * downloaded_bytes: Bytes on disk * total_bytes: Size of the whole file, None if unknown - * tmpfilename: The filename we're currently writing to + * total_bytes_estimate: Guess of the eventual file size, + None if unavailable. + * elapsed: The number of seconds since download started. * eta: The estimated time in seconds, None if unknown * speed: The download speed in bytes/second, None if unknown + * fragment_index: The counter of the currently + downloaded video fragment. + * fragment_count: The number of fragments (= individual + files that will be merged) Progress hooks are guaranteed to be called at least once (with status "finished") if the download is successful. @@ -225,10 +232,19 @@ class YoutubeDL(object): call_home: Boolean, true iff we are allowed to contact the youtube-dl servers for debugging. sleep_interval: Number of seconds to sleep before each download. - external_downloader: Executable of the external downloader to call. listformats: Print an overview of available video formats and exit. list_thumbnails: Print a table of all thumbnails and exit. - + match_filter: A function that gets called with the info_dict of + every video. + If it returns a message, the video is ignored. + If it returns None, the video is downloaded. + match_filter_func in utils.py is one example for this. + no_color: Do not emit color codes in output. + + The following options determine which downloader is picked: + external_downloader: Executable of the external downloader to call. + None or unset for standard (built-in) downloader. + hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv. The following parameters are not used by YoutubeDL itself, they are used by the FileDownloader: @@ -292,8 +308,8 @@ class YoutubeDL(object): raise if (sys.version_info >= (3,) and sys.platform != 'win32' and - sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] - and not params.get('restrictfilenames', False)): + sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and + not params.get('restrictfilenames', False)): # On Python 3, the Unicode filesystem API will throw errors (#1474) self.report_warning( 'Assuming --restrict-filenames since file system encoding ' @@ -485,7 +501,7 @@ class YoutubeDL(object): else: if self.params.get('no_warnings'): return - if self._err_file.isatty() and os.name != 'nt': + if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt': _msg_header = '\033[0;33mWARNING:\033[0m' else: _msg_header = 'WARNING:' @@ -497,7 +513,7 @@ class YoutubeDL(object): Do the same as trouble, but prefixes the message with 'ERROR:', colored in red if stderr is a tty file. ''' - if self._err_file.isatty() and os.name != 'nt': + if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt': _msg_header = '\033[0;31mERROR:\033[0m' else: _msg_header = 'ERROR:' @@ -554,7 +570,7 @@ class YoutubeDL(object): self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')') return None - def _match_entry(self, info_dict): + def _match_entry(self, info_dict, incomplete): """ Returns None iff the file should be downloaded """ video_title = info_dict.get('title', info_dict.get('id', 'video')) @@ -583,9 +599,17 @@ class YoutubeDL(object): if max_views is not None and view_count > max_views: return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views) if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')): - return 'Skipping "%s" because it is age restricted' % title + return 'Skipping "%s" because it is age restricted' % video_title if self.in_download_archive(info_dict): return '%s has already been recorded in archive' % video_title + + if not incomplete: + match_filter = self.params.get('match_filter') + if match_filter is not None: + ret = match_filter(info_dict) + if ret is not None: + return ret + return None @staticmethod @@ -779,7 +803,7 @@ class YoutubeDL(object): 'extractor_key': ie_result['extractor_key'], } - reason = self._match_entry(entry) + reason = self._match_entry(entry, incomplete=True) if reason is not None: self.to_screen('[download] ' + reason) continue @@ -826,26 +850,43 @@ class YoutubeDL(object): '!=': operator.ne, } operator_rex = re.compile(r'''(?x)\s*\[ - (?Pwidth|height|tbr|abr|vbr|filesize|fps) + (?Pwidth|height|tbr|abr|vbr|asr|filesize|fps) \s*(?P%s)(?P\s*\?)?\s* (?P[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?) \]$ ''' % '|'.join(map(re.escape, OPERATORS.keys()))) m = operator_rex.search(format_spec) + if m: + try: + comparison_value = int(m.group('value')) + except ValueError: + comparison_value = parse_filesize(m.group('value')) + if comparison_value is None: + comparison_value = parse_filesize(m.group('value') + 'B') + if comparison_value is None: + raise ValueError( + 'Invalid value %r in format specification %r' % ( + m.group('value'), format_spec)) + op = OPERATORS[m.group('op')] + if not m: - raise ValueError('Invalid format specification %r' % format_spec) + STR_OPERATORS = { + '=': operator.eq, + '!=': operator.ne, + } + str_operator_rex = re.compile(r'''(?x)\s*\[ + \s*(?Pext|acodec|vcodec|container|protocol) + \s*(?P%s)(?P\s*\?)? + \s*(?P[a-zA-Z0-9_-]+) + \s*\]$ + ''' % '|'.join(map(re.escape, STR_OPERATORS.keys()))) + m = str_operator_rex.search(format_spec) + if m: + comparison_value = m.group('value') + op = STR_OPERATORS[m.group('op')] - try: - comparison_value = int(m.group('value')) - except ValueError: - comparison_value = parse_filesize(m.group('value')) - if comparison_value is None: - comparison_value = parse_filesize(m.group('value') + 'B') - if comparison_value is None: - raise ValueError( - 'Invalid value %r in format specification %r' % ( - m.group('value'), format_spec)) - op = OPERATORS[m.group('op')] + if not m: + raise ValueError('Invalid format specification %r' % format_spec) def _filter(f): actual_value = f.get(m.group('key')) @@ -920,27 +961,9 @@ class YoutubeDL(object): return res def _calc_cookies(self, info_dict): - class _PseudoRequest(object): - def __init__(self, url): - self.url = url - self.headers = {} - self.unverifiable = False - - def add_unredirected_header(self, k, v): - self.headers[k] = v - - def get_full_url(self): - return self.url - - def is_unverifiable(self): - return self.unverifiable - - def has_header(self, h): - return h in self.headers - - pr = _PseudoRequest(info_dict['url']) + pr = compat_urllib_request.Request(info_dict['url']) self.cookiejar.add_cookie_header(pr) - return pr.headers.get('Cookie') + return pr.get_header('Cookie') def process_video_result(self, info_dict, download=True): assert info_dict.get('_type', 'video') == 'video' @@ -985,6 +1008,15 @@ class YoutubeDL(object): info_dict['timestamp']) info_dict['upload_date'] = upload_date.strftime('%Y%m%d') + if self.params.get('listsubtitles', False): + if 'automatic_captions' in info_dict: + self.list_subtitles(info_dict['id'], info_dict.get('automatic_captions'), 'automatic captions') + self.list_subtitles(info_dict['id'], info_dict.get('subtitles'), 'subtitles') + return + info_dict['requested_subtitles'] = self.process_subtitles( + info_dict['id'], info_dict.get('subtitles'), + info_dict.get('automatic_captions')) + # This extractors handle format selection themselves if info_dict['extractor'] in ['Youku']: if download: @@ -1113,6 +1145,55 @@ class YoutubeDL(object): info_dict.update(formats_to_download[-1]) return info_dict + def process_subtitles(self, video_id, normal_subtitles, automatic_captions): + """Select the requested subtitles and their format""" + available_subs = {} + if normal_subtitles and self.params.get('writesubtitles'): + available_subs.update(normal_subtitles) + if automatic_captions and self.params.get('writeautomaticsub'): + for lang, cap_info in automatic_captions.items(): + if lang not in available_subs: + available_subs[lang] = cap_info + + if (not self.params.get('writesubtitles') and not + self.params.get('writeautomaticsub') or not + available_subs): + return None + + if self.params.get('allsubtitles', False): + requested_langs = available_subs.keys() + else: + if self.params.get('subtitleslangs', False): + requested_langs = self.params.get('subtitleslangs') + elif 'en' in available_subs: + requested_langs = ['en'] + else: + requested_langs = [list(available_subs.keys())[0]] + + formats_query = self.params.get('subtitlesformat', 'best') + formats_preference = formats_query.split('/') if formats_query else [] + subs = {} + for lang in requested_langs: + formats = available_subs.get(lang) + if formats is None: + self.report_warning('%s subtitles not available for %s' % (lang, video_id)) + continue + for ext in formats_preference: + if ext == 'best': + f = formats[-1] + break + matches = list(filter(lambda f: f['ext'] == ext, formats)) + if matches: + f = matches[-1] + break + else: + f = formats[-1] + self.report_warning( + 'No subtitle format found matching "%s" for language %s, ' + 'using %s' % (formats_query, lang, f['ext'])) + subs[lang] = f + return subs + def process_info(self, info_dict): """Process a single resolved IE result.""" @@ -1133,7 +1214,7 @@ class YoutubeDL(object): if 'format' not in info_dict: info_dict['format'] = info_dict['ext'] - reason = self._match_entry(info_dict) + reason = self._match_entry(info_dict, incomplete=False) if reason is not None: self.to_screen('[download] ' + reason) return @@ -1215,15 +1296,23 @@ class YoutubeDL(object): subtitles_are_requested = any([self.params.get('writesubtitles', False), self.params.get('writeautomaticsub')]) - if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']: + if subtitles_are_requested and info_dict.get('requested_subtitles'): # subtitles download errors are already managed as troubles in relevant IE # that way it will silently go on when used with unsupporting IE - subtitles = info_dict['subtitles'] - sub_format = self.params.get('subtitlesformat', 'srt') - for sub_lang in subtitles.keys(): - sub = subtitles[sub_lang] - if sub is None: - continue + subtitles = info_dict['requested_subtitles'] + ie = self.get_info_extractor(info_dict['extractor_key']) + for sub_lang, sub_info in subtitles.items(): + sub_format = sub_info['ext'] + if sub_info.get('data') is not None: + sub_data = sub_info['data'] + else: + try: + sub_data = ie._download_webpage( + sub_info['url'], info_dict['id'], note=False) + except ExtractorError as err: + self.report_warning('Unable to download subtitle for "%s": %s' % + (sub_lang, compat_str(err.cause))) + continue try: sub_filename = subtitles_filename(filename, sub_lang, sub_format) if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)): @@ -1231,7 +1320,7 @@ class YoutubeDL(object): else: self.to_screen('[info] Writing video subtitles to: ' + sub_filename) with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile: - subfile.write(sub) + subfile.write(sub_data) except (OSError, IOError): self.report_error('Cannot write subtitles file ' + sub_filename) return @@ -1264,7 +1353,7 @@ class YoutubeDL(object): downloaded = [] success = True merger = FFmpegMergerPP(self, not self.params.get('keepvideo')) - if not merger._executable: + if not merger.available: postprocessors = [] self.report_warning('You have requested multiple ' 'formats but ffmpeg or avconv are not installed.' @@ -1343,8 +1432,8 @@ class YoutubeDL(object): """Download a given list of URLs.""" outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL) if (len(url_list) > 1 and - '%' not in outtmpl - and self.params.get('max_downloads') != 1): + '%' not in outtmpl and + self.params.get('max_downloads') != 1): raise SameFileError(outtmpl) for url in url_list: @@ -1511,30 +1600,18 @@ class YoutubeDL(object): return res def list_formats(self, info_dict): - def line(format, idlen=20): - return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % ( - format['format_id'], - format['ext'], - self.format_resolution(format), - self._format_note(format), - )) - formats = info_dict.get('formats', [info_dict]) - idlen = max(len('format code'), - max(len(f['format_id']) for f in formats)) - formats_s = [ - line(f, idlen) for f in formats + table = [ + [f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)] + for f in formats if f.get('preference') is None or f['preference'] >= -1000] if len(formats) > 1: - formats_s[0] += (' ' if self._format_note(formats[0]) else '') + '(worst)' - formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)' + table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)' - header_line = line({ - 'format_id': 'format code', 'ext': 'extension', - 'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen) + header_line = ['format code', 'extension', 'resolution', 'note'] self.to_screen( - '[info] Available formats for %s:\n%s\n%s' % - (info_dict['id'], header_line, '\n'.join(formats_s))) + '[info] Available formats for %s:\n%s' % + (info_dict['id'], render_table(header_line, table))) def list_thumbnails(self, info_dict): thumbnails = info_dict.get('thumbnails') @@ -1553,6 +1630,17 @@ class YoutubeDL(object): ['ID', 'width', 'height', 'URL'], [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails])) + def list_subtitles(self, video_id, subtitles, name='subtitles'): + if not subtitles: + self.to_screen('%s has no %s' % (video_id, name)) + return + self.to_screen( + 'Available %s for %s:' % (name, video_id)) + self.to_screen(render_table( + ['Language', 'formats'], + [[lang, ', '.join(f['ext'] for f in reversed(formats))] + for lang, formats in subtitles.items()])) + def urlopen(self, req): """ Start an HTTP download """ @@ -1614,7 +1702,7 @@ class YoutubeDL(object): self._write_string('[debug] Python version %s - %s\n' % ( platform.python_version(), platform_name())) - exe_versions = FFmpegPostProcessor.get_versions() + exe_versions = FFmpegPostProcessor.get_versions(self) exe_versions['rtmpdump'] = rtmpdump_version() exe_str = ', '.join( '%s %s' % (exe, v) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index e90679f..49f3826 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -23,9 +23,10 @@ from .compat import ( ) from .utils import ( DateRange, - DEFAULT_OUTTMPL, decodeOption, + DEFAULT_OUTTMPL, DownloadError, + match_filter_func, MaxDownloadsReached, preferredencoding, read_batch_urls, @@ -169,6 +170,9 @@ def _real_main(argv=None): if opts.recodevideo is not None: if opts.recodevideo not in ['mp4', 'flv', 'webm', 'ogg', 'mkv']: parser.error('invalid video recode format specified') + if opts.convertsubtitles is not None: + if opts.convertsubtitles not in ['srt', 'vtt', 'ass']: + parser.error('invalid subtitle format specified') if opts.date is not None: date = DateRange.day(opts.date) @@ -188,14 +192,14 @@ def _real_main(argv=None): # In Python 2, sys.argv is a bytestring (also note http://bugs.python.org/issue2128 for Windows systems) if opts.outtmpl is not None: opts.outtmpl = opts.outtmpl.decode(preferredencoding()) - outtmpl = ((opts.outtmpl is not None and opts.outtmpl) - or (opts.format == '-1' and opts.usetitle and '%(title)s-%(id)s-%(format)s.%(ext)s') - or (opts.format == '-1' and '%(id)s-%(format)s.%(ext)s') - or (opts.usetitle and opts.autonumber and '%(autonumber)s-%(title)s-%(id)s.%(ext)s') - or (opts.usetitle and '%(title)s-%(id)s.%(ext)s') - or (opts.useid and '%(id)s.%(ext)s') - or (opts.autonumber and '%(autonumber)s-%(id)s.%(ext)s') - or DEFAULT_OUTTMPL) + outtmpl = ((opts.outtmpl is not None and opts.outtmpl) or + (opts.format == '-1' and opts.usetitle and '%(title)s-%(id)s-%(format)s.%(ext)s') or + (opts.format == '-1' and '%(id)s-%(format)s.%(ext)s') or + (opts.usetitle and opts.autonumber and '%(autonumber)s-%(title)s-%(id)s.%(ext)s') or + (opts.usetitle and '%(title)s-%(id)s.%(ext)s') or + (opts.useid and '%(id)s.%(ext)s') or + (opts.autonumber and '%(autonumber)s-%(id)s.%(ext)s') or + DEFAULT_OUTTMPL) if not os.path.splitext(outtmpl)[1] and opts.extractaudio: parser.error('Cannot download a video and extract audio into the same' ' file! Use "{0}.%(ext)s" instead of "{0}" as the output' @@ -222,10 +226,14 @@ def _real_main(argv=None): 'key': 'FFmpegVideoConvertor', 'preferedformat': opts.recodevideo, }) + if opts.convertsubtitles: + postprocessors.append({ + 'key': 'FFmpegSubtitlesConvertor', + 'format': opts.convertsubtitles, + }) if opts.embedsubtitles: postprocessors.append({ 'key': 'FFmpegEmbedSubtitle', - 'subtitlesformat': opts.subtitlesformat, }) if opts.xattrs: postprocessors.append({'key': 'XAttrMetadata'}) @@ -247,6 +255,9 @@ def _real_main(argv=None): xattr # Confuse flake8 except ImportError: parser.error('setting filesize xattr requested but python-xattr is not available') + match_filter = ( + None if opts.match_filter is None + else match_filter_func(opts.match_filter)) ydl_opts = { 'usenetrc': opts.usenetrc, @@ -344,6 +355,10 @@ def _real_main(argv=None): 'list_thumbnails': opts.list_thumbnails, 'playlist_items': opts.playlist_items, 'xattr_set_filesize': opts.xattr_set_filesize, + 'match_filter': match_filter, + 'no_color': opts.no_color, + 'ffmpeg_location': opts.ffmpeg_location, + 'hls_prefer_native': opts.hls_prefer_native, } with YoutubeDL(ydl_opts) as ydl: diff --git a/youtube_dl/aes.py b/youtube_dl/aes.py index 5efd0f8..07224d5 100644 --- a/youtube_dl/aes.py +++ b/youtube_dl/aes.py @@ -1,7 +1,5 @@ from __future__ import unicode_literals -__all__ = ['aes_encrypt', 'key_expansion', 'aes_ctr_decrypt', 'aes_cbc_decrypt', 'aes_decrypt_text'] - import base64 from math import ceil @@ -329,3 +327,5 @@ def inc(data): data[i] = data[i] + 1 break return data + +__all__ = ['aes_encrypt', 'key_expansion', 'aes_ctr_decrypt', 'aes_cbc_decrypt', 'aes_decrypt_text'] diff --git a/youtube_dl/downloader/__init__.py b/youtube_dl/downloader/__init__.py index eff1122..9fb66e2 100644 --- a/youtube_dl/downloader/__init__.py +++ b/youtube_dl/downloader/__init__.py @@ -34,6 +34,9 @@ def get_suitable_downloader(info_dict, params={}): if ed.supports(info_dict): return ed + if protocol == 'm3u8' and params.get('hls_prefer_native'): + return NativeHlsFD + return PROTOCOL_MAP.get(protocol, HttpFD) diff --git a/youtube_dl/downloader/common.py b/youtube_dl/downloader/common.py index 7bb3a94..3ae9002 100644 --- a/youtube_dl/downloader/common.py +++ b/youtube_dl/downloader/common.py @@ -1,4 +1,4 @@ -from __future__ import unicode_literals +from __future__ import division, unicode_literals import os import re @@ -54,6 +54,7 @@ class FileDownloader(object): self.ydl = ydl self._progress_hooks = [] self.params = params + self.add_progress_hook(self.report_progress) @staticmethod def format_seconds(seconds): @@ -226,42 +227,64 @@ class FileDownloader(object): self.to_screen(clear_line + fullmsg, skip_eol=not is_last_line) self.to_console_title('youtube-dl ' + msg) - def report_progress(self, percent, data_len_str, speed, eta): - """Report download progress.""" - if self.params.get('noprogress', False): + def report_progress(self, s): + if s['status'] == 'finished': + if self.params.get('noprogress', False): + self.to_screen('[download] Download completed') + else: + s['_total_bytes_str'] = format_bytes(s['total_bytes']) + if s.get('elapsed') is not None: + s['_elapsed_str'] = self.format_seconds(s['elapsed']) + msg_template = '100%% of %(_total_bytes_str)s in %(_elapsed_str)s' + else: + msg_template = '100%% of %(_total_bytes_str)s' + self._report_progress_status( + msg_template % s, is_last_line=True) + + if self.params.get('noprogress'): return - if eta is not None: - eta_str = self.format_eta(eta) - else: - eta_str = 'Unknown ETA' - if percent is not None: - percent_str = self.format_percent(percent) + + if s['status'] != 'downloading': + return + + if s.get('eta') is not None: + s['_eta_str'] = self.format_eta(s['eta']) else: - percent_str = 'Unknown %' - speed_str = self.format_speed(speed) + s['_eta_str'] = 'Unknown ETA' - msg = ('%s of %s at %s ETA %s' % - (percent_str, data_len_str, speed_str, eta_str)) - self._report_progress_status(msg) + if s.get('total_bytes') and s.get('downloaded_bytes') is not None: + s['_percent_str'] = self.format_percent(100 * s['downloaded_bytes'] / s['total_bytes']) + elif s.get('total_bytes_estimate') and s.get('downloaded_bytes') is not None: + s['_percent_str'] = self.format_percent(100 * s['downloaded_bytes'] / s['total_bytes_estimate']) + else: + if s.get('downloaded_bytes') == 0: + s['_percent_str'] = self.format_percent(0) + else: + s['_percent_str'] = 'Unknown %' - def report_progress_live_stream(self, downloaded_data_len, speed, elapsed): - if self.params.get('noprogress', False): - return - downloaded_str = format_bytes(downloaded_data_len) - speed_str = self.format_speed(speed) - elapsed_str = FileDownloader.format_seconds(elapsed) - msg = '%s at %s (%s)' % (downloaded_str, speed_str, elapsed_str) - self._report_progress_status(msg) - - def report_finish(self, data_len_str, tot_time): - """Report download finished.""" - if self.params.get('noprogress', False): - self.to_screen('[download] Download completed') + if s.get('speed') is not None: + s['_speed_str'] = self.format_speed(s['speed']) + else: + s['_speed_str'] = 'Unknown speed' + + if s.get('total_bytes') is not None: + s['_total_bytes_str'] = format_bytes(s['total_bytes']) + msg_template = '%(_percent_str)s of %(_total_bytes_str)s at %(_speed_str)s ETA %(_eta_str)s' + elif s.get('total_bytes_estimate') is not None: + s['_total_bytes_estimate_str'] = format_bytes(s['total_bytes_estimate']) + msg_template = '%(_percent_str)s of ~%(_total_bytes_estimate_str)s at %(_speed_str)s ETA %(_eta_str)s' else: - self._report_progress_status( - ('100%% of %s in %s' % - (data_len_str, self.format_seconds(tot_time))), - is_last_line=True) + if s.get('downloaded_bytes') is not None: + s['_downloaded_bytes_str'] = format_bytes(s['downloaded_bytes']) + if s.get('elapsed'): + s['_elapsed_str'] = self.format_seconds(s['elapsed']) + msg_template = '%(_downloaded_bytes_str)s at %(_speed_str)s (%(_elapsed_str)s)' + else: + msg_template = '%(_downloaded_bytes_str)s at %(_speed_str)s' + else: + msg_template = '%(_percent_str)s % at %(_speed_str)s ETA %(_eta_str)s' + + self._report_progress_status(msg_template % s) def report_resuming_byte(self, resume_len): """Report attempt to resume at given byte.""" @@ -288,14 +311,14 @@ class FileDownloader(object): """ nooverwrites_and_exists = ( - self.params.get('nooverwrites', False) - and os.path.exists(encodeFilename(filename)) + self.params.get('nooverwrites', False) and + os.path.exists(encodeFilename(filename)) ) continuedl_and_exists = ( - self.params.get('continuedl', False) - and os.path.isfile(encodeFilename(filename)) - and not self.params.get('nopart', False) + self.params.get('continuedl', False) and + os.path.isfile(encodeFilename(filename)) and + not self.params.get('nopart', False) ) # Check file already present diff --git a/youtube_dl/downloader/external.py b/youtube_dl/downloader/external.py index ff031d2..51c41c7 100644 --- a/youtube_dl/downloader/external.py +++ b/youtube_dl/downloader/external.py @@ -75,7 +75,7 @@ class ExternalFD(FileDownloader): class CurlFD(ExternalFD): def _make_cmd(self, tmpfilename, info_dict): - cmd = [self.exe, '-o', tmpfilename] + cmd = [self.exe, '--location', '-o', tmpfilename] for key, val in info_dict['http_headers'].items(): cmd += ['--header', '%s: %s' % (key, val)] cmd += self._source_address('--interface') diff --git a/youtube_dl/downloader/f4m.py b/youtube_dl/downloader/f4m.py index 0e7a1c2..3dc796f 100644 --- a/youtube_dl/downloader/f4m.py +++ b/youtube_dl/downloader/f4m.py @@ -1,4 +1,4 @@ -from __future__ import unicode_literals +from __future__ import division, unicode_literals import base64 import io @@ -11,11 +11,11 @@ from .common import FileDownloader from .http import HttpFD from ..compat import ( compat_urlparse, + compat_urllib_error, ) from ..utils import ( struct_pack, struct_unpack, - format_bytes, encodeFilename, sanitize_open, xpath_text, @@ -122,7 +122,8 @@ class FlvReader(io.BytesIO): self.read_unsigned_int() # BootstrapinfoVersion # Profile,Live,Update,Reserved - self.read(1) + flags = self.read_unsigned_char() + live = flags & 0x20 != 0 # time scale self.read_unsigned_int() # CurrentMediaTime @@ -161,6 +162,7 @@ class FlvReader(io.BytesIO): return { 'segments': segments, 'fragments': fragments, + 'live': live, } def read_bootstrap_info(self): @@ -183,6 +185,10 @@ def build_fragments_list(boot_info): for segment, fragments_count in segment_run_table['segment_run']: for _ in range(fragments_count): res.append((segment, next(fragments_counter))) + + if boot_info['live']: + res = res[-2:] + return res @@ -247,22 +253,43 @@ class F4mFD(FileDownloader): self.report_error('Unsupported DRM') return media + def _get_bootstrap_from_url(self, bootstrap_url): + bootstrap = self.ydl.urlopen(bootstrap_url).read() + return read_bootstrap_info(bootstrap) + + def _update_live_fragments(self, bootstrap_url, latest_fragment): + fragments_list = [] + retries = 30 + while (not fragments_list) and (retries > 0): + boot_info = self._get_bootstrap_from_url(bootstrap_url) + fragments_list = build_fragments_list(boot_info) + fragments_list = [f for f in fragments_list if f[1] > latest_fragment] + if not fragments_list: + # Retry after a while + time.sleep(5.0) + retries -= 1 + + if not fragments_list: + self.report_error('Failed to update fragments') + + return fragments_list + + def _parse_bootstrap_node(self, node, base_url): + if node.text is None: + bootstrap_url = compat_urlparse.urljoin( + base_url, node.attrib['url']) + boot_info = self._get_bootstrap_from_url(bootstrap_url) + else: + bootstrap_url = None + bootstrap = base64.b64decode(node.text) + boot_info = read_bootstrap_info(bootstrap) + return (boot_info, bootstrap_url) + def real_download(self, filename, info_dict): man_url = info_dict['url'] requested_bitrate = info_dict.get('tbr') self.to_screen('[download] Downloading f4m manifest') manifest = self.ydl.urlopen(man_url).read() - self.report_destination(filename) - http_dl = HttpQuietDownloader( - self.ydl, - { - 'continuedl': True, - 'quiet': True, - 'noprogress': True, - 'ratelimit': self.params.get('ratelimit', None), - 'test': self.params.get('test', False), - } - ) doc = etree.fromstring(manifest) formats = [(int(f.attrib.get('bitrate', -1)), f) @@ -277,18 +304,13 @@ class F4mFD(FileDownloader): base_url = compat_urlparse.urljoin(man_url, media.attrib['url']) bootstrap_node = doc.find(_add_ns('bootstrapInfo')) - if bootstrap_node.text is None: - bootstrap_url = compat_urlparse.urljoin( - base_url, bootstrap_node.attrib['url']) - bootstrap = self.ydl.urlopen(bootstrap_url).read() - else: - bootstrap = base64.b64decode(bootstrap_node.text) + boot_info, bootstrap_url = self._parse_bootstrap_node(bootstrap_node, base_url) + live = boot_info['live'] metadata_node = media.find(_add_ns('metadata')) if metadata_node is not None: metadata = base64.b64decode(metadata_node.text) else: metadata = None - boot_info = read_bootstrap_info(bootstrap) fragments_list = build_fragments_list(boot_info) if self.params.get('test', False): @@ -298,64 +320,112 @@ class F4mFD(FileDownloader): # For some akamai manifests we'll need to add a query to the fragment url akamai_pv = xpath_text(doc, _add_ns('pv-2.0')) + self.report_destination(filename) + http_dl = HttpQuietDownloader( + self.ydl, + { + 'continuedl': True, + 'quiet': True, + 'noprogress': True, + 'ratelimit': self.params.get('ratelimit', None), + 'test': self.params.get('test', False), + } + ) tmpfilename = self.temp_name(filename) (dest_stream, tmpfilename) = sanitize_open(tmpfilename, 'wb') + write_flv_header(dest_stream) - write_metadata_tag(dest_stream, metadata) + if not live: + write_metadata_tag(dest_stream, metadata) # This dict stores the download progress, it's updated by the progress # hook state = { + 'status': 'downloading', 'downloaded_bytes': 0, - 'frag_counter': 0, + 'frag_index': 0, + 'frag_count': total_frags, + 'filename': filename, + 'tmpfilename': tmpfilename, } start = time.time() - def frag_progress_hook(status): - frag_total_bytes = status.get('total_bytes', 0) - estimated_size = (state['downloaded_bytes'] + - (total_frags - state['frag_counter']) * frag_total_bytes) - if status['status'] == 'finished': + def frag_progress_hook(s): + if s['status'] not in ('downloading', 'finished'): + return + + frag_total_bytes = s.get('total_bytes', 0) + if s['status'] == 'finished': state['downloaded_bytes'] += frag_total_bytes - state['frag_counter'] += 1 - progress = self.calc_percent(state['frag_counter'], total_frags) - byte_counter = state['downloaded_bytes'] + state['frag_index'] += 1 + + estimated_size = ( + (state['downloaded_bytes'] + frag_total_bytes) / + (state['frag_index'] + 1) * total_frags) + time_now = time.time() + state['total_bytes_estimate'] = estimated_size + state['elapsed'] = time_now - start + + if s['status'] == 'finished': + progress = self.calc_percent(state['frag_index'], total_frags) else: - frag_downloaded_bytes = status['downloaded_bytes'] - byte_counter = state['downloaded_bytes'] + frag_downloaded_bytes + frag_downloaded_bytes = s['downloaded_bytes'] frag_progress = self.calc_percent(frag_downloaded_bytes, frag_total_bytes) - progress = self.calc_percent(state['frag_counter'], total_frags) + progress = self.calc_percent(state['frag_index'], total_frags) progress += frag_progress / float(total_frags) - eta = self.calc_eta(start, time.time(), estimated_size, byte_counter) - self.report_progress(progress, format_bytes(estimated_size), - status.get('speed'), eta) + state['eta'] = self.calc_eta( + start, time_now, estimated_size, state['downloaded_bytes'] + frag_downloaded_bytes) + state['speed'] = s.get('speed') + self._hook_progress(state) + http_dl.add_progress_hook(frag_progress_hook) frags_filenames = [] - for (seg_i, frag_i) in fragments_list: + while fragments_list: + seg_i, frag_i = fragments_list.pop(0) name = 'Seg%d-Frag%d' % (seg_i, frag_i) url = base_url + name if akamai_pv: url += '?' + akamai_pv.strip(';') frag_filename = '%s-%s' % (tmpfilename, name) - success = http_dl.download(frag_filename, {'url': url}) - if not success: - return False - with open(frag_filename, 'rb') as down: - down_data = down.read() - reader = FlvReader(down_data) - while True: - _, box_type, box_data = reader.read_box_info() - if box_type == b'mdat': - dest_stream.write(box_data) - break - frags_filenames.append(frag_filename) + try: + success = http_dl.download(frag_filename, {'url': url}) + if not success: + return False + with open(frag_filename, 'rb') as down: + down_data = down.read() + reader = FlvReader(down_data) + while True: + _, box_type, box_data = reader.read_box_info() + if box_type == b'mdat': + dest_stream.write(box_data) + break + if live: + os.remove(frag_filename) + else: + frags_filenames.append(frag_filename) + except (compat_urllib_error.HTTPError, ) as err: + if live and (err.code == 404 or err.code == 410): + # We didn't keep up with the live window. Continue + # with the next available fragment. + msg = 'Fragment %d unavailable' % frag_i + self.report_warning(msg) + fragments_list = [] + else: + raise + + if not fragments_list and live and bootstrap_url: + fragments_list = self._update_live_fragments(bootstrap_url, frag_i) + total_frags += len(fragments_list) + if fragments_list and (fragments_list[0][1] > frag_i + 1): + msg = 'Missed %d fragments' % (fragments_list[0][1] - (frag_i + 1)) + self.report_warning(msg) dest_stream.close() - self.report_finish(format_bytes(state['downloaded_bytes']), time.time() - start) + elapsed = time.time() - start self.try_rename(tmpfilename, filename) for frag_file in frags_filenames: os.remove(frag_file) @@ -366,6 +436,7 @@ class F4mFD(FileDownloader): 'total_bytes': fsize, 'filename': filename, 'status': 'finished', + 'elapsed': elapsed, }) return True diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py index e527ee4..8be4f42 100644 --- a/youtube_dl/downloader/hls.py +++ b/youtube_dl/downloader/hls.py @@ -23,15 +23,14 @@ class HlsFD(FileDownloader): tmpfilename = self.temp_name(filename) ffpp = FFmpegPostProcessor(downloader=self) - program = ffpp._executable - if program is None: + if not ffpp.available: self.report_error('m3u8 download detected but ffmpeg or avconv could not be found. Please install one.') return False ffpp.check_version() args = [ encodeArgument(opt) - for opt in (program, '-y', '-i', url, '-f', 'mp4', '-c', 'copy', '-bsf:a', 'aac_adtstoasc')] + for opt in (ffpp.executable, '-y', '-i', url, '-f', 'mp4', '-c', 'copy', '-bsf:a', 'aac_adtstoasc')] args.append(encodeFilename(tmpfilename, True)) retval = subprocess.call(args) @@ -48,7 +47,7 @@ class HlsFD(FileDownloader): return True else: self.to_stderr('\n') - self.report_error('%s exited with code %d' % (program, retval)) + self.report_error('%s exited with code %d' % (ffpp.basename, retval)) return False diff --git a/youtube_dl/downloader/http.py b/youtube_dl/downloader/http.py index 49170cf..2e3dac8 100644 --- a/youtube_dl/downloader/http.py +++ b/youtube_dl/downloader/http.py @@ -1,11 +1,10 @@ from __future__ import unicode_literals +import errno import os +import socket import time -from socket import error as SocketError -import errno - from .common import FileDownloader from ..compat import ( compat_urllib_request, @@ -15,7 +14,6 @@ from ..utils import ( ContentTooShortError, encodeFilename, sanitize_open, - format_bytes, ) @@ -102,7 +100,7 @@ class HttpFD(FileDownloader): resume_len = 0 open_mode = 'wb' break - except SocketError as e: + except socket.error as e: if e.errno != errno.ECONNRESET: # Connection reset is no problem, just retry raise @@ -137,7 +135,6 @@ class HttpFD(FileDownloader): self.to_screen('\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len)) return False - data_len_str = format_bytes(data_len) byte_counter = 0 + resume_len block_size = self.params.get('buffersize', 1024) start = time.time() @@ -196,20 +193,19 @@ class HttpFD(FileDownloader): # Progress message speed = self.calc_speed(start, now, byte_counter - resume_len) if data_len is None: - eta = percent = None + eta = None else: - percent = self.calc_percent(byte_counter, data_len) eta = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len) - self.report_progress(percent, data_len_str, speed, eta) self._hook_progress({ + 'status': 'downloading', 'downloaded_bytes': byte_counter, 'total_bytes': data_len, 'tmpfilename': tmpfilename, 'filename': filename, - 'status': 'downloading', 'eta': eta, 'speed': speed, + 'elapsed': now - start, }) if is_test and byte_counter == data_len: @@ -221,7 +217,13 @@ class HttpFD(FileDownloader): return False if tmpfilename != '-': stream.close() - self.report_finish(data_len_str, (time.time() - start)) + + self._hook_progress({ + 'downloaded_bytes': byte_counter, + 'total_bytes': data_len, + 'tmpfilename': tmpfilename, + 'status': 'error', + }) if data_len is not None and byte_counter != data_len: raise ContentTooShortError(byte_counter, int(data_len)) self.try_rename(tmpfilename, filename) @@ -235,6 +237,7 @@ class HttpFD(FileDownloader): 'total_bytes': byte_counter, 'filename': filename, 'status': 'finished', + 'elapsed': time.time() - start, }) return True diff --git a/youtube_dl/downloader/rtmp.py b/youtube_dl/downloader/rtmp.py index f7eeb6f..89e98ae 100644 --- a/youtube_dl/downloader/rtmp.py +++ b/youtube_dl/downloader/rtmp.py @@ -11,7 +11,6 @@ from ..compat import compat_str from ..utils import ( check_executable, encodeFilename, - format_bytes, get_exe_version, ) @@ -51,23 +50,23 @@ class RtmpFD(FileDownloader): if not resume_percent: resume_percent = percent resume_downloaded_data_len = downloaded_data_len - eta = self.calc_eta(start, time.time(), 100 - resume_percent, percent - resume_percent) - speed = self.calc_speed(start, time.time(), downloaded_data_len - resume_downloaded_data_len) + time_now = time.time() + eta = self.calc_eta(start, time_now, 100 - resume_percent, percent - resume_percent) + speed = self.calc_speed(start, time_now, downloaded_data_len - resume_downloaded_data_len) data_len = None if percent > 0: data_len = int(downloaded_data_len * 100 / percent) - data_len_str = '~' + format_bytes(data_len) - self.report_progress(percent, data_len_str, speed, eta) - cursor_in_new_line = False self._hook_progress({ + 'status': 'downloading', 'downloaded_bytes': downloaded_data_len, - 'total_bytes': data_len, + 'total_bytes_estimate': data_len, 'tmpfilename': tmpfilename, 'filename': filename, - 'status': 'downloading', 'eta': eta, + 'elapsed': time_now - start, 'speed': speed, }) + cursor_in_new_line = False else: # no percent for live streams mobj = re.search(r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec', line) @@ -75,15 +74,15 @@ class RtmpFD(FileDownloader): downloaded_data_len = int(float(mobj.group(1)) * 1024) time_now = time.time() speed = self.calc_speed(start, time_now, downloaded_data_len) - self.report_progress_live_stream(downloaded_data_len, speed, time_now - start) - cursor_in_new_line = False self._hook_progress({ 'downloaded_bytes': downloaded_data_len, 'tmpfilename': tmpfilename, 'filename': filename, 'status': 'downloading', + 'elapsed': time_now - start, 'speed': speed, }) + cursor_in_new_line = False elif self.params.get('verbose', False): if not cursor_in_new_line: self.to_screen('') @@ -120,7 +119,9 @@ class RtmpFD(FileDownloader): # Download using rtmpdump. rtmpdump returns exit code 2 when # the connection was interrumpted and resuming appears to be # possible. This is part of rtmpdump's normal usage, AFAIK. - basic_args = ['rtmpdump', '--verbose', '-r', url, '-o', tmpfilename] + basic_args = [ + 'rtmpdump', '--verbose', '-r', url, + '-o', encodeFilename(tmpfilename, True)] if player_url is not None: basic_args += ['--swfVfy', player_url] if page_url is not None: diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 047f700..ffcc7d9 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -6,7 +6,9 @@ from .academicearth import AcademicEarthCourseIE from .addanime import AddAnimeIE from .adobetv import AdobeTVIE from .adultswim import AdultSwimIE +from .aftenposten import AftenpostenIE from .aftonbladet import AftonbladetIE +from .airmozilla import AirMozillaIE from .aljazeera import AlJazeeraIE from .alphaporno import AlphaPornoIE from .anitube import AnitubeIE @@ -48,14 +50,24 @@ from .brightcove import BrightcoveIE from .buzzfeed import BuzzFeedIE from .byutv import BYUtvIE from .c56 import C56IE +from .camdemy import ( + CamdemyIE, + CamdemyFolderIE +) from .canal13cl import Canal13clIE from .canalplus import CanalplusIE from .canalc2 import Canalc2IE from .cbs import CBSIE from .cbsnews import CBSNewsIE +from .cbssports import CBSSportsIE +from .ccc import CCCIE from .ceskatelevize import CeskaTelevizeIE from .channel9 import Channel9IE from .chilloutzone import ChilloutzoneIE +from .chirbit import ( + ChirbitIE, + ChirbitProfileIE, +) from .cinchcast import CinchcastIE from .clipfish import ClipfishIE from .cliphunter import CliphunterIE @@ -73,7 +85,7 @@ from .collegehumor import CollegeHumorIE from .collegerama import CollegeRamaIE from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE from .comcarcoff import ComCarCoffIE -from .commonmistakes import CommonMistakesIE +from .commonmistakes import CommonMistakesIE, UnicodeBOMIE from .condenast import CondeNastIE from .cracked import CrackedIE from .criterion import CriterionIE @@ -115,6 +127,7 @@ from .ellentv import ( EllenTVClipsIE, ) from .elpais import ElPaisIE +from .embedly import EmbedlyIE from .empflix import EMPFlixIE from .engadget import EngadgetIE from .eporner import EpornerIE @@ -183,6 +196,7 @@ from .hellporno import HellPornoIE from .helsinki import HelsinkiIE from .hentaistigma import HentaiStigmaIE from .historicfilms import HistoricFilmsIE +from .history import HistoryIE from .hitbox import HitboxIE, HitboxLiveIE from .hornbunny import HornBunnyIE from .hostingbulk import HostingBulkIE @@ -197,6 +211,7 @@ from .imdb import ( ImdbIE, ImdbListIE ) +from .imgur import ImgurIE from .ina import InaIE from .infoq import InfoQIE from .instagram import InstagramIE, InstagramUserIE @@ -212,6 +227,7 @@ from .jeuxvideo import JeuxVideoIE from .jove import JoveIE from .jukebox import JukeboxIE from .jpopsukitv import JpopsukiIE +from .kaltura import KalturaIE from .kankan import KankanIE from .karaoketv import KaraoketvIE from .keezmovies import KeezMoviesIE @@ -223,6 +239,11 @@ from .krasview import KrasViewIE from .ku6 import Ku6IE from .la7 import LA7IE from .laola1tv import Laola1TvIE +from .letv import ( + LetvIE, + LetvTvIE, + LetvPlaylistIE +) from .lifenews import LifeNewsIE from .liveleak import LiveLeakIE from .livestream import ( @@ -275,6 +296,7 @@ from .myspace import MySpaceIE, MySpaceAlbumIE from .myspass import MySpassIE from .myvideo import MyVideoIE from .myvidster import MyVidsterIE +from .nationalgeographic import NationalGeographicIE from .naver import NaverIE from .nba import NBAIE from .nbc import ( @@ -312,6 +334,8 @@ from .nowvideo import NowVideoIE from .npo import ( NPOIE, NPOLiveIE, + NPORadioIE, + NPORadioFragmentIE, TegenlichtVproIE, ) from .nrk import ( @@ -322,6 +346,7 @@ from .ntvde import NTVDeIE from .ntvru import NTVRuIE from .nytimes import NYTimesIE from .nuvid import NuvidIE +from .odnoklassniki import OdnoklassnikiIE from .oktoberfesttv import OktoberfestTVIE from .ooyala import OoyalaIE from .openfilm import OpenFilmIE @@ -341,13 +366,18 @@ from .playfm import PlayFMIE from .playvid import PlayvidIE from .podomatic import PodomaticIE from .pornhd import PornHdIE -from .pornhub import PornHubIE +from .pornhub import ( + PornHubIE, + PornHubPlaylistIE, +) from .pornotube import PornotubeIE from .pornoxo import PornoXOIE from .promptfile import PromptFileIE from .prosiebensat1 import ProSiebenSat1IE +from .puls4 import Puls4IE from .pyvideo import PyvideoIE from .quickvid import QuickVidIE +from .r7 import R7IE from .radiode import RadioDeIE from .radiobremen import RadioBremenIE from .radiofrance import RadioFranceIE @@ -362,7 +392,7 @@ from .rottentomatoes import RottenTomatoesIE from .roxwel import RoxwelIE from .rtbf import RTBFIE from .rte import RteIE -from .rtlnl import RtlXlIE +from .rtlnl import RtlNlIE from .rtlnow import RTLnowIE from .rtl2 import RTL2IE from .rtp import RTPIE @@ -377,6 +407,7 @@ from .rutube import ( RutubePersonIE, ) from .rutv import RUTVIE +from .sandia import SandiaIE from .sapo import SapoIE from .savefrom import SaveFromIE from .sbs import SBSIE @@ -407,7 +438,10 @@ from .soundcloud import ( SoundcloudUserIE, SoundcloudPlaylistIE ) -from .soundgasm import SoundgasmIE +from .soundgasm import ( + SoundgasmIE, + SoundgasmProfileIE +) from .southpark import ( SouthParkIE, SouthparkDeIE, @@ -427,6 +461,7 @@ from .streamcloud import StreamcloudIE from .streamcz import StreamCZIE from .streetvoice import StreetVoiceIE from .sunporno import SunPornoIE +from .svtplay import SVTPlayIE from .swrmediathek import SWRMediathekIE from .syfy import SyfyIE from .sztvhu import SztvHuIE @@ -472,6 +507,7 @@ from .tumblr import TumblrIE from .tunein import TuneInIE from .turbo import TurboIE from .tutv import TutvIE +from .tv4 import TV4IE from .tvigle import TvigleIE from .tvp import TvpIE, TvpSeriesIE from .tvplay import TVPlayIE @@ -569,6 +605,7 @@ from .yahoo import ( YahooIE, YahooSearchIE, ) +from .yam import YamIE from .yesjapan import YesJapanIE from .ynet import YnetIE from .youjizz import YouJizzIE @@ -592,6 +629,7 @@ from .youtube import ( YoutubeUserIE, YoutubeWatchLaterIE, ) +from .zapiks import ZapiksIE from .zdf import ZDFIE, ZDFChannelIE from .zingmp3 import ( ZingMp3SongIE, diff --git a/youtube_dl/extractor/adobetv.py b/youtube_dl/extractor/adobetv.py index 28e07f8..97d1285 100644 --- a/youtube_dl/extractor/adobetv.py +++ b/youtube_dl/extractor/adobetv.py @@ -28,7 +28,6 @@ class AdobeTVIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) player = self._parse_json( @@ -44,8 +43,10 @@ class AdobeTVIE(InfoExtractor): self._html_search_meta('datepublished', webpage, 'upload date')) duration = parse_duration( - self._html_search_meta('duration', webpage, 'duration') - or self._search_regex(r'Runtime:\s*(\d{2}:\d{2}:\d{2})', webpage, 'duration')) + self._html_search_meta('duration', webpage, 'duration') or + self._search_regex( + r'Runtime:\s*(\d{2}:\d{2}:\d{2})', + webpage, 'duration', fatal=False)) view_count = str_to_int(self._search_regex( r'
\s*Views?:\s*([\d,.]+)\s*
', diff --git a/youtube_dl/extractor/adultswim.py b/youtube_dl/extractor/adultswim.py index 502a9c2..34b8b01 100644 --- a/youtube_dl/extractor/adultswim.py +++ b/youtube_dl/extractor/adultswim.py @@ -38,6 +38,7 @@ class AdultSwimIE(InfoExtractor): }, ], 'info_dict': { + 'id': 'rQxZvXQ4ROaSOqq-or2Mow', 'title': 'Rick and Morty - Pilot', 'description': "Rick moves in with his daughter's family and establishes himself as a bad influence on his grandson, Morty. " } @@ -55,6 +56,7 @@ class AdultSwimIE(InfoExtractor): } ], 'info_dict': { + 'id': '-t8CamQlQ2aYZ49ItZCFog', 'title': 'American Dad - Putting Francine Out of Business', 'description': 'Stan hatches a plan to get Francine out of the real estate business.Watch more American Dad on [adult swim].' }, diff --git a/youtube_dl/extractor/aftenposten.py b/youtube_dl/extractor/aftenposten.py new file mode 100644 index 0000000..2b257ed --- /dev/null +++ b/youtube_dl/extractor/aftenposten.py @@ -0,0 +1,103 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + int_or_none, + parse_iso8601, + xpath_with_ns, + xpath_text, + find_xpath_attr, +) + + +class AftenpostenIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?aftenposten\.no/webtv/([^/]+/)*(?P[^/]+)-\d+\.html' + + _TEST = { + 'url': 'http://www.aftenposten.no/webtv/serier-og-programmer/sweatshopenglish/TRAILER-SWEATSHOP---I-cant-take-any-more-7800835.html?paging=§ion=webtv_serierogprogrammer_sweatshop_sweatshopenglish', + 'md5': 'fd828cd29774a729bf4d4425fe192972', + 'info_dict': { + 'id': '21039', + 'ext': 'mov', + 'title': 'TRAILER: "Sweatshop" - I can´t take any more', + 'description': 'md5:21891f2b0dd7ec2f78d84a50e54f8238', + 'timestamp': 1416927969, + 'upload_date': '20141125', + } + } + + def _real_extract(self, url): + display_id = self._match_id(url) + + webpage = self._download_webpage(url, display_id) + + video_id = self._html_search_regex( + r'data-xs-id="(\d+)"', webpage, 'video id') + + data = self._download_xml( + 'http://frontend.xstream.dk/ap/feed/video/?platform=web&id=%s' % video_id, video_id) + + NS_MAP = { + 'atom': 'http://www.w3.org/2005/Atom', + 'xt': 'http://xstream.dk/', + 'media': 'http://search.yahoo.com/mrss/', + } + + entry = data.find(xpath_with_ns('./atom:entry', NS_MAP)) + + title = xpath_text( + entry, xpath_with_ns('./atom:title', NS_MAP), 'title') + description = xpath_text( + entry, xpath_with_ns('./atom:summary', NS_MAP), 'description') + timestamp = parse_iso8601(xpath_text( + entry, xpath_with_ns('./atom:published', NS_MAP), 'upload date')) + + formats = [] + media_group = entry.find(xpath_with_ns('./media:group', NS_MAP)) + for media_content in media_group.findall(xpath_with_ns('./media:content', NS_MAP)): + media_url = media_content.get('url') + if not media_url: + continue + tbr = int_or_none(media_content.get('bitrate')) + mobj = re.search(r'^(?Prtmp://[^/]+/(?P[^/]+))/(?P.+)$', media_url) + if mobj: + formats.append({ + 'url': mobj.group('url'), + 'play_path': 'mp4:%s' % mobj.group('playpath'), + 'app': mobj.group('app'), + 'ext': 'flv', + 'tbr': tbr, + 'format_id': 'rtmp-%d' % tbr, + }) + else: + formats.append({ + 'url': media_url, + 'tbr': tbr, + }) + self._sort_formats(formats) + + link = find_xpath_attr( + entry, xpath_with_ns('./atom:link', NS_MAP), 'rel', 'original') + if link is not None: + formats.append({ + 'url': link.get('href'), + 'format_id': link.get('rel'), + }) + + thumbnails = [{ + 'url': splash.get('url'), + 'width': int_or_none(splash.get('width')), + 'height': int_or_none(splash.get('height')), + } for splash in media_group.findall(xpath_with_ns('./xt:splash', NS_MAP))] + + return { + 'id': video_id, + 'title': title, + 'description': description, + 'timestamp': timestamp, + 'formats': formats, + 'thumbnails': thumbnails, + } diff --git a/youtube_dl/extractor/airmozilla.py b/youtube_dl/extractor/airmozilla.py new file mode 100644 index 0000000..611ad1e --- /dev/null +++ b/youtube_dl/extractor/airmozilla.py @@ -0,0 +1,74 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + int_or_none, + parse_duration, + parse_iso8601, +) + + +class AirMozillaIE(InfoExtractor): + _VALID_URL = r'https?://air\.mozilla\.org/(?P[0-9a-z-]+)/?' + _TEST = { + 'url': 'https://air.mozilla.org/privacy-lab-a-meetup-for-privacy-minded-people-in-san-francisco/', + 'md5': '2e3e7486ba5d180e829d453875b9b8bf', + 'info_dict': { + 'id': '6x4q2w', + 'ext': 'mp4', + 'title': 'Privacy Lab - a meetup for privacy minded people in San Francisco', + 'thumbnail': 're:https://\w+\.cloudfront\.net/6x4q2w/poster\.jpg\?t=\d+', + 'description': 'Brings together privacy professionals and others interested in privacy at for-profits, non-profits, and NGOs in an effort to contribute to the state of the ecosystem...', + 'timestamp': 1422487800, + 'upload_date': '20150128', + 'location': 'SFO Commons', + 'duration': 3780, + 'view_count': int, + 'categories': ['Main'], + } + } + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + video_id = self._html_search_regex(r'//vid.ly/(.*?)/embed', webpage, 'id') + + embed_script = self._download_webpage('https://vid.ly/{0}/embed'.format(video_id), video_id) + jwconfig = self._search_regex(r'\svar jwconfig = (\{.*?\});\s', embed_script, 'metadata') + metadata = self._parse_json(jwconfig, video_id) + + formats = [{ + 'url': source['file'], + 'ext': source['type'], + 'format_id': self._search_regex(r'&format=(.*)$', source['file'], 'video format'), + 'format': source['label'], + 'height': int(source['label'].rstrip('p')), + } for source in metadata['playlist'][0]['sources']] + self._sort_formats(formats) + + view_count = int_or_none(self._html_search_regex( + r'Views since archived: ([0-9]+)', + webpage, 'view count', fatal=False)) + timestamp = parse_iso8601(self._html_search_regex( + r'