X-Git-Url: https://git.rapsys.eu/youtubedl/blobdiff_plain/a070911bf98c751e4425008f1335cc57c86d30fc..626b36b728b1afd8246b8248bdaacf0f3cab18ee:/test/test_utils.py diff --git a/test/test_utils.py b/test/test_utils.py index 206760d..e6887be 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -21,15 +21,21 @@ from youtube_dl.utils import ( clean_html, DateRange, detect_exe_version, + determine_ext, + dict_get, + encode_compat_str, encodeFilename, escape_rfc3986, escape_url, + ExtractorError, find_xpath_attr, fix_xml_ampersands, InAdvancePagedList, intlist_to_bytes, + is_html, js_to_json, limit_length, + ohdave_rsa_encrypt, OnDemandPagedList, orderedSet, parse_duration, @@ -37,6 +43,10 @@ from youtube_dl.utils import ( parse_iso8601, read_batch_urls, sanitize_filename, + sanitize_path, + prepend_extension, + replace_extension, + remove_quotes, shell_quote, smuggle_url, str_to_int, @@ -47,10 +57,24 @@ from youtube_dl.utils import ( unified_strdate, unsmuggle_url, uppercase_escape, + lowercase_escape, url_basename, urlencode_postdata, version_tuple, xpath_with_ns, + xpath_element, + xpath_text, + xpath_attr, + render_table, + match_str, + parse_dfxp_time_expr, + dfxp2srt, + cli_option, + cli_valueless_option, + cli_bool_option, +) +from youtube_dl.compat import ( + compat_etree_fromstring, ) @@ -83,6 +107,11 @@ class TestUtil(unittest.TestCase): sanitize_filename('New World record at 0:12:34'), 'New World record at 0_12_34') + self.assertEqual(sanitize_filename('--gasdgf'), '_-gasdgf') + self.assertEqual(sanitize_filename('--gasdgf', is_id=True), '--gasdgf') + self.assertEqual(sanitize_filename('.gasdgf'), 'gasdgf') + self.assertEqual(sanitize_filename('.gasdgf', is_id=True), '.gasdgf') + forbidden = '"\0\\/' for fc in forbidden: for fbc in forbidden: @@ -123,6 +152,67 @@ class TestUtil(unittest.TestCase): self.assertEqual(sanitize_filename('_BD_eEpuzXw', is_id=True), '_BD_eEpuzXw') self.assertEqual(sanitize_filename('N0Y__7-UOdI', is_id=True), 'N0Y__7-UOdI') + def test_sanitize_path(self): + if sys.platform != 'win32': + return + + self.assertEqual(sanitize_path('abc'), 'abc') + self.assertEqual(sanitize_path('abc/def'), 'abc\\def') + self.assertEqual(sanitize_path('abc\\def'), 'abc\\def') + self.assertEqual(sanitize_path('abc|def'), 'abc#def') + self.assertEqual(sanitize_path('<>:"|?*'), '#######') + self.assertEqual(sanitize_path('C:/abc/def'), 'C:\\abc\\def') + self.assertEqual(sanitize_path('C?:/abc/def'), 'C##\\abc\\def') + + self.assertEqual(sanitize_path('\\\\?\\UNC\\ComputerName\\abc'), '\\\\?\\UNC\\ComputerName\\abc') + self.assertEqual(sanitize_path('\\\\?\\UNC/ComputerName/abc'), '\\\\?\\UNC\\ComputerName\\abc') + + self.assertEqual(sanitize_path('\\\\?\\C:\\abc'), '\\\\?\\C:\\abc') + self.assertEqual(sanitize_path('\\\\?\\C:/abc'), '\\\\?\\C:\\abc') + self.assertEqual(sanitize_path('\\\\?\\C:\\ab?c\\de:f'), '\\\\?\\C:\\ab#c\\de#f') + self.assertEqual(sanitize_path('\\\\?\\C:\\abc'), '\\\\?\\C:\\abc') + + self.assertEqual( + sanitize_path('youtube/%(uploader)s/%(autonumber)s-%(title)s-%(upload_date)s.%(ext)s'), + 'youtube\\%(uploader)s\\%(autonumber)s-%(title)s-%(upload_date)s.%(ext)s') + + self.assertEqual( + sanitize_path('youtube/TheWreckingYard ./00001-Not bad, Especially for Free! (1987 Yamaha 700)-20141116.mp4.part'), + 'youtube\\TheWreckingYard #\\00001-Not bad, Especially for Free! (1987 Yamaha 700)-20141116.mp4.part') + self.assertEqual(sanitize_path('abc/def...'), 'abc\\def..#') + self.assertEqual(sanitize_path('abc.../def'), 'abc..#\\def') + self.assertEqual(sanitize_path('abc.../def...'), 'abc..#\\def..#') + + self.assertEqual(sanitize_path('../abc'), '..\\abc') + self.assertEqual(sanitize_path('../../abc'), '..\\..\\abc') + self.assertEqual(sanitize_path('./abc'), 'abc') + self.assertEqual(sanitize_path('./../abc'), '..\\abc') + + def test_prepend_extension(self): + self.assertEqual(prepend_extension('abc.ext', 'temp'), 'abc.temp.ext') + self.assertEqual(prepend_extension('abc.ext', 'temp', 'ext'), 'abc.temp.ext') + self.assertEqual(prepend_extension('abc.unexpected_ext', 'temp', 'ext'), 'abc.unexpected_ext.temp') + self.assertEqual(prepend_extension('abc', 'temp'), 'abc.temp') + self.assertEqual(prepend_extension('.abc', 'temp'), '.abc.temp') + self.assertEqual(prepend_extension('.abc.ext', 'temp'), '.abc.temp.ext') + + def test_replace_extension(self): + self.assertEqual(replace_extension('abc.ext', 'temp'), 'abc.temp') + self.assertEqual(replace_extension('abc.ext', 'temp', 'ext'), 'abc.temp') + self.assertEqual(replace_extension('abc.unexpected_ext', 'temp', 'ext'), 'abc.unexpected_ext.temp') + self.assertEqual(replace_extension('abc', 'temp'), 'abc.temp') + self.assertEqual(replace_extension('.abc', 'temp'), '.abc.temp') + self.assertEqual(replace_extension('.abc.ext', 'temp'), '.abc.temp') + + def test_remove_quotes(self): + self.assertEqual(remove_quotes(None), None) + self.assertEqual(remove_quotes('"'), '"') + self.assertEqual(remove_quotes("'"), "'") + self.assertEqual(remove_quotes(';'), ';') + self.assertEqual(remove_quotes('";'), '";') + self.assertEqual(remove_quotes('""'), '') + self.assertEqual(remove_quotes('";"'), ';') + def test_ordered_set(self): self.assertEqual(orderedSet([1, 1, 2, 3, 4, 4, 5, 6, 7, 3, 5]), [1, 2, 3, 4, 5, 6, 7]) self.assertEqual(orderedSet([]), []) @@ -132,8 +222,10 @@ class TestUtil(unittest.TestCase): def test_unescape_html(self): self.assertEqual(unescapeHTML('%20;'), '%20;') - self.assertEqual( - unescapeHTML('é'), 'é') + self.assertEqual(unescapeHTML('/'), '/') + self.assertEqual(unescapeHTML('/'), '/') + self.assertEqual(unescapeHTML('é'), 'é') + self.assertEqual(unescapeHTML('�'), '�') def test_daterange(self): _20century = DateRange("19000101", "20000101") @@ -154,6 +246,18 @@ class TestUtil(unittest.TestCase): self.assertEqual( unified_strdate('11/26/2014 11:30:00 AM PST', day_first=False), '20141126') + self.assertEqual( + unified_strdate('2/2/2015 6:47:40 PM', day_first=False), + '20150202') + self.assertEqual(unified_strdate('25-09-2014'), '20140925') + self.assertEqual(unified_strdate('UNKNOWN DATE FORMAT'), None) + + def test_determine_ext(self): + self.assertEqual(determine_ext('http://example.com/foo/bar.mp4/?download'), 'mp4') + self.assertEqual(determine_ext('http://example.com/foo/bar/?download', None), None) + self.assertEqual(determine_ext('http://example.com/foo/bar.nonext/?download', None), None) + self.assertEqual(determine_ext('http://example.com/foo/bar/mp4?download', None), None) + self.assertEqual(determine_ext('http://example.com/foo/bar.m3u8//?download'), 'm3u8') def test_find_xpath_attr(self): testxml = ''' @@ -161,12 +265,21 @@ class TestUtil(unittest.TestCase): + ''' - doc = xml.etree.ElementTree.fromstring(testxml) + doc = compat_etree_fromstring(testxml) + self.assertEqual(find_xpath_attr(doc, './/fourohfour', 'n'), None) self.assertEqual(find_xpath_attr(doc, './/fourohfour', 'n', 'v'), None) + self.assertEqual(find_xpath_attr(doc, './/node', 'n'), None) + self.assertEqual(find_xpath_attr(doc, './/node', 'n', 'v'), None) + self.assertEqual(find_xpath_attr(doc, './/node', 'x'), doc[1]) self.assertEqual(find_xpath_attr(doc, './/node', 'x', 'a'), doc[1]) + self.assertEqual(find_xpath_attr(doc, './/node', 'x', 'b'), doc[3]) + self.assertEqual(find_xpath_attr(doc, './/node', 'y'), doc[2]) self.assertEqual(find_xpath_attr(doc, './/node', 'y', 'c'), doc[2]) + self.assertEqual(find_xpath_attr(doc, './/node', 'y', 'd'), doc[3]) + self.assertEqual(find_xpath_attr(doc, './/node', 'x', ''), doc[4]) def test_xpath_with_ns(self): testxml = ''' @@ -175,12 +288,56 @@ class TestUtil(unittest.TestCase): http://server.com/download.mp3 ''' - doc = xml.etree.ElementTree.fromstring(testxml) + doc = compat_etree_fromstring(testxml) find = lambda p: doc.find(xpath_with_ns(p, {'media': 'http://example.com/'})) self.assertTrue(find('media:song') is not None) self.assertEqual(find('media:song/media:author').text, 'The Author') self.assertEqual(find('media:song/url').text, 'http://server.com/download.mp3') + def test_xpath_element(self): + doc = xml.etree.ElementTree.Element('root') + div = xml.etree.ElementTree.SubElement(doc, 'div') + p = xml.etree.ElementTree.SubElement(div, 'p') + p.text = 'Foo' + self.assertEqual(xpath_element(doc, 'div/p'), p) + self.assertEqual(xpath_element(doc, ['div/p']), p) + self.assertEqual(xpath_element(doc, ['div/bar', 'div/p']), p) + self.assertEqual(xpath_element(doc, 'div/bar', default='default'), 'default') + self.assertEqual(xpath_element(doc, ['div/bar'], default='default'), 'default') + self.assertTrue(xpath_element(doc, 'div/bar') is None) + self.assertTrue(xpath_element(doc, ['div/bar']) is None) + self.assertTrue(xpath_element(doc, ['div/bar'], 'div/baz') is None) + self.assertRaises(ExtractorError, xpath_element, doc, 'div/bar', fatal=True) + self.assertRaises(ExtractorError, xpath_element, doc, ['div/bar'], fatal=True) + self.assertRaises(ExtractorError, xpath_element, doc, ['div/bar', 'div/baz'], fatal=True) + + def test_xpath_text(self): + testxml = ''' +
+

Foo

+
+
''' + doc = compat_etree_fromstring(testxml) + self.assertEqual(xpath_text(doc, 'div/p'), 'Foo') + self.assertEqual(xpath_text(doc, 'div/bar', default='default'), 'default') + self.assertTrue(xpath_text(doc, 'div/bar') is None) + self.assertRaises(ExtractorError, xpath_text, doc, 'div/bar', fatal=True) + + def test_xpath_attr(self): + testxml = ''' +
+

Foo

+
+
''' + doc = compat_etree_fromstring(testxml) + self.assertEqual(xpath_attr(doc, 'div/p', 'x'), 'a') + self.assertEqual(xpath_attr(doc, 'div/bar', 'x'), None) + self.assertEqual(xpath_attr(doc, 'div/p', 'y'), None) + self.assertEqual(xpath_attr(doc, 'div/bar', 'x', default='default'), 'default') + self.assertEqual(xpath_attr(doc, 'div/p', 'y', default='default'), 'default') + self.assertRaises(ExtractorError, xpath_attr, doc, 'div/bar', 'x', fatal=True) + self.assertRaises(ExtractorError, xpath_attr, doc, 'div/p', 'y', fatal=True) + def test_smuggle_url(self): data = {"ö": "ö", "abc": [3]} url = 'https://foo.bar/baz?x=y#a' @@ -236,6 +393,10 @@ class TestUtil(unittest.TestCase): self.assertEqual(parse_duration('5 s'), 5) self.assertEqual(parse_duration('3 min'), 180) self.assertEqual(parse_duration('2.5 hours'), 9000) + self.assertEqual(parse_duration('02:03:04'), 7384) + self.assertEqual(parse_duration('01:02:03:04'), 93784) + self.assertEqual(parse_duration('1 hour 3 minutes'), 3780) + self.assertEqual(parse_duration('87 Min.'), 5220) def test_fix_xml_ampersands(self): self.assertEqual( @@ -291,11 +452,39 @@ class TestUtil(unittest.TestCase): data = urlencode_postdata({'username': 'foo@bar.com', 'password': '1234'}) self.assertTrue(isinstance(data, bytes)) + def test_dict_get(self): + FALSE_VALUES = { + 'none': None, + 'false': False, + 'zero': 0, + 'empty_string': '', + 'empty_list': [], + } + d = FALSE_VALUES.copy() + d['a'] = 42 + self.assertEqual(dict_get(d, 'a'), 42) + self.assertEqual(dict_get(d, 'b'), None) + self.assertEqual(dict_get(d, 'b', 42), 42) + self.assertEqual(dict_get(d, ('a', )), 42) + self.assertEqual(dict_get(d, ('b', 'a', )), 42) + self.assertEqual(dict_get(d, ('b', 'c', 'a', 'd', )), 42) + self.assertEqual(dict_get(d, ('b', 'c', )), None) + self.assertEqual(dict_get(d, ('b', 'c', ), 42), 42) + for key, false_value in FALSE_VALUES.items(): + self.assertEqual(dict_get(d, ('b', 'c', key, )), None) + self.assertEqual(dict_get(d, ('b', 'c', key, ), skip_false_values=False), false_value) + + def test_encode_compat_str(self): + self.assertEqual(encode_compat_str(b'\xd1\x82\xd0\xb5\xd1\x81\xd1\x82', 'utf-8'), 'тест') + self.assertEqual(encode_compat_str('тест', 'utf-8'), 'тест') + def test_parse_iso8601(self): self.assertEqual(parse_iso8601('2014-03-23T23:04:26+0100'), 1395612266) self.assertEqual(parse_iso8601('2014-03-23T22:04:26+0000'), 1395612266) self.assertEqual(parse_iso8601('2014-03-23T22:04:26Z'), 1395612266) self.assertEqual(parse_iso8601('2014-03-23T22:04:26.1234Z'), 1395612266) + self.assertEqual(parse_iso8601('2015-09-29T08:27:31.727'), 1443515251) + self.assertEqual(parse_iso8601('2015-09-29T08-27-31.727'), None) def test_strip_jsonp(self): stripped = strip_jsonp('cb ([ {"id":"532cb",\n\n\n"x":\n3}\n]\n);') @@ -306,10 +495,18 @@ class TestUtil(unittest.TestCase): d = json.loads(stripped) self.assertEqual(d, {'STATUS': 'OK'}) + stripped = strip_jsonp('ps.embedHandler({"status": "success"});') + d = json.loads(stripped) + self.assertEqual(d, {'status': 'success'}) + def test_uppercase_escape(self): self.assertEqual(uppercase_escape('aä'), 'aä') self.assertEqual(uppercase_escape('\\U0001d550'), '𝕐') + def test_lowercase_escape(self): + self.assertEqual(lowercase_escape('aä'), 'aä') + self.assertEqual(lowercase_escape('\\u0026'), '&') + def test_limit_length(self): self.assertEqual(limit_length(None, 12), None) self.assertEqual(limit_length('foo', 12), 'foo') @@ -362,6 +559,13 @@ class TestUtil(unittest.TestCase): "playlist":[{"controls":{"all":null}}] }''') + inp = '''"The CW\\'s \\'Crazy Ex-Girlfriend\\'"''' + self.assertEqual(js_to_json(inp), '''"The CW's 'Crazy Ex-Girlfriend'"''') + + inp = '"SAND Number: SAND 2013-7800P\\nPresenter: Tom Russo\\nHabanero Software Training - Xyce Software\\nXyce, Sandia\\u0027s"' + json_code = js_to_json(inp) + self.assertEqual(json.loads(json_code), json.loads(inp)) + def test_js_to_json_edgecases(self): on = js_to_json("{abc_def:'1\\'\\\\2\\\\\\'3\"4'}") self.assertEqual(json.loads(on), {"abc_def": "1'\\2\\'3\"4"}) @@ -369,6 +573,22 @@ class TestUtil(unittest.TestCase): on = js_to_json('{"abc": true}') self.assertEqual(json.loads(on), {'abc': True}) + # Ignore JavaScript code as well + on = js_to_json('''{ + "x": 1, + y: "a", + z: some.code + }''') + d = json.loads(on) + self.assertEqual(d['x'], 1) + self.assertEqual(d['y'], 'a') + + on = js_to_json('["abc", "def",]') + self.assertEqual(json.loads(on), ['abc', 'def']) + + on = js_to_json('{"abc": "def",}') + self.assertEqual(json.loads(on), {'abc': 'def'}) + def test_clean_html(self): self.assertEqual(clean_html('a:\nb'), 'a: b') self.assertEqual(clean_html('a:\n "b"'), 'a: "b"') @@ -417,5 +637,169 @@ ffmpeg version 2.4.4 Copyright (c) 2000-2014 the FFmpeg ...'''), '2.4.4') self.assertTrue(age_restricted(18, 14)) self.assertFalse(age_restricted(18, 18)) + def test_is_html(self): + self.assertFalse(is_html(b'\x49\x44\x43\xaaa')) + self.assertTrue(is_html( # UTF-8 with BOM + b'\xef\xbb\xbf\xaaa')) + self.assertTrue(is_html( # UTF-16-LE + b'\xff\xfe<\x00h\x00t\x00m\x00l\x00>\x00\xe4\x00' + )) + self.assertTrue(is_html( # UTF-16-BE + b'\xfe\xff\x00<\x00h\x00t\x00m\x00l\x00>\x00\xe4' + )) + self.assertTrue(is_html( # UTF-32-BE + b'\x00\x00\xFE\xFF\x00\x00\x00<\x00\x00\x00h\x00\x00\x00t\x00\x00\x00m\x00\x00\x00l\x00\x00\x00>\x00\x00\x00\xe4')) + self.assertTrue(is_html( # UTF-32-LE + b'\xFF\xFE\x00\x00<\x00\x00\x00h\x00\x00\x00t\x00\x00\x00m\x00\x00\x00l\x00\x00\x00>\x00\x00\x00\xe4\x00\x00\x00')) + + def test_render_table(self): + self.assertEqual( + render_table( + ['a', 'bcd'], + [[123, 4], [9999, 51]]), + 'a bcd\n' + '123 4\n' + '9999 51') + + def test_match_str(self): + self.assertRaises(ValueError, match_str, 'xy>foobar', {}) + self.assertFalse(match_str('xy', {'x': 1200})) + self.assertTrue(match_str('!xy', {'x': 1200})) + self.assertTrue(match_str('x', {'x': 1200})) + self.assertFalse(match_str('!x', {'x': 1200})) + self.assertTrue(match_str('x', {'x': 0})) + self.assertFalse(match_str('x>0', {'x': 0})) + self.assertFalse(match_str('x>0', {})) + self.assertTrue(match_str('x>?0', {})) + self.assertTrue(match_str('x>1K', {'x': 1200})) + self.assertFalse(match_str('x>2K', {'x': 1200})) + self.assertTrue(match_str('x>=1200 & x < 1300', {'x': 1200})) + self.assertFalse(match_str('x>=1100 & x < 1200', {'x': 1200})) + self.assertFalse(match_str('y=a212', {'y': 'foobar42'})) + self.assertTrue(match_str('y=foobar42', {'y': 'foobar42'})) + self.assertFalse(match_str('y!=foobar42', {'y': 'foobar42'})) + self.assertTrue(match_str('y!=foobar2', {'y': 'foobar42'})) + self.assertFalse(match_str( + 'like_count > 100 & dislike_count 100 & dislike_count 100 & dislike_count 100 & dislike_count + + +
+

The following line contains Chinese characters and special symbols

+

第二行
♪♪

+

Third
Line

+

Lines with invalid timestamps are ignored

+

Ignore, two

+

Ignored, three

+
+ +
''' + srt_data = '''1 +00:00:00,000 --> 00:00:01,000 +The following line contains Chinese characters and special symbols + +2 +00:00:01,000 --> 00:00:02,000 +第二行 +♪♪ + +3 +00:00:02,000 --> 00:00:03,000 +Third +Line + +''' + self.assertEqual(dfxp2srt(dfxp_data), srt_data) + + dfxp_data_no_default_namespace = ''' + + +
+

The first line

+
+ +
''' + srt_data = '''1 +00:00:00,000 --> 00:00:01,000 +The first line + +''' + self.assertEqual(dfxp2srt(dfxp_data_no_default_namespace), srt_data) + + def test_cli_option(self): + self.assertEqual(cli_option({'proxy': '127.0.0.1:3128'}, '--proxy', 'proxy'), ['--proxy', '127.0.0.1:3128']) + self.assertEqual(cli_option({'proxy': None}, '--proxy', 'proxy'), []) + self.assertEqual(cli_option({}, '--proxy', 'proxy'), []) + + def test_cli_valueless_option(self): + self.assertEqual(cli_valueless_option( + {'downloader': 'external'}, '--external-downloader', 'downloader', 'external'), ['--external-downloader']) + self.assertEqual(cli_valueless_option( + {'downloader': 'internal'}, '--external-downloader', 'downloader', 'external'), []) + self.assertEqual(cli_valueless_option( + {'nocheckcertificate': True}, '--no-check-certificate', 'nocheckcertificate'), ['--no-check-certificate']) + self.assertEqual(cli_valueless_option( + {'nocheckcertificate': False}, '--no-check-certificate', 'nocheckcertificate'), []) + self.assertEqual(cli_valueless_option( + {'checkcertificate': True}, '--no-check-certificate', 'checkcertificate', False), []) + self.assertEqual(cli_valueless_option( + {'checkcertificate': False}, '--no-check-certificate', 'checkcertificate', False), ['--no-check-certificate']) + + def test_cli_bool_option(self): + self.assertEqual( + cli_bool_option( + {'nocheckcertificate': True}, '--no-check-certificate', 'nocheckcertificate'), + ['--no-check-certificate', 'true']) + self.assertEqual( + cli_bool_option( + {'nocheckcertificate': True}, '--no-check-certificate', 'nocheckcertificate', separator='='), + ['--no-check-certificate=true']) + self.assertEqual( + cli_bool_option( + {'nocheckcertificate': True}, '--check-certificate', 'nocheckcertificate', 'false', 'true'), + ['--check-certificate', 'false']) + self.assertEqual( + cli_bool_option( + {'nocheckcertificate': True}, '--check-certificate', 'nocheckcertificate', 'false', 'true', '='), + ['--check-certificate=false']) + self.assertEqual( + cli_bool_option( + {'nocheckcertificate': False}, '--check-certificate', 'nocheckcertificate', 'false', 'true'), + ['--check-certificate', 'true']) + self.assertEqual( + cli_bool_option( + {'nocheckcertificate': False}, '--check-certificate', 'nocheckcertificate', 'false', 'true', '='), + ['--check-certificate=true']) + + def test_ohdave_rsa_encrypt(self): + N = 0xab86b6371b5318aaa1d3c9e612a9f1264f372323c8c0f19875b5fc3b3fd3afcc1e5bec527aa94bfa85bffc157e4245aebda05389a5357b75115ac94f074aefcd + e = 65537 + + self.assertEqual( + ohdave_rsa_encrypt(b'aa111222', e, N), + '726664bd9a23fd0c70f9f1b84aab5e3905ce1e45a584e9cbcf9bcc7510338fc1986d6c599ff990d923aa43c51c0d9013cd572e13bc58f4ae48f2ed8c0b0ba881') + if __name__ == '__main__': unittest.main()