X-Git-Url: https://git.rapsys.eu/youtubedl/blobdiff_plain/f19349c26118065acbe21509383c63465df794fe..935cfce0211ba23786d89634ccc4674834401f5b:/test/test_utils.py diff --git a/test/test_utils.py b/test/test_utils.py index 501355c..b7ef51f 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -18,13 +18,18 @@ import xml.etree.ElementTree from youtube_dl.utils import ( age_restricted, args_to_str, + encode_base_n, clean_html, + date_from_str, DateRange, detect_exe_version, determine_ext, + dict_get, + encode_compat_str, encodeFilename, escape_rfc3986, escape_url, + extract_attributes, ExtractorError, find_xpath_attr, fix_xml_ampersands, @@ -33,21 +38,25 @@ from youtube_dl.utils import ( is_html, js_to_json, limit_length, + ohdave_rsa_encrypt, OnDemandPagedList, orderedSet, parse_duration, parse_filesize, + parse_count, parse_iso8601, read_batch_urls, sanitize_filename, sanitize_path, prepend_extension, replace_extension, + remove_start, + remove_end, + remove_quotes, shell_quote, smuggle_url, str_to_int, strip_jsonp, - struct_unpack, timeconvert, unescapeHTML, unified_strdate, @@ -56,6 +65,7 @@ from youtube_dl.utils import ( lowercase_escape, url_basename, urlencode_postdata, + update_url_query, version_tuple, xpath_with_ns, xpath_element, @@ -70,7 +80,10 @@ from youtube_dl.utils import ( cli_bool_option, ) from youtube_dl.compat import ( + compat_chr, compat_etree_fromstring, + compat_urlparse, + compat_parse_qs, ) @@ -127,8 +140,8 @@ class TestUtil(unittest.TestCase): self.assertEqual('yes_no', sanitize_filename('yes? no', restricted=True)) self.assertEqual('this_-_that', sanitize_filename('this: that', restricted=True)) - tests = 'a\xe4b\u4e2d\u56fd\u7684c' - self.assertEqual(sanitize_filename(tests, restricted=True), 'a_b_c') + tests = 'aäb\u4e2d\u56fd\u7684c' + self.assertEqual(sanitize_filename(tests, restricted=True), 'aab_c') self.assertTrue(sanitize_filename('\xf6', restricted=True) != '') # No empty filename forbidden = '"\0\\/&!: \'\t\n()[]{}$;`^,#' @@ -143,6 +156,10 @@ class TestUtil(unittest.TestCase): self.assertTrue(sanitize_filename('-', restricted=True) != '') self.assertTrue(sanitize_filename(':', restricted=True) != '') + self.assertEqual(sanitize_filename( + 'ÃÃÃÃÃà ÃÃÃÃÃÃÃÃÃÃÃÃÃÃÃÃÃÅÃÅÃÃÃÃÅ°ÃÃÃà áâãäåæçèéêëìÃîïðñòóôõöÅøÅùúûüűýþÿ', restricted=True), + 'AAAAAAAECEEEEIIIIDNOOOOOOOOEUUUUUYPssaaaaaaaeceeeeiiiionooooooooeuuuuuypy') + def test_sanitize_ids(self): self.assertEqual(sanitize_filename('_n_cd26wFpw', is_id=True), '_n_cd26wFpw') self.assertEqual(sanitize_filename('_BD_eEpuzXw', is_id=True), '_BD_eEpuzXw') @@ -200,6 +217,25 @@ class TestUtil(unittest.TestCase): self.assertEqual(replace_extension('.abc', 'temp'), '.abc.temp') self.assertEqual(replace_extension('.abc.ext', 'temp'), '.abc.temp') + def test_remove_start(self): + self.assertEqual(remove_start(None, 'A - '), None) + self.assertEqual(remove_start('A - B', 'A - '), 'B') + self.assertEqual(remove_start('B - A', 'A - '), 'B - A') + + def test_remove_end(self): + self.assertEqual(remove_end(None, ' - B'), None) + self.assertEqual(remove_end('A - B', ' - B'), 'A') + self.assertEqual(remove_end('B - A', ' - B'), 'B - A') + + def test_remove_quotes(self): + self.assertEqual(remove_quotes(None), None) + self.assertEqual(remove_quotes('"'), '"') + self.assertEqual(remove_quotes("'"), "'") + self.assertEqual(remove_quotes(';'), ';') + self.assertEqual(remove_quotes('";'), '";') + self.assertEqual(remove_quotes('""'), '') + self.assertEqual(remove_quotes('";"'), ';') + def test_ordered_set(self): self.assertEqual(orderedSet([1, 1, 2, 3, 4, 4, 5, 6, 7, 3, 5]), [1, 2, 3, 4, 5, 6, 7]) self.assertEqual(orderedSet([]), []) @@ -213,6 +249,15 @@ class TestUtil(unittest.TestCase): self.assertEqual(unescapeHTML('/'), '/') self.assertEqual(unescapeHTML('é'), 'é') self.assertEqual(unescapeHTML(''), '') + # HTML5 entities + self.assertEqual(unescapeHTML('.''), '.\'') + + def test_date_from_str(self): + self.assertEqual(date_from_str('yesterday'), date_from_str('now-1day')) + self.assertEqual(date_from_str('now+7day'), date_from_str('now+1week')) + self.assertEqual(date_from_str('now+14day'), date_from_str('now+2week')) + self.assertEqual(date_from_str('now+365day'), date_from_str('now+1year')) + self.assertEqual(date_from_str('now+30day'), date_from_str('now+1month')) def test_daterange(self): _20century = DateRange("19000101", "20000101") @@ -236,6 +281,7 @@ class TestUtil(unittest.TestCase): self.assertEqual( unified_strdate('2/2/2015 6:47:40 PM', day_first=False), '20150202') + self.assertEqual(unified_strdate('Feb 14th 2016 5:45PM'), '20160214') self.assertEqual(unified_strdate('25-09-2014'), '20140925') self.assertEqual(unified_strdate('UNKNOWN DATE FORMAT'), None) @@ -384,6 +430,7 @@ class TestUtil(unittest.TestCase): self.assertEqual(parse_duration('01:02:03:04'), 93784) self.assertEqual(parse_duration('1 hour 3 minutes'), 3780) self.assertEqual(parse_duration('87 Min.'), 5220) + self.assertEqual(parse_duration('PT1H0.040S'), 3600.04) def test_fix_xml_ampersands(self): self.assertEqual( @@ -423,9 +470,6 @@ class TestUtil(unittest.TestCase): testPL(5, 2, (2, 99), [2, 3, 4]) testPL(5, 2, (20, 99), []) - def test_struct_unpack(self): - self.assertEqual(struct_unpack('!B', b'\x00'), (0,)) - def test_read_batch_urls(self): f = io.StringIO('''\xef\xbb\xbf foo bar\r @@ -439,6 +483,66 @@ class TestUtil(unittest.TestCase): data = urlencode_postdata({'username': 'foo@bar.com', 'password': '1234'}) self.assertTrue(isinstance(data, bytes)) + def test_update_url_query(self): + def query_dict(url): + return compat_parse_qs(compat_urlparse.urlparse(url).query) + self.assertEqual(query_dict(update_url_query( + 'http://example.com/path', {'quality': ['HD'], 'format': ['mp4']})), + query_dict('http://example.com/path?quality=HD&format=mp4')) + self.assertEqual(query_dict(update_url_query( + 'http://example.com/path', {'system': ['LINUX', 'WINDOWS']})), + query_dict('http://example.com/path?system=LINUX&system=WINDOWS')) + self.assertEqual(query_dict(update_url_query( + 'http://example.com/path', {'fields': 'id,formats,subtitles'})), + query_dict('http://example.com/path?fields=id,formats,subtitles')) + self.assertEqual(query_dict(update_url_query( + 'http://example.com/path', {'fields': ('id,formats,subtitles', 'thumbnails')})), + query_dict('http://example.com/path?fields=id,formats,subtitles&fields=thumbnails')) + self.assertEqual(query_dict(update_url_query( + 'http://example.com/path?manifest=f4m', {'manifest': []})), + query_dict('http://example.com/path')) + self.assertEqual(query_dict(update_url_query( + 'http://example.com/path?system=LINUX&system=WINDOWS', {'system': 'LINUX'})), + query_dict('http://example.com/path?system=LINUX')) + self.assertEqual(query_dict(update_url_query( + 'http://example.com/path', {'fields': b'id,formats,subtitles'})), + query_dict('http://example.com/path?fields=id,formats,subtitles')) + self.assertEqual(query_dict(update_url_query( + 'http://example.com/path', {'width': 1080, 'height': 720})), + query_dict('http://example.com/path?width=1080&height=720')) + self.assertEqual(query_dict(update_url_query( + 'http://example.com/path', {'bitrate': 5020.43})), + query_dict('http://example.com/path?bitrate=5020.43')) + self.assertEqual(query_dict(update_url_query( + 'http://example.com/path', {'test': '第äºè¡ÑеÑÑ'})), + query_dict('http://example.com/path?test=%E7%AC%AC%E4%BA%8C%E8%A1%8C%D1%82%D0%B5%D1%81%D1%82')) + + def test_dict_get(self): + FALSE_VALUES = { + 'none': None, + 'false': False, + 'zero': 0, + 'empty_string': '', + 'empty_list': [], + } + d = FALSE_VALUES.copy() + d['a'] = 42 + self.assertEqual(dict_get(d, 'a'), 42) + self.assertEqual(dict_get(d, 'b'), None) + self.assertEqual(dict_get(d, 'b', 42), 42) + self.assertEqual(dict_get(d, ('a', )), 42) + self.assertEqual(dict_get(d, ('b', 'a', )), 42) + self.assertEqual(dict_get(d, ('b', 'c', 'a', 'd', )), 42) + self.assertEqual(dict_get(d, ('b', 'c', )), None) + self.assertEqual(dict_get(d, ('b', 'c', ), 42), 42) + for key, false_value in FALSE_VALUES.items(): + self.assertEqual(dict_get(d, ('b', 'c', key, )), None) + self.assertEqual(dict_get(d, ('b', 'c', key, ), skip_false_values=False), false_value) + + def test_encode_compat_str(self): + self.assertEqual(encode_compat_str(b'\xd1\x82\xd0\xb5\xd1\x81\xd1\x82', 'utf-8'), 'ÑеÑÑ') + self.assertEqual(encode_compat_str('ÑеÑÑ', 'utf-8'), 'ÑеÑÑ') + def test_parse_iso8601(self): self.assertEqual(parse_iso8601('2014-03-23T23:04:26+0100'), 1395612266) self.assertEqual(parse_iso8601('2014-03-23T22:04:26+0000'), 1395612266) @@ -456,6 +560,10 @@ class TestUtil(unittest.TestCase): d = json.loads(stripped) self.assertEqual(d, {'STATUS': 'OK'}) + stripped = strip_jsonp('ps.embedHandler({"status": "success"});') + d = json.loads(stripped) + self.assertEqual(d, {'status': 'success'}) + def test_uppercase_escape(self): self.assertEqual(uppercase_escape('aä'), 'aä') self.assertEqual(uppercase_escape('\\U0001d550'), 'ð') @@ -492,11 +600,11 @@ class TestUtil(unittest.TestCase): ) self.assertEqual( escape_url('http://ÑеÑÑ.ÑÑ/ÑÑагменÑ'), - 'http://ÑеÑÑ.ÑÑ/%D1%84%D1%80%D0%B0%D0%B3%D0%BC%D0%B5%D0%BD%D1%82' + 'http://xn--e1aybc.xn--p1ai/%D1%84%D1%80%D0%B0%D0%B3%D0%BC%D0%B5%D0%BD%D1%82' ) self.assertEqual( escape_url('http://ÑеÑÑ.ÑÑ/абв?абв=абв#абв'), - 'http://ÑеÑÑ.ÑÑ/%D0%B0%D0%B1%D0%B2?%D0%B0%D0%B1%D0%B2=%D0%B0%D0%B1%D0%B2#%D0%B0%D0%B1%D0%B2' + 'http://xn--e1aybc.xn--p1ai/%D0%B0%D0%B1%D0%B2?%D0%B0%D0%B1%D0%B2=%D0%B0%D0%B1%D0%B2#%D0%B0%D0%B1%D0%B2' ) self.assertEqual(escape_url('http://vimeo.com/56015672#at=0'), 'http://vimeo.com/56015672#at=0') @@ -523,6 +631,18 @@ class TestUtil(unittest.TestCase): json_code = js_to_json(inp) self.assertEqual(json.loads(json_code), json.loads(inp)) + inp = '''{ + 0:{src:'skipped', type: 'application/dash+xml'}, + 1:{src:'skipped', type: 'application/vnd.apple.mpegURL'}, + }''' + self.assertEqual(js_to_json(inp), '''{ + "0":{"src":"skipped", "type": "application/dash+xml"}, + "1":{"src":"skipped", "type": "application/vnd.apple.mpegURL"} + }''') + + inp = '''{"foo":101}''' + self.assertEqual(js_to_json(inp), '''{"foo":101}''') + def test_js_to_json_edgecases(self): on = js_to_json("{abc_def:'1\\'\\\\2\\\\\\'3\"4'}") self.assertEqual(json.loads(on), {"abc_def": "1'\\2\\'3\"4"}) @@ -546,6 +666,65 @@ class TestUtil(unittest.TestCase): on = js_to_json('{"abc": "def",}') self.assertEqual(json.loads(on), {'abc': 'def'}) + on = js_to_json('{ 0: /* " \n */ ",]" , }') + self.assertEqual(json.loads(on), {'0': ',]'}) + + on = js_to_json(r'["
x<\/p>"]') + self.assertEqual(json.loads(on), ['
x
']) + + on = js_to_json(r'["\xaa"]') + self.assertEqual(json.loads(on), ['\u00aa']) + + on = js_to_json("['a\\\nb']") + self.assertEqual(json.loads(on), ['ab']) + + on = js_to_json('{0xff:0xff}') + self.assertEqual(json.loads(on), {'255': 255}) + + on = js_to_json('{077:077}') + self.assertEqual(json.loads(on), {'63': 63}) + + on = js_to_json('{42:42}') + self.assertEqual(json.loads(on), {'42': 42}) + + def test_extract_attributes(self): + self.assertEqual(extract_attributes('The following line contains Chinese characters and special symbols
第äºè¡
âªâª
Third
Line
Lines with invalid timestamps are ignored
+Ignore, two
+Ignored, three