X-Git-Url: https://git.rapsys.eu/youtubedl/blobdiff_plain/47d80ec0b18245caeb97018d4c1af18d0b5b972b..201856721e6959e772b5e5ddee42d4d75d966703:/test/test_utils.py?ds=sidebyside diff --git a/test/test_utils.py b/test/test_utils.py index 2e3cd01..659c6ec 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -33,14 +33,21 @@ from youtube_dl.utils import ( ExtractorError, find_xpath_attr, fix_xml_ampersands, + float_or_none, get_element_by_class, + get_element_by_attribute, + get_elements_by_class, + get_elements_by_attribute, InAdvancePagedList, + int_or_none, intlist_to_bytes, is_html, js_to_json, limit_length, + merge_dicts, mimetype2ext, month_by_name, + multipart_encode, ohdave_rsa_encrypt, OnDemandPagedList, orderedSet, @@ -49,9 +56,14 @@ from youtube_dl.utils import ( parse_filesize, parse_count, parse_iso8601, + parse_resolution, + parse_bitrate, + pkcs1pad, read_batch_urls, sanitize_filename, sanitize_path, + sanitize_url, + expand_path, prepend_extension, replace_extension, remove_start, @@ -61,6 +73,7 @@ from youtube_dl.utils import ( smuggle_url, str_to_int, strip_jsonp, + strip_or_none, timeconvert, unescapeHTML, unified_strdate, @@ -69,7 +82,9 @@ from youtube_dl.utils import ( uppercase_escape, lowercase_escape, url_basename, + url_or_none, base_url, + urljoin, urlencode_postdata, urshift, update_url_query, @@ -90,6 +105,9 @@ from youtube_dl.utils import ( from youtube_dl.compat import ( compat_chr, compat_etree_fromstring, + compat_getenv, + compat_os_name, + compat_setenv, compat_urlparse, compat_parse_qs, ) @@ -166,7 +184,7 @@ class TestUtil(unittest.TestCase): self.assertEqual(sanitize_filename( 'ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ', restricted=True), - 'AAAAAAAECEEEEIIIIDNOOOOOOOOEUUUUUYPssaaaaaaaeceeeeiiiionooooooooeuuuuuypy') + 'AAAAAAAECEEEEIIIIDNOOOOOOOOEUUUUUYTHssaaaaaaaeceeeeiiiionooooooooeuuuuuythy') def test_sanitize_ids(self): self.assertEqual(sanitize_filename('_n_cd26wFpw', is_id=True), '_n_cd26wFpw') @@ -209,6 +227,24 @@ class TestUtil(unittest.TestCase): self.assertEqual(sanitize_path('./abc'), 'abc') self.assertEqual(sanitize_path('./../abc'), '..\\abc') + def test_sanitize_url(self): + self.assertEqual(sanitize_url('//foo.bar'), 'http://foo.bar') + self.assertEqual(sanitize_url('httpss://foo.bar'), 'https://foo.bar') + self.assertEqual(sanitize_url('rmtps://foo.bar'), 'rtmps://foo.bar') + self.assertEqual(sanitize_url('https://foo.bar'), 'https://foo.bar') + + def test_expand_path(self): + def env(var): + return '%{0}%'.format(var) if sys.platform == 'win32' else '${0}'.format(var) + + compat_setenv('YOUTUBE_DL_EXPATH_PATH', 'expanded') + self.assertEqual(expand_path(env('YOUTUBE_DL_EXPATH_PATH')), 'expanded') + self.assertEqual(expand_path(env('HOME')), compat_getenv('HOME')) + self.assertEqual(expand_path('~'), compat_getenv('HOME')) + self.assertEqual( + expand_path('~/%s' % env('YOUTUBE_DL_EXPATH_PATH')), + '%s/expanded' % compat_getenv('HOME')) + def test_prepend_extension(self): self.assertEqual(prepend_extension('abc.ext', 'temp'), 'abc.temp.ext') self.assertEqual(prepend_extension('abc.ext', 'temp', 'ext'), 'abc.temp.ext') @@ -257,6 +293,7 @@ class TestUtil(unittest.TestCase): self.assertEqual(unescapeHTML('/'), '/') self.assertEqual(unescapeHTML('é'), 'é') self.assertEqual(unescapeHTML('�'), '�') + self.assertEqual(unescapeHTML('&a"'), '&a"') # HTML5 entities self.assertEqual(unescapeHTML('.''), '.\'') @@ -294,6 +331,9 @@ class TestUtil(unittest.TestCase): self.assertEqual(unified_strdate('27.02.2016 17:30'), '20160227') self.assertEqual(unified_strdate('UNKNOWN DATE FORMAT'), None) self.assertEqual(unified_strdate('Feb 7, 2016 at 6:35 pm'), '20160207') + self.assertEqual(unified_strdate('July 15th, 2013'), '20130715') + self.assertEqual(unified_strdate('September 1st, 2013'), '20130901') + self.assertEqual(unified_strdate('Sep 2nd, 2013'), '20130902') def test_unified_timestamps(self): self.assertEqual(unified_timestamp('December 21, 2010'), 1292889600) @@ -315,6 +355,10 @@ class TestUtil(unittest.TestCase): self.assertEqual(unified_timestamp('UNKNOWN DATE FORMAT'), None) self.assertEqual(unified_timestamp('May 16, 2016 11:15 PM'), 1463440500) self.assertEqual(unified_timestamp('Feb 7, 2016 at 6:35 pm'), 1454870100) + self.assertEqual(unified_timestamp('2017-03-30T17:52:41Q'), 1490896361) + self.assertEqual(unified_timestamp('Sep 11, 2013 | 5:49 AM'), 1378878540) + self.assertEqual(unified_timestamp('December 15, 2017 at 7:49 am'), 1513324140) + self.assertEqual(unified_timestamp('2018-03-14T08:32:43.1493874+00:00'), 1521016363) def test_determine_ext(self): self.assertEqual(determine_ext('http://example.com/foo/bar.mp4/?download'), 'mp4') @@ -322,6 +366,7 @@ class TestUtil(unittest.TestCase): self.assertEqual(determine_ext('http://example.com/foo/bar.nonext/?download', None), None) self.assertEqual(determine_ext('http://example.com/foo/bar/mp4?download', None), None) self.assertEqual(determine_ext('http://example.com/foo/bar.m3u8//?download'), 'm3u8') + self.assertEqual(determine_ext('foobar', None), None) def test_find_xpath_attr(self): testxml = ''' @@ -422,7 +467,24 @@ class TestUtil(unittest.TestCase): def test_shell_quote(self): args = ['ffmpeg', '-i', encodeFilename('ñ€ß\'.mp4')] - self.assertEqual(shell_quote(args), """ffmpeg -i 'ñ€ß'"'"'.mp4'""") + self.assertEqual( + shell_quote(args), + """ffmpeg -i 'ñ€ß'"'"'.mp4'""" if compat_os_name != 'nt' else '''ffmpeg -i "ñ€ß'.mp4"''') + + def test_float_or_none(self): + self.assertEqual(float_or_none('42.42'), 42.42) + self.assertEqual(float_or_none('42'), 42.0) + self.assertEqual(float_or_none(''), None) + self.assertEqual(float_or_none(None), None) + self.assertEqual(float_or_none([]), None) + self.assertEqual(float_or_none(set()), None) + + def test_int_or_none(self): + self.assertEqual(int_or_none('42'), 42) + self.assertEqual(int_or_none(''), None) + self.assertEqual(int_or_none(None), None) + self.assertEqual(int_or_none([]), None) + self.assertEqual(int_or_none(set()), None) def test_str_to_int(self): self.assertEqual(str_to_int('123,456'), 123456) @@ -445,6 +507,38 @@ class TestUtil(unittest.TestCase): self.assertEqual(base_url('http://foo.de/bar/baz'), 'http://foo.de/bar/') self.assertEqual(base_url('http://foo.de/bar/baz?x=z/x/c'), 'http://foo.de/bar/') + def test_urljoin(self): + self.assertEqual(urljoin('http://foo.de/', '/a/b/c.txt'), 'http://foo.de/a/b/c.txt') + self.assertEqual(urljoin(b'http://foo.de/', '/a/b/c.txt'), 'http://foo.de/a/b/c.txt') + self.assertEqual(urljoin('http://foo.de/', b'/a/b/c.txt'), 'http://foo.de/a/b/c.txt') + self.assertEqual(urljoin(b'http://foo.de/', b'/a/b/c.txt'), 'http://foo.de/a/b/c.txt') + self.assertEqual(urljoin('//foo.de/', '/a/b/c.txt'), '//foo.de/a/b/c.txt') + self.assertEqual(urljoin('http://foo.de/', 'a/b/c.txt'), 'http://foo.de/a/b/c.txt') + self.assertEqual(urljoin('http://foo.de', '/a/b/c.txt'), 'http://foo.de/a/b/c.txt') + self.assertEqual(urljoin('http://foo.de', 'a/b/c.txt'), 'http://foo.de/a/b/c.txt') + self.assertEqual(urljoin('http://foo.de/', 'http://foo.de/a/b/c.txt'), 'http://foo.de/a/b/c.txt') + self.assertEqual(urljoin('http://foo.de/', '//foo.de/a/b/c.txt'), '//foo.de/a/b/c.txt') + self.assertEqual(urljoin(None, 'http://foo.de/a/b/c.txt'), 'http://foo.de/a/b/c.txt') + self.assertEqual(urljoin(None, '//foo.de/a/b/c.txt'), '//foo.de/a/b/c.txt') + self.assertEqual(urljoin('', 'http://foo.de/a/b/c.txt'), 'http://foo.de/a/b/c.txt') + self.assertEqual(urljoin(['foobar'], 'http://foo.de/a/b/c.txt'), 'http://foo.de/a/b/c.txt') + self.assertEqual(urljoin('http://foo.de/', None), None) + self.assertEqual(urljoin('http://foo.de/', ''), None) + self.assertEqual(urljoin('http://foo.de/', ['foobar']), None) + self.assertEqual(urljoin('http://foo.de/a/b/c.txt', '.././../d.txt'), 'http://foo.de/d.txt') + self.assertEqual(urljoin('http://foo.de/a/b/c.txt', 'rtmp://foo.de'), 'rtmp://foo.de') + self.assertEqual(urljoin(None, 'rtmp://foo.de'), 'rtmp://foo.de') + + def test_url_or_none(self): + self.assertEqual(url_or_none(None), None) + self.assertEqual(url_or_none(''), None) + self.assertEqual(url_or_none('foo'), None) + self.assertEqual(url_or_none('http://foo.de'), 'http://foo.de') + self.assertEqual(url_or_none('https://foo.de'), 'https://foo.de') + self.assertEqual(url_or_none('http$://foo.de'), None) + self.assertEqual(url_or_none('http://foo.de'), 'http://foo.de') + self.assertEqual(url_or_none('//foo.de'), '//foo.de') + def test_parse_age_limit(self): self.assertEqual(parse_age_limit(None), None) self.assertEqual(parse_age_limit(False), None) @@ -458,6 +552,8 @@ class TestUtil(unittest.TestCase): self.assertEqual(parse_age_limit('PG-13'), 13) self.assertEqual(parse_age_limit('TV-14'), 14) self.assertEqual(parse_age_limit('TV-MA'), 17) + self.assertEqual(parse_age_limit('TV14'), 14) + self.assertEqual(parse_age_limit('TV_G'), 0) def test_parse_duration(self): self.assertEqual(parse_duration(None), None) @@ -489,6 +585,8 @@ class TestUtil(unittest.TestCase): self.assertEqual(parse_duration('1 hour 3 minutes'), 3780) self.assertEqual(parse_duration('87 Min.'), 5220) self.assertEqual(parse_duration('PT1H0.040S'), 3600.04) + self.assertEqual(parse_duration('PT00H03M30SZ'), 210) + self.assertEqual(parse_duration('P0Y0M0DT0H4M20.880S'), 260.88) def test_fix_xml_ampersands(self): self.assertEqual( @@ -575,6 +673,16 @@ class TestUtil(unittest.TestCase): 'http://example.com/path', {'test': '第二行тест'})), query_dict('http://example.com/path?test=%E7%AC%AC%E4%BA%8C%E8%A1%8C%D1%82%D0%B5%D1%81%D1%82')) + def test_multipart_encode(self): + self.assertEqual( + multipart_encode({b'field': b'value'}, boundary='AAAAAA')[0], + b'--AAAAAA\r\nContent-Disposition: form-data; name="field"\r\n\r\nvalue\r\n--AAAAAA--\r\n') + self.assertEqual( + multipart_encode({'欄位'.encode('utf-8'): '值'.encode('utf-8')}, boundary='AAAAAA')[0], + b'--AAAAAA\r\nContent-Disposition: form-data; name="\xe6\xac\x84\xe4\xbd\x8d"\r\n\r\n\xe5\x80\xbc\r\n--AAAAAA--\r\n') + self.assertRaises( + ValueError, multipart_encode, {b'field': b'value'}, boundary='value') + def test_dict_get(self): FALSE_VALUES = { 'none': None, @@ -597,6 +705,17 @@ class TestUtil(unittest.TestCase): self.assertEqual(dict_get(d, ('b', 'c', key, )), None) self.assertEqual(dict_get(d, ('b', 'c', key, ), skip_false_values=False), false_value) + def test_merge_dicts(self): + self.assertEqual(merge_dicts({'a': 1}, {'b': 2}), {'a': 1, 'b': 2}) + self.assertEqual(merge_dicts({'a': 1}, {'a': 2}), {'a': 1}) + self.assertEqual(merge_dicts({'a': 1}, {'a': None}), {'a': 1}) + self.assertEqual(merge_dicts({'a': 1}, {'a': ''}), {'a': 1}) + self.assertEqual(merge_dicts({'a': 1}, {}), {'a': 1}) + self.assertEqual(merge_dicts({'a': None}, {'a': 1}), {'a': 1}) + self.assertEqual(merge_dicts({'a': ''}, {'a': 1}), {'a': ''}) + self.assertEqual(merge_dicts({'a': ''}, {'a': 'abc'}), {'a': 'abc'}) + self.assertEqual(merge_dicts({'a': None}, {'a': ''}, {'a': 'abc'}), {'a': 'abc'}) + def test_encode_compat_str(self): self.assertEqual(encode_compat_str(b'\xd1\x82\xd0\xb5\xd1\x81\xd1\x82', 'utf-8'), 'тест') self.assertEqual(encode_compat_str('тест', 'utf-8'), 'тест') @@ -622,6 +741,30 @@ class TestUtil(unittest.TestCase): d = json.loads(stripped) self.assertEqual(d, {'status': 'success'}) + stripped = strip_jsonp('window.cb && window.cb({"status": "success"});') + d = json.loads(stripped) + self.assertEqual(d, {'status': 'success'}) + + stripped = strip_jsonp('window.cb && cb({"status": "success"});') + d = json.loads(stripped) + self.assertEqual(d, {'status': 'success'}) + + stripped = strip_jsonp('({"status": "success"});') + d = json.loads(stripped) + self.assertEqual(d, {'status': 'success'}) + + def test_strip_or_none(self): + self.assertEqual(strip_or_none(' abc'), 'abc') + self.assertEqual(strip_or_none('abc '), 'abc') + self.assertEqual(strip_or_none(' abc '), 'abc') + self.assertEqual(strip_or_none('\tabc\t'), 'abc') + self.assertEqual(strip_or_none('\n\tabc\n\t'), 'abc') + self.assertEqual(strip_or_none('abc'), 'abc') + self.assertEqual(strip_or_none(''), '') + self.assertEqual(strip_or_none(None), None) + self.assertEqual(strip_or_none(42), None) + self.assertEqual(strip_or_none([]), None) + def test_uppercase_escape(self): self.assertEqual(uppercase_escape('aä'), 'aä') self.assertEqual(uppercase_escape('\\U0001d550'), '𝕐') @@ -675,6 +818,19 @@ class TestUtil(unittest.TestCase): 'vcodec': 'h264', 'acodec': 'aac', }) + self.assertEqual(parse_codecs('av01.0.05M.08'), { + 'vcodec': 'av01.0.05M.08', + 'acodec': 'none', + }) + self.assertEqual(parse_codecs('theora, vorbis'), { + 'vcodec': 'theora', + 'acodec': 'vorbis', + }) + self.assertEqual(parse_codecs('unknownvcodec, unknownacodec'), { + 'vcodec': 'unknownvcodec', + 'acodec': 'unknownacodec', + }) + self.assertEqual(parse_codecs('unknown'), {}) def test_escape_rfc3986(self): reserved = "!*'();:@&=+$,/?#[]" @@ -743,6 +899,9 @@ class TestUtil(unittest.TestCase): inp = '''{"duration": "00:01:07"}''' self.assertEqual(js_to_json(inp), '''{"duration": "00:01:07"}''') + inp = '''{segments: [{"offset":-3.885780586188048e-16,"duration":39.75000000000001}]}''' + self.assertEqual(js_to_json(inp), '''{"segments": [{"offset":-3.885780586188048e-16,"duration":39.75000000000001}]}''') + def test_js_to_json_edgecases(self): on = js_to_json("{abc_def:'1\\'\\\\2\\\\\\'3\"4'}") self.assertEqual(json.loads(on), {"abc_def": "1'\\2\\'3\"4"}) @@ -763,12 +922,27 @@ class TestUtil(unittest.TestCase): on = js_to_json('["abc", "def",]') self.assertEqual(json.loads(on), ['abc', 'def']) + on = js_to_json('[/*comment\n*/"abc"/*comment\n*/,/*comment\n*/"def",/*comment\n*/]') + self.assertEqual(json.loads(on), ['abc', 'def']) + + on = js_to_json('[//comment\n"abc" //comment\n,//comment\n"def",//comment\n]') + self.assertEqual(json.loads(on), ['abc', 'def']) + on = js_to_json('{"abc": "def",}') self.assertEqual(json.loads(on), {'abc': 'def'}) + on = js_to_json('{/*comment\n*/"abc"/*comment\n*/:/*comment\n*/"def"/*comment\n*/,/*comment\n*/}') + self.assertEqual(json.loads(on), {'abc': 'def'}) + on = js_to_json('{ 0: /* " \n */ ",]" , }') self.assertEqual(json.loads(on), {'0': ',]'}) + on = js_to_json('{ /*comment\n*/0/*comment\n*/: /* " \n */ ",]" , }') + self.assertEqual(json.loads(on), {'0': ',]'}) + + on = js_to_json('{ 0: // comment\n1 }') + self.assertEqual(json.loads(on), {'0': 1}) + on = js_to_json(r'["

x<\/p>"]') self.assertEqual(json.loads(on), ['

x

']) @@ -778,15 +952,34 @@ class TestUtil(unittest.TestCase): on = js_to_json("['a\\\nb']") self.assertEqual(json.loads(on), ['ab']) + on = js_to_json("/*comment\n*/[/*comment\n*/'a\\\nb'/*comment\n*/]/*comment\n*/") + self.assertEqual(json.loads(on), ['ab']) + on = js_to_json('{0xff:0xff}') self.assertEqual(json.loads(on), {'255': 255}) + on = js_to_json('{/*comment\n*/0xff/*comment\n*/:/*comment\n*/0xff/*comment\n*/}') + self.assertEqual(json.loads(on), {'255': 255}) + on = js_to_json('{077:077}') self.assertEqual(json.loads(on), {'63': 63}) + on = js_to_json('{/*comment\n*/077/*comment\n*/:/*comment\n*/077/*comment\n*/}') + self.assertEqual(json.loads(on), {'63': 63}) + on = js_to_json('{42:42}') self.assertEqual(json.loads(on), {'42': 42}) + on = js_to_json('{/*comment\n*/42/*comment\n*/:/*comment\n*/42/*comment\n*/}') + self.assertEqual(json.loads(on), {'42': 42}) + + on = js_to_json('{42:4.2e1}') + self.assertEqual(json.loads(on), {'42': 42.0}) + + def test_js_to_json_malformed(self): + self.assertEqual(js_to_json('42a1'), '42"a1"') + self.assertEqual(js_to_json('42a-1'), '42"a"-1') + def test_extract_attributes(self): self.assertEqual(extract_attributes(''), {'x': 'y'}) self.assertEqual(extract_attributes(""), {'x': 'y'}) @@ -824,10 +1017,13 @@ class TestUtil(unittest.TestCase): supports_outside_bmp = False if supports_outside_bmp: self.assertEqual(extract_attributes(''), {'x': 'Smile \U0001f600!'}) + # Malformed HTML should not break attributes extraction on older Python + self.assertEqual(extract_attributes(''), {}) def test_clean_html(self): self.assertEqual(clean_html('a:\nb'), 'a: b') self.assertEqual(clean_html('a:\n "b"'), 'a: "b"') + self.assertEqual(clean_html('a
\xa0b'), 'a\nb') def test_intlist_to_bytes(self): self.assertEqual( @@ -837,7 +1033,7 @@ class TestUtil(unittest.TestCase): def test_args_to_str(self): self.assertEqual( args_to_str(['foo', 'ba/r', '-baz', '2 be', '']), - 'foo ba/r -baz \'2 be\' \'\'' + 'foo ba/r -baz \'2 be\' \'\'' if compat_os_name != 'nt' else 'foo ba/r -baz "2 be" ""' ) def test_parse_filesize(self): @@ -864,6 +1060,23 @@ class TestUtil(unittest.TestCase): self.assertEqual(parse_count('1.1kk '), 1100000) self.assertEqual(parse_count('1.1kk views'), 1100000) + def test_parse_resolution(self): + self.assertEqual(parse_resolution(None), {}) + self.assertEqual(parse_resolution(''), {}) + self.assertEqual(parse_resolution('1920x1080'), {'width': 1920, 'height': 1080}) + self.assertEqual(parse_resolution('1920×1080'), {'width': 1920, 'height': 1080}) + self.assertEqual(parse_resolution('1920 x 1080'), {'width': 1920, 'height': 1080}) + self.assertEqual(parse_resolution('720p'), {'height': 720}) + self.assertEqual(parse_resolution('4k'), {'height': 2160}) + self.assertEqual(parse_resolution('8K'), {'height': 4320}) + + def test_parse_bitrate(self): + self.assertEqual(parse_bitrate(None), None) + self.assertEqual(parse_bitrate(''), None) + self.assertEqual(parse_bitrate('300kbps'), 300) + self.assertEqual(parse_bitrate('1500kbps'), 1500) + self.assertEqual(parse_bitrate('300 kbps'), 300) + def test_version_tuple(self): self.assertEqual(version_tuple('1'), (1,)) self.assertEqual(version_tuple('10.23.344'), (10, 23, 344)) @@ -942,6 +1155,18 @@ ffmpeg version 2.4.4 Copyright (c) 2000-2014 the FFmpeg ...'''), '2.4.4') self.assertFalse(match_str( 'like_count > 100 & dislike_count Ignored, three

- ''' + '''.encode('utf-8') srt_data = '''1 00:00:00,000 --> 00:00:01,000 The following line contains Chinese characters and special symbols @@ -990,7 +1215,7 @@ Line

The first line

- ''' + '''.encode('utf-8') srt_data = '''1 00:00:00,000 --> 00:00:01,000 The first line @@ -998,6 +1223,67 @@ The first line ''' self.assertEqual(dfxp2srt(dfxp_data_no_default_namespace), srt_data) + dfxp_data_with_style = ''' + + + +