4 from __future__ 
import unicode_literals
 
   6 # Allow direct execution 
  10 sys
.path
.insert(0, os
.path
.dirname(os
.path
.dirname(os
.path
.abspath(__file__
)))) 
  13 # Various small unit tests 
  16 import xml
.etree
.ElementTree
 
  18 from youtube_dl
.utils 
import ( 
  37     get_element_by_attribute
, 
  38     get_elements_by_class
, 
  39     get_elements_by_attribute
, 
  97 from youtube_dl
.compat 
import ( 
  99     compat_etree_fromstring
, 
 108 class TestUtil(unittest
.TestCase
): 
 109     def test_timeconvert(self
): 
 110         self
.assertTrue(timeconvert('') is None) 
 111         self
.assertTrue(timeconvert('bougrg') is None) 
 113     def test_sanitize_filename(self
): 
 114         self
.assertEqual(sanitize_filename('abc'), 'abc') 
 115         self
.assertEqual(sanitize_filename('abc_d-e'), 'abc_d-e') 
 117         self
.assertEqual(sanitize_filename('123'), '123') 
 119         self
.assertEqual('abc_de', sanitize_filename('abc/de')) 
 120         self
.assertFalse('/' in sanitize_filename('abc/de///')) 
 122         self
.assertEqual('abc_de', sanitize_filename('abc/<>\\*|de')) 
 123         self
.assertEqual('xxx', sanitize_filename('xxx/<>\\*|')) 
 124         self
.assertEqual('yes no', sanitize_filename('yes? no')) 
 125         self
.assertEqual('this - that', sanitize_filename('this: that')) 
 127         self
.assertEqual(sanitize_filename('AT&T'), 'AT&T') 
 129         self
.assertEqual(sanitize_filename(aumlaut
), aumlaut
) 
 130         tests 
= '\u043a\u0438\u0440\u0438\u043b\u043b\u0438\u0446\u0430' 
 131         self
.assertEqual(sanitize_filename(tests
), tests
) 
 134             sanitize_filename('New World record at 0:12:34'), 
 135             'New World record at 0_12_34') 
 137         self
.assertEqual(sanitize_filename('--gasdgf'), '_-gasdgf') 
 138         self
.assertEqual(sanitize_filename('--gasdgf', is_id
=True), '--gasdgf') 
 139         self
.assertEqual(sanitize_filename('.gasdgf'), 'gasdgf') 
 140         self
.assertEqual(sanitize_filename('.gasdgf', is_id
=True), '.gasdgf') 
 144             for fbc 
in forbidden
: 
 145                 self
.assertTrue(fbc 
not in sanitize_filename(fc
)) 
 147     def test_sanitize_filename_restricted(self
): 
 148         self
.assertEqual(sanitize_filename('abc', restricted
=True), 'abc') 
 149         self
.assertEqual(sanitize_filename('abc_d-e', restricted
=True), 'abc_d-e') 
 151         self
.assertEqual(sanitize_filename('123', restricted
=True), '123') 
 153         self
.assertEqual('abc_de', sanitize_filename('abc/de', restricted
=True)) 
 154         self
.assertFalse('/' in sanitize_filename('abc/de///', restricted
=True)) 
 156         self
.assertEqual('abc_de', sanitize_filename('abc/<>\\*|de', restricted
=True)) 
 157         self
.assertEqual('xxx', sanitize_filename('xxx/<>\\*|', restricted
=True)) 
 158         self
.assertEqual('yes_no', sanitize_filename('yes? no', restricted
=True)) 
 159         self
.assertEqual('this_-_that', sanitize_filename('this: that', restricted
=True)) 
 161         tests 
= 'aäb\u4e2d\u56fd\u7684c' 
 162         self
.assertEqual(sanitize_filename(tests
, restricted
=True), 'aab_c') 
 163         self
.assertTrue(sanitize_filename('\xf6', restricted
=True) != '')  # No empty filename 
 165         forbidden 
= '"\0\\/&!: \'\t\n()[]{}$;`^,#' 
 167             for fbc 
in forbidden
: 
 168                 self
.assertTrue(fbc 
not in sanitize_filename(fc
, restricted
=True)) 
 170         # Handle a common case more neatly 
 171         self
.assertEqual(sanitize_filename('\u5927\u58f0\u5e26 - Song', restricted
=True), 'Song') 
 172         self
.assertEqual(sanitize_filename('\u603b\u7edf: Speech', restricted
=True), 'Speech') 
 173         # .. but make sure the file name is never empty 
 174         self
.assertTrue(sanitize_filename('-', restricted
=True) != '') 
 175         self
.assertTrue(sanitize_filename(':', restricted
=True) != '') 
 177         self
.assertEqual(sanitize_filename( 
 178             'ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ', restricted
=True), 
 179             'AAAAAAAECEEEEIIIIDNOOOOOOOOEUUUUUYPssaaaaaaaeceeeeiiiionooooooooeuuuuuypy') 
 181     def test_sanitize_ids(self
): 
 182         self
.assertEqual(sanitize_filename('_n_cd26wFpw', is_id
=True), '_n_cd26wFpw') 
 183         self
.assertEqual(sanitize_filename('_BD_eEpuzXw', is_id
=True), '_BD_eEpuzXw') 
 184         self
.assertEqual(sanitize_filename('N0Y__7-UOdI', is_id
=True), 'N0Y__7-UOdI') 
 186     def test_sanitize_path(self
): 
 187         if sys
.platform 
!= 'win32': 
 190         self
.assertEqual(sanitize_path('abc'), 'abc') 
 191         self
.assertEqual(sanitize_path('abc/def'), 'abc\\def') 
 192         self
.assertEqual(sanitize_path('abc\\def'), 'abc\\def') 
 193         self
.assertEqual(sanitize_path('abc|def'), 'abc#def') 
 194         self
.assertEqual(sanitize_path('<>:"|?*'), '#######') 
 195         self
.assertEqual(sanitize_path('C:/abc/def'), 'C:\\abc\\def') 
 196         self
.assertEqual(sanitize_path('C?:/abc/def'), 'C##\\abc\\def') 
 198         self
.assertEqual(sanitize_path('\\\\?\\UNC\\ComputerName\\abc'), '\\\\?\\UNC\\ComputerName\\abc') 
 199         self
.assertEqual(sanitize_path('\\\\?\\UNC/ComputerName/abc'), '\\\\?\\UNC\\ComputerName\\abc') 
 201         self
.assertEqual(sanitize_path('\\\\?\\C:\\abc'), '\\\\?\\C:\\abc') 
 202         self
.assertEqual(sanitize_path('\\\\?\\C:/abc'), '\\\\?\\C:\\abc') 
 203         self
.assertEqual(sanitize_path('\\\\?\\C:\\ab?c\\de:f'), '\\\\?\\C:\\ab#c\\de#f') 
 204         self
.assertEqual(sanitize_path('\\\\?\\C:\\abc'), '\\\\?\\C:\\abc') 
 207             sanitize_path('youtube/%(uploader)s/%(autonumber)s-%(title)s-%(upload_date)s.%(ext)s'), 
 208             'youtube\\%(uploader)s\\%(autonumber)s-%(title)s-%(upload_date)s.%(ext)s') 
 211             sanitize_path('youtube/TheWreckingYard ./00001-Not bad, Especially for Free! (1987 Yamaha 700)-20141116.mp4.part'), 
 212             'youtube\\TheWreckingYard #\\00001-Not bad, Especially for Free! (1987 Yamaha 700)-20141116.mp4.part') 
 213         self
.assertEqual(sanitize_path('abc/def...'), 'abc\\def..#') 
 214         self
.assertEqual(sanitize_path('abc.../def'), 'abc..#\\def') 
 215         self
.assertEqual(sanitize_path('abc.../def...'), 'abc..#\\def..#') 
 217         self
.assertEqual(sanitize_path('../abc'), '..\\abc') 
 218         self
.assertEqual(sanitize_path('../../abc'), '..\\..\\abc') 
 219         self
.assertEqual(sanitize_path('./abc'), 'abc') 
 220         self
.assertEqual(sanitize_path('./../abc'), '..\\abc') 
 222     def test_expand_path(self
): 
 224             return '%{0}%'.format(var
) if sys
.platform 
== 'win32' else '${0}'.format(var
) 
 226         compat_setenv('YOUTUBE_DL_EXPATH_PATH', 'expanded') 
 227         self
.assertEqual(expand_path(env('YOUTUBE_DL_EXPATH_PATH')), 'expanded') 
 228         self
.assertEqual(expand_path(env('HOME')), compat_getenv('HOME')) 
 229         self
.assertEqual(expand_path('~'), compat_getenv('HOME')) 
 231             expand_path('~/%s' % env('YOUTUBE_DL_EXPATH_PATH')), 
 232             '%s/expanded' % compat_getenv('HOME')) 
 234     def test_prepend_extension(self
): 
 235         self
.assertEqual(prepend_extension('abc.ext', 'temp'), 'abc.temp.ext') 
 236         self
.assertEqual(prepend_extension('abc.ext', 'temp', 'ext'), 'abc.temp.ext') 
 237         self
.assertEqual(prepend_extension('abc.unexpected_ext', 'temp', 'ext'), 'abc.unexpected_ext.temp') 
 238         self
.assertEqual(prepend_extension('abc', 'temp'), 'abc.temp') 
 239         self
.assertEqual(prepend_extension('.abc', 'temp'), '.abc.temp') 
 240         self
.assertEqual(prepend_extension('.abc.ext', 'temp'), '.abc.temp.ext') 
 242     def test_replace_extension(self
): 
 243         self
.assertEqual(replace_extension('abc.ext', 'temp'), 'abc.temp') 
 244         self
.assertEqual(replace_extension('abc.ext', 'temp', 'ext'), 'abc.temp') 
 245         self
.assertEqual(replace_extension('abc.unexpected_ext', 'temp', 'ext'), 'abc.unexpected_ext.temp') 
 246         self
.assertEqual(replace_extension('abc', 'temp'), 'abc.temp') 
 247         self
.assertEqual(replace_extension('.abc', 'temp'), '.abc.temp') 
 248         self
.assertEqual(replace_extension('.abc.ext', 'temp'), '.abc.temp') 
 250     def test_remove_start(self
): 
 251         self
.assertEqual(remove_start(None, 'A - '), None) 
 252         self
.assertEqual(remove_start('A - B', 'A - '), 'B') 
 253         self
.assertEqual(remove_start('B - A', 'A - '), 'B - A') 
 255     def test_remove_end(self
): 
 256         self
.assertEqual(remove_end(None, ' - B'), None) 
 257         self
.assertEqual(remove_end('A - B', ' - B'), 'A') 
 258         self
.assertEqual(remove_end('B - A', ' - B'), 'B - A') 
 260     def test_remove_quotes(self
): 
 261         self
.assertEqual(remove_quotes(None), None) 
 262         self
.assertEqual(remove_quotes('"'), '"') 
 263         self
.assertEqual(remove_quotes("'"), "'") 
 264         self
.assertEqual(remove_quotes(';'), ';') 
 265         self
.assertEqual(remove_quotes('";'), '";') 
 266         self
.assertEqual(remove_quotes('""'), '') 
 267         self
.assertEqual(remove_quotes('";"'), ';') 
 269     def test_ordered_set(self
): 
 270         self
.assertEqual(orderedSet([1, 1, 2, 3, 4, 4, 5, 6, 7, 3, 5]), [1, 2, 3, 4, 5, 6, 7]) 
 271         self
.assertEqual(orderedSet([]), []) 
 272         self
.assertEqual(orderedSet([1]), [1]) 
 273         # keep the list ordered 
 274         self
.assertEqual(orderedSet([135, 1, 1, 1]), [135, 1]) 
 276     def test_unescape_html(self
): 
 277         self
.assertEqual(unescapeHTML('%20;'), '%20;') 
 278         self
.assertEqual(unescapeHTML('/'), '/') 
 279         self
.assertEqual(unescapeHTML('/'), '/') 
 280         self
.assertEqual(unescapeHTML('é'), 'é') 
 281         self
.assertEqual(unescapeHTML('�'), '�') 
 282         self
.assertEqual(unescapeHTML('&a"'), '&a"') 
 284         self
.assertEqual(unescapeHTML('.''), '.\'') 
 286     def test_date_from_str(self
): 
 287         self
.assertEqual(date_from_str('yesterday'), date_from_str('now-1day')) 
 288         self
.assertEqual(date_from_str('now+7day'), date_from_str('now+1week')) 
 289         self
.assertEqual(date_from_str('now+14day'), date_from_str('now+2week')) 
 290         self
.assertEqual(date_from_str('now+365day'), date_from_str('now+1year')) 
 291         self
.assertEqual(date_from_str('now+30day'), date_from_str('now+1month')) 
 293     def test_daterange(self
): 
 294         _20century 
= DateRange("19000101", "20000101") 
 295         self
.assertFalse("17890714" in _20century
) 
 296         _ac 
= DateRange("00010101") 
 297         self
.assertTrue("19690721" in _ac
) 
 298         _firstmilenium 
= DateRange(end
="10000101") 
 299         self
.assertTrue("07110427" in _firstmilenium
) 
 301     def test_unified_dates(self
): 
 302         self
.assertEqual(unified_strdate('December 21, 2010'), '20101221') 
 303         self
.assertEqual(unified_strdate('8/7/2009'), '20090708') 
 304         self
.assertEqual(unified_strdate('Dec 14, 2012'), '20121214') 
 305         self
.assertEqual(unified_strdate('2012/10/11 01:56:38 +0000'), '20121011') 
 306         self
.assertEqual(unified_strdate('1968 12 10'), '19681210') 
 307         self
.assertEqual(unified_strdate('1968-12-10'), '19681210') 
 308         self
.assertEqual(unified_strdate('28/01/2014 21:00:00 +0100'), '20140128') 
 310             unified_strdate('11/26/2014 11:30:00 AM PST', day_first
=False), 
 313             unified_strdate('2/2/2015 6:47:40 PM', day_first
=False), 
 315         self
.assertEqual(unified_strdate('Feb 14th 2016 5:45PM'), '20160214') 
 316         self
.assertEqual(unified_strdate('25-09-2014'), '20140925') 
 317         self
.assertEqual(unified_strdate('27.02.2016 17:30'), '20160227') 
 318         self
.assertEqual(unified_strdate('UNKNOWN DATE FORMAT'), None) 
 319         self
.assertEqual(unified_strdate('Feb 7, 2016 at 6:35 pm'), '20160207') 
 320         self
.assertEqual(unified_strdate('July 15th, 2013'), '20130715') 
 321         self
.assertEqual(unified_strdate('September 1st, 2013'), '20130901') 
 322         self
.assertEqual(unified_strdate('Sep 2nd, 2013'), '20130902') 
 324     def test_unified_timestamps(self
): 
 325         self
.assertEqual(unified_timestamp('December 21, 2010'), 1292889600) 
 326         self
.assertEqual(unified_timestamp('8/7/2009'), 1247011200) 
 327         self
.assertEqual(unified_timestamp('Dec 14, 2012'), 1355443200) 
 328         self
.assertEqual(unified_timestamp('2012/10/11 01:56:38 +0000'), 1349920598) 
 329         self
.assertEqual(unified_timestamp('1968 12 10'), -33436800) 
 330         self
.assertEqual(unified_timestamp('1968-12-10'), -33436800) 
 331         self
.assertEqual(unified_timestamp('28/01/2014 21:00:00 +0100'), 1390939200) 
 333             unified_timestamp('11/26/2014 11:30:00 AM PST', day_first
=False), 
 336             unified_timestamp('2/2/2015 6:47:40 PM', day_first
=False), 
 338         self
.assertEqual(unified_timestamp('Feb 14th 2016 5:45PM'), 1455471900) 
 339         self
.assertEqual(unified_timestamp('25-09-2014'), 1411603200) 
 340         self
.assertEqual(unified_timestamp('27.02.2016 17:30'), 1456594200) 
 341         self
.assertEqual(unified_timestamp('UNKNOWN DATE FORMAT'), None) 
 342         self
.assertEqual(unified_timestamp('May 16, 2016 11:15 PM'), 1463440500) 
 343         self
.assertEqual(unified_timestamp('Feb 7, 2016 at 6:35 pm'), 1454870100) 
 344         self
.assertEqual(unified_timestamp('2017-03-30T17:52:41Q'), 1490896361) 
 345         self
.assertEqual(unified_timestamp('Sep 11, 2013 | 5:49 AM'), 1378878540) 
 347     def test_determine_ext(self
): 
 348         self
.assertEqual(determine_ext('http://example.com/foo/bar.mp4/?download'), 'mp4') 
 349         self
.assertEqual(determine_ext('http://example.com/foo/bar/?download', None), None) 
 350         self
.assertEqual(determine_ext('http://example.com/foo/bar.nonext/?download', None), None) 
 351         self
.assertEqual(determine_ext('http://example.com/foo/bar/mp4?download', None), None) 
 352         self
.assertEqual(determine_ext('http://example.com/foo/bar.m3u8//?download'), 'm3u8') 
 354     def test_find_xpath_attr(self
): 
 362         doc 
= compat_etree_fromstring(testxml
) 
 364         self
.assertEqual(find_xpath_attr(doc
, './/fourohfour', 'n'), None) 
 365         self
.assertEqual(find_xpath_attr(doc
, './/fourohfour', 'n', 'v'), None) 
 366         self
.assertEqual(find_xpath_attr(doc
, './/node', 'n'), None) 
 367         self
.assertEqual(find_xpath_attr(doc
, './/node', 'n', 'v'), None) 
 368         self
.assertEqual(find_xpath_attr(doc
, './/node', 'x'), doc
[1]) 
 369         self
.assertEqual(find_xpath_attr(doc
, './/node', 'x', 'a'), doc
[1]) 
 370         self
.assertEqual(find_xpath_attr(doc
, './/node', 'x', 'b'), doc
[3]) 
 371         self
.assertEqual(find_xpath_attr(doc
, './/node', 'y'), doc
[2]) 
 372         self
.assertEqual(find_xpath_attr(doc
, './/node', 'y', 'c'), doc
[2]) 
 373         self
.assertEqual(find_xpath_attr(doc
, './/node', 'y', 'd'), doc
[3]) 
 374         self
.assertEqual(find_xpath_attr(doc
, './/node', 'x', ''), doc
[4]) 
 376     def test_xpath_with_ns(self
): 
 377         testxml 
= '''<root xmlns:media="http://example.com/"> 
 379                 <media:author>The Author</media:author> 
 380                 <url>http://server.com/download.mp3</url> 
 383         doc 
= compat_etree_fromstring(testxml
) 
 384         find 
= lambda p
: doc
.find(xpath_with_ns(p
, {'media': 'http://example.com/'})) 
 385         self
.assertTrue(find('media:song') is not None) 
 386         self
.assertEqual(find('media:song/media:author').text
, 'The Author') 
 387         self
.assertEqual(find('media:song/url').text
, 'http://server.com/download.mp3') 
 389     def test_xpath_element(self
): 
 390         doc 
= xml
.etree
.ElementTree
.Element('root') 
 391         div 
= xml
.etree
.ElementTree
.SubElement(doc
, 'div') 
 392         p 
= xml
.etree
.ElementTree
.SubElement(div
, 'p') 
 394         self
.assertEqual(xpath_element(doc
, 'div/p'), p
) 
 395         self
.assertEqual(xpath_element(doc
, ['div/p']), p
) 
 396         self
.assertEqual(xpath_element(doc
, ['div/bar', 'div/p']), p
) 
 397         self
.assertEqual(xpath_element(doc
, 'div/bar', default
='default'), 'default') 
 398         self
.assertEqual(xpath_element(doc
, ['div/bar'], default
='default'), 'default') 
 399         self
.assertTrue(xpath_element(doc
, 'div/bar') is None) 
 400         self
.assertTrue(xpath_element(doc
, ['div/bar']) is None) 
 401         self
.assertTrue(xpath_element(doc
, ['div/bar'], 'div/baz') is None) 
 402         self
.assertRaises(ExtractorError
, xpath_element
, doc
, 'div/bar', fatal
=True) 
 403         self
.assertRaises(ExtractorError
, xpath_element
, doc
, ['div/bar'], fatal
=True) 
 404         self
.assertRaises(ExtractorError
, xpath_element
, doc
, ['div/bar', 'div/baz'], fatal
=True) 
 406     def test_xpath_text(self
): 
 412         doc 
= compat_etree_fromstring(testxml
) 
 413         self
.assertEqual(xpath_text(doc
, 'div/p'), 'Foo') 
 414         self
.assertEqual(xpath_text(doc
, 'div/bar', default
='default'), 'default') 
 415         self
.assertTrue(xpath_text(doc
, 'div/bar') is None) 
 416         self
.assertRaises(ExtractorError
, xpath_text
, doc
, 'div/bar', fatal
=True) 
 418     def test_xpath_attr(self
): 
 424         doc 
= compat_etree_fromstring(testxml
) 
 425         self
.assertEqual(xpath_attr(doc
, 'div/p', 'x'), 'a') 
 426         self
.assertEqual(xpath_attr(doc
, 'div/bar', 'x'), None) 
 427         self
.assertEqual(xpath_attr(doc
, 'div/p', 'y'), None) 
 428         self
.assertEqual(xpath_attr(doc
, 'div/bar', 'x', default
='default'), 'default') 
 429         self
.assertEqual(xpath_attr(doc
, 'div/p', 'y', default
='default'), 'default') 
 430         self
.assertRaises(ExtractorError
, xpath_attr
, doc
, 'div/bar', 'x', fatal
=True) 
 431         self
.assertRaises(ExtractorError
, xpath_attr
, doc
, 'div/p', 'y', fatal
=True) 
 433     def test_smuggle_url(self
): 
 434         data 
= {"ö": "ö", "abc": [3]} 
 435         url 
= 'https://foo.bar/baz?x=y#a' 
 436         smug_url 
= smuggle_url(url
, data
) 
 437         unsmug_url
, unsmug_data 
= unsmuggle_url(smug_url
) 
 438         self
.assertEqual(url
, unsmug_url
) 
 439         self
.assertEqual(data
, unsmug_data
) 
 441         res_url
, res_data 
= unsmuggle_url(url
) 
 442         self
.assertEqual(res_url
, url
) 
 443         self
.assertEqual(res_data
, None) 
 445         smug_url 
= smuggle_url(url
, {'a': 'b'}) 
 446         smug_smug_url 
= smuggle_url(smug_url
, {'c': 'd'}) 
 447         res_url
, res_data 
= unsmuggle_url(smug_smug_url
) 
 448         self
.assertEqual(res_url
, url
) 
 449         self
.assertEqual(res_data
, {'a': 'b', 'c': 'd'}) 
 451     def test_shell_quote(self
): 
 452         args 
= ['ffmpeg', '-i', encodeFilename('ñ€ß\'.mp4')] 
 455             """ffmpeg -i 'ñ€ß'"'"'.mp4'""" if compat_os_name 
!= 'nt' else '''ffmpeg -i "ñ€ß'.mp4"''') 
 457     def test_str_to_int(self
): 
 458         self
.assertEqual(str_to_int('123,456'), 123456) 
 459         self
.assertEqual(str_to_int('123.456'), 123456) 
 461     def test_url_basename(self
): 
 462         self
.assertEqual(url_basename('http://foo.de/'), '') 
 463         self
.assertEqual(url_basename('http://foo.de/bar/baz'), 'baz') 
 464         self
.assertEqual(url_basename('http://foo.de/bar/baz?x=y'), 'baz') 
 465         self
.assertEqual(url_basename('http://foo.de/bar/baz#x=y'), 'baz') 
 466         self
.assertEqual(url_basename('http://foo.de/bar/baz/'), 'baz') 
 468             url_basename('http://media.w3.org/2010/05/sintel/trailer.mp4'), 
 471     def test_base_url(self
): 
 472         self
.assertEqual(base_url('http://foo.de/'), 'http://foo.de/') 
 473         self
.assertEqual(base_url('http://foo.de/bar'), 'http://foo.de/') 
 474         self
.assertEqual(base_url('http://foo.de/bar/'), 'http://foo.de/bar/') 
 475         self
.assertEqual(base_url('http://foo.de/bar/baz'), 'http://foo.de/bar/') 
 476         self
.assertEqual(base_url('http://foo.de/bar/baz?x=z/x/c'), 'http://foo.de/bar/') 
 478     def test_urljoin(self
): 
 479         self
.assertEqual(urljoin('http://foo.de/', '/a/b/c.txt'), 'http://foo.de/a/b/c.txt') 
 480         self
.assertEqual(urljoin(b
'http://foo.de/', '/a/b/c.txt'), 'http://foo.de/a/b/c.txt') 
 481         self
.assertEqual(urljoin('http://foo.de/', b
'/a/b/c.txt'), 'http://foo.de/a/b/c.txt') 
 482         self
.assertEqual(urljoin(b
'http://foo.de/', b
'/a/b/c.txt'), 'http://foo.de/a/b/c.txt') 
 483         self
.assertEqual(urljoin('//foo.de/', '/a/b/c.txt'), '//foo.de/a/b/c.txt') 
 484         self
.assertEqual(urljoin('http://foo.de/', 'a/b/c.txt'), 'http://foo.de/a/b/c.txt') 
 485         self
.assertEqual(urljoin('http://foo.de', '/a/b/c.txt'), 'http://foo.de/a/b/c.txt') 
 486         self
.assertEqual(urljoin('http://foo.de', 'a/b/c.txt'), 'http://foo.de/a/b/c.txt') 
 487         self
.assertEqual(urljoin('http://foo.de/', 'http://foo.de/a/b/c.txt'), 'http://foo.de/a/b/c.txt') 
 488         self
.assertEqual(urljoin('http://foo.de/', '//foo.de/a/b/c.txt'), '//foo.de/a/b/c.txt') 
 489         self
.assertEqual(urljoin(None, 'http://foo.de/a/b/c.txt'), 'http://foo.de/a/b/c.txt') 
 490         self
.assertEqual(urljoin(None, '//foo.de/a/b/c.txt'), '//foo.de/a/b/c.txt') 
 491         self
.assertEqual(urljoin('', 'http://foo.de/a/b/c.txt'), 'http://foo.de/a/b/c.txt') 
 492         self
.assertEqual(urljoin(['foobar'], 'http://foo.de/a/b/c.txt'), 'http://foo.de/a/b/c.txt') 
 493         self
.assertEqual(urljoin('http://foo.de/', None), None) 
 494         self
.assertEqual(urljoin('http://foo.de/', ''), None) 
 495         self
.assertEqual(urljoin('http://foo.de/', ['foobar']), None) 
 496         self
.assertEqual(urljoin('http://foo.de/a/b/c.txt', '.././../d.txt'), 'http://foo.de/d.txt') 
 498     def test_parse_age_limit(self
): 
 499         self
.assertEqual(parse_age_limit(None), None) 
 500         self
.assertEqual(parse_age_limit(False), None) 
 501         self
.assertEqual(parse_age_limit('invalid'), None) 
 502         self
.assertEqual(parse_age_limit(0), 0) 
 503         self
.assertEqual(parse_age_limit(18), 18) 
 504         self
.assertEqual(parse_age_limit(21), 21) 
 505         self
.assertEqual(parse_age_limit(22), None) 
 506         self
.assertEqual(parse_age_limit('18'), 18) 
 507         self
.assertEqual(parse_age_limit('18+'), 18) 
 508         self
.assertEqual(parse_age_limit('PG-13'), 13) 
 509         self
.assertEqual(parse_age_limit('TV-14'), 14) 
 510         self
.assertEqual(parse_age_limit('TV-MA'), 17) 
 512     def test_parse_duration(self
): 
 513         self
.assertEqual(parse_duration(None), None) 
 514         self
.assertEqual(parse_duration(False), None) 
 515         self
.assertEqual(parse_duration('invalid'), None) 
 516         self
.assertEqual(parse_duration('1'), 1) 
 517         self
.assertEqual(parse_duration('1337:12'), 80232) 
 518         self
.assertEqual(parse_duration('9:12:43'), 33163) 
 519         self
.assertEqual(parse_duration('12:00'), 720) 
 520         self
.assertEqual(parse_duration('00:01:01'), 61) 
 521         self
.assertEqual(parse_duration('x:y'), None) 
 522         self
.assertEqual(parse_duration('3h11m53s'), 11513) 
 523         self
.assertEqual(parse_duration('3h 11m 53s'), 11513) 
 524         self
.assertEqual(parse_duration('3 hours 11 minutes 53 seconds'), 11513) 
 525         self
.assertEqual(parse_duration('3 hours 11 mins 53 secs'), 11513) 
 526         self
.assertEqual(parse_duration('62m45s'), 3765) 
 527         self
.assertEqual(parse_duration('6m59s'), 419) 
 528         self
.assertEqual(parse_duration('49s'), 49) 
 529         self
.assertEqual(parse_duration('0h0m0s'), 0) 
 530         self
.assertEqual(parse_duration('0m0s'), 0) 
 531         self
.assertEqual(parse_duration('0s'), 0) 
 532         self
.assertEqual(parse_duration('01:02:03.05'), 3723.05) 
 533         self
.assertEqual(parse_duration('T30M38S'), 1838) 
 534         self
.assertEqual(parse_duration('5 s'), 5) 
 535         self
.assertEqual(parse_duration('3 min'), 180) 
 536         self
.assertEqual(parse_duration('2.5 hours'), 9000) 
 537         self
.assertEqual(parse_duration('02:03:04'), 7384) 
 538         self
.assertEqual(parse_duration('01:02:03:04'), 93784) 
 539         self
.assertEqual(parse_duration('1 hour 3 minutes'), 3780) 
 540         self
.assertEqual(parse_duration('87 Min.'), 5220) 
 541         self
.assertEqual(parse_duration('PT1H0.040S'), 3600.04) 
 542         self
.assertEqual(parse_duration('PT00H03M30SZ'), 210) 
 543         self
.assertEqual(parse_duration('P0Y0M0DT0H4M20.880S'), 260.88) 
 545     def test_fix_xml_ampersands(self
): 
 547             fix_xml_ampersands('"&x=y&z=a'), '"&x=y&z=a') 
 549             fix_xml_ampersands('"&x=y&wrong;&z=a'), 
 550             '"&x=y&wrong;&z=a') 
 552             fix_xml_ampersands('&'><"'), 
 553             '&'><"') 
 555             fix_xml_ampersands('Ӓ᪼'), 'Ӓ᪼') 
 556         self
.assertEqual(fix_xml_ampersands('&#&#'), '&#&#') 
 558     def test_paged_list(self
): 
 559         def testPL(size
, pagesize
, sliceargs
, expected
): 
 560             def get_page(pagenum
): 
 561                 firstid 
= pagenum 
* pagesize
 
 562                 upto 
= min(size
, pagenum 
* pagesize 
+ pagesize
) 
 563                 for i 
in range(firstid
, upto
): 
 566             pl 
= OnDemandPagedList(get_page
, pagesize
) 
 567             got 
= pl
.getslice(*sliceargs
) 
 568             self
.assertEqual(got
, expected
) 
 570             iapl 
= InAdvancePagedList(get_page
, size 
// pagesize 
+ 1, pagesize
) 
 571             got 
= iapl
.getslice(*sliceargs
) 
 572             self
.assertEqual(got
, expected
) 
 574         testPL(5, 2, (), [0, 1, 2, 3, 4]) 
 575         testPL(5, 2, (1,), [1, 2, 3, 4]) 
 576         testPL(5, 2, (2,), [2, 3, 4]) 
 577         testPL(5, 2, (4,), [4]) 
 578         testPL(5, 2, (0, 3), [0, 1, 2]) 
 579         testPL(5, 2, (1, 4), [1, 2, 3]) 
 580         testPL(5, 2, (2, 99), [2, 3, 4]) 
 581         testPL(5, 2, (20, 99), []) 
 583     def test_read_batch_urls(self
): 
 584         f 
= io
.StringIO('''\xef\xbb\xbf foo 
 587             # More after this line\r 
 590         self
.assertEqual(read_batch_urls(f
), ['foo', 'bar', 'baz', 'bam']) 
 592     def test_urlencode_postdata(self
): 
 593         data 
= urlencode_postdata({'username': 'foo@bar.com', 'password': '1234'}) 
 594         self
.assertTrue(isinstance(data
, bytes)) 
 596     def test_update_url_query(self
): 
 598             return compat_parse_qs(compat_urlparse
.urlparse(url
).query
) 
 599         self
.assertEqual(query_dict(update_url_query( 
 600             'http://example.com/path', {'quality': ['HD'], 'format': ['mp4']})), 
 601             query_dict('http://example.com/path?quality=HD&format=mp4')) 
 602         self
.assertEqual(query_dict(update_url_query( 
 603             'http://example.com/path', {'system': ['LINUX', 'WINDOWS']})), 
 604             query_dict('http://example.com/path?system=LINUX&system=WINDOWS')) 
 605         self
.assertEqual(query_dict(update_url_query( 
 606             'http://example.com/path', {'fields': 'id,formats,subtitles'})), 
 607             query_dict('http://example.com/path?fields=id,formats,subtitles')) 
 608         self
.assertEqual(query_dict(update_url_query( 
 609             'http://example.com/path', {'fields': ('id,formats,subtitles', 'thumbnails')})), 
 610             query_dict('http://example.com/path?fields=id,formats,subtitles&fields=thumbnails')) 
 611         self
.assertEqual(query_dict(update_url_query( 
 612             'http://example.com/path?manifest=f4m', {'manifest': []})), 
 613             query_dict('http://example.com/path')) 
 614         self
.assertEqual(query_dict(update_url_query( 
 615             'http://example.com/path?system=LINUX&system=WINDOWS', {'system': 'LINUX'})), 
 616             query_dict('http://example.com/path?system=LINUX')) 
 617         self
.assertEqual(query_dict(update_url_query( 
 618             'http://example.com/path', {'fields': b
'id,formats,subtitles'})), 
 619             query_dict('http://example.com/path?fields=id,formats,subtitles')) 
 620         self
.assertEqual(query_dict(update_url_query( 
 621             'http://example.com/path', {'width': 1080, 'height': 720})), 
 622             query_dict('http://example.com/path?width=1080&height=720')) 
 623         self
.assertEqual(query_dict(update_url_query( 
 624             'http://example.com/path', {'bitrate': 5020.43})), 
 625             query_dict('http://example.com/path?bitrate=5020.43')) 
 626         self
.assertEqual(query_dict(update_url_query( 
 627             'http://example.com/path', {'test': '第二行тест'})), 
 628             query_dict('http://example.com/path?test=%E7%AC%AC%E4%BA%8C%E8%A1%8C%D1%82%D0%B5%D1%81%D1%82')) 
 630     def test_multipart_encode(self
): 
 632             multipart_encode({b
'field': b
'value'}, boundary
='AAAAAA')[0], 
 633             b
'--AAAAAA\r\nContent-Disposition: form-data; name="field"\r\n\r\nvalue\r\n--AAAAAA--\r\n') 
 635             multipart_encode({'欄位'.encode('utf-8'): '值'.encode('utf-8')}, boundary
='AAAAAA')[0], 
 636             b
'--AAAAAA\r\nContent-Disposition: form-data; name="\xe6\xac\x84\xe4\xbd\x8d"\r\n\r\n\xe5\x80\xbc\r\n--AAAAAA--\r\n') 
 638             ValueError, multipart_encode
, {b
'field': b
'value'}, boundary
='value') 
 640     def test_dict_get(self
): 
 648         d 
= FALSE_VALUES
.copy() 
 650         self
.assertEqual(dict_get(d
, 'a'), 42) 
 651         self
.assertEqual(dict_get(d
, 'b'), None) 
 652         self
.assertEqual(dict_get(d
, 'b', 42), 42) 
 653         self
.assertEqual(dict_get(d
, ('a', )), 42) 
 654         self
.assertEqual(dict_get(d
, ('b', 'a', )), 42) 
 655         self
.assertEqual(dict_get(d
, ('b', 'c', 'a', 'd', )), 42) 
 656         self
.assertEqual(dict_get(d
, ('b', 'c', )), None) 
 657         self
.assertEqual(dict_get(d
, ('b', 'c', ), 42), 42) 
 658         for key
, false_value 
in FALSE_VALUES
.items(): 
 659             self
.assertEqual(dict_get(d
, ('b', 'c', key
, )), None) 
 660             self
.assertEqual(dict_get(d
, ('b', 'c', key
, ), skip_false_values
=False), false_value
) 
 662     def test_encode_compat_str(self
): 
 663         self
.assertEqual(encode_compat_str(b
'\xd1\x82\xd0\xb5\xd1\x81\xd1\x82', 'utf-8'), 'тест') 
 664         self
.assertEqual(encode_compat_str('тест', 'utf-8'), 'тест') 
 666     def test_parse_iso8601(self
): 
 667         self
.assertEqual(parse_iso8601('2014-03-23T23:04:26+0100'), 1395612266) 
 668         self
.assertEqual(parse_iso8601('2014-03-23T22:04:26+0000'), 1395612266) 
 669         self
.assertEqual(parse_iso8601('2014-03-23T22:04:26Z'), 1395612266) 
 670         self
.assertEqual(parse_iso8601('2014-03-23T22:04:26.1234Z'), 1395612266) 
 671         self
.assertEqual(parse_iso8601('2015-09-29T08:27:31.727'), 1443515251) 
 672         self
.assertEqual(parse_iso8601('2015-09-29T08-27-31.727'), None) 
 674     def test_strip_jsonp(self
): 
 675         stripped 
= strip_jsonp('cb ([ {"id":"532cb",\n\n\n"x":\n3}\n]\n);') 
 676         d 
= json
.loads(stripped
) 
 677         self
.assertEqual(d
, [{"id": "532cb", "x": 3}]) 
 679         stripped 
= strip_jsonp('parseMetadata({"STATUS":"OK"})\n\n\n//epc') 
 680         d 
= json
.loads(stripped
) 
 681         self
.assertEqual(d
, {'STATUS': 'OK'}) 
 683         stripped 
= strip_jsonp('ps.embedHandler({"status": "success"});') 
 684         d 
= json
.loads(stripped
) 
 685         self
.assertEqual(d
, {'status': 'success'}) 
 687         stripped 
= strip_jsonp('window.cb && window.cb({"status": "success"});') 
 688         d 
= json
.loads(stripped
) 
 689         self
.assertEqual(d
, {'status': 'success'}) 
 691         stripped 
= strip_jsonp('window.cb && cb({"status": "success"});') 
 692         d 
= json
.loads(stripped
) 
 693         self
.assertEqual(d
, {'status': 'success'}) 
 695     def test_uppercase_escape(self
): 
 696         self
.assertEqual(uppercase_escape('aä'), 'aä') 
 697         self
.assertEqual(uppercase_escape('\\U0001d550'), '𝕐') 
 699     def test_lowercase_escape(self
): 
 700         self
.assertEqual(lowercase_escape('aä'), 'aä') 
 701         self
.assertEqual(lowercase_escape('\\u0026'), '&') 
 703     def test_limit_length(self
): 
 704         self
.assertEqual(limit_length(None, 12), None) 
 705         self
.assertEqual(limit_length('foo', 12), 'foo') 
 707             limit_length('foo bar baz asd', 12).startswith('foo bar')) 
 708         self
.assertTrue('...' in limit_length('foo bar baz asd', 12)) 
 710     def test_mimetype2ext(self
): 
 711         self
.assertEqual(mimetype2ext(None), None) 
 712         self
.assertEqual(mimetype2ext('video/x-flv'), 'flv') 
 713         self
.assertEqual(mimetype2ext('application/x-mpegURL'), 'm3u8') 
 714         self
.assertEqual(mimetype2ext('text/vtt'), 'vtt') 
 715         self
.assertEqual(mimetype2ext('text/vtt;charset=utf-8'), 'vtt') 
 716         self
.assertEqual(mimetype2ext('text/html; charset=utf-8'), 'html') 
 718     def test_month_by_name(self
): 
 719         self
.assertEqual(month_by_name(None), None) 
 720         self
.assertEqual(month_by_name('December', 'en'), 12) 
 721         self
.assertEqual(month_by_name('décembre', 'fr'), 12) 
 722         self
.assertEqual(month_by_name('December'), 12) 
 723         self
.assertEqual(month_by_name('décembre'), None) 
 724         self
.assertEqual(month_by_name('Unknown', 'unknown'), None) 
 726     def test_parse_codecs(self
): 
 727         self
.assertEqual(parse_codecs(''), {}) 
 728         self
.assertEqual(parse_codecs('avc1.77.30, mp4a.40.2'), { 
 729             'vcodec': 'avc1.77.30', 
 730             'acodec': 'mp4a.40.2', 
 732         self
.assertEqual(parse_codecs('mp4a.40.2'), { 
 734             'acodec': 'mp4a.40.2', 
 736         self
.assertEqual(parse_codecs('mp4a.40.5,avc1.42001e'), { 
 737             'vcodec': 'avc1.42001e', 
 738             'acodec': 'mp4a.40.5', 
 740         self
.assertEqual(parse_codecs('avc3.640028'), { 
 741             'vcodec': 'avc3.640028', 
 744         self
.assertEqual(parse_codecs(', h264,,newcodec,aac'), { 
 749     def test_escape_rfc3986(self
): 
 750         reserved 
= "!*'();:@&=+$,/?#[]" 
 751         unreserved 
= 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_.~' 
 752         self
.assertEqual(escape_rfc3986(reserved
), reserved
) 
 753         self
.assertEqual(escape_rfc3986(unreserved
), unreserved
) 
 754         self
.assertEqual(escape_rfc3986('тест'), '%D1%82%D0%B5%D1%81%D1%82') 
 755         self
.assertEqual(escape_rfc3986('%D1%82%D0%B5%D1%81%D1%82'), '%D1%82%D0%B5%D1%81%D1%82') 
 756         self
.assertEqual(escape_rfc3986('foo bar'), 'foo%20bar') 
 757         self
.assertEqual(escape_rfc3986('foo%20bar'), 'foo%20bar') 
 759     def test_escape_url(self
): 
 761             escape_url('http://wowza.imust.org/srv/vod/telemb/new/UPLOAD/UPLOAD/20224_IncendieHavré_FD.mp4'), 
 762             'http://wowza.imust.org/srv/vod/telemb/new/UPLOAD/UPLOAD/20224_IncendieHavre%CC%81_FD.mp4' 
 765             escape_url('http://www.ardmediathek.de/tv/Sturm-der-Liebe/Folge-2036-Zu-Mann-und-Frau-erklärt/Das-Erste/Video?documentId=22673108&bcastId=5290'), 
 766             'http://www.ardmediathek.de/tv/Sturm-der-Liebe/Folge-2036-Zu-Mann-und-Frau-erkl%C3%A4rt/Das-Erste/Video?documentId=22673108&bcastId=5290' 
 769             escape_url('http://тест.рф/фрагмент'), 
 770             'http://xn--e1aybc.xn--p1ai/%D1%84%D1%80%D0%B0%D0%B3%D0%BC%D0%B5%D0%BD%D1%82' 
 773             escape_url('http://тест.рф/абв?абв=абв#абв'), 
 774             'http://xn--e1aybc.xn--p1ai/%D0%B0%D0%B1%D0%B2?%D0%B0%D0%B1%D0%B2=%D0%B0%D0%B1%D0%B2#%D0%B0%D0%B1%D0%B2' 
 776         self
.assertEqual(escape_url('http://vimeo.com/56015672#at=0'), 'http://vimeo.com/56015672#at=0') 
 778     def test_js_to_json_realworld(self
): 
 780             'clip':{'provider':'pseudo'} 
 782         self
.assertEqual(js_to_json(inp
), '''{ 
 783             "clip":{"provider":"pseudo"} 
 785         json
.loads(js_to_json(inp
)) 
 788             'playlist':[{'controls':{'all':null}}] 
 790         self
.assertEqual(js_to_json(inp
), '''{ 
 791             "playlist":[{"controls":{"all":null}}] 
 794         inp 
= '''"The CW\\'s \\'Crazy Ex-Girlfriend\\'"''' 
 795         self
.assertEqual(js_to_json(inp
), '''"The CW's 'Crazy Ex-Girlfriend'"''') 
 797         inp 
= '"SAND Number: SAND 2013-7800P\\nPresenter: Tom Russo\\nHabanero Software Training - Xyce Software\\nXyce, Sandia\\u0027s"' 
 798         json_code 
= js_to_json(inp
) 
 799         self
.assertEqual(json
.loads(json_code
), json
.loads(inp
)) 
 802             0:{src:'skipped', type: 'application/dash+xml'}, 
 803             1:{src:'skipped', type: 'application/vnd.apple.mpegURL'}, 
 805         self
.assertEqual(js_to_json(inp
), '''{ 
 806             "0":{"src":"skipped", "type": "application/dash+xml"}, 
 807             "1":{"src":"skipped", "type": "application/vnd.apple.mpegURL"} 
 810         inp 
= '''{"foo":101}''' 
 811         self
.assertEqual(js_to_json(inp
), '''{"foo":101}''') 
 813         inp 
= '''{"duration": "00:01:07"}''' 
 814         self
.assertEqual(js_to_json(inp
), '''{"duration": "00:01:07"}''') 
 816     def test_js_to_json_edgecases(self
): 
 817         on 
= js_to_json("{abc_def:'1\\'\\\\2\\\\\\'3\"4'}") 
 818         self
.assertEqual(json
.loads(on
), {"abc_def": "1'\\2\\'3\"4"}) 
 820         on 
= js_to_json('{"abc": true}') 
 821         self
.assertEqual(json
.loads(on
), {'abc': True}) 
 823         # Ignore JavaScript code as well 
 830         self
.assertEqual(d
['x'], 1) 
 831         self
.assertEqual(d
['y'], 'a') 
 833         on 
= js_to_json('["abc", "def",]') 
 834         self
.assertEqual(json
.loads(on
), ['abc', 'def']) 
 836         on 
= js_to_json('[/*comment\n*/"abc"/*comment\n*/,/*comment\n*/"def",/*comment\n*/]') 
 837         self
.assertEqual(json
.loads(on
), ['abc', 'def']) 
 839         on 
= js_to_json('[//comment\n"abc" //comment\n,//comment\n"def",//comment\n]') 
 840         self
.assertEqual(json
.loads(on
), ['abc', 'def']) 
 842         on 
= js_to_json('{"abc": "def",}') 
 843         self
.assertEqual(json
.loads(on
), {'abc': 'def'}) 
 845         on 
= js_to_json('{/*comment\n*/"abc"/*comment\n*/:/*comment\n*/"def"/*comment\n*/,/*comment\n*/}') 
 846         self
.assertEqual(json
.loads(on
), {'abc': 'def'}) 
 848         on 
= js_to_json('{ 0: /* " \n */ ",]" , }') 
 849         self
.assertEqual(json
.loads(on
), {'0': ',]'}) 
 851         on 
= js_to_json('{ /*comment\n*/0/*comment\n*/: /* " \n */ ",]" , }') 
 852         self
.assertEqual(json
.loads(on
), {'0': ',]'}) 
 854         on 
= js_to_json('{ 0: // comment\n1 }') 
 855         self
.assertEqual(json
.loads(on
), {'0': 1}) 
 857         on 
= js_to_json(r
'["<p>x<\/p>"]') 
 858         self
.assertEqual(json
.loads(on
), ['<p>x</p>']) 
 860         on 
= js_to_json(r
'["\xaa"]') 
 861         self
.assertEqual(json
.loads(on
), ['\u00aa']) 
 863         on 
= js_to_json("['a\\\nb']") 
 864         self
.assertEqual(json
.loads(on
), ['ab']) 
 866         on 
= js_to_json("/*comment\n*/[/*comment\n*/'a\\\nb'/*comment\n*/]/*comment\n*/") 
 867         self
.assertEqual(json
.loads(on
), ['ab']) 
 869         on 
= js_to_json('{0xff:0xff}') 
 870         self
.assertEqual(json
.loads(on
), {'255': 255}) 
 872         on 
= js_to_json('{/*comment\n*/0xff/*comment\n*/:/*comment\n*/0xff/*comment\n*/}') 
 873         self
.assertEqual(json
.loads(on
), {'255': 255}) 
 875         on 
= js_to_json('{077:077}') 
 876         self
.assertEqual(json
.loads(on
), {'63': 63}) 
 878         on 
= js_to_json('{/*comment\n*/077/*comment\n*/:/*comment\n*/077/*comment\n*/}') 
 879         self
.assertEqual(json
.loads(on
), {'63': 63}) 
 881         on 
= js_to_json('{42:42}') 
 882         self
.assertEqual(json
.loads(on
), {'42': 42}) 
 884         on 
= js_to_json('{/*comment\n*/42/*comment\n*/:/*comment\n*/42/*comment\n*/}') 
 885         self
.assertEqual(json
.loads(on
), {'42': 42}) 
 887     def test_extract_attributes(self
): 
 888         self
.assertEqual(extract_attributes('<e x="y">'), {'x': 'y'}) 
 889         self
.assertEqual(extract_attributes("<e x='y'>"), {'x': 'y'}) 
 890         self
.assertEqual(extract_attributes('<e x=y>'), {'x': 'y'}) 
 891         self
.assertEqual(extract_attributes('<e x="a \'b\' c">'), {'x': "a 'b' c"}) 
 892         self
.assertEqual(extract_attributes('<e x=\'a "b" c\'>'), {'x': 'a "b" c'}) 
 893         self
.assertEqual(extract_attributes('<e x="y">'), {'x': 'y'}) 
 894         self
.assertEqual(extract_attributes('<e x="y">'), {'x': 'y'}) 
 895         self
.assertEqual(extract_attributes('<e x="&">'), {'x': '&'})  # XML 
 896         self
.assertEqual(extract_attributes('<e x=""">'), {'x': '"'}) 
 897         self
.assertEqual(extract_attributes('<e x="£">'), {'x': '£'})  # HTML 3.2 
 898         self
.assertEqual(extract_attributes('<e x="λ">'), {'x': 'λ'})  # HTML 4.0 
 899         self
.assertEqual(extract_attributes('<e x="&foo">'), {'x': '&foo'}) 
 900         self
.assertEqual(extract_attributes('<e x="\'">'), {'x': "'"}) 
 901         self
.assertEqual(extract_attributes('<e x=\'"\'>'), {'x': '"'}) 
 902         self
.assertEqual(extract_attributes('<e x >'), {'x': None}) 
 903         self
.assertEqual(extract_attributes('<e x=y a>'), {'x': 'y', 'a': None}) 
 904         self
.assertEqual(extract_attributes('<e x= y>'), {'x': 'y'}) 
 905         self
.assertEqual(extract_attributes('<e x=1 y=2 x=3>'), {'y': '2', 'x': '3'}) 
 906         self
.assertEqual(extract_attributes('<e \nx=\ny\n>'), {'x': 'y'}) 
 907         self
.assertEqual(extract_attributes('<e \nx=\n"y"\n>'), {'x': 'y'}) 
 908         self
.assertEqual(extract_attributes("<e \nx=\n'y'\n>"), {'x': 'y'}) 
 909         self
.assertEqual(extract_attributes('<e \nx="\ny\n">'), {'x': '\ny\n'}) 
 910         self
.assertEqual(extract_attributes('<e CAPS=x>'), {'caps': 'x'})  # Names lowercased 
 911         self
.assertEqual(extract_attributes('<e x=1 X=2>'), {'x': '2'}) 
 912         self
.assertEqual(extract_attributes('<e X=1 x=2>'), {'x': '2'}) 
 913         self
.assertEqual(extract_attributes('<e _:funny-name1=1>'), {'_:funny-name1': '1'}) 
 914         self
.assertEqual(extract_attributes('<e x="Fáilte 世界 \U0001f600">'), {'x': 'Fáilte 世界 \U0001f600'}) 
 915         self
.assertEqual(extract_attributes('<e x="décomposé">'), {'x': 'décompose\u0301'}) 
 916         # "Narrow" Python builds don't support unicode code points outside BMP. 
 919             supports_outside_bmp 
= True 
 921             supports_outside_bmp 
= False 
 922         if supports_outside_bmp
: 
 923             self
.assertEqual(extract_attributes('<e x="Smile 😀!">'), {'x': 'Smile \U0001f600!'}) 
 924         # Malformed HTML should not break attributes extraction on older Python 
 925         self
.assertEqual(extract_attributes('<mal"formed/>'), {}) 
 927     def test_clean_html(self
): 
 928         self
.assertEqual(clean_html('a:\nb'), 'a: b') 
 929         self
.assertEqual(clean_html('a:\n   "b"'), 'a:    "b"') 
 930         self
.assertEqual(clean_html('a<br>\xa0b'), 'a\nb') 
 932     def test_intlist_to_bytes(self
): 
 934             intlist_to_bytes([0, 1, 127, 128, 255]), 
 935             b
'\x00\x01\x7f\x80\xff') 
 937     def test_args_to_str(self
): 
 939             args_to_str(['foo', 'ba/r', '-baz', '2 be', '']), 
 940             'foo ba/r -baz \'2 be\' \'\'' if compat_os_name 
!= 'nt' else 'foo ba/r -baz "2 be" ""' 
 943     def test_parse_filesize(self
): 
 944         self
.assertEqual(parse_filesize(None), None) 
 945         self
.assertEqual(parse_filesize(''), None) 
 946         self
.assertEqual(parse_filesize('91 B'), 91) 
 947         self
.assertEqual(parse_filesize('foobar'), None) 
 948         self
.assertEqual(parse_filesize('2 MiB'), 2097152) 
 949         self
.assertEqual(parse_filesize('5 GB'), 5000000000) 
 950         self
.assertEqual(parse_filesize('1.2Tb'), 1200000000000) 
 951         self
.assertEqual(parse_filesize('1.2tb'), 1200000000000) 
 952         self
.assertEqual(parse_filesize('1,24 KB'), 1240) 
 953         self
.assertEqual(parse_filesize('1,24 kb'), 1240) 
 954         self
.assertEqual(parse_filesize('8.5 megabytes'), 8500000) 
 956     def test_parse_count(self
): 
 957         self
.assertEqual(parse_count(None), None) 
 958         self
.assertEqual(parse_count(''), None) 
 959         self
.assertEqual(parse_count('0'), 0) 
 960         self
.assertEqual(parse_count('1000'), 1000) 
 961         self
.assertEqual(parse_count('1.000'), 1000) 
 962         self
.assertEqual(parse_count('1.1k'), 1100) 
 963         self
.assertEqual(parse_count('1.1kk'), 1100000) 
 964         self
.assertEqual(parse_count('1.1kk '), 1100000) 
 965         self
.assertEqual(parse_count('1.1kk views'), 1100000) 
 967     def test_version_tuple(self
): 
 968         self
.assertEqual(version_tuple('1'), (1,)) 
 969         self
.assertEqual(version_tuple('10.23.344'), (10, 23, 344)) 
 970         self
.assertEqual(version_tuple('10.1-6'), (10, 1, 6))  # avconv style 
 972     def test_detect_exe_version(self
): 
 973         self
.assertEqual(detect_exe_version('''ffmpeg version 1.2.1 
 974 built on May 27 2013 08:37:26 with gcc 4.7 (Debian 4.7.3-4) 
 975 configuration: --prefix=/usr --extra-'''), '1.2.1') 
 976         self
.assertEqual(detect_exe_version('''ffmpeg version N-63176-g1fb4685 
 977 built on May 15 2014 22:09:06 with gcc 4.8.2 (GCC)'''), 'N-63176-g1fb4685') 
 978         self
.assertEqual(detect_exe_version('''X server found. dri2 connection failed! 
 979 Trying to open render node... 
 980 Success at /dev/dri/renderD128. 
 981 ffmpeg version 2.4.4 Copyright (c) 2000-2014 the FFmpeg ...'''), '2.4.4') 
 983     def test_age_restricted(self
): 
 984         self
.assertFalse(age_restricted(None, 10))  # unrestricted content 
 985         self
.assertFalse(age_restricted(1, None))  # unrestricted policy 
 986         self
.assertFalse(age_restricted(8, 10)) 
 987         self
.assertTrue(age_restricted(18, 14)) 
 988         self
.assertFalse(age_restricted(18, 18)) 
 990     def test_is_html(self
): 
 991         self
.assertFalse(is_html(b
'\x49\x44\x43<html')) 
 992         self
.assertTrue(is_html(b
'<!DOCTYPE foo>\xaaa')) 
 993         self
.assertTrue(is_html(  # UTF-8 with BOM 
 994             b
'\xef\xbb\xbf<!DOCTYPE foo>\xaaa')) 
 995         self
.assertTrue(is_html(  # UTF-16-LE 
 996             b
'\xff\xfe<\x00h\x00t\x00m\x00l\x00>\x00\xe4\x00' 
 998         self
.assertTrue(is_html(  # UTF-16-BE 
 999             b
'\xfe\xff\x00<\x00h\x00t\x00m\x00l\x00>\x00\xe4' 
1001         self
.assertTrue(is_html(  # UTF-32-BE 
1002             b
'\x00\x00\xFE\xFF\x00\x00\x00<\x00\x00\x00h\x00\x00\x00t\x00\x00\x00m\x00\x00\x00l\x00\x00\x00>\x00\x00\x00\xe4')) 
1003         self
.assertTrue(is_html(  # UTF-32-LE 
1004             b
'\xFF\xFE\x00\x00<\x00\x00\x00h\x00\x00\x00t\x00\x00\x00m\x00\x00\x00l\x00\x00\x00>\x00\x00\x00\xe4\x00\x00\x00')) 
1006     def test_render_table(self
): 
1010                 [[123, 4], [9999, 51]]), 
1015     def test_match_str(self
): 
1016         self
.assertRaises(ValueError, match_str
, 'xy>foobar', {}) 
1017         self
.assertFalse(match_str('xy', {'x': 1200})) 
1018         self
.assertTrue(match_str('!xy', {'x': 1200})) 
1019         self
.assertTrue(match_str('x', {'x': 1200})) 
1020         self
.assertFalse(match_str('!x', {'x': 1200})) 
1021         self
.assertTrue(match_str('x', {'x': 0})) 
1022         self
.assertFalse(match_str('x>0', {'x': 0})) 
1023         self
.assertFalse(match_str('x>0', {})) 
1024         self
.assertTrue(match_str('x>?0', {})) 
1025         self
.assertTrue(match_str('x>1K', {'x': 1200})) 
1026         self
.assertFalse(match_str('x>2K', {'x': 1200})) 
1027         self
.assertTrue(match_str('x>=1200 & x < 1300', {'x': 1200})) 
1028         self
.assertFalse(match_str('x>=1100 & x < 1200', {'x': 1200})) 
1029         self
.assertFalse(match_str('y=a212', {'y': 'foobar42'})) 
1030         self
.assertTrue(match_str('y=foobar42', {'y': 'foobar42'})) 
1031         self
.assertFalse(match_str('y!=foobar42', {'y': 'foobar42'})) 
1032         self
.assertTrue(match_str('y!=foobar2', {'y': 'foobar42'})) 
1033         self
.assertFalse(match_str( 
1034             'like_count > 100 & dislike_count <? 50 & description', 
1035             {'like_count': 90, 'description': 'foo'})) 
1036         self
.assertTrue(match_str( 
1037             'like_count > 100 & dislike_count <? 50 & description', 
1038             {'like_count': 190, 'description': 'foo'})) 
1039         self
.assertFalse(match_str( 
1040             'like_count > 100 & dislike_count <? 50 & description', 
1041             {'like_count': 190, 'dislike_count': 60, 'description': 'foo'})) 
1042         self
.assertFalse(match_str( 
1043             'like_count > 100 & dislike_count <? 50 & description', 
1044             {'like_count': 190, 'dislike_count': 10})) 
1046     def test_parse_dfxp_time_expr(self
): 
1047         self
.assertEqual(parse_dfxp_time_expr(None), None) 
1048         self
.assertEqual(parse_dfxp_time_expr(''), None) 
1049         self
.assertEqual(parse_dfxp_time_expr('0.1'), 0.1) 
1050         self
.assertEqual(parse_dfxp_time_expr('0.1s'), 0.1) 
1051         self
.assertEqual(parse_dfxp_time_expr('00:00:01'), 1.0) 
1052         self
.assertEqual(parse_dfxp_time_expr('00:00:01.100'), 1.1) 
1053         self
.assertEqual(parse_dfxp_time_expr('00:00:01:100'), 1.1) 
1055     def test_dfxp2srt(self
): 
1056         dfxp_data 
= '''<?xml version="1.0" encoding="UTF-8"?> 
1057             <tt xmlns="http://www.w3.org/ns/ttml" xml:lang="en" xmlns:tts="http://www.w3.org/ns/ttml#parameter"> 
1060                     <p begin="0" end="1">The following line contains Chinese characters and special symbols</p> 
1061                     <p begin="1" end="2">第二行<br/>♪♪</p> 
1062                     <p begin="2" dur="1"><span>Third<br/>Line</span></p> 
1063                     <p begin="3" end="-1">Lines with invalid timestamps are ignored</p> 
1064                     <p begin="-1" end="-1">Ignore, two</p> 
1065                     <p begin="3" dur="-1">Ignored, three</p> 
1068             </tt>'''.encode('utf-8') 
1070 00:00:00,000 --> 00:00:01,000 
1071 The following line contains Chinese characters and special symbols 
1074 00:00:01,000 --> 00:00:02,000 
1079 00:00:02,000 --> 00:00:03,000 
1084         self
.assertEqual(dfxp2srt(dfxp_data
), srt_data
) 
1086         dfxp_data_no_default_namespace 
= '''<?xml version="1.0" encoding="UTF-8"?> 
1087             <tt xml:lang="en" xmlns:tts="http://www.w3.org/ns/ttml#parameter"> 
1090                     <p begin="0" end="1">The first line</p> 
1093             </tt>'''.encode('utf-8') 
1095 00:00:00,000 --> 00:00:01,000 
1099         self
.assertEqual(dfxp2srt(dfxp_data_no_default_namespace
), srt_data
) 
1101         dfxp_data_with_style 
= '''<?xml version="1.0" encoding="utf-8"?> 
1102 <tt xmlns="http://www.w3.org/2006/10/ttaf1" xmlns:ttp="http://www.w3.org/2006/10/ttaf1#parameter" ttp:timeBase="media" xmlns:tts="http://www.w3.org/2006/10/ttaf1#style" xml:lang="en" xmlns:ttm="http://www.w3.org/2006/10/ttaf1#metadata"> 
1105       <style id="s2" style="s0" tts:color="cyan" tts:fontWeight="bold" /> 
1106       <style id="s1" style="s0" tts:color="yellow" tts:fontStyle="italic" /> 
1107       <style id="s3" style="s0" tts:color="lime" tts:textDecoration="underline" /> 
1108       <style id="s0" tts:backgroundColor="black" tts:fontStyle="normal" tts:fontSize="16" tts:fontFamily="sansSerif" tts:color="white" /> 
1111   <body tts:textAlign="center" style="s0"> 
1113       <p begin="00:00:02.08" id="p0" end="00:00:05.84">default style<span tts:color="red">custom style</span></p> 
1114       <p style="s2" begin="00:00:02.08" id="p0" end="00:00:05.84"><span tts:color="lime">part 1<br /></span><span tts:color="cyan">part 2</span></p> 
1115       <p style="s3" begin="00:00:05.84" id="p1" end="00:00:09.56">line 3<br />part 3</p> 
1116       <p style="s1" tts:textDecoration="underline" begin="00:00:09.56" id="p2" end="00:00:12.36"><span style="s2" tts:color="lime">inner<br /> </span>style</p> 
1119 </tt>'''.encode('utf-8') 
1121 00:00:02,080 --> 00:00:05,839 
1122 <font color="white" face="sansSerif" size="16">default style<font color="red">custom style</font></font> 
1125 00:00:02,080 --> 00:00:05,839 
1126 <b><font color="cyan" face="sansSerif" size="16"><font color="lime">part 1 
1127 </font>part 2</font></b> 
1130 00:00:05,839 --> 00:00:09,560 
1131 <u><font color="lime">line 3 
1135 00:00:09,560 --> 00:00:12,359 
1136 <i><u><font color="yellow"><font color="lime">inner 
1137  </font>style</font></u></i> 
1140         self
.assertEqual(dfxp2srt(dfxp_data_with_style
), srt_data
) 
1142         dfxp_data_non_utf8 
= '''<?xml version="1.0" encoding="UTF-16"?> 
1143             <tt xmlns="http://www.w3.org/ns/ttml" xml:lang="en" xmlns:tts="http://www.w3.org/ns/ttml#parameter"> 
1146                     <p begin="0" end="1">Line 1</p> 
1147                     <p begin="1" end="2">第二行</p> 
1150             </tt>'''.encode('utf-16') 
1152 00:00:00,000 --> 00:00:01,000 
1156 00:00:01,000 --> 00:00:02,000 
1160         self
.assertEqual(dfxp2srt(dfxp_data_non_utf8
), srt_data
) 
1162     def test_cli_option(self
): 
1163         self
.assertEqual(cli_option({'proxy': '127.0.0.1:3128'}, '--proxy', 'proxy'), ['--proxy', '127.0.0.1:3128']) 
1164         self
.assertEqual(cli_option({'proxy': None}, '--proxy', 'proxy'), []) 
1165         self
.assertEqual(cli_option({}, '--proxy', 'proxy'), []) 
1166         self
.assertEqual(cli_option({'retries': 10}, '--retries', 'retries'), ['--retries', '10']) 
1168     def test_cli_valueless_option(self
): 
1169         self
.assertEqual(cli_valueless_option( 
1170             {'downloader': 'external'}, '--external-downloader', 'downloader', 'external'), ['--external-downloader']) 
1171         self
.assertEqual(cli_valueless_option( 
1172             {'downloader': 'internal'}, '--external-downloader', 'downloader', 'external'), []) 
1173         self
.assertEqual(cli_valueless_option( 
1174             {'nocheckcertificate': True}, '--no-check-certificate', 'nocheckcertificate'), ['--no-check-certificate']) 
1175         self
.assertEqual(cli_valueless_option( 
1176             {'nocheckcertificate': False}, '--no-check-certificate', 'nocheckcertificate'), []) 
1177         self
.assertEqual(cli_valueless_option( 
1178             {'checkcertificate': True}, '--no-check-certificate', 'checkcertificate', False), []) 
1179         self
.assertEqual(cli_valueless_option( 
1180             {'checkcertificate': False}, '--no-check-certificate', 'checkcertificate', False), ['--no-check-certificate']) 
1182     def test_cli_bool_option(self
): 
1185                 {'nocheckcertificate': True}, '--no-check-certificate', 'nocheckcertificate'), 
1186             ['--no-check-certificate', 'true']) 
1189                 {'nocheckcertificate': True}, '--no-check-certificate', 'nocheckcertificate', separator
='='), 
1190             ['--no-check-certificate=true']) 
1193                 {'nocheckcertificate': True}, '--check-certificate', 'nocheckcertificate', 'false', 'true'), 
1194             ['--check-certificate', 'false']) 
1197                 {'nocheckcertificate': True}, '--check-certificate', 'nocheckcertificate', 'false', 'true', '='), 
1198             ['--check-certificate=false']) 
1201                 {'nocheckcertificate': False}, '--check-certificate', 'nocheckcertificate', 'false', 'true'), 
1202             ['--check-certificate', 'true']) 
1205                 {'nocheckcertificate': False}, '--check-certificate', 'nocheckcertificate', 'false', 'true', '='), 
1206             ['--check-certificate=true']) 
1209                 {}, '--check-certificate', 'nocheckcertificate', 'false', 'true', '='), 
1212     def test_ohdave_rsa_encrypt(self
): 
1213         N 
= 0xab86b6371b5318aaa1d3c9e612a9f1264f372323c8c0f19875b5fc3b3fd3afcc1e5bec527aa94bfa85bffc157e4245aebda05389a5357b75115ac94f074aefcd 
1217             ohdave_rsa_encrypt(b
'aa111222', e
, N
), 
1218             '726664bd9a23fd0c70f9f1b84aab5e3905ce1e45a584e9cbcf9bcc7510338fc1986d6c599ff990d923aa43c51c0d9013cd572e13bc58f4ae48f2ed8c0b0ba881') 
1220     def test_pkcs1pad(self
): 
1222         padded_data 
= pkcs1pad(data
, 32) 
1223         self
.assertEqual(padded_data
[:2], [0, 2]) 
1224         self
.assertEqual(padded_data
[28:], [0, 1, 2, 3]) 
1226         self
.assertRaises(ValueError, pkcs1pad
, data
, 8) 
1228     def test_encode_base_n(self
): 
1229         self
.assertEqual(encode_base_n(0, 30), '0') 
1230         self
.assertEqual(encode_base_n(80, 30), '2k') 
1232         custom_table 
= '9876543210ZYXWVUTSRQPONMLKJIHGFEDCBA' 
1233         self
.assertEqual(encode_base_n(0, 30, custom_table
), '9') 
1234         self
.assertEqual(encode_base_n(80, 30, custom_table
), '7P') 
1236         self
.assertRaises(ValueError, encode_base_n
, 0, 70) 
1237         self
.assertRaises(ValueError, encode_base_n
, 0, 60, custom_table
) 
1239     def test_urshift(self
): 
1240         self
.assertEqual(urshift(3, 1), 1) 
1241         self
.assertEqual(urshift(-3, 1), 2147483646) 
1243     def test_get_element_by_class(self
): 
1245             <span class="foo bar">nice</span> 
1248         self
.assertEqual(get_element_by_class('foo', html
), 'nice') 
1249         self
.assertEqual(get_element_by_class('no-such-class', html
), None) 
1251     def test_get_element_by_attribute(self
): 
1253             <span class="foo bar">nice</span> 
1256         self
.assertEqual(get_element_by_attribute('class', 'foo bar', html
), 'nice') 
1257         self
.assertEqual(get_element_by_attribute('class', 'foo', html
), None) 
1258         self
.assertEqual(get_element_by_attribute('class', 'no-such-foo', html
), None) 
1261             <div itemprop="author" itemscope>foo</div> 
1264         self
.assertEqual(get_element_by_attribute('itemprop', 'author', html
), 'foo') 
1266     def test_get_elements_by_class(self
): 
1268             <span class="foo bar">nice</span><span class="foo bar">also nice</span> 
1271         self
.assertEqual(get_elements_by_class('foo', html
), ['nice', 'also nice']) 
1272         self
.assertEqual(get_elements_by_class('no-such-class', html
), []) 
1274     def test_get_elements_by_attribute(self
): 
1276             <span class="foo bar">nice</span><span class="foo bar">also nice</span> 
1279         self
.assertEqual(get_elements_by_attribute('class', 'foo bar', html
), ['nice', 'also nice']) 
1280         self
.assertEqual(get_elements_by_attribute('class', 'foo', html
), []) 
1281         self
.assertEqual(get_elements_by_attribute('class', 'no-such-foo', html
), []) 
1284 if __name__ 
== '__main__':