2 # -*- coding: utf-8 -*- 
   4 from __future__ 
import unicode_literals
 
  30 import xml
.etree
.ElementTree
 
  41     compat_urllib_parse_urlparse
, 
  42     compat_urllib_request
, 
  47 # This is not clearly defined otherwise 
  48 compiled_regex_type 
= type(re
.compile('')) 
  51     'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0 (Chrome)', 
  52     'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7', 
  53     'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 
  54     'Accept-Encoding': 'gzip, deflate', 
  55     'Accept-Language': 'en-us,en;q=0.5', 
  58 def preferredencoding(): 
  59     """Get preferred encoding. 
  61     Returns the best encoding scheme for the system, based on 
  62     locale.getpreferredencoding() and some further tweaks. 
  65         pref 
= locale
.getpreferredencoding() 
  73 def write_json_file(obj
, fn
): 
  74     """ Encode obj as JSON and write it to fn, atomically if possible """ 
  76     fn 
= encodeFilename(fn
) 
  77     if sys
.version_info 
< (3, 0) and sys
.platform 
!= 'win32': 
  78         encoding 
= get_filesystem_encoding() 
  79         # os.path.basename returns a bytes object, but NamedTemporaryFile 
  80         # will fail if the filename contains non ascii characters unless we 
  81         # use a unicode object 
  82         path_basename 
= lambda f
: os
.path
.basename(fn
).decode(encoding
) 
  83         # the same for os.path.dirname 
  84         path_dirname 
= lambda f
: os
.path
.dirname(fn
).decode(encoding
) 
  86         path_basename 
= os
.path
.basename
 
  87         path_dirname 
= os
.path
.dirname
 
  91         'prefix': path_basename(fn
) + '.', 
  92         'dir': path_dirname(fn
), 
  96     # In Python 2.x, json.dump expects a bytestream. 
  97     # In Python 3.x, it writes to a character stream 
  98     if sys
.version_info 
< (3, 0): 
 106     tf 
= tempfile
.NamedTemporaryFile(**args
) 
 111         if sys
.platform 
== 'win32': 
 112             # Need to remove existing file on Windows, else os.rename raises 
 113             # WindowsError or FileExistsError. 
 118         os
.rename(tf
.name
, fn
) 
 127 if sys
.version_info 
>= (2, 7): 
 128     def find_xpath_attr(node
, xpath
, key
, val
): 
 129         """ Find the xpath xpath[@key=val] """ 
 130         assert re
.match(r
'^[a-zA-Z-]+$', key
) 
 131         assert re
.match(r
'^[a-zA-Z0-9@\s:._-]*$', val
) 
 132         expr 
= xpath 
+ u
"[@%s='%s']" % (key
, val
) 
 133         return node
.find(expr
) 
 135     def find_xpath_attr(node
, xpath
, key
, val
): 
 136         # Here comes the crazy part: In 2.6, if the xpath is a unicode, 
 137         # .//node does not match if a node is a direct child of . ! 
 138         if isinstance(xpath
, unicode): 
 139             xpath 
= xpath
.encode('ascii') 
 141         for f 
in node
.findall(xpath
): 
 142             if f
.attrib
.get(key
) == val
: 
 146 # On python2.6 the xml.etree.ElementTree.Element methods don't support 
 147 # the namespace parameter 
 148 def xpath_with_ns(path
, ns_map
): 
 149     components 
= [c
.split(':') for c 
in path
.split('/')] 
 153             replaced
.append(c
[0]) 
 156             replaced
.append('{%s}%s' % (ns_map
[ns
], tag
)) 
 157     return '/'.join(replaced
) 
 160 def xpath_text(node
, xpath
, name
=None, fatal
=False): 
 161     if sys
.version_info 
< (2, 7):  # Crazy 2.6 
 162         xpath 
= xpath
.encode('ascii') 
 167             name 
= xpath 
if name 
is None else name
 
 168             raise ExtractorError('Could not find XML element %s' % name
) 
 174 def get_element_by_id(id, html
): 
 175     """Return the content of the tag with the specified ID in the passed HTML document""" 
 176     return get_element_by_attribute("id", id, html
) 
 179 def get_element_by_attribute(attribute
, value
, html
): 
 180     """Return the content of the tag with the specified attribute in the passed HTML document""" 
 182     m 
= re
.search(r
'''(?xs) 
 184          (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]+|="[^"]+"|='[^']+'))*? 
 186          (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]+|="[^"]+"|='[^']+'))*? 
 190     ''' % (re
.escape(attribute
), re
.escape(value
)), html
) 
 194     res 
= m
.group('content') 
 196     if res
.startswith('"') or res
.startswith("'"): 
 199     return unescapeHTML(res
) 
 202 def clean_html(html
): 
 203     """Clean an HTML snippet into a readable string""" 
 205     html 
= html
.replace('\n', ' ') 
 206     html 
= re
.sub(r
'\s*<\s*br\s*/?\s*>\s*', '\n', html
) 
 207     html 
= re
.sub(r
'<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html
) 
 209     html 
= re
.sub('<.*?>', '', html
) 
 210     # Replace html entities 
 211     html 
= unescapeHTML(html
) 
 215 def sanitize_open(filename
, open_mode
): 
 216     """Try to open the given filename, and slightly tweak it if this fails. 
 218     Attempts to open the given filename. If this fails, it tries to change 
 219     the filename slightly, step by step, until it's either able to open it 
 220     or it fails and raises a final exception, like the standard open() 
 223     It returns the tuple (stream, definitive_file_name). 
 227             if sys
.platform 
== 'win32': 
 229                 msvcrt
.setmode(sys
.stdout
.fileno(), os
.O_BINARY
) 
 230             return (sys
.stdout
.buffer if hasattr(sys
.stdout
, 'buffer') else sys
.stdout
, filename
) 
 231         stream 
= open(encodeFilename(filename
), open_mode
) 
 232         return (stream
, filename
) 
 233     except (IOError, OSError) as err
: 
 234         if err
.errno 
in (errno
.EACCES
,): 
 237         # In case of error, try to remove win32 forbidden chars 
 238         alt_filename 
= os
.path
.join( 
 239                         re
.sub('[/<>:"\\|\\\\?\\*]', '#', path_part
) 
 240                         for path_part 
in os
.path
.split(filename
) 
 242         if alt_filename 
== filename
: 
 245             # An exception here should be caught in the caller 
 246             stream 
= open(encodeFilename(filename
), open_mode
) 
 247             return (stream
, alt_filename
) 
 250 def timeconvert(timestr
): 
 251     """Convert RFC 2822 defined time string into system timestamp""" 
 253     timetuple 
= email
.utils
.parsedate_tz(timestr
) 
 254     if timetuple 
is not None: 
 255         timestamp 
= email
.utils
.mktime_tz(timetuple
) 
 258 def sanitize_filename(s
, restricted
=False, is_id
=False): 
 259     """Sanitizes a string so it could be used as part of a filename. 
 260     If restricted is set, use a stricter subset of allowed characters. 
 261     Set is_id if this is not an arbitrary string, but an ID that should be kept if possible 
 263     def replace_insane(char
): 
 264         if char 
== '?' or ord(char
) < 32 or ord(char
) == 127: 
 267             return '' if restricted 
else '\'' 
 269             return '_-' if restricted 
else ' -' 
 270         elif char 
in '\\/|*<>': 
 272         if restricted 
and (char 
in '!&\'()[]{}$;`^,#' or char
.isspace()): 
 274         if restricted 
and ord(char
) > 127: 
 278     result 
= ''.join(map(replace_insane
, s
)) 
 280         while '__' in result
: 
 281             result 
= result
.replace('__', '_') 
 282         result 
= result
.strip('_') 
 283         # Common case of "Foreign band name - English song title" 
 284         if restricted 
and result
.startswith('-_'): 
 290 def orderedSet(iterable
): 
 291     """ Remove all duplicates from the input iterable """ 
 299 def _htmlentity_transform(entity
): 
 300     """Transforms an HTML entity to a character.""" 
 301     # Known non-numeric HTML entity 
 302     if entity 
in compat_html_entities
.name2codepoint
: 
 303         return compat_chr(compat_html_entities
.name2codepoint
[entity
]) 
 305     mobj 
= re
.match(r
'#(x?[0-9]+)', entity
) 
 307         numstr 
= mobj
.group(1) 
 308         if numstr
.startswith('x'): 
 310             numstr 
= '0%s' % numstr
 
 313         return compat_chr(int(numstr
, base
)) 
 315     # Unknown entity in name, return its literal representation 
 316     return ('&%s;' % entity
) 
 322     assert type(s
) == compat_str
 
 325         r
'&([^;]+);', lambda m
: _htmlentity_transform(m
.group(1)), s
) 
 328 def encodeFilename(s
, for_subprocess
=False): 
 330     @param s The name of the file 
 333     assert type(s
) == compat_str
 
 335     # Python 3 has a Unicode API 
 336     if sys
.version_info 
>= (3, 0): 
 339     if sys
.platform 
== 'win32' and sys
.getwindowsversion()[0] >= 5: 
 340         # Pass '' directly to use Unicode APIs on Windows 2000 and up 
 341         # (Detecting Windows NT 4 is tricky because 'major >= 4' would 
 342         # match Windows 9x series as well. Besides, NT 4 is obsolete.) 
 343         if not for_subprocess
: 
 346             # For subprocess calls, encode with locale encoding 
 347             # Refer to http://stackoverflow.com/a/9951851/35070 
 348             encoding 
= preferredencoding() 
 350         encoding 
= sys
.getfilesystemencoding() 
 353     return s
.encode(encoding
, 'ignore') 
 356 def encodeArgument(s
): 
 357     if not isinstance(s
, compat_str
): 
 358         # Legacy code that uses byte strings 
 359         # Uncomment the following line after fixing all post processors 
 360         #assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s)) 
 361         s 
= s
.decode('ascii') 
 362     return encodeFilename(s
, True) 
 365 def decodeOption(optval
): 
 368     if isinstance(optval
, bytes): 
 369         optval 
= optval
.decode(preferredencoding()) 
 371     assert isinstance(optval
, compat_str
) 
 374 def formatSeconds(secs
): 
 376         return '%d:%02d:%02d' % (secs 
// 3600, (secs 
% 3600) // 60, secs 
% 60) 
 378         return '%d:%02d' % (secs 
// 60, secs 
% 60) 
 383 def make_HTTPS_handler(opts_no_check_certificate
, **kwargs
): 
 384     if sys
.version_info 
< (3, 2): 
 387         class HTTPSConnectionV3(httplib
.HTTPSConnection
): 
 388             def __init__(self
, *args
, **kwargs
): 
 389                 httplib
.HTTPSConnection
.__init
__(self
, *args
, **kwargs
) 
 392                 sock 
= socket
.create_connection((self
.host
, self
.port
), self
.timeout
) 
 393                 if getattr(self
, '_tunnel_host', False): 
 397                     self
.sock 
= ssl
.wrap_socket(sock
, self
.key_file
, self
.cert_file
, ssl_version
=ssl
.PROTOCOL_TLSv1
) 
 399                     self
.sock 
= ssl
.wrap_socket(sock
, self
.key_file
, self
.cert_file
, ssl_version
=ssl
.PROTOCOL_SSLv23
) 
 401         class HTTPSHandlerV3(compat_urllib_request
.HTTPSHandler
): 
 402             def https_open(self
, req
): 
 403                 return self
.do_open(HTTPSConnectionV3
, req
) 
 404         return HTTPSHandlerV3(**kwargs
) 
 405     elif hasattr(ssl
, 'create_default_context'):  # Python >= 3.4 
 406         context 
= ssl
.create_default_context(ssl
.Purpose
.CLIENT_AUTH
) 
 407         context
.options 
&= ~ssl
.OP_NO_SSLv3  
# Allow older, not-as-secure SSLv3 
 408         if opts_no_check_certificate
: 
 409             context
.verify_mode 
= ssl
.CERT_NONE
 
 410         return compat_urllib_request
.HTTPSHandler(context
=context
, **kwargs
) 
 412         context 
= ssl
.SSLContext(ssl
.PROTOCOL_SSLv23
) 
 413         context
.verify_mode 
= (ssl
.CERT_NONE
 
 414                                if opts_no_check_certificate
 
 415                                else ssl
.CERT_REQUIRED
) 
 416         context
.set_default_verify_paths() 
 418             context
.load_default_certs() 
 419         except AttributeError: 
 421         return compat_urllib_request
.HTTPSHandler(context
=context
, **kwargs
) 
 424 class ExtractorError(Exception): 
 425     """Error during info extraction.""" 
 426     def __init__(self
, msg
, tb
=None, expected
=False, cause
=None, video_id
=None): 
 427         """ tb, if given, is the original traceback (so that it can be printed out). 
 428         If expected is set, this is a normal error message and most likely not a bug in youtube-dl. 
 431         if sys
.exc_info()[0] in (compat_urllib_error
.URLError
, socket
.timeout
, UnavailableVideoError
): 
 433         if video_id 
is not None: 
 434             msg 
= video_id 
+ ': ' + msg
 
 436             msg 
+= ' (caused by %r)' % cause
 
 438             if ytdl_is_updateable(): 
 439                 update_cmd 
= 'type  youtube-dl -U  to update' 
 441                 update_cmd 
= 'see  https://yt-dl.org/update  on how to update' 
 442             msg 
+= '; please report this issue on https://yt-dl.org/bug .' 
 443             msg 
+= ' Make sure you are using the latest version; %s.' % update_cmd
 
 444             msg 
+= ' Be sure to call youtube-dl with the --verbose flag and include its complete output.' 
 445         super(ExtractorError
, self
).__init
__(msg
) 
 448         self
.exc_info 
= sys
.exc_info()  # preserve original exception 
 450         self
.video_id 
= video_id
 
 452     def format_traceback(self
): 
 453         if self
.traceback 
is None: 
 455         return ''.join(traceback
.format_tb(self
.traceback
)) 
 458 class RegexNotFoundError(ExtractorError
): 
 459     """Error when a regex didn't match""" 
 463 class DownloadError(Exception): 
 464     """Download Error exception. 
 466     This exception may be thrown by FileDownloader objects if they are not 
 467     configured to continue on errors. They will contain the appropriate 
 470     def __init__(self
, msg
, exc_info
=None): 
 471         """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """ 
 472         super(DownloadError
, self
).__init
__(msg
) 
 473         self
.exc_info 
= exc_info
 
 476 class SameFileError(Exception): 
 477     """Same File exception. 
 479     This exception will be thrown by FileDownloader objects if they detect 
 480     multiple files would have to be downloaded to the same file on disk. 
 485 class PostProcessingError(Exception): 
 486     """Post Processing exception. 
 488     This exception may be raised by PostProcessor's .run() method to 
 489     indicate an error in the postprocessing task. 
 491     def __init__(self
, msg
): 
 494 class MaxDownloadsReached(Exception): 
 495     """ --max-downloads limit has been reached. """ 
 499 class UnavailableVideoError(Exception): 
 500     """Unavailable Format exception. 
 502     This exception will be thrown when a video is requested 
 503     in a format that is not available for that video. 
 508 class ContentTooShortError(Exception): 
 509     """Content Too Short exception. 
 511     This exception may be raised by FileDownloader objects when a file they 
 512     download is too small for what the server announced first, indicating 
 513     the connection was probably interrupted. 
 519     def __init__(self
, downloaded
, expected
): 
 520         self
.downloaded 
= downloaded
 
 521         self
.expected 
= expected
 
 523 class YoutubeDLHandler(compat_urllib_request
.HTTPHandler
): 
 524     """Handler for HTTP requests and responses. 
 526     This class, when installed with an OpenerDirector, automatically adds 
 527     the standard headers to every HTTP request and handles gzipped and 
 528     deflated responses from web servers. If compression is to be avoided in 
 529     a particular request, the original request in the program code only has 
 530     to include the HTTP header "Youtubedl-No-Compression", which will be 
 531     removed before making the real request. 
 533     Part of this code was copied from: 
 535     http://techknack.net/python-urllib2-handlers/ 
 537     Andrew Rowls, the author of that code, agreed to release it to the 
 544             return zlib
.decompress(data
, -zlib
.MAX_WBITS
) 
 546             return zlib
.decompress(data
) 
 549     def addinfourl_wrapper(stream
, headers
, url
, code
): 
 550         if hasattr(compat_urllib_request
.addinfourl
, 'getcode'): 
 551             return compat_urllib_request
.addinfourl(stream
, headers
, url
, code
) 
 552         ret 
= compat_urllib_request
.addinfourl(stream
, headers
, url
) 
 556     def http_request(self
, req
): 
 557         for h
, v 
in std_headers
.items(): 
 558             if h 
not in req
.headers
: 
 560         if 'Youtubedl-no-compression' in req
.headers
: 
 561             if 'Accept-encoding' in req
.headers
: 
 562                 del req
.headers
['Accept-encoding'] 
 563             del req
.headers
['Youtubedl-no-compression'] 
 564         if 'Youtubedl-user-agent' in req
.headers
: 
 565             if 'User-agent' in req
.headers
: 
 566                 del req
.headers
['User-agent'] 
 567             req
.headers
['User-agent'] = req
.headers
['Youtubedl-user-agent'] 
 568             del req
.headers
['Youtubedl-user-agent'] 
 570         if sys
.version_info 
< (2, 7) and '#' in req
.get_full_url(): 
 571             # Python 2.6 is brain-dead when it comes to fragments 
 572             req
._Request
__original 
= req
._Request
__original
.partition('#')[0] 
 573             req
._Request
__r
_type 
= req
._Request
__r
_type
.partition('#')[0] 
 577     def http_response(self
, req
, resp
): 
 580         if resp
.headers
.get('Content-encoding', '') == 'gzip': 
 581             content 
= resp
.read() 
 582             gz 
= gzip
.GzipFile(fileobj
=io
.BytesIO(content
), mode
='rb') 
 584                 uncompressed 
= io
.BytesIO(gz
.read()) 
 585             except IOError as original_ioerror
: 
 586                 # There may be junk add the end of the file 
 587                 # See http://stackoverflow.com/q/4928560/35070 for details 
 588                 for i 
in range(1, 1024): 
 590                         gz 
= gzip
.GzipFile(fileobj
=io
.BytesIO(content
[:-i
]), mode
='rb') 
 591                         uncompressed 
= io
.BytesIO(gz
.read()) 
 596                     raise original_ioerror
 
 597             resp 
= self
.addinfourl_wrapper(uncompressed
, old_resp
.headers
, old_resp
.url
, old_resp
.code
) 
 598             resp
.msg 
= old_resp
.msg
 
 600         if resp
.headers
.get('Content-encoding', '') == 'deflate': 
 601             gz 
= io
.BytesIO(self
.deflate(resp
.read())) 
 602             resp 
= self
.addinfourl_wrapper(gz
, old_resp
.headers
, old_resp
.url
, old_resp
.code
) 
 603             resp
.msg 
= old_resp
.msg
 
 606     https_request 
= http_request
 
 607     https_response 
= http_response
 
 610 def parse_iso8601(date_str
, delimiter
='T'): 
 611     """ Return a UNIX timestamp from the given date """ 
 617         r
'(\.[0-9]+)?(?:Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)', 
 620         timezone 
= datetime
.timedelta() 
 622         date_str 
= date_str
[:-len(m
.group(0))] 
 623         if not m
.group('sign'): 
 624             timezone 
= datetime
.timedelta() 
 626             sign 
= 1 if m
.group('sign') == '+' else -1 
 627             timezone 
= datetime
.timedelta( 
 628                 hours
=sign 
* int(m
.group('hours')), 
 629                 minutes
=sign 
* int(m
.group('minutes'))) 
 630     date_format 
= '%Y-%m-%d{0}%H:%M:%S'.format(delimiter
) 
 631     dt 
= datetime
.datetime
.strptime(date_str
, date_format
) - timezone
 
 632     return calendar
.timegm(dt
.timetuple()) 
 635 def unified_strdate(date_str
): 
 636     """Return a string with the date in the format YYYYMMDD""" 
 643     date_str 
= date_str
.replace(',', ' ') 
 644     # %z (UTC offset) is only supported in python>=3.2 
 645     date_str 
= re
.sub(r
' ?(\+|-)[0-9]{2}:?[0-9]{2}$', '', date_str
) 
 646     format_expressions 
= [ 
 651         '%b %dst %Y %I:%M%p', 
 652         '%b %dnd %Y %I:%M%p', 
 653         '%b %dth %Y %I:%M%p', 
 662         '%Y-%m-%d %H:%M:%S.%f', 
 665         '%Y-%m-%dT%H:%M:%SZ', 
 666         '%Y-%m-%dT%H:%M:%S.%fZ', 
 667         '%Y-%m-%dT%H:%M:%S.%f0Z', 
 669         '%Y-%m-%dT%H:%M:%S.%f', 
 672     for expression 
in format_expressions
: 
 674             upload_date 
= datetime
.datetime
.strptime(date_str
, expression
).strftime('%Y%m%d') 
 677     if upload_date 
is None: 
 678         timetuple 
= email
.utils
.parsedate_tz(date_str
) 
 680             upload_date 
= datetime
.datetime(*timetuple
[:6]).strftime('%Y%m%d') 
 683 def determine_ext(url
, default_ext
='unknown_video'): 
 686     guess 
= url
.partition('?')[0].rpartition('.')[2] 
 687     if re
.match(r
'^[A-Za-z0-9]+$', guess
): 
 692 def subtitles_filename(filename
, sub_lang
, sub_format
): 
 693     return filename
.rsplit('.', 1)[0] + '.' + sub_lang 
+ '.' + sub_format
 
 695 def date_from_str(date_str
): 
 697     Return a datetime object from a string in the format YYYYMMDD or 
 698     (now|today)[+-][0-9](day|week|month|year)(s)?""" 
 699     today 
= datetime
.date
.today() 
 700     if date_str 
== 'now'or date_str 
== 'today': 
 702     match 
= re
.match('(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str
) 
 703     if match 
is not None: 
 704         sign 
= match
.group('sign') 
 705         time 
= int(match
.group('time')) 
 708         unit 
= match
.group('unit') 
 717         delta 
= datetime
.timedelta(**{unit
: time
}) 
 719     return datetime
.datetime
.strptime(date_str
, "%Y%m%d").date() 
 721 def hyphenate_date(date_str
): 
 723     Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format""" 
 724     match 
= re
.match(r
'^(\d\d\d\d)(\d\d)(\d\d)$', date_str
) 
 725     if match 
is not None: 
 726         return '-'.join(match
.groups()) 
 730 class DateRange(object): 
 731     """Represents a time interval between two dates""" 
 732     def __init__(self
, start
=None, end
=None): 
 733         """start and end must be strings in the format accepted by date""" 
 734         if start 
is not None: 
 735             self
.start 
= date_from_str(start
) 
 737             self
.start 
= datetime
.datetime
.min.date() 
 739             self
.end 
= date_from_str(end
) 
 741             self
.end 
= datetime
.datetime
.max.date() 
 742         if self
.start 
> self
.end
: 
 743             raise ValueError('Date range: "%s" , the start date must be before the end date' % self
) 
 746         """Returns a range that only contains the given day""" 
 748     def __contains__(self
, date
): 
 749         """Check if the date is in the range""" 
 750         if not isinstance(date
, datetime
.date
): 
 751             date 
= date_from_str(date
) 
 752         return self
.start 
<= date 
<= self
.end
 
 754         return '%s - %s' % ( self
.start
.isoformat(), self
.end
.isoformat()) 
 758     """ Returns the platform name as a compat_str """ 
 759     res 
= platform
.platform() 
 760     if isinstance(res
, bytes): 
 761         res 
= res
.decode(preferredencoding()) 
 763     assert isinstance(res
, compat_str
) 
 767 def _windows_write_string(s
, out
): 
 768     """ Returns True if the string was written using special methods, 
 769     False if it has yet to be written out.""" 
 770     # Adapted from http://stackoverflow.com/a/3259271/35070 
 773     import ctypes
.wintypes
 
 781         fileno 
= out
.fileno() 
 782     except AttributeError: 
 783         # If the output stream doesn't have a fileno, it's virtual 
 785     if fileno 
not in WIN_OUTPUT_IDS
: 
 788     GetStdHandle 
= ctypes
.WINFUNCTYPE( 
 789         ctypes
.wintypes
.HANDLE
, ctypes
.wintypes
.DWORD
)( 
 790         ("GetStdHandle", ctypes
.windll
.kernel32
)) 
 791     h 
= GetStdHandle(WIN_OUTPUT_IDS
[fileno
]) 
 793     WriteConsoleW 
= ctypes
.WINFUNCTYPE( 
 794         ctypes
.wintypes
.BOOL
, ctypes
.wintypes
.HANDLE
, ctypes
.wintypes
.LPWSTR
, 
 795         ctypes
.wintypes
.DWORD
, ctypes
.POINTER(ctypes
.wintypes
.DWORD
), 
 796         ctypes
.wintypes
.LPVOID
)(("WriteConsoleW", ctypes
.windll
.kernel32
)) 
 797     written 
= ctypes
.wintypes
.DWORD(0) 
 799     GetFileType 
= ctypes
.WINFUNCTYPE(ctypes
.wintypes
.DWORD
, ctypes
.wintypes
.DWORD
)(("GetFileType", ctypes
.windll
.kernel32
)) 
 800     FILE_TYPE_CHAR 
= 0x0002 
 801     FILE_TYPE_REMOTE 
= 0x8000 
 802     GetConsoleMode 
= ctypes
.WINFUNCTYPE( 
 803         ctypes
.wintypes
.BOOL
, ctypes
.wintypes
.HANDLE
, 
 804         ctypes
.POINTER(ctypes
.wintypes
.DWORD
))( 
 805         ("GetConsoleMode", ctypes
.windll
.kernel32
)) 
 806     INVALID_HANDLE_VALUE 
= ctypes
.wintypes
.DWORD(-1).value
 
 808     def not_a_console(handle
): 
 809         if handle 
== INVALID_HANDLE_VALUE 
or handle 
is None: 
 811         return ((GetFileType(handle
) & ~FILE_TYPE_REMOTE
) != FILE_TYPE_CHAR
 
 812                 or GetConsoleMode(handle
, ctypes
.byref(ctypes
.wintypes
.DWORD())) == 0) 
 817     def next_nonbmp_pos(s
): 
 819             return next(i 
for i
, c 
in enumerate(s
) if ord(c
) > 0xffff) 
 820         except StopIteration: 
 824         count 
= min(next_nonbmp_pos(s
), 1024) 
 827             h
, s
, count 
if count 
else 2, ctypes
.byref(written
), None) 
 829             raise OSError('Failed to write string') 
 830         if not count
:  # We just wrote a non-BMP character 
 831             assert written
.value 
== 2 
 834             assert written
.value 
> 0 
 835             s 
= s
[written
.value
:] 
 839 def write_string(s
, out
=None, encoding
=None): 
 842     assert type(s
) == compat_str
 
 844     if sys
.platform 
== 'win32' and encoding 
is None and hasattr(out
, 'fileno'): 
 845         if _windows_write_string(s
, out
): 
 848     if ('b' in getattr(out
, 'mode', '') or 
 849             sys
.version_info
[0] < 3):  # Python 2 lies about mode of sys.stderr 
 850         byt 
= s
.encode(encoding 
or preferredencoding(), 'ignore') 
 852     elif hasattr(out
, 'buffer'): 
 853         enc 
= encoding 
or getattr(out
, 'encoding', None) or preferredencoding() 
 854         byt 
= s
.encode(enc
, 'ignore') 
 855         out
.buffer.write(byt
) 
 861 def bytes_to_intlist(bs
): 
 864     if isinstance(bs
[0], int):  # Python 3 
 867         return [ord(c
) for c 
in bs
] 
 870 def intlist_to_bytes(xs
): 
 873     return struct_pack('%dB' % len(xs
), *xs
) 
 876 # Cross-platform file locking 
 877 if sys
.platform 
== 'win32': 
 878     import ctypes
.wintypes
 
 881     class OVERLAPPED(ctypes
.Structure
): 
 883             ('Internal', ctypes
.wintypes
.LPVOID
), 
 884             ('InternalHigh', ctypes
.wintypes
.LPVOID
), 
 885             ('Offset', ctypes
.wintypes
.DWORD
), 
 886             ('OffsetHigh', ctypes
.wintypes
.DWORD
), 
 887             ('hEvent', ctypes
.wintypes
.HANDLE
), 
 890     kernel32 
= ctypes
.windll
.kernel32
 
 891     LockFileEx 
= kernel32
.LockFileEx
 
 892     LockFileEx
.argtypes 
= [ 
 893         ctypes
.wintypes
.HANDLE
,     # hFile 
 894         ctypes
.wintypes
.DWORD
,      # dwFlags 
 895         ctypes
.wintypes
.DWORD
,      # dwReserved 
 896         ctypes
.wintypes
.DWORD
,      # nNumberOfBytesToLockLow 
 897         ctypes
.wintypes
.DWORD
,      # nNumberOfBytesToLockHigh 
 898         ctypes
.POINTER(OVERLAPPED
)  # Overlapped 
 900     LockFileEx
.restype 
= ctypes
.wintypes
.BOOL
 
 901     UnlockFileEx 
= kernel32
.UnlockFileEx
 
 902     UnlockFileEx
.argtypes 
= [ 
 903         ctypes
.wintypes
.HANDLE
,     # hFile 
 904         ctypes
.wintypes
.DWORD
,      # dwReserved 
 905         ctypes
.wintypes
.DWORD
,      # nNumberOfBytesToLockLow 
 906         ctypes
.wintypes
.DWORD
,      # nNumberOfBytesToLockHigh 
 907         ctypes
.POINTER(OVERLAPPED
)  # Overlapped 
 909     UnlockFileEx
.restype 
= ctypes
.wintypes
.BOOL
 
 910     whole_low 
= 0xffffffff 
 911     whole_high 
= 0x7fffffff 
 913     def _lock_file(f
, exclusive
): 
 914         overlapped 
= OVERLAPPED() 
 915         overlapped
.Offset 
= 0 
 916         overlapped
.OffsetHigh 
= 0 
 917         overlapped
.hEvent 
= 0 
 918         f
._lock
_file
_overlapped
_p 
= ctypes
.pointer(overlapped
) 
 919         handle 
= msvcrt
.get_osfhandle(f
.fileno()) 
 920         if not LockFileEx(handle
, 0x2 if exclusive 
else 0x0, 0, 
 921                           whole_low
, whole_high
, f
._lock
_file
_overlapped
_p
): 
 922             raise OSError('Locking file failed: %r' % ctypes
.FormatError()) 
 925         assert f
._lock
_file
_overlapped
_p
 
 926         handle 
= msvcrt
.get_osfhandle(f
.fileno()) 
 927         if not UnlockFileEx(handle
, 0, 
 928                             whole_low
, whole_high
, f
._lock
_file
_overlapped
_p
): 
 929             raise OSError('Unlocking file failed: %r' % ctypes
.FormatError()) 
 934     def _lock_file(f
, exclusive
): 
 935         fcntl
.flock(f
, fcntl
.LOCK_EX 
if exclusive 
else fcntl
.LOCK_SH
) 
 938         fcntl
.flock(f
, fcntl
.LOCK_UN
) 
 941 class locked_file(object): 
 942     def __init__(self
, filename
, mode
, encoding
=None): 
 943         assert mode 
in ['r', 'a', 'w'] 
 944         self
.f 
= io
.open(filename
, mode
, encoding
=encoding
) 
 948         exclusive 
= self
.mode 
!= 'r' 
 950             _lock_file(self
.f
, exclusive
) 
 956     def __exit__(self
, etype
, value
, traceback
): 
 965     def write(self
, *args
): 
 966         return self
.f
.write(*args
) 
 968     def read(self
, *args
): 
 969         return self
.f
.read(*args
) 
 972 def get_filesystem_encoding(): 
 973     encoding 
= sys
.getfilesystemencoding() 
 974     return encoding 
if encoding 
is not None else 'utf-8' 
 977 def shell_quote(args
): 
 979     encoding 
= get_filesystem_encoding() 
 981         if isinstance(a
, bytes): 
 982             # We may get a filename encoded with 'encodeFilename' 
 983             a 
= a
.decode(encoding
) 
 984         quoted_args
.append(pipes
.quote(a
)) 
 985     return ' '.join(quoted_args
) 
 988 def takewhile_inclusive(pred
, seq
): 
 989     """ Like itertools.takewhile, but include the latest evaluated element 
 990         (the first element so that Not pred(e)) """ 
 997 def smuggle_url(url
, data
): 
 998     """ Pass additional data in a URL for internal use. """ 
1000     sdata 
= compat_urllib_parse
.urlencode( 
1001         {'__youtubedl_smuggle': json
.dumps(data
)}) 
1002     return url 
+ '#' + sdata
 
1005 def unsmuggle_url(smug_url
, default
=None): 
1006     if not '#__youtubedl_smuggle' in smug_url
: 
1007         return smug_url
, default
 
1008     url
, _
, sdata 
= smug_url
.rpartition('#') 
1009     jsond 
= compat_parse_qs(sdata
)['__youtubedl_smuggle'][0] 
1010     data 
= json
.loads(jsond
) 
1014 def format_bytes(bytes): 
1017     if type(bytes) is str: 
1018         bytes = float(bytes) 
1022         exponent 
= int(math
.log(bytes, 1024.0)) 
1023     suffix 
= ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB'][exponent
] 
1024     converted 
= float(bytes) / float(1024 ** exponent
) 
1025     return '%.2f%s' % (converted
, suffix
) 
1028 def get_term_width(): 
1029     columns 
= compat_getenv('COLUMNS', None) 
1034         sp 
= subprocess
.Popen( 
1036             stdout
=subprocess
.PIPE
, stderr
=subprocess
.PIPE
) 
1037         out
, err 
= sp
.communicate() 
1038         return int(out
.split()[1]) 
1044 def month_by_name(name
): 
1045     """ Return the number of a month by (locale-independently) English name """ 
1048         'January', 'February', 'March', 'April', 'May', 'June', 
1049         'July', 'August', 'September', 'October', 'November', 'December'] 
1051         return ENGLISH_NAMES
.index(name
) + 1 
1056 def fix_xml_ampersands(xml_str
): 
1057     """Replace all the '&' by '&' in XML""" 
1059         r
'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)', 
1064 def setproctitle(title
): 
1065     assert isinstance(title
, compat_str
) 
1067         libc 
= ctypes
.cdll
.LoadLibrary("libc.so.6") 
1070     title_bytes 
= title
.encode('utf-8') 
1071     buf 
= ctypes
.create_string_buffer(len(title_bytes
)) 
1072     buf
.value 
= title_bytes
 
1074         libc
.prctl(15, buf
, 0, 0, 0) 
1075     except AttributeError: 
1076         return  # Strange libc, just skip this 
1079 def remove_start(s
, start
): 
1080     if s
.startswith(start
): 
1081         return s
[len(start
):] 
1085 def remove_end(s
, end
): 
1087         return s
[:-len(end
)] 
def url_basename(url):
    """Return the last segment of the path component of *url*.

    The URL is parsed first, so only its path is considered; leading and
    trailing slashes of the path are dropped before it is split.
    """
    parsed = compat_urlparse.urlparse(url)
    segments = parsed.path.strip('/').split('/')
    return segments[-1]
1096 class HEADRequest(compat_urllib_request
.Request
): 
1097     def get_method(self
): 
1101 def int_or_none(v
, scale
=1, default
=None, get_attr
=None, invscale
=1): 
1104             v 
= getattr(v
, get_attr
, None) 
1107     return default 
if v 
is None else (int(v
) * invscale 
// scale
) 
def str_or_none(v, default=None):
    """ Return compat_str(v), or default when v is None """
    if v is None:
        return default
    return compat_str(v)
1114 def str_to_int(int_str
): 
1115     """ A more relaxed version of int_or_none """ 
1118     int_str 
= re
.sub(r
'[,\.\+]', '', int_str
) 
def float_or_none(v, scale=1, invscale=1, default=None):
    """ Convert v to a float, multiplied by invscale and divided by scale.

    Returns default when v is None; any other value is passed to float(). """
    if v is None:
        return default
    as_float = float(v)
    return as_float * invscale / scale
1126 def parse_duration(s
): 
1135                 (?:(?P<hours>[0-9]+)\s*(?:[:h]|hours?)\s*)? 
1136                 (?P<mins>[0-9]+)\s*(?:[:m]|mins?|minutes?)\s* 
1138             (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*(?:s|secs?|seconds?)?$''', s
) 
1141     res 
= int(m
.group('secs')) 
1143         res 
+= int(m
.group('mins')) * 60 
1144         if m
.group('hours'): 
1145             res 
+= int(m
.group('hours')) * 60 * 60 
1147         res 
+= float(m
.group('ms')) 
def prepend_extension(filename, ext):
    """ Insert ext in front of the existing extension of filename.

    The name is split with os.path.splitext; ext (without a leading dot)
    goes between the stem and the original extension, which may be empty. """
    split_name = os.path.splitext(filename)
    stem = split_name[0]
    original_ext = split_name[1]
    return '{0}.{1}{2}'.format(stem, ext, original_ext)
1156 def check_executable(exe
, args
=[]): 
1157     """ Checks if the given binary is installed somewhere in PATH, and returns its name. 
1158     args can be a list of arguments for a short output (like -version) """ 
1160         subprocess
.Popen([exe
] + args
, stdout
=subprocess
.PIPE
, stderr
=subprocess
.PIPE
).communicate() 
1166 def get_exe_version(exe
, args
=['--version'], 
1167                     version_re
=r
'version\s+([0-9._-a-zA-Z]+)', 
1168                     unrecognized
='present'): 
1169     """ Returns the version of the specified executable, 
1170     or False if the executable is not present """ 
1172         out
, err 
= subprocess
.Popen( 
1174             stdout
=subprocess
.PIPE
, stderr
=subprocess
.STDOUT
).communicate() 
1177     firstline 
= out
.partition(b
'\n')[0].decode('ascii', 'ignore') 
1178     m 
= re
.search(version_re
, firstline
) 
1185 class PagedList(object): 
1187         # This is only useful for tests 
1188         return len(self
.getslice()) 
1191 class OnDemandPagedList(PagedList
): 
1192     def __init__(self
, pagefunc
, pagesize
): 
1193         self
._pagefunc 
= pagefunc
 
1194         self
._pagesize 
= pagesize
 
1196     def getslice(self
, start
=0, end
=None): 
1198         for pagenum 
in itertools
.count(start 
// self
._pagesize
): 
1199             firstid 
= pagenum 
* self
._pagesize
 
1200             nextfirstid 
= pagenum 
* self
._pagesize 
+ self
._pagesize
 
1201             if start 
>= nextfirstid
: 
1204             page_results 
= list(self
._pagefunc
(pagenum
)) 
1207                 start 
% self
._pagesize
 
1208                 if firstid 
<= start 
< nextfirstid
 
1212                 ((end 
- 1) % self
._pagesize
) + 1 
1213                 if (end 
is not None and firstid 
<= end 
<= nextfirstid
) 
1216             if startv 
!= 0 or endv 
is not None: 
1217                 page_results 
= page_results
[startv
:endv
] 
1218             res
.extend(page_results
) 
1220             # A little optimization - if current page is not "full", ie. does 
1221             # not contain page_size videos then we can assume that this page 
1222             # is the last one - there are no more ids on further pages - 
1223             # i.e. no need to query again. 
1224             if len(page_results
) + startv 
< self
._pagesize
: 
1227             # If we got the whole page, but the next page is not interesting, 
1228             # break out early as well 
1229             if end 
== nextfirstid
: 
1234 class InAdvancePagedList(PagedList
): 
1235     def __init__(self
, pagefunc
, pagecount
, pagesize
): 
1236         self
._pagefunc 
= pagefunc
 
1237         self
._pagecount 
= pagecount
 
1238         self
._pagesize 
= pagesize
 
1240     def getslice(self
, start
=0, end
=None): 
1242         start_page 
= start 
// self
._pagesize
 
1244             self
._pagecount 
if end 
is None else (end 
// self
._pagesize 
+ 1)) 
1245         skip_elems 
= start 
- start_page 
* self
._pagesize
 
1246         only_more 
= None if end 
is None else end 
- start
 
1247         for pagenum 
in range(start_page
, end_page
): 
1248             page 
= list(self
._pagefunc
(pagenum
)) 
1250                 page 
= page
[skip_elems
:] 
1252             if only_more 
is not None: 
1253                 if len(page
) < only_more
: 
1254                     only_more 
-= len(page
) 
1256                     page 
= page
[:only_more
] 
1263 def uppercase_escape(s
): 
1264     unicode_escape 
= codecs
.getdecoder('unicode_escape') 
1266         r
'\\U[0-9a-fA-F]{8}', 
1267         lambda m
: unicode_escape(m
.group(0))[0], 
def escape_rfc3986(s):
    """Escape non-ASCII characters as suggested by RFC 3986

    On Python 2 a unicode string is first encoded to UTF-8 bytes. The value
    is then passed to compat_urllib_parse.quote with '%' and the URL
    delimiter characters marked as safe, so they are not quoted again.
    """
    # The isinstance check only runs on Python 2, where `unicode` exists
    is_py2_unicode = sys.version_info < (3, 0) and isinstance(s, unicode)
    if is_py2_unicode:
        s = s.encode('utf-8')
    safe_chars = b"%/;:@&=+$,!~*'()?#[]"
    return compat_urllib_parse.quote(s, safe_chars)
1278 def escape_url(url
): 
1279     """Escape URL as suggested by RFC 3986""" 
1280     url_parsed 
= compat_urllib_parse_urlparse(url
) 
1281     return url_parsed
._replace
( 
1282         path
=escape_rfc3986(url_parsed
.path
), 
1283         params
=escape_rfc3986(url_parsed
.params
), 
1284         query
=escape_rfc3986(url_parsed
.query
), 
1285         fragment
=escape_rfc3986(url_parsed
.fragment
) 
1289     struct
.pack('!I', 0) 
1291     # In Python 2.6 (and some 2.7 versions), struct requires a bytes argument 
1292     def struct_pack(spec
, *args
): 
1293         if isinstance(spec
, compat_str
): 
1294             spec 
= spec
.encode('ascii') 
1295         return struct
.pack(spec
, *args
) 
1297     def struct_unpack(spec
, *args
): 
1298         if isinstance(spec
, compat_str
): 
1299             spec 
= spec
.encode('ascii') 
1300         return struct
.unpack(spec
, *args
) 
1302     struct_pack 
= struct
.pack
 
1303     struct_unpack 
= struct
.unpack
 
1306 def read_batch_urls(batch_fd
): 
1308         if not isinstance(url
, compat_str
): 
1309             url 
= url
.decode('utf-8', 'replace') 
1310         BOM_UTF8 
= '\xef\xbb\xbf' 
1311         if url
.startswith(BOM_UTF8
): 
1312             url 
= url
[len(BOM_UTF8
):] 
1314         if url
.startswith(('#', ';', ']')): 
1318     with contextlib
.closing(batch_fd
) as fd
: 
1319         return [url 
for url 
in map(fixup
, fd
) if url
] 
def urlencode_postdata(*args, **kargs):
    """ URL-encode the arguments and return the result as ASCII bytes.

    All positional and keyword arguments are forwarded unchanged to
    compat_urllib_parse.urlencode. """
    encoded_query = compat_urllib_parse.urlencode(*args, **kargs)
    return encoded_query.encode('ascii')
1327     etree_iter 
= xml
.etree
.ElementTree
.Element
.iter 
1328 except AttributeError:  # Python <=2.6 
1329     etree_iter 
= lambda n
: n
.findall('.//*') 
1333     class TreeBuilder(xml
.etree
.ElementTree
.TreeBuilder
): 
1334         def doctype(self
, name
, pubid
, system
): 
1335             pass  # Ignore doctypes 
1337     parser 
= xml
.etree
.ElementTree
.XMLParser(target
=TreeBuilder()) 
1338     kwargs 
= {'parser': parser
} if sys
.version_info 
>= (2, 7) else {} 
1339     tree 
= xml
.etree
.ElementTree
.XML(s
.encode('utf-8'), **kwargs
) 
1340     # Fix up XML parser in Python 2.x 
1341     if sys
.version_info 
< (3, 0): 
1342         for n 
in etree_iter(tree
): 
1343             if n
.text 
is not None: 
1344                 if not isinstance(n
.text
, compat_str
): 
1345                     n
.text 
= n
.text
.decode('utf-8') 
1358 def parse_age_limit(s
): 
1361     m 
= re
.match(r
'^(?P<age>\d{1,2})\+?$', s
) 
1362     return int(m
.group('age')) if m 
else US_RATINGS
.get(s
, None) 
1365 def strip_jsonp(code
): 
1367         r
'(?s)^[a-zA-Z0-9_]+\s*\(\s*(.*)\);?\s*?(?://[^\n]*)*$', r
'\1', code
) 
1370 def js_to_json(code
): 
1373         if v 
in ('true', 'false', 'null'): 
1375         if v
.startswith('"'): 
1377         if v
.startswith("'"): 
1379             v 
= re
.sub(r
"\\\\|\\'|\"", lambda m: { 
1386     res = re.sub(r'''(?x) 
1387         "(?
:[^
"\\]*(?:\\\\|\\")?
)*"| 
1388         '(?:[^'\\]*(?:\\\\|\\')?)*'| 
1389         [a-zA-Z_][a-zA-Z_0-9]* 
1391     res = re.sub(r',(\s*\])', lambda m: m.group(1), res) 
1395 def qualities(quality_ids): 
1396     """ Get a numeric quality value out of a list of possible values """ 
1399             return quality_ids.index(qid) 
1405 DEFAULT_OUTTMPL = '%(title)s-%(id)s.%(ext)s' 
1408 def limit_length(s, length): 
1409     """ Add ellipses to overly long strings """ 
1414         return s[:length - len(ELLIPSES)] + ELLIPSES 
def version_tuple(v):
    """ Split a dotted version string into a list of its integer parts """
    components = v.split('.')
    return list(map(int, components))
1422 def is_outdated_version(version, limit, assume_new=True): 
1424         return not assume_new 
1426         return version_tuple(version) < version_tuple(limit) 
1428         return not assume_new 
def ytdl_is_updateable():
    """ Returns if youtube-dl can be updated with -U

    True when this module's __loader__ is a zipimporter, or when sys has a
    'frozen' attribute. """
    from zipimport import zipimporter

    module_loader = globals().get('__loader__')
    if isinstance(module_loader, zipimporter):
        return True
    return hasattr(sys, 'frozen')