2 # -*- coding: utf-8 -*-
21 import urllib
.request
as compat_urllib_request
22 except ImportError: # Python 2
23 import urllib2
as compat_urllib_request
26 import urllib
.error
as compat_urllib_error
27 except ImportError: # Python 2
28 import urllib2
as compat_urllib_error
31 import urllib
.parse
as compat_urllib_parse
32 except ImportError: # Python 2
33 import urllib
as compat_urllib_parse
36 from urllib
.parse
import urlparse
as compat_urllib_parse_urlparse
37 except ImportError: # Python 2
38 from urlparse
import urlparse
as compat_urllib_parse_urlparse
41 import urllib
.parse
as compat_urlparse
42 except ImportError: # Python 2
43 import urlparse
as compat_urlparse
46 import http
.cookiejar
as compat_cookiejar
47 except ImportError: # Python 2
48 import cookielib
as compat_cookiejar
51 import html
.entities
as compat_html_entities
52 except ImportError: # Python 2
53 import htmlentitydefs
as compat_html_entities
56 import html
.parser
as compat_html_parser
57 except ImportError: # Python 2
58 import HTMLParser
as compat_html_parser
61 import http
.client
as compat_http_client
62 except ImportError: # Python 2
63 import httplib
as compat_http_client
66 from urllib
.error
import HTTPError
as compat_HTTPError
67 except ImportError: # Python 2
68 from urllib2
import HTTPError
as compat_HTTPError
71 from urllib
.request
import urlretrieve
as compat_urlretrieve
72 except ImportError: # Python 2
73 from urllib
import urlretrieve
as compat_urlretrieve
77 from subprocess
import DEVNULL
78 compat_subprocess_get_DEVNULL
= lambda: DEVNULL
80 compat_subprocess_get_DEVNULL
= lambda: open(os
.path
.devnull
, 'w')
83 from urllib
.parse
import parse_qs
as compat_parse_qs
84 except ImportError: # Python 2
85 # HACK: The following is the correct parse_qs implementation from cpython 3's stdlib.
86 # Python 2's version is apparently totally broken
87 def _unquote(string
, encoding
='utf-8', errors
='replace'):
90 res
= string
.split('%')
97 # pct_sequence: contiguous sequence of percent-encoded bytes, decoded
104 pct_sequence
+= item
[:2].decode('hex')
107 # This segment was just a single percent-encoded character.
108 # May be part of a sequence of code units, so delay decoding.
109 # (Stored in pct_sequence).
113 # Encountered non-percent-encoded characters. Flush the current
115 string
+= pct_sequence
.decode(encoding
, errors
) + rest
118 # Flush the final pct_sequence
119 string
+= pct_sequence
.decode(encoding
, errors
)
122 def _parse_qsl(qs
, keep_blank_values
=False, strict_parsing
=False,
123 encoding
='utf-8', errors
='replace'):
124 qs
, _coerce_result
= qs
, unicode
125 pairs
= [s2
for s1
in qs
.split('&') for s2
in s1
.split(';')]
127 for name_value
in pairs
:
128 if not name_value
and not strict_parsing
:
130 nv
= name_value
.split('=', 1)
133 raise ValueError("bad query field: %r" % (name_value
,))
134 # Handle case of a control-name with no equal sign
135 if keep_blank_values
:
139 if len(nv
[1]) or keep_blank_values
:
140 name
= nv
[0].replace('+', ' ')
141 name
= _unquote(name
, encoding
=encoding
, errors
=errors
)
142 name
= _coerce_result(name
)
143 value
= nv
[1].replace('+', ' ')
144 value
= _unquote(value
, encoding
=encoding
, errors
=errors
)
145 value
= _coerce_result(value
)
146 r
.append((name
, value
))
149 def compat_parse_qs(qs
, keep_blank_values
=False, strict_parsing
=False,
150 encoding
='utf-8', errors
='replace'):
152 pairs
= _parse_qsl(qs
, keep_blank_values
, strict_parsing
,
153 encoding
=encoding
, errors
=errors
)
154 for name
, value
in pairs
:
155 if name
in parsed_result
:
156 parsed_result
[name
].append(value
)
158 parsed_result
[name
] = [value
]
162 compat_str
= unicode # Python 2
167 compat_chr
= unichr # Python 2
172 if type(c
) is int: return c
175 # This is not clearly defined otherwise
176 compiled_regex_type
= type(re
.compile(''))
179 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0 (Chrome)',
180 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
181 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
182 'Accept-Encoding': 'gzip, deflate',
183 'Accept-Language': 'en-us,en;q=0.5',
186 def preferredencoding():
187 """Get preferred encoding.
189 Returns the best encoding scheme for the system, based on
190 locale.getpreferredencoding() and some further tweaks.
193 pref
= locale
.getpreferredencoding()
200 if sys
.version_info
< (3,0):
202 print(s
.encode(preferredencoding(), 'xmlcharrefreplace'))
205 assert type(s
) == type(u
'')
208 # In Python 2.x, json.dump expects a bytestream.
209 # In Python 3.x, it writes to a character stream
210 if sys
.version_info
< (3,0):
211 def write_json_file(obj
, fn
):
212 with open(fn
, 'wb') as f
:
215 def write_json_file(obj
, fn
):
216 with open(fn
, 'w', encoding
='utf-8') as f
:
219 if sys
.version_info
>= (2,7):
def find_xpath_attr(node, xpath, key, val):
    """Return the first element matching xpath[@key=val], or None."""
    # key/val are interpolated straight into the XPath expression, so
    # restrict them to safe character sets before building it.
    assert re.match(r'^[a-zA-Z]+$', key)
    assert re.match(r'^[a-zA-Z0-9@\s]*$', val)
    return node.find(u"%s[@%s='%s']" % (xpath, key, val))
227 def find_xpath_attr(node
, xpath
, key
, val
):
228 for f
in node
.findall(xpath
):
229 if f
.attrib
.get(key
) == val
:
233 # On python2.6 the xml.etree.ElementTree.Element methods don't support
234 # the namespace parameter
235 def xpath_with_ns(path
, ns_map
):
236 components
= [c
.split(':') for c
in path
.split('/')]
240 replaced
.append(c
[0])
243 replaced
.append('{%s}%s' % (ns_map
[ns
], tag
))
244 return '/'.join(replaced
)
246 def htmlentity_transform(matchobj
):
247 """Transforms an HTML entity to a character.
249 This function receives a match object and is intended to be used with
250 the re.sub() function.
252 entity
= matchobj
.group(1)
254 # Known non-numeric HTML entity
255 if entity
in compat_html_entities
.name2codepoint
:
256 return compat_chr(compat_html_entities
.name2codepoint
[entity
])
258 mobj
= re
.match(u
'(?u)#(x?\\d+)', entity
)
260 numstr
= mobj
.group(1)
261 if numstr
.startswith(u
'x'):
263 numstr
= u
'0%s' % numstr
266 return compat_chr(int(numstr
, base
))
268 # Unknown entity in name, return its literal representation
269 return (u
'&%s;' % entity
)
271 compat_html_parser
.locatestarttagend
= re
.compile(r
"""<[a-zA-Z][-.a-zA-Z0-9:_]*(?:\s+(?:(?<=['"\s])[^\s/>][^\s/=>]*(?:\s*=+\s*(?:'[^']*'|"[^"]*"|(?!['"])[^>\s]*))?\s*)*)?\s*""", re
.VERBOSE
) # backport bugfix
272 class BaseHTMLParser(compat_html_parser
.HTMLParser
):
274 compat_html_parser
.HTMLParser
.__init
__(self
)
277 def loads(self
, html
):
282 class AttrParser(BaseHTMLParser
):
283 """Modified HTMLParser that isolates a tag with the specified attribute"""
284 def __init__(self
, attribute
, value
):
285 self
.attribute
= attribute
290 self
.watch_startpos
= False
292 BaseHTMLParser
.__init
__(self
)
294 def error(self
, message
):
295 if self
.error_count
> 10 or self
.started
:
296 raise compat_html_parser
.HTMLParseError(message
, self
.getpos())
297 self
.rawdata
= '\n'.join(self
.html
.split('\n')[self
.getpos()[0]:]) # skip one line
298 self
.error_count
+= 1
301 def handle_starttag(self
, tag
, attrs
):
304 self
.find_startpos(None)
305 if self
.attribute
in attrs
and attrs
[self
.attribute
] == self
.value
:
308 self
.watch_startpos
= True
310 if not tag
in self
.depth
: self
.depth
[tag
] = 0
313 def handle_endtag(self
, tag
):
315 if tag
in self
.depth
: self
.depth
[tag
] -= 1
316 if self
.depth
[self
.result
[0]] == 0:
318 self
.result
.append(self
.getpos())
320 def find_startpos(self
, x
):
321 """Needed to put the start position of the result (self.result[1])
322 after the opening tag with the requested id"""
323 if self
.watch_startpos
:
324 self
.watch_startpos
= False
325 self
.result
.append(self
.getpos())
326 handle_entityref
= handle_charref
= handle_data
= handle_comment
= \
327 handle_decl
= handle_pi
= unknown_decl
= find_startpos
329 def get_result(self
):
330 if self
.result
is None:
332 if len(self
.result
) != 3:
334 lines
= self
.html
.split('\n')
335 lines
= lines
[self
.result
[1][0]-1:self
.result
[2][0]]
336 lines
[0] = lines
[0][self
.result
[1][1]:]
338 lines
[-1] = lines
[-1][:self
.result
[2][1]-self
.result
[1][1]]
339 lines
[-1] = lines
[-1][:self
.result
[2][1]]
340 return '\n'.join(lines
).strip()
341 # Hack for https://github.com/rg3/youtube-dl/issues/662
342 if sys
.version_info
< (2, 7, 3):
343 AttrParser
.parse_endtag
= (lambda self
, i
:
344 i
+ len("</scr'+'ipt>")
345 if self
.rawdata
[i
:].startswith("</scr'+'ipt>")
346 else compat_html_parser
.HTMLParser
.parse_endtag(self
, i
))
348 def get_element_by_id(id, html
):
349 """Return the content of the tag with the specified ID in the passed HTML document"""
350 return get_element_by_attribute("id", id, html
)
352 def get_element_by_attribute(attribute
, value
, html
):
353 """Return the content of the tag with the specified attribute in the passed HTML document"""
354 parser
= AttrParser(attribute
, value
)
357 except compat_html_parser
.HTMLParseError
:
359 return parser
.get_result()
361 class MetaParser(BaseHTMLParser
):
363 Modified HTMLParser that isolates a meta tag with the specified name
366 def __init__(self
, name
):
367 BaseHTMLParser
.__init
__(self
)
372 def handle_starttag(self
, tag
, attrs
):
376 if attrs
.get('name') == self
.name
:
377 self
.result
= attrs
.get('content')
379 def get_result(self
):
382 def get_meta_content(name
, html
):
384 Return the content attribute from the meta tag with the given name attribute.
386 parser
= MetaParser(name
)
389 except compat_html_parser
.HTMLParseError
:
391 return parser
.get_result()
394 def clean_html(html
):
395 """Clean an HTML snippet into a readable string"""
397 html
= html
.replace('\n', ' ')
398 html
= re
.sub(r
'\s*<\s*br\s*/?\s*>\s*', '\n', html
)
399 html
= re
.sub(r
'<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html
)
401 html
= re
.sub('<.*?>', '', html
)
402 # Replace html entities
403 html
= unescapeHTML(html
)
407 def sanitize_open(filename
, open_mode
):
408 """Try to open the given filename, and slightly tweak it if this fails.
410 Attempts to open the given filename. If this fails, it tries to change
411 the filename slightly, step by step, until it's either able to open it
412 or it fails and raises a final exception, like the standard open()
415 It returns the tuple (stream, definitive_file_name).
419 if sys
.platform
== 'win32':
421 msvcrt
.setmode(sys
.stdout
.fileno(), os
.O_BINARY
)
422 return (sys
.stdout
.buffer if hasattr(sys
.stdout
, 'buffer') else sys
.stdout
, filename
)
423 stream
= open(encodeFilename(filename
), open_mode
)
424 return (stream
, filename
)
425 except (IOError, OSError) as err
:
426 if err
.errno
in (errno
.EACCES
,):
429 # In case of error, try to remove win32 forbidden chars
430 alt_filename
= os
.path
.join(
431 re
.sub(u
'[/<>:"\\|\\\\?\\*]', u
'#', path_part
)
432 for path_part
in os
.path
.split(filename
)
434 if alt_filename
== filename
:
437 # An exception here should be caught in the caller
438 stream
= open(encodeFilename(filename
), open_mode
)
439 return (stream
, alt_filename
)
442 def timeconvert(timestr
):
443 """Convert RFC 2822 defined time string into system timestamp"""
445 timetuple
= email
.utils
.parsedate_tz(timestr
)
446 if timetuple
is not None:
447 timestamp
= email
.utils
.mktime_tz(timetuple
)
450 def sanitize_filename(s
, restricted
=False, is_id
=False):
451 """Sanitizes a string so it could be used as part of a filename.
452 If restricted is set, use a stricter subset of allowed characters.
453 Set is_id if this is not an arbitrary string, but an ID that should be kept if possible
455 def replace_insane(char
):
456 if char
== '?' or ord(char
) < 32 or ord(char
) == 127:
459 return '' if restricted
else '\''
461 return '_-' if restricted
else ' -'
462 elif char
in '\\/|*<>':
464 if restricted
and (char
in '!&\'()[]{}$;`^,#' or char
.isspace()):
466 if restricted
and ord(char
) > 127:
470 result
= u
''.join(map(replace_insane
, s
))
472 while '__' in result
:
473 result
= result
.replace('__', '_')
474 result
= result
.strip('_')
475 # Common case of "Foreign band name - English song title"
476 if restricted
and result
.startswith('-_'):
482 def orderedSet(iterable
):
483 """ Remove all duplicates from the input iterable """
494 assert type(s
) == type(u
'')
496 result
= re
.sub(u
'(?u)&(.+?);', htmlentity_transform
, s
)
499 def encodeFilename(s
):
501 @param s The name of the file
504 assert type(s
) == type(u
'')
506 # Python 3 has a Unicode API
507 if sys
.version_info
>= (3, 0):
510 if sys
.platform
== 'win32' and sys
.getwindowsversion()[0] >= 5:
511 # Pass u'' directly to use Unicode APIs on Windows 2000 and up
512 # (Detecting Windows NT 4 is tricky because 'major >= 4' would
513 # match Windows 9x series as well. Besides, NT 4 is obsolete.)
516 encoding
= sys
.getfilesystemencoding()
519 return s
.encode(encoding
, 'ignore')
521 def decodeOption(optval
):
524 if isinstance(optval
, bytes):
525 optval
= optval
.decode(preferredencoding())
527 assert isinstance(optval
, compat_str
)
530 def formatSeconds(secs
):
532 return '%d:%02d:%02d' % (secs
// 3600, (secs
% 3600) // 60, secs
% 60)
534 return '%d:%02d' % (secs
// 60, secs
% 60)
538 def make_HTTPS_handler(opts
):
539 if sys
.version_info
< (3,2):
540 # Python's 2.x handler is very simplistic
541 return compat_urllib_request
.HTTPSHandler()
544 context
= ssl
.SSLContext(ssl
.PROTOCOL_SSLv23
)
545 context
.set_default_verify_paths()
547 context
.verify_mode
= (ssl
.CERT_NONE
548 if opts
.no_check_certificate
549 else ssl
.CERT_REQUIRED
)
550 return compat_urllib_request
.HTTPSHandler(context
=context
)
552 class ExtractorError(Exception):
553 """Error during info extraction."""
554 def __init__(self
, msg
, tb
=None, expected
=False, cause
=None):
555 """ tb, if given, is the original traceback (so that it can be printed out).
556 If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
559 if sys
.exc_info()[0] in (compat_urllib_error
.URLError
, socket
.timeout
, UnavailableVideoError
):
562 msg
= msg
+ u
'; please report this issue on https://yt-dl.org/bug . Be sure to call youtube-dl with the --verbose flag and include its complete output. Make sure you are using the latest version; type youtube-dl -U to update.'
563 super(ExtractorError
, self
).__init
__(msg
)
566 self
.exc_info
= sys
.exc_info() # preserve original exception
569 def format_traceback(self
):
570 if self
.traceback
is None:
572 return u
''.join(traceback
.format_tb(self
.traceback
))
575 class RegexNotFoundError(ExtractorError
):
576 """Error when a regex didn't match"""
580 class DownloadError(Exception):
581 """Download Error exception.
583 This exception may be thrown by FileDownloader objects if they are not
584 configured to continue on errors. They will contain the appropriate
587 def __init__(self
, msg
, exc_info
=None):
588 """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
589 super(DownloadError
, self
).__init
__(msg
)
590 self
.exc_info
= exc_info
593 class SameFileError(Exception):
594 """Same File exception.
596 This exception will be thrown by FileDownloader objects if they detect
597 multiple files would have to be downloaded to the same file on disk.
602 class PostProcessingError(Exception):
603 """Post Processing exception.
605 This exception may be raised by PostProcessor's .run() method to
606 indicate an error in the postprocessing task.
608 def __init__(self
, msg
):
611 class MaxDownloadsReached(Exception):
612 """ --max-downloads limit has been reached. """
616 class UnavailableVideoError(Exception):
617 """Unavailable Format exception.
619 This exception will be thrown when a video is requested
620 in a format that is not available for that video.
625 class ContentTooShortError(Exception):
626 """Content Too Short exception.
628 This exception may be raised by FileDownloader objects when a file they
629 download is too small for what the server announced first, indicating
630 the connection was probably interrupted.
636 def __init__(self
, downloaded
, expected
):
637 self
.downloaded
= downloaded
638 self
.expected
= expected
640 class YoutubeDLHandler(compat_urllib_request
.HTTPHandler
):
641 """Handler for HTTP requests and responses.
643 This class, when installed with an OpenerDirector, automatically adds
644 the standard headers to every HTTP request and handles gzipped and
645 deflated responses from web servers. If compression is to be avoided in
646 a particular request, the original request in the program code only has
647 to include the HTTP header "Youtubedl-No-Compression", which will be
648 removed before making the real request.
650 Part of this code was copied from:
652 http://techknack.net/python-urllib2-handlers/
654 Andrew Rowls, the author of that code, agreed to release it to the
661 return zlib
.decompress(data
, -zlib
.MAX_WBITS
)
663 return zlib
.decompress(data
)
666 def addinfourl_wrapper(stream
, headers
, url
, code
):
667 if hasattr(compat_urllib_request
.addinfourl
, 'getcode'):
668 return compat_urllib_request
.addinfourl(stream
, headers
, url
, code
)
669 ret
= compat_urllib_request
.addinfourl(stream
, headers
, url
)
673 def http_request(self
, req
):
674 for h
,v
in std_headers
.items():
678 if 'Youtubedl-no-compression' in req
.headers
:
679 if 'Accept-encoding' in req
.headers
:
680 del req
.headers
['Accept-encoding']
681 del req
.headers
['Youtubedl-no-compression']
682 if 'Youtubedl-user-agent' in req
.headers
:
683 if 'User-agent' in req
.headers
:
684 del req
.headers
['User-agent']
685 req
.headers
['User-agent'] = req
.headers
['Youtubedl-user-agent']
686 del req
.headers
['Youtubedl-user-agent']
689 def http_response(self
, req
, resp
):
692 if resp
.headers
.get('Content-encoding', '') == 'gzip':
693 content
= resp
.read()
694 gz
= gzip
.GzipFile(fileobj
=io
.BytesIO(content
), mode
='rb')
696 uncompressed
= io
.BytesIO(gz
.read())
697 except IOError as original_ioerror
:
698 # There may be junk add the end of the file
699 # See http://stackoverflow.com/q/4928560/35070 for details
700 for i
in range(1, 1024):
702 gz
= gzip
.GzipFile(fileobj
=io
.BytesIO(content
[:-i
]), mode
='rb')
703 uncompressed
= io
.BytesIO(gz
.read())
708 raise original_ioerror
709 resp
= self
.addinfourl_wrapper(uncompressed
, old_resp
.headers
, old_resp
.url
, old_resp
.code
)
710 resp
.msg
= old_resp
.msg
712 if resp
.headers
.get('Content-encoding', '') == 'deflate':
713 gz
= io
.BytesIO(self
.deflate(resp
.read()))
714 resp
= self
.addinfourl_wrapper(gz
, old_resp
.headers
, old_resp
.url
, old_resp
.code
)
715 resp
.msg
= old_resp
.msg
718 https_request
= http_request
719 https_response
= http_response
721 def unified_strdate(date_str
):
722 """Return a string with the date in the format YYYYMMDD"""
725 date_str
= date_str
.replace(',',' ')
726 # %z (UTC offset) is only supported in python>=3.2
727 date_str
= re
.sub(r
' (\+|-)[\d]*$', '', date_str
)
728 format_expressions
= [
736 '%Y-%m-%dT%H:%M:%SZ',
739 for expression
in format_expressions
:
741 upload_date
= datetime
.datetime
.strptime(date_str
, expression
).strftime('%Y%m%d')
746 def determine_ext(url
, default_ext
=u
'unknown_video'):
747 guess
= url
.partition(u
'?')[0].rpartition(u
'.')[2]
748 if re
.match(r
'^[A-Za-z0-9]+$', guess
):
def subtitles_filename(filename, sub_lang, sub_format):
    """Derive a subtitle file name as <base>.<lang>.<format>."""
    # Strip only the last extension; a name with no dot is kept whole.
    base = filename.rsplit('.', 1)[0]
    return u'.'.join([base, sub_lang, sub_format])
):
758 Return a datetime object from a string in the format YYYYMMDD or
759 (now|today)[+-][0-9](day|week|month|year)(s)?"""
760 today
= datetime
.date
.today()
761 if date_str
== 'now'or date_str
== 'today':
763 match
= re
.match('(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str
)
764 if match
is not None:
765 sign
= match
.group('sign')
766 time
= int(match
.group('time'))
769 unit
= match
.group('unit')
778 delta
= datetime
.timedelta(**{unit
: time
})
780 return datetime
.datetime
.strptime(date_str
, "%Y%m%d").date()
782 class DateRange(object):
783 """Represents a time interval between two dates"""
784 def __init__(self
, start
=None, end
=None):
785 """start and end must be strings in the format accepted by date"""
786 if start
is not None:
787 self
.start
= date_from_str(start
)
789 self
.start
= datetime
.datetime
.min.date()
791 self
.end
= date_from_str(end
)
793 self
.end
= datetime
.datetime
.max.date()
794 if self
.start
> self
.end
:
795 raise ValueError('Date range: "%s" , the start date must be before the end date' % self
)
798 """Returns a range that only contains the given day"""
800 def __contains__(self
, date
):
801 """Check if the date is in the range"""
802 if not isinstance(date
, datetime
.date
):
803 date
= date_from_str(date
)
804 return self
.start
<= date
<= self
.end
806 return '%s - %s' % ( self
.start
.isoformat(), self
.end
.isoformat())
810 """ Returns the platform name as a compat_str """
811 res
= platform
.platform()
812 if isinstance(res
, bytes):
813 res
= res
.decode(preferredencoding())
815 assert isinstance(res
, compat_str
)
819 def write_string(s
, out
=None):
822 assert type(s
) == type(u
'')
824 if ('b' in getattr(out
, 'mode', '') or
825 sys
.version_info
[0] < 3): # Python 2 lies about mode of sys.stderr
826 s
= s
.encode(preferredencoding(), 'ignore')
831 def bytes_to_intlist(bs
):
834 if isinstance(bs
[0], int): # Python 3
837 return [ord(c
) for c
in bs
]
840 def intlist_to_bytes(xs
):
843 if isinstance(chr(0), bytes): # Python 2
844 return ''.join([chr(x
) for x
in xs
])
def get_cachedir(params=None):
    """Return the youtube-dl cache directory.

    Uses params['cachedir'] when present; otherwise falls back to
    $XDG_CACHE_HOME/youtube-dl, defaulting XDG_CACHE_HOME to ~/.cache
    as per the XDG Base Directory specification.

    @param params optional options dict (may contain 'cachedir')
    """
    # A literal {} default is a mutable default argument (shared across
    # calls); use the conventional None sentinel instead.
    if params is None:
        params = {}
    cache_root = os.environ.get('XDG_CACHE_HOME',
                                os.path.expanduser('~/.cache'))
    return params.get('cachedir', os.path.join(cache_root, 'youtube-dl'))
855 # Cross-platform file locking
856 if sys
.platform
== 'win32':
857 import ctypes
.wintypes
860 class OVERLAPPED(ctypes
.Structure
):
862 ('Internal', ctypes
.wintypes
.LPVOID
),
863 ('InternalHigh', ctypes
.wintypes
.LPVOID
),
864 ('Offset', ctypes
.wintypes
.DWORD
),
865 ('OffsetHigh', ctypes
.wintypes
.DWORD
),
866 ('hEvent', ctypes
.wintypes
.HANDLE
),
869 kernel32
= ctypes
.windll
.kernel32
870 LockFileEx
= kernel32
.LockFileEx
871 LockFileEx
.argtypes
= [
872 ctypes
.wintypes
.HANDLE
, # hFile
873 ctypes
.wintypes
.DWORD
, # dwFlags
874 ctypes
.wintypes
.DWORD
, # dwReserved
875 ctypes
.wintypes
.DWORD
, # nNumberOfBytesToLockLow
876 ctypes
.wintypes
.DWORD
, # nNumberOfBytesToLockHigh
877 ctypes
.POINTER(OVERLAPPED
) # Overlapped
879 LockFileEx
.restype
= ctypes
.wintypes
.BOOL
880 UnlockFileEx
= kernel32
.UnlockFileEx
881 UnlockFileEx
.argtypes
= [
882 ctypes
.wintypes
.HANDLE
, # hFile
883 ctypes
.wintypes
.DWORD
, # dwReserved
884 ctypes
.wintypes
.DWORD
, # nNumberOfBytesToLockLow
885 ctypes
.wintypes
.DWORD
, # nNumberOfBytesToLockHigh
886 ctypes
.POINTER(OVERLAPPED
) # Overlapped
888 UnlockFileEx
.restype
= ctypes
.wintypes
.BOOL
889 whole_low
= 0xffffffff
890 whole_high
= 0x7fffffff
892 def _lock_file(f
, exclusive
):
893 overlapped
= OVERLAPPED()
894 overlapped
.Offset
= 0
895 overlapped
.OffsetHigh
= 0
896 overlapped
.hEvent
= 0
897 f
._lock
_file
_overlapped
_p
= ctypes
.pointer(overlapped
)
898 handle
= msvcrt
.get_osfhandle(f
.fileno())
899 if not LockFileEx(handle
, 0x2 if exclusive
else 0x0, 0,
900 whole_low
, whole_high
, f
._lock
_file
_overlapped
_p
):
901 raise OSError('Locking file failed: %r' % ctypes
.FormatError())
904 assert f
._lock
_file
_overlapped
_p
905 handle
= msvcrt
.get_osfhandle(f
.fileno())
906 if not UnlockFileEx(handle
, 0,
907 whole_low
, whole_high
, f
._lock
_file
_overlapped
_p
):
908 raise OSError('Unlocking file failed: %r' % ctypes
.FormatError())
913 def _lock_file(f
, exclusive
):
914 fcntl
.lockf(f
, fcntl
.LOCK_EX
if exclusive
else fcntl
.LOCK_SH
)
917 fcntl
.lockf(f
, fcntl
.LOCK_UN
)
920 class locked_file(object):
921 def __init__(self
, filename
, mode
, encoding
=None):
922 assert mode
in ['r', 'a', 'w']
923 self
.f
= io
.open(filename
, mode
, encoding
=encoding
)
927 exclusive
= self
.mode
!= 'r'
929 _lock_file(self
.f
, exclusive
)
935 def __exit__(self
, etype
, value
, traceback
):
944 def write(self
, *args
):
945 return self
.f
.write(*args
)
947 def read(self
, *args
):
948 return self
.f
.read(*args
)
def shell_quote(args):
    """Quote each argument for safe inclusion in a shell command line."""
    return ' '.join(pipes.quote(arg) for arg in args)
955 def takewhile_inclusive(pred
, seq
):
956 """ Like itertools.takewhile, but include the latest evaluated element
957 (the first element so that Not pred(e)) """
def smuggle_url(url, data):
    """ Pass additional data in a URL for internal use. """
    # Serialize the payload and tuck it into the fragment, where servers
    # never see it; unsmuggle_url() reverses this.
    payload = json.dumps(data)
    sdata = compat_urllib_parse.urlencode({u'__youtubedl_smuggle': payload})
    return url + u'#' + sdata
972 def unsmuggle_url(smug_url
):
973 if not '#__youtubedl_smuggle' in smug_url
:
974 return smug_url
, None
975 url
, _
, sdata
= smug_url
.rpartition(u
'#')
976 jsond
= compat_parse_qs(sdata
)[u
'__youtubedl_smuggle'][0]
977 data
= json
.loads(jsond
)