2 # -*- coding: utf-8 -*-
21 import urllib
.request
as compat_urllib_request
22 except ImportError: # Python 2
23 import urllib2
as compat_urllib_request
26 import urllib
.error
as compat_urllib_error
27 except ImportError: # Python 2
28 import urllib2
as compat_urllib_error
31 import urllib
.parse
as compat_urllib_parse
32 except ImportError: # Python 2
33 import urllib
as compat_urllib_parse
36 from urllib
.parse
import urlparse
as compat_urllib_parse_urlparse
37 except ImportError: # Python 2
38 from urlparse
import urlparse
as compat_urllib_parse_urlparse
41 import urllib
.parse
as compat_urlparse
42 except ImportError: # Python 2
43 import urlparse
as compat_urlparse
46 import http
.cookiejar
as compat_cookiejar
47 except ImportError: # Python 2
48 import cookielib
as compat_cookiejar
51 import html
.entities
as compat_html_entities
52 except ImportError: # Python 2
53 import htmlentitydefs
as compat_html_entities
56 import html
.parser
as compat_html_parser
57 except ImportError: # Python 2
58 import HTMLParser
as compat_html_parser
61 import http
.client
as compat_http_client
62 except ImportError: # Python 2
63 import httplib
as compat_http_client
66 from urllib
.error
import HTTPError
as compat_HTTPError
67 except ImportError: # Python 2
68 from urllib2
import HTTPError
as compat_HTTPError
71 from urllib
.request
import urlretrieve
as compat_urlretrieve
72 except ImportError: # Python 2
73 from urllib
import urlretrieve
as compat_urlretrieve
77 from subprocess
import DEVNULL
78 compat_subprocess_get_DEVNULL
= lambda: DEVNULL
80 compat_subprocess_get_DEVNULL
= lambda: open(os
.path
.devnull
, 'w')
83 from urllib
.parse
import parse_qs
as compat_parse_qs
84 except ImportError: # Python 2
85 # HACK: The following is the correct parse_qs implementation from cpython 3's stdlib.
86 # Python 2's version is apparently totally broken
87 def _unquote(string
, encoding
='utf-8', errors
='replace'):
90 res
= string
.split('%')
97 # pct_sequence: contiguous sequence of percent-encoded bytes, decoded
104 pct_sequence
+= item
[:2].decode('hex')
107 # This segment was just a single percent-encoded character.
108 # May be part of a sequence of code units, so delay decoding.
109 # (Stored in pct_sequence).
113 # Encountered non-percent-encoded characters. Flush the current
115 string
+= pct_sequence
.decode(encoding
, errors
) + rest
118 # Flush the final pct_sequence
119 string
+= pct_sequence
.decode(encoding
, errors
)
122 def _parse_qsl(qs
, keep_blank_values
=False, strict_parsing
=False,
123 encoding
='utf-8', errors
='replace'):
124 qs
, _coerce_result
= qs
, unicode
125 pairs
= [s2
for s1
in qs
.split('&') for s2
in s1
.split(';')]
127 for name_value
in pairs
:
128 if not name_value
and not strict_parsing
:
130 nv
= name_value
.split('=', 1)
133 raise ValueError("bad query field: %r" % (name_value
,))
134 # Handle case of a control-name with no equal sign
135 if keep_blank_values
:
139 if len(nv
[1]) or keep_blank_values
:
140 name
= nv
[0].replace('+', ' ')
141 name
= _unquote(name
, encoding
=encoding
, errors
=errors
)
142 name
= _coerce_result(name
)
143 value
= nv
[1].replace('+', ' ')
144 value
= _unquote(value
, encoding
=encoding
, errors
=errors
)
145 value
= _coerce_result(value
)
146 r
.append((name
, value
))
149 def compat_parse_qs(qs
, keep_blank_values
=False, strict_parsing
=False,
150 encoding
='utf-8', errors
='replace'):
152 pairs
= _parse_qsl(qs
, keep_blank_values
, strict_parsing
,
153 encoding
=encoding
, errors
=errors
)
154 for name
, value
in pairs
:
155 if name
in parsed_result
:
156 parsed_result
[name
].append(value
)
158 parsed_result
[name
] = [value
]
162 compat_str
= unicode # Python 2
167 compat_chr
= unichr # Python 2
172 if type(c
) is int: return c
175 # This is not clearly defined otherwise
176 compiled_regex_type
= type(re
.compile(''))
179 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0 (Chrome)',
180 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
181 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
182 'Accept-Encoding': 'gzip, deflate',
183 'Accept-Language': 'en-us,en;q=0.5',
def preferredencoding():
    """Get preferred encoding.

    Returns the best encoding scheme for the system, based on
    locale.getpreferredencoding() and some further tweaks.
    """
    try:
        pref = locale.getpreferredencoding()
        # Verify the reported encoding is actually usable; some platforms
        # report names the codecs module cannot resolve.
        u'TEST'.encode(pref)
    except Exception:
        # Was a bare `except:` — narrowed so KeyboardInterrupt/SystemExit
        # are not swallowed. Fall back to UTF-8 on any codec failure.
        pref = 'UTF-8'

    return pref
200 if sys
.version_info
< (3,0):
202 print(s
.encode(preferredencoding(), 'xmlcharrefreplace'))
205 assert type(s
) == type(u
'')
208 # In Python 2.x, json.dump expects a bytestream.
209 # In Python 3.x, it writes to a character stream
210 if sys
.version_info
< (3,0):
211 def write_json_file(obj
, fn
):
212 with open(fn
, 'wb') as f
:
215 def write_json_file(obj
, fn
):
216 with open(fn
, 'w', encoding
='utf-8') as f
:
if sys.version_info >= (2, 7):
    def find_xpath_attr(node, xpath, key, val):
        """ Find the xpath xpath[@key=val] """
        # Attribute predicates in ElementTree need Python >= 2.7.
        assert re.match(r'^[a-zA-Z]+$', key)
        assert re.match(r'^[a-zA-Z0-9@\s]*$', val)
        expr = xpath + u"[@%s='%s']" % (key, val)
        return node.find(expr)
else:
    def find_xpath_attr(node, xpath, key, val):
        # Manual scan fallback for older interpreters without predicate
        # support: walk every match and compare the attribute ourselves.
        for f in node.findall(xpath):
            if f.attrib.get(key) == val:
                return f
        return None
# On python2.6 the xml.etree.ElementTree.Element methods don't support
# the namespace parameter
def xpath_with_ns(path, ns_map):
    """Expand 'prefix:tag' steps of *path* into '{uri}tag' form using the
    prefix -> URI mapping *ns_map*; steps without a prefix pass through."""
    out = []
    for step in path.split('/'):
        pieces = step.split(':')
        if len(pieces) == 1:
            out.append(pieces[0])
        else:
            ns, tag = pieces
            out.append('{%s}%s' % (ns_map[ns], tag))
    return '/'.join(out)
def htmlentity_transform(matchobj):
    """Transforms an HTML entity to a character.

    This function receives a match object and is intended to be used with
    the re.sub() function.
    """
    entity = matchobj.group(1)

    # Known non-numeric HTML entity
    if entity in compat_html_entities.name2codepoint:
        return compat_chr(compat_html_entities.name2codepoint[entity])

    # Numeric character reference, decimal or hexadecimal
    mobj = re.match(u'(?u)#(x?\\d+)', entity)
    if mobj is not None:
        numstr = mobj.group(1)
        if numstr.startswith(u'x'):
            base = 16
            # Prefix '0' so int() accepts the '0x...' form
            numstr = u'0%s' % numstr
        else:
            base = 10
        return compat_chr(int(numstr, base))

    # Unknown entity in name, return its literal representation
    return (u'&%s;' % entity)
271 compat_html_parser
.locatestarttagend
= re
.compile(r
"""<[a-zA-Z][-.a-zA-Z0-9:_]*(?:\s+(?:(?<=['"\s])[^\s/>][^\s/=>]*(?:\s*=+\s*(?:'[^']*'|"[^"]*"|(?!['"])[^>\s]*))?\s*)*)?\s*""", re
.VERBOSE
) # backport bugfix
272 class BaseHTMLParser(compat_html_parser
.HTMLParser
):
274 compat_html_parser
.HTMLParser
.__init
__(self
)
277 def loads(self
, html
):
282 class AttrParser(BaseHTMLParser
):
283 """Modified HTMLParser that isolates a tag with the specified attribute"""
284 def __init__(self
, attribute
, value
):
285 self
.attribute
= attribute
290 self
.watch_startpos
= False
292 BaseHTMLParser
.__init
__(self
)
294 def error(self
, message
):
295 if self
.error_count
> 10 or self
.started
:
296 raise compat_html_parser
.HTMLParseError(message
, self
.getpos())
297 self
.rawdata
= '\n'.join(self
.html
.split('\n')[self
.getpos()[0]:]) # skip one line
298 self
.error_count
+= 1
301 def handle_starttag(self
, tag
, attrs
):
304 self
.find_startpos(None)
305 if self
.attribute
in attrs
and attrs
[self
.attribute
] == self
.value
:
308 self
.watch_startpos
= True
310 if not tag
in self
.depth
: self
.depth
[tag
] = 0
313 def handle_endtag(self
, tag
):
315 if tag
in self
.depth
: self
.depth
[tag
] -= 1
316 if self
.depth
[self
.result
[0]] == 0:
318 self
.result
.append(self
.getpos())
320 def find_startpos(self
, x
):
321 """Needed to put the start position of the result (self.result[1])
322 after the opening tag with the requested id"""
323 if self
.watch_startpos
:
324 self
.watch_startpos
= False
325 self
.result
.append(self
.getpos())
326 handle_entityref
= handle_charref
= handle_data
= handle_comment
= \
327 handle_decl
= handle_pi
= unknown_decl
= find_startpos
329 def get_result(self
):
330 if self
.result
is None:
332 if len(self
.result
) != 3:
334 lines
= self
.html
.split('\n')
335 lines
= lines
[self
.result
[1][0]-1:self
.result
[2][0]]
336 lines
[0] = lines
[0][self
.result
[1][1]:]
338 lines
[-1] = lines
[-1][:self
.result
[2][1]-self
.result
[1][1]]
339 lines
[-1] = lines
[-1][:self
.result
[2][1]]
340 return '\n'.join(lines
).strip()
341 # Hack for https://github.com/rg3/youtube-dl/issues/662
342 if sys
.version_info
< (2, 7, 3):
343 AttrParser
.parse_endtag
= (lambda self
, i
:
344 i
+ len("</scr'+'ipt>")
345 if self
.rawdata
[i
:].startswith("</scr'+'ipt>")
346 else compat_html_parser
.HTMLParser
.parse_endtag(self
, i
))
348 def get_element_by_id(id, html
):
349 """Return the content of the tag with the specified ID in the passed HTML document"""
350 return get_element_by_attribute("id", id, html
)
352 def get_element_by_attribute(attribute
, value
, html
):
353 """Return the content of the tag with the specified attribute in the passed HTML document"""
354 parser
= AttrParser(attribute
, value
)
357 except compat_html_parser
.HTMLParseError
:
359 return parser
.get_result()
361 class MetaParser(BaseHTMLParser
):
363 Modified HTMLParser that isolates a meta tag with the specified name
366 def __init__(self
, name
):
367 BaseHTMLParser
.__init
__(self
)
372 def handle_starttag(self
, tag
, attrs
):
376 if attrs
.get('name') == self
.name
:
377 self
.result
= attrs
.get('content')
379 def get_result(self
):
382 def get_meta_content(name
, html
):
384 Return the content attribute from the meta tag with the given name attribute.
386 parser
= MetaParser(name
)
389 except compat_html_parser
.HTMLParseError
:
391 return parser
.get_result()
394 def clean_html(html
):
395 """Clean an HTML snippet into a readable string"""
397 html
= html
.replace('\n', ' ')
398 html
= re
.sub(r
'\s*<\s*br\s*/?\s*>\s*', '\n', html
)
399 html
= re
.sub(r
'<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html
)
401 html
= re
.sub('<.*?>', '', html
)
402 # Replace html entities
403 html
= unescapeHTML(html
)
407 def sanitize_open(filename
, open_mode
):
408 """Try to open the given filename, and slightly tweak it if this fails.
410 Attempts to open the given filename. If this fails, it tries to change
411 the filename slightly, step by step, until it's either able to open it
412 or it fails and raises a final exception, like the standard open()
415 It returns the tuple (stream, definitive_file_name).
419 if sys
.platform
== 'win32':
421 msvcrt
.setmode(sys
.stdout
.fileno(), os
.O_BINARY
)
422 return (sys
.stdout
.buffer if hasattr(sys
.stdout
, 'buffer') else sys
.stdout
, filename
)
423 stream
= open(encodeFilename(filename
), open_mode
)
424 return (stream
, filename
)
425 except (IOError, OSError) as err
:
426 if err
.errno
in (errno
.EACCES
,):
429 # In case of error, try to remove win32 forbidden chars
430 alt_filename
= os
.path
.join(
431 re
.sub(u
'[/<>:"\\|\\\\?\\*]', u
'#', path_part
)
432 for path_part
in os
.path
.split(filename
)
434 if alt_filename
== filename
:
437 # An exception here should be caught in the caller
438 stream
= open(encodeFilename(filename
), open_mode
)
439 return (stream
, alt_filename
)
def timeconvert(timestr):
    """Convert RFC 2822 defined time string into system timestamp.

    Returns None when the string cannot be parsed.
    """
    timetuple = email.utils.parsedate_tz(timestr)
    if timetuple is None:
        return None
    return email.utils.mktime_tz(timetuple)
def sanitize_filename(s, restricted=False, is_id=False):
    """Sanitizes a string so it could be used as part of a filename.
    If restricted is set, use a stricter subset of allowed characters.
    Set is_id if this is not an arbitrary string, but an ID that should be kept if possible
    """
    def replace_insane(char):
        # Control characters, DEL and '?' are dropped outright
        if char == '?' or ord(char) < 32 or ord(char) == 127:
            return ''
        elif char == '"':
            return '' if restricted else '\''
        elif char == ':':
            return '_-' if restricted else ' -'
        elif char in '\\/|*<>':
            return '_'
        if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace()):
            return '_'
        if restricted and ord(char) > 127:
            return '_'
        return char

    result = u''.join(map(replace_insane, s))
    if not is_id:
        # Collapse runs of underscores introduced by the substitutions
        while '__' in result:
            result = result.replace('__', '_')
        result = result.strip('_')
        # Common case of "Foreign band name - English song title"
        if restricted and result.startswith('-_'):
            result = result[2:]
        if not result:
            result = '_'
    return result
def orderedSet(iterable):
    """ Remove all duplicates from the input iterable """
    res = []
    for e in iterable:
        if e not in res:
            res.append(e)
    return res
494 assert type(s
) == type(u
'')
496 result
= re
.sub(u
'(?u)&(.+?);', htmlentity_transform
, s
)
499 def encodeFilename(s
):
501 @param s The name of the file
504 assert type(s
) == type(u
'')
506 # Python 3 has a Unicode API
507 if sys
.version_info
>= (3, 0):
510 if sys
.platform
== 'win32' and sys
.getwindowsversion()[0] >= 5:
511 # Pass u'' directly to use Unicode APIs on Windows 2000 and up
512 # (Detecting Windows NT 4 is tricky because 'major >= 4' would
513 # match Windows 9x series as well. Besides, NT 4 is obsolete.)
516 encoding
= sys
.getfilesystemencoding()
519 return s
.encode(encoding
, 'ignore')
def decodeOption(optval):
    """Decode a command-line option value into unicode, leaving None alone."""
    if optval is None:
        return optval
    if isinstance(optval, bytes):
        optval = optval.decode(preferredencoding())

    assert isinstance(optval, compat_str)
    return optval
def formatSeconds(secs):
    """Render a duration in seconds as 'H:MM:SS', 'M:SS' or plain seconds."""
    if secs > 3600:
        return '%d:%02d:%02d' % (secs // 3600, (secs % 3600) // 60, secs % 60)
    elif secs > 60:
        return '%d:%02d' % (secs // 60, secs % 60)
    else:
        return '%d' % secs
538 def make_HTTPS_handler(opts
):
539 if sys
.version_info
< (3,2):
540 # Python's 2.x handler is very simplistic
541 return compat_urllib_request
.HTTPSHandler()
544 context
= ssl
.SSLContext(ssl
.PROTOCOL_SSLv23
)
545 context
.set_default_verify_paths()
547 context
.verify_mode
= (ssl
.CERT_NONE
548 if opts
.no_check_certificate
549 else ssl
.CERT_REQUIRED
)
550 return compat_urllib_request
.HTTPSHandler(context
=context
)
class ExtractorError(Exception):
    """Error during info extraction."""
    def __init__(self, msg, tb=None, expected=False, cause=None):
        """ tb, if given, is the original traceback (so that it can be printed out).
        If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
        """

        # Network-level failures are always "expected" (not a youtube-dl bug)
        if sys.exc_info()[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
            expected = True
        if not expected:
            msg = msg + u'; please report this issue on https://yt-dl.org/bug . Be sure to call youtube-dl with the --verbose flag and include its complete output. Make sure you are using the latest version; type youtube-dl -U to update.'
        super(ExtractorError, self).__init__(msg)

        self.traceback = tb
        self.exc_info = sys.exc_info()  # preserve original exception
        self.cause = cause

    def format_traceback(self):
        """Return the stored traceback rendered as text, or None if absent."""
        if self.traceback is None:
            return None
        return u''.join(traceback.format_tb(self.traceback))
class DownloadError(Exception):
    """Download Error exception.

    This exception may be thrown by FileDownloader objects if they are not
    configured to continue on errors. They will contain the appropriate
    error message.
    """
    def __init__(self, msg, exc_info=None):
        """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
        super(DownloadError, self).__init__(msg)
        self.exc_info = exc_info
class SameFileError(Exception):
    """Same File exception.

    This exception will be thrown by FileDownloader objects if they detect
    multiple files would have to be downloaded to the same file on disk.
    """
    pass
class PostProcessingError(Exception):
    """Post Processing exception.

    This exception may be raised by PostProcessor's .run() method to
    indicate an error in the postprocessing task.
    """
    def __init__(self, msg):
        # Keep the message accessible as an attribute for callers
        self.msg = msg
class MaxDownloadsReached(Exception):
    """ --max-downloads limit has been reached. """
    pass
class UnavailableVideoError(Exception):
    """Unavailable Format exception.

    This exception will be thrown when a video is requested
    in a format that is not available for that video.
    """
    pass
class ContentTooShortError(Exception):
    """Content Too Short exception.

    This exception may be raised by FileDownloader objects when a file they
    download is too small for what the server announced first, indicating
    the connection was probably interrupted.
    """
    # Both sizes are in bytes
    downloaded = None
    expected = None

    def __init__(self, downloaded, expected):
        self.downloaded = downloaded
        self.expected = expected
635 class YoutubeDLHandler(compat_urllib_request
.HTTPHandler
):
636 """Handler for HTTP requests and responses.
638 This class, when installed with an OpenerDirector, automatically adds
639 the standard headers to every HTTP request and handles gzipped and
640 deflated responses from web servers. If compression is to be avoided in
641 a particular request, the original request in the program code only has
642 to include the HTTP header "Youtubedl-No-Compression", which will be
643 removed before making the real request.
645 Part of this code was copied from:
647 http://techknack.net/python-urllib2-handlers/
649 Andrew Rowls, the author of that code, agreed to release it to the
656 return zlib
.decompress(data
, -zlib
.MAX_WBITS
)
658 return zlib
.decompress(data
)
661 def addinfourl_wrapper(stream
, headers
, url
, code
):
662 if hasattr(compat_urllib_request
.addinfourl
, 'getcode'):
663 return compat_urllib_request
.addinfourl(stream
, headers
, url
, code
)
664 ret
= compat_urllib_request
.addinfourl(stream
, headers
, url
)
668 def http_request(self
, req
):
669 for h
,v
in std_headers
.items():
673 if 'Youtubedl-no-compression' in req
.headers
:
674 if 'Accept-encoding' in req
.headers
:
675 del req
.headers
['Accept-encoding']
676 del req
.headers
['Youtubedl-no-compression']
677 if 'Youtubedl-user-agent' in req
.headers
:
678 if 'User-agent' in req
.headers
:
679 del req
.headers
['User-agent']
680 req
.headers
['User-agent'] = req
.headers
['Youtubedl-user-agent']
681 del req
.headers
['Youtubedl-user-agent']
684 def http_response(self
, req
, resp
):
687 if resp
.headers
.get('Content-encoding', '') == 'gzip':
688 content
= resp
.read()
689 gz
= gzip
.GzipFile(fileobj
=io
.BytesIO(content
), mode
='rb')
691 uncompressed
= io
.BytesIO(gz
.read())
692 except IOError as original_ioerror
:
693 # There may be junk add the end of the file
694 # See http://stackoverflow.com/q/4928560/35070 for details
695 for i
in range(1, 1024):
697 gz
= gzip
.GzipFile(fileobj
=io
.BytesIO(content
[:-i
]), mode
='rb')
698 uncompressed
= io
.BytesIO(gz
.read())
703 raise original_ioerror
704 resp
= self
.addinfourl_wrapper(uncompressed
, old_resp
.headers
, old_resp
.url
, old_resp
.code
)
705 resp
.msg
= old_resp
.msg
707 if resp
.headers
.get('Content-encoding', '') == 'deflate':
708 gz
= io
.BytesIO(self
.deflate(resp
.read()))
709 resp
= self
.addinfourl_wrapper(gz
, old_resp
.headers
, old_resp
.url
, old_resp
.code
)
710 resp
.msg
= old_resp
.msg
713 https_request
= http_request
714 https_response
= http_response
716 def unified_strdate(date_str
):
717 """Return a string with the date in the format YYYYMMDD"""
720 date_str
= date_str
.replace(',',' ')
721 # %z (UTC offset) is only supported in python>=3.2
722 date_str
= re
.sub(r
' (\+|-)[\d]*$', '', date_str
)
723 format_expressions
= [
731 '%Y-%m-%dT%H:%M:%SZ',
734 for expression
in format_expressions
:
736 upload_date
= datetime
.datetime
.strptime(date_str
, expression
).strftime('%Y%m%d')
def determine_ext(url, default_ext=u'unknown_video'):
    """Guess the file extension from a URL, falling back to *default_ext*
    when the candidate is not purely alphanumeric."""
    candidate = url.partition(u'?')[0].rpartition(u'.')[2]
    if re.match(r'^[A-Za-z0-9]+$', candidate):
        return candidate
    return default_ext
def subtitles_filename(filename, sub_lang, sub_format):
    """Build '<base>.<lang>.<format>' from a media filename by replacing
    its final extension."""
    base = filename.rsplit('.', 1)[0]
    return base + u'.' + sub_lang + u'.' + sub_format
def date_from_str(date_str):
    """
    Return a datetime object from a string in the format YYYYMMDD or
    (now|today)[+-][0-9](day|week|month|year)(s)?"""
    today = datetime.date.today()
    if date_str in ('now', 'today'):
        return today
    match = re.match('(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
    if match is not None:
        sign = match.group('sign')
        time = int(match.group('time'))
        if sign == '-':
            time = -time
        unit = match.group('unit')
        # timedelta has no month/year units; approximate with fixed days
        if unit == 'month':
            unit = 'day'
            time *= 30
        elif unit == 'year':
            unit = 'day'
            time *= 365
        unit += 's'
        delta = datetime.timedelta(**{unit: time})
        return today + delta
    return datetime.datetime.strptime(date_str, "%Y%m%d").date()
class DateRange(object):
    """Represents a time interval between two dates"""
    def __init__(self, start=None, end=None):
        """start and end must be strings in the format accepted by date"""
        if start is not None:
            self.start = date_from_str(start)
        else:
            # Open-ended on the left: earliest representable date
            self.start = datetime.datetime.min.date()
        if end is not None:
            self.end = date_from_str(end)
        else:
            # Open-ended on the right: latest representable date
            self.end = datetime.datetime.max.date()
        if self.start > self.end:
            raise ValueError('Date range: "%s" , the start date must be before the end date' % self)

    @classmethod
    def day(cls, day):
        """Returns a range that only contains the given day"""
        return cls(day, day)

    def __contains__(self, date):
        """Check if the date is in the range"""
        if not isinstance(date, datetime.date):
            date = date_from_str(date)
        return self.start <= date <= self.end

    def __str__(self):
        return '%s - %s' % ( self.start.isoformat(), self.end.isoformat())
805 """ Returns the platform name as a compat_str """
806 res
= platform
.platform()
807 if isinstance(res
, bytes):
808 res
= res
.decode(preferredencoding())
810 assert isinstance(res
, compat_str
)
def write_string(s, out=None):
    """Write the unicode string *s* to *out* (sys.stderr by default),
    encoding it first when the target is a byte stream."""
    if out is None:
        out = sys.stderr
    assert type(s) == type(u'')

    if ('b' in getattr(out, 'mode', '') or
            sys.version_info[0] < 3):  # Python 2 lies about mode of sys.stderr
        s = s.encode(preferredencoding(), 'ignore')
    out.write(s)
    out.flush()
def bytes_to_intlist(bs):
    """Convert a bytes (py3) or str (py2) object into a list of byte values."""
    if not bs:
        return []
    if isinstance(bs[0], int):  # Python 3
        return list(bs)
    else:
        return [ord(c) for c in bs]
def intlist_to_bytes(xs):
    """Convert a list of byte values back into a bytes (py3) / str (py2) object."""
    if not xs:
        return b''
    if isinstance(chr(0), bytes):  # Python 2
        return ''.join([chr(x) for x in xs])
    else:
        return bytes(xs)
def get_cachedir(params=None):
    """Return the cache directory.

    Honors the 'cachedir' key of *params* when present; otherwise uses
    $XDG_CACHE_HOME/youtube-dl, defaulting to ~/.cache/youtube-dl.
    """
    # Was `params={}`: a mutable default argument. Never mutated here, but
    # replaced with the None-sentinel idiom to remove the hazard.
    if params is None:
        params = {}
    cache_root = os.environ.get('XDG_CACHE_HOME',
                                os.path.expanduser('~/.cache'))
    return params.get('cachedir', os.path.join(cache_root, 'youtube-dl'))
850 # Cross-platform file locking
851 if sys
.platform
== 'win32':
852 import ctypes
.wintypes
855 class OVERLAPPED(ctypes
.Structure
):
857 ('Internal', ctypes
.wintypes
.LPVOID
),
858 ('InternalHigh', ctypes
.wintypes
.LPVOID
),
859 ('Offset', ctypes
.wintypes
.DWORD
),
860 ('OffsetHigh', ctypes
.wintypes
.DWORD
),
861 ('hEvent', ctypes
.wintypes
.HANDLE
),
864 kernel32
= ctypes
.windll
.kernel32
865 LockFileEx
= kernel32
.LockFileEx
866 LockFileEx
.argtypes
= [
867 ctypes
.wintypes
.HANDLE
, # hFile
868 ctypes
.wintypes
.DWORD
, # dwFlags
869 ctypes
.wintypes
.DWORD
, # dwReserved
870 ctypes
.wintypes
.DWORD
, # nNumberOfBytesToLockLow
871 ctypes
.wintypes
.DWORD
, # nNumberOfBytesToLockHigh
872 ctypes
.POINTER(OVERLAPPED
) # Overlapped
874 LockFileEx
.restype
= ctypes
.wintypes
.BOOL
875 UnlockFileEx
= kernel32
.UnlockFileEx
876 UnlockFileEx
.argtypes
= [
877 ctypes
.wintypes
.HANDLE
, # hFile
878 ctypes
.wintypes
.DWORD
, # dwReserved
879 ctypes
.wintypes
.DWORD
, # nNumberOfBytesToLockLow
880 ctypes
.wintypes
.DWORD
, # nNumberOfBytesToLockHigh
881 ctypes
.POINTER(OVERLAPPED
) # Overlapped
883 UnlockFileEx
.restype
= ctypes
.wintypes
.BOOL
884 whole_low
= 0xffffffff
885 whole_high
= 0x7fffffff
887 def _lock_file(f
, exclusive
):
888 overlapped
= OVERLAPPED()
889 overlapped
.Offset
= 0
890 overlapped
.OffsetHigh
= 0
891 overlapped
.hEvent
= 0
892 f
._lock
_file
_overlapped
_p
= ctypes
.pointer(overlapped
)
893 handle
= msvcrt
.get_osfhandle(f
.fileno())
894 if not LockFileEx(handle
, 0x2 if exclusive
else 0x0, 0,
895 whole_low
, whole_high
, f
._lock
_file
_overlapped
_p
):
896 raise OSError('Locking file failed: %r' % ctypes
.FormatError())
899 assert f
._lock
_file
_overlapped
_p
900 handle
= msvcrt
.get_osfhandle(f
.fileno())
901 if not UnlockFileEx(handle
, 0,
902 whole_low
, whole_high
, f
._lock
_file
_overlapped
_p
):
903 raise OSError('Unlocking file failed: %r' % ctypes
.FormatError())
908 def _lock_file(f
, exclusive
):
909 fcntl
.lockf(f
, fcntl
.LOCK_EX
if exclusive
else fcntl
.LOCK_SH
)
912 fcntl
.lockf(f
, fcntl
.LOCK_UN
)
915 class locked_file(object):
916 def __init__(self
, filename
, mode
, encoding
=None):
917 assert mode
in ['r', 'a', 'w']
918 self
.f
= io
.open(filename
, mode
, encoding
=encoding
)
922 exclusive
= self
.mode
!= 'r'
924 _lock_file(self
.f
, exclusive
)
930 def __exit__(self
, etype
, value
, traceback
):
939 def write(self
, *args
):
940 return self
.f
.write(*args
)
942 def read(self
, *args
):
943 return self
.f
.read(*args
)
def shell_quote(args):
    """Quote each argument for safe shell usage and join them with spaces."""
    quoted = [pipes.quote(a) for a in args]
    return ' '.join(quoted)
def takewhile_inclusive(pred, seq):
    """ Like itertools.takewhile, but include the latest evaluated element
        (the first element so that Not pred(e)) """
    for e in seq:
        yield e
        if not pred(e):
            return
def smuggle_url(url, data):
    """ Pass additional data in a URL for internal use. """

    # Serialize the payload as JSON and tuck it into the URL fragment
    payload = compat_urllib_parse.urlencode(
        {u'__youtubedl_smuggle': json.dumps(data)})
    return url + u'#' + payload
967 def unsmuggle_url(smug_url
):
968 if not '#__youtubedl_smuggle' in smug_url
:
969 return smug_url
, None
970 url
, _
, sdata
= smug_url
.rpartition(u
'#')
971 jsond
= compat_parse_qs(sdata
)[u
'__youtubedl_smuggle'][0]
972 data
= json
.loads(jsond
)