]> Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/youtube.py
Update changelog.
[youtubedl] / youtube_dl / extractor / youtube.py
1 # coding: utf-8
2
3 import collections
4 import errno
5 import io
6 import itertools
7 import json
8 import os.path
9 import re
10 import socket
11 import string
12 import struct
13 import traceback
14 import zlib
15
16 from .common import InfoExtractor, SearchInfoExtractor
17 from .subtitles import SubtitlesInfoExtractor
18 from ..utils import (
19 compat_chr,
20 compat_http_client,
21 compat_parse_qs,
22 compat_urllib_error,
23 compat_urllib_parse,
24 compat_urllib_request,
25 compat_urlparse,
26 compat_str,
27
28 clean_html,
29 get_cachedir,
30 get_element_by_id,
31 get_element_by_attribute,
32 ExtractorError,
33 unescapeHTML,
34 unified_strdate,
35 orderedSet,
36 write_json_file,
37 )
38
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _LANG_URL = r'https://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
    _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    def report_lang(self):
        """Report attempt to set language."""
        self.to_screen(u'Setting language')

    def _set_language(self):
        """Request _LANG_URL (which carries the hl=en / persist_hl=1 parameters).

        Returns True when the request succeeds. On a network error a
        warning is reported through the downloader and False is returned.
        """
        request = compat_urllib_request.Request(self._LANG_URL)
        try:
            self.report_lang()
            compat_urllib_request.urlopen(request).read()
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            self._downloader.report_warning(u'unable to set language: %s' % compat_str(err))
            return False
        return True

    def _login(self):
        """Log in with the credentials returned by self._get_login_info().

        Returns True after a successful login and False when no login was
        performed (no username available, network error, or the login form
        is shown again, which is treated as bad credentials).

        Raises ExtractorError when no username is available and
        _LOGIN_REQUIRED is set.
        """
        (username, password) = self._get_login_info()
        # No authentication to be performed
        if username is None:
            if self._LOGIN_REQUIRED:
                raise ExtractorError(u'No login info available, needed for using %s.' % self.IE_NAME, expected=True)
            return False

        request = compat_urllib_request.Request(self._LOGIN_URL)
        try:
            login_page = compat_urllib_request.urlopen(request).read().decode('utf-8')
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            self._downloader.report_warning(u'unable to fetch login page: %s' % compat_str(err))
            return False

        # The login page carries a hidden GALX input whose value has to be
        # sent back together with the credentials.
        galx = self._search_regex(r'(?s)<input.+?name="GALX".+?value="(.+?)"',
                                  login_page, u'Login GALX parameter')

        # Log in
        login_form_strs = {
            u'continue': u'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
            u'Email': username,
            u'GALX': galx,
            u'Passwd': password,
            u'PersistentCookie': u'yes',
            u'_utf8': u'霱',
            u'bgresponse': u'js_disabled',
            u'checkConnection': u'',
            u'checkedDomains': u'youtube',
            u'dnConn': u'',
            u'pstMsg': u'0',
            u'rmShown': u'1',
            u'secTok': u'',
            u'signIn': u'Sign in',
            u'timeStmp': u'',
            u'service': u'youtube',
            u'uilel': u'3',
            u'hl': u'en_US',
        }
        # Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
        # chokes on unicode
        login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k,v in login_form_strs.items())
        login_data = compat_urllib_parse.urlencode(login_form).encode('ascii')
        request = compat_urllib_request.Request(self._LOGIN_URL, login_data)
        try:
            self.report_login()
            login_results = compat_urllib_request.urlopen(request).read().decode('utf-8')
            # Getting the login form back means the credentials were rejected.
            if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None:
                self._downloader.report_warning(u'unable to log in: bad username or password')
                return False
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            self._downloader.report_warning(u'unable to log in: %s' % compat_str(err))
            return False
        return True

    def _confirm_age(self):
        """POST the age confirmation form to _AGE_URL.

        Returns True on success.
        Raises ExtractorError when the request fails with a network error.
        """
        age_form = {
            'next_url': '/',
            'action_confirm': 'Confirm',
        }
        # urlencode() yields text; the request body has to be bytes on
        # Python 3, so encode it exactly like the login form in _login().
        age_data = compat_urllib_parse.urlencode(age_form).encode('ascii')
        request = compat_urllib_request.Request(self._AGE_URL, age_data)
        try:
            self.report_age_confirmation()
            compat_urllib_request.urlopen(request).read().decode('utf-8')
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            raise ExtractorError(u'Unable to confirm age: %s' % compat_str(err))
        return True

    def _real_initialize(self):
        """Set the language, log in and confirm age, stopping at the first step that fails.

        Does nothing when no downloader is attached.
        """
        if self._downloader is None:
            return
        if not self._set_language():
            return
        if not self._login():
            return
        self._confirm_age()
138
139
140 class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
141 IE_DESC = u'YouTube.com'
142 _VALID_URL = r"""(?x)^
143 (
144 (?:https?://|//)? # http(s):// or protocol-independent URL (optional)
145 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/|
146 tube\.majestyc\.net/|
147 youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
148 (?:.*?\#/)? # handle anchor (#/) redirect urls
149 (?: # the various things that can precede the ID:
150 (?:(?:v|embed|e)/) # v/ or embed/ or e/
151 |(?: # or the v= param in all its forms
152 (?:(?:watch|movie)(?:_popup)?(?:\.php)?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
153 (?:\?|\#!?) # the params delimiter ? or # or #!
154 (?:.*?&)? # any other preceding param (like /?s=tuff&v=xxxx)
155 v=
156 )
157 ))
158 |youtu\.be/ # just youtu.be/xxxx
159 )
160 )? # all until now is optional -> you can pass the naked ID
161 ([0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
162 (?(1).+)? # if we found the ID, everything can follow
163 $"""
164 _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
165 # Listed in order of quality
166 _available_formats = ['38', '37', '46', '22', '45', '35', '44', '34', '18', '43', '6', '5', '36', '17', '13',
167 # Apple HTTP Live Streaming
168 '96', '95', '94', '93', '92', '132', '151',
169 # 3D
170 '85', '84', '102', '83', '101', '82', '100',
171 # Dash video
172 '138', '137', '248', '136', '247', '135', '246',
173 '245', '244', '134', '243', '133', '242', '160',
174 # Dash audio
175 '141', '172', '140', '171', '139',
176 ]
177 _available_formats_prefer_free = ['38', '46', '37', '45', '22', '44', '35', '43', '34', '18', '6', '5', '36', '17', '13',
178 # Apple HTTP Live Streaming
179 '96', '95', '94', '93', '92', '132', '151',
180 # 3D
181 '85', '102', '84', '101', '83', '100', '82',
182 # Dash video
183 '138', '248', '137', '247', '136', '246', '245',
184 '244', '135', '243', '134', '242', '133', '160',
185 # Dash audio
186 '172', '141', '171', '140', '139',
187 ]
188 _video_formats_map = {
189 'flv': ['35', '34', '6', '5'],
190 '3gp': ['36', '17', '13'],
191 'mp4': ['38', '37', '22', '18'],
192 'webm': ['46', '45', '44', '43'],
193 }
194 _video_extensions = {
195 '13': '3gp',
196 '17': '3gp',
197 '18': 'mp4',
198 '22': 'mp4',
199 '36': '3gp',
200 '37': 'mp4',
201 '38': 'mp4',
202 '43': 'webm',
203 '44': 'webm',
204 '45': 'webm',
205 '46': 'webm',
206
207 # 3d videos
208 '82': 'mp4',
209 '83': 'mp4',
210 '84': 'mp4',
211 '85': 'mp4',
212 '100': 'webm',
213 '101': 'webm',
214 '102': 'webm',
215
216 # Apple HTTP Live Streaming
217 '92': 'mp4',
218 '93': 'mp4',
219 '94': 'mp4',
220 '95': 'mp4',
221 '96': 'mp4',
222 '132': 'mp4',
223 '151': 'mp4',
224
225 # Dash mp4
226 '133': 'mp4',
227 '134': 'mp4',
228 '135': 'mp4',
229 '136': 'mp4',
230 '137': 'mp4',
231 '138': 'mp4',
232 '160': 'mp4',
233
234 # Dash mp4 audio
235 '139': 'm4a',
236 '140': 'm4a',
237 '141': 'm4a',
238
239 # Dash webm
240 '171': 'webm',
241 '172': 'webm',
242 '242': 'webm',
243 '243': 'webm',
244 '244': 'webm',
245 '245': 'webm',
246 '246': 'webm',
247 '247': 'webm',
248 '248': 'webm',
249 }
250 _video_dimensions = {
251 '5': '400x240',
252 '6': '???',
253 '13': '???',
254 '17': '176x144',
255 '18': '640x360',
256 '22': '1280x720',
257 '34': '640x360',
258 '35': '854x480',
259 '36': '320x240',
260 '37': '1920x1080',
261 '38': '4096x3072',
262 '43': '640x360',
263 '44': '854x480',
264 '45': '1280x720',
265 '46': '1920x1080',
266 '82': '360p',
267 '83': '480p',
268 '84': '720p',
269 '85': '1080p',
270 '92': '240p',
271 '93': '360p',
272 '94': '480p',
273 '95': '720p',
274 '96': '1080p',
275 '100': '360p',
276 '101': '480p',
277 '102': '720p',
278 '132': '240p',
279 '151': '72p',
280 '133': '240p',
281 '134': '360p',
282 '135': '480p',
283 '136': '720p',
284 '137': '1080p',
285 '138': '>1080p',
286 '139': '48k',
287 '140': '128k',
288 '141': '256k',
289 '160': '192p',
290 '171': '128k',
291 '172': '256k',
292 '242': '240p',
293 '243': '360p',
294 '244': '480p',
295 '245': '480p',
296 '246': '480p',
297 '247': '720p',
298 '248': '1080p',
299 }
300 _special_itags = {
301 '82': '3D',
302 '83': '3D',
303 '84': '3D',
304 '85': '3D',
305 '100': '3D',
306 '101': '3D',
307 '102': '3D',
308 '133': 'DASH Video',
309 '134': 'DASH Video',
310 '135': 'DASH Video',
311 '136': 'DASH Video',
312 '137': 'DASH Video',
313 '138': 'DASH Video',
314 '139': 'DASH Audio',
315 '140': 'DASH Audio',
316 '141': 'DASH Audio',
317 '160': 'DASH Video',
318 '171': 'DASH Audio',
319 '172': 'DASH Audio',
320 '242': 'DASH Video',
321 '243': 'DASH Video',
322 '244': 'DASH Video',
323 '245': 'DASH Video',
324 '246': 'DASH Video',
325 '247': 'DASH Video',
326 '248': 'DASH Video',
327 }
328
329 IE_NAME = u'youtube'
330 _TESTS = [
331 {
332 u"url": u"http://www.youtube.com/watch?v=BaW_jenozKc",
333 u"file": u"BaW_jenozKc.mp4",
334 u"info_dict": {
335 u"title": u"youtube-dl test video \"'/\\ä↭𝕐",
336 u"uploader": u"Philipp Hagemeister",
337 u"uploader_id": u"phihag",
338 u"upload_date": u"20121002",
339 u"description": u"test chars: \"'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de ."
340 }
341 },
342 {
343 u"url": u"http://www.youtube.com/watch?v=UxxajLWwzqY",
344 u"file": u"UxxajLWwzqY.mp4",
345 u"note": u"Test generic use_cipher_signature video (#897)",
346 u"info_dict": {
347 u"upload_date": u"20120506",
348 u"title": u"Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]",
349 u"description": u"md5:5b292926389560516e384ac437c0ec07",
350 u"uploader": u"Icona Pop",
351 u"uploader_id": u"IconaPop"
352 }
353 },
354 {
355 u"url": u"https://www.youtube.com/watch?v=07FYdnEawAQ",
356 u"file": u"07FYdnEawAQ.mp4",
357 u"note": u"Test VEVO video with age protection (#956)",
358 u"info_dict": {
359 u"upload_date": u"20130703",
360 u"title": u"Justin Timberlake - Tunnel Vision (Explicit)",
361 u"description": u"md5:64249768eec3bc4276236606ea996373",
362 u"uploader": u"justintimberlakeVEVO",
363 u"uploader_id": u"justintimberlakeVEVO"
364 }
365 },
366 {
367 u"url": u"//www.YouTube.com/watch?v=yZIXLfi8CZQ",
368 u"file": u"yZIXLfi8CZQ.mp4",
369 u"note": u"Embed-only video (#1746)",
370 u"info_dict": {
371 u"upload_date": u"20120608",
372 u"title": u"Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012",
373 u"description": u"md5:09b78bd971f1e3e289601dfba15ca4f7",
374 u"uploader": u"SET India",
375 u"uploader_id": u"setindia"
376 }
377 },
378 ]
379
380
@classmethod
def suitable(cls, url):
    """Tell whether this extractor should handle url.

    URLs accepted by YoutubePlaylistIE are rejected here; any other URL
    is accepted when it matches _VALID_URL.
    """
    claimed_by_playlist = YoutubePlaylistIE.suitable(url)
    if claimed_by_playlist:
        return False
    matched = re.match(cls._VALID_URL, url)
    return matched is not None
386
def __init__(self, *args, **kwargs):
    """Initialise the base extractor and start with an empty player cache."""
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # Filled by _decrypt_signature, keyed by (player URL, signature length).
    self._player_cache = dict()
390
def report_video_webpage_download(self, video_id):
    """Tell the user that the video webpage is being downloaded."""
    message = u'%s: Downloading video webpage' % video_id
    self.to_screen(message)
394
def report_video_info_webpage_download(self, video_id):
    """Tell the user that the video info webpage is being downloaded."""
    message = u'%s: Downloading video info webpage' % video_id
    self.to_screen(message)
398
def report_information_extraction(self, video_id):
    """Tell the user that video information is being extracted."""
    message = u'%s: Extracting video information' % video_id
    self.to_screen(message)
402
def report_unavailable_format(self, video_id, format):
    """Tell the user that the given format is not available for the video."""
    details = (video_id, format)
    self.to_screen(u'%s: Format %s not available' % details)
406
def report_rtmp_download(self):
    """Tell the user that the download will go over RTMP."""
    message = u'RTMP download detected'
    self.to_screen(message)
410
def _extract_signature_function(self, video_id, player_url, slen):
    """Return a callable that deciphers signatures of length slen.

    The player type ('js' or 'swf') and player id are taken from
    player_url. When a cache directory is configured, a previously stored
    index permutation for (player type, player id, slen) is used if it
    can be read; otherwise the player is downloaded, parsed with
    _parse_sig_js / _parse_sig_swf, and the resulting permutation is
    written back to the cache on a best-effort basis.

    Raises ExtractorError when player_url cannot be identified or names
    an unsupported player type.
    """
    id_m = re.match(r'.*-(?P<id>[a-zA-Z0-9_-]+)\.(?P<ext>[a-z]+)$',
                    player_url)
    if id_m is None:
        # Previously this surfaced as an AttributeError on None.
        raise ExtractorError(u'Cannot identify player %r' % player_url)
    player_type = id_m.group('ext')
    player_id = id_m.group('id')

    # Read from filesystem cache
    func_id = '%s_%s_%d' % (player_type, player_id, slen)
    # func_id becomes a file name below, so it must not contain any path
    # component. Checked explicitly because asserts vanish under -O.
    if os.path.basename(func_id) != func_id:
        raise ExtractorError(u'Invalid signature function id %r' % func_id)
    cache_dir = get_cachedir(self._downloader.params)

    cache_enabled = cache_dir is not None
    if cache_enabled:
        cache_fn = os.path.join(os.path.expanduser(cache_dir),
                                u'youtube-sigfuncs',
                                func_id + '.json')
        try:
            with io.open(cache_fn, 'r', encoding='utf-8') as cachef:
                cache_spec = json.load(cachef)
        except IOError:
            pass  # No cache available
        except ValueError:
            # The cache file is not valid JSON: ignore it and extract the
            # function from the player again.
            pass
        else:
            # cache_spec lists, for each output position, the index of the
            # input character to take.
            return lambda s: u''.join(s[i] for i in cache_spec)

    if player_type == 'js':
        code = self._download_webpage(
            player_url, video_id,
            note=u'Downloading %s player %s' % (player_type, player_id),
            errnote=u'Download of %s failed' % player_url)
        res = self._parse_sig_js(code)
    elif player_type == 'swf':
        urlh = self._request_webpage(
            player_url, video_id,
            note=u'Downloading %s player %s' % (player_type, player_id),
            errnote=u'Download of %s failed' % player_url)
        code = urlh.read()
        res = self._parse_sig_swf(code)
    else:
        raise ExtractorError(u'Invalid player type %r' % player_type)

    if cache_enabled:
        try:
            # Run the function on a string whose n-th character has code
            # point n; the code points of the result are the permutation.
            test_string = u''.join(map(compat_chr, range(slen)))
            cache_res = res(test_string)
            cache_spec = [ord(c) for c in cache_res]
            try:
                os.makedirs(os.path.dirname(cache_fn))
            except OSError as ose:
                if ose.errno != errno.EEXIST:
                    raise
            write_json_file(cache_spec, cache_fn)
        except Exception:
            # Caching is best effort: report the problem and carry on.
            tb = traceback.format_exc()
            self._downloader.report_warning(
                u'Writing cache to %r failed: %s' % (cache_fn, tb))

    return res
467
def _print_sig_code(self, func, slen):
    """Print Python code equivalent to func for signatures of length slen.

    func is applied to a probe string whose n-th character has code point
    n; the code points of the result give the index permutation, which is
    rendered as a sum of s[...] index and slice expressions and shown with
    self.to_screen().
    """
    def gen_sig_code(idxs):
        """Yield s[...] expressions that together reproduce idxs in order."""
        def _genslice(start, end, step):
            starts = u'' if start == 0 else str(start)
            ends = (u':%d' % (end+step)) if end + step >= 0 else u':'
            steps = u'' if step == 1 else (u':%d' % step)
            return u's[%s%s%s]' % (starts, ends, steps)

        if not idxs:
            return  # An empty permutation needs no expression at all

        step = None
        start = '(Never used)'  # Quelch pyflakes warnings - start will be
                                # set as soon as step is set
        # With a single index the loop below never runs; seed i so that the
        # final yield still emits that index instead of hitting an unbound
        # name.
        i = idxs[0]
        for i, prev in zip(idxs[1:], idxs[:-1]):
            if step is not None:
                if i - prev == step:
                    continue
                # The run ended at prev: emit it and start over with i.
                yield _genslice(start, prev, step)
                step = None
                continue
            if i - prev in [-1, 1]:
                step = i - prev
                start = prev
                continue
            else:
                yield u's[%d]' % prev
        if step is None:
            yield u's[%d]' % i
        else:
            yield _genslice(start, i, step)

    test_string = u''.join(map(compat_chr, range(slen)))
    cache_res = func(test_string)
    cache_spec = [ord(c) for c in cache_res]
    # An empty result is rendered as an empty string literal.
    expr_code = u' + '.join(gen_sig_code(cache_spec)) or u"u''"
    code = u'if len(s) == %d:\n return %s\n' % (slen, expr_code)
    self.to_screen(u'Extracted signature function:\n' + code)
503
504 def _parse_sig_js(self, jscode):
505 funcname = self._search_regex(
506 r'signature=([a-zA-Z]+)', jscode,
507 u'Initial JS player signature function name')
508
509 functions = {}
510
511 def argidx(varname):
512 return string.lowercase.index(varname)
513
514 def interpret_statement(stmt, local_vars, allow_recursion=20):
515 if allow_recursion < 0:
516 raise ExtractorError(u'Recursion limit reached')
517
518 if stmt.startswith(u'var '):
519 stmt = stmt[len(u'var '):]
520 ass_m = re.match(r'^(?P<out>[a-z]+)(?:\[(?P<index>[^\]]+)\])?' +
521 r'=(?P<expr>.*)$', stmt)
522 if ass_m:
523 if ass_m.groupdict().get('index'):
524 def assign(val):
525 lvar = local_vars[ass_m.group('out')]
526 idx = interpret_expression(ass_m.group('index'),
527 local_vars, allow_recursion)
528 assert isinstance(idx, int)
529 lvar[idx] = val
530 return val
531 expr = ass_m.group('expr')
532 else:
533 def assign(val):
534 local_vars[ass_m.group('out')] = val
535 return val
536 expr = ass_m.group('expr')
537 elif stmt.startswith(u'return '):
538 assign = lambda v: v
539 expr = stmt[len(u'return '):]
540 else:
541 raise ExtractorError(
542 u'Cannot determine left side of statement in %r' % stmt)
543
544 v = interpret_expression(expr, local_vars, allow_recursion)
545 return assign(v)
546
547 def interpret_expression(expr, local_vars, allow_recursion):
548 if expr.isdigit():
549 return int(expr)
550
551 if expr.isalpha():
552 return local_vars[expr]
553
554 m = re.match(r'^(?P<in>[a-z]+)\.(?P<member>.*)$', expr)
555 if m:
556 member = m.group('member')
557 val = local_vars[m.group('in')]
558 if member == 'split("")':
559 return list(val)
560 if member == 'join("")':
561 return u''.join(val)
562 if member == 'length':
563 return len(val)
564 if member == 'reverse()':
565 return val[::-1]
566 slice_m = re.match(r'slice\((?P<idx>.*)\)', member)
567 if slice_m:
568 idx = interpret_expression(
569 slice_m.group('idx'), local_vars, allow_recursion-1)
570 return val[idx:]
571
572 m = re.match(
573 r'^(?P<in>[a-z]+)\[(?P<idx>.+)\]$', expr)
574 if m:
575 val = local_vars[m.group('in')]
576 idx = interpret_expression(m.group('idx'), local_vars,
577 allow_recursion-1)
578 return val[idx]
579
580 m = re.match(r'^(?P<a>.+?)(?P<op>[%])(?P<b>.+?)$', expr)
581 if m:
582 a = interpret_expression(m.group('a'),
583 local_vars, allow_recursion)
584 b = interpret_expression(m.group('b'),
585 local_vars, allow_recursion)
586 return a % b
587
588 m = re.match(
589 r'^(?P<func>[a-zA-Z]+)\((?P<args>[a-z0-9,]+)\)$', expr)
590 if m:
591 fname = m.group('func')
592 if fname not in functions:
593 functions[fname] = extract_function(fname)
594 argvals = [int(v) if v.isdigit() else local_vars[v]
595 for v in m.group('args').split(',')]
596 return functions[fname](argvals)
597 raise ExtractorError(u'Unsupported JS expression %r' % expr)
598
599 def extract_function(funcname):
600 func_m = re.search(
601 r'function ' + re.escape(funcname) +
602 r'\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}',
603 jscode)
604 argnames = func_m.group('args').split(',')
605
606 def resf(args):
607 local_vars = dict(zip(argnames, args))
608 for stmt in func_m.group('code').split(';'):
609 res = interpret_statement(stmt, local_vars)
610 return res
611 return resf
612
613 initial_function = extract_function(funcname)
614 return lambda s: initial_function([s])
615
def _parse_sig_swf(self, file_contents):
    """Build a signature-deciphering function from a Flash (SWF) player.

    file_contents is the raw player file as bytes. Only zlib-compressed
    files (first byte b'C') are handled. The ABC (AVM2 ByteCode) block in
    tag 82 is parsed just far enough to find the methods of the class
    named 'SignatureDecipher'; its 'decipher' method is then executed by a
    small interpreter supporting the opcodes listed in resfunc below.

    Returns a callable taking the signature string.
    Raises ExtractorError when the header is not an SWF header, the target
    class is missing or a trait kind is unsupported, and
    NotImplementedError for other compression formats or unsupported
    opcodes/properties. Structural expectations are enforced with asserts.
    """
    if file_contents[1:3] != b'WS':
        raise ExtractorError(
            u'Not an SWF file; header is %r' % file_contents[:3])
    if file_contents[:1] == b'C':
        # Everything after the first 8 bytes is zlib-compressed.
        content = zlib.decompress(file_contents[8:])
    else:
        raise NotImplementedError(u'Unsupported compression format %r' %
                                  file_contents[:1])

    def extract_tags(content):
        """Yield (tag_code, tag_body) pairs from the decompressed data."""
        pos = 0
        while pos < len(content):
            # 16-bit header: upper 10 bits tag code, lower 6 bits length
            header16 = struct.unpack('<H', content[pos:pos+2])[0]
            pos += 2
            tag_code = header16 >> 6
            tag_len = header16 & 0x3f
            if tag_len == 0x3f:
                # Long form: the real length follows as a 32-bit integer
                tag_len = struct.unpack('<I', content[pos:pos+4])[0]
                pos += 4
            assert pos+tag_len <= len(content)
            yield (tag_code, content[pos:pos+tag_len])
            pos += tag_len

    code_tag = next(tag
                    for tag_code, tag in extract_tags(content)
                    if tag_code == 82)
    # Skip the first 4 bytes and the NUL-terminated string that follows
    # (presumably the tag's flags and name - TODO confirm against the
    # SWF specification).
    p = code_tag.index(b'\0', 4) + 1
    code_reader = io.BytesIO(code_tag[p:])

    # Parse ABC (AVM2 ByteCode)
    def read_int(reader=None):
        """Read a variable-length integer: 7 bits per byte, at most 5 bytes."""
        if reader is None:
            reader = code_reader
        res = 0
        shift = 0
        for _ in range(5):
            buf = reader.read(1)
            assert len(buf) == 1
            b = struct.unpack('<B', buf)[0]
            res = res | ((b & 0x7f) << shift)
            if b & 0x80 == 0:
                break
            shift += 7
        return res

    def u30(reader=None):
        """Read a variable-length integer whose top four bits must be 0."""
        res = read_int(reader)
        assert res & 0xf0000000 == 0
        return res
    u32 = read_int

    def s32(reader=None):
        """Read a variable-length integer and interpret it as signed 32 bit."""
        v = read_int(reader)
        if v & 0x80000000 != 0:
            v = - ((v ^ 0xffffffff) + 1)
        return v

    def read_string(reader=None):
        """Read a u30 length followed by that many UTF-8 bytes."""
        if reader is None:
            reader = code_reader
        slen = u30(reader)
        resb = reader.read(slen)
        assert len(resb) == slen
        return resb.decode('utf-8')

    def read_bytes(count, reader=None):
        """Read exactly count bytes."""
        if reader is None:
            reader = code_reader
        resb = reader.read(count)
        assert len(resb) == count
        return resb

    def read_byte(reader=None):
        """Read one byte and return it as an integer."""
        resb = read_bytes(1, reader=reader)
        res = struct.unpack('<B', resb)[0]
        return res

    # minor_version + major_version
    read_bytes(2 + 2)

    # Constant pool
    # Each pool stores count-1 entries (index 0 is implicit), hence the
    # loops starting at 1.
    int_count = u30()
    for _c in range(1, int_count):
        s32()
    uint_count = u30()
    for _c in range(1, uint_count):
        u32()
    double_count = u30()
    # A count of 0 also means "no entries"; clamp so that we never ask
    # for a negative number of bytes (which would read to end of stream).
    read_bytes(max(0, double_count - 1) * 8)
    string_count = u30()
    constant_strings = [u'']
    for _c in range(1, string_count):
        s = read_string()
        constant_strings.append(s)
    namespace_count = u30()
    for _c in range(1, namespace_count):
        read_bytes(1)  # kind
        u30()  # name
    ns_set_count = u30()
    for _c in range(1, ns_set_count):
        count = u30()
        for _c2 in range(count):
            u30()
    multiname_count = u30()
    # Number of u30 fields following each multiname kind
    MULTINAME_SIZES = {
        0x07: 2,  # QName
        0x0d: 2,  # QNameA
        0x0f: 1,  # RTQName
        0x10: 1,  # RTQNameA
        0x11: 0,  # RTQNameL
        0x12: 0,  # RTQNameLA
        0x09: 2,  # Multiname
        0x0e: 2,  # MultinameA
        0x1b: 1,  # MultinameL
        0x1c: 1,  # MultinameLA
    }
    multinames = [u'']
    for _c in range(1, multiname_count):
        kind = u30()
        assert kind in MULTINAME_SIZES, u'Invalid multiname kind %r' % kind
        if kind == 0x07:
            # Only QNames are resolved to their string; other kinds get a
            # placeholder.
            u30()  # namespace_idx
            name_idx = u30()
            multinames.append(constant_strings[name_idx])
        else:
            multinames.append('[MULTINAME kind: %d]' % kind)
            for _c2 in range(MULTINAME_SIZES[kind]):
                u30()

    # Methods
    method_count = u30()
    MethodInfo = collections.namedtuple(
        'MethodInfo',
        ['NEED_ARGUMENTS', 'NEED_REST'])
    method_infos = []
    for method_id in range(method_count):
        param_count = u30()
        u30()  # return type
        for _ in range(param_count):
            u30()  # param type
        u30()  # name index (always 0 for youtube)
        flags = read_byte()
        if flags & 0x08 != 0:
            # Options present
            option_count = u30()
            for c in range(option_count):
                u30()  # val
                read_bytes(1)  # kind
        if flags & 0x80 != 0:
            # Param names present
            for _ in range(param_count):
                u30()  # param name
        mi = MethodInfo(flags & 0x01 != 0, flags & 0x04 != 0)
        method_infos.append(mi)

    # Metadata
    metadata_count = u30()
    for _c in range(metadata_count):
        u30()  # name
        item_count = u30()
        for _c2 in range(item_count):
            u30()  # key
            u30()  # value

    def parse_traits_info():
        """Consume one trait entry and return the methods it declares."""
        trait_name_idx = u30()
        kind_full = read_byte()
        kind = kind_full & 0x0f
        attrs = kind_full >> 4
        methods = {}
        if kind in [0x00, 0x06]:  # Slot or Const
            u30()  # Slot id
            u30()  # type_name_idx
            vindex = u30()
            if vindex != 0:
                read_byte()  # vkind
        elif kind in [0x01, 0x02, 0x03]:  # Method / Getter / Setter
            u30()  # disp_id
            method_idx = u30()
            methods[multinames[trait_name_idx]] = method_idx
        elif kind == 0x04:  # Class
            u30()  # slot_id
            u30()  # classi
        elif kind == 0x05:  # Function
            u30()  # slot_id
            function_idx = u30()
            # NOTE(review): key and value are swapped compared with the
            # Method branch above - confirm whether this is intended.
            methods[function_idx] = multinames[trait_name_idx]
        else:
            raise ExtractorError(u'Unsupported trait kind %d' % kind)

        if attrs & 0x4 != 0:  # Metadata present
            metadata_count = u30()
            for _c3 in range(metadata_count):
                u30()  # metadata index

        return methods

    # Classes
    TARGET_CLASSNAME = u'SignatureDecipher'
    searched_idx = multinames.index(TARGET_CLASSNAME)
    searched_class_id = None
    class_count = u30()
    # First pass over the classes: locate the target class.
    for class_id in range(class_count):
        name_idx = u30()
        if name_idx == searched_idx:
            # We found the class we're looking for!
            searched_class_id = class_id
        u30()  # super_name idx
        flags = read_byte()
        if flags & 0x08 != 0:  # Protected namespace is present
            u30()  # protected_ns_idx
        intrf_count = u30()
        for _c2 in range(intrf_count):
            u30()
        u30()  # iinit
        trait_count = u30()
        for _c2 in range(trait_count):
            parse_traits_info()

    if searched_class_id is None:
        raise ExtractorError(u'Target class %r not found' %
                             TARGET_CLASSNAME)

    # Second pass: collect the method traits of the target class.
    method_names = {}
    method_idxs = {}
    for class_id in range(class_count):
        u30()  # cinit
        trait_count = u30()
        for _c2 in range(trait_count):
            trait_methods = parse_traits_info()
            if class_id == searched_class_id:
                method_names.update(trait_methods.items())
                method_idxs.update(dict(
                    (idx, name)
                    for name, idx in trait_methods.items()))

    # Scripts
    script_count = u30()
    for _c in range(script_count):
        u30()  # init
        trait_count = u30()
        for _c2 in range(trait_count):
            parse_traits_info()

    # Method bodies
    method_body_count = u30()
    Method = collections.namedtuple('Method', ['code', 'local_count'])
    methods = {}
    for _c in range(method_body_count):
        method_idx = u30()
        u30()  # max_stack
        local_count = u30()
        u30()  # init_scope_depth
        u30()  # max_scope_depth
        code_length = u30()
        code = read_bytes(code_length)
        if method_idx in method_idxs:
            # Keep only the bodies of the target class' methods, by name.
            m = Method(code, local_count)
            methods[method_idxs[method_idx]] = m
        exception_count = u30()
        for _c2 in range(exception_count):
            u30()  # from
            u30()  # to
            u30()  # target
            u30()  # exc_type
            u30()  # var_name
        trait_count = u30()
        for _c2 in range(trait_count):
            parse_traits_info()

    # The whole ABC block must have been consumed, and every method of the
    # target class must have a body.
    assert p + code_reader.tell() == len(code_tag)
    assert len(methods) == len(method_idxs)

    method_pyfunctions = {}

    def extract_function(func_name):
        """Return (and memoize) a Python callable running the named method."""
        if func_name in method_pyfunctions:
            return method_pyfunctions[func_name]
        if func_name not in methods:
            raise ExtractorError(u'Cannot find function %r' % func_name)
        m = methods[func_name]

        def resfunc(args):
            # Register 0 stands for "this"; the arguments follow, then the
            # method's locals.
            registers = ['(this)'] + list(args) + [None] * m.local_count
            stack = []
            coder = io.BytesIO(m.code)
            while True:
                opcode = struct.unpack('!B', coder.read(1))[0]
                if opcode == 36:  # pushbyte
                    v = struct.unpack('!B', coder.read(1))[0]
                    stack.append(v)
                elif opcode == 44:  # pushstring
                    idx = u30(coder)
                    stack.append(constant_strings[idx])
                elif opcode == 48:  # pushscope
                    # We don't implement the scope register, so we'll just
                    # ignore the popped value
                    stack.pop()
                elif opcode == 70:  # callproperty
                    index = u30(coder)
                    mname = multinames[index]
                    arg_count = u30(coder)
                    # Arguments were pushed left to right, so popping
                    # yields them reversed.
                    args = list(reversed(
                        [stack.pop() for _ in range(arg_count)]))
                    obj = stack.pop()
                    if mname == u'split':
                        assert len(args) == 1
                        assert isinstance(args[0], compat_str)
                        assert isinstance(obj, compat_str)
                        if args[0] == u'':
                            res = list(obj)
                        else:
                            res = obj.split(args[0])
                        stack.append(res)
                    elif mname == u'slice':
                        assert len(args) == 1
                        assert isinstance(args[0], int)
                        assert isinstance(obj, list)
                        res = obj[args[0]:]
                        stack.append(res)
                    elif mname == u'join':
                        assert len(args) == 1
                        assert isinstance(args[0], compat_str)
                        assert isinstance(obj, list)
                        res = args[0].join(obj)
                        stack.append(res)
                    elif mname in method_pyfunctions:
                        stack.append(method_pyfunctions[mname](args))
                    else:
                        raise NotImplementedError(
                            u'Unsupported property %r on %r'
                            % (mname, obj))
                elif opcode == 72:  # returnvalue
                    res = stack.pop()
                    return res
                elif opcode == 79:  # callpropvoid
                    index = u30(coder)
                    mname = multinames[index]
                    arg_count = u30(coder)
                    args = list(reversed(
                        [stack.pop() for _ in range(arg_count)]))
                    obj = stack.pop()
                    if mname == u'reverse':
                        assert isinstance(obj, list)
                        obj.reverse()
                    else:
                        raise NotImplementedError(
                            u'Unsupported (void) property %r on %r'
                            % (mname, obj))
                elif opcode == 93:  # findpropstrict
                    index = u30(coder)
                    mname = multinames[index]
                    # Resolving the name registers the method in
                    # method_pyfunctions for a later callproperty.
                    res = extract_function(mname)
                    stack.append(res)
                elif opcode == 97:  # setproperty
                    index = u30(coder)
                    value = stack.pop()
                    idx = stack.pop()
                    obj = stack.pop()
                    assert isinstance(obj, list)
                    assert isinstance(idx, int)
                    obj[idx] = value
                elif opcode == 98:  # getlocal
                    index = u30(coder)
                    stack.append(registers[index])
                elif opcode == 99:  # setlocal
                    index = u30(coder)
                    value = stack.pop()
                    registers[index] = value
                elif opcode == 102:  # getproperty
                    index = u30(coder)
                    pname = multinames[index]
                    if pname == u'length':
                        obj = stack.pop()
                        assert isinstance(obj, list)
                        stack.append(len(obj))
                    else:  # Assume attribute access
                        idx = stack.pop()
                        assert isinstance(idx, int)
                        obj = stack.pop()
                        assert isinstance(obj, list)
                        stack.append(obj[idx])
                elif opcode == 128:  # coerce
                    u30(coder)
                elif opcode == 133:  # coerce_s
                    assert isinstance(stack[-1], (type(None), compat_str))
                elif opcode == 164:  # modulo
                    value2 = stack.pop()
                    value1 = stack.pop()
                    res = value1 % value2
                    stack.append(res)
                elif opcode == 208:  # getlocal_0
                    stack.append(registers[0])
                elif opcode == 209:  # getlocal_1
                    stack.append(registers[1])
                elif opcode == 210:  # getlocal_2
                    stack.append(registers[2])
                elif opcode == 211:  # getlocal_3
                    stack.append(registers[3])
                elif opcode == 214:  # setlocal_2
                    registers[2] = stack.pop()
                elif opcode == 215:  # setlocal_3
                    registers[3] = stack.pop()
                else:
                    raise NotImplementedError(
                        u'Unsupported opcode %d' % opcode)

        method_pyfunctions[func_name] = resfunc
        return resfunc

    initial_function = extract_function(u'decipher')
    return lambda s: initial_function([s])
1029
1030 def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
1031 """Turn the encrypted s field into a working signature"""
1032
1033 if player_url is not None:
1034 if player_url.startswith(u'//'):
1035 player_url = u'https:' + player_url
1036 try:
1037 player_id = (player_url, len(s))
1038 if player_id not in self._player_cache:
1039 func = self._extract_signature_function(
1040 video_id, player_url, len(s)
1041 )
1042 self._player_cache[player_id] = func
1043 func = self._player_cache[player_id]
1044 if self._downloader.params.get('youtube_print_sig_code'):
1045 self._print_sig_code(func, len(s))
1046 return func(s)
1047 except Exception:
1048 tb = traceback.format_exc()
1049 self._downloader.report_warning(
1050 u'Automatic signature extraction failed: ' + tb)
1051
1052 self._downloader.report_warning(
1053 u'Warning: Falling back to static signature algorithm')
1054
1055 return self._static_decrypt_signature(
1056 s, video_id, player_url, age_gate)
1057
1058 def _static_decrypt_signature(self, s, video_id, player_url, age_gate):
1059 if age_gate:
1060 # The videos with age protection use another player, so the
1061 # algorithms can be different.
1062 if len(s) == 86:
1063 return s[2:63] + s[82] + s[64:82] + s[63]
1064
1065 if len(s) == 93:
1066 return s[86:29:-1] + s[88] + s[28:5:-1]
1067 elif len(s) == 92:
1068 return s[25] + s[3:25] + s[0] + s[26:42] + s[79] + s[43:79] + s[91] + s[80:83]
1069 elif len(s) == 91:
1070 return s[84:27:-1] + s[86] + s[26:5:-1]
1071 elif len(s) == 90:
1072 return s[25] + s[3:25] + s[2] + s[26:40] + s[77] + s[41:77] + s[89] + s[78:81]
1073 elif len(s) == 89:
1074 return s[84:78:-1] + s[87] + s[77:60:-1] + s[0] + s[59:3:-1]
1075 elif len(s) == 88:
1076 return s[7:28] + s[87] + s[29:45] + s[55] + s[46:55] + s[2] + s[56:87] + s[28]
1077 elif len(s) == 87:
1078 return s[6:27] + s[4] + s[28:39] + s[27] + s[40:59] + s[2] + s[60:]
1079 elif len(s) == 86:
1080 return s[80:72:-1] + s[16] + s[71:39:-1] + s[72] + s[38:16:-1] + s[82] + s[15::-1]
1081 elif len(s) == 85:
1082 return s[3:11] + s[0] + s[12:55] + s[84] + s[56:84]
1083 elif len(s) == 84:
1084 return s[78:70:-1] + s[14] + s[69:37:-1] + s[70] + s[36:14:-1] + s[80] + s[:14][::-1]
1085 elif len(s) == 83:
1086 return s[80:63:-1] + s[0] + s[62:0:-1] + s[63]
1087 elif len(s) == 82:
1088 return s[80:37:-1] + s[7] + s[36:7:-1] + s[0] + s[6:0:-1] + s[37]
1089 elif len(s) == 81:
1090 return s[56] + s[79:56:-1] + s[41] + s[55:41:-1] + s[80] + s[40:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
1091 elif len(s) == 80:
1092 return s[1:19] + s[0] + s[20:68] + s[19] + s[69:80]
1093 elif len(s) == 79:
1094 return s[54] + s[77:54:-1] + s[39] + s[53:39:-1] + s[78] + s[38:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
1095
1096 else:
1097 raise ExtractorError(u'Unable to decrypt signature, key length %d not supported; retrying might work' % (len(s)))
1098
def _get_available_subtitles(self, video_id, webpage):
    """Return a {lang_code: subtitle_url} dict for the video.

    The track list is fetched from the timedtext listing endpoint.  An
    empty dict is returned (after a warning) when the listing cannot be
    downloaded or contains no tracks.  webpage is not used here.
    """
    list_url = 'http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id
    try:
        listing = self._download_webpage(list_url, video_id, note=False)
    except ExtractorError as err:
        self._downloader.report_warning(u'unable to download video subtitles: %s' % compat_str(err))
        return {}

    urls_by_lang = {}
    tracks = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', listing)
    for track_name, lang in tracks:
        query = compat_urllib_parse.urlencode({
            'lang': lang,
            'v': video_id,
            'fmt': self._downloader.params.get('subtitlesformat', 'srt'),
            'name': track_name.encode('utf-8'),
        })
        # A later track with the same language code replaces an earlier one.
        urls_by_lang[lang] = u'http://www.youtube.com/api/timedtext?' + query

    if urls_by_lang:
        return urls_by_lang
    self._downloader.report_warning(u'video doesn\'t have subtitles')
    return {}
1124
1125 def _get_available_automatic_caption(self, video_id, webpage):
1126 """We need the webpage for getting the captions url, pass it as an
1127 argument to speed up the process."""
1128 sub_format = self._downloader.params.get('subtitlesformat', 'srt')
1129 self.to_screen(u'%s: Looking for automatic captions' % video_id)
1130 mobj = re.search(r';ytplayer.config = ({.*?});', webpage)
1131 err_msg = u'Couldn\'t find automatic captions for %s' % video_id
1132 if mobj is None:
1133 self._downloader.report_warning(err_msg)
1134 return {}
1135 player_config = json.loads(mobj.group(1))
1136 try:
1137 args = player_config[u'args']
1138 caption_url = args[u'ttsurl']
1139 timestamp = args[u'timestamp']
1140 # We get the available subtitles
1141 list_params = compat_urllib_parse.urlencode({
1142 'type': 'list',
1143 'tlangs': 1,
1144 'asrs': 1,
1145 })
1146 list_url = caption_url + '&' + list_params
1147 caption_list = self._download_xml(list_url, video_id)
1148 original_lang_node = caption_list.find('track')
1149 if original_lang_node is None or original_lang_node.attrib.get('kind') != 'asr' :
1150 self._downloader.report_warning(u'Video doesn\'t have automatic captions')
1151 return {}
1152 original_lang = original_lang_node.attrib['lang_code']
1153
1154 sub_lang_list = {}
1155 for lang_node in caption_list.findall('target'):
1156 sub_lang = lang_node.attrib['lang_code']
1157 params = compat_urllib_parse.urlencode({
1158 'lang': original_lang,
1159 'tlang': sub_lang,
1160 'fmt': sub_format,
1161 'ts': timestamp,
1162 'kind': 'asr',
1163 })
1164 sub_lang_list[sub_lang] = caption_url + '&' + params
1165 return sub_lang_list
1166 # An extractor error can be raise by the download process if there are
1167 # no automatic captions but there are subtitles
1168 except (KeyError, ExtractorError):
1169 self._downloader.report_warning(err_msg)
1170 return {}
1171
1172 def _print_formats(self, formats):
1173 print('Available formats:')
1174 for x in formats:
1175 print('%s\t:\t%s\t[%s]%s' %(x, self._video_extensions.get(x, 'flv'),
1176 self._video_dimensions.get(x, '???'),
1177 ' ('+self._special_itags[x]+')' if x in self._special_itags else ''))
1178
1179 def _extract_id(self, url):
1180 mobj = re.match(self._VALID_URL, url, re.VERBOSE)
1181 if mobj is None:
1182 raise ExtractorError(u'Invalid URL: %s' % url)
1183 video_id = mobj.group(2)
1184 return video_id
1185
1186 def _get_video_url_list(self, url_map):
1187 """
1188 Transform a dictionary in the format {itag:url} to a list of (itag, url)
1189 with the requested formats.
1190 """
1191 req_format = self._downloader.params.get('format', None)
1192 format_limit = self._downloader.params.get('format_limit', None)
1193 available_formats = self._available_formats_prefer_free if self._downloader.params.get('prefer_free_formats', False) else self._available_formats
1194 if format_limit is not None and format_limit in available_formats:
1195 format_list = available_formats[available_formats.index(format_limit):]
1196 else:
1197 format_list = available_formats
1198 existing_formats = [x for x in format_list if x in url_map]
1199 if len(existing_formats) == 0:
1200 raise ExtractorError(u'no known formats available for video')
1201 if self._downloader.params.get('listformats', None):
1202 self._print_formats(existing_formats)
1203 return
1204 if req_format is None or req_format == 'best':
1205 video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
1206 elif req_format == 'worst':
1207 video_url_list = [(existing_formats[-1], url_map[existing_formats[-1]])] # worst quality
1208 elif req_format in ('-1', 'all'):
1209 video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
1210 else:
1211 # Specific formats. We pick the first in a slash-delimeted sequence.
1212 # Format can be specified as itag or 'mp4' or 'flv' etc. We pick the highest quality
1213 # available in the specified format. For example,
1214 # if '1/2/3/4' is requested and '2' and '4' are available, we pick '2'.
1215 # if '1/mp4/3/4' is requested and '1' and '5' (is a mp4) are available, we pick '1'.
1216 # if '1/mp4/3/4' is requested and '4' and '5' (is a mp4) are available, we pick '5'.
1217 req_formats = req_format.split('/')
1218 video_url_list = None
1219 for rf in req_formats:
1220 if rf in url_map:
1221 video_url_list = [(rf, url_map[rf])]
1222 break
1223 if rf in self._video_formats_map:
1224 for srf in self._video_formats_map[rf]:
1225 if srf in url_map:
1226 video_url_list = [(srf, url_map[srf])]
1227 break
1228 else:
1229 continue
1230 break
1231 if video_url_list is None:
1232 raise ExtractorError(u'requested format not available')
1233 return video_url_list
1234
1235 def _extract_from_m3u8(self, manifest_url, video_id):
1236 url_map = {}
1237 def _get_urls(_manifest):
1238 lines = _manifest.split('\n')
1239 urls = filter(lambda l: l and not l.startswith('#'),
1240 lines)
1241 return urls
1242 manifest = self._download_webpage(manifest_url, video_id, u'Downloading formats manifest')
1243 formats_urls = _get_urls(manifest)
1244 for format_url in formats_urls:
1245 itag = self._search_regex(r'itag/(\d+?)/', format_url, 'itag')
1246 url_map[itag] = format_url
1247 return url_map
1248
1249 def _extract_annotations(self, video_id):
1250 url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id
1251 return self._download_webpage(url, video_id, note=u'Searching for annotations.', errnote=u'Unable to download video annotations.')
1252
def _real_extract(self, url):
    """Extract metadata and download URLs for a single video.

    Downloads the watch page and the get_video_info data, gathers the
    metadata fields, resolves the available format URLs (deciphering
    signatures where needed) and returns a list with one info dict per
    selected format.

    Returns None when only subtitles or formats are being listed.
    Raises ExtractorError when the page cannot be downloaded, the video
    info lacks a token or uploader, the video is a rental or rtmpe
    stream, or no usable stream information is found.
    """
    # Extract original video URL from URL with redirection, like age verification, using next_url parameter
    mobj = re.search(self._NEXT_URL_RE, url)
    if mobj:
        url = 'https://www.youtube.com/' + compat_urllib_parse.unquote(mobj.group(1)).lstrip('/')
    video_id = self._extract_id(url)

    # Get video webpage
    self.report_video_webpage_download(video_id)
    url = 'https://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id
    request = compat_urllib_request.Request(url)
    try:
        video_webpage_bytes = compat_urllib_request.urlopen(request).read()
    except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
        raise ExtractorError(u'Unable to download video webpage: %s' % compat_str(err))

    # Undecodable bytes are dropped rather than aborting the extraction
    video_webpage = video_webpage_bytes.decode('utf-8', 'ignore')

    # Attempt to extract SWF player URL
    mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
    if mobj is not None:
        # Remove the backslash escapes (e.g. '\/' becomes '/')
        player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
    else:
        player_url = None

    # Get video info
    self.report_video_info_webpage_download(video_id)
    if re.search(r'player-age-gate-content">', video_webpage) is not None:
        self.report_age_confirmation()
        age_gate = True
        # We simulate the access to the video from www.youtube.com/v/{video_id}
        # this can be viewed without login into Youtube
        data = compat_urllib_parse.urlencode({'video_id': video_id,
                                              'el': 'player_embedded',
                                              'gl': 'US',
                                              'hl': 'en',
                                              'eurl': 'https://youtube.googleapis.com/v/' + video_id,
                                              'asv': 3,
                                              'sts':'1588',
                                              })
        video_info_url = 'https://www.youtube.com/get_video_info?' + data
        video_info_webpage = self._download_webpage(video_info_url, video_id,
                                note=False,
                                errnote='unable to download video info webpage')
        video_info = compat_parse_qs(video_info_webpage)
    else:
        age_gate = False
        # Try the different 'el' values in turn until a response
        # containing a token is obtained
        for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
            video_info_url = ('https://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
                    % (video_id, el_type))
            video_info_webpage = self._download_webpage(video_info_url, video_id,
                                    note=False,
                                    errnote='unable to download video info webpage')
            video_info = compat_parse_qs(video_info_webpage)
            if 'token' in video_info:
                break
    if 'token' not in video_info:
        if 'reason' in video_info:
            raise ExtractorError(u'YouTube said: %s' % video_info['reason'][0], expected=True)
        else:
            raise ExtractorError(u'"token" parameter not in video info for unknown reason')

    if 'view_count' in video_info:
        view_count = int(video_info['view_count'][0])
    else:
        view_count = None

    # Check for "rental" videos
    if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
        raise ExtractorError(u'"rental" videos not supported')

    # Start extracting information
    self.report_information_extraction(video_id)

    # uploader
    if 'author' not in video_info:
        raise ExtractorError(u'Unable to extract uploader name')
    video_uploader = compat_urllib_parse.unquote_plus(video_info['author'][0])

    # uploader_id
    video_uploader_id = None
    mobj = re.search(r'<link itemprop="url" href="http://www.youtube.com/(?:user|channel)/([^"]+)">', video_webpage)
    if mobj is not None:
        video_uploader_id = mobj.group(1)
    else:
        self._downloader.report_warning(u'unable to extract uploader nickname')

    # title
    if 'title' in video_info:
        video_title = compat_urllib_parse.unquote_plus(video_info['title'][0])
    else:
        self._downloader.report_warning(u'Unable to extract video title')
        video_title = u'_'

    # thumbnail image
    # We try first to get a high quality image:
    m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
                        video_webpage, re.DOTALL)
    if m_thumb is not None:
        video_thumbnail = m_thumb.group(1)
    elif 'thumbnail_url' not in video_info:
        self._downloader.report_warning(u'unable to extract video thumbnail')
        video_thumbnail = None
    else:   # don't panic if we can't find it
        video_thumbnail = compat_urllib_parse.unquote_plus(video_info['thumbnail_url'][0])

    # upload date
    upload_date = None
    mobj = re.search(r'id="eow-date.*?>(.*?)</span>', video_webpage, re.DOTALL)
    if mobj is not None:
        # Replace '/', ',' and '-' by spaces and collapse whitespace
        # before handing the text to unified_strdate
        upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
        upload_date = unified_strdate(upload_date)

    # description
    video_description = get_element_by_id("eow-description", video_webpage)
    if video_description:
        # Replace redirect links by the content of their title attribute
        video_description = re.sub(r'''(?x)
            <a\s+
                (?:[a-zA-Z-]+="[^"]+"\s+)*?
                title="([^"]+)"\s+
                (?:[a-zA-Z-]+="[^"]+"\s+)*?
                class="yt-uix-redirect-link"\s*>
            [^<]+
            </a>
        ''', r'\1', video_description)
        video_description = clean_html(video_description)
    else:
        # Fall back to the meta description tag of the page
        fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
        if fd_mobj:
            video_description = unescapeHTML(fd_mobj.group(1))
        else:
            video_description = u''

    # subtitles
    video_subtitles = self.extract_subtitles(video_id, video_webpage)

    if self._downloader.params.get('listsubtitles', False):
        self._list_available_subtitles(video_id, video_webpage)
        return

    if 'length_seconds' not in video_info:
        self._downloader.report_warning(u'unable to extract video duration')
        video_duration = ''
    else:
        video_duration = compat_urllib_parse.unquote_plus(video_info['length_seconds'][0])

    # annotations
    video_annotations = None
    if self._downloader.params.get('writeannotations', False):
        video_annotations = self._extract_annotations(video_id)

    # Decide which formats to download

    # Prefer the stream maps embedded in the watch page when they carry
    # encrypted signatures.  Every ValueError raised in this block
    # (missing config, invalid JSON, missing stream map) just means the
    # video_info data is used unchanged.
    try:
        mobj = re.search(r';ytplayer.config = ({.*?});', video_webpage)
        if not mobj:
            raise ValueError('Could not find vevo ID')
        info = json.loads(mobj.group(1))
        args = info['args']
        # Easy way to know if the 's' value is in url_encoded_fmt_stream_map
        # this signatures are encrypted
        if 'url_encoded_fmt_stream_map' not in args:
            raise ValueError(u'No stream_map present')  # caught below
        re_signature = re.compile(r'[&,]s=')
        m_s = re_signature.search(args['url_encoded_fmt_stream_map'])
        if m_s is not None:
            self.to_screen(u'%s: Encrypted signatures detected.' % video_id)
            video_info['url_encoded_fmt_stream_map'] = [args['url_encoded_fmt_stream_map']]
        m_s = re_signature.search(args.get('adaptive_fmts', u''))
        if m_s is not None:
            if 'adaptive_fmts' in video_info:
                video_info['adaptive_fmts'][0] += ',' + args['adaptive_fmts']
            else:
                video_info['adaptive_fmts'] = [args['adaptive_fmts']]
    except ValueError:
        pass

    if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
        self.report_rtmp_download()
        video_url_list = [(None, video_info['conn'][0])]
    elif len(video_info.get('url_encoded_fmt_stream_map', [])) >= 1 or len(video_info.get('adaptive_fmts', [])) >= 1:
        encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts',[''])[0]
        if 'rtmpe%3Dyes' in encoded_url_map:
            raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
        url_map = {}
        # Each comma separated entry is a query string describing one format
        for url_data_str in encoded_url_map.split(','):
            url_data = compat_parse_qs(url_data_str)
            if 'itag' in url_data and 'url' in url_data:
                url = url_data['url'][0]
                if 'sig' in url_data:
                    # Plain signature: it can be used as is
                    url += '&signature=' + url_data['sig'][0]
                elif 's' in url_data:
                    # Encrypted signature: it has to be deciphered first
                    encrypted_sig = url_data['s'][0]
                    if self._downloader.params.get('verbose'):
                        if age_gate:
                            if player_url is None:
                                player_version = 'unknown'
                            else:
                                player_version = self._search_regex(
                                    r'-(.+)\.swf$', player_url,
                                    u'flash player', fatal=False)
                            player_desc = 'flash player %s' % player_version
                        else:
                            player_version = self._search_regex(
                                r'html5player-(.+?)\.js', video_webpage,
                                'html5 player', fatal=False)
                            player_desc = u'html5 player %s' % player_version

                        parts_sizes = u'.'.join(compat_str(len(part)) for part in encrypted_sig.split('.'))
                        self.to_screen(u'encrypted signature length %d (%s), itag %s, %s' %
                            (len(encrypted_sig), parts_sizes, url_data['itag'][0], player_desc))

                    if not age_gate:
                        # Without age gate the JS player URL from the
                        # page replaces the SWF one found earlier
                        jsplayer_url_json = self._search_regex(
                            r'"assets":.+?"js":\s*("[^"]+")',
                            video_webpage, u'JS player URL')
                        player_url = json.loads(jsplayer_url_json)

                    signature = self._decrypt_signature(
                        encrypted_sig, video_id, player_url, age_gate)
                    url += '&signature=' + signature
                if 'ratebypass' not in url:
                    url += '&ratebypass=yes'
                url_map[url_data['itag'][0]] = url
        video_url_list = self._get_video_url_list(url_map)
        # _get_video_url_list returns None when it only listed the formats
        if not video_url_list:
            return
    elif video_info.get('hlsvp'):
        manifest_url = video_info['hlsvp'][0]
        url_map = self._extract_from_m3u8(manifest_url, video_id)
        video_url_list = self._get_video_url_list(url_map)
        if not video_url_list:
            return

    else:
        raise ExtractorError(u'no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')

    # Build one info dict per selected (itag, url) pair
    results = []
    for itag, video_real_url in video_url_list:
        # Extension
        video_extension = self._video_extensions.get(itag, 'flv')

        video_format = '{0} - {1}{2}'.format(itag if itag else video_extension,
                                          self._video_dimensions.get(itag, '???'),
                                          ' ('+self._special_itags[itag]+')' if itag in self._special_itags else '')

        results.append({
            'id':       video_id,
            'url':      video_real_url,
            'uploader': video_uploader,
            'uploader_id': video_uploader_id,
            'upload_date':  upload_date,
            'title':    video_title,
            'ext':      video_extension,
            'format':   video_format,
            'format_id': itag,
            'thumbnail':    video_thumbnail,
            'description':  video_description,
            'player_url':   player_url,
            'subtitles':    video_subtitles,
            'duration':     video_duration,
            'age_limit':    18 if age_gate else 0,
            'annotations':  video_annotations,
            'webpage_url': 'https://www.youtube.com/watch?v=%s' % video_id,
            'view_count': view_count,
        })
    return results
1520
class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
    """Extractor for YouTube playlists and for mixes ('RD' + video id)."""
    IE_DESC = u'YouTube.com playlists'
    # Verbose pattern: group 1 is the id found in a URL, group 2 a bare
    # prefixed playlist id.
    _VALID_URL = r"""(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        youtube\.com/
                        (?:
                           (?:course|view_play_list|my_playlists|artist|playlist|watch)
                           \? (?:.*?&)*? (?:p|a|list)=
                        |  p/
                        )
                        ((?:PL|EC|UU|FL)?[0-9A-Za-z-_]{10,})
                        .*
                     |
                        ((?:PL|EC|UU|FL)[0-9A-Za-z-_]{10,})
                     )"""
    _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s&page=%s'
    _MORE_PAGES_INDICATOR = r'data-link-type="next"'
    _VIDEO_RE = r'href="/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&amp;[^"]*?index=(?P<index>\d+)'
    IE_NAME = u'youtube:playlist'

    @classmethod
    def suitable(cls, url):
        """Receives a URL and returns True if suitable for this IE."""
        return re.match(cls._VALID_URL, url, re.VERBOSE) is not None

    def _real_initialize(self):
        self._login()

    def _ids_to_results(self, ids):
        """Wrap every video id into a url_result for the 'Youtube' IE."""
        return [self.url_result(vid_id, 'Youtube', video_id=vid_id)
                       for vid_id in ids]

    def _extract_mix(self, playlist_id):
        """Return the playlist result for a mix."""
        # The mixes are generated from a single video
        # the id of the playlist is just 'RD' + video_id
        url = 'https://youtube.com/watch?v=%s&list=%s' % (playlist_id[2:], playlist_id)
        webpage = self._download_webpage(url, playlist_id, u'Downloading Youtube mix')
        title_span = (get_element_by_attribute('class', 'title long-title', webpage) or
            get_element_by_attribute('class', 'title ', webpage))
        title = clean_html(title_span)
        video_re = r'data-index="\d+".*?href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s' % re.escape(playlist_id)
        ids = orderedSet(re.findall(video_re, webpage))
        url_results = self._ids_to_results(ids)

        return self.playlist_result(url_results, playlist_id, title)

    def _real_extract(self, url):
        """Return the playlist result (or a single video with --no-playlist).

        Raises ExtractorError when url does not match _VALID_URL.
        """
        # Extract playlist id
        mobj = re.match(self._VALID_URL, url, re.VERBOSE)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        playlist_id = mobj.group(1) or mobj.group(2)

        # Check if it's a video-specific URL
        query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
        if 'v' in query_dict:
            video_id = query_dict['v'][0]
            if self._downloader.params.get('noplaylist'):
                self.to_screen(u'Downloading just video %s because of --no-playlist' % video_id)
                return self.url_result(video_id, 'Youtube', video_id=video_id)
            else:
                # The captured id already carries its own prefix (PL, UU,
                # ...), so it is printed as is instead of prepending 'PL'.
                self.to_screen(u'Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))

        if len(playlist_id) == 13:  # 'RD' + 11 characters for the video id
            # Mixes require a custom extraction process
            return self._extract_mix(playlist_id)

        # Extract the video ids from the playlist pages
        ids = []

        for page_num in itertools.count(1):
            url = self._TEMPLATE_URL % (playlist_id, page_num)
            page = self._download_webpage(url, playlist_id, u'Downloading page #%s' % page_num)
            matches = re.finditer(self._VIDEO_RE, page)
            # We remove the duplicates and the link with index 0
            # (it's not the first video of the playlist)
            new_ids = orderedSet(m.group('id') for m in matches if m.group('index') != '0')
            ids.extend(new_ids)

            if re.search(self._MORE_PAGES_INDICATOR, page) is None:
                break

        # The title is read from the last downloaded page
        playlist_title = self._og_search_title(page)

        url_results = self._ids_to_results(ids)
        return self.playlist_result(url_results, playlist_id, playlist_title)
1608
1609
class YoutubeChannelIE(InfoExtractor):
    """Extractor for all the videos of a YouTube channel."""
    IE_NAME = u'youtube:channel'
    IE_DESC = u'YouTube.com channels'
    _VALID_URL = r"^(?:https?://)?(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/([0-9A-Za-z_-]+)"
    _MORE_PAGES_INDICATOR = 'yt-uix-load-more'
    _MORE_PAGES_URL = 'http://www.youtube.com/c4_browse_ajax?action_load_more_videos=1&flow=list&paging=%s&view=0&sort=da&channel_id=%s'

    def extract_videos_from_page(self, page):
        """Return the video ids linked from page, unique, in page order."""
        unique_ids = []
        seen = set()
        for video_id in re.findall(r'href="/watch\?v=([0-9A-Za-z_-]+)&?', page):
            if video_id in seen:
                continue
            seen.add(video_id)
            unique_ids.append(video_id)
        return unique_ids

    def _real_extract(self, url):
        """Return a playlist result with every video of the channel.

        Raises ExtractorError when url does not match _VALID_URL.
        """
        match = re.match(self._VALID_URL, url)
        if match is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        channel_id = match.group(1)

        # Download channel page
        channel_url = 'https://www.youtube.com/channel/%s/videos' % channel_id
        channel_page = self._download_webpage(channel_url, channel_id)
        autogenerated = re.search(r'channel-header-autogenerated-label', channel_page) is not None

        if autogenerated:
            # The videos are contained in a single page
            # the ajax pages can't be used, they are empty
            video_ids = self.extract_videos_from_page(channel_page)
        else:
            # Download all channel pages using the json-based channel_ajax query
            video_ids = []
            pagenum = 0
            while True:
                pagenum += 1
                ajax_url = self._MORE_PAGES_URL % (pagenum, channel_id)
                raw_page = self._download_webpage(ajax_url, channel_id,
                                                  u'Downloading page #%s' % pagenum)
                page = json.loads(raw_page)
                video_ids.extend(self.extract_videos_from_page(page['content_html']))
                if self._MORE_PAGES_INDICATOR not in page['load_more_widget_html']:
                    break

        self._downloader.to_screen(u'[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids)))

        entries = []
        for video_id in video_ids:
            entries.append(self.url_result(video_id, 'Youtube', video_id=video_id))
        return self.playlist_result(entries, channel_id)
1664
1665
class YoutubeUserIE(InfoExtractor):
    """Extractor for all the uploads of a YouTube user."""
    IE_DESC = u'YouTube.com user videos (URL or "ytuser" keyword)'
    _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)([A-Za-z0-9_-]+)'
    _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
    _GDATA_PAGE_SIZE = 50
    _GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json'
    IE_NAME = u'youtube:user'

    @classmethod
    def suitable(cls, url):
        """Return True only if no other *IE in this module accepts url."""
        # Don't return True if the url can be extracted with another
        # youtube extractor: the regex is too permissive and it would
        # match.
        other_ies = (
            klass for (name, klass) in globals().items()
            if name.endswith('IE') and klass is not cls)
        if any(ie.suitable(url) for ie in other_ies):
            return False
        return super(YoutubeUserIE, cls).suitable(url)

    def _real_extract(self, url):
        """Return a playlist result with every upload of the user.

        Raises ExtractorError when url does not match _VALID_URL or when
        the API answers with invalid JSON.
        """
        # Extract username
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        username = mobj.group(1)

        # Download video ids using YouTube Data API. Result size per
        # query is limited (currently to 50 videos) so we need to query
        # page by page until there are no video ids - it means we got
        # all of them.

        video_ids = []

        for pagenum in itertools.count(0):
            start_index = pagenum * self._GDATA_PAGE_SIZE + 1
            # Inclusive index of the last entry this page can hold
            end_index = start_index + self._GDATA_PAGE_SIZE - 1

            gdata_url = self._GDATA_URL % (username, self._GDATA_PAGE_SIZE, start_index)
            page = self._download_webpage(gdata_url, username,
                                          u'Downloading video ids from %d to %d' % (start_index, end_index))

            try:
                response = json.loads(page)
            except ValueError as err:
                raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err))
            if 'entry' not in response['feed']:
                # Number of videos is a multiple of self._GDATA_PAGE_SIZE
                break

            # Extract video identifiers (last path component of the id)
            ids_in_page = [
                entry['id']['$t'].split('/')[-1]
                for entry in response['feed']['entry']]
            video_ids.extend(ids_in_page)

            # A little optimization - if current page is not
            # "full", ie. does not contain PAGE_SIZE video ids then
            # we can assume that this page is the last one - there
            # are no more ids on further pages - no need to query
            # again.

            if len(ids_in_page) < self._GDATA_PAGE_SIZE:
                break

        url_results = [
            self.url_result(video_id, 'Youtube', video_id=video_id)
            for video_id in video_ids]
        return self.playlist_result(url_results, playlist_title=username)
1731
1732
class YoutubeSearchIE(SearchInfoExtractor):
    """Extractor for YouTube search results ('ytsearch' keyword)."""
    IE_NAME = u'youtube:search'
    IE_DESC = u'YouTube.com searches'
    _SEARCH_KEY = 'ytsearch'
    _MAX_RESULTS = 1000
    _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc'

    def report_download_page(self, query, pagenum):
        """Report attempt to download search page with given number."""
        message = u'[youtube] query "%s": Downloading page %s' % (query, pagenum)
        self._downloader.to_screen(message)

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        collected = []
        limit = n
        offset = 0  # number of results requested so far, 50 per page

        while offset < limit:
            self.report_download_page(query, offset // 50 + 1)
            result_url = self._API_URL % (compat_urllib_parse.quote_plus(query), offset + 1)
            request = compat_urllib_request.Request(result_url)
            try:
                data = compat_urllib_request.urlopen(request).read().decode('utf-8')
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                raise ExtractorError(u'Unable to download API page: %s' % compat_str(err))
            api_response = json.loads(data)['data']

            if 'items' not in api_response:
                raise ExtractorError(u'[youtube] No video results')

            collected.extend([video['id'] for video in api_response['items']])

            # Never ask for more than the API says exists
            limit = min(n, api_response['totalItems'])
            offset += 50

        # The last page may have delivered more than requested
        video_ids = collected[:n]
        videos = []
        for video_id in video_ids:
            videos.append(self.url_result(video_id, 'Youtube', video_id=video_id))
        return self.playlist_result(videos, query)
1775
class YoutubeSearchDateIE(YoutubeSearchIE):
    """Search extractor ordering results by publication date."""
    IE_DESC = u'YouTube.com searches, newest videos first'
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    _SEARCH_KEY = 'ytsearchdate'
    # Same query as the parent class plus orderby=published
    _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc&orderby=published'
1781
class YoutubeShowIE(InfoExtractor):
    """Extractor for show pages, which link one playlist per season."""
    IE_NAME = u'youtube:show'
    IE_DESC = u'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://www\.youtube\.com/show/(.*)'

    def _real_extract(self, url):
        """Return one 'YoutubePlaylist' url_result per season link."""
        show_name = re.match(self._VALID_URL, url).group(1)
        webpage = self._download_webpage(url, show_name, u'Downloading show webpage')
        # There's one playlist for each season of the show
        season_paths = re.findall(r'href="(/playlist\?list=.*?)"', webpage)
        self.to_screen(u'%s: Found %s seasons' % (show_name, len(season_paths)))
        results = []
        for path in season_paths:
            results.append(self.url_result('https://www.youtube.com' + path, 'YoutubePlaylist'))
        return results
1795
1796
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
    """
    Common machinery for extractors that read their videos from
    http://www.youtube.com/feed_ajax
    Concrete subclasses have to provide _FEED_NAME and _PLAYLIST_TITLE.
    """
    # use action_load_personal_feed instead of action_load_system_feed
    _PERSONAL_FEED = False
    _LOGIN_REQUIRED = True

    @property
    def IE_NAME(self):
        return u'youtube:%s' % self._FEED_NAME

    @property
    def _FEED_TEMPLATE(self):
        # The trailing '%%s' leaves a placeholder for the paging value
        if self._PERSONAL_FEED:
            action = 'action_load_personal_feed'
        else:
            action = 'action_load_system_feed'
        return 'http://www.youtube.com/feed_ajax?%s=1&feed_name=%s&paging=%%s' % (action, self._FEED_NAME)

    def _real_initialize(self):
        self._login()

    def _real_extract(self, url):
        """Walk the feed pages and return all videos as one playlist."""
        entries = []
        paging = 0
        page_no = 0
        while True:
            page_no += 1
            raw = self._download_webpage(self._FEED_TEMPLATE % paging,
                                         u'%s feed' % self._FEED_NAME,
                                         u'Downloading page %s' % page_no)
            info = json.loads(raw)
            video_ids = orderedSet(re.findall(r'"/watch\?v=(.*?)["&]', info['feed_html']))
            for video_id in video_ids:
                entries.append(self.url_result(video_id, 'Youtube', video_id=video_id))
            # A null paging value marks the last page
            paging = info['paging']
            if paging is None:
                break
        return self.playlist_result(entries, playlist_title=self._PLAYLIST_TITLE)
1839
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the 'subscriptions' feed."""
    _FEED_NAME = 'subscriptions'
    _PLAYLIST_TITLE = u'Youtube Subscriptions'
    _VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
    IE_DESC = u'YouTube.com subscriptions feed, "ytsubs" keyword(requires authentication)'
1845
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the 'recommended' feed."""
    _FEED_NAME = 'recommended'
    _PLAYLIST_TITLE = u'Youtube Recommended videos'
    _VALID_URL = r'https?://www\.youtube\.com/feed/recommended|:ytrec(?:ommended)?'
    IE_DESC = u'YouTube.com recommended videos, "ytrec" keyword (requires authentication)'
1851
class YoutubeWatchLaterIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the 'watch_later' personal feed."""
    _FEED_NAME = 'watch_later'
    _PERSONAL_FEED = True
    _PLAYLIST_TITLE = u'Youtube Watch Later'
    _VALID_URL = r'https?://www\.youtube\.com/feed/watch_later|:ytwatchlater'
    IE_DESC = u'Youtube watch later list, "ytwatchlater" keyword (requires authentication)'
1858
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the 'history' personal feed."""
    IE_DESC = u'Youtube watch history, "ythistory" keyword (requires authentication)'
    # Raw string like the sibling extractors: in a non-raw literal '\.'
    # is an invalid escape sequence.  The pattern text is unchanged.
    _VALID_URL = r'https?://www\.youtube\.com/feed/history|:ythistory'
    _FEED_NAME = 'history'
    _PERSONAL_FEED = True
    _PLAYLIST_TITLE = u'Youtube Watch History'
1865
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Extractor that redirects to the playlist behind my_favorites."""
    IE_DESC = u'YouTube.com favourite videos, "ytfav" keyword (requires authentication)'
    IE_NAME = u'youtube:favorites'
    _LOGIN_REQUIRED = True
    _VALID_URL = r'https?://www\.youtube\.com/my_favorites|:ytfav(?:ou?rites)?'

    def _real_extract(self, url):
        """Find the favourites playlist id and hand it to 'YoutubePlaylist'."""
        favourites_page = self._download_webpage(
            'https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
        favourites_id = self._search_regex(
            r'list=(.+?)["&]', favourites_page, u'favourites playlist id')
        return self.url_result(favourites_id, 'YoutubePlaylist')
1876
1877
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catches watch URLs cut off after 'feature=...' and explains why."""
    _VALID_URL = r'(?:https?://)?[^/]+/watch\?feature=[a-z_]+$'
    IE_DESC = False  # Do not list
    IE_NAME = 'youtube:truncated_url'

    def _real_extract(self, url):
        """Always raise an expected ExtractorError with a quoting hint."""
        hint = (
            u'Did you forget to quote the URL? Remember that & is a meta '
            u'character in most shells, so you want to put the URL in quotes, '
            u'like youtube-dl '
            u'\'http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc\''
            u' (or simply youtube-dl BaW_jenozKc ).')
        raise ExtractorError(hint, expected=True)