import itertools
import json
import re
import socket

from .common import InfoExtractor, SearchInfoExtractor
from ..utils import (
    compat_http_client,
    compat_parse_qs,
    compat_str,
    compat_urllib_error,
    compat_urllib_parse,
    compat_urllib_request,

    clean_html,
    get_element_by_id,
    ExtractorError,
    unescapeHTML,
    unified_strdate,
    orderedSet,
)

class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _LANG_URL = r'https://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
    _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    def report_lang(self):
        """Report attempt to set language."""
        self.to_screen(u'Setting language')

    def _set_language(self):
        request = compat_urllib_request.Request(self._LANG_URL)
        try:
            self.report_lang()
            compat_urllib_request.urlopen(request).read()
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            self._downloader.report_warning(u'unable to set language: %s' % compat_str(err))
            return False
        return True

    def _login(self):
        (username, password) = self._get_login_info()
        # No authentication to be performed
        if username is None:
            if self._LOGIN_REQUIRED:
                raise ExtractorError(u'No login info available, needed for using %s.' % self.IE_NAME, expected=True)
            return False

        request = compat_urllib_request.Request(self._LOGIN_URL)
        try:
            login_page = compat_urllib_request.urlopen(request).read().decode('utf-8')
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            self._downloader.report_warning(u'unable to fetch login page: %s' % compat_str(err))
            return False

        galx = None
        dsh = None
        match = re.search(re.compile(r'<input.+?name="GALX".+?value="(.+?)"', re.DOTALL), login_page)
        if match:
            galx = match.group(1)
        match = re.search(re.compile(r'<input.+?name="dsh".+?value="(.+?)"', re.DOTALL), login_page)
        if match:
            dsh = match.group(1)

        login_form_strs = {
                u'continue': u'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
                u'PersistentCookie': u'yes',
                u'bgresponse': u'js_disabled',
                u'checkConnection': u'',
                u'checkedDomains': u'youtube',
                u'signIn': u'Sign in',
                u'service': u'youtube',
                }
        # Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
        # chokes on unicode
        login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k, v in login_form_strs.items())
        login_data = compat_urllib_parse.urlencode(login_form).encode('ascii')
        request = compat_urllib_request.Request(self._LOGIN_URL, login_data)
        try:
            login_results = compat_urllib_request.urlopen(request).read().decode('utf-8')
            if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None:
                self._downloader.report_warning(u'unable to log in: bad username or password')
                return False
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            self._downloader.report_warning(u'unable to log in: %s' % compat_str(err))
            return False
        return True

    def _confirm_age(self):
        age_form = {
                'action_confirm':   'Confirm',
                }
        request = compat_urllib_request.Request(self._AGE_URL, compat_urllib_parse.urlencode(age_form))
        try:
            self.report_age_confirmation()
            compat_urllib_request.urlopen(request).read().decode('utf-8')
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            raise ExtractorError(u'Unable to confirm age: %s' % compat_str(err))

    def _real_initialize(self):
        if self._downloader is None:
            return
        if not self._set_language():
            return
        if not self._login():
            return
        self._confirm_age()

class YoutubeIE(YoutubeBaseInfoExtractor):
    IE_DESC = u'YouTube.com'
    _VALID_URL = r"""^
                     (
                         (?:https?://)?                                       # http(s):// (optional)
                         (?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/|
                            tube\.majestyc\.net/)                             # the various hostnames, with wildcard subdomains
                         (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
                         (?:                                                  # the various things that can precede the ID:
                             (?:(?:v|embed|e)/)                               # v/ or embed/ or e/
                             |(?:                                             # or the v= param in all its forms
                                 (?:(?:watch|movie)(?:_popup)?(?:\.php)?)?    # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
                                 (?:\?|\#!?)                                  # the params delimiter ? or # or #!
                                 (?:.*?&)?                                    # any other preceding param (like /?s=tuff&v=xxxx)
                                 v=
                             )
                         )?                                                   # optional -> youtube.com/xxxx is OK
                     )?                                                       # all until now is optional -> you can pass the naked ID
                     ([0-9A-Za-z_-]+)                                         # here it is! the YouTube video ID
                     (?(1).+)?                                                # if we found the ID, everything can follow
                     $"""
    _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
    # Listed in order of quality
    _available_formats = ['38', '37', '46', '22', '45', '35', '44', '34', '18', '43', '6', '5', '17', '13',
                          '95', '94', '93', '92', '132', '151',
                          '85', '84', '102', '83', '101', '82', '100',
                          '138', '137', '248', '136', '247', '135', '246',
                          '245', '244', '134', '243', '133', '242', '160',
                          '141', '172', '140', '171', '139',
                          ]
    _available_formats_prefer_free = ['38', '46', '37', '45', '22', '44', '35', '43', '34', '18', '6', '5', '17', '13',
                                      '95', '94', '93', '92', '132', '151',
                                      '85', '102', '84', '101', '83', '100', '82',
                                      '138', '248', '137', '247', '136', '246', '245',
                                      '244', '135', '243', '134', '242', '133', '160',
                                      '172', '141', '171', '140', '139',
                                      ]
    _video_extensions = {
        # videos that use m3u8
    }
    _video_dimensions = {
    }
    _TESTS = [
        {
            u"url":  u"http://www.youtube.com/watch?v=BaW_jenozKc",
            u"file":  u"BaW_jenozKc.mp4",
            u"info_dict": {
                u"title": u"youtube-dl test video \"'/\\ä↭𝕐",
                u"uploader": u"Philipp Hagemeister",
                u"uploader_id": u"phihag",
                u"upload_date": u"20121002",
                u"description": u"test chars:  \"'/\\ä↭𝕐\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de ."
            }
        },
        {
            u"url":  u"http://www.youtube.com/watch?v=1ltcDfZMA3U",
            u"file":  u"1ltcDfZMA3U.flv",
            u"note": u"Test VEVO video (#897)",
            u"info_dict": {
                u"upload_date": u"20070518",
                u"title": u"Maps - It Will Find You",
                u"description": u"Music video by Maps performing It Will Find You.",
                u"uploader": u"MuteUSA",
                u"uploader_id": u"MuteUSA"
            }
        },
        {
            u"url":  u"http://www.youtube.com/watch?v=UxxajLWwzqY",
            u"file":  u"UxxajLWwzqY.mp4",
            u"note": u"Test generic use_cipher_signature video (#897)",
            u"info_dict": {
                u"upload_date": u"20120506",
                u"title": u"Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]",
                u"description": u"md5:3e2666e0a55044490499ea45fe9037b7",
                u"uploader": u"Icona Pop",
                u"uploader_id": u"IconaPop"
            }
        },
        {
            u"url":  u"https://www.youtube.com/watch?v=07FYdnEawAQ",
            u"file":  u"07FYdnEawAQ.mp4",
            u"note": u"Test VEVO video with age protection (#956)",
            u"info_dict": {
                u"upload_date": u"20130703",
                u"title": u"Justin Timberlake - Tunnel Vision (Explicit)",
                u"description": u"md5:64249768eec3bc4276236606ea996373",
                u"uploader": u"justintimberlakeVEVO",
                u"uploader_id": u"justintimberlakeVEVO"
            }
        },
        {
            u'url': u'https://www.youtube.com/watch?v=TGi3HqYrWHE',
            u'file': u'TGi3HqYrWHE.mp4',
            u'note': u'm3u8 video',
            u'info_dict': {
                u'title': u'Triathlon - Men - London 2012 Olympic Games',
                u'description': u'- Men -  TR02 - Triathlon - 07 August 2012 - London 2012 Olympic Games',
                u'uploader': u'olympic',
                u'upload_date': u'20120807',
                u'uploader_id': u'olympic',
            },
            u'params': {
                u'skip_download': True,
            },
        },
    ]

    @classmethod
    def suitable(cls, url):
        """Receives a URL and returns True if suitable for this IE."""
        if YoutubePlaylistIE.suitable(url) or YoutubeSubscriptionsIE.suitable(url): return False
        return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
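
    # Illustrative note: suitable() lets the more specific extractors win.
    # A plain watch URL such as
    #     http://www.youtube.com/watch?v=BaW_jenozKc
    # is claimed by YoutubeIE, while a watch URL that also carries a
    # &list=PL... parameter matches YoutubePlaylistIE._VALID_URL and is
    # therefore rejected here so the playlist extractor can handle it.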

    def report_video_webpage_download(self, video_id):
        """Report attempt to download video webpage."""
        self.to_screen(u'%s: Downloading video webpage' % video_id)

    def report_video_info_webpage_download(self, video_id):
        """Report attempt to download video info webpage."""
        self.to_screen(u'%s: Downloading video info webpage' % video_id)

    def report_video_subtitles_download(self, video_id):
        """Report attempt to check for available subtitles."""
        self.to_screen(u'%s: Checking available subtitles' % video_id)

    def report_video_subtitles_request(self, video_id, sub_lang, format):
        """Report attempt to download video subtitles."""
        self.to_screen(u'%s: Downloading video subtitles for %s.%s' % (video_id, sub_lang, format))

    def report_video_subtitles_available(self, video_id, sub_lang_list):
        """Report available subtitles."""
        sub_lang = ",".join(list(sub_lang_list.keys()))
        self.to_screen(u'%s: Available subtitles for video: %s' % (video_id, sub_lang))

    def report_information_extraction(self, video_id):
        """Report attempt to extract video information."""
        self.to_screen(u'%s: Extracting video information' % video_id)

    def report_unavailable_format(self, video_id, format):
        """Report that a requested format is not available."""
        self.to_screen(u'%s: Format %s not available' % (video_id, format))

    def report_rtmp_download(self):
        """Indicate the download will use the RTMP protocol."""
        self.to_screen(u'RTMP download detected')

    def _decrypt_signature(self, s):
        """Turn the encrypted s field into a working signature"""

        if len(s) == 92:
            return s[25] + s[3:25] + s[0] + s[26:42] + s[79] + s[43:79] + s[91] + s[80:83]
        elif len(s) == 90:
            return s[25] + s[3:25] + s[2] + s[26:40] + s[77] + s[41:77] + s[89] + s[78:81]
        elif len(s) == 89:
            return s[84:78:-1] + s[87] + s[77:60:-1] + s[0] + s[59:3:-1]
        elif len(s) == 88:
            return s[7:28] + s[87] + s[29:45] + s[55] + s[46:55] + s[2] + s[56:87] + s[28]
        elif len(s) == 87:
            return s[6:27] + s[4] + s[28:39] + s[27] + s[40:59] + s[2] + s[60:]
        elif len(s) == 86:
            return s[83:36:-1] + s[0] + s[35:2:-1]
        elif len(s) == 85:
            return s[83:34:-1] + s[0] + s[33:27:-1] + s[3] + s[26:19:-1] + s[34] + s[18:3:-1] + s[27]
        elif len(s) == 84:
            return s[81:36:-1] + s[0] + s[35:2:-1]
        elif len(s) == 83:
            return s[81:64:-1] + s[82] + s[63:52:-1] + s[45] + s[51:45:-1] + s[1] + s[44:1:-1] + s[0]
        elif len(s) == 82:
            return s[1:19] + s[0] + s[20:68] + s[19] + s[69:82]
        elif len(s) == 81:
            return s[56] + s[79:56:-1] + s[41] + s[55:41:-1] + s[80] + s[40:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
        elif len(s) == 80:
            return s[1:19] + s[0] + s[20:68] + s[19] + s[69:80]
        elif len(s) == 79:
            return s[54] + s[77:54:-1] + s[39] + s[53:39:-1] + s[78] + s[38:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]

        else:
            raise ExtractorError(u'Unable to decrypt signature, key length %d not supported; retrying might work' % (len(s)))
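
    # Illustrative sketch (hypothetical input, not from the original tests):
    # each branch above is a fixed reordering keyed on the signature length.
    # For an 82-character value
    #     s = ''.join(chr(33 + i) for i in range(82))
    # the corresponding branch returns s[1:19] + s[0] + s[20:68] + s[19] + s[69:82],
    # i.e. it moves s[0] behind s[1:19], moves s[19] behind s[20:68], drops s[68],
    # and yields an 81-character working signature.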

    def _decrypt_signature_age_gate(self, s):
        # The videos with age protection use another player, so the algorithms
        # can be different.
        if len(s) == 86:
            return s[2:63] + s[82] + s[64:82] + s[63]
        else:
            # Fall back to the other algorithms
            return self._decrypt_signature(s)

    def _get_available_subtitles(self, video_id):
        self.report_video_subtitles_download(video_id)
        request = compat_urllib_request.Request('http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id)
        try:
            sub_list = compat_urllib_request.urlopen(request).read().decode('utf-8')
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            self._downloader.report_warning(u'unable to download video subtitles: %s' % compat_str(err))
            return {}
        sub_lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', sub_list)
        sub_lang_list = dict((l[1], l[0]) for l in sub_lang_list)
        if not sub_lang_list:
            self._downloader.report_warning(u'video doesn\'t have subtitles')
            return {}
        return sub_lang_list

    def _list_available_subtitles(self, video_id):
        sub_lang_list = self._get_available_subtitles(video_id)
        self.report_video_subtitles_available(video_id, sub_lang_list)

    def _request_subtitle(self, sub_lang, sub_name, video_id, format):
        """
        Return the subtitle as a string or None if they are not found
        """
        self.report_video_subtitles_request(video_id, sub_lang, format)
        params = compat_urllib_parse.urlencode({
            'lang': sub_lang,
            'name': sub_name,
            'v': video_id,
            'fmt': format,
        })
        url = 'http://www.youtube.com/api/timedtext?' + params
        try:
            sub = compat_urllib_request.urlopen(url).read().decode('utf-8')
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            self._downloader.report_warning(u'unable to download video subtitles for %s: %s' % (sub_lang, compat_str(err)))
            return None
        if not sub:
            self._downloader.report_warning(u'Did not fetch video subtitles')
            return None
        return sub

    def _request_automatic_caption(self, video_id, webpage):
        """We need the webpage for getting the captions url, pass it as an
           argument to speed up the process."""
        sub_lang = (self._downloader.params.get('subtitleslangs') or ['en'])[0]
        sub_format = self._downloader.params.get('subtitlesformat')
        self.to_screen(u'%s: Looking for automatic captions' % video_id)
        mobj = re.search(r';ytplayer.config = ({.*?});', webpage)
        err_msg = u'Couldn\'t find automatic captions for "%s"' % sub_lang
        if mobj is None:
            self._downloader.report_warning(err_msg)
            return {}
        player_config = json.loads(mobj.group(1))
        try:
            args = player_config[u'args']
            caption_url = args[u'ttsurl']
            timestamp = args[u'timestamp']
            params = compat_urllib_parse.urlencode({
                'lang': 'en',
                'tlang': sub_lang,
                'fmt': sub_format,
                'ts': timestamp,
                'kind': 'asr',
            })
            subtitles_url = caption_url + '&' + params
            sub = self._download_webpage(subtitles_url, video_id, u'Downloading automatic captions')
            return {sub_lang: sub}
        # An extractor error can be raised by the download process if there are
        # no automatic captions but there are subtitles
        except (KeyError, ExtractorError):
            self._downloader.report_warning(err_msg)
            return {}

    def _extract_subtitles(self, video_id):
        """
        Return a dictionary: {language: subtitles} or {} if the subtitles
        couldn't be found
        """
        available_subs_list = self._get_available_subtitles(video_id)
        sub_format = self._downloader.params.get('subtitlesformat')
        if not available_subs_list:  # There was some error, it didn't get the available subtitles
            return {}
        if self._downloader.params.get('allsubtitles', False):
            sub_lang_list = available_subs_list
        else:
            if self._downloader.params.get('subtitleslangs', False):
                requested_langs = self._downloader.params.get('subtitleslangs')
            elif 'en' in available_subs_list:
                requested_langs = ['en']
            else:
                requested_langs = [list(available_subs_list.keys())[0]]

            sub_lang_list = {}
            for sub_lang in requested_langs:
                if sub_lang not in available_subs_list:
                    self._downloader.report_warning(u'no closed captions found in the specified language "%s"' % sub_lang)
                    continue
                sub_lang_list[sub_lang] = available_subs_list[sub_lang]

        subtitles = {}
        for sub_lang in sub_lang_list:
            subtitle = self._request_subtitle(sub_lang, sub_lang_list[sub_lang].encode('utf-8'), video_id, sub_format)
            if subtitle:
                subtitles[sub_lang] = subtitle
        return subtitles

    def _print_formats(self, formats):
        print('Available formats:')
        for x in formats:
            print('%s\t:\t%s\t[%s]%s' % (x, self._video_extensions.get(x, 'flv'),
                                         self._video_dimensions.get(x, '???'),
                                         ' (' + self._special_itags[x] + ')' if x in self._special_itags else ''))

    def _extract_id(self, url):
        mobj = re.match(self._VALID_URL, url, re.VERBOSE)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        video_id = mobj.group(2)
        return video_id
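
    # Worked example: with the _VALID_URL pattern above, both of these inputs
    # yield the ID used in the first test case:
    #     _extract_id('http://www.youtube.com/watch?v=BaW_jenozKc')  ->  'BaW_jenozKc'
    #     _extract_id('BaW_jenozKc')                                 ->  'BaW_jenozKc'
    # (a naked ID is accepted because everything before the ID group is optional)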

    def _get_video_url_list(self, url_map):
        """
        Transform a dictionary in the format {itag:url} to a list of (itag, url)
        with the requested formats.
        """
        req_format = self._downloader.params.get('format', None)
        format_limit = self._downloader.params.get('format_limit', None)
        available_formats = self._available_formats_prefer_free if self._downloader.params.get('prefer_free_formats', False) else self._available_formats
        if format_limit is not None and format_limit in available_formats:
            format_list = available_formats[available_formats.index(format_limit):]
        else:
            format_list = available_formats
        existing_formats = [x for x in format_list if x in url_map]
        if len(existing_formats) == 0:
            raise ExtractorError(u'no known formats available for video')
        if self._downloader.params.get('listformats', None):
            self._print_formats(existing_formats)
            return
        if req_format is None or req_format == 'best':
            video_url_list = [(existing_formats[0], url_map[existing_formats[0]])]  # Best quality
        elif req_format == 'worst':
            video_url_list = [(existing_formats[-1], url_map[existing_formats[-1]])]  # worst quality
        elif req_format in ('-1', 'all'):
            video_url_list = [(f, url_map[f]) for f in existing_formats]  # All formats
        else:
            # Specific formats. We pick the first in a slash-delimited sequence.
            # For example, if '1/2/3/4' is requested and '2' and '4' are available, we pick '2'.
            req_formats = req_format.split('/')
            video_url_list = None
            for rf in req_formats:
                if rf in url_map:
                    video_url_list = [(rf, url_map[rf])]
                    break
            if video_url_list is None:
                raise ExtractorError(u'requested format not available')
        return video_url_list
 611     def _extract_from_m3u8(self
, manifest_url
, video_id
): 
 613         def _get_urls(_manifest
): 
 614             lines 
= _manifest
.split('\n') 
 615             urls 
= filter(lambda l
: l 
and not l
.startswith('#'), 
 618         manifest 
= self
._download
_webpage
(manifest_url
, video_id
, u
'Downloading formats manifest') 
 619         formats_urls 
= _get_urls(manifest
) 
 620         for format_url 
in formats_urls
: 
 621             itag 
= self
._search
_regex
(r
'itag/(\d+?)/', format_url
, 'itag') 
 622             url_map
[itag
] = format_url
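
    # Illustrative sketch (hypothetical manifest, shown only for the shape this
    # method expects): lines starting with '#' are dropped and the itag is read
    # from each variant URL, e.g.
    #     #EXTM3U
    #     #EXT-X-STREAM-INF:BANDWIDTH=1280000
    #     https://manifest.googlevideo.com/api/manifest/hls_playlist/itag/96/...
    # would produce url_map == {'96': 'https://manifest.googlevideo.com/api/manifest/hls_playlist/itag/96/...'}.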
 
    def _real_extract(self, url):
        if re.match(r'(?:https?://)?[^/]+/watch\?feature=[a-z_]+$', url):
            self._downloader.report_warning(u'Did you forget to quote the URL? Remember that & is a meta-character in most shells, so you want to put the URL in quotes, like  youtube-dl \'http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc\' (or simply  youtube-dl BaW_jenozKc  ).')

        # Extract original video URL from URL with redirection, like age verification, using next_url parameter
        mobj = re.search(self._NEXT_URL_RE, url)
        if mobj:
            url = 'https://www.youtube.com/' + compat_urllib_parse.unquote(mobj.group(1)).lstrip('/')
        video_id = self._extract_id(url)

        self.report_video_webpage_download(video_id)
        url = 'https://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id
        request = compat_urllib_request.Request(url)
        try:
            video_webpage_bytes = compat_urllib_request.urlopen(request).read()
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            raise ExtractorError(u'Unable to download video webpage: %s' % compat_str(err))

        video_webpage = video_webpage_bytes.decode('utf-8', 'ignore')

        # Attempt to extract SWF player URL
        mobj = re.search(r'swfConfig.*?"(http:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
        if mobj is not None:
            player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
        else:
            player_url = None
.report_video_info_webpage_download(video_id
) 
 655         if re
.search(r
'player-age-gate-content">', video_webpage
) is not None: 
 656             self
.report_age_confirmation() 
 658             # We simulate the access to the video from www.youtube.com/v/{video_id} 
 659             # this can be viewed without login into Youtube 
 660             data 
= compat_urllib_parse
.urlencode({'video_id': video_id
, 
 664                                                   'eurl': 'https://youtube.googleapis.com/v/' + video_id
, 
 668             video_info_url 
= 'https://www.youtube.com/get_video_info?' + data
 
 669             video_info_webpage 
= self
._download
_webpage
(video_info_url
, video_id
, 
 671                                     errnote
='unable to download video info webpage') 
 672             video_info 
= compat_parse_qs(video_info_webpage
) 
 675             for el_type 
in ['&el=embedded', '&el=detailpage', '&el=vevo', '']: 
 676                 video_info_url 
= ('https://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en' 
 677                         % (video_id
, el_type
)) 
 678                 video_info_webpage 
= self
._download
_webpage
(video_info_url
, video_id
, 
 680                                         errnote
='unable to download video info webpage') 
 681                 video_info 
= compat_parse_qs(video_info_webpage
) 
 682                 if 'token' in video_info
: 
 684         if 'token' not in video_info
: 
 685             if 'reason' in video_info
: 
 686                 raise ExtractorError(u
'YouTube said: %s' % video_info
['reason'][0], expected
=True) 
 688                 raise ExtractorError(u
'"token" parameter not in video info for unknown reason') 

        # Check for "rental" videos
        if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
            raise ExtractorError(u'"rental" videos not supported')

        # Start extracting information
        self.report_information_extraction(video_id)

        if 'author' not in video_info:
            raise ExtractorError(u'Unable to extract uploader name')
        video_uploader = compat_urllib_parse.unquote_plus(video_info['author'][0])
= None 
 704         mobj 
= re
.search(r
'<link itemprop="url" href="http://www.youtube.com/(?:user|channel)/([^"]+)">', video_webpage
) 
 706             video_uploader_id 
= mobj
.group(1) 
 708             self
._downloader
.report_warning(u
'unable to extract uploader nickname') 
 711         if 'title' not in video_info
: 
 712             raise ExtractorError(u
'Unable to extract video title') 
 713         video_title 
= compat_urllib_parse
.unquote_plus(video_info
['title'][0]) 
 716         # We try first to get a high quality image: 
 717         m_thumb 
= re
.search(r
'<span itemprop="thumbnail".*?href="(.*?)">', 
 718                             video_webpage
, re
.DOTALL
) 
 719         if m_thumb 
is not None: 
 720             video_thumbnail 
= m_thumb
.group(1) 
 721         elif 'thumbnail_url' not in video_info
: 
 722             self
._downloader
.report_warning(u
'unable to extract video thumbnail') 
 724         else:   # don't panic if we can't find it 
 725             video_thumbnail 
= compat_urllib_parse
.unquote_plus(video_info
['thumbnail_url'][0]) 
 729         mobj 
= re
.search(r
'id="eow-date.*?>(.*?)</span>', video_webpage
, re
.DOTALL
) 
 731             upload_date 
= ' '.join(re
.sub(r
'[/,-]', r
' ', mobj
.group(1)).split()) 
 732             upload_date 
= unified_strdate(upload_date
) 
 735         video_description 
= get_element_by_id("eow-description", video_webpage
) 
 736         if video_description
: 
 737             video_description 
= clean_html(video_description
) 
 739             fd_mobj 
= re
.search(r
'<meta name="description" content="([^"]+)"', video_webpage
) 
 741                 video_description 
= unescapeHTML(fd_mobj
.group(1)) 
 743                 video_description 
= u
'' 

        # subtitles
        video_subtitles = None

        if self._downloader.params.get('writesubtitles', False) or self._downloader.params.get('allsubtitles', False):
            video_subtitles = self._extract_subtitles(video_id)
        elif self._downloader.params.get('writeautomaticsub', False):
            video_subtitles = self._request_automatic_caption(video_id, video_webpage)

        if self._downloader.params.get('listsubtitles', False):
            self._list_available_subtitles(video_id)
            return
: 
 758             self
._downloader
.report_warning(u
'unable to extract video duration') 
 761             video_duration 
= compat_urllib_parse
.unquote_plus(video_info
['length_seconds'][0]) 
 763         # Decide which formats to download 
 766             mobj 
= re
.search(r
';ytplayer.config = ({.*?});', video_webpage
) 
 768                 raise ValueError('Could not find vevo ID') 
 769             info 
= json
.loads(mobj
.group(1)) 
 771             # Easy way to know if the 's' value is in url_encoded_fmt_stream_map 
 772             # this signatures are encrypted 
 773             m_s 
= re
.search(r
'[&,]s=', args
['url_encoded_fmt_stream_map']) 
 775                 self
.to_screen(u
'%s: Encrypted signatures detected.' % video_id
) 
 776                 video_info
['url_encoded_fmt_stream_map'] = [args
['url_encoded_fmt_stream_map']] 
 777             m_s 
= re
.search(r
'[&,]s=', args
.get('adaptive_fmts', u
'')) 
 779                 if 'url_encoded_fmt_stream_map' in video_info
: 
 780                     video_info
['url_encoded_fmt_stream_map'][0] += ',' + args
['adaptive_fmts'] 
 782                     video_info
['url_encoded_fmt_stream_map'] = [args
['adaptive_fmts']] 
 783             elif 'adaptive_fmts' in video_info
: 
 784                 if 'url_encoded_fmt_stream_map' in video_info
: 
 785                     video_info
['url_encoded_fmt_stream_map'][0] += ',' + video_info
['adaptive_fmts'][0] 
 787                     video_info
['url_encoded_fmt_stream_map'] = video_info
['adaptive_fmts'] 

        if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
            self.report_rtmp_download()
            video_url_list = [(None, video_info['conn'][0])]
        elif 'url_encoded_fmt_stream_map' in video_info and len(video_info['url_encoded_fmt_stream_map']) >= 1:
            if 'rtmpe%3Dyes' in video_info['url_encoded_fmt_stream_map'][0]:
                raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
            url_map = {}
            for url_data_str in video_info['url_encoded_fmt_stream_map'][0].split(','):
                url_data = compat_parse_qs(url_data_str)
                if 'itag' in url_data and 'url' in url_data:
                    url = url_data['url'][0]
                    if 'sig' in url_data:
                        url += '&signature=' + url_data['sig'][0]
                    elif 's' in url_data:
                        if self._downloader.params.get('verbose'):
                            s = url_data['s'][0]
                            if age_gate:
                                player_version = self._search_regex(r'ad3-(.+?)\.swf',
                                    video_info['ad3_module'][0] if 'ad3_module' in video_info else 'NOT FOUND',
                                    'flash player', fatal=False)
                                player = 'flash player %s' % player_version
                            else:
                                player = u'html5 player %s' % self._search_regex(r'html5player-(.+?)\.js', video_webpage,
                                    'html5 player', fatal=False)
                            parts_sizes = u'.'.join(compat_str(len(part)) for part in s.split('.'))
                            self.to_screen(u'encrypted signature length %d (%s), itag %s, %s' %
                                (len(s), parts_sizes, url_data['itag'][0], player))
                        encrypted_sig = url_data['s'][0]
                        if age_gate:
                            signature = self._decrypt_signature_age_gate(encrypted_sig)
                        else:
                            signature = self._decrypt_signature(encrypted_sig)
                        url += '&signature=' + signature
                    if 'ratebypass' not in url:
                        url += '&ratebypass=yes'
                    url_map[url_data['itag'][0]] = url
            video_url_list = self._get_video_url_list(url_map)
            if not video_url_list:
                return
        elif video_info.get('hlsvp'):
            manifest_url = video_info['hlsvp'][0]
            url_map = self._extract_from_m3u8(manifest_url, video_id)
            video_url_list = self._get_video_url_list(url_map)
            if not video_url_list:
                return
        else:
            raise ExtractorError(u'no conn or url_encoded_fmt_stream_map information found in video info')
, video_real_url 
in video_url_list
: 
 843             video_extension 
= self
._video
_extensions
.get(format_param
, 'flv') 
 845             video_format 
= '{0} - {1}{2}'.format(format_param 
if format_param 
else video_extension
, 
 846                                               self
._video
_dimensions
.get(format_param
, '???'), 
 847                                               ' ('+self
._special
_itags
[format_param
]+')' if format_param 
in self
._special
_itags 
else '') 
 851                 'url':      video_real_url
, 
 852                 'uploader': video_uploader
, 
 853                 'uploader_id': video_uploader_id
, 
 854                 'upload_date':  upload_date
, 
 855                 'title':    video_title
, 
 856                 'ext':      video_extension
, 
 857                 'format':   video_format
, 
 858                 'thumbnail':    video_thumbnail
, 
 859                 'description':  video_description
, 
 860                 'player_url':   player_url
, 
 861                 'subtitles':    video_subtitles
, 
 862                 'duration':     video_duration
 

class YoutubePlaylistIE(InfoExtractor):
    IE_DESC = u'YouTube.com playlists'
    _VALID_URL = r"""(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        youtube\.com/
                        (?:
                           (?:course|view_play_list|my_playlists|artist|playlist|watch)
                           \? (?:.*?&)*? (?:p|a|list)=
                        |  p/
                        )
                        ((?:PL|EC|UU|FL)?[0-9A-Za-z-_]{10,})
                        .*
                     |
                        ((?:PL|EC|UU|FL)[0-9A-Za-z-_]{10,})
                     )"""
    _TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/playlists/%s?max-results=%i&start-index=%i&v=2&alt=json&safeSearch=none'
    _MAX_RESULTS = 50
    IE_NAME = u'youtube:playlist'

    @classmethod
    def suitable(cls, url):
        """Receives a URL and returns True if suitable for this IE."""
        return re.match(cls._VALID_URL, url, re.VERBOSE) is not None

    def _real_extract(self, url):
        # Extract playlist id
        mobj = re.match(self._VALID_URL, url, re.VERBOSE)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        # Download playlist videos from API
        playlist_id = mobj.group(1) or mobj.group(2)
        videos = []

        for page_num in itertools.count(1):
            start_index = self._MAX_RESULTS * (page_num - 1) + 1
            if start_index >= 1000:
                self._downloader.report_warning(u'Max number of results reached')
                break
            url = self._TEMPLATE_URL % (playlist_id, self._MAX_RESULTS, start_index)
            page = self._download_webpage(url, playlist_id, u'Downloading page #%s' % page_num)

            try:
                response = json.loads(page)
            except ValueError as err:
                raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err))

            if 'feed' not in response:
                raise ExtractorError(u'Got a malformed response from YouTube API')
            playlist_title = response['feed']['title']['$t']
            if 'entry' not in response['feed']:
                # Number of videos is a multiple of self._MAX_RESULTS
                break

            for entry in response['feed']['entry']:
                index = entry['yt$position']['$t']
                if 'media$group' in entry and 'media$player' in entry['media$group']:
                    videos.append((index, entry['media$group']['media$player']['url']))

        videos = [v[1] for v in sorted(videos)]

        url_results = [self.url_result(vurl, 'Youtube') for vurl in videos]
        return [self.playlist_result(url_results, playlist_id, playlist_title)]
): 
 933     IE_DESC 
= u
'YouTube.com channels' 
 934     _VALID_URL 
= r
"^(?:https?://)?(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/([0-9A-Za-z_-]+)" 
 935     _TEMPLATE_URL 
= 'http://www.youtube.com/channel/%s/videos?sort=da&flow=list&view=0&page=%s&gl=US&hl=en' 
 936     _MORE_PAGES_INDICATOR 
= 'yt-uix-load-more' 
 937     _MORE_PAGES_URL 
= 'http://www.youtube.com/c4_browse_ajax?action_load_more_videos=1&flow=list&paging=%s&view=0&sort=da&channel_id=%s' 
 938     IE_NAME 
= u
'youtube:channel' 
 940     def extract_videos_from_page(self
, page
): 
 942         for mobj 
in re
.finditer(r
'href="/watch\?v=([0-9A-Za-z_-]+)&?', page
): 
 943             if mobj
.group(1) not in ids_in_page
: 
 944                 ids_in_page
.append(mobj
.group(1)) 
 947     def _real_extract(self
, url
): 
 949         mobj 
= re
.match(self
._VALID
_URL
, url
) 
 951             raise ExtractorError(u
'Invalid URL: %s' % url
) 
 953         # Download channel page 
 954         channel_id 
= mobj
.group(1) 
 958         url 
= self
._TEMPLATE
_URL 
% (channel_id
, pagenum
) 
 959         page 
= self
._download
_webpage
(url
, channel_id
, 
 960                                       u
'Downloading page #%s' % pagenum
) 
 962         # Extract video identifiers 
 963         ids_in_page 
= self
.extract_videos_from_page(page
) 
 964         video_ids
.extend(ids_in_page
) 
 966         # Download any subsequent channel pages using the json-based channel_ajax query 
 967         if self
._MORE
_PAGES
_INDICATOR 
in page
: 
 968             for pagenum 
in itertools
.count(1): 
 969                 url 
= self
._MORE
_PAGES
_URL 
% (pagenum
, channel_id
) 
 970                 page 
= self
._download
_webpage
(url
, channel_id
, 
 971                                               u
'Downloading page #%s' % pagenum
) 
 973                 page 
= json
.loads(page
) 
 975                 ids_in_page 
= self
.extract_videos_from_page(page
['content_html']) 
 976                 video_ids
.extend(ids_in_page
) 
 978                 if self
._MORE
_PAGES
_INDICATOR  
not in page
['load_more_widget_html']: 
 981         self
._downloader
.to_screen(u
'[youtube] Channel %s: Found %i videos' % (channel_id
, len(video_ids
))) 
 983         urls 
= ['http://www.youtube.com/watch?v=%s' % id for id in video_ids
] 
 984         url_entries 
= [self
.url_result(eurl
, 'Youtube') for eurl 
in urls
] 
 985         return [self
.playlist_result(url_entries
, channel_id
)] 

class YoutubeUserIE(InfoExtractor):
    IE_DESC = u'YouTube.com user videos (URL or "ytuser" keyword)'
    _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/user/)|ytuser:)([A-Za-z0-9_-]+)'
    _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
    _GDATA_PAGE_SIZE = 50
    _GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d'
    _VIDEO_INDICATOR = r'/watch\?v=(.+?)[\<&]'
    IE_NAME = u'youtube:user'

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        username = mobj.group(1)

        # Download video ids using YouTube Data API. Result size per
        # query is limited (currently to 50 videos) so we need to query
        # page by page until there are no video ids - it means we got
        # all of them.

        video_ids = []

        for pagenum in itertools.count(0):
            start_index = pagenum * self._GDATA_PAGE_SIZE + 1

            gdata_url = self._GDATA_URL % (username, self._GDATA_PAGE_SIZE, start_index)
            page = self._download_webpage(gdata_url, username,
                                          u'Downloading video ids from %d to %d' % (start_index, start_index + self._GDATA_PAGE_SIZE))

            # Extract video identifiers
            ids_in_page = []

            for mobj in re.finditer(self._VIDEO_INDICATOR, page):
                if mobj.group(1) not in ids_in_page:
                    ids_in_page.append(mobj.group(1))

            video_ids.extend(ids_in_page)

            # A little optimization - if current page is not
            # "full", i.e. does not contain PAGE_SIZE video ids then
            # we can assume that this page is the last one - there
            # are no more ids on further pages - no need to query
            # again.

            if len(ids_in_page) < self._GDATA_PAGE_SIZE:
                break

        urls = ['http://www.youtube.com/watch?v=%s' % video_id for video_id in video_ids]
        url_results = [self.url_result(rurl, 'Youtube') for rurl in urls]
        return [self.playlist_result(url_results, playlist_title=username)]
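
# Worked example: with _GDATA_PAGE_SIZE = 50 the loop above requests
# start-index 1, 51, 101, ... and stops as soon as a page yields fewer than 50
# video ids, so a user with 120 uploads is fetched in three API calls.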

class YoutubeSearchIE(SearchInfoExtractor):
    IE_DESC = u'YouTube.com searches'
    _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc'
    IE_NAME = u'youtube:search'
    _SEARCH_KEY = 'ytsearch'

    def report_download_page(self, query, pagenum):
        """Report attempt to download search page with given number."""
        self._downloader.to_screen(u'[youtube] query "%s": Downloading page %s' % (query, pagenum))

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""

        video_ids = []
        pagenum = 0
        limit = n

        while (50 * pagenum) < limit:
            self.report_download_page(query, pagenum+1)
            result_url = self._API_URL % (compat_urllib_parse.quote_plus(query), (50*pagenum)+1)
            request = compat_urllib_request.Request(result_url)
            try:
                data = compat_urllib_request.urlopen(request).read().decode('utf-8')
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                raise ExtractorError(u'Unable to download API page: %s' % compat_str(err))
            api_response = json.loads(data)['data']

            if 'items' not in api_response:
                raise ExtractorError(u'[youtube] No video results')

            new_ids = list(video['id'] for video in api_response['items'])
            video_ids += new_ids

            limit = min(n, api_response['totalItems'])
            pagenum += 1

        if len(video_ids) > n:
            video_ids = video_ids[:n]
        videos = [self.url_result('http://www.youtube.com/watch?v=%s' % id, 'Youtube') for id in video_ids]
        return self.playlist_result(videos, query)
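
# Hedged usage note: SearchInfoExtractor keys this extractor to the "ytsearch"
# prefix, so a command line such as
#     youtube-dl "ytsearch3:python tutorial"
# ends up calling _get_n_results(u'python tutorial', 3) and downloading the
# first three matches as a playlist.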

class YoutubeShowIE(InfoExtractor):
    IE_DESC = u'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://www\.youtube\.com/show/(.*)'
    IE_NAME = u'youtube:show'

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        show_name = mobj.group(1)
        webpage = self._download_webpage(url, show_name, u'Downloading show webpage')
        # There's one playlist for each season of the show
        m_seasons = list(re.finditer(r'href="(/playlist\?list=.*?)"', webpage))
        self.to_screen(u'%s: Found %s seasons' % (show_name, len(m_seasons)))
        return [self.url_result('https://www.youtube.com' + season.group(1), 'YoutubePlaylist') for season in m_seasons]

class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
    """
    Base class for extractors that fetch info from
    http://www.youtube.com/feed_ajax
    Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
    """
    _LOGIN_REQUIRED = True
    # use action_load_personal_feed instead of action_load_system_feed
    _PERSONAL_FEED = False

    @property
    def _FEED_TEMPLATE(self):
        action = 'action_load_system_feed'
        if self._PERSONAL_FEED:
            action = 'action_load_personal_feed'
        return 'http://www.youtube.com/feed_ajax?%s=1&feed_name=%s&paging=%%s' % (action, self._FEED_NAME)

    @property
    def IE_NAME(self):
        return u'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        self._login()

    def _real_extract(self, url):
        feed_entries = []
        # The step argument is available only in 2.7 or higher
        for i in itertools.count(0):
            paging = i * self._PAGING_STEP
            info = self._download_webpage(self._FEED_TEMPLATE % paging,
                                          u'%s feed' % self._FEED_NAME,
                                          u'Downloading page %s' % i)
            info = json.loads(info)
            feed_html = info['feed_html']
            m_ids = re.finditer(r'"/watch\?v=(.*?)["&]', feed_html)
            ids = orderedSet(m.group(1) for m in m_ids)
            feed_entries.extend(self.url_result(id, 'Youtube') for id in ids)
            if info['paging'] is None:
                break
        return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE)
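
# Illustrative note: for the subscriptions feed below, _FEED_TEMPLATE expands to
#     http://www.youtube.com/feed_ajax?action_load_system_feed=1&feed_name=subscriptions&paging=%s
# and _real_extract fills in paging = i * _PAGING_STEP for i = 0, 1, 2, ...
# until the JSON response reports no further paging.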

class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    IE_DESC = u'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
    _FEED_NAME = 'subscriptions'
    _PLAYLIST_TITLE = u'Youtube Subscriptions'

class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    IE_DESC = u'YouTube.com recommended videos, "ytrec" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/recommended|:ytrec(?:ommended)?'
    _FEED_NAME = 'recommended'
    _PLAYLIST_TITLE = u'Youtube Recommended videos'

class YoutubeWatchLaterIE(YoutubeFeedsInfoExtractor):
    IE_DESC = u'Youtube watch later list, "ytwatchlater" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/watch_later|:ytwatchlater'
    _FEED_NAME = 'watch_later'
    _PLAYLIST_TITLE = u'Youtube Watch Later'
    _PERSONAL_FEED = True

class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    IE_NAME = u'youtube:favorites'
    IE_DESC = u'YouTube.com favourite videos, "ytfav" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/my_favorites|:ytfav(?:o?rites)?'
    _LOGIN_REQUIRED = True

    def _real_extract(self, url):
        webpage = self._download_webpage('https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
        playlist_id = self._search_regex(r'list=(.+?)["&]', webpage, u'favourites playlist id')
        return self.url_result(playlist_id, 'YoutubePlaylist')
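
# Hedged usage sketch (the exact front-end calls are an assumption, not part of
# this module): these extractors are normally driven through YoutubeDL rather
# than instantiated directly, along the lines of
#     import youtube_dl
#     ydl = youtube_dl.YoutubeDL({'quiet': True})
#     ydl.add_default_info_extractors()
#     info = ydl.extract_info('http://www.youtube.com/watch?v=BaW_jenozKc', download=False)
# where extract_info dispatches the URL to YoutubeIE via suitable() and returns
# the info dictionary built in _real_extract.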