X-Git-Url: https://git.rapsys.eu/youtubedl/blobdiff_plain/9ed7fe4fe4c445eb7d9f3197bb300d0db8f1807a..5faa4f6150b14412aed793d7b9b4eb8ce21678cd:/youtube_dl/extractor/youtube.py diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index cfe9eed..c8d54f2 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -6,6 +6,7 @@ from __future__ import unicode_literals import itertools import json import os.path +import random import re import time import traceback @@ -16,23 +17,25 @@ from ..swfinterp import SWFInterpreter from ..compat import ( compat_chr, compat_parse_qs, - compat_urllib_parse, compat_urllib_parse_unquote, compat_urllib_parse_unquote_plus, + compat_urllib_parse_urlencode, compat_urllib_parse_urlparse, compat_urlparse, compat_str, ) from ..utils import ( clean_html, - encode_dict, + error_to_compat_str, ExtractorError, float_or_none, get_element_by_attribute, get_element_by_id, int_or_none, + mimetype2ext, orderedSet, parse_duration, + remove_quotes, remove_start, sanitized_Request, smuggle_url, @@ -41,6 +44,7 @@ from ..utils import ( unified_strdate, unsmuggle_url, uppercase_escape, + urlencode_postdata, ISO3166Utils, ) @@ -112,7 +116,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor): 'hl': 'en_US', } - login_data = compat_urllib_parse.urlencode(encode_dict(login_form_strs)).encode('ascii') + login_data = urlencode_postdata(login_form_strs) req = sanitized_Request(self._LOGIN_URL, login_data) login_results = self._download_webpage( @@ -121,6 +125,12 @@ class YoutubeBaseInfoExtractor(InfoExtractor): if login_results is False: return False + error_msg = self._html_search_regex( + r'<[^>]+id="errormsg_0_Passwd"[^>]*>([^<]+)<', + login_results, 'error message', default=None) + if error_msg: + raise ExtractorError('Unable to login: %s' % error_msg, expected=True) + if re.search(r'id="errormsg_0_Passwd"', login_results) is not None: raise ExtractorError('Please use your account password and a two-factor code instead of an application-specific password.', expected=True) @@ -145,7 +155,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor): 'TrustDevice': 'on', }) - tfa_data = compat_urllib_parse.urlencode(encode_dict(tfa_form_strs)).encode('ascii') + tfa_data = urlencode_postdata(tfa_form_strs) tfa_req = sanitized_Request(self._TWOFACTOR_URL, tfa_data) tfa_results = self._download_webpage( @@ -178,7 +188,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor): return -class YoutubeEntryListBaseInfoExtractor(InfoExtractor): +class YoutubeEntryListBaseInfoExtractor(YoutubeBaseInfoExtractor): # Extract entries from page with "Load more" button def _entries(self, page, playlist_id): more_widget_html = content_html = page @@ -230,7 +240,9 @@ class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor): class YoutubePlaylistsBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor): def _process_page(self, content): - for playlist_id in re.findall(r'href="/?playlist\?list=(.+?)"', content): + for playlist_id in orderedSet(re.findall( + r'