X-Git-Url: https://git.rapsys.eu/youtubedl/blobdiff_plain/11c4ce5827158ef3f817f2536e220d95e0576e6e..3e696c728febc53e74c912c40ccd3b9504d536df:/youtube_dl/extractor/youtube.py diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index c7922c5..bd24a28 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1,814 +1,2517 @@ # coding: utf-8 +from __future__ import unicode_literals + + +import itertools import json -import netrc +import os.path +import random import re -import socket +import time +import traceback from .common import InfoExtractor, SearchInfoExtractor -from ..utils import ( - compat_http_client, +from ..jsinterp import JSInterpreter +from ..swfinterp import SWFInterpreter +from ..compat import ( + compat_chr, compat_parse_qs, - compat_urllib_error, - compat_urllib_parse, - compat_urllib_request, + compat_urllib_parse_unquote, + compat_urllib_parse_unquote_plus, + compat_urllib_parse_urlencode, + compat_urllib_parse_urlparse, + compat_urlparse, compat_str, - +) +from ..utils import ( clean_html, - get_element_by_id, + error_to_compat_str, ExtractorError, + float_or_none, + get_element_by_attribute, + get_element_by_id, + int_or_none, + mimetype2ext, + orderedSet, + parse_duration, + remove_quotes, + remove_start, + sanitized_Request, + smuggle_url, + str_to_int, unescapeHTML, unified_strdate, + unsmuggle_url, + uppercase_escape, + urlencode_postdata, + ISO3166Utils, ) -class YoutubeIE(InfoExtractor): - """Information extractor for youtube.com.""" +class YoutubeBaseInfoExtractor(InfoExtractor): + """Provide base functions for Youtube extractors""" + _LOGIN_URL = 'https://accounts.google.com/ServiceLogin' + _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge' + _PASSWORD_CHALLENGE_URL = 'https://accounts.google.com/signin/challenge/sl/password' + _NETRC_MACHINE = 'youtube' + # If True it will raise an error if no login info is provided + _LOGIN_REQUIRED = False + + def _set_language(self): + self._set_cookie( + '.youtube.com', 'PREF', 'f1=50000000&hl=en', + # YouTube sets the expire time to about two months + expire_time=time.time() + 2 * 30 * 24 * 3600) + + def _ids_to_results(self, ids): + return [ + self.url_result(vid_id, 'Youtube', video_id=vid_id) + for vid_id in ids] - _VALID_URL = r"""^ + def _login(self): + """ + Attempt to log in to YouTube. + True is returned if successful or skipped. + False is returned if login failed. + + If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised. + """ + (username, password) = self._get_login_info() + # No authentication to be performed + if username is None: + if self._LOGIN_REQUIRED: + raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True) + return True + + login_page = self._download_webpage( + self._LOGIN_URL, None, + note='Downloading login page', + errnote='unable to fetch login page', fatal=False) + if login_page is False: + return + + login_form = self._hidden_inputs(login_page) + + login_form.update({ + 'checkConnection': 'youtube', + 'Email': username, + 'Passwd': password, + }) + + login_results = self._download_webpage( + self._PASSWORD_CHALLENGE_URL, None, + note='Logging in', errnote='unable to log in', fatal=False, + data=urlencode_postdata(login_form)) + if login_results is False: + return False + + error_msg = self._html_search_regex( + r'<[^>]+id="errormsg_0_Passwd"[^>]*>([^<]+)<', + login_results, 'error message', default=None) + if error_msg: + raise ExtractorError('Unable to login: %s' % error_msg, expected=True) + + if re.search(r'id="errormsg_0_Passwd"', login_results) is not None: + raise ExtractorError('Please use your account password and a two-factor code instead of an application-specific password.', expected=True) + + # Two-Factor + # TODO add SMS and phone call support - these require making a request and then prompting the user + + if re.search(r'(?i)