X-Git-Url: https://git.rapsys.eu/youtubedl/blobdiff_plain/af014acd27e0b471d5903630847eabb26437b46c..80893415fd8cecb59cb8ffbea17a183d4202f02e:/youtube_dl/extractor/youtube.py diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index ad2e933..9943ddd 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -332,6 +332,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/| (?:www\.)?deturl\.com/www\.youtube\.com/| (?:www\.)?pwnyoutube\.com/| + (?:www\.)?hooktube\.com/| (?:www\.)?yourepeat\.com/| tube\.majestyc\.net/| youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains @@ -1390,7 +1391,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): ) (["\']) (?P(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/ - (?:embed|v|p)/.+?) + (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?) \1''', webpage)] # lazyYT YouTube embed @@ -1621,6 +1622,17 @@ class YoutubeIE(YoutubeBaseInfoExtractor): # description description_original = video_description = get_element_by_id("eow-description", video_webpage) if video_description: + + def replace_url(m): + redir_url = compat_urlparse.urljoin(url, m.group(1)) + parsed_redir_url = compat_urllib_parse_urlparse(redir_url) + if re.search(r'^(?:www\.)?(?:youtube(?:-nocookie)?\.com|youtu\.be)$', parsed_redir_url.netloc) and parsed_redir_url.path == '/redirect': + qs = compat_parse_qs(parsed_redir_url.query) + q = qs.get('q') + if q and q[0]: + return q[0] + return redir_url + description_original = video_description = re.sub(r'''(?x) ]*> [^<]+\.{3}\s* - ''', r'\1', video_description) + ''', replace_url, video_description) video_description = clean_html(video_description) else: fd_mobj = re.search(r'', + r'', video_webpage) if mobj is not None: video_uploader_id = mobj.group('uploader_id') @@ -2039,39 +2051,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor): } -class YoutubeSharedVideoIE(InfoExtractor): - _VALID_URL = r'(?:https?:)?//(?:www\.)?youtube\.com/shared\?.*\bci=(?P[0-9A-Za-z_-]{11})' - IE_NAME = 'youtube:shared' - - _TEST = { - 'url': 'https://www.youtube.com/shared?ci=1nEzmT-M4fU', - 'info_dict': { - 'id': 'uPDB5I9wfp8', - 'ext': 'webm', - 'title': 'Pocoyo: 90 minutos de episódios completos Português para crianças - PARTE 3', - 'description': 'md5:d9e4d9346a2dfff4c7dc4c8cec0f546d', - 'upload_date': '20160219', - 'uploader': 'Pocoyo - Português (BR)', - 'uploader_id': 'PocoyoBrazil', - }, - 'add_ie': ['Youtube'], - 'params': { - # There are already too many Youtube downloads - 'skip_download': True, - }, - } - - def _real_extract(self, url): - video_id = self._match_id(url) - - webpage = self._download_webpage(url, video_id) - - real_video_id = self._html_search_meta( - 'videoId', webpage, 'YouTube video id', fatal=True) - - return self.url_result(real_video_id, YoutubeIE.ie_key()) - - class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): IE_DESC = 'YouTube.com playlists' _VALID_URL = r"""(?x)(?: