From: Rogério Brito Date: Wed, 16 Jul 2014 01:31:35 +0000 (-0300) Subject: Imported Upstream version 2014.07.15 X-Git-Url: https://git.rapsys.eu/youtubedl/commitdiff_plain/5d3bcae50f6f7185984ffdf960a0bc5444b3d556 Imported Upstream version 2014.07.15 --- diff --git a/README.md b/README.md index dffdaa9..bc5e0f7 100644 --- a/README.md +++ b/README.md @@ -255,7 +255,7 @@ which means you can modify it, redistribute it or use it however you like. 128K (default 5) --recode-video FORMAT Encode the video to another format if necessary (currently supported: - mp4|flv|ogg|webm) + mp4|flv|ogg|webm|mkv) -k, --keep-video keeps the video file on disk after the post-processing; the video is erased by default diff --git a/README.txt b/README.txt index 0f9c470..5555b2a 100644 --- a/README.txt +++ b/README.txt @@ -283,7 +283,7 @@ Post-processing Options: 128K (default 5) --recode-video FORMAT Encode the video to another format if necessary (currently supported: - mp4|flv|ogg|webm) + mp4|flv|ogg|webm|mkv) -k, --keep-video keeps the video file on disk after the post-processing; the video is erased by default diff --git a/test/test_playlists.py b/test/test_playlists.py index 994b1d4..1a38a66 100644 --- a/test/test_playlists.py +++ b/test/test_playlists.py @@ -111,7 +111,7 @@ class TestPlaylists(unittest.TestCase): ie = VineUserIE(dl) result = ie.extract('https://vine.co/Visa') self.assertIsPlaylist(result) - self.assertTrue(len(result['entries']) >= 50) + self.assertTrue(len(result['entries']) >= 47) def test_ustream_channel(self): dl = FakeYDL() @@ -137,6 +137,14 @@ class TestPlaylists(unittest.TestCase): self.assertEqual(result['id'], '9615865') self.assertTrue(len(result['entries']) >= 12) + def test_soundcloud_likes(self): + dl = FakeYDL() + ie = SoundcloudUserIE(dl) + result = ie.extract('https://soundcloud.com/the-concept-band/likes') + self.assertIsPlaylist(result) + self.assertEqual(result['id'], '9615865') + self.assertTrue(len(result['entries']) >= 1) + def 
test_soundcloud_playlist(self): dl = FakeYDL() ie = SoundcloudPlaylistIE(dl) diff --git a/test/test_subtitles.py b/test/test_subtitles.py index 5736fe5..48c3021 100644 --- a/test/test_subtitles.py +++ b/test/test_subtitles.py @@ -87,7 +87,7 @@ class TestYoutubeSubtitles(BaseTestSubtitles): def test_youtube_nosubtitles(self): self.DL.expect_warning(u'video doesn\'t have subtitles') - self.url = 'sAjKT8FhjI8' + self.url = 'n5BB19UTcdA' self.DL.params['writesubtitles'] = True self.DL.params['allsubtitles'] = True subtitles = self.getSubtitles() diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py index 8d46fe1..d955339 100644 --- a/test/test_youtube_signature.py +++ b/test/test_youtube_signature.py @@ -33,6 +33,12 @@ _TESTS = [ 90, u']\\[@?>=<;:/.-,+*)(\'&%$#"hZYXWVUTSRQPONMLKJIHGFEDCBAzyxwvutsrqponmlkjiagfedcb39876', ), + ( + u'https://s.ytimg.com/yts/jsbin/html5player-en_US-vfl0Cbn9e.js', + u'js', + 84, + u'O1I3456789abcde0ghijklmnopqrstuvwxyzABCDEFGHfJKLMN2PQRSTUVW@YZ!"#$%&\'()*+,-./:;<=', + ), ( u'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflXGBaUN.js', u'js', diff --git a/youtube-dl b/youtube-dl index 3d28bb5..73304fe 100755 Binary files a/youtube-dl and b/youtube-dl differ diff --git a/youtube-dl.1 b/youtube-dl.1 index 47899f4..26833e8 100644 --- a/youtube-dl.1 +++ b/youtube-dl.1 @@ -278,7 +278,7 @@ redistribute it or use it however you like. 
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ 128K\ (default\ 5) \-\-recode\-video\ FORMAT\ \ \ \ \ \ \ \ \ \ \ \ Encode\ the\ video\ to\ another\ format\ if \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ necessary\ (currently\ supported: -\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ mp4|flv|ogg|webm) +\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ mp4|flv|ogg|webm|mkv) \-k,\ \-\-keep\-video\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ keeps\ the\ video\ file\ on\ disk\ after\ the \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ post\-processing;\ the\ video\ is\ erased\ by \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ default diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 31ed63f..5e16a54 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -60,6 +60,10 @@ __authors__ = ( 'Georg Jähnig', 'Ralf Haring', 'Koki Takahashi', + 'Ariset Llerena', + 'Adam Malcontenti-Wilson', + 'Tobias Bell', + 'Naglis Jonaitis', ) __license__ = 'Public Domain' @@ -506,7 +510,7 @@ def parseOpts(overrideArguments=None): postproc.add_option('--audio-quality', metavar='QUALITY', dest='audioquality', default='5', help='ffmpeg/avconv audio quality specification, insert a value between 0 (better) and 9 (worse) for VBR or a specific bitrate like 128K (default 5)') postproc.add_option('--recode-video', metavar='FORMAT', dest='recodevideo', default=None, - help='Encode the video to another format if necessary (currently supported: mp4|flv|ogg|webm)') + help='Encode the video to another format if necessary (currently supported: mp4|flv|ogg|webm|mkv)') postproc.add_option('-k', '--keep-video', action='store_true', dest='keepvideo', default=False, help='keeps the video file on disk after the post-processing; the video is erased by default') postproc.add_option('--no-post-overwrites', action='store_true', dest='nopostoverwrites', default=False, diff --git 
a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 12cca5c..e49ac3e 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -83,6 +83,7 @@ from .extremetube import ExtremeTubeIE from .facebook import FacebookIE from .faz import FazIE from .fc2 import FC2IE +from .firedrive import FiredriveIE from .firstpost import FirstpostIE from .firsttv import FirstTVIE from .fivemin import FiveMinIE @@ -105,6 +106,7 @@ from .freesound import FreesoundIE from .freespeech import FreespeechIE from .funnyordie import FunnyOrDieIE from .gamekings import GamekingsIE +from .gameone import GameOneIE from .gamespot import GameSpotIE from .gametrailers import GametrailersIE from .gdcvault import GDCVaultIE @@ -112,6 +114,7 @@ from .generic import GenericIE from .googleplus import GooglePlusIE from .googlesearch import GoogleSearchIE from .gorillavid import GorillaVidIE +from .goshgay import GoshgayIE from .hark import HarkIE from .helsinki import HelsinkiIE from .hentaistigma import HentaiStigmaIE @@ -229,6 +232,7 @@ from .radiofrance import RadioFranceIE from .rai import RaiIE from .rbmaradio import RBMARadioIE from .redtube import RedTubeIE +from .reverbnation import ReverbNationIE from .ringtv import RingTVIE from .ro220 import Ro220IE from .rottentomatoes import RottenTomatoesIE @@ -237,6 +241,7 @@ from .rtbf import RTBFIE from .rtlnow import RTLnowIE from .rts import RTSIE from .rtve import RTVEALaCartaIE +from .ruhd import RUHDIE from .rutube import ( RutubeIE, RutubeChannelIE, @@ -246,6 +251,7 @@ from .rutube import ( from .rutv import RUTVIE from .savefrom import SaveFromIE from .scivee import SciVeeIE +from .screencast import ScreencastIE from .servingsys import ServingSysIE from .sina import SinaIE from .slideshare import SlideshareIE @@ -264,8 +270,8 @@ from .soundcloud import ( SoundcloudPlaylistIE ) from .soundgasm import SoundgasmIE -from .southparkstudios import ( - SouthParkStudiosIE, +from .southpark import ( + 
SouthParkIE, SouthparkDeIE, ) from .space import SpaceIE @@ -289,6 +295,7 @@ from .teachingchannel import TeachingChannelIE from .teamcoco import TeamcocoIE from .techtalks import TechTalksIE from .ted import TEDIE +from .tenplay import TenPlayIE from .testurl import TestURLIE from .tf1 import TF1IE from .theplatform import ThePlatformIE @@ -336,12 +343,14 @@ from .vimeo import ( VimeoReviewIE, VimeoWatchLaterIE, ) +from .vimple import VimpleIE from .vine import ( VineIE, VineUserIE, ) from .viki import VikiIE from .vk import VKIE +from .vodlocker import VodlockerIE from .vube import VubeIE from .vuclip import VuClipIE from .vulture import VultureIE diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index e4e4fee..e686573 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1,11 +1,12 @@ import base64 import hashlib import json +import netrc import os import re import socket import sys -import netrc +import time import xml.etree.ElementTree from ..utils import ( @@ -462,14 +463,14 @@ class InfoExtractor(object): def _og_search_url(self, html, **kargs): return self._og_search_property('url', html, **kargs) - def _html_search_meta(self, name, html, display_name=None, fatal=False): + def _html_search_meta(self, name, html, display_name=None, fatal=False, **kwargs): if display_name is None: display_name = name return self._html_search_regex( r'''(?ix)]+(?:itemprop|name|property)=["\']%s["\']) [^>]+content=["\']([^"\']+)["\']''' % re.escape(name), - html, display_name, fatal=fatal) + html, display_name, fatal=fatal, **kwargs) def _dc_search_uploader(self, html): return self._html_search_meta('dc.creator', html, 'uploader') @@ -575,6 +576,13 @@ class InfoExtractor(object): else: return url + def _sleep(self, timeout, video_id, msg_template=None): + if msg_template is None: + msg_template = u'%(video_id)s: Waiting for %(timeout)s seconds' + msg = msg_template % {'video_id': video_id, 'timeout': timeout} + 
self.to_screen(msg) + time.sleep(timeout) + class SearchInfoExtractor(InfoExtractor): """ @@ -618,4 +626,3 @@ class SearchInfoExtractor(InfoExtractor): @property def SEARCH_KEY(self): return self._SEARCH_KEY - diff --git a/youtube_dl/extractor/criterion.py b/youtube_dl/extractor/criterion.py index 31fe3d5..4fb1781 100644 --- a/youtube_dl/extractor/criterion.py +++ b/youtube_dl/extractor/criterion.py @@ -1,40 +1,43 @@ # -*- coding: utf-8 -*- +from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import determine_ext + class CriterionIE(InfoExtractor): - _VALID_URL = r'https?://www\.criterion\.com/films/(\d*)-.+' + _VALID_URL = r'https?://www\.criterion\.com/films/(?P[0-9]+)-.+' _TEST = { - u'url': u'http://www.criterion.com/films/184-le-samourai', - u'file': u'184.mp4', - u'md5': u'bc51beba55685509883a9a7830919ec3', - u'info_dict': { - u"title": u"Le Samouraï", - u"description" : u'md5:a2b4b116326558149bef81f76dcbb93f', + 'url': 'http://www.criterion.com/films/184-le-samourai', + 'md5': 'bc51beba55685509883a9a7830919ec3', + 'info_dict': { + 'id': '184', + 'ext': 'mp4', + 'title': 'Le Samouraï', + 'description': 'md5:a2b4b116326558149bef81f76dcbb93f', } } def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) - video_id = mobj.group(1) + video_id = mobj.group('id') webpage = self._download_webpage(url, video_id) - final_url = self._search_regex(r'so.addVariable\("videoURL", "(.+?)"\)\;', - webpage, 'video url') - title = self._html_search_regex(r'', - webpage, 'video title') - description = self._html_search_regex(r'', - webpage, 'video description') - thumbnail = self._search_regex(r'so.addVariable\("thumbnailURL", "(.+?)"\)\;', - webpage, 'thumbnail url') + final_url = self._search_regex( + r'so.addVariable\("videoURL", "(.+?)"\)\;', webpage, 'video url') + title = self._og_search_title(webpage) + description = self._html_search_regex( + r'', + webpage, 'video description') + thumbnail = self._search_regex( + 
r'so.addVariable\("thumbnailURL", "(.+?)"\)\;', + webpage, 'thumbnail url') - return {'id': video_id, - 'url' : final_url, - 'title': title, - 'ext': determine_ext(final_url), - 'description': description, - 'thumbnail': thumbnail, - } + return { + 'id': video_id, + 'url': final_url, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + } diff --git a/youtube_dl/extractor/firedrive.py b/youtube_dl/extractor/firedrive.py new file mode 100644 index 0000000..d26145d --- /dev/null +++ b/youtube_dl/extractor/firedrive.py @@ -0,0 +1,83 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + compat_urllib_parse, + compat_urllib_request, + determine_ext, +) + + +class FiredriveIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?firedrive\.com/' + \ + '(?:file|embed)/(?P[0-9a-zA-Z]+)' + _FILE_DELETED_REGEX = r'
' + + _TESTS = [{ + 'url': 'https://www.firedrive.com/file/FEB892FA160EBD01', + 'md5': 'd5d4252f80ebeab4dc2d5ceaed1b7970', + 'info_dict': { + 'id': 'FEB892FA160EBD01', + 'ext': 'flv', + 'title': 'bbb_theora_486kbit.flv', + 'thumbnail': 're:^http://.*\.jpg$', + }, + }] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + url = 'http://firedrive.com/file/%s' % video_id + + webpage = self._download_webpage(url, video_id) + + if re.search(self._FILE_DELETED_REGEX, webpage) is not None: + raise ExtractorError('Video %s does not exist' % video_id, + expected=True) + + fields = dict(re.findall(r'''(?x)(.+)
', + webpage, 'title') + thumbnail = self._search_regex(r'image:\s?"(//[^\"]+)', webpage, + 'thumbnail', fatal=False) + if thumbnail is not None: + thumbnail = 'http:' + thumbnail + + ext = self._search_regex(r'type:\s?\'([^\']+)\',', + webpage, 'extension', fatal=False) + video_url = self._search_regex( + r'file:\s?\'(http[^\']+)\',', webpage, 'file url') + + formats = [{ + 'format_id': 'sd', + 'url': video_url, + 'ext': ext, + }] + + return { + 'id': video_id, + 'title': title, + 'thumbnail': thumbnail, + 'formats': formats, + } diff --git a/youtube_dl/extractor/gameone.py b/youtube_dl/extractor/gameone.py new file mode 100644 index 0000000..b580f52 --- /dev/null +++ b/youtube_dl/extractor/gameone.py @@ -0,0 +1,90 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + xpath_with_ns, + parse_iso8601 +) + +NAMESPACE_MAP = { + 'media': 'http://search.yahoo.com/mrss/', +} + +# URL prefix to download the mp4 files directly instead of streaming via rtmp +# Credits go to XBox-Maniac +# http://board.jdownloader.org/showpost.php?p=185835&postcount=31 +RAW_MP4_URL = 'http://cdn.riptide-mtvn.com/' + + +class GameOneIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?gameone\.de/tv/(?P\d+)' + _TEST = { + 'url': 'http://www.gameone.de/tv/288', + 'md5': '136656b7fb4c9cb4a8e2d500651c499b', + 'info_dict': { + 'id': '288', + 'ext': 'mp4', + 'title': 'Game One - Folge 288', + 'duration': 1238, + 'thumbnail': 'http://s3.gameone.de/gameone/assets/video_metas/teaser_images/000/643/636/big/640x360.jpg', + 'description': 'FIFA-Pressepokal 2014, Star Citizen, Kingdom Come: Deliverance, Project Cars, Schöner Trants Nerdquiz Folge 2 Runde 1', + 'age_limit': 16, + 'upload_date': '20140513', + 'timestamp': 1399980122, + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + webpage = self._download_webpage(url, video_id) + og_video = 
self._og_search_video_url(webpage, secure=False) + description = self._html_search_meta('description', webpage) + age_limit = int( + self._search_regex( + r'age=(\d+)', + self._html_search_meta( + 'age-de-meta-label', + webpage), + 'age_limit', + '0')) + mrss_url = self._search_regex(r'mrss=([^&]+)', og_video, 'mrss') + + mrss = self._download_xml(mrss_url, video_id, 'Downloading mrss') + title = mrss.find('.//item/title').text + thumbnail = mrss.find('.//item/image').get('url') + timestamp = parse_iso8601(mrss.find('.//pubDate').text, delimiter=' ') + content = mrss.find(xpath_with_ns('.//media:content', NAMESPACE_MAP)) + content_url = content.get('url') + + content = self._download_xml( + content_url, + video_id, + 'Downloading media:content') + rendition_items = content.findall('.//rendition') + duration = int(rendition_items[0].get('duration')) + formats = [ + { + 'url': re.sub(r'.*/(r2)', RAW_MP4_URL + r'\1', r.find('./src').text), + 'width': int(r.get('width')), + 'height': int(r.get('height')), + 'tbr': int(r.get('bitrate')), + } + for r in rendition_items + ] + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': title, + 'thumbnail': thumbnail, + 'duration': duration, + 'formats': formats, + 'description': description, + 'age_limit': age_limit, + 'timestamp': timestamp, + } diff --git a/youtube_dl/extractor/gorillavid.py b/youtube_dl/extractor/gorillavid.py index aa15caf..ca5f7c4 100644 --- a/youtube_dl/extractor/gorillavid.py +++ b/youtube_dl/extractor/gorillavid.py @@ -12,7 +12,12 @@ from ..utils import ( class GorillaVidIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?gorillavid\.in/(?:embed-)?(?P[0-9a-zA-Z]+)(?:-[0-9]+x[0-9]+\.html)?' + IE_DESC = 'GorillaVid.in and daclips.in' + _VALID_URL = r'''(?x) + https?://(?P(?:www\.)? + (?:daclips\.in|gorillavid\.in))/ + (?:embed-)?(?P[0-9a-zA-Z]+)(?:-[0-9]+x[0-9]+\.html)? 
+ ''' _TESTS = [{ 'url': 'http://gorillavid.in/06y9juieqpmi', @@ -32,15 +37,22 @@ class GorillaVidIE(InfoExtractor): 'title': 'Say something nice', 'thumbnail': 're:http://.*\.jpg', }, + }, { + 'url': 'http://daclips.in/3rso4kdn6f9m', + 'md5': '1ad8fd39bb976eeb66004d3a4895f106', + 'info_dict': { + 'id': '3rso4kdn6f9m', + 'ext': 'mp4', + 'title': 'Micro Pig piglets ready on 16th July 2009', + 'thumbnail': 're:http://.*\.jpg', + }, }] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') - url = 'http://gorillavid.in/%s' % video_id - - webpage = self._download_webpage(url, video_id) + webpage = self._download_webpage('http://%s/%s' % (mobj.group('host'), video_id), video_id) fields = dict(re.findall(r'''(?x)\d+?)($|/)' + _TEST = { + 'url': 'http://www.goshgay.com/video4116282', + 'md5': '268b9f3c3229105c57859e166dd72b03', + 'info_dict': { + 'id': '4116282', + 'ext': 'flv', + 'title': 'md5:089833a4790b5e103285a07337f245bf', + 'thumbnail': 're:http://.*\.jpg', + 'age_limit': 18, + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + webpage = self._download_webpage(url, video_id) + title = self._search_regex(r'class="video-title">

(.+?)<', webpage, 'title') + + player_config = self._search_regex( + r'(?s)jwplayer\("player"\)\.setup\(({.+?})\)', webpage, 'config settings') + player_vars = json.loads(player_config.replace("'", '"')) + width = str_to_int(player_vars.get('width')) + height = str_to_int(player_vars.get('height')) + config_uri = player_vars.get('config') + + if config_uri is None: + raise ExtractorError('Missing config URI') + node = self._download_xml(config_uri, video_id, 'Downloading player config XML', + errnote='Unable to download XML') + if node is None: + raise ExtractorError('Missing config XML') + if node.tag != 'config': + raise ExtractorError('Missing config attribute') + fns = node.findall('file') + imgs = node.findall('image') + if len(fns) != 1: + raise ExtractorError('Missing media URI') + video_url = fns[0].text + if len(imgs) < 1: + thumbnail = None + else: + thumbnail = imgs[0].text + + url_comp = compat_urlparse.urlparse(url) + ref = "%s://%s%s" % (url_comp[0], url_comp[1], url_comp[2]) + + return { + 'id': video_id, + 'url': video_url, + 'title': title, + 'width': width, + 'height': height, + 'thumbnail': thumbnail, + 'http_referer': ref, + 'age_limit': 18, + } diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py index af9490c..228b42d 100644 --- a/youtube_dl/extractor/mtv.py +++ b/youtube_dl/extractor/mtv.py @@ -158,6 +158,9 @@ class MTVServicesInfoExtractor(InfoExtractor): if mgid.endswith('.swf'): mgid = mgid[:-4] except RegexNotFoundError: + mgid = None + + if mgid is None or ':' not in mgid: mgid = self._search_regex( [r'data-mgid="(.*?)"', r'swfobject.embedSWF\(".*?(mgid:.*?)"'], webpage, u'mgid') diff --git a/youtube_dl/extractor/ndr.py b/youtube_dl/extractor/ndr.py index 3d6096e..94d5ba9 100644 --- a/youtube_dl/extractor/ndr.py +++ b/youtube_dl/extractor/ndr.py @@ -18,15 +18,15 @@ class NDRIE(InfoExtractor): _TESTS = [ { - 'url': 'http://www.ndr.de/fernsehen/sendungen/markt/markt7959.html', - 'md5': 'e7a6079ca39d3568f4996cb858dd6708', 
+ 'url': 'http://www.ndr.de/fernsehen/media/dienordreportage325.html', + 'md5': '4a4eeafd17c3058b65f0c8f091355855', 'note': 'Video file', 'info_dict': { - 'id': '7959', + 'id': '325', 'ext': 'mp4', - 'title': 'Markt - die ganze Sendung', - 'description': 'md5:af9179cf07f67c5c12dc6d9997e05725', - 'duration': 2655, + 'title': 'Blaue Bohnen aus Blocken', + 'description': 'md5:190d71ba2ccddc805ed01547718963bc', + 'duration': 1715, }, }, { diff --git a/youtube_dl/extractor/pyvideo.py b/youtube_dl/extractor/pyvideo.py index 0bc0859..6d5732d 100644 --- a/youtube_dl/extractor/pyvideo.py +++ b/youtube_dl/extractor/pyvideo.py @@ -46,7 +46,7 @@ class PyvideoIE(InfoExtractor): return self.url_result(m_youtube.group(1), 'Youtube') title = self._html_search_regex( - r'
.*?([^>]+?)

', + r'
\s*]*)?>([^>]+?)', webpage, 'title', flags=re.DOTALL) video_url = self._search_regex( [r'Download.*?\d+).*?$' + _TESTS = [{ + 'url': 'http://www.reverbnation.com/alkilados/song/16965047-mona-lisa', + 'file': '16965047.mp3', + 'md5': '3da12ebca28c67c111a7f8b262d3f7a7', + 'info_dict': { + "title": "MONA LISA", + "uploader": "ALKILADOS", + "uploader_id": 216429, + "thumbnail": "//gp1.wac.edgecastcdn.net/802892/production_public/Photo/13761700/image/1366002176_AVATAR_MONA_LISA.jpg" + }, + }] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + song_id = mobj.group('id') + + api_res = self._download_json( + 'https://api.reverbnation.com/song/%s?callback=api_response_5&_=%d' + % (song_id, int(time.time() * 1000)), + song_id, + transform_source=strip_jsonp, + note='Downloading information of song %s' % song_id + ) + + return { + 'id': song_id, + 'title': api_res.get('name'), + 'url': api_res.get('url'), + 'uploader': api_res.get('artist', {}).get('name'), + 'uploader_id': api_res.get('artist', {}).get('id'), + 'thumbnail': api_res.get('image', api_res.get('thumbnail')), + 'ext': 'mp3', + 'vcodec': 'none', + } diff --git a/youtube_dl/extractor/ruhd.py b/youtube_dl/extractor/ruhd.py new file mode 100644 index 0000000..55b58e5 --- /dev/null +++ b/youtube_dl/extractor/ruhd.py @@ -0,0 +1,46 @@ +# -*- coding: utf-8 -*- +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor + + +class RUHDIE(InfoExtractor): + _VALID_URL = r'http://(?:www\.)?ruhd\.ru/play\.php\?vid=(?P\d+)' + _TEST = { + 'url': 'http://www.ruhd.ru/play.php?vid=207', + 'md5': 'd1a9ec4edf8598e3fbd92bb16072ba83', + 'info_dict': { + 'id': '207', + 'ext': 'divx', + 'title': 'КОТ бааааам', + 'description': 'классный кот)', + 'thumbnail': 're:^http://.*\.jpg$', + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + webpage = self._download_webpage(url, video_id) + + video_url = self._html_search_regex( 
+ r'([^<]+)   RUHD.ru - Видео Высокого качества №1 в России!', webpage, 'title') + description = self._html_search_regex( + r'(?s)
(.+?)', webpage, 'description', fatal=False) + thumbnail = self._html_search_regex( + r'[a-zA-Z0-9]+)' + _TESTS = [{ + 'url': 'http://www.screencast.com/t/3ZEjQXlT', + 'md5': '917df1c13798a3e96211dd1561fded83', + 'info_dict': { + 'id': '3ZEjQXlT', + 'ext': 'm4v', + 'title': 'Color Measurement with Ocean Optics Spectrometers', + 'description': 'md5:240369cde69d8bed61349a199c5fb153', + 'thumbnail': 're:^https?://.*\.(?:gif|jpg)$', + } + }, { + 'url': 'http://www.screencast.com/t/V2uXehPJa1ZI', + 'md5': 'e8e4b375a7660a9e7e35c33973410d34', + 'info_dict': { + 'id': 'V2uXehPJa1ZI', + 'ext': 'mov', + 'title': 'The Amadeus Spectrometer', + 'description': 're:^In this video, our friends at.*To learn more about Amadeus, visit', + 'thumbnail': 're:^https?://.*\.(?:gif|jpg)$', + } + }, { + 'url': 'http://www.screencast.com/t/aAB3iowa', + 'md5': 'dedb2734ed00c9755761ccaee88527cd', + 'info_dict': { + 'id': 'aAB3iowa', + 'ext': 'mp4', + 'title': 'Google Earth Export', + 'description': 'Provides a demo of a CommunityViz export to Google Earth, one of the 3D viewing options.', + 'thumbnail': 're:^https?://.*\.(?:gif|jpg)$', + } + }, { + 'url': 'http://www.screencast.com/t/X3ddTrYh', + 'md5': '669ee55ff9c51988b4ebc0877cc8b159', + 'info_dict': { + 'id': 'X3ddTrYh', + 'ext': 'wmv', + 'title': 'Toolkit 6 User Group Webinar (2014-03-04) - Default Judgment and First Impression', + 'description': 'md5:7b9f393bc92af02326a5c5889639eab0', + 'thumbnail': 're:^https?://.*\.(?:gif|jpg)$', + } + }, + ] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + webpage = self._download_webpage(url, video_id) + + video_url = self._html_search_regex( + r'Title: ([^<]*)
', + r'class="tabSeperator">>(.*?)<'], + webpage, 'title') + thumbnail = self._og_search_thumbnail(webpage) + description = self._og_search_description(webpage, default=None) + if description is None: + description = self._html_search_meta('description', webpage) + + return { + 'id': video_id, + 'url': video_url, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + } diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index 7aa100f..8a77c13 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -81,16 +81,16 @@ class SoundcloudIE(InfoExtractor): }, # downloadable song { - 'url': 'https://soundcloud.com/simgretina/just-your-problem-baby-1', - 'md5': '56a8b69568acaa967b4c49f9d1d52d19', + 'url': 'https://soundcloud.com/oddsamples/bus-brakes', + 'md5': 'fee7b8747b09bb755cefd4b853e7249a', 'info_dict': { - 'id': '105614606', + 'id': '128590877', 'ext': 'wav', - 'title': 'Just Your Problem Baby (Acapella)', - 'description': 'Vocals', - 'uploader': 'Sim Gretina', - 'upload_date': '20130815', - #'duration': 42, + 'title': 'Bus Brakes', + 'description': 'md5:0170be75dd395c96025d210d261c784e', + 'uploader': 'oddsamples', + 'upload_date': '20140109', + 'duration': 17, }, }, ] @@ -255,7 +255,7 @@ class SoundcloudSetIE(SoundcloudIE): class SoundcloudUserIE(SoundcloudIE): - _VALID_URL = r'https?://(www\.)?soundcloud\.com/(?P[^/]+)(/?(tracks/)?)?(\?.*)?$' + _VALID_URL = r'https?://(www\.)?soundcloud\.com/(?P[^/]+)/?((?Ptracks|likes)/?)?(\?.*)?$' IE_NAME = 'soundcloud:user' # it's in tests/test_playlists.py @@ -264,24 +264,31 @@ class SoundcloudUserIE(SoundcloudIE): def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) uploader = mobj.group('user') + resource = mobj.group('rsrc') + if resource is None: + resource = 'tracks' + elif resource == 'likes': + resource = 'favorites' url = 'http://soundcloud.com/%s/' % uploader resolv_url = self._resolv_url(url) user = 
self._download_json( resolv_url, uploader, 'Downloading user info') - base_url = 'http://api.soundcloud.com/users/%s/tracks.json?' % uploader + base_url = 'http://api.soundcloud.com/users/%s/%s.json?' % (uploader, resource) entries = [] for i in itertools.count(): data = compat_urllib_parse.urlencode({ 'offset': i * 50, + 'limit': 50, 'client_id': self._CLIENT_ID, }) new_entries = self._download_json( base_url + data, uploader, 'Downloading track page %s' % (i + 1)) - entries.extend(self._extract_info_dict(e, quiet=True) for e in new_entries) - if len(new_entries) < 50: + if len(new_entries) == 0: + self.to_screen('%s: End page received' % uploader) break + entries.extend(self._extract_info_dict(e, quiet=True) for e in new_entries) return { '_type': 'playlist', diff --git a/youtube_dl/extractor/southparkstudios.py b/youtube_dl/extractor/southpark.py similarity index 73% rename from youtube_dl/extractor/southparkstudios.py rename to youtube_dl/extractor/southpark.py index aea8e64..c20397b 100644 --- a/youtube_dl/extractor/southparkstudios.py +++ b/youtube_dl/extractor/southpark.py @@ -3,24 +3,24 @@ from __future__ import unicode_literals from .mtv import MTVServicesInfoExtractor -class SouthParkStudiosIE(MTVServicesInfoExtractor): - IE_NAME = 'southparkstudios.com' - _VALID_URL = r'https?://(www\.)?(?Psouthparkstudios\.com/(clips|full-episodes)/(?P.+?)(\?|#|$))' +class SouthParkIE(MTVServicesInfoExtractor): + IE_NAME = 'southpark.cc.com' + _VALID_URL = r'https?://(www\.)?(?Psouthpark\.cc\.com/(clips|full-episodes)/(?P.+?)(\?|#|$))' _FEED_URL = 'http://www.southparkstudios.com/feeds/video-player/mrss' _TESTS = [{ - 'url': 'http://www.southparkstudios.com/clips/104437/bat-daded#tab=featured', + 'url': 'http://southpark.cc.com/clips/104437/bat-daded#tab=featured', 'info_dict': { 'id': 'a7bff6c2-ed00-11e0-aca6-0026b9414f30', 'ext': 'mp4', - 'title': 'Bat Daded', + 'title': 'South Park|Bat Daded', 'description': 'Randy disqualifies South Park by getting into a fight with 
Bat Dad.', }, }] -class SouthparkDeIE(SouthParkStudiosIE): +class SouthparkDeIE(SouthParkIE): IE_NAME = 'southpark.de' _VALID_URL = r'https?://(www\.)?(?Psouthpark\.de/(clips|alle-episoden)/(?P.+?)(\?|#|$))' _FEED_URL = 'http://www.southpark.de/feeds/video-player/mrss/' diff --git a/youtube_dl/extractor/tenplay.py b/youtube_dl/extractor/tenplay.py new file mode 100644 index 0000000..8477840 --- /dev/null +++ b/youtube_dl/extractor/tenplay.py @@ -0,0 +1,84 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor + + +class TenPlayIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?ten(play)?\.com\.au/.+' + _TEST = { + 'url': 'http://tenplay.com.au/ten-insider/extra/season-2013/tenplay-tv-your-way', + #'md5': 'd68703d9f73dc8fccf3320ab34202590', + 'info_dict': { + 'id': '2695695426001', + 'ext': 'flv', + 'title': 'TENplay: TV your way', + 'description': 'Welcome to a new TV experience. Enjoy a taste of the TENplay benefits.', + 'timestamp': 1380150606.889, + 'upload_date': '20130925', + 'uploader': 'TENplay', + }, + 'params': { + 'skip_download': True, # Requires rtmpdump + } + } + + _video_fields = [ + "id", "name", "shortDescription", "longDescription", "creationDate", + "publishedDate", "lastModifiedDate", "customFields", "videoStillURL", + "thumbnailURL", "referenceId", "length", "playsTotal", + "playsTrailingWeek", "renditions", "captioning", "startDate", "endDate"] + + def _real_extract(self, url): + webpage = self._download_webpage(url, url) + video_id = self._html_search_regex( + r'videoID: "(\d+?)"', webpage, 'video_id') + api_token = self._html_search_regex( + r'apiToken: "([a-zA-Z0-9-_\.]+?)"', webpage, 'api_token') + title = self._html_search_regex( + r'', + webpage, 'title') + + json = self._download_json('https://api.brightcove.com/services/library?command=find_video_by_id&video_id=%s&token=%s&video_fields=%s' % (video_id, api_token, ','.join(self._video_fields)), title) + + formats = [] + for 
rendition in json['renditions']: + url = rendition['remoteUrl'] or rendition['url'] + protocol = 'rtmp' if url.startswith('rtmp') else 'http' + ext = 'flv' if protocol == 'rtmp' else rendition['videoContainer'].lower() + + if protocol == 'rtmp': + url = url.replace('&mp4:', '') + + formats.append({ + 'format_id': '_'.join(['rtmp', rendition['videoContainer'].lower(), rendition['videoCodec'].lower()]), + 'width': rendition['frameWidth'], + 'height': rendition['frameHeight'], + 'tbr': rendition['encodingRate'] / 1024, + 'filesize': rendition['size'], + 'protocol': protocol, + 'ext': ext, + 'vcodec': rendition['videoCodec'].lower(), + 'container': rendition['videoContainer'].lower(), + 'url': url, + }) + + return { + 'id': video_id, + 'display_id': json['referenceId'], + 'title': json['name'], + 'description': json['shortDescription'] or json['longDescription'], + 'formats': formats, + 'thumbnails': [{ + 'url': json['videoStillURL'] + }, { + 'url': json['thumbnailURL'] + }], + 'thumbnail': json['videoStillURL'], + 'duration': json['length'] / 1000, + 'timestamp': float(json['creationDate']) / 1000, + 'uploader': json['customFields']['production_company_distributor'] if 'production_company_distributor' in json['customFields'] else 'TENplay', + 'view_count': json['playsTotal'] + } diff --git a/youtube_dl/extractor/tlc.py b/youtube_dl/extractor/tlc.py index ad175b8..d848ee1 100644 --- a/youtube_dl/extractor/tlc.py +++ b/youtube_dl/extractor/tlc.py @@ -5,6 +5,7 @@ import re from .common import InfoExtractor from .brightcove import BrightcoveIE from .discovery import DiscoveryIE +from ..utils import compat_urlparse class TlcIE(DiscoveryIE): @@ -51,6 +52,10 @@ class TlcDeIE(InfoExtractor): # Otherwise we don't get the correct 'BrightcoveExperience' element, # example: http://www.tlc.de/sendungen/cake-boss/videos/cake-boss-cannoli-drama/ iframe_url = iframe_url.replace('.htm?', '.php?') + url_fragment = compat_urlparse.urlparse(url).fragment + if url_fragment: + # Since the 
fragment is not send to the server, we always get the same iframe + iframe_url = re.sub(r'playlist=(\d+)', 'playlist=%s' % url_fragment, iframe_url) iframe = self._download_webpage(iframe_url, title) return { diff --git a/youtube_dl/extractor/tutv.py b/youtube_dl/extractor/tutv.py index c980153..d516b64 100644 --- a/youtube_dl/extractor/tutv.py +++ b/youtube_dl/extractor/tutv.py @@ -1,21 +1,21 @@ from __future__ import unicode_literals + import base64 import re from .common import InfoExtractor -from ..utils import ( - compat_parse_qs, -) +from ..utils import compat_parse_qs class TutvIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?tu\.tv/videos/(?P<id>[^/?]+)' _TEST = { - 'url': 'http://tu.tv/videos/noah-en-pabellon-cuahutemoc', - 'file': '2742556.flv', - 'md5': '5eb766671f69b82e528dc1e7769c5cb2', + 'url': 'http://tu.tv/videos/robots-futbolistas', + 'md5': '627c7c124ac2a9b5ab6addb94e0e65f7', 'info_dict': { - 'title': 'Noah en pabellon cuahutemoc', + 'id': '2973058', + 'ext': 'flv', + 'title': 'Robots futbolistas', }, } @@ -26,10 +26,9 @@ class TutvIE(InfoExtractor): webpage = self._download_webpage(url, video_id) internal_id = self._search_regex(r'codVideo=([0-9]+)', webpage, 'internal video ID') - data_url = 'http://tu.tv/flvurl.php?codVideo=' + str(internal_id) - data_content = self._download_webpage(data_url, video_id, note='Downloading video info') - data = compat_parse_qs(data_content) - video_url = base64.b64decode(data['kpt'][0]).decode('utf-8') + data_content = self._download_webpage( + 'http://tu.tv/flvurl.php?codVideo=%s' % internal_id, video_id, 'Downloading video info') + video_url = base64.b64decode(compat_parse_qs(data_content)['kpt'][0]).decode('utf-8') return { 'id': internal_id, diff --git a/youtube_dl/extractor/vimple.py b/youtube_dl/extractor/vimple.py new file mode 100644 index 0000000..33d370e --- /dev/null +++ b/youtube_dl/extractor/vimple.py @@ -0,0 +1,86 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import base64 +import
re +import xml.etree.ElementTree +import zlib + +from .common import InfoExtractor +from ..utils import int_or_none + + +class VimpleIE(InfoExtractor): + IE_DESC = 'Vimple.ru' + _VALID_URL = r'https?://(player.vimple.ru/iframe|vimple.ru)/(?P<id>[a-f0-9]{10,})' + _TESTS = [ + # Quality: Large, from iframe + { + 'url': 'http://player.vimple.ru/iframe/b132bdfd71b546d3972f9ab9a25f201c', + 'info_dict': { + 'id': 'b132bdfd71b546d3972f9ab9a25f201c', + 'title': 'great-escape-minecraft.flv', + 'ext': 'mp4', + 'duration': 352, + 'webpage_url': 'http://vimple.ru/b132bdfd71b546d3972f9ab9a25f201c', + }, + }, + # Quality: Medium, from mainpage + { + 'url': 'http://vimple.ru/a15950562888453b8e6f9572dc8600cd', + 'info_dict': { + 'id': 'a15950562888453b8e6f9572dc8600cd', + 'title': 'DB 01', + 'ext': 'flv', + 'duration': 1484, + 'webpage_url': 'http://vimple.ru/a15950562888453b8e6f9572dc8600cd', + } + }, + ] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + iframe_url = 'http://player.vimple.ru/iframe/%s' % video_id + + iframe = self._download_webpage( + iframe_url, video_id, + note='Downloading iframe', errnote='unable to fetch iframe') + player_url = self._html_search_regex( + r'"(http://player.vimple.ru/flash/.+?)"', iframe, 'player url') + + player = self._request_webpage( + player_url, video_id, note='Downloading swf player').read() + + player = zlib.decompress(player[8:]) + + xml_pieces = re.findall(b'([a-zA-Z0-9 =+/]{500})', player) + xml_pieces = [piece[1:-1] for piece in xml_pieces] + + xml_data = b''.join(xml_pieces) + xml_data = base64.b64decode(xml_data) + + xml_data = xml.etree.ElementTree.fromstring(xml_data) + + video = xml_data.find('Video') + quality = video.get('quality') + q_tag = video.find(quality.capitalize()) + + formats = [ + { + 'url': q_tag.get('url'), + 'tbr': int(q_tag.get('bitrate')), + 'filesize': int(q_tag.get('filesize')), + 'format_id': quality, + }, + ] + + return { + 'id': video_id, + 'title':
video.find('Title').text, + 'formats': formats, + 'thumbnail': video.find('Poster').get('url'), + 'duration': int_or_none(video.get('duration')), + 'webpage_url': video.find('Share').get('videoPageUrl'), + } diff --git a/youtube_dl/extractor/vodlocker.py b/youtube_dl/extractor/vodlocker.py new file mode 100644 index 0000000..68c5936 --- /dev/null +++ b/youtube_dl/extractor/vodlocker.py @@ -0,0 +1,63 @@ +# -*- coding: utf-8 -*- +from __future__ import unicode_literals + +import re +from .common import InfoExtractor +from ..utils import ( + compat_urllib_parse, + compat_urllib_request, +) + + +class VodlockerIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?vodlocker.com/(?P<id>[0-9a-zA-Z]+)(?:\..*?)?' + + _TESTS = [{ + 'url': 'http://vodlocker.com/e8wvyzz4sl42', + 'md5': 'ce0c2d18fa0735f1bd91b69b0e54aacf', + 'info_dict': { + 'id': 'e8wvyzz4sl42', + 'ext': 'mp4', + 'title': 'Germany vs Brazil', + 'thumbnail': 're:http://.*\.jpg', + }, + }] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + webpage = self._download_webpage(url, video_id) + + fields = dict(re.findall(r'''(?x)\s*(.*?)\s*[a-z]+)\.(?P<member>.*)$', expr) if m: member = m.group('member') - val = local_vars[m.group('in')] + variable = m.group('in') + + if variable not in local_vars: + if variable not in self._objects: + self._objects[variable] = self.extract_object(variable) + obj = self._objects[variable] + key, args = member.split('(', 1) + args = args.strip(')') + argvals = [int(v) if v.isdigit() else local_vars[v] + for v in args.split(',')] + return obj[key](argvals) + + val = local_vars[variable] if member == 'split("")': return list(val) if member == 'join("")': @@ -97,6 +110,25 @@ class JSInterpreter(object): return self._functions[fname](argvals) raise ExtractorError('Unsupported JS expression %r' % expr) + def extract_object(self, objname): + obj = {} + obj_m = re.search( + (r'(?:var\s+)?%s\s*=\s*\{' % re.escape(objname)) + +
r'\s*(?P<fields>([a-zA-Z$]+\s*:\s*function\(.*?\)\s*\{.*?\})*)' + + r'\}\s*;', + self.code) + fields = obj_m.group('fields') + # Currently, it only supports function definitions + fields_m = re.finditer( + r'(?P<key>[a-zA-Z$]+)\s*:\s*function' + r'\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}', + fields) + for f in fields_m: + argnames = f.group('args').split(',') + obj[f.group('key')] = self.build_function(argnames, f.group('code')) + + return obj + def extract_function(self, funcname): func_m = re.search( (r'(?:function %s|[{;]%s\s*=\s*function)' % ( @@ -107,10 +139,12 @@ class JSInterpreter(object): raise ExtractorError('Could not find JS function %r' % funcname) argnames = func_m.group('args').split(',') + return self.build_function(argnames, func_m.group('code')) + + def build_function(self, argnames, code): def resf(args): local_vars = dict(zip(argnames, args)) - for stmt in func_m.group('code').split(';'): + for stmt in code.split(';'): res = self.interpret_statement(stmt, local_vars) return res return resf - diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 09312e8..64a9618 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -775,7 +775,7 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler): https_response = http_response -def parse_iso8601(date_str): +def parse_iso8601(date_str, delimiter='T'): """ Return a UNIX timestamp from the given date """ if date_str is None: @@ -795,8 +795,8 @@ def parse_iso8601(date_str): timezone = datetime.timedelta( hours=sign * int(m.group('hours')), minutes=sign * int(m.group('minutes'))) - - dt = datetime.datetime.strptime(date_str, '%Y-%m-%dT%H:%M:%S') - timezone + date_format = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter) + dt = datetime.datetime.strptime(date_str, date_format) - timezone return calendar.timegm(dt.timetuple()) @@ -1428,7 +1428,7 @@ US_RATINGS = { def strip_jsonp(code): - return re.sub(r'(?s)^[a-zA-Z_]+\s*\(\s*(.*)\);\s*?\s*$', r'\1', code) + return re.sub(r'(?s)^[a-zA-Z0-9_]+\s*\(\s*(.*)\);?\s*?\s*$', r'\1', code)
def qualities(quality_ids): diff --git a/youtube_dl/version.py b/youtube_dl/version.py index d6b0589..4d606c3 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2014.07.11' +__version__ = '2014.07.15'