From: Rogério Brito Date: Sun, 13 Jul 2014 15:22:00 +0000 (-0300) Subject: Imported Upstream version 2014.07.11 X-Git-Url: https://git.rapsys.eu/youtubedl/commitdiff_plain/d58070b17fd9302b32eb12ae4b271bffaf67abb4?hp=-c Imported Upstream version 2014.07.11 --- d58070b17fd9302b32eb12ae4b271bffaf67abb4 diff --git a/README.md b/README.md index 2bea609..dffdaa9 100644 --- a/README.md +++ b/README.md @@ -70,8 +70,9 @@ which means you can modify it, redistribute it or use it however you like. --default-search PREFIX Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for youtube-dl "large - apple". By default (with value "auto") - youtube-dl guesses. + apple". Use the value "auto" to let + youtube-dl guess. The default value "error" + just throws an error. --ignore-config Do not read configuration files. When given in the global configuration file /etc /youtube-dl.conf: do not read the user diff --git a/README.txt b/README.txt index 4757a33..0f9c470 100644 --- a/README.txt +++ b/README.txt @@ -82,8 +82,9 @@ OPTIONS --default-search PREFIX Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for youtube-dl "large - apple". By default (with value "auto") - youtube-dl guesses. + apple". Use the value "auto" to let + youtube-dl guess. The default value "error" + just throws an error. --ignore-config Do not read configuration files. When given in the global configuration file /etc /youtube-dl.conf: do not read the user @@ -678,3 +679,4 @@ youtube-dl is released into the public domain by the copyright holders. This README file was originally written by Daniel Bolton (https://github.com/dbbolton) and is likewise released into the public domain. + diff --git a/test/test_all_urls.py b/test/test_all_urls.py index 4b56137..2bc81f0 100644 --- a/test/test_all_urls.py +++ b/test/test_all_urls.py @@ -69,9 +69,6 @@ class TestAllURLsMatching(unittest.TestCase): def test_youtube_show_matching(self): self.assertMatch('http://www.youtube.com/show/airdisasters', ['youtube:show']) - def test_youtube_truncated(self): - self.assertMatch('http://www.youtube.com/watch?', ['youtube:truncated_url']) - def test_youtube_search_matching(self): self.assertMatch('http://www.youtube.com/results?search_query=making+mustard', ['youtube:search_url']) self.assertMatch('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', ['youtube:search_url']) diff --git a/test/test_playlists.py b/test/test_playlists.py index ee91e41..994b1d4 100644 --- a/test/test_playlists.py +++ b/test/test_playlists.py @@ -28,8 +28,9 @@ from youtube_dl.extractor import ( SoundcloudSetIE, SoundcloudUserIE, SoundcloudPlaylistIE, - TeacherTubeClassroomIE, + TeacherTubeUserIE, LivestreamIE, + LivestreamOriginalIE, NHLVideocenterIE, BambuserChannelIE, BandcampAlbumIE, @@ -40,6 +41,7 @@ from youtube_dl.extractor import ( KhanAcademyIE, EveryonesMixtapeIE, RutubeChannelIE, + RutubePersonIE, GoogleSearchIE, GenericIE, TEDIE, @@ -154,6 +156,14 @@ class TestPlaylists(unittest.TestCase): self.assertEqual(result['title'], 'TEDCity2.0 (English)') self.assertTrue(len(result['entries']) >= 4) + def test_livestreamoriginal_folder(self): + dl = FakeYDL() + ie = LivestreamOriginalIE(dl) + result = ie.extract('https://www.livestream.com/newplay/folder?dirId=a07bf706-d0e4-4e75-a747-b021d84f2fd3') + self.assertIsPlaylist(result) + self.assertEqual(result['id'], 'a07bf706-d0e4-4e75-a747-b021d84f2fd3') + self.assertTrue(len(result['entries']) >= 28) + def test_nhl_videocenter(self): dl = FakeYDL() ie = NHLVideocenterIE(dl) @@ -256,10 +266,18 @@ class TestPlaylists(unittest.TestCase): def test_rutube_channel(self): dl = FakeYDL() ie = RutubeChannelIE(dl) - result = ie.extract('http://rutube.ru/tags/video/1409') + result = ie.extract('http://rutube.ru/tags/video/1800/') + self.assertIsPlaylist(result) + self.assertEqual(result['id'], '1800') + self.assertTrue(len(result['entries']) >= 68) + + def test_rutube_person(self): + dl = FakeYDL() + ie = RutubePersonIE(dl) + result = ie.extract('http://rutube.ru/video/person/313878/') self.assertIsPlaylist(result) - self.assertEqual(result['id'], '1409') - self.assertTrue(len(result['entries']) >= 34) + self.assertEqual(result['id'], '313878') + self.assertTrue(len(result['entries']) >= 37) def test_multiple_brightcove_videos(self): # https://github.com/rg3/youtube-dl/issues/2283 @@ -361,13 +379,13 @@ class TestPlaylists(unittest.TestCase): result['title'], 'Brace Yourself - Today\'s Weirdest News') self.assertTrue(len(result['entries']) >= 10) - def test_TeacherTubeClassroom(self): + def test_TeacherTubeUser(self): dl = FakeYDL() - ie = TeacherTubeClassroomIE(dl) - result = ie.extract('http://www.teachertube.com/view_classroom.php?user=rbhagwati2') + ie = TeacherTubeUserIE(dl) + result = ie.extract('http://www.teachertube.com/user/profile/rbhagwati2') self.assertIsPlaylist(result) self.assertEqual(result['id'], 'rbhagwati2') - self.assertTrue(len(result['entries']) >= 20) + self.assertTrue(len(result['entries']) >= 179) if __name__ == '__main__': unittest.main() diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py index 8417c55..8d46fe1 100644 --- a/test/test_youtube_signature.py +++ b/test/test_youtube_signature.py @@ -33,6 +33,12 @@ _TESTS = [ 90, u']\\[@?>=<;:/.-,+*)(\'&%$#"hZYXWVUTSRQPONMLKJIHGFEDCBAzyxwvutsrqponmlkjiagfedcb39876', ), + ( + u'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflXGBaUN.js', + u'js', + u'2ACFC7A61CA478CD21425E5A57EBD73DDC78E22A.2094302436B2D377D14A3BBA23022D023B8BC25AA', + u'A52CB8B320D22032ABB3A41D773D2B6342034902.A22E87CDD37DBE75A5E52412DC874AC16A7CFCA2', + ), ] @@ -44,7 +50,7 @@ class TestSignature(unittest.TestCase): os.mkdir(self.TESTDATA_DIR) -def make_tfunc(url, stype, sig_length, expected_sig): +def make_tfunc(url, stype, sig_input, expected_sig): basename = url.rpartition('/')[2] m = re.match(r'.*-([a-zA-Z0-9_-]+)\.[a-z]+$', basename) assert m, '%r should follow URL format' % basename @@ -66,7 +72,9 @@ def make_tfunc(url, stype, sig_length, expected_sig): with open(fn, 'rb') as testf: swfcode = testf.read() func = ie._parse_sig_swf(swfcode) - src_sig = compat_str(string.printable[:sig_length]) + src_sig = ( + compat_str(string.printable[:sig_input]) + if isinstance(sig_input, int) else sig_input) got_sig = func(src_sig) self.assertEqual(got_sig, expected_sig) diff --git a/youtube-dl b/youtube-dl index 4c445a9..3d28bb5 100755 Binary files a/youtube-dl and b/youtube-dl differ diff --git a/youtube-dl.1 b/youtube-dl.1 index f17addd..47899f4 100644 --- a/youtube-dl.1 +++ b/youtube-dl.1 @@ -61,8 +61,9 @@ redistribute it or use it however you like. \-\-default\-search\ PREFIX\ \ \ \ \ \ \ \ \ \ Use\ this\ prefix\ for\ unqualified\ URLs.\ For \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ example\ "gvsearch2:"\ downloads\ two\ videos \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ from\ google\ videos\ for\ \ youtube\-dl\ "large -\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ apple".\ By\ default\ (with\ value\ "auto") -\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ youtube\-dl\ guesses. +\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ apple".\ Use\ the\ value\ "auto"\ to\ let +\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ youtube\-dl\ guess.\ The\ default\ value\ "error" +\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ just\ throws\ an\ error. \-\-ignore\-config\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Do\ not\ read\ configuration\ files.\ When\ given \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ in\ the\ global\ configuration\ file\ /etc \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ /youtube\-dl.conf:\ do\ not\ read\ the\ user diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index dc0ba98..3dff723 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -993,6 +993,8 @@ class YoutubeDL(object): fd = get_suitable_downloader(info)(self, self.params) for ph in self._progress_hooks: fd.add_progress_hook(ph) + if self.params.get('verbose'): + self.to_stdout('[debug] Invoking downloader on %r' % info.get('url')) return fd.download(name, info) if info_dict.get('requested_formats') is not None: downloaded = [] diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 1e01432..31ed63f 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -59,6 +59,7 @@ __authors__ = ( 'Adam Thalhammer', 'Georg Jähnig', 'Ralf Haring', + 'Koki Takahashi', ) __license__ = 'Public Domain' @@ -269,7 +270,7 @@ def parseOpts(overrideArguments=None): general.add_option( '--default-search', dest='default_search', metavar='PREFIX', - help='Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for youtube-dl "large apple". By default (with value "auto") youtube-dl guesses.') + help='Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for youtube-dl "large apple". Use the value "auto" to let youtube-dl guess. The default value "error" just throws an error.') general.add_option( '--ignore-config', action='store_true', diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index dcf64d0..12cca5c 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -3,6 +3,7 @@ from .addanime import AddAnimeIE from .aftonbladet import AftonbladetIE from .anitube import AnitubeIE from .aol import AolIE +from .allocine import AllocineIE from .aparat import AparatIE from .appletrailers import AppleTrailersIE from .archiveorg import ArchiveOrgIE @@ -63,6 +64,7 @@ from .dailymotion import ( from .daum import DaumIE from .dotsub import DotsubIE from .dreisat import DreiSatIE +from .drtv import DRTVIE from .defense import DefenseGouvFrIE from .discovery import DiscoveryIE from .divxstage import DivxStageIE @@ -147,7 +149,11 @@ from .ku6 import Ku6IE from .la7 import LA7IE from .lifenews import LifeNewsIE from .liveleak import LiveLeakIE -from .livestream import LivestreamIE, LivestreamOriginalIE +from .livestream import ( + LivestreamIE, + LivestreamOriginalIE, + LivestreamShortenerIE, +) from .lynda import ( LyndaIE, LyndaCourseIE @@ -165,11 +171,13 @@ from .mpora import MporaIE from .mofosex import MofosexIE from .mooshare import MooshareIE from .morningstar import MorningstarIE +from .motherless import MotherlessIE from .motorsport import MotorsportIE from .moviezine import MoviezineIE from .movshare import MovShareIE from .mtv import ( MTVIE, + MTVServicesEmbeddedIE, MTVIggyIE, ) from .musicplayon import MusicPlayOnIE @@ -196,6 +204,7 @@ from .normalboots import NormalbootsIE from .novamov import NovaMovIE from .nowness import NownessIE from .nowvideo import NowVideoIE +from .npo import NPOIE from .nrk import ( NRKIE, NRKTVIE, @@ -254,6 +263,7 @@ from .soundcloud import ( SoundcloudUserIE, SoundcloudPlaylistIE ) +from .soundgasm import SoundgasmIE from .southparkstudios import ( SouthParkStudiosIE, SouthparkDeIE, @@ -273,7 +283,7 @@ from .sztvhu import SztvHuIE from .tagesschau import TagesschauIE from .teachertube import ( TeacherTubeIE, - TeacherTubeClassroomIE, + TeacherTubeUserIE, ) from .teachingchannel import TeachingChannelIE from .teamcoco import TeamcocoIE diff --git a/youtube_dl/extractor/allocine.py b/youtube_dl/extractor/allocine.py new file mode 100644 index 0000000..34f0cd4 --- /dev/null +++ b/youtube_dl/extractor/allocine.py @@ -0,0 +1,89 @@ +# -*- coding: utf-8 -*- +from __future__ import unicode_literals + +import re +import json + +from .common import InfoExtractor +from ..utils import ( + compat_str, + qualities, + determine_ext, +) + + +class AllocineIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?allocine\.fr/(?Particle|video|film)/(fichearticle_gen_carticle=|player_gen_cmedia=|fichefilm_gen_cfilm=)(?P[0-9]+)(?:\.html)?' + + _TESTS = [{ + 'url': 'http://www.allocine.fr/article/fichearticle_gen_carticle=18635087.html', + 'md5': '0c9fcf59a841f65635fa300ac43d8269', + 'info_dict': { + 'id': '19546517', + 'ext': 'mp4', + 'title': 'Astérix - Le Domaine des Dieux Teaser VF', + 'description': 'md5:4a754271d9c6f16c72629a8a993ee884', + 'thumbnail': 're:http://.*\.jpg', + }, + }, { + 'url': 'http://www.allocine.fr/video/player_gen_cmedia=19540403&cfilm=222257.html', + 'md5': 'd0cdce5d2b9522ce279fdfec07ff16e0', + 'info_dict': { + 'id': '19540403', + 'ext': 'mp4', + 'title': 'Planes 2 Bande-annonce VF', + 'description': 'md5:c4b1f7bd682a91de6491ada267ec0f4d', + 'thumbnail': 're:http://.*\.jpg', + }, + }, { + 'url': 'http://www.allocine.fr/film/fichefilm_gen_cfilm=181290.html', + 'md5': '101250fb127ef9ca3d73186ff22a47ce', + 'info_dict': { + 'id': '19544709', + 'ext': 'mp4', + 'title': 'Dragons 2 - Bande annonce finale VF', + 'description': 'md5:e74a4dc750894bac300ece46c7036490', + 'thumbnail': 're:http://.*\.jpg', + }, + }] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + typ = mobj.group('typ') + display_id = mobj.group('id') + + webpage = self._download_webpage(url, display_id) + + if typ == 'film': + video_id = self._search_regex(r'href="/video/player_gen_cmedia=([0-9]+).+"', webpage, 'video id') + else: + player = self._search_regex(r'data-player=\'([^\']+)\'>', webpage, 'data player') + + player_data = json.loads(player) + video_id = compat_str(player_data['refMedia']) + + xml = self._download_xml('http://www.allocine.fr/ws/AcVisiondataV4.ashx?media=%s' % video_id, display_id) + + video = xml.find('.//AcVisionVideo').attrib + quality = qualities(['ld', 'md', 'hd']) + + formats = [] + for k, v in video.items(): + if re.match(r'.+_path', k): + format_id = k.split('_')[0] + formats.append({ + 'format_id': format_id, + 'quality': quality(format_id), + 'url': v, + 'ext': determine_ext(v), + }) + + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': video['videoTitle'], + 'thumbnail': self._og_search_thumbnail(webpage), + 'formats': formats, + 'description': self._og_search_description(webpage), + } diff --git a/youtube_dl/extractor/anitube.py b/youtube_dl/extractor/anitube.py index 2b019da..31f0d41 100644 --- a/youtube_dl/extractor/anitube.py +++ b/youtube_dl/extractor/anitube.py @@ -1,22 +1,24 @@ +from __future__ import unicode_literals + import re from .common import InfoExtractor class AnitubeIE(InfoExtractor): - IE_NAME = u'anitube.se' + IE_NAME = 'anitube.se' _VALID_URL = r'https?://(?:www\.)?anitube\.se/video/(?P\d+)' _TEST = { - u'url': u'http://www.anitube.se/video/36621', - u'md5': u'59d0eeae28ea0bc8c05e7af429998d43', - u'file': u'36621.mp4', - u'info_dict': { - u'id': u'36621', - u'ext': u'mp4', - u'title': u'Recorder to Randoseru 01', + 'url': 'http://www.anitube.se/video/36621', + 'md5': '59d0eeae28ea0bc8c05e7af429998d43', + 'info_dict': { + 'id': '36621', + 'ext': 'mp4', + 'title': 'Recorder to Randoseru 01', + 'duration': 180.19, }, - u'skip': u'Blocked in the US', + 'skip': 'Blocked in the US', } def _real_extract(self, url): @@ -24,13 +26,15 @@ class AnitubeIE(InfoExtractor): video_id = mobj.group('id') webpage = self._download_webpage(url, video_id) - key = self._html_search_regex(r'http://www\.anitube\.se/embed/([A-Za-z0-9_-]*)', - webpage, u'key') + key = self._html_search_regex( + r'http://www\.anitube\.se/embed/([A-Za-z0-9_-]*)', webpage, 'key') - config_xml = self._download_xml('http://www.anitube.se/nuevo/econfig.php?key=%s' % key, - key) + config_xml = self._download_xml( + 'http://www.anitube.se/nuevo/econfig.php?key=%s' % key, key) video_title = config_xml.find('title').text + thumbnail = config_xml.find('image').text + duration = float(config_xml.find('duration').text) formats = [] video_url = config_xml.find('file') @@ -49,5 +53,7 @@ class AnitubeIE(InfoExtractor): return { 'id': video_id, 'title': video_title, + 'thumbnail': thumbnail, + 'duration': duration, 'formats': formats } diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py index b528a9e..9591bad 100644 --- a/youtube_dl/extractor/arte.py +++ b/youtube_dl/extractor/arte.py @@ -39,7 +39,10 @@ class ArteTvIE(InfoExtractor): formats = [{ 'forma_id': q.attrib['quality'], - 'url': q.text, + # The playpath starts at 'mp4:', if we don't manually + # split the url, rtmpdump will incorrectly parse them + 'url': q.text.split('mp4:', 1)[0], + 'play_path': 'mp4:' + q.text.split('mp4:', 1)[1], 'ext': 'flv', 'quality': 2 if q.attrib['quality'] == 'hd' else 1, } for q in config.findall('./urls/url')] @@ -111,7 +114,7 @@ class ArteTVPlus7IE(InfoExtractor): if not formats: # Some videos are only available in the 'Originalversion' # they aren't tagged as being in French or German - if all(f['versionCode'] == 'VO' for f in all_formats): + if all(f['versionCode'] == 'VO' or f['versionCode'] == 'VA' for f in all_formats): formats = all_formats else: raise ExtractorError(u'The formats list is empty') @@ -189,9 +192,10 @@ class ArteTVFutureIE(ArteTVPlus7IE): _TEST = { 'url': 'http://future.arte.tv/fr/sujet/info-sciences#article-anchor-7081', 'info_dict': { - 'id': '050940-003', + 'id': '5201', 'ext': 'mp4', 'title': 'Les champignons au secours de la planète', + 'upload_date': '20131101', }, } diff --git a/youtube_dl/extractor/bliptv.py b/youtube_dl/extractor/bliptv.py index d4da089..acfc4ad 100644 --- a/youtube_dl/extractor/bliptv.py +++ b/youtube_dl/extractor/bliptv.py @@ -15,7 +15,7 @@ from ..utils import ( class BlipTVIE(SubtitlesInfoExtractor): - _VALID_URL = r'https?://(?:\w+\.)?blip\.tv/(?:(?:.+-|rss/flash/)(?P\d+)|((?:play/|api\.swf#)(?P[\da-zA-Z]+)))' + _VALID_URL = r'https?://(?:\w+\.)?blip\.tv/(?:(?:.+-|rss/flash/)(?P\d+)|((?:play/|api\.swf#)(?P[\da-zA-Z+]+)))' _TESTS = [ { diff --git a/youtube_dl/extractor/br.py b/youtube_dl/extractor/br.py index b5b56ff..9933607 100644 --- a/youtube_dl/extractor/br.py +++ b/youtube_dl/extractor/br.py @@ -17,15 +17,13 @@ class BRIE(InfoExtractor): _TESTS = [ { - 'url': 'http://www.br.de/mediathek/video/anselm-gruen-114.html', - 'md5': 'c4f83cf0f023ba5875aba0bf46860df2', + 'url': 'http://www.br.de/mediathek/video/sendungen/heimatsound/heimatsound-festival-2014-trailer-100.html', + 'md5': '93556dd2bcb2948d9259f8670c516d59', 'info_dict': { - 'id': '2c8d81c5-6fb7-4a74-88d4-e768e5856532', + 'id': '25e279aa-1ffd-40fd-9955-5325bd48a53a', 'ext': 'mp4', - 'title': 'Feiern und Verzichten', - 'description': 'Anselm Grün: Feiern und Verzichten', - 'uploader': 'BR/Birgit Baier', - 'upload_date': '20140301', + 'title': 'Am 1. und 2. August in Oberammergau', + 'description': 'md5:dfd224e5aa6819bc1fcbb7826a932021', } }, { diff --git a/youtube_dl/extractor/comedycentral.py b/youtube_dl/extractor/comedycentral.py index ba4d73a..8af0aba 100644 --- a/youtube_dl/extractor/comedycentral.py +++ b/youtube_dl/extractor/comedycentral.py @@ -130,7 +130,7 @@ class ComedyCentralShowsIE(InfoExtractor): raise ExtractorError('Invalid redirected URL: ' + url) if mobj.group('episode') == '': raise ExtractorError('Redirected URL is still not specific: ' + url) - epTitle = mobj.group('episode').rpartition('/')[-1] + epTitle = (mobj.group('episode') or mobj.group('videotitle')).rpartition('/')[-1] mMovieParams = re.findall('(?:[a-zA-Z0-9\-]*)(.htm)?' + _VALID_URL = r'http://www\.discovery\.com\/[a-zA-Z0-9\-]*/[a-zA-Z0-9\-]*/videos/(?P[a-zA-Z0-9\-]*)(.htm)?' _TEST = { - 'url': 'http://dsc.discovery.com/tv-shows/mythbusters/videos/mission-impossible-outtakes.htm', + 'url': 'http://www.discovery.com/tv-shows/mythbusters/videos/mission-impossible-outtakes.htm', 'md5': 'e12614f9ee303a6ccef415cb0793eba2', 'info_dict': { 'id': '614784', diff --git a/youtube_dl/extractor/drtv.py b/youtube_dl/extractor/drtv.py new file mode 100644 index 0000000..cdccfd3 --- /dev/null +++ b/youtube_dl/extractor/drtv.py @@ -0,0 +1,91 @@ +from __future__ import unicode_literals + +import re + +from .subtitles import SubtitlesInfoExtractor +from .common import ExtractorError +from ..utils import parse_iso8601 + + +class DRTVIE(SubtitlesInfoExtractor): + _VALID_URL = r'http://(?:www\.)?dr\.dk/tv/se/[^/]+/(?P[\da-z-]+)' + + _TEST = { + 'url': 'http://www.dr.dk/tv/se/partiets-mand/partiets-mand-7-8', + 'md5': '4a7e1dd65cdb2643500a3f753c942f25', + 'info_dict': { + 'id': 'partiets-mand-7-8', + 'ext': 'mp4', + 'title': 'Partiets mand (7:8)', + 'description': 'md5:a684b90a8f9336cd4aab94b7647d7862', + 'timestamp': 1403047940, + 'upload_date': '20140617', + 'duration': 1299.040, + }, + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + programcard = self._download_json( + 'http://www.dr.dk/mu/programcard/expanded/%s' % video_id, video_id, 'Downloading video JSON') + + data = programcard['Data'][0] + + title = data['Title'] + description = data['Description'] + timestamp = parse_iso8601(data['CreatedTime'][:-5]) + + thumbnail = None + duration = None + + restricted_to_denmark = False + + formats = [] + subtitles = {} + + for asset in data['Assets']: + if asset['Kind'] == 'Image': + thumbnail = asset['Uri'] + elif asset['Kind'] == 'VideoResource': + duration = asset['DurationInMilliseconds'] / 1000.0 + restricted_to_denmark = asset['RestrictedToDenmark'] + for link in asset['Links']: + target = link['Target'] + uri = link['Uri'] + formats.append({ + 'url': uri + '?hdcore=3.3.0&plugin=aasp-3.3.0.99.43' if target == 'HDS' else uri, + 'format_id': target, + 'ext': link['FileFormat'], + 'preference': -1 if target == 'HDS' else -2, + }) + subtitles_list = asset.get('SubtitlesList') + if isinstance(subtitles_list, list): + LANGS = { + 'Danish': 'dk', + } + for subs in subtitles_list: + lang = subs['Language'] + subtitles[LANGS.get(lang, lang)] = subs['Uri'] + + if not formats and restricted_to_denmark: + raise ExtractorError( + 'Unfortunately, DR is not allowed to show this program outside Denmark.', expected=True) + + self._sort_formats(formats) + + if self._downloader.params.get('listsubtitles', False): + self._list_available_subtitles(video_id, subtitles) + return + + return { + 'id': video_id, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'timestamp': timestamp, + 'duration': duration, + 'formats': formats, + 'subtitles': self.extract_subtitles(video_id, subtitles), + } diff --git a/youtube_dl/extractor/firstpost.py b/youtube_dl/extractor/firstpost.py index eccd8dd..0993af1 100644 --- a/youtube_dl/extractor/firstpost.py +++ b/youtube_dl/extractor/firstpost.py @@ -15,6 +15,7 @@ class FirstpostIE(InfoExtractor): 'id': '1025403', 'ext': 'mp4', 'title': 'India to launch indigenous aircraft carrier INS Vikrant today', + 'description': 'md5:feef3041cb09724e0bdc02843348f5f4', } } @@ -22,13 +23,16 @@ class FirstpostIE(InfoExtractor): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') + page = self._download_webpage(url, video_id) + title = self._html_search_meta('twitter:title', page, 'title') + description = self._html_search_meta('twitter:description', page, 'title') + data = self._download_xml( 'http://www.firstpost.com/getvideoxml-%s.xml' % video_id, video_id, 'Downloading video XML') item = data.find('./playlist/item') thumbnail = item.find('./image').text - title = item.find('./title').text formats = [ { @@ -42,6 +46,7 @@ class FirstpostIE(InfoExtractor): return { 'id': video_id, 'title': title, + 'description': description, 'thumbnail': thumbnail, 'formats': formats, } diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 3105b47..f97b598 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -278,6 +278,17 @@ class GenericIE(InfoExtractor): 'skip_download': True, } }, + # MTVSercices embed + { + 'url': 'http://www.gametrailers.com/news-post/76093/north-america-europe-is-getting-that-mario-kart-8-mercedes-dlc-too', + 'md5': '35727f82f58c76d996fc188f9755b0d5', + 'info_dict': { + 'id': '0306a69b-8adf-4fb5-aace-75f8e8cbfca9', + 'ext': 'mp4', + 'title': 'Review', + 'description': 'Mario\'s life in the fast lane has never looked so good.', + }, + }, ] def report_download_webpage(self, video_id): @@ -372,7 +383,7 @@ class GenericIE(InfoExtractor): if not parsed_url.scheme: default_search = self._downloader.params.get('default_search') if default_search is None: - default_search = 'auto_warning' + default_search = 'error' if default_search in ('auto', 'auto_warning'): if '/' in url: @@ -386,8 +397,13 @@ class GenericIE(InfoExtractor): expected=True) else: self._downloader.report_warning( - 'Falling back to youtube search for %s . Set --default-search to "auto" to suppress this warning.' % url) + 'Falling back to youtube search for %s . Set --default-search "auto" to suppress this warning.' % url) return self.url_result('ytsearch:' + url) + elif default_search == 'error': + raise ExtractorError( + ('%r is not a valid URL. ' + 'Set --default-search "ytseach" (or run youtube-dl "ytsearch:%s" ) to search YouTube' + ) % (url, url), expected=True) else: assert ':' in default_search return self.url_result(default_search + url) @@ -609,6 +625,11 @@ class GenericIE(InfoExtractor): if mobj is not None: return self.url_result(mobj.group('url'), 'VK') + # Look for embedded ivi player + mobj = re.search(r']+?src=(["\'])(?Phttps?://(?:www\.)?ivi\.ru/video/player.+?)\1', webpage) + if mobj is not None: + return self.url_result(mobj.group('url'), 'Ivi') + # Look for embedded Huffington Post player mobj = re.search( r']+?src=(["\'])(?Phttps?://embed\.live\.huffingtonpost\.com/.+?)\1', webpage) @@ -676,6 +697,14 @@ class GenericIE(InfoExtractor): url = unescapeHTML(mobj.group('url')) return self.url_result(url, ie='Vulture') + # Look for embedded mtvservices player + mobj = re.search( + r'