--default-search PREFIX Use this prefix for unqualified URLs. For
example "gvsearch2:" downloads two videos
from google videos for youtube-dl "large
- apple". By default (with value "auto")
- youtube-dl guesses.
+ apple". Use the value "auto" to let
+ youtube-dl guess. The default value "error"
+ just throws an error.
--ignore-config Do not read configuration files. When given
in the global configuration file /etc
/youtube-dl.conf: do not read the user
--default-search PREFIX Use this prefix for unqualified URLs. For
example "gvsearch2:" downloads two videos
from google videos for youtube-dl "large
- apple". By default (with value "auto")
- youtube-dl guesses.
+ apple". Use the value "auto" to let
+ youtube-dl guess. The default value "error"
+ just throws an error.
--ignore-config Do not read configuration files. When given
in the global configuration file /etc
/youtube-dl.conf: do not read the user
This README file was originally written by Daniel Bolton
(https://github.com/dbbolton) and is likewise released into the public
domain.
+
def test_youtube_show_matching(self):
self.assertMatch('http://www.youtube.com/show/airdisasters', ['youtube:show'])
- def test_youtube_truncated(self):
- self.assertMatch('http://www.youtube.com/watch?', ['youtube:truncated_url'])
-
def test_youtube_search_matching(self):
self.assertMatch('http://www.youtube.com/results?search_query=making+mustard', ['youtube:search_url'])
self.assertMatch('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', ['youtube:search_url'])
SoundcloudSetIE,
SoundcloudUserIE,
SoundcloudPlaylistIE,
- TeacherTubeClassroomIE,
+ TeacherTubeUserIE,
LivestreamIE,
+ LivestreamOriginalIE,
NHLVideocenterIE,
BambuserChannelIE,
BandcampAlbumIE,
KhanAcademyIE,
EveryonesMixtapeIE,
RutubeChannelIE,
+ RutubePersonIE,
GoogleSearchIE,
GenericIE,
TEDIE,
self.assertEqual(result['title'], 'TEDCity2.0 (English)')
self.assertTrue(len(result['entries']) >= 4)
+ def test_livestreamoriginal_folder(self):
+ dl = FakeYDL()
+ ie = LivestreamOriginalIE(dl)
+ result = ie.extract('https://www.livestream.com/newplay/folder?dirId=a07bf706-d0e4-4e75-a747-b021d84f2fd3')
+ self.assertIsPlaylist(result)
+ self.assertEqual(result['id'], 'a07bf706-d0e4-4e75-a747-b021d84f2fd3')
+ self.assertTrue(len(result['entries']) >= 28)
+
def test_nhl_videocenter(self):
dl = FakeYDL()
ie = NHLVideocenterIE(dl)
def test_rutube_channel(self):
dl = FakeYDL()
ie = RutubeChannelIE(dl)
- result = ie.extract('http://rutube.ru/tags/video/1409')
+ result = ie.extract('http://rutube.ru/tags/video/1800/')
+ self.assertIsPlaylist(result)
+ self.assertEqual(result['id'], '1800')
+ self.assertTrue(len(result['entries']) >= 68)
+
+ def test_rutube_person(self):
+ dl = FakeYDL()
+ ie = RutubePersonIE(dl)
+ result = ie.extract('http://rutube.ru/video/person/313878/')
self.assertIsPlaylist(result)
- self.assertEqual(result['id'], '1409')
- self.assertTrue(len(result['entries']) >= 34)
+ self.assertEqual(result['id'], '313878')
+ self.assertTrue(len(result['entries']) >= 37)
def test_multiple_brightcove_videos(self):
# https://github.com/rg3/youtube-dl/issues/2283
result['title'], 'Brace Yourself - Today\'s Weirdest News')
self.assertTrue(len(result['entries']) >= 10)
- def test_TeacherTubeClassroom(self):
+ def test_TeacherTubeUser(self):
dl = FakeYDL()
- ie = TeacherTubeClassroomIE(dl)
- result = ie.extract('http://www.teachertube.com/view_classroom.php?user=rbhagwati2')
+ ie = TeacherTubeUserIE(dl)
+ result = ie.extract('http://www.teachertube.com/user/profile/rbhagwati2')
self.assertIsPlaylist(result)
self.assertEqual(result['id'], 'rbhagwati2')
- self.assertTrue(len(result['entries']) >= 20)
+ self.assertTrue(len(result['entries']) >= 179)
if __name__ == '__main__':
unittest.main()
90,
u']\\[@?>=<;:/.-,+*)(\'&%$#"hZYXWVUTSRQPONMLKJIHGFEDCBAzyxwvutsrqponmlkjiagfedcb39876',
),
+ (
+ u'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflXGBaUN.js',
+ u'js',
+ u'2ACFC7A61CA478CD21425E5A57EBD73DDC78E22A.2094302436B2D377D14A3BBA23022D023B8BC25AA',
+ u'A52CB8B320D22032ABB3A41D773D2B6342034902.A22E87CDD37DBE75A5E52412DC874AC16A7CFCA2',
+ ),
]
os.mkdir(self.TESTDATA_DIR)
-def make_tfunc(url, stype, sig_length, expected_sig):
+def make_tfunc(url, stype, sig_input, expected_sig):
basename = url.rpartition('/')[2]
m = re.match(r'.*-([a-zA-Z0-9_-]+)\.[a-z]+$', basename)
assert m, '%r should follow URL format' % basename
with open(fn, 'rb') as testf:
swfcode = testf.read()
func = ie._parse_sig_swf(swfcode)
- src_sig = compat_str(string.printable[:sig_length])
+ src_sig = (
+ compat_str(string.printable[:sig_input])
+ if isinstance(sig_input, int) else sig_input)
got_sig = func(src_sig)
self.assertEqual(got_sig, expected_sig)
\-\-default\-search\ PREFIX\ \ \ \ \ \ \ \ \ \ Use\ this\ prefix\ for\ unqualified\ URLs.\ For
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ example\ "gvsearch2:"\ downloads\ two\ videos
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ from\ google\ videos\ for\ \ youtube\-dl\ "large
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ apple".\ By\ default\ (with\ value\ "auto")
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ youtube\-dl\ guesses.
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ apple".\ Use\ the\ value\ "auto"\ to\ let
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ youtube\-dl\ guess.\ The\ default\ value\ "error"
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ just\ throws\ an\ error.
\-\-ignore\-config\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Do\ not\ read\ configuration\ files.\ When\ given
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ in\ the\ global\ configuration\ file\ /etc
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ /youtube\-dl.conf:\ do\ not\ read\ the\ user
fd = get_suitable_downloader(info)(self, self.params)
for ph in self._progress_hooks:
fd.add_progress_hook(ph)
+ if self.params.get('verbose'):
+ self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
return fd.download(name, info)
if info_dict.get('requested_formats') is not None:
downloaded = []
'Adam Thalhammer',
'Georg Jähnig',
'Ralf Haring',
+ 'Koki Takahashi',
)
__license__ = 'Public Domain'
general.add_option(
'--default-search',
dest='default_search', metavar='PREFIX',
- help='Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for youtube-dl "large apple". By default (with value "auto") youtube-dl guesses.')
+ help='Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for youtube-dl "large apple". Use the value "auto" to let youtube-dl guess. The default value "error" just throws an error.')
general.add_option(
'--ignore-config',
action='store_true',
from .aftonbladet import AftonbladetIE
from .anitube import AnitubeIE
from .aol import AolIE
+from .allocine import AllocineIE
from .aparat import AparatIE
from .appletrailers import AppleTrailersIE
from .archiveorg import ArchiveOrgIE
from .daum import DaumIE
from .dotsub import DotsubIE
from .dreisat import DreiSatIE
+from .drtv import DRTVIE
from .defense import DefenseGouvFrIE
from .discovery import DiscoveryIE
from .divxstage import DivxStageIE
from .la7 import LA7IE
from .lifenews import LifeNewsIE
from .liveleak import LiveLeakIE
-from .livestream import LivestreamIE, LivestreamOriginalIE
+from .livestream import (
+ LivestreamIE,
+ LivestreamOriginalIE,
+ LivestreamShortenerIE,
+)
from .lynda import (
LyndaIE,
LyndaCourseIE
from .mofosex import MofosexIE
from .mooshare import MooshareIE
from .morningstar import MorningstarIE
+from .motherless import MotherlessIE
from .motorsport import MotorsportIE
from .moviezine import MoviezineIE
from .movshare import MovShareIE
from .mtv import (
MTVIE,
+ MTVServicesEmbeddedIE,
MTVIggyIE,
)
from .musicplayon import MusicPlayOnIE
from .novamov import NovaMovIE
from .nowness import NownessIE
from .nowvideo import NowVideoIE
+from .npo import NPOIE
from .nrk import (
NRKIE,
NRKTVIE,
SoundcloudUserIE,
SoundcloudPlaylistIE
)
+from .soundgasm import SoundgasmIE
from .southparkstudios import (
SouthParkStudiosIE,
SouthparkDeIE,
from .tagesschau import TagesschauIE
from .teachertube import (
TeacherTubeIE,
- TeacherTubeClassroomIE,
+ TeacherTubeUserIE,
)
from .teachingchannel import TeachingChannelIE
from .teamcoco import TeamcocoIE
--- /dev/null
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+
+import re
+import json
+
+from .common import InfoExtractor
+from ..utils import (
+ compat_str,
+ qualities,
+ determine_ext,
+)
+
+
+class AllocineIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?allocine\.fr/(?P<typ>article|video|film)/(fichearticle_gen_carticle=|player_gen_cmedia=|fichefilm_gen_cfilm=)(?P<id>[0-9]+)(?:\.html)?'
+
+ _TESTS = [{
+ 'url': 'http://www.allocine.fr/article/fichearticle_gen_carticle=18635087.html',
+ 'md5': '0c9fcf59a841f65635fa300ac43d8269',
+ 'info_dict': {
+ 'id': '19546517',
+ 'ext': 'mp4',
+ 'title': 'Astérix - Le Domaine des Dieux Teaser VF',
+ 'description': 'md5:4a754271d9c6f16c72629a8a993ee884',
+ 'thumbnail': 're:http://.*\.jpg',
+ },
+ }, {
+ 'url': 'http://www.allocine.fr/video/player_gen_cmedia=19540403&cfilm=222257.html',
+ 'md5': 'd0cdce5d2b9522ce279fdfec07ff16e0',
+ 'info_dict': {
+ 'id': '19540403',
+ 'ext': 'mp4',
+ 'title': 'Planes 2 Bande-annonce VF',
+ 'description': 'md5:c4b1f7bd682a91de6491ada267ec0f4d',
+ 'thumbnail': 're:http://.*\.jpg',
+ },
+ }, {
+ 'url': 'http://www.allocine.fr/film/fichefilm_gen_cfilm=181290.html',
+ 'md5': '101250fb127ef9ca3d73186ff22a47ce',
+ 'info_dict': {
+ 'id': '19544709',
+ 'ext': 'mp4',
+ 'title': 'Dragons 2 - Bande annonce finale VF',
+ 'description': 'md5:e74a4dc750894bac300ece46c7036490',
+ 'thumbnail': 're:http://.*\.jpg',
+ },
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ typ = mobj.group('typ')
+ display_id = mobj.group('id')
+
+ webpage = self._download_webpage(url, display_id)
+
+ if typ == 'film':
+ video_id = self._search_regex(r'href="/video/player_gen_cmedia=([0-9]+).+"', webpage, 'video id')
+ else:
+ player = self._search_regex(r'data-player=\'([^\']+)\'>', webpage, 'data player')
+
+ player_data = json.loads(player)
+ video_id = compat_str(player_data['refMedia'])
+
+ xml = self._download_xml('http://www.allocine.fr/ws/AcVisiondataV4.ashx?media=%s' % video_id, display_id)
+
+ video = xml.find('.//AcVisionVideo').attrib
+ quality = qualities(['ld', 'md', 'hd'])
+
+ formats = []
+ for k, v in video.items():
+ if re.match(r'.+_path', k):
+ format_id = k.split('_')[0]
+ formats.append({
+ 'format_id': format_id,
+ 'quality': quality(format_id),
+ 'url': v,
+ 'ext': determine_ext(v),
+ })
+
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': video['videoTitle'],
+ 'thumbnail': self._og_search_thumbnail(webpage),
+ 'formats': formats,
+ 'description': self._og_search_description(webpage),
+ }
+from __future__ import unicode_literals
+
import re
from .common import InfoExtractor
class AnitubeIE(InfoExtractor):
- IE_NAME = u'anitube.se'
+ IE_NAME = 'anitube.se'
_VALID_URL = r'https?://(?:www\.)?anitube\.se/video/(?P<id>\d+)'
_TEST = {
- u'url': u'http://www.anitube.se/video/36621',
- u'md5': u'59d0eeae28ea0bc8c05e7af429998d43',
- u'file': u'36621.mp4',
- u'info_dict': {
- u'id': u'36621',
- u'ext': u'mp4',
- u'title': u'Recorder to Randoseru 01',
+ 'url': 'http://www.anitube.se/video/36621',
+ 'md5': '59d0eeae28ea0bc8c05e7af429998d43',
+ 'info_dict': {
+ 'id': '36621',
+ 'ext': 'mp4',
+ 'title': 'Recorder to Randoseru 01',
+ 'duration': 180.19,
},
- u'skip': u'Blocked in the US',
+ 'skip': 'Blocked in the US',
}
def _real_extract(self, url):
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
- key = self._html_search_regex(r'http://www\.anitube\.se/embed/([A-Za-z0-9_-]*)',
- webpage, u'key')
+ key = self._html_search_regex(
+ r'http://www\.anitube\.se/embed/([A-Za-z0-9_-]*)', webpage, 'key')
- config_xml = self._download_xml('http://www.anitube.se/nuevo/econfig.php?key=%s' % key,
- key)
+ config_xml = self._download_xml(
+ 'http://www.anitube.se/nuevo/econfig.php?key=%s' % key, key)
video_title = config_xml.find('title').text
+ thumbnail = config_xml.find('image').text
+ duration = float(config_xml.find('duration').text)
formats = []
video_url = config_xml.find('file')
return {
'id': video_id,
'title': video_title,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
'formats': formats
}
formats = [{
'forma_id': q.attrib['quality'],
- 'url': q.text,
+ # The playpath starts at 'mp4:', if we don't manually
+ # split the url, rtmpdump will incorrectly parse them
+ 'url': q.text.split('mp4:', 1)[0],
+ 'play_path': 'mp4:' + q.text.split('mp4:', 1)[1],
'ext': 'flv',
'quality': 2 if q.attrib['quality'] == 'hd' else 1,
} for q in config.findall('./urls/url')]
if not formats:
# Some videos are only available in the 'Originalversion'
# they aren't tagged as being in French or German
- if all(f['versionCode'] == 'VO' for f in all_formats):
+ if all(f['versionCode'] == 'VO' or f['versionCode'] == 'VA' for f in all_formats):
formats = all_formats
else:
raise ExtractorError(u'The formats list is empty')
_TEST = {
'url': 'http://future.arte.tv/fr/sujet/info-sciences#article-anchor-7081',
'info_dict': {
- 'id': '050940-003',
+ 'id': '5201',
'ext': 'mp4',
'title': 'Les champignons au secours de la planète',
+ 'upload_date': '20131101',
},
}
class BlipTVIE(SubtitlesInfoExtractor):
- _VALID_URL = r'https?://(?:\w+\.)?blip\.tv/(?:(?:.+-|rss/flash/)(?P<id>\d+)|((?:play/|api\.swf#)(?P<lookup_id>[\da-zA-Z]+)))'
+ _VALID_URL = r'https?://(?:\w+\.)?blip\.tv/(?:(?:.+-|rss/flash/)(?P<id>\d+)|((?:play/|api\.swf#)(?P<lookup_id>[\da-zA-Z+]+)))'
_TESTS = [
{
_TESTS = [
{
- 'url': 'http://www.br.de/mediathek/video/anselm-gruen-114.html',
- 'md5': 'c4f83cf0f023ba5875aba0bf46860df2',
+ 'url': 'http://www.br.de/mediathek/video/sendungen/heimatsound/heimatsound-festival-2014-trailer-100.html',
+ 'md5': '93556dd2bcb2948d9259f8670c516d59',
'info_dict': {
- 'id': '2c8d81c5-6fb7-4a74-88d4-e768e5856532',
+ 'id': '25e279aa-1ffd-40fd-9955-5325bd48a53a',
'ext': 'mp4',
- 'title': 'Feiern und Verzichten',
- 'description': 'Anselm Grün: Feiern und Verzichten',
- 'uploader': 'BR/Birgit Baier',
- 'upload_date': '20140301',
+ 'title': 'Am 1. und 2. August in Oberammergau',
+ 'description': 'md5:dfd224e5aa6819bc1fcbb7826a932021',
}
},
{
raise ExtractorError('Invalid redirected URL: ' + url)
if mobj.group('episode') == '':
raise ExtractorError('Redirected URL is still not specific: ' + url)
- epTitle = mobj.group('episode').rpartition('/')[-1]
+ epTitle = (mobj.group('episode') or mobj.group('videotitle')).rpartition('/')[-1]
mMovieParams = re.findall('(?:<param name="movie" value="|var url = ")(http://media.mtvnservices.com/([^"]*(?:episode|video).*?:.*?))"', webpage)
if len(mMovieParams) == 0:
if secure: regexes = self._og_regexes('video:secure_url') + regexes
return self._html_search_regex(regexes, html, name, **kargs)
+ def _og_search_url(self, html, **kargs):
+ return self._og_search_property('url', html, **kargs)
+
def _html_search_meta(self, name, html, display_name=None, fatal=False):
if display_name is None:
display_name = name
return {
'id': video_id,
'formats': formats,
- 'uploader': info['owner_screenname'],
+ 'uploader': info['owner.screenname'],
'upload_date': video_upload_date,
'title': self._og_search_title(webpage),
'subtitles': video_subtitles,
class DiscoveryIE(InfoExtractor):
- _VALID_URL = r'http://dsc\.discovery\.com\/[a-zA-Z0-9\-]*/[a-zA-Z0-9\-]*/videos/(?P<id>[a-zA-Z0-9\-]*)(.htm)?'
+ _VALID_URL = r'http://www\.discovery\.com\/[a-zA-Z0-9\-]*/[a-zA-Z0-9\-]*/videos/(?P<id>[a-zA-Z0-9\-]*)(.htm)?'
_TEST = {
- 'url': 'http://dsc.discovery.com/tv-shows/mythbusters/videos/mission-impossible-outtakes.htm',
+ 'url': 'http://www.discovery.com/tv-shows/mythbusters/videos/mission-impossible-outtakes.htm',
'md5': 'e12614f9ee303a6ccef415cb0793eba2',
'info_dict': {
'id': '614784',
--- /dev/null
+from __future__ import unicode_literals
+
+import re
+
+from .subtitles import SubtitlesInfoExtractor
+from .common import ExtractorError
+from ..utils import parse_iso8601
+
+
+class DRTVIE(SubtitlesInfoExtractor):
+ _VALID_URL = r'http://(?:www\.)?dr\.dk/tv/se/[^/]+/(?P<id>[\da-z-]+)'
+
+ _TEST = {
+ 'url': 'http://www.dr.dk/tv/se/partiets-mand/partiets-mand-7-8',
+ 'md5': '4a7e1dd65cdb2643500a3f753c942f25',
+ 'info_dict': {
+ 'id': 'partiets-mand-7-8',
+ 'ext': 'mp4',
+ 'title': 'Partiets mand (7:8)',
+ 'description': 'md5:a684b90a8f9336cd4aab94b7647d7862',
+ 'timestamp': 1403047940,
+ 'upload_date': '20140617',
+ 'duration': 1299.040,
+ },
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+
+ programcard = self._download_json(
+ 'http://www.dr.dk/mu/programcard/expanded/%s' % video_id, video_id, 'Downloading video JSON')
+
+ data = programcard['Data'][0]
+
+ title = data['Title']
+ description = data['Description']
+ timestamp = parse_iso8601(data['CreatedTime'][:-5])
+
+ thumbnail = None
+ duration = None
+
+ restricted_to_denmark = False
+
+ formats = []
+ subtitles = {}
+
+ for asset in data['Assets']:
+ if asset['Kind'] == 'Image':
+ thumbnail = asset['Uri']
+ elif asset['Kind'] == 'VideoResource':
+ duration = asset['DurationInMilliseconds'] / 1000.0
+ restricted_to_denmark = asset['RestrictedToDenmark']
+ for link in asset['Links']:
+ target = link['Target']
+ uri = link['Uri']
+ formats.append({
+ 'url': uri + '?hdcore=3.3.0&plugin=aasp-3.3.0.99.43' if target == 'HDS' else uri,
+ 'format_id': target,
+ 'ext': link['FileFormat'],
+ 'preference': -1 if target == 'HDS' else -2,
+ })
+ subtitles_list = asset.get('SubtitlesList')
+ if isinstance(subtitles_list, list):
+ LANGS = {
+ 'Danish': 'dk',
+ }
+ for subs in subtitles_list:
+ lang = subs['Language']
+ subtitles[LANGS.get(lang, lang)] = subs['Uri']
+
+ if not formats and restricted_to_denmark:
+ raise ExtractorError(
+ 'Unfortunately, DR is not allowed to show this program outside Denmark.', expected=True)
+
+ self._sort_formats(formats)
+
+ if self._downloader.params.get('listsubtitles', False):
+ self._list_available_subtitles(video_id, subtitles)
+ return
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'timestamp': timestamp,
+ 'duration': duration,
+ 'formats': formats,
+ 'subtitles': self.extract_subtitles(video_id, subtitles),
+ }
'id': '1025403',
'ext': 'mp4',
'title': 'India to launch indigenous aircraft carrier INS Vikrant today',
+ 'description': 'md5:feef3041cb09724e0bdc02843348f5f4',
}
}
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
+ page = self._download_webpage(url, video_id)
+ title = self._html_search_meta('twitter:title', page, 'title')
+ description = self._html_search_meta('twitter:description', page, 'title')
+
data = self._download_xml(
'http://www.firstpost.com/getvideoxml-%s.xml' % video_id, video_id,
'Downloading video XML')
item = data.find('./playlist/item')
thumbnail = item.find('./image').text
- title = item.find('./title').text
formats = [
{
return {
'id': video_id,
'title': title,
+ 'description': description,
'thumbnail': thumbnail,
'formats': formats,
}
'skip_download': True,
}
},
+ # MTVSercices embed
+ {
+ 'url': 'http://www.gametrailers.com/news-post/76093/north-america-europe-is-getting-that-mario-kart-8-mercedes-dlc-too',
+ 'md5': '35727f82f58c76d996fc188f9755b0d5',
+ 'info_dict': {
+ 'id': '0306a69b-8adf-4fb5-aace-75f8e8cbfca9',
+ 'ext': 'mp4',
+ 'title': 'Review',
+ 'description': 'Mario\'s life in the fast lane has never looked so good.',
+ },
+ },
]
def report_download_webpage(self, video_id):
if not parsed_url.scheme:
default_search = self._downloader.params.get('default_search')
if default_search is None:
- default_search = 'auto_warning'
+ default_search = 'error'
if default_search in ('auto', 'auto_warning'):
if '/' in url:
expected=True)
else:
self._downloader.report_warning(
- 'Falling back to youtube search for %s . Set --default-search to "auto" to suppress this warning.' % url)
+ 'Falling back to youtube search for %s . Set --default-search "auto" to suppress this warning.' % url)
return self.url_result('ytsearch:' + url)
+ elif default_search == 'error':
+ raise ExtractorError(
+ ('%r is not a valid URL. '
+ 'Set --default-search "ytseach" (or run youtube-dl "ytsearch:%s" ) to search YouTube'
+ ) % (url, url), expected=True)
else:
assert ':' in default_search
return self.url_result(default_search + url)
if mobj is not None:
return self.url_result(mobj.group('url'), 'VK')
+ # Look for embedded ivi player
+ mobj = re.search(r'<embed[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?ivi\.ru/video/player.+?)\1', webpage)
+ if mobj is not None:
+ return self.url_result(mobj.group('url'), 'Ivi')
+
# Look for embedded Huffington Post player
mobj = re.search(
r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed\.live\.huffingtonpost\.com/.+?)\1', webpage)
url = unescapeHTML(mobj.group('url'))
return self.url_result(url, ie='Vulture')
+ # Look for embedded mtvservices player
+ mobj = re.search(
+ r'<iframe src="(?P<url>https?://media\.mtvnservices\.com/embed/[^"]+)"',
+ webpage)
+ if mobj is not None:
+ url = unescapeHTML(mobj.group('url'))
+ return self.url_result(url, ie='MTVServicesEmbedded')
+
# Start with something easy: JW Player in SWFObject
found = re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
if not found:
# Extract title
# Get the first line for title
- video_title = self._html_search_regex(r'<meta name\=\"Description\" content\=\"(.*?)[\n<"]',
- webpage, 'title', default='NA')
+ video_title = self._og_search_description(webpage).splitlines()[0]
# Step 2, Simulate clicking the image box to launch video
DOMAIN = 'https://plus.google.com/'
class IviIE(InfoExtractor):
IE_DESC = 'ivi.ru'
IE_NAME = 'ivi'
- _VALID_URL = r'https?://(?:www\.)?ivi\.ru/watch(?:/(?P<compilationid>[^/]+))?/(?P<videoid>\d+)'
+ _VALID_URL = r'https?://(?:www\.)?ivi\.ru/(?:watch/(?:[^/]+/)?|video/player\?.*?videoId=)(?P<videoid>\d+)'
_TESTS = [
# Single movie
compat_urlparse,
xpath_with_ns,
compat_str,
+ orderedSet,
)
# The original version of Livestream uses a different system
class LivestreamOriginalIE(InfoExtractor):
IE_NAME = 'livestream:original'
- _VALID_URL = r'https?://www\.livestream\.com/(?P<user>[^/]+)/video\?.*?clipId=(?P<id>.*?)(&|$)'
+ _VALID_URL = r'''(?x)https?://www\.livestream\.com/
+ (?P<user>[^/]+)/(?P<type>video|folder)
+ (?:\?.*?Id=|/)(?P<id>.*?)(&|$)
+ '''
_TEST = {
'url': 'http://www.livestream.com/dealbook/video?clipId=pla_8aa4a3f1-ba15-46a4-893b-902210e138fb',
'info_dict': {
},
}
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
- user = mobj.group('user')
+ def _extract_video(self, user, video_id):
api_url = 'http://x{0}x.api.channel.livestream.com/2.0/clipdetails?extendedInfo=true&id={1}'.format(user, video_id)
info = self._download_xml(api_url, video_id)
'ext': 'flv',
'thumbnail': thumbnail_url,
}
+
+ def _extract_folder(self, url, folder_id):
+ webpage = self._download_webpage(url, folder_id)
+ urls = orderedSet(re.findall(r'<a href="(https?://livestre\.am/.*?)"', webpage))
+
+ return {
+ '_type': 'playlist',
+ 'id': folder_id,
+ 'entries': [{
+ '_type': 'url',
+ 'url': video_url,
+ } for video_url in urls],
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ id = mobj.group('id')
+ user = mobj.group('user')
+ url_type = mobj.group('type')
+ if url_type == 'folder':
+ return self._extract_folder(url, id)
+ else:
+ return self._extract_video(user, id)
+
+
+# The server doesn't support HEAD request, the generic extractor can't detect
+# the redirection
+class LivestreamShortenerIE(InfoExtractor):
+ IE_NAME = 'livestream:shortener'
+ IE_DESC = False # Do not list
+ _VALID_URL = r'https?://livestre\.am/(?P<id>.+)'
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ id = mobj.group('id')
+ webpage = self._download_webpage(url, id)
+
+ return {
+ '_type': 'url',
+ 'url': self._og_search_url(webpage),
+ }
--- /dev/null
+from __future__ import unicode_literals
+
+import datetime
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ unified_strdate,
+)
+
+
+class MotherlessIE(InfoExtractor):
+ _VALID_URL = r'http://(?:www\.)?motherless\.com/(?P<id>[A-Z0-9]+)'
+ _TESTS = [
+ {
+ 'url': 'http://motherless.com/AC3FFE1',
+ 'md5': '5527fef81d2e529215dad3c2d744a7d9',
+ 'info_dict': {
+ 'id': 'AC3FFE1',
+ 'ext': 'flv',
+ 'title': 'Fucked in the ass while playing PS3',
+ 'categories': ['Gaming', 'anal', 'reluctant', 'rough', 'Wife'],
+ 'upload_date': '20100913',
+ 'uploader_id': 'famouslyfuckedup',
+ 'thumbnail': 're:http://.*\.jpg',
+ 'age_limit': 18,
+ }
+ },
+ {
+ 'url': 'http://motherless.com/532291B',
+ 'md5': 'bc59a6b47d1f958e61fbd38a4d31b131',
+ 'info_dict': {
+ 'id': '532291B',
+ 'ext': 'mp4',
+ 'title': 'Amazing girl playing the omegle game, PERFECT!',
+ 'categories': ['Amateur', 'webcam', 'omegle', 'pink', 'young', 'masturbate', 'teen', 'game', 'hairy'],
+ 'upload_date': '20140622',
+ 'uploader_id': 'Sulivana7x',
+ 'thumbnail': 're:http://.*\.jpg',
+ 'age_limit': 18,
+ }
+ }
+ ]
+
+ def _real_extract(self,url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+
+ webpage = self._download_webpage(url, video_id)
+
+ title = self._html_search_regex(r'id="view-upload-title">\s+([^<]+)<', webpage, 'title')
+
+ video_url = self._html_search_regex(r'setup\(\{\s+"file".+: "([^"]+)",', webpage, 'video_url')
+ age_limit = self._rta_search(webpage)
+
+ view_count = self._html_search_regex(r'<strong>Views</strong>\s+([^<]+)<', webpage, 'view_count')
+
+ upload_date = self._html_search_regex(r'<strong>Uploaded</strong>\s+([^<]+)<', webpage, 'upload_date')
+ if 'Ago' in upload_date:
+ days = int(re.search(r'([0-9]+)', upload_date).group(1))
+ upload_date = (datetime.datetime.now() - datetime.timedelta(days=days)).strftime('%Y%m%d')
+ else:
+ upload_date = unified_strdate(upload_date)
+
+ like_count = self._html_search_regex(r'<strong>Favorited</strong>\s+([^<]+)<', webpage, 'like_count')
+
+ comment_count = webpage.count('class="media-comment-contents"')
+ uploader_id = self._html_search_regex(r'"thumb-member-username">\s+<a href="/m/([^"]+)"', webpage, 'uploader_id')
+
+ categories = self._html_search_meta('keywords', webpage)
+ if categories:
+ categories = [cat.strip() for cat in categories.split(',')]
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'upload_date': upload_date,
+ 'uploader_id': uploader_id,
+ 'thumbnail': self._og_search_thumbnail(webpage),
+ 'categories': categories,
+ 'view_count': int_or_none(view_count.replace(',', '')),
+ 'like_count': int_or_none(like_count.replace(',', '')),
+ 'comment_count': comment_count,
+ 'age_limit': age_limit,
+ 'url': video_url,
+ }
webpage = self._download_webpage(url, video_id)
data_json = self._search_regex(
- r"new FM\.Player\('[^']+',\s*(\{.*?)\);\n", webpage, 'json')
+ r"new FM\.Player\('[^']+',\s*(\{.*?)\).player;", webpage, 'json')
data = json.loads(data_json)
class MTVServicesInfoExtractor(InfoExtractor):
_MOBILE_TEMPLATE = None
+
@staticmethod
def _id_from_uri(uri):
return uri.split(':')[-1]
base = 'http://mtvnmobile.vo.llnwd.net/kip0/_pxn=1+_pxI0=Ripod-h264+_pxL0=undefined+_pxM0=+_pxK=18639+_pxE=mp4/44620/mtvnorigin/'
return base + m.group('finalid')
+ def _get_feed_url(self, uri):
+ return self._FEED_URL
+
def _get_thumbnail_url(self, uri, itemdoc):
search_path = '%s/%s' % (_media_xml_tag('group'), _media_xml_tag('thumbnail'))
thumb_node = itemdoc.find(search_path)
def _get_videos_info(self, uri):
video_id = self._id_from_uri(uri)
+ feed_url = self._get_feed_url(uri)
data = compat_urllib_parse.urlencode({'uri': uri})
-
idoc = self._download_xml(
- self._FEED_URL + '?' + data, video_id,
+ feed_url + '?' + data, video_id,
'Downloading info', transform_source=fix_xml_ampersands)
return [self._get_video_info(item) for item in idoc.findall('.//item')]
return self._get_videos_info(mgid)
+class MTVServicesEmbeddedIE(MTVServicesInfoExtractor):
+ IE_NAME = 'mtvservices:embedded'
+ _VALID_URL = r'https?://media\.mtvnservices\.com/embed/(?P<mgid>.+?)(\?|/|$)'
+
+ _TEST = {
+ # From http://www.thewrap.com/peter-dinklage-sums-up-game-of-thrones-in-45-seconds-video/
+ 'url': 'http://media.mtvnservices.com/embed/mgid:uma:video:mtv.com:1043906/cp~vid%3D1043906%26uri%3Dmgid%3Auma%3Avideo%3Amtv.com%3A1043906',
+ 'md5': 'cb349b21a7897164cede95bd7bf3fbb9',
+ 'info_dict': {
+ 'id': '1043906',
+ 'ext': 'mp4',
+ 'title': 'Peter Dinklage Sums Up \'Game Of Thrones\' In 45 Seconds',
+ 'description': '"Sexy sexy sexy, stabby stabby stabby, beautiful language," says Peter Dinklage as he tries summarizing "Game of Thrones" in under a minute.',
+ },
+ }
+
+ def _get_feed_url(self, uri):
+ video_id = self._id_from_uri(uri)
+ site_id = uri.replace(video_id, '')
+ config_url = 'http://media.mtvnservices.com/pmt/e1/players/{0}/config.xml'.format(site_id)
+ config_doc = self._download_xml(config_url, video_id)
+ feed_node = config_doc.find('.//feed')
+ feed_url = feed_node.text.strip().split('?')[0]
+ return feed_url
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ mgid = mobj.group('mgid')
+ return self._get_videos_info(mgid)
+
+
class MTVIE(MTVServicesInfoExtractor):
_VALID_URL = r'''(?x)^https?://
(?:(?:www\.)?mtv\.com/videos/.+?/(?P<videoid>[0-9]+)/[^/]+$|
import re
from .common import InfoExtractor
+from ..utils import ExtractorError
class NewstubeIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?newstube\.ru/media/(?P<id>.+)'
_TEST = {
- 'url': 'http://newstube.ru/media/na-korable-progress-prodolzhaetsya-testirovanie-sistemy-kurs',
+ 'url': 'http://www.newstube.ru/media/telekanal-cnn-peremestil-gorod-slavyansk-v-krym',
'info_dict': {
- 'id': 'd156a237-a6e9-4111-a682-039995f721f1',
+ 'id': '728e0ef2-e187-4012-bac0-5a081fdcb1f6',
'ext': 'flv',
- 'title': 'Ð\9dа коÑ\80абле «Ð\9fÑ\80огÑ\80еÑ\81Ñ\81» пÑ\80одолжаеÑ\82Ñ\81Ñ\8f Ñ\82еÑ\81Ñ\82иÑ\80ование Ñ\81иÑ\81Ñ\82емÑ\8b «Ð\9aÑ\83Ñ\80Ñ\81»',
- 'description': 'md5:d0cbe7b4a6f600552617e48548d5dc77',
- 'duration': 20.04,
+ 'title': 'Телеканал CNN пеÑ\80емеÑ\81Ñ\82ил гоÑ\80од СлавÑ\8fнÑ\81к в Ð\9aÑ\80Ñ\8bм',
+ 'description': 'md5:419a8c9f03442bc0b0a794d689360335',
+ 'duration': 31.05,
},
'params': {
# rtmp download
def ns(s):
return s.replace('/', '/%(ns)s') % {'ns': '{http://app1.newstube.ru/N2SiteWS/player.asmx}'}
+ error_message = player.find(ns('./ErrorMessage'))
+ if error_message is not None:
+ raise ExtractorError('%s returned error: %s' % (self.IE_NAME, error_message.text), expected=True)
+
session_id = player.find(ns('./SessionId')).text
media_info = player.find(ns('./Medias/MediaInfo'))
title = media_info.find(ns('./Name')).text
compat_urllib_parse,
compat_urllib_request,
compat_urlparse,
- compat_str,
-
- ExtractorError,
unified_strdate,
+ parse_duration,
+ int_or_none,
)
'uploader_id': '2698420',
'upload_date': '20131123',
'description': '(c) copyright 2008, Blender Foundation / www.bigbuckbunny.org',
+ 'duration': 33,
},
'params': {
'username': 'ydl.niconico@gmail.com',
},
}
- _VALID_URL = r'^https?://(?:www\.|secure\.)?nicovideo\.jp/watch/([a-z][a-z][0-9]+)(?:.*)$'
+ _VALID_URL = r'https?://(?:www\.|secure\.)?nicovideo\.jp/watch/((?:[a-z]{2})?[0-9]+)'
_NETRC_MACHINE = 'niconico'
+ # Determine whether the downloader uses authentication to download video
+ _AUTHENTICATE = False
def _real_initialize(self):
- self._login()
+ if self._downloader.params.get('username', None) is not None:
+ self._AUTHENTICATE = True
+
+ if self._AUTHENTICATE:
+ self._login()
def _login(self):
(username, password) = self._get_login_info()
- if username is None:
- # Login is required
- raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
# Log in
login_form_strs = {
'http://ext.nicovideo.jp/api/getthumbinfo/' + video_id, video_id,
note='Downloading video info page')
- # Get flv info
- flv_info_webpage = self._download_webpage(
- 'http://flapi.nicovideo.jp/api/getflv?v=' + video_id,
- video_id, 'Downloading flv info')
+ if self._AUTHENTICATE:
+ # Get flv info
+ flv_info_webpage = self._download_webpage(
+ 'http://flapi.nicovideo.jp/api/getflv?v=' + video_id,
+ video_id, 'Downloading flv info')
+ else:
+ # Get external player info
+ ext_player_info = self._download_webpage(
+ 'http://ext.nicovideo.jp/thumb_watch/' + video_id, video_id)
+ thumb_play_key = self._search_regex(
+ r'\'thumbPlayKey\'\s*:\s*\'(.*?)\'', ext_player_info, 'thumbPlayKey')
+
+ # Get flv info
+ flv_info_data = compat_urllib_parse.urlencode({
+ 'k': thumb_play_key,
+ 'v': video_id
+ })
+ flv_info_request = compat_urllib_request.Request(
+ 'http://ext.nicovideo.jp/thumb_watch', flv_info_data,
+ {'Content-Type': 'application/x-www-form-urlencoded'})
+ flv_info_webpage = self._download_webpage(
+ flv_info_request, video_id,
+ note='Downloading flv info', errnote='Unable to download flv info')
+
video_real_url = compat_urlparse.parse_qs(flv_info_webpage)['url'][0]
# Start extracting information
- video_title = video_info.find('.//title').text
- video_extension = video_info.find('.//movie_type').text
- video_format = video_extension.upper()
- video_thumbnail = video_info.find('.//thumbnail_url').text
- video_description = video_info.find('.//description').text
- video_uploader_id = video_info.find('.//user_id').text
- video_upload_date = unified_strdate(video_info.find('.//first_retrieve').text.split('+')[0])
- video_view_count = video_info.find('.//view_counter').text
- video_webpage_url = video_info.find('.//watch_url').text
-
- # uploader
- video_uploader = video_uploader_id
- url = 'http://seiga.nicovideo.jp/api/user/info?id=' + video_uploader_id
- try:
- user_info = self._download_xml(
- url, video_id, note='Downloading user information')
- video_uploader = user_info.find('.//nickname').text
- except ExtractorError as err:
- self._downloader.report_warning('Unable to download user info webpage: %s' % compat_str(err))
+ title = video_info.find('.//title').text
+ extension = video_info.find('.//movie_type').text
+ video_format = extension.upper()
+ thumbnail = video_info.find('.//thumbnail_url').text
+ description = video_info.find('.//description').text
+ upload_date = unified_strdate(video_info.find('.//first_retrieve').text.split('+')[0])
+ view_count = int_or_none(video_info.find('.//view_counter').text)
+ comment_count = int_or_none(video_info.find('.//comment_num').text)
+ duration = parse_duration(video_info.find('.//length').text)
+ webpage_url = video_info.find('.//watch_url').text
+
+ if video_info.find('.//ch_id') is not None:
+ uploader_id = video_info.find('.//ch_id').text
+ uploader = video_info.find('.//ch_name').text
+ elif video_info.find('.//user_id') is not None:
+ uploader_id = video_info.find('.//user_id').text
+ uploader = video_info.find('.//user_nickname').text
+ else:
+ uploader_id = uploader = None
return {
'id': video_id,
'url': video_real_url,
- 'title': video_title,
- 'ext': video_extension,
+ 'title': title,
+ 'ext': extension,
'format': video_format,
- 'thumbnail': video_thumbnail,
- 'description': video_description,
- 'uploader': video_uploader,
- 'upload_date': video_upload_date,
- 'uploader_id': video_uploader_id,
- 'view_count': video_view_count,
- 'webpage_url': video_webpage_url,
+ 'thumbnail': thumbnail,
+ 'description': description,
+ 'uploader': uploader,
+ 'upload_date': upload_date,
+ 'uploader_id': uploader_id,
+ 'view_count': view_count,
+ 'comment_count': comment_count,
+ 'duration': duration,
+ 'webpage_url': webpage_url,
}
webpage = self._download_webpage(url, display_id)
post_view = json.loads(self._html_search_regex(
- r'var postView = new app\.PostView\({\s*post:\s*({.+?}),', webpage, 'post view'))
+ r'var postView = new app\.PostView\({\s*post:\s*({.+?}),\s*posts:\s*prefetchedCurrentPost', webpage, 'post view'))
youtube_id = post_view['videoExternalId']
title = post_view['title']
video_id = mobj.group('id')
medias = self._download_json(
- 'http://api.noco.tv/1.0/video/medias/%s' % video_id, video_id, 'Downloading video JSON')
+ 'https://api.noco.tv/1.0/video/medias/%s' % video_id, video_id, 'Downloading video JSON')
formats = []
format_id = fmt['quality_key']
file = self._download_json(
- 'http://api.noco.tv/1.0/video/file/%s/fr/%s' % (format_id.lower(), video_id),
+ 'https://api.noco.tv/1.0/video/file/%s/fr/%s' % (format_id.lower(), video_id),
video_id, 'Downloading %s video JSON' % format_id)
file_url = file['file']
self._sort_formats(formats)
show = self._download_json(
- 'http://api.noco.tv/1.0/shows/show/%s' % video_id, video_id, 'Downloading show JSON')[0]
+ 'https://api.noco.tv/1.0/shows/show/%s' % video_id, video_id, 'Downloading show JSON')[0]
upload_date = unified_strdate(show['indexed'])
uploader = show['partner_name']
--- /dev/null
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ unified_strdate,
+)
+
+
+class NPOIE(InfoExtractor):
+ IE_NAME = 'npo.nl'
+ _VALID_URL = r'https?://www\.npo\.nl/[^/]+/[^/]+/(?P<id>[^/?]+)'
+
+ _TEST = {
+ 'url': 'http://www.npo.nl/nieuwsuur/22-06-2014/VPWON_1220719',
+ 'md5': '4b3f9c429157ec4775f2c9cb7b911016',
+ 'info_dict': {
+ 'id': 'VPWON_1220719',
+ 'ext': 'mp4',
+ 'title': 'Nieuwsuur',
+ 'description': 'Dagelijks tussen tien en elf: nieuws, sport en achtergronden.',
+ 'upload_date': '20140622',
+ },
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+
+ metadata = self._download_json(
+ 'http://e.omroep.nl/metadata/aflevering/%s' % video_id,
+ video_id,
+ # We have to remove the javascript callback
+ transform_source=lambda j: re.sub(r'parseMetadata\((.*?)\);\n//epc', r'\1', j)
+ )
+ token_page = self._download_webpage(
+ 'http://ida.omroep.nl/npoplayer/i.js',
+ video_id,
+ note='Downloading token'
+ )
+ token = self._search_regex(r'npoplayer.token = "(.+?)"', token_page, 'token')
+ streams_info = self._download_json(
+ 'http://ida.omroep.nl/odi/?prid=%s&puboptions=h264_std&adaptive=yes&token=%s' % (video_id, token),
+ video_id
+ )
+
+ stream_info = self._download_json(
+ streams_info['streams'][0] + '&type=json',
+ video_id,
+ 'Downloading stream info'
+ )
+
+ return {
+ 'id': video_id,
+ 'title': metadata['titel'],
+ 'ext': 'mp4',
+ 'url': stream_info['url'],
+ 'description': metadata['info'],
+ 'thumbnail': metadata['images'][-1]['url'],
+ 'upload_date': unified_strdate(metadata['gidsdatum']),
+ }
'description': '',
'upload_date': '20140612',
'duration': 1758,
- }
+ },
+ 'skip': 'Error 404',
},
{
'url': 'http://www.rainews.it/dl/rainews/media/state-of-the-net-Antonella-La-Carpia-regole-virali-7aafdea9-0e5d-49d5-88a6-7e65da67ae13.html',
--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+
+class SoundgasmIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?soundgasm\.net/u/(?P<user>[0-9a-zA-Z_\-]+)/(?P<title>[0-9a-zA-Z_\-]+)'
+ _TEST = {
+ 'url': 'http://soundgasm.net/u/ytdl/Piano-sample',
+ 'md5': '010082a2c802c5275bb00030743e75ad',
+ 'info_dict': {
+ 'id': '88abd86ea000cafe98f96321b23cc1206cbcbcc9',
+ 'ext': 'm4a',
+ 'title': 'ytdl_Piano-sample',
+ 'description': 'Royalty Free Sample Music'
+ }
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ display_id = mobj.group('title')
+ audio_title = mobj.group('user') + '_' + mobj.group('title')
+ webpage = self._download_webpage(url, display_id)
+ audio_url = self._html_search_regex(
+ r'(?s)m4a\:\s"([^"]+)"', webpage, 'audio URL')
+ audio_id = re.split('\/|\.', audio_url)[-2]
+ description = self._html_search_regex(
+ r'(?s)<li>Description:\s(.*?)<\/li>', webpage, 'description',
+ fatal=False)
+
+ return {
+ 'id': audio_id,
+ 'display_id': display_id,
+ 'url': audio_url,
+ 'title': audio_title,
+ 'description': description
+ }
+# encoding: utf-8
from __future__ import unicode_literals
import re
_VALID_URL = r'https?://(?:www\.)?spiegel\.de/video/[^/]*-(?P<videoID>[0-9]+)(?:\.html)?(?:#.*)?$'
_TESTS = [{
'url': 'http://www.spiegel.de/video/vulkan-tungurahua-in-ecuador-ist-wieder-aktiv-video-1259285.html',
- 'file': '1259285.mp4',
'md5': '2c2754212136f35fb4b19767d242f66e',
'info_dict': {
+ 'id': '1259285',
+ 'ext': 'mp4',
'title': 'Vulkanausbruch in Ecuador: Der "Feuerschlund" ist wieder aktiv',
+ 'description': 'md5:8029d8310232196eb235d27575a8b9f4',
+ 'duration': 49,
},
- },
- {
+ }, {
'url': 'http://www.spiegel.de/video/schach-wm-videoanalyse-des-fuenften-spiels-video-1309159.html',
- 'file': '1309159.mp4',
'md5': 'f2cdf638d7aa47654e251e1aee360af1',
'info_dict': {
+ 'id': '1309159',
+ 'ext': 'mp4',
'title': 'Schach-WM in der Videoanalyse: Carlsen nutzt die Fehlgriffe des Titelverteidigers',
+ 'description': 'md5:c2322b65e58f385a820c10fa03b2d088',
+ 'duration': 983,
+ },
+ }, {
+ 'url': 'http://www.spiegel.de/video/johann-westhauser-videobotschaft-des-hoehlenforschers-video-1502367.html',
+ 'md5': '54f58ba0e752e3c07bc2a26222dd0acf',
+ 'info_dict': {
+ 'id': '1502367',
+ 'ext': 'mp4',
+ 'title': 'Videobotschaft: Höhlenforscher Westhauser dankt seinen Rettern',
+ 'description': 'md5:c6f1ec11413ebd1088b6813943e5fc91',
+ 'duration': 42,
},
}]
webpage = self._download_webpage(url, video_id)
- video_title = self._html_search_regex(
+ title = self._html_search_regex(
r'<div class="module-title">(.*?)</div>', webpage, 'title')
+ description = self._html_search_meta('description', webpage, 'description')
+
+ base_url = self._search_regex(
+ r'var\s+server\s*=\s*"([^"]+)\"', webpage, 'server URL')
- xml_url = 'http://video2.spiegel.de/flash/' + video_id + '.xml'
- idoc = self._download_xml(
- xml_url, video_id,
- note='Downloading XML', errnote='Failed to download XML')
+ xml_url = base_url + video_id + '.xml'
+ idoc = self._download_xml(xml_url, video_id)
formats = [
{
'format_id': n.tag.rpartition('type')[2],
- 'url': 'http://video2.spiegel.de/flash/' + n.find('./filename').text,
+ 'url': base_url + n.find('./filename').text,
'width': int(n.find('./width').text),
'height': int(n.find('./height').text),
'abr': int(n.find('./audiobitrate').text),
return {
'id': video_id,
- 'title': video_title,
+ 'title': title,
+ 'description': description,
'duration': duration,
'formats': formats,
}
'thumbnail': 're:^http:.*\.jpg$',
},
}, {
- 'url': 'http://www.tagesschau.de/multimedia/video/video-196.html',
- 'md5': '8aaa8bf3ae1ca2652309718c03019128',
+ 'url': 'http://www.tagesschau.de/multimedia/video/video-5964.html',
+ 'md5': '66652566900963a3f962333579eeffcf',
'info_dict': {
- 'id': '196',
+ 'id': '5964',
'ext': 'mp4',
- 'title': 'Ukraine-Konflikt: Klitschko in Kiew als Bürgermeister vereidigt',
- 'description': 'md5:f22e4af75821d174fa6c977349682691',
+ 'title': 'Nahost-Konflikt: Israel bombadiert Ziele im Gazastreifen und Westjordanland',
+ 'description': 'md5:07bfc78c48eec3145ed4805299a1900a',
'thumbnail': 're:http://.*\.jpg',
},
}]
IE_NAME = 'teachertube'
IE_DESC = 'teachertube.com videos'
- _VALID_URL = r'https?://(?:www\.)?teachertube\.com/(viewVideo\.php\?video_id=|music\.php\?music_id=)(?P<id>\d+)'
+ _VALID_URL = r'https?://(?:www\.)?teachertube\.com/(viewVideo\.php\?video_id=|music\.php\?music_id=|video/(?:[\da-z-]+-)?|audio/)(?P<id>\d+)'
_TESTS = [{
'url': 'http://www.teachertube.com/viewVideo.php?video_id=339997',
'info_dict': {
'id': '339997',
'ext': 'mp4',
- 'title': 'Measures of dispersion from a frequency table_x264',
- 'description': 'md5:a3e9853487185e9fcd7181a07164650b',
+ 'title': 'Measures of dispersion from a frequency table',
+ 'description': 'Measures of dispersion from a frequency table',
'thumbnail': 're:http://.*\.jpg',
},
}, {
'id': '340064',
'ext': 'mp4',
'title': 'How to Make Paper Dolls _ Paper Art Projects',
- 'description': 'md5:2ca52b20cd727773d1dc418b3d6bd07b',
+ 'description': 'Learn how to make paper dolls in this simple',
'thumbnail': 're:http://.*\.jpg',
},
}, {
'id': '8805',
'ext': 'mp3',
'title': 'PER ASPERA AD ASTRA',
- 'description': 'RADIJSKA EMISIJA ZRAKOPLOVNE TEHNIČKE ŠKOLE PER ASPERA AD ASTRA',
+ 'description': 'RADIJSKA EMISIJA ZRAKOPLOVNE TEHNI?KE ?KOLE P',
+ },
+ }, {
+ 'url': 'http://www.teachertube.com/video/intro-video-schleicher-297790',
+ 'md5': '9c79fbb2dd7154823996fc28d4a26998',
+ 'info_dict': {
+ 'id': '297790',
+ 'ext': 'mp4',
+ 'title': 'Intro Video - Schleicher',
+ 'description': 'Intro Video - Why to flip, how flipping will',
},
}]
webpage = self._download_webpage(url, video_id)
+ title = self._html_search_meta('title', webpage, 'title')
+ TITLE_SUFFIX = ' - TeacherTube'
+ if title.endswith(TITLE_SUFFIX):
+ title = title[:-len(TITLE_SUFFIX)].strip()
+
+ description = self._html_search_meta('description', webpage, 'description')
+ if description:
+ description = description.strip()
+
quality = qualities(['mp3', 'flv', 'mp4'])
- _, media_urls = zip(*re.findall(r'([\'"])file\1\s*:\s*"([^"]+)"', webpage))
+ media_urls = re.findall(r'data-contenturl="([^"]+)"', webpage)
+ media_urls.extend(re.findall(r'var\s+filePath\s*=\s*"([^"]+)"', webpage))
+ media_urls.extend(re.findall(r'\'file\'\s*:\s*["\']([^"\']+)["\'],', webpage))
formats = [
{
return {
'id': video_id,
- 'title': self._og_search_title(webpage),
- 'thumbnail': self._og_search_thumbnail(webpage),
+ 'title': title,
+ 'thumbnail': self._html_search_regex(r'\'image\'\s*:\s*["\']([^"\']+)["\']', webpage, 'thumbnail'),
'formats': formats,
- 'description': self._og_search_description(webpage),
+ 'description': description,
}
-class TeacherTubeClassroomIE(InfoExtractor):
- IE_NAME = 'teachertube:classroom'
- IE_DESC = 'teachertube.com online classrooms'
+class TeacherTubeUserIE(InfoExtractor):
+ IE_NAME = 'teachertube:user:collection'
+ IE_DESC = 'teachertube.com user and collection videos'
+
+ _VALID_URL = r'https?://(?:www\.)?teachertube\.com/(user/profile|collection)/(?P<user>[0-9a-zA-Z]+)/?'
- _VALID_URL = r'https?://(?:www\.)?teachertube\.com/view_classroom\.php\?user=(?P<user>[0-9a-zA-Z]+)'
+ _MEDIA_RE = r'(?s)"sidebar_thumb_time">[0-9:]+</div>.+?<a href="(https?://(?:www\.)?teachertube\.com/(?:video|audio)/[^"]+)">'
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
user_id = mobj.group('user')
- rss = self._download_xml('http://www.teachertube.com/rssclassroom.php?mode=user&username=%s' % user_id,
- user_id, 'Downloading classroom RSS')
+ urls = []
+ webpage = self._download_webpage(url, user_id)
+ urls.extend(re.findall(self._MEDIA_RE, webpage))
+
+ pages = re.findall(r'/ajax-user/user-videos/%s\?page=([0-9]+)' % user_id, webpage)[1:-1]
+ for p in pages:
+ more = 'http://www.teachertube.com/ajax-user/user-videos/%s?page=%s' % (user_id, p)
+ webpage = self._download_webpage(more, user_id, 'Downloading page %s/%s' % (p, len(pages) + 1))
+ urls.extend(re.findall(self._MEDIA_RE, webpage))
entries = []
- for url in rss.findall('.//{http://search.yahoo.com/mrss/}player'):
- entries.append(self.url_result(url.attrib['url'], 'TeacherTube'))
+ for url in urls:
+ entries.append(self.url_result(url, 'TeacherTube'))
return self.playlist_result(entries, user_id)
+# -*- coding:utf-8 -*-
+from __future__ import unicode_literals
+
from .common import InfoExtractor
import re
class ToypicsIE(InfoExtractor):
IE_DESC = 'Toypics user profile'
- _VALID_URL = r'http://videos\.toypics\.net/view/(?P<id>[0-9]+)/.*'
+ _VALID_URL = r'https?://videos\.toypics\.net/view/(?P<id>[0-9]+)/.*'
_TEST = {
'url': 'http://videos.toypics.net/view/514/chancebulged,-2-1/',
'md5': '16e806ad6d6f58079d210fe30985e08b',
note='Downloading page %d/%d' % (n, page_count))
urls.extend(
re.findall(
- r'<p class="video-entry-title">\n\s*<a href="(http://videos.toypics.net/view/[^"]+)">',
+ r'<p class="video-entry-title">\s+<a href="(https?://videos.toypics.net/view/[^"]+)">',
lpage))
return {
+# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import re
class TumblrIE(InfoExtractor):
_VALID_URL = r'http://(?P<blog_name>.*?)\.tumblr\.com/((post)|(video))/(?P<id>\d*)($|/)'
- _TEST = {
+ _TESTS = [{
'url': 'http://tatianamaslanydaily.tumblr.com/post/54196191430/orphan-black-dvd-extra-behind-the-scenes',
- 'file': '54196191430.mp4',
'md5': '479bb068e5b16462f5176a6828829767',
'info_dict': {
- "title": "tatiana maslany news"
+ 'id': '54196191430',
+ 'ext': 'mp4',
+ 'title': 'tatiana maslany news, Orphan Black || DVD extra - behind the scenes ↳...',
+ 'description': 'md5:dfac39636969fe6bf1caa2d50405f069',
+ 'thumbnail': 're:http://.*\.jpg',
}
- }
+ }, {
+ 'url': 'http://5sostrum.tumblr.com/post/90208453769/yall-forgetting-the-greatest-keek-of-them-all',
+ 'md5': 'bf348ef8c0ef84fbf1cbd6fa6e000359',
+ 'info_dict': {
+ 'id': '90208453769',
+ 'ext': 'mp4',
+ 'title': '5SOS STRUM ;)',
+ 'description': 'md5:dba62ac8639482759c8eb10ce474586a',
+ 'thumbnail': 're:http://.*\.jpg',
+ }
+ }]
def _real_extract(self, url):
m_url = re.match(self._VALID_URL, url)
return [{'id': video_id,
'url': video_url,
'title': video_title,
+ 'description': self._html_search_meta('description', webpage),
'thumbnail': video_thumbnail,
'ext': ext
}]
'description': 'md5:f5a11c51f8fb51d2315bca0937526891',
'uploader': 'newsy-videos',
},
+ 'skip': 'This video has been deleted.',
},
]
import base64
from .common import InfoExtractor
-from ..utils import unified_strdate
+from ..utils import (
+ unified_strdate,
+ int_or_none,
+)
class VideoTtIE(InfoExtractor):
'thumbnail': settings['config']['thumbnail'],
'upload_date': unified_strdate(video['added']),
'uploader': video['owner'],
- 'view_count': int(video['view_count']),
- 'comment_count': int(video['comment_count']),
- 'like_count': int(video['liked']),
- 'dislike_count': int(video['disliked']),
+ 'view_count': int_or_none(video['view_count']),
+ 'comment_count': None if video.get('comment_count') == '--' else int_or_none(video['comment_count']),
+ 'like_count': int_or_none(video['liked']),
+ 'dislike_count': int_or_none(video['disliked']),
'formats': formats,
}
\ No newline at end of file
class VKIE(InfoExtractor):
IE_NAME = 'vk.com'
- _VALID_URL = r'https?://vk\.com/(?:video_ext\.php\?.*?\boid=(?P<oid>-?\d+).*?\bid=(?P<id>\d+)|(?:videos.*?\?.*?z=)?video(?P<videoid>.*?)(?:\?|%2F|$))'
+ _VALID_URL = r'https?://(?:m\.)?vk\.com/(?:video_ext\.php\?.*?\boid=(?P<oid>-?\d+).*?\bid=(?P<id>\d+)|(?:.+?\?.*?z=)?video(?P<videoid>.*?)(?:\?|%2F|$))'
_NETRC_MACHINE = 'vk'
_TESTS = [
'id': '162222515',
'ext': 'flv',
'title': 'ProtivoGunz - Хуёвая песня',
- 'uploader': 'Noize MC',
+ 'uploader': 're:Noize MC.*',
'duration': 195,
},
},
'id': '164049491',
'ext': 'mp4',
'uploader': 'Триллеры',
- 'title': '► Бойцовский клуб / Fight Club 1999 [HD 720]\u00a0',
+ 'title': '► Бойцовский клуб / Fight Club 1999 [HD 720]',
'duration': 8352,
},
'skip': 'Requires vk account credentials',
},
+ {
+ 'url': 'http://vk.com/feed?z=video-43215063_166094326%2Fbb50cacd3177146d7a',
+ 'md5': 'd82c22e449f036282d1d3f7f4d276869',
+ 'info_dict': {
+ 'id': '166094326',
+ 'ext': 'mp4',
+ 'uploader': 'Киномания - лучшее из мира кино',
+ 'title': 'Запах женщины (1992)',
+ 'duration': 9392,
+ },
+ 'skip': 'Requires vk account credentials',
+ },
+ {
+ 'url': 'http://vk.com/hd_kino_mania?z=video-43215063_168067957%2F15c66b9b533119788d',
+ 'md5': '4d7a5ef8cf114dfa09577e57b2993202',
+ 'info_dict': {
+ 'id': '168067957',
+ 'ext': 'mp4',
+ 'uploader': 'Киномания - лучшее из мира кино',
+ 'title': ' ',
+ 'duration': 7291,
+ },
+ 'skip': 'Requires vk account credentials',
+ },
+ {
+ 'url': 'http://m.vk.com/video-43215063_169084319?list=125c627d1aa1cebb83&from=wall-43215063_2566540',
+ 'md5': '0c45586baa71b7cb1d0784ee3f4e00a6',
+ 'note': 'ivi.ru embed',
+ 'info_dict': {
+ 'id': '60690',
+ 'ext': 'mp4',
+ 'title': 'Книга Илая',
+ 'duration': 6771,
+ },
+ 'skip': 'Only works from Russia',
+ },
]
def _login(self):
if m_yt is not None:
self.to_screen('Youtube video detected')
return self.url_result(m_yt.group(1), 'Youtube')
+
+ m_opts = re.search(r'(?s)var\s+opts\s*=\s*({.*?});', info_page)
+ if m_opts:
+ m_opts_url = re.search(r"url\s*:\s*'([^']+)", m_opts.group(1))
+ if m_opts_url:
+ opts_url = m_opts_url.group(1)
+ if opts_url.startswith('//'):
+ opts_url = 'http:' + opts_url
+ return self.url_result(opts_url)
+
data_json = self._search_regex(r'var vars = ({.*?});', info_page, 'vars')
data = json.loads(data_json)
+# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import re
},
},
{
- 'url': 'http://www.funkhauseuropa.de/av/audiogrenzenlosleckerbaklava101-audioplayer.html',
- 'md5': 'cfff440d4ee64114083ac44676df5d15',
+ 'url': 'http://www.funkhauseuropa.de/av/audiosuepersongsoulbossanova100-audioplayer.html',
+ 'md5': '24e83813e832badb0a8d7d1ef9ef0691',
'info_dict': {
- 'id': 'mdb-363068',
+ 'id': 'mdb-463528',
'ext': 'mp3',
- 'title': 'Grenzenlos lecker - Baklava',
+ 'title': 'Süpersong: Soul Bossa Nova',
'description': 'md5:7b29e97e10dfb6e265238b32fa35b23a',
- 'upload_date': '20140311',
+ 'upload_date': '20140630',
},
},
]
'info_dict': {
'title': '4283021',
'id': '421735',
+ 'ext': 'mp4',
'age_limit': 0,
},
- '_skip': 'Will be depublicized shortly'
+ 'skip': 'Problems with loading data.'
}
def _real_extract(self, url):
'title': mobj.group('title'),
'age_limit': int(mobj.group('age_limit')),
'url': url,
+ 'ext': determine_ext(url),
'user_agent': 'mobile',
}
+from __future__ import unicode_literals
+
import json
import re
class WistiaIE(InfoExtractor):
- _VALID_URL = r'^https?://(?:fast\.)?wistia\.net/embed/iframe/(?P<id>[a-z0-9]+)'
+ _VALID_URL = r'https?://(?:fast\.)?wistia\.net/embed/iframe/(?P<id>[a-z0-9]+)'
_TEST = {
- u"url": u"http://fast.wistia.net/embed/iframe/sh7fpupwlt",
- u"file": u"sh7fpupwlt.mov",
- u"md5": u"cafeb56ec0c53c18c97405eecb3133df",
- u"info_dict": {
- u"title": u"cfh_resourceful_zdkh_final_1"
+ 'url': 'http://fast.wistia.net/embed/iframe/sh7fpupwlt',
+ 'md5': 'cafeb56ec0c53c18c97405eecb3133df',
+ 'info_dict': {
+ 'id': 'sh7fpupwlt',
+ 'ext': 'mov',
+ 'title': 'Being Resourceful',
+ 'duration': 117,
},
}
webpage = self._download_webpage(url, video_id)
data_json = self._html_search_regex(
- r'Wistia.iframeInit\((.*?), {}\);', webpage, u'video data')
+ r'Wistia\.iframeInit\((.*?), {}\);', webpage, 'video data')
data = json.loads(data_json)
'title': data['name'],
'formats': formats,
'thumbnails': thumbnails,
+ 'duration': data.get('duration'),
}
'247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
'248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
'271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
+ '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
# Dash webm audio
'171': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 48, 'preference': -50},
def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
"""Turn the encrypted s field into a working signature"""
- if player_url is not None:
- if player_url.startswith(u'//'):
- player_url = u'https:' + player_url
- try:
- player_id = (player_url, len(s))
- if player_id not in self._player_cache:
- func = self._extract_signature_function(
- video_id, player_url, len(s)
- )
- self._player_cache[player_id] = func
- func = self._player_cache[player_id]
- if self._downloader.params.get('youtube_print_sig_code'):
- self._print_sig_code(func, len(s))
- return func(s)
- except Exception:
- tb = traceback.format_exc()
- self._downloader.report_warning(
- u'Automatic signature extraction failed: ' + tb)
-
- self._downloader.report_warning(
- u'Warning: Falling back to static signature algorithm')
-
- return self._static_decrypt_signature(
- s, video_id, player_url, age_gate)
-
- def _static_decrypt_signature(self, s, video_id, player_url, age_gate):
- if age_gate:
- # The videos with age protection use another player, so the
- # algorithms can be different.
- if len(s) == 86:
- return s[2:63] + s[82] + s[64:82] + s[63]
-
- if len(s) == 93:
- return s[86:29:-1] + s[88] + s[28:5:-1]
- elif len(s) == 92:
- return s[25] + s[3:25] + s[0] + s[26:42] + s[79] + s[43:79] + s[91] + s[80:83]
- elif len(s) == 91:
- return s[84:27:-1] + s[86] + s[26:5:-1]
- elif len(s) == 90:
- return s[25] + s[3:25] + s[2] + s[26:40] + s[77] + s[41:77] + s[89] + s[78:81]
- elif len(s) == 89:
- return s[84:78:-1] + s[87] + s[77:60:-1] + s[0] + s[59:3:-1]
- elif len(s) == 88:
- return s[7:28] + s[87] + s[29:45] + s[55] + s[46:55] + s[2] + s[56:87] + s[28]
- elif len(s) == 87:
- return s[6:27] + s[4] + s[28:39] + s[27] + s[40:59] + s[2] + s[60:]
- elif len(s) == 86:
- return s[80:72:-1] + s[16] + s[71:39:-1] + s[72] + s[38:16:-1] + s[82] + s[15::-1]
- elif len(s) == 85:
- return s[3:11] + s[0] + s[12:55] + s[84] + s[56:84]
- elif len(s) == 84:
- return s[78:70:-1] + s[14] + s[69:37:-1] + s[70] + s[36:14:-1] + s[80] + s[:14][::-1]
- elif len(s) == 83:
- return s[80:63:-1] + s[0] + s[62:0:-1] + s[63]
- elif len(s) == 82:
- return s[80:37:-1] + s[7] + s[36:7:-1] + s[0] + s[6:0:-1] + s[37]
- elif len(s) == 81:
- return s[56] + s[79:56:-1] + s[41] + s[55:41:-1] + s[80] + s[40:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
- elif len(s) == 80:
- return s[1:19] + s[0] + s[20:68] + s[19] + s[69:80]
- elif len(s) == 79:
- return s[54] + s[77:54:-1] + s[39] + s[53:39:-1] + s[78] + s[38:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
+ if player_url is None:
+ raise ExtractorError(u'Cannot decrypt signature without player_url')
- else:
- raise ExtractorError(u'Unable to decrypt signature, key length %d not supported; retrying might work' % (len(s)))
+ if player_url.startswith(u'//'):
+ player_url = u'https:' + player_url
+ try:
+ player_id = (player_url, len(s))
+ if player_id not in self._player_cache:
+ func = self._extract_signature_function(
+ video_id, player_url, len(s)
+ )
+ self._player_cache[player_id] = func
+ func = self._player_cache[player_id]
+ if self._downloader.params.get('youtube_print_sig_code'):
+ self._print_sig_code(func, len(s))
+ return func(s)
+ except Exception as e:
+ tb = traceback.format_exc()
+ raise ExtractorError(
+ u'Automatic signature extraction failed: ' + tb, cause=e)
def _get_available_subtitles(self, video_id, webpage):
try:
webpage = self._download_webpage(url, query)
result_code = self._search_regex(
- r'(?s)<ol id="search-results"(.*?)</ol>', webpage, u'result HTML')
+ r'(?s)<ol class="item-section"(.*?)</ol>', webpage, u'result HTML')
part_codes = re.findall(
r'(?s)<h3 class="yt-lockup-title">(.*?)</h3>', result_code)
entries = []
for part_code in part_codes:
part_title = self._html_search_regex(
- r'(?s)title="([^"]+)"', part_code, 'item title', fatal=False)
+ [r'(?s)title="([^"]+)"', r'>([^<]+)</a>'], part_code, 'item title', fatal=False)
part_url_snippet = self._html_search_regex(
r'(?s)href="([^"]+)"', part_code, 'item URL')
part_url = compat_urlparse.urljoin(
IE_NAME = 'youtube:truncated_url'
IE_DESC = False # Do not list
_VALID_URL = r'''(?x)
- (?:https?://)?[^/]+/watch\?(?:feature=[a-z_]+)?$|
+ (?:https?://)?[^/]+/watch\?(?:
+ feature=[a-z_]+|
+ annotation_id=annotation_[^&]+
+ )?$|
(?:https?://)?(?:www\.)?youtube\.com/attribution_link\?a=[^&]+$
'''
+ _TESTS = [{
+ 'url': 'http://www.youtube.com/watch?annotation_id=annotation_3951667041',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.youtube.com/watch?',
+ 'only_matching': True,
+ }]
+
def _real_extract(self, url):
raise ExtractorError(
u'Did you forget to quote the URL? Remember that & is a meta '
if member == 'split("")':
return list(val)
if member == 'join("")':
- return u''.join(val)
+ return ''.join(val)
if member == 'length':
return len(val)
if member == 'reverse()':
def extract_function(self, funcname):
func_m = re.search(
- (r'(?:function %s|%s\s*=\s*function)' % (
+ (r'(?:function %s|[{;]%s\s*=\s*function)' % (
re.escape(funcname), re.escape(funcname))) +
r'\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}',
self.code)
'%d %b %Y',
'%B %d %Y',
'%b %d %Y',
+ '%b %dst %Y %I:%M%p',
+ '%b %dnd %Y %I:%M%p',
+ '%b %dth %Y %I:%M%p',
'%Y-%m-%d',
'%d.%m.%Y',
'%d/%m/%Y',
-__version__ = '2014.06.19'
+__version__ = '2014.07.11'