Raphaël G. Git Repositories - youtubedl/commitdiff
Imported Upstream version 2014.07.11
author Rogério Brito <rbrito@ime.usp.br>
Sun, 13 Jul 2014 15:22:00 +0000 (12:22 -0300)
committer Rogério Brito <rbrito@ime.usp.br>
Sun, 13 Jul 2014 15:22:00 +0000 (12:22 -0300)
49 files changed:
README.md
README.txt
test/test_all_urls.py
test/test_playlists.py
test/test_youtube_signature.py
youtube-dl
youtube-dl.1
youtube_dl/YoutubeDL.py
youtube_dl/__init__.py
youtube_dl/extractor/__init__.py
youtube_dl/extractor/allocine.py [new file with mode: 0644]
youtube_dl/extractor/anitube.py
youtube_dl/extractor/arte.py
youtube_dl/extractor/bliptv.py
youtube_dl/extractor/br.py
youtube_dl/extractor/comedycentral.py
youtube_dl/extractor/common.py
youtube_dl/extractor/dailymotion.py
youtube_dl/extractor/discovery.py
youtube_dl/extractor/drtv.py [new file with mode: 0644]
youtube_dl/extractor/firstpost.py
youtube_dl/extractor/generic.py
youtube_dl/extractor/googleplus.py
youtube_dl/extractor/ivi.py
youtube_dl/extractor/livestream.py
youtube_dl/extractor/motherless.py [new file with mode: 0644]
youtube_dl/extractor/mpora.py
youtube_dl/extractor/mtv.py
youtube_dl/extractor/newstube.py
youtube_dl/extractor/niconico.py
youtube_dl/extractor/ninegag.py
youtube_dl/extractor/noco.py
youtube_dl/extractor/npo.py [new file with mode: 0644]
youtube_dl/extractor/rai.py
youtube_dl/extractor/soundgasm.py [new file with mode: 0644]
youtube_dl/extractor/spiegel.py
youtube_dl/extractor/tagesschau.py
youtube_dl/extractor/teachertube.py
youtube_dl/extractor/toypics.py
youtube_dl/extractor/tumblr.py
youtube_dl/extractor/veoh.py
youtube_dl/extractor/videott.py
youtube_dl/extractor/vk.py
youtube_dl/extractor/wdr.py
youtube_dl/extractor/wistia.py
youtube_dl/extractor/youtube.py
youtube_dl/jsinterp.py
youtube_dl/utils.py
youtube_dl/version.py

index 2bea609bfc397940d369628d27a8142ddcdbf867..dffdaa9dc3b8b334d3cc2e868533961fff0ed356 100644 (file)
--- a/README.md
+++ b/README.md
@@ -70,8 +70,9 @@ which means you can modify it, redistribute it or use it however you like.
     --default-search PREFIX          Use this prefix for unqualified URLs. For
                                      example "gvsearch2:" downloads two videos
                                      from google videos for  youtube-dl "large
-                                     apple". By default (with value "auto")
-                                     youtube-dl guesses.
+                                     apple". Use the value "auto" to let
+                                     youtube-dl guess. The default value "error"
+                                     just throws an error.
     --ignore-config                  Do not read configuration files. When given
                                      in the global configuration file /etc
                                      /youtube-dl.conf: do not read the user
index 4757a338b25780dcca13935d81efb5c1f5fb33b0..0f9c4700538f47c8f0228d3dbb5e504225146c5a 100644 (file)
@@ -82,8 +82,9 @@ OPTIONS
     --default-search PREFIX          Use this prefix for unqualified URLs. For
                                      example "gvsearch2:" downloads two videos
                                      from google videos for  youtube-dl "large
-                                     apple". By default (with value "auto")
-                                     youtube-dl guesses.
+                                     apple". Use the value "auto" to let
+                                     youtube-dl guess. The default value "error"
+                                     just throws an error.
     --ignore-config                  Do not read configuration files. When given
                                      in the global configuration file /etc
                                      /youtube-dl.conf: do not read the user
@@ -678,3 +679,4 @@ youtube-dl is released into the public domain by the copyright holders.
 This README file was originally written by Daniel Bolton
 (https://github.com/dbbolton) and is likewise released into the public
 domain.
+
index 4b56137cebb63287e4410959c091b09b6ffd785b..2bc81f0205165068f7c9cbc06634ddfaacbba68b 100644 (file)
@@ -69,9 +69,6 @@ class TestAllURLsMatching(unittest.TestCase):
     def test_youtube_show_matching(self):
         self.assertMatch('http://www.youtube.com/show/airdisasters', ['youtube:show'])
 
-    def test_youtube_truncated(self):
-        self.assertMatch('http://www.youtube.com/watch?', ['youtube:truncated_url'])
-
     def test_youtube_search_matching(self):
         self.assertMatch('http://www.youtube.com/results?search_query=making+mustard', ['youtube:search_url'])
         self.assertMatch('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', ['youtube:search_url'])
index ee91e412ab33199fd27996dba25bccb3b646387f..994b1d4b05714634e2028a0433898bbff30da05c 100644 (file)
@@ -28,8 +28,9 @@ from youtube_dl.extractor import (
     SoundcloudSetIE,
     SoundcloudUserIE,
     SoundcloudPlaylistIE,
-    TeacherTubeClassroomIE,
+    TeacherTubeUserIE,
     LivestreamIE,
+    LivestreamOriginalIE,
     NHLVideocenterIE,
     BambuserChannelIE,
     BandcampAlbumIE,
@@ -40,6 +41,7 @@ from youtube_dl.extractor import (
     KhanAcademyIE,
     EveryonesMixtapeIE,
     RutubeChannelIE,
+    RutubePersonIE,
     GoogleSearchIE,
     GenericIE,
     TEDIE,
@@ -154,6 +156,14 @@ class TestPlaylists(unittest.TestCase):
         self.assertEqual(result['title'], 'TEDCity2.0 (English)')
         self.assertTrue(len(result['entries']) >= 4)
 
+    def test_livestreamoriginal_folder(self):
+        dl = FakeYDL()
+        ie = LivestreamOriginalIE(dl)
+        result = ie.extract('https://www.livestream.com/newplay/folder?dirId=a07bf706-d0e4-4e75-a747-b021d84f2fd3')
+        self.assertIsPlaylist(result)
+        self.assertEqual(result['id'], 'a07bf706-d0e4-4e75-a747-b021d84f2fd3')
+        self.assertTrue(len(result['entries']) >= 28)
+
     def test_nhl_videocenter(self):
         dl = FakeYDL()
         ie = NHLVideocenterIE(dl)
@@ -256,10 +266,18 @@ class TestPlaylists(unittest.TestCase):
     def test_rutube_channel(self):
         dl = FakeYDL()
         ie = RutubeChannelIE(dl)
-        result = ie.extract('http://rutube.ru/tags/video/1409')
+        result = ie.extract('http://rutube.ru/tags/video/1800/')
+        self.assertIsPlaylist(result)
+        self.assertEqual(result['id'], '1800')
+        self.assertTrue(len(result['entries']) >= 68)
+
+    def test_rutube_person(self):
+        dl = FakeYDL()
+        ie = RutubePersonIE(dl)
+        result = ie.extract('http://rutube.ru/video/person/313878/')
         self.assertIsPlaylist(result)
-        self.assertEqual(result['id'], '1409')
-        self.assertTrue(len(result['entries']) >= 34)
+        self.assertEqual(result['id'], '313878')
+        self.assertTrue(len(result['entries']) >= 37)
 
     def test_multiple_brightcove_videos(self):
         # https://github.com/rg3/youtube-dl/issues/2283
@@ -361,13 +379,13 @@ class TestPlaylists(unittest.TestCase):
             result['title'], 'Brace Yourself - Today\'s Weirdest News')
         self.assertTrue(len(result['entries']) >= 10)
 
-    def test_TeacherTubeClassroom(self):
+    def test_TeacherTubeUser(self):
         dl = FakeYDL()
-        ie = TeacherTubeClassroomIE(dl)
-        result = ie.extract('http://www.teachertube.com/view_classroom.php?user=rbhagwati2')
+        ie = TeacherTubeUserIE(dl)
+        result = ie.extract('http://www.teachertube.com/user/profile/rbhagwati2')
         self.assertIsPlaylist(result)
         self.assertEqual(result['id'], 'rbhagwati2')
-        self.assertTrue(len(result['entries']) >= 20)
+        self.assertTrue(len(result['entries']) >= 179)
 
 if __name__ == '__main__':
     unittest.main()
index 8417c55a6d50059c3612e97f05511be90f606f56..8d46fe10826851cd8e6f72251087495313ed6fc9 100644 (file)
@@ -33,6 +33,12 @@ _TESTS = [
         90,
         u']\\[@?>=<;:/.-,+*)(\'&%$#"hZYXWVUTSRQPONMLKJIHGFEDCBAzyxwvutsrqponmlkjiagfedcb39876',
     ),
+    (
+        u'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflXGBaUN.js',
+        u'js',
+        u'2ACFC7A61CA478CD21425E5A57EBD73DDC78E22A.2094302436B2D377D14A3BBA23022D023B8BC25AA',
+        u'A52CB8B320D22032ABB3A41D773D2B6342034902.A22E87CDD37DBE75A5E52412DC874AC16A7CFCA2',
+    ),
 ]
 
 
@@ -44,7 +50,7 @@ class TestSignature(unittest.TestCase):
             os.mkdir(self.TESTDATA_DIR)
 
 
-def make_tfunc(url, stype, sig_length, expected_sig):
+def make_tfunc(url, stype, sig_input, expected_sig):
     basename = url.rpartition('/')[2]
     m = re.match(r'.*-([a-zA-Z0-9_-]+)\.[a-z]+$', basename)
     assert m, '%r should follow URL format' % basename
@@ -66,7 +72,9 @@ def make_tfunc(url, stype, sig_length, expected_sig):
             with open(fn, 'rb') as testf:
                 swfcode = testf.read()
             func = ie._parse_sig_swf(swfcode)
-        src_sig = compat_str(string.printable[:sig_length])
+        src_sig = (
+            compat_str(string.printable[:sig_input])
+            if isinstance(sig_input, int) else sig_input)
         got_sig = func(src_sig)
         self.assertEqual(got_sig, expected_sig)
 
index 4c445a9e9f0168344a24c5eb5f6b23adeef687e0..3d28bb5f02d859062b9e6da302c60f58888139b0 100755 (executable)
Binary files a/youtube-dl and b/youtube-dl differ
index f17adddce87ce25ef3f7a24f3b3daaf29b3e242f..47899f499a06bfacecbfa7a4ad6bbf78850aa35f 100644 (file)
@@ -61,8 +61,9 @@ redistribute it or use it however you like.
 \-\-default\-search\ PREFIX\ \ \ \ \ \ \ \ \ \ Use\ this\ prefix\ for\ unqualified\ URLs.\ For
 \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ example\ "gvsearch2:"\ downloads\ two\ videos
 \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ from\ google\ videos\ for\ \ youtube\-dl\ "large
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ apple".\ By\ default\ (with\ value\ "auto")
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ youtube\-dl\ guesses.
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ apple".\ Use\ the\ value\ "auto"\ to\ let
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ youtube\-dl\ guess.\ The\ default\ value\ "error"
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ just\ throws\ an\ error.
 \-\-ignore\-config\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Do\ not\ read\ configuration\ files.\ When\ given
 \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ in\ the\ global\ configuration\ file\ /etc
 \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ /youtube\-dl.conf:\ do\ not\ read\ the\ user
index dc0ba986a98744151cafd932acbcd6bbe33fb4a0..3dff723b81fff6947ac8cf08c62a275843f359f9 100755 (executable)
@@ -993,6 +993,8 @@ class YoutubeDL(object):
                         fd = get_suitable_downloader(info)(self, self.params)
                         for ph in self._progress_hooks:
                             fd.add_progress_hook(ph)
+                        if self.params.get('verbose'):
+                            self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
                         return fd.download(name, info)
                     if info_dict.get('requested_formats') is not None:
                         downloaded = []
index 1e01432d27c92dd92e82794f91870ca0eb2eff68..31ed63fcce7c9f1edaa95ce4b446e3defff0768e 100644 (file)
@@ -59,6 +59,7 @@ __authors__  = (
     'Adam Thalhammer',
     'Georg Jähnig',
     'Ralf Haring',
+    'Koki Takahashi',
 )
 
 __license__ = 'Public Domain'
@@ -269,7 +270,7 @@ def parseOpts(overrideArguments=None):
     general.add_option(
         '--default-search',
         dest='default_search', metavar='PREFIX',
-        help='Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for  youtube-dl "large apple". By default (with value "auto") youtube-dl guesses.')
+        help='Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for  youtube-dl "large apple". Use the value "auto" to let youtube-dl guess. The default value "error" just throws an error.')
     general.add_option(
         '--ignore-config',
         action='store_true',
index dcf64d0344816e971fbe6d45615e2aaae50a2e08..12cca5c2e039826f9a0554dbfda8fead3128ea9b 100644 (file)
@@ -3,6 +3,7 @@ from .addanime import AddAnimeIE
 from .aftonbladet import AftonbladetIE
 from .anitube import AnitubeIE
 from .aol import AolIE
+from .allocine import AllocineIE
 from .aparat import AparatIE
 from .appletrailers import AppleTrailersIE
 from .archiveorg import ArchiveOrgIE
@@ -63,6 +64,7 @@ from .dailymotion import (
 from .daum import DaumIE
 from .dotsub import DotsubIE
 from .dreisat import DreiSatIE
+from .drtv import DRTVIE
 from .defense import DefenseGouvFrIE
 from .discovery import DiscoveryIE
 from .divxstage import DivxStageIE
@@ -147,7 +149,11 @@ from .ku6 import Ku6IE
 from .la7 import LA7IE
 from .lifenews import LifeNewsIE
 from .liveleak import LiveLeakIE
-from .livestream import LivestreamIE, LivestreamOriginalIE
+from .livestream import (
+    LivestreamIE,
+    LivestreamOriginalIE,
+    LivestreamShortenerIE,
+)
 from .lynda import (
     LyndaIE,
     LyndaCourseIE
@@ -165,11 +171,13 @@ from .mpora import MporaIE
 from .mofosex import MofosexIE
 from .mooshare import MooshareIE
 from .morningstar import MorningstarIE
+from .motherless import MotherlessIE
 from .motorsport import MotorsportIE
 from .moviezine import MoviezineIE
 from .movshare import MovShareIE
 from .mtv import (
     MTVIE,
+    MTVServicesEmbeddedIE,
     MTVIggyIE,
 )
 from .musicplayon import MusicPlayOnIE
@@ -196,6 +204,7 @@ from .normalboots import NormalbootsIE
 from .novamov import NovaMovIE
 from .nowness import NownessIE
 from .nowvideo import NowVideoIE
+from .npo import NPOIE
 from .nrk import (
     NRKIE,
     NRKTVIE,
@@ -254,6 +263,7 @@ from .soundcloud import (
     SoundcloudUserIE,
     SoundcloudPlaylistIE
 )
+from .soundgasm import SoundgasmIE
 from .southparkstudios import (
     SouthParkStudiosIE,
     SouthparkDeIE,
@@ -273,7 +283,7 @@ from .sztvhu import SztvHuIE
 from .tagesschau import TagesschauIE
 from .teachertube import (
     TeacherTubeIE,
-    TeacherTubeClassroomIE,
+    TeacherTubeUserIE,
 )
 from .teachingchannel import TeachingChannelIE
 from .teamcoco import TeamcocoIE
diff --git a/youtube_dl/extractor/allocine.py b/youtube_dl/extractor/allocine.py
new file mode 100644 (file)
index 0000000..34f0cd4
--- /dev/null
@@ -0,0 +1,89 @@
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+
+import re
+import json
+
+from .common import InfoExtractor
+from ..utils import (
+    compat_str,
+    qualities,
+    determine_ext,
+)
+
+
+class AllocineIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?allocine\.fr/(?P<typ>article|video|film)/(fichearticle_gen_carticle=|player_gen_cmedia=|fichefilm_gen_cfilm=)(?P<id>[0-9]+)(?:\.html)?'
+
+    _TESTS = [{
+        'url': 'http://www.allocine.fr/article/fichearticle_gen_carticle=18635087.html',
+        'md5': '0c9fcf59a841f65635fa300ac43d8269',
+        'info_dict': {
+            'id': '19546517',
+            'ext': 'mp4',
+            'title': 'Astérix - Le Domaine des Dieux Teaser VF',
+            'description': 'md5:4a754271d9c6f16c72629a8a993ee884',
+            'thumbnail': 're:http://.*\.jpg',
+        },
+    }, {
+        'url': 'http://www.allocine.fr/video/player_gen_cmedia=19540403&cfilm=222257.html',
+        'md5': 'd0cdce5d2b9522ce279fdfec07ff16e0',
+        'info_dict': {
+            'id': '19540403',
+            'ext': 'mp4',
+            'title': 'Planes 2 Bande-annonce VF',
+            'description': 'md5:c4b1f7bd682a91de6491ada267ec0f4d',
+            'thumbnail': 're:http://.*\.jpg',
+        },
+    }, {
+        'url': 'http://www.allocine.fr/film/fichefilm_gen_cfilm=181290.html',
+        'md5': '101250fb127ef9ca3d73186ff22a47ce',
+        'info_dict': {
+            'id': '19544709',
+            'ext': 'mp4',
+            'title': 'Dragons 2 - Bande annonce finale VF',
+            'description': 'md5:e74a4dc750894bac300ece46c7036490',
+            'thumbnail': 're:http://.*\.jpg',
+        },
+    }]
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        typ = mobj.group('typ')
+        display_id = mobj.group('id')
+
+        webpage = self._download_webpage(url, display_id)
+
+        if typ == 'film':
+            video_id = self._search_regex(r'href="/video/player_gen_cmedia=([0-9]+).+"', webpage, 'video id')
+        else:
+            player = self._search_regex(r'data-player=\'([^\']+)\'>', webpage, 'data player')
+
+            player_data = json.loads(player)
+            video_id = compat_str(player_data['refMedia'])
+
+        xml = self._download_xml('http://www.allocine.fr/ws/AcVisiondataV4.ashx?media=%s' % video_id, display_id)
+
+        video = xml.find('.//AcVisionVideo').attrib
+        quality = qualities(['ld', 'md', 'hd'])
+
+        formats = []
+        for k, v in video.items():
+            if re.match(r'.+_path', k):
+                format_id = k.split('_')[0]
+                formats.append({
+                    'format_id': format_id,
+                    'quality': quality(format_id),
+                    'url': v,
+                    'ext': determine_ext(v),
+                })
+
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': video['videoTitle'],
+            'thumbnail': self._og_search_thumbnail(webpage),
+            'formats': formats,
+            'description': self._og_search_description(webpage),
+        }
index 2b019daa997ab3bb5dd15cdf5c2df489e181d177..31f0d417ce420de69f9e06ac0498242dc913bf73 100644 (file)
@@ -1,22 +1,24 @@
+from __future__ import unicode_literals
+
 import re
 
 from .common import InfoExtractor
 
 
 class AnitubeIE(InfoExtractor):
-    IE_NAME = u'anitube.se'
+    IE_NAME = 'anitube.se'
     _VALID_URL = r'https?://(?:www\.)?anitube\.se/video/(?P<id>\d+)'
 
     _TEST = {
-        u'url': u'http://www.anitube.se/video/36621',
-        u'md5': u'59d0eeae28ea0bc8c05e7af429998d43',
-        u'file': u'36621.mp4',
-        u'info_dict': {
-            u'id': u'36621',
-            u'ext': u'mp4',
-            u'title': u'Recorder to Randoseru 01',
+        'url': 'http://www.anitube.se/video/36621',
+        'md5': '59d0eeae28ea0bc8c05e7af429998d43',
+        'info_dict': {
+            'id': '36621',
+            'ext': 'mp4',
+            'title': 'Recorder to Randoseru 01',
+            'duration': 180.19,
         },
-        u'skip': u'Blocked in the US',
+        'skip': 'Blocked in the US',
     }
 
     def _real_extract(self, url):
@@ -24,13 +26,15 @@ class AnitubeIE(InfoExtractor):
         video_id = mobj.group('id')
 
         webpage = self._download_webpage(url, video_id)
-        key = self._html_search_regex(r'http://www\.anitube\.se/embed/([A-Za-z0-9_-]*)',
-                                      webpage, u'key')
+        key = self._html_search_regex(
+            r'http://www\.anitube\.se/embed/([A-Za-z0-9_-]*)', webpage, 'key')
 
-        config_xml = self._download_xml('http://www.anitube.se/nuevo/econfig.php?key=%s' % key,
-                                                key)
+        config_xml = self._download_xml(
+            'http://www.anitube.se/nuevo/econfig.php?key=%s' % key, key)
 
         video_title = config_xml.find('title').text
+        thumbnail = config_xml.find('image').text
+        duration = float(config_xml.find('duration').text)
 
         formats = []
         video_url = config_xml.find('file')
@@ -49,5 +53,7 @@ class AnitubeIE(InfoExtractor):
         return {
             'id': video_id,
             'title': video_title,
+            'thumbnail': thumbnail,
+            'duration': duration,
             'formats': formats
         }
index b528a9ec50ca6c2dac1a52fe66de2cd66194dd23..9591bad8a66254e90247a204b43b10b6db4f6406 100644 (file)
@@ -39,7 +39,10 @@ class ArteTvIE(InfoExtractor):
 
         formats = [{
             'forma_id': q.attrib['quality'],
-            'url': q.text,
+            # The playpath starts at 'mp4:', if we don't manually
+            # split the url, rtmpdump will incorrectly parse them
+            'url': q.text.split('mp4:', 1)[0],
+            'play_path': 'mp4:' + q.text.split('mp4:', 1)[1],
             'ext': 'flv',
             'quality': 2 if q.attrib['quality'] == 'hd' else 1,
         } for q in config.findall('./urls/url')]
@@ -111,7 +114,7 @@ class ArteTVPlus7IE(InfoExtractor):
         if not formats:
             # Some videos are only available in the 'Originalversion'
             # they aren't tagged as being in French or German
-            if all(f['versionCode'] == 'VO' for f in all_formats):
+            if all(f['versionCode'] == 'VO' or f['versionCode'] == 'VA' for f in all_formats):
                 formats = all_formats
             else:
                 raise ExtractorError(u'The formats list is empty')
@@ -189,9 +192,10 @@ class ArteTVFutureIE(ArteTVPlus7IE):
     _TEST = {
         'url': 'http://future.arte.tv/fr/sujet/info-sciences#article-anchor-7081',
         'info_dict': {
-            'id': '050940-003',
+            'id': '5201',
             'ext': 'mp4',
             'title': 'Les champignons au secours de la planète',
+            'upload_date': '20131101',
         },
     }
 
index d4da08991d937823fefba19da6a2e64a705e9434..acfc4ad736d9deecf1ed9cdadd4063e2fc8e7243 100644 (file)
@@ -15,7 +15,7 @@ from ..utils import (
 
 
 class BlipTVIE(SubtitlesInfoExtractor):
-    _VALID_URL = r'https?://(?:\w+\.)?blip\.tv/(?:(?:.+-|rss/flash/)(?P<id>\d+)|((?:play/|api\.swf#)(?P<lookup_id>[\da-zA-Z]+)))'
+    _VALID_URL = r'https?://(?:\w+\.)?blip\.tv/(?:(?:.+-|rss/flash/)(?P<id>\d+)|((?:play/|api\.swf#)(?P<lookup_id>[\da-zA-Z+]+)))'
 
     _TESTS = [
         {
index b5b56ff00d0d6443b75552acf902837f147f98a5..993360714baa6feeb1b276308eca13ba61c0dd4b 100644 (file)
@@ -17,15 +17,13 @@ class BRIE(InfoExtractor):
 
     _TESTS = [
         {
-            'url': 'http://www.br.de/mediathek/video/anselm-gruen-114.html',
-            'md5': 'c4f83cf0f023ba5875aba0bf46860df2',
+            'url': 'http://www.br.de/mediathek/video/sendungen/heimatsound/heimatsound-festival-2014-trailer-100.html',
+            'md5': '93556dd2bcb2948d9259f8670c516d59',
             'info_dict': {
-                'id': '2c8d81c5-6fb7-4a74-88d4-e768e5856532',
+                'id': '25e279aa-1ffd-40fd-9955-5325bd48a53a',
                 'ext': 'mp4',
-                'title': 'Feiern und Verzichten',
-                'description': 'Anselm Grün: Feiern und Verzichten',
-                'uploader': 'BR/Birgit Baier',
-                'upload_date': '20140301',
+                'title': 'Am 1. und 2. August in Oberammergau',
+                'description': 'md5:dfd224e5aa6819bc1fcbb7826a932021',
             }
         },
         {
index ba4d73ab8bf3ff893fdb2c07fc57f0cbc009ec44..8af0abade8c88fea3fa7fc4e7329e10802b43a5a 100644 (file)
@@ -130,7 +130,7 @@ class ComedyCentralShowsIE(InfoExtractor):
                 raise ExtractorError('Invalid redirected URL: ' + url)
             if mobj.group('episode') == '':
                 raise ExtractorError('Redirected URL is still not specific: ' + url)
-            epTitle = mobj.group('episode').rpartition('/')[-1]
+            epTitle = (mobj.group('episode') or mobj.group('videotitle')).rpartition('/')[-1]
 
         mMovieParams = re.findall('(?:<param name="movie" value="|var url = ")(http://media.mtvnservices.com/([^"]*(?:episode|video).*?:.*?))"', webpage)
         if len(mMovieParams) == 0:
index 49e75405e8b079eef83191f9429ebd34a6c0bc26..e4e4feef9ea18787d196b2ca7d3414191409ba97 100644 (file)
@@ -459,6 +459,9 @@ class InfoExtractor(object):
         if secure: regexes = self._og_regexes('video:secure_url') + regexes
         return self._html_search_regex(regexes, html, name, **kargs)
 
+    def _og_search_url(self, html, **kargs):
+        return self._og_search_property('url', html, **kargs)
+
     def _html_search_meta(self, name, html, display_name=None, fatal=False):
         if display_name is None:
             display_name = name
index 55216201fe7f137747ad4ac24137b8fe54494d72..5d0bfe454c9bfe1a3e16d1273b18ed3be2f436b8 100644 (file)
@@ -150,7 +150,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
         return {
             'id':       video_id,
             'formats': formats,
-            'uploader': info['owner_screenname'],
+            'uploader': info['owner.screenname'],
             'upload_date':  video_upload_date,
             'title':    self._og_search_title(webpage),
             'subtitles':    video_subtitles,
index 2ae6ecc12e7d5a5d546c5e4f56ef2a37b6fcb27f..554df673506a88cada08b9db8300cd15d301087d 100644 (file)
@@ -7,9 +7,9 @@ from .common import InfoExtractor
 
 
 class DiscoveryIE(InfoExtractor):
-    _VALID_URL = r'http://dsc\.discovery\.com\/[a-zA-Z0-9\-]*/[a-zA-Z0-9\-]*/videos/(?P<id>[a-zA-Z0-9\-]*)(.htm)?'
+    _VALID_URL = r'http://www\.discovery\.com\/[a-zA-Z0-9\-]*/[a-zA-Z0-9\-]*/videos/(?P<id>[a-zA-Z0-9\-]*)(.htm)?'
     _TEST = {
-        'url': 'http://dsc.discovery.com/tv-shows/mythbusters/videos/mission-impossible-outtakes.htm',
+        'url': 'http://www.discovery.com/tv-shows/mythbusters/videos/mission-impossible-outtakes.htm',
         'md5': 'e12614f9ee303a6ccef415cb0793eba2',
         'info_dict': {
             'id': '614784',
diff --git a/youtube_dl/extractor/drtv.py b/youtube_dl/extractor/drtv.py
new file mode 100644 (file)
index 0000000..cdccfd3
--- /dev/null
@@ -0,0 +1,91 @@
+from __future__ import unicode_literals
+
+import re
+
+from .subtitles import SubtitlesInfoExtractor
+from .common import ExtractorError
+from ..utils import parse_iso8601
+
+
+class DRTVIE(SubtitlesInfoExtractor):
+    _VALID_URL = r'http://(?:www\.)?dr\.dk/tv/se/[^/]+/(?P<id>[\da-z-]+)'
+
+    _TEST = {
+        'url': 'http://www.dr.dk/tv/se/partiets-mand/partiets-mand-7-8',
+        'md5': '4a7e1dd65cdb2643500a3f753c942f25',
+        'info_dict': {
+            'id': 'partiets-mand-7-8',
+            'ext': 'mp4',
+            'title': 'Partiets mand (7:8)',
+            'description': 'md5:a684b90a8f9336cd4aab94b7647d7862',
+            'timestamp': 1403047940,
+            'upload_date': '20140617',
+            'duration': 1299.040,
+        },
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        programcard = self._download_json(
+            'http://www.dr.dk/mu/programcard/expanded/%s' % video_id, video_id, 'Downloading video JSON')
+
+        data = programcard['Data'][0]
+
+        title = data['Title']
+        description = data['Description']
+        timestamp = parse_iso8601(data['CreatedTime'][:-5])
+
+        thumbnail = None
+        duration = None
+
+        restricted_to_denmark = False
+
+        formats = []
+        subtitles = {}
+
+        for asset in data['Assets']:
+            if asset['Kind'] == 'Image':
+                thumbnail = asset['Uri']
+            elif asset['Kind'] == 'VideoResource':
+                duration = asset['DurationInMilliseconds'] / 1000.0
+                restricted_to_denmark = asset['RestrictedToDenmark']
+                for link in asset['Links']:
+                    target = link['Target']
+                    uri = link['Uri']
+                    formats.append({
+                        'url': uri + '?hdcore=3.3.0&plugin=aasp-3.3.0.99.43' if target == 'HDS' else uri,
+                        'format_id': target,
+                        'ext': link['FileFormat'],
+                        'preference': -1 if target == 'HDS' else -2,
+                    })
+                subtitles_list = asset.get('SubtitlesList')
+                if isinstance(subtitles_list, list):
+                    LANGS = {
+                        'Danish': 'dk',
+                    }
+                    for subs in subtitles_list:
+                        lang = subs['Language']
+                        subtitles[LANGS.get(lang, lang)] = subs['Uri']
+
+        if not formats and restricted_to_denmark:
+            raise ExtractorError(
+                'Unfortunately, DR is not allowed to show this program outside Denmark.', expected=True)
+
+        self._sort_formats(formats)
+
+        if self._downloader.params.get('listsubtitles', False):
+            self._list_available_subtitles(video_id, subtitles)
+            return
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'timestamp': timestamp,
+            'duration': duration,
+            'formats': formats,
+            'subtitles': self.extract_subtitles(video_id, subtitles),
+        }
index eccd8dde9e007583b9f73f63df45a38b89c21286..0993af1c9455cf6bc2189f1a15b6fd6f0066ae36 100644 (file)
@@ -15,6 +15,7 @@ class FirstpostIE(InfoExtractor):
             'id': '1025403',
             'ext': 'mp4',
             'title': 'India to launch indigenous aircraft carrier INS Vikrant today',
+            'description': 'md5:feef3041cb09724e0bdc02843348f5f4',
         }
     }
 
@@ -22,13 +23,16 @@ class FirstpostIE(InfoExtractor):
         mobj = re.match(self._VALID_URL, url)
         video_id = mobj.group('id')
 
+        page = self._download_webpage(url, video_id)
+        title = self._html_search_meta('twitter:title', page, 'title')
+        description = self._html_search_meta('twitter:description', page, 'title')
+
         data = self._download_xml(
             'http://www.firstpost.com/getvideoxml-%s.xml' % video_id, video_id,
             'Downloading video XML')
 
         item = data.find('./playlist/item')
         thumbnail = item.find('./image').text
-        title = item.find('./title').text
 
         formats = [
             {
@@ -42,6 +46,7 @@ class FirstpostIE(InfoExtractor):
         return {
             'id': video_id,
             'title': title,
+            'description': description,
             'thumbnail': thumbnail,
             'formats': formats,
         }
index 3105b47abf025a690b14d8878cdb6a671ba00217..f97b59845706b8e33d4438b5cab968a2251b1ad9 100644 (file)
@@ -278,6 +278,17 @@ class GenericIE(InfoExtractor):
                 'skip_download': True,
             }
         },
+        # MTVServices embed
+        {
+            'url': 'http://www.gametrailers.com/news-post/76093/north-america-europe-is-getting-that-mario-kart-8-mercedes-dlc-too',
+            'md5': '35727f82f58c76d996fc188f9755b0d5',
+            'info_dict': {
+                'id': '0306a69b-8adf-4fb5-aace-75f8e8cbfca9',
+                'ext': 'mp4',
+                'title': 'Review',
+                'description': 'Mario\'s life in the fast lane has never looked so good.',
+            },
+        },
     ]
 
     def report_download_webpage(self, video_id):
@@ -372,7 +383,7 @@ class GenericIE(InfoExtractor):
         if not parsed_url.scheme:
             default_search = self._downloader.params.get('default_search')
             if default_search is None:
-                default_search = 'auto_warning'
+                default_search = 'error'
 
             if default_search in ('auto', 'auto_warning'):
                 if '/' in url:
@@ -386,8 +397,13 @@ class GenericIE(InfoExtractor):
                                 expected=True)
                         else:
                             self._downloader.report_warning(
-                                'Falling back to youtube search for  %s . Set --default-search to "auto" to suppress this warning.' % url)
+                                'Falling back to youtube search for  %s . Set --default-search "auto" to suppress this warning.' % url)
                     return self.url_result('ytsearch:' + url)
+            elif default_search == 'error':
+                raise ExtractorError(
+                    ('%r is not a valid URL. '
+                     'Set --default-search "ytseach" (or run  youtube-dl "ytsearch:%s" ) to search YouTube'
+                    ) % (url, url), expected=True)
             else:
                 assert ':' in default_search
                 return self.url_result(default_search + url)
@@ -609,6 +625,11 @@ class GenericIE(InfoExtractor):
         if mobj is not None:
             return self.url_result(mobj.group('url'), 'VK')
 
+        # Look for embedded ivi player
+        mobj = re.search(r'<embed[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?ivi\.ru/video/player.+?)\1', webpage)
+        if mobj is not None:
+            return self.url_result(mobj.group('url'), 'Ivi')
+
         # Look for embedded Huffington Post player
         mobj = re.search(
             r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed\.live\.huffingtonpost\.com/.+?)\1', webpage)
@@ -676,6 +697,14 @@ class GenericIE(InfoExtractor):
             url = unescapeHTML(mobj.group('url'))
             return self.url_result(url, ie='Vulture')
 
+        # Look for embedded mtvservices player
+        mobj = re.search(
+            r'<iframe src="(?P<url>https?://media\.mtvnservices\.com/embed/[^"]+)"',
+            webpage)
+        if mobj is not None:
+            url = unescapeHTML(mobj.group('url'))
+            return self.url_result(url, ie='MTVServicesEmbedded')
+
         # Start with something easy: JW Player in SWFObject
         found = re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
         if not found:
index cc29a7e5df2059096afe89ecf7175f317a755e94..07d994b448040fb80912593b9cdae4ac66e63bbb 100644 (file)
@@ -52,8 +52,7 @@ class GooglePlusIE(InfoExtractor):
 
         # Extract title
         # Get the first line for title
-        video_title = self._html_search_regex(r'<meta name\=\"Description\" content\=\"(.*?)[\n<"]',
-            webpage, 'title', default='NA')
+        video_title = self._og_search_description(webpage).splitlines()[0]
 
         # Step 2, Simulate clicking the image box to launch video
         DOMAIN = 'https://plus.google.com/'
index 528be1524ae645f7bb8b36ee2ac2378fd91561be..4027deb7071806fcba313a80cebe694e9f96580e 100644 (file)
@@ -14,7 +14,7 @@ from ..utils import (
 class IviIE(InfoExtractor):
     IE_DESC = 'ivi.ru'
     IE_NAME = 'ivi'
-    _VALID_URL = r'https?://(?:www\.)?ivi\.ru/watch(?:/(?P<compilationid>[^/]+))?/(?P<videoid>\d+)'
+    _VALID_URL = r'https?://(?:www\.)?ivi\.ru/(?:watch/(?:[^/]+/)?|video/player\?.*?videoId=)(?P<videoid>\d+)'
 
     _TESTS = [
         # Single movie
index 5c71f4f091ab9e30baa0700e86829f5928ddcccf..2c100d424650fed5d98330b1e5124df117296c75 100644 (file)
@@ -9,6 +9,7 @@ from ..utils import (
     compat_urlparse,
     xpath_with_ns,
     compat_str,
+    orderedSet,
 )
 
 
@@ -64,7 +65,10 @@ class LivestreamIE(InfoExtractor):
 # The original version of Livestream uses a different system
 class LivestreamOriginalIE(InfoExtractor):
     IE_NAME = 'livestream:original'
-    _VALID_URL = r'https?://www\.livestream\.com/(?P<user>[^/]+)/video\?.*?clipId=(?P<id>.*?)(&|$)'
+    _VALID_URL = r'''(?x)https?://www\.livestream\.com/
+        (?P<user>[^/]+)/(?P<type>video|folder)
+        (?:\?.*?Id=|/)(?P<id>.*?)(&|$)
+        '''
     _TEST = {
         'url': 'http://www.livestream.com/dealbook/video?clipId=pla_8aa4a3f1-ba15-46a4-893b-902210e138fb',
         'info_dict': {
@@ -78,10 +82,7 @@ class LivestreamOriginalIE(InfoExtractor):
         },
     }
 
-    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-        user = mobj.group('user')
+    def _extract_video(self, user, video_id):
         api_url = 'http://x{0}x.api.channel.livestream.com/2.0/clipdetails?extendedInfo=true&id={1}'.format(user, video_id)
 
         info = self._download_xml(api_url, video_id)
@@ -99,3 +100,44 @@ class LivestreamOriginalIE(InfoExtractor):
             'ext': 'flv',
             'thumbnail': thumbnail_url,
         }
+
+    def _extract_folder(self, url, folder_id):
+        webpage = self._download_webpage(url, folder_id)
+        urls = orderedSet(re.findall(r'<a href="(https?://livestre\.am/.*?)"', webpage))
+
+        return {
+            '_type': 'playlist',
+            'id': folder_id,
+            'entries': [{
+                '_type': 'url',
+                'url': video_url,
+            } for video_url in urls],
+        }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        id = mobj.group('id')
+        user = mobj.group('user')
+        url_type = mobj.group('type')
+        if url_type == 'folder':
+            return self._extract_folder(url, id)
+        else:
+            return self._extract_video(user, id)
+
+
+# The server doesn't support HEAD requests, so the generic extractor can't
+# detect the redirection
+class LivestreamShortenerIE(InfoExtractor):
+    IE_NAME = 'livestream:shortener'
+    IE_DESC = False  # Do not list
+    _VALID_URL = r'https?://livestre\.am/(?P<id>.+)'
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        id = mobj.group('id')
+        webpage = self._download_webpage(url, id)
+
+        return {
+            '_type': 'url',
+            'url': self._og_search_url(webpage),
+        }
diff --git a/youtube_dl/extractor/motherless.py b/youtube_dl/extractor/motherless.py
new file mode 100644 (file)
index 0000000..6229b21
--- /dev/null
@@ -0,0 +1,87 @@
+from __future__ import unicode_literals
+
+import datetime
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    int_or_none,
+    unified_strdate,
+)
+
+
+class MotherlessIE(InfoExtractor):
+    _VALID_URL = r'http://(?:www\.)?motherless\.com/(?P<id>[A-Z0-9]+)'
+    _TESTS = [
+        {
+            'url': 'http://motherless.com/AC3FFE1',
+            'md5': '5527fef81d2e529215dad3c2d744a7d9',
+            'info_dict': {
+                'id': 'AC3FFE1',
+                'ext': 'flv',
+                'title': 'Fucked in the ass while playing PS3',
+                'categories': ['Gaming', 'anal', 'reluctant', 'rough', 'Wife'],
+                'upload_date': '20100913',
+                'uploader_id': 'famouslyfuckedup',
+                'thumbnail': 're:http://.*\.jpg',
+                'age_limit': 18,
+            }
+        },
+        {
+            'url': 'http://motherless.com/532291B',
+            'md5': 'bc59a6b47d1f958e61fbd38a4d31b131',
+            'info_dict': {
+                'id': '532291B',
+                'ext': 'mp4',
+                'title': 'Amazing girl playing the omegle game, PERFECT!',
+                'categories': ['Amateur', 'webcam', 'omegle', 'pink', 'young', 'masturbate', 'teen', 'game', 'hairy'],
+                'upload_date': '20140622',
+                'uploader_id': 'Sulivana7x',
+                'thumbnail': 're:http://.*\.jpg',
+                'age_limit': 18,
+            }
+        }
+    ]
+
+    def _real_extract(self,url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        webpage = self._download_webpage(url, video_id)
+
+        title = self._html_search_regex(r'id="view-upload-title">\s+([^<]+)<', webpage, 'title')
+        
+        video_url = self._html_search_regex(r'setup\(\{\s+"file".+: "([^"]+)",', webpage, 'video_url')
+        age_limit = self._rta_search(webpage)
+
+        view_count = self._html_search_regex(r'<strong>Views</strong>\s+([^<]+)<', webpage, 'view_count')
+        upload_date = self._html_search_regex(r'<strong>Uploaded</strong>\s+([^<]+)<', webpage, 'upload_date')
+        if 'Ago' in upload_date:
+            days = int(re.search(r'([0-9]+)', upload_date).group(1))
+            upload_date = (datetime.datetime.now() - datetime.timedelta(days=days)).strftime('%Y%m%d')
+        else:
+            upload_date = unified_strdate(upload_date)
+
+        like_count = self._html_search_regex(r'<strong>Favorited</strong>\s+([^<]+)<', webpage, 'like_count')
+
+        comment_count = webpage.count('class="media-comment-contents"')
+        uploader_id = self._html_search_regex(r'"thumb-member-username">\s+<a href="/m/([^"]+)"', webpage, 'uploader_id')
+
+        categories = self._html_search_meta('keywords', webpage)
+        if categories:
+            categories = [cat.strip() for cat in categories.split(',')]
+
+        return {
+            'id': video_id,
+            'title': title,
+            'upload_date': upload_date,
+            'uploader_id': uploader_id,
+            'thumbnail': self._og_search_thumbnail(webpage),
+            'categories': categories,
+            'view_count': int_or_none(view_count.replace(',', '')),
+            'like_count': int_or_none(like_count.replace(',', '')),
+            'comment_count': comment_count,
+            'age_limit': age_limit,
+            'url': video_url,
+        }
index 39d6feb98d171f16b2ae5d69d71cde67b3a21372..387935d4db784641377b72f5be9ec5e7649f5908 100644 (file)
@@ -28,7 +28,7 @@ class MporaIE(InfoExtractor):
 
         webpage = self._download_webpage(url, video_id)
         data_json = self._search_regex(
-            r"new FM\.Player\('[^']+',\s*(\{.*?)\);\n", webpage, 'json')
+            r"new FM\.Player\('[^']+',\s*(\{.*?)\).player;", webpage, 'json')
 
         data = json.loads(data_json)
 
index e5ca41b4091698ad2a180f9d2ca00b4b96218c1e..af9490cccf05a372134585b8ac8957bb26e1c985 100644 (file)
@@ -22,6 +22,7 @@ def _media_xml_tag(tag):
 
 class MTVServicesInfoExtractor(InfoExtractor):
     _MOBILE_TEMPLATE = None
+
     @staticmethod
     def _id_from_uri(uri):
         return uri.split(':')[-1]
@@ -35,6 +36,9 @@ class MTVServicesInfoExtractor(InfoExtractor):
         base = 'http://mtvnmobile.vo.llnwd.net/kip0/_pxn=1+_pxI0=Ripod-h264+_pxL0=undefined+_pxM0=+_pxK=18639+_pxE=mp4/44620/mtvnorigin/'
         return base + m.group('finalid')
 
+    def _get_feed_url(self, uri):
+        return self._FEED_URL
+
     def _get_thumbnail_url(self, uri, itemdoc):
         search_path = '%s/%s' % (_media_xml_tag('group'), _media_xml_tag('thumbnail'))
         thumb_node = itemdoc.find(search_path)
@@ -136,10 +140,10 @@ class MTVServicesInfoExtractor(InfoExtractor):
 
     def _get_videos_info(self, uri):
         video_id = self._id_from_uri(uri)
+        feed_url = self._get_feed_url(uri)
         data = compat_urllib_parse.urlencode({'uri': uri})
-
         idoc = self._download_xml(
-            self._FEED_URL + '?' + data, video_id,
+            feed_url + '?' + data, video_id,
             'Downloading info', transform_source=fix_xml_ampersands)
         return [self._get_video_info(item) for item in idoc.findall('.//item')]
 
@@ -160,6 +164,37 @@ class MTVServicesInfoExtractor(InfoExtractor):
         return self._get_videos_info(mgid)
 
 
+class MTVServicesEmbeddedIE(MTVServicesInfoExtractor):
+    IE_NAME = 'mtvservices:embedded'
+    _VALID_URL = r'https?://media\.mtvnservices\.com/embed/(?P<mgid>.+?)(\?|/|$)'
+
+    _TEST = {
+        # From http://www.thewrap.com/peter-dinklage-sums-up-game-of-thrones-in-45-seconds-video/
+        'url': 'http://media.mtvnservices.com/embed/mgid:uma:video:mtv.com:1043906/cp~vid%3D1043906%26uri%3Dmgid%3Auma%3Avideo%3Amtv.com%3A1043906',
+        'md5': 'cb349b21a7897164cede95bd7bf3fbb9',
+        'info_dict': {
+            'id': '1043906',
+            'ext': 'mp4',
+            'title': 'Peter Dinklage Sums Up \'Game Of Thrones\' In 45 Seconds',
+            'description': '"Sexy sexy sexy, stabby stabby stabby, beautiful language," says Peter Dinklage as he tries summarizing "Game of Thrones" in under a minute.',
+        },
+    }
+
+    def _get_feed_url(self, uri):
+        video_id = self._id_from_uri(uri)
+        site_id = uri.replace(video_id, '')
+        config_url = 'http://media.mtvnservices.com/pmt/e1/players/{0}/config.xml'.format(site_id)
+        config_doc = self._download_xml(config_url, video_id)
+        feed_node = config_doc.find('.//feed')
+        feed_url = feed_node.text.strip().split('?')[0]
+        return feed_url
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        mgid = mobj.group('mgid')
+        return self._get_videos_info(mgid)
+
+
 class MTVIE(MTVServicesInfoExtractor):
     _VALID_URL = r'''(?x)^https?://
         (?:(?:www\.)?mtv\.com/videos/.+?/(?P<videoid>[0-9]+)/[^/]+$|
index 2fd5b8f0430351cd7eadf8b5acef3cf5786d27ab..551bd4d7a511c51f3d5809cf488d7e454b6ed6b5 100644 (file)
@@ -4,18 +4,19 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
+from ..utils import ExtractorError
 
 
 class NewstubeIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?newstube\.ru/media/(?P<id>.+)'
     _TEST = {
-        'url': 'http://newstube.ru/media/na-korable-progress-prodolzhaetsya-testirovanie-sistemy-kurs',
+        'url': 'http://www.newstube.ru/media/telekanal-cnn-peremestil-gorod-slavyansk-v-krym',
         'info_dict': {
-            'id': 'd156a237-a6e9-4111-a682-039995f721f1',
+            'id': '728e0ef2-e187-4012-bac0-5a081fdcb1f6',
             'ext': 'flv',
-            'title': 'На корабле «Прогресс» продолжается тестирование системы «Курс»',
-            'description': 'md5:d0cbe7b4a6f600552617e48548d5dc77',
-            'duration': 20.04,
+            'title': 'Телеканал CNN переместил город Славянск в Крым',
+            'description': 'md5:419a8c9f03442bc0b0a794d689360335',
+            'duration': 31.05,
         },
         'params': {
             # rtmp download
@@ -40,6 +41,10 @@ class NewstubeIE(InfoExtractor):
         def ns(s):
             return s.replace('/', '/%(ns)s') % {'ns': '{http://app1.newstube.ru/N2SiteWS/player.asmx}'}
 
+        error_message = player.find(ns('./ErrorMessage'))
+        if error_message is not None:
+            raise ExtractorError('%s returned error: %s' % (self.IE_NAME, error_message.text), expected=True)
+
         session_id = player.find(ns('./SessionId')).text
         media_info = player.find(ns('./Medias/MediaInfo'))
         title = media_info.find(ns('./Name')).text
index 517a72561bbaf444c54daabde4bae61f341086b3..c0c139b5df16ce900ba6920a1a004bc433eab4e9 100644 (file)
@@ -8,10 +8,9 @@ from ..utils import (
     compat_urllib_parse,
     compat_urllib_request,
     compat_urlparse,
-    compat_str,
-
-    ExtractorError,
     unified_strdate,
+    parse_duration,
+    int_or_none,
 )
 
 
@@ -30,6 +29,7 @@ class NiconicoIE(InfoExtractor):
             'uploader_id': '2698420',
             'upload_date': '20131123',
             'description': '(c) copyright 2008, Blender Foundation / www.bigbuckbunny.org',
+            'duration': 33,
         },
         'params': {
             'username': 'ydl.niconico@gmail.com',
@@ -37,17 +37,20 @@ class NiconicoIE(InfoExtractor):
         },
     }
 
-    _VALID_URL = r'^https?://(?:www\.|secure\.)?nicovideo\.jp/watch/([a-z][a-z][0-9]+)(?:.*)$'
+    _VALID_URL = r'https?://(?:www\.|secure\.)?nicovideo\.jp/watch/((?:[a-z]{2})?[0-9]+)'
     _NETRC_MACHINE = 'niconico'
+    # Determine whether the downloader uses authentication to download video
+    _AUTHENTICATE = False
 
     def _real_initialize(self):
-        self._login()
+        if self._downloader.params.get('username', None) is not None:
+            self._AUTHENTICATE = True
+
+        if self._AUTHENTICATE:
+            self._login()
 
     def _login(self):
         (username, password) = self._get_login_info()
-        if username is None:
-            # Login is required
-            raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
 
         # Log in
         login_form_strs = {
@@ -79,44 +82,66 @@ class NiconicoIE(InfoExtractor):
             'http://ext.nicovideo.jp/api/getthumbinfo/' + video_id, video_id,
             note='Downloading video info page')
 
-        # Get flv info
-        flv_info_webpage = self._download_webpage(
-            'http://flapi.nicovideo.jp/api/getflv?v=' + video_id,
-            video_id, 'Downloading flv info')
+        if self._AUTHENTICATE:
+            # Get flv info
+            flv_info_webpage = self._download_webpage(
+                'http://flapi.nicovideo.jp/api/getflv?v=' + video_id,
+                video_id, 'Downloading flv info')
+        else:
+            # Get external player info
+            ext_player_info = self._download_webpage(
+                'http://ext.nicovideo.jp/thumb_watch/' + video_id, video_id)
+            thumb_play_key = self._search_regex(
+                r'\'thumbPlayKey\'\s*:\s*\'(.*?)\'', ext_player_info, 'thumbPlayKey')
+
+            # Get flv info
+            flv_info_data = compat_urllib_parse.urlencode({
+                'k': thumb_play_key,
+                'v': video_id
+            })
+            flv_info_request = compat_urllib_request.Request(
+                'http://ext.nicovideo.jp/thumb_watch', flv_info_data,
+                {'Content-Type': 'application/x-www-form-urlencoded'})
+            flv_info_webpage = self._download_webpage(
+                flv_info_request, video_id,
+                note='Downloading flv info', errnote='Unable to download flv info')
+
         video_real_url = compat_urlparse.parse_qs(flv_info_webpage)['url'][0]
 
         # Start extracting information
-        video_title = video_info.find('.//title').text
-        video_extension = video_info.find('.//movie_type').text
-        video_format = video_extension.upper()
-        video_thumbnail = video_info.find('.//thumbnail_url').text
-        video_description = video_info.find('.//description').text
-        video_uploader_id = video_info.find('.//user_id').text
-        video_upload_date = unified_strdate(video_info.find('.//first_retrieve').text.split('+')[0])
-        video_view_count = video_info.find('.//view_counter').text
-        video_webpage_url = video_info.find('.//watch_url').text
-
-        # uploader
-        video_uploader = video_uploader_id
-        url = 'http://seiga.nicovideo.jp/api/user/info?id=' + video_uploader_id
-        try:
-            user_info = self._download_xml(
-                url, video_id, note='Downloading user information')
-            video_uploader = user_info.find('.//nickname').text
-        except ExtractorError as err:
-            self._downloader.report_warning('Unable to download user info webpage: %s' % compat_str(err))
+        title = video_info.find('.//title').text
+        extension = video_info.find('.//movie_type').text
+        video_format = extension.upper()
+        thumbnail = video_info.find('.//thumbnail_url').text
+        description = video_info.find('.//description').text
+        upload_date = unified_strdate(video_info.find('.//first_retrieve').text.split('+')[0])
+        view_count = int_or_none(video_info.find('.//view_counter').text)
+        comment_count = int_or_none(video_info.find('.//comment_num').text)
+        duration = parse_duration(video_info.find('.//length').text)
+        webpage_url = video_info.find('.//watch_url').text
+
+        if video_info.find('.//ch_id') is not None:
+            uploader_id = video_info.find('.//ch_id').text
+            uploader = video_info.find('.//ch_name').text
+        elif video_info.find('.//user_id') is not None:
+            uploader_id = video_info.find('.//user_id').text
+            uploader = video_info.find('.//user_nickname').text
+        else:
+            uploader_id = uploader = None
 
         return {
             'id': video_id,
             'url': video_real_url,
-            'title': video_title,
-            'ext': video_extension,
+            'title': title,
+            'ext': extension,
             'format': video_format,
-            'thumbnail': video_thumbnail,
-            'description': video_description,
-            'uploader': video_uploader,
-            'upload_date': video_upload_date,
-            'uploader_id': video_uploader_id,
-            'view_count': video_view_count,
-            'webpage_url': video_webpage_url,
+            'thumbnail': thumbnail,
+            'description': description,
+            'uploader': uploader,
+            'upload_date': upload_date,
+            'uploader_id': uploader_id,
+            'view_count': view_count,
+            'comment_count': comment_count,
+            'duration': duration,
+            'webpage_url': webpage_url,
         }
index c2e7b67c7e5334c4f906c6a93387d18170a03c26..33daa0dec327dea3f691f5dccab5b6b312d79e4f 100644 (file)
@@ -47,7 +47,7 @@ class NineGagIE(InfoExtractor):
         webpage = self._download_webpage(url, display_id)
 
         post_view = json.loads(self._html_search_regex(
-            r'var postView = new app\.PostView\({\s*post:\s*({.+?}),', webpage, 'post view'))
+            r'var postView = new app\.PostView\({\s*post:\s*({.+?}),\s*posts:\s*prefetchedCurrentPost', webpage, 'post view'))
 
         youtube_id = post_view['videoExternalId']
         title = post_view['title']
index d451cd1bf8db30dd7b3315ee1edb35489ad6a8c0..da203538dbea3781d0daf05edbbaacbd72be622f 100644 (file)
@@ -35,7 +35,7 @@ class NocoIE(InfoExtractor):
         video_id = mobj.group('id')
 
         medias = self._download_json(
-            'http://api.noco.tv/1.0/video/medias/%s' % video_id, video_id, 'Downloading video JSON')
+            'https://api.noco.tv/1.0/video/medias/%s' % video_id, video_id, 'Downloading video JSON')
 
         formats = []
 
@@ -43,7 +43,7 @@ class NocoIE(InfoExtractor):
             format_id = fmt['quality_key']
 
             file = self._download_json(
-                'http://api.noco.tv/1.0/video/file/%s/fr/%s' % (format_id.lower(), video_id),
+                'https://api.noco.tv/1.0/video/file/%s/fr/%s' % (format_id.lower(), video_id),
                 video_id, 'Downloading %s video JSON' % format_id)
 
             file_url = file['file']
@@ -71,7 +71,7 @@ class NocoIE(InfoExtractor):
         self._sort_formats(formats)
 
         show = self._download_json(
-            'http://api.noco.tv/1.0/shows/show/%s' % video_id, video_id, 'Downloading show JSON')[0]
+            'https://api.noco.tv/1.0/shows/show/%s' % video_id, video_id, 'Downloading show JSON')[0]
 
         upload_date = unified_strdate(show['indexed'])
         uploader = show['partner_name']
diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py
new file mode 100644 (file)
index 0000000..fbcbe1f
--- /dev/null
@@ -0,0 +1,62 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    unified_strdate,
+)
+
+
+class NPOIE(InfoExtractor):
+    IE_NAME = 'npo.nl'
+    _VALID_URL = r'https?://www\.npo\.nl/[^/]+/[^/]+/(?P<id>[^/?]+)'
+
+    _TEST = {
+        'url': 'http://www.npo.nl/nieuwsuur/22-06-2014/VPWON_1220719',
+        'md5': '4b3f9c429157ec4775f2c9cb7b911016',
+        'info_dict': {
+            'id': 'VPWON_1220719',
+            'ext': 'mp4',
+            'title': 'Nieuwsuur',
+            'description': 'Dagelijks tussen tien en elf: nieuws, sport en achtergronden.',
+            'upload_date': '20140622',
+        },
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        metadata = self._download_json(
+            'http://e.omroep.nl/metadata/aflevering/%s' % video_id,
+            video_id,
+            # We have to remove the javascript callback
+            transform_source=lambda j: re.sub(r'parseMetadata\((.*?)\);\n//epc', r'\1', j)
+        )
+        token_page = self._download_webpage(
+            'http://ida.omroep.nl/npoplayer/i.js',
+            video_id,
+            note='Downloading token'
+        )
+        token = self._search_regex(r'npoplayer.token = "(.+?)"', token_page, 'token')
+        streams_info = self._download_json(
+            'http://ida.omroep.nl/odi/?prid=%s&puboptions=h264_std&adaptive=yes&token=%s' % (video_id, token),
+            video_id
+        )
+
+        stream_info = self._download_json(
+            streams_info['streams'][0] + '&type=json',
+            video_id,
+            'Downloading stream info'
+        )
+
+        return {
+            'id': video_id,
+            'title': metadata['titel'],
+            'ext': 'mp4',
+            'url': stream_info['url'],
+            'description': metadata['info'],
+            'thumbnail': metadata['images'][-1]['url'],
+            'upload_date': unified_strdate(metadata['gidsdatum']),
+        }
index cb4305349d6eb814a3f532384280b504837a9b1f..ba3dd707f8b5d38363dacce62e9fe684265c6a96 100644 (file)
@@ -35,7 +35,8 @@ class RaiIE(SubtitlesInfoExtractor):
                 'description': '',
                 'upload_date': '20140612',
                 'duration': 1758,
-            }
+            },
+            'skip': 'Error 404',
         },
         {
             'url': 'http://www.rainews.it/dl/rainews/media/state-of-the-net-Antonella-La-Carpia-regole-virali-7aafdea9-0e5d-49d5-88a6-7e65da67ae13.html',
diff --git a/youtube_dl/extractor/soundgasm.py b/youtube_dl/extractor/soundgasm.py
new file mode 100644 (file)
index 0000000..a4f8ce6
--- /dev/null
@@ -0,0 +1,40 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+
+class SoundgasmIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?soundgasm\.net/u/(?P<user>[0-9a-zA-Z_\-]+)/(?P<title>[0-9a-zA-Z_\-]+)'
+    _TEST = {
+        'url': 'http://soundgasm.net/u/ytdl/Piano-sample',
+        'md5': '010082a2c802c5275bb00030743e75ad',
+        'info_dict': {
+            'id': '88abd86ea000cafe98f96321b23cc1206cbcbcc9',
+            'ext': 'm4a',
+            'title': 'ytdl_Piano-sample',
+            'description': 'Royalty Free Sample Music'
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        display_id = mobj.group('title')
+        audio_title = mobj.group('user') + '_' + mobj.group('title')
+        webpage = self._download_webpage(url, display_id)
+        audio_url = self._html_search_regex(
+            r'(?s)m4a\:\s"([^"]+)"', webpage, 'audio URL')
+        audio_id = re.split('\/|\.', audio_url)[-2]
+        description = self._html_search_regex(
+            r'(?s)<li>Description:\s(.*?)<\/li>', webpage, 'description',
+            fatal=False)
+
+        return {
+            'id': audio_id,
+            'display_id': display_id,
+            'url': audio_url,
+            'title': audio_title,
+            'description': description
+        }
index 9156d7fafd6ac2688ab329b31f1b683ab868def0..340a38440d02ad28b5eb6ab19916eee870818c35 100644 (file)
@@ -1,3 +1,4 @@
+# encoding: utf-8
 from __future__ import unicode_literals
 
 import re
@@ -9,18 +10,33 @@ class SpiegelIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?spiegel\.de/video/[^/]*-(?P<videoID>[0-9]+)(?:\.html)?(?:#.*)?$'
     _TESTS = [{
         'url': 'http://www.spiegel.de/video/vulkan-tungurahua-in-ecuador-ist-wieder-aktiv-video-1259285.html',
-        'file': '1259285.mp4',
         'md5': '2c2754212136f35fb4b19767d242f66e',
         'info_dict': {
+            'id': '1259285',
+            'ext': 'mp4',
             'title': 'Vulkanausbruch in Ecuador: Der "Feuerschlund" ist wieder aktiv',
+            'description': 'md5:8029d8310232196eb235d27575a8b9f4',
+            'duration': 49,
         },
-    },
-    {
+    }, {
         'url': 'http://www.spiegel.de/video/schach-wm-videoanalyse-des-fuenften-spiels-video-1309159.html',
-        'file': '1309159.mp4',
         'md5': 'f2cdf638d7aa47654e251e1aee360af1',
         'info_dict': {
+            'id': '1309159',
+            'ext': 'mp4',
             'title': 'Schach-WM in der Videoanalyse: Carlsen nutzt die Fehlgriffe des Titelverteidigers',
+            'description': 'md5:c2322b65e58f385a820c10fa03b2d088',
+            'duration': 983,
+        },
+    }, {
+        'url': 'http://www.spiegel.de/video/johann-westhauser-videobotschaft-des-hoehlenforschers-video-1502367.html',
+        'md5': '54f58ba0e752e3c07bc2a26222dd0acf',
+        'info_dict': {
+            'id': '1502367',
+            'ext': 'mp4',
+            'title': 'Videobotschaft: Höhlenforscher Westhauser dankt seinen Rettern',
+            'description': 'md5:c6f1ec11413ebd1088b6813943e5fc91',
+            'duration': 42,
         },
     }]
 
@@ -30,18 +46,20 @@ class SpiegelIE(InfoExtractor):
 
         webpage = self._download_webpage(url, video_id)
 
-        video_title = self._html_search_regex(
+        title = self._html_search_regex(
             r'<div class="module-title">(.*?)</div>', webpage, 'title')
+        description = self._html_search_meta('description', webpage, 'description')
+
+        base_url = self._search_regex(
+            r'var\s+server\s*=\s*"([^"]+)\"', webpage, 'server URL')
 
-        xml_url = 'http://video2.spiegel.de/flash/' + video_id + '.xml'
-        idoc = self._download_xml(
-            xml_url, video_id,
-            note='Downloading XML', errnote='Failed to download XML')
+        xml_url = base_url + video_id + '.xml'
+        idoc = self._download_xml(xml_url, video_id)
 
         formats = [
             {
                 'format_id': n.tag.rpartition('type')[2],
-                'url': 'http://video2.spiegel.de/flash/' + n.find('./filename').text,
+                'url': base_url + n.find('./filename').text,
                 'width': int(n.find('./width').text),
                 'height': int(n.find('./height').text),
                 'abr': int(n.find('./audiobitrate').text),
@@ -59,7 +77,8 @@ class SpiegelIE(InfoExtractor):
 
         return {
             'id': video_id,
-            'title': video_title,
+            'title': title,
+            'description': description,
             'duration': duration,
             'formats': formats,
         }
index 36331529eb92f81d97d910b30610b4176a56667b..25b9864add9dc8422a5948111d25ea8243e10441 100644 (file)
@@ -20,13 +20,13 @@ class TagesschauIE(InfoExtractor):
             'thumbnail': 're:^http:.*\.jpg$',
         },
     }, {
-        'url': 'http://www.tagesschau.de/multimedia/video/video-196.html',
-        'md5': '8aaa8bf3ae1ca2652309718c03019128',
+        'url': 'http://www.tagesschau.de/multimedia/video/video-5964.html',
+        'md5': '66652566900963a3f962333579eeffcf',
         'info_dict': {
-            'id': '196',
+            'id': '5964',
             'ext': 'mp4',
-            'title': 'Ukraine-Konflikt: Klitschko in Kiew als Bürgermeister vereidigt',
-            'description': 'md5:f22e4af75821d174fa6c977349682691',
+            'title': 'Nahost-Konflikt: Israel bombadiert Ziele im Gazastreifen und Westjordanland',
+            'description': 'md5:07bfc78c48eec3145ed4805299a1900a',
             'thumbnail': 're:http://.*\.jpg',
         },
     }]
index 4d9666c6b14980d6b04ebe1bd07cc3e79a9dd2bd..2c2113b1404fb3631126636bdb38ba839008a404 100644 (file)
@@ -14,7 +14,7 @@ class TeacherTubeIE(InfoExtractor):
     IE_NAME = 'teachertube'
     IE_DESC = 'teachertube.com videos'
 
-    _VALID_URL = r'https?://(?:www\.)?teachertube\.com/(viewVideo\.php\?video_id=|music\.php\?music_id=)(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www\.)?teachertube\.com/(viewVideo\.php\?video_id=|music\.php\?music_id=|video/(?:[\da-z-]+-)?|audio/)(?P<id>\d+)'
 
     _TESTS = [{
         'url': 'http://www.teachertube.com/viewVideo.php?video_id=339997',
@@ -22,8 +22,8 @@ class TeacherTubeIE(InfoExtractor):
         'info_dict': {
             'id': '339997',
             'ext': 'mp4',
-            'title': 'Measures of dispersion from a frequency table_x264',
-            'description': 'md5:a3e9853487185e9fcd7181a07164650b',
+            'title': 'Measures of dispersion from a frequency table',
+            'description': 'Measures of dispersion from a frequency table',
             'thumbnail': 're:http://.*\.jpg',
         },
     }, {
@@ -33,7 +33,7 @@ class TeacherTubeIE(InfoExtractor):
             'id': '340064',
             'ext': 'mp4',
             'title': 'How to Make Paper Dolls _ Paper Art Projects',
-            'description': 'md5:2ca52b20cd727773d1dc418b3d6bd07b',
+            'description': 'Learn how to make paper dolls in this simple',
             'thumbnail': 're:http://.*\.jpg',
         },
     }, {
@@ -43,7 +43,16 @@ class TeacherTubeIE(InfoExtractor):
             'id': '8805',
             'ext': 'mp3',
             'title': 'PER ASPERA AD ASTRA',
-            'description': 'RADIJSKA EMISIJA ZRAKOPLOVNE TEHNIČKE ŠKOLE PER ASPERA AD ASTRA',
+            'description': 'RADIJSKA EMISIJA ZRAKOPLOVNE TEHNI?KE ?KOLE P',
+        },
+    }, {
+        'url': 'http://www.teachertube.com/video/intro-video-schleicher-297790',
+        'md5': '9c79fbb2dd7154823996fc28d4a26998',
+        'info_dict': {
+            'id': '297790',
+            'ext': 'mp4',
+            'title': 'Intro Video - Schleicher',
+            'description': 'Intro Video - Why to flip, how flipping will',
         },
     }]
 
@@ -53,9 +62,20 @@ class TeacherTubeIE(InfoExtractor):
 
         webpage = self._download_webpage(url, video_id)
 
+        title = self._html_search_meta('title', webpage, 'title')
+        TITLE_SUFFIX = ' - TeacherTube'
+        if title.endswith(TITLE_SUFFIX):
+            title = title[:-len(TITLE_SUFFIX)].strip()
+
+        description = self._html_search_meta('description', webpage, 'description')
+        if description:
+            description = description.strip()
+
         quality = qualities(['mp3', 'flv', 'mp4'])
 
-        _, media_urls = zip(*re.findall(r'([\'"])file\1\s*:\s*"([^"]+)"', webpage))
+        media_urls = re.findall(r'data-contenturl="([^"]+)"', webpage)
+        media_urls.extend(re.findall(r'var\s+filePath\s*=\s*"([^"]+)"', webpage))
+        media_urls.extend(re.findall(r'\'file\'\s*:\s*["\']([^"\']+)["\'],', webpage))
 
         formats = [
             {
@@ -68,28 +88,37 @@ class TeacherTubeIE(InfoExtractor):
 
         return {
             'id': video_id,
-            'title': self._og_search_title(webpage),
-            'thumbnail': self._og_search_thumbnail(webpage),
+            'title': title,
+            'thumbnail': self._html_search_regex(r'\'image\'\s*:\s*["\']([^"\']+)["\']', webpage, 'thumbnail'),
             'formats': formats,
-            'description': self._og_search_description(webpage),
+            'description': description,
         }
 
 
-class TeacherTubeClassroomIE(InfoExtractor):
-    IE_NAME = 'teachertube:classroom'
-    IE_DESC = 'teachertube.com online classrooms'
+class TeacherTubeUserIE(InfoExtractor):
+    IE_NAME = 'teachertube:user:collection'
+    IE_DESC = 'teachertube.com user and collection videos'
+
+    _VALID_URL = r'https?://(?:www\.)?teachertube\.com/(user/profile|collection)/(?P<user>[0-9a-zA-Z]+)/?'
 
-    _VALID_URL = r'https?://(?:www\.)?teachertube\.com/view_classroom\.php\?user=(?P<user>[0-9a-zA-Z]+)'
+    _MEDIA_RE = r'(?s)"sidebar_thumb_time">[0-9:]+</div>.+?<a href="(https?://(?:www\.)?teachertube\.com/(?:video|audio)/[^"]+)">'
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         user_id = mobj.group('user')
 
-        rss = self._download_xml('http://www.teachertube.com/rssclassroom.php?mode=user&username=%s' % user_id,
-                                      user_id, 'Downloading classroom RSS')
+        urls = []
+        webpage = self._download_webpage(url, user_id)
+        urls.extend(re.findall(self._MEDIA_RE, webpage))
+        
+        pages = re.findall(r'/ajax-user/user-videos/%s\?page=([0-9]+)' % user_id, webpage)[1:-1]
+        for p in pages:
+            more = 'http://www.teachertube.com/ajax-user/user-videos/%s?page=%s' % (user_id, p)
+            webpage = self._download_webpage(more, user_id, 'Downloading page %s/%s' % (p, len(pages) + 1))
+            urls.extend(re.findall(self._MEDIA_RE, webpage))
 
         entries = []
-        for url in rss.findall('.//{http://search.yahoo.com/mrss/}player'):
-            entries.append(self.url_result(url.attrib['url'], 'TeacherTube'))
+        for url in urls:
+            entries.append(self.url_result(url, 'TeacherTube'))
 
         return self.playlist_result(entries, user_id)
index 34008afc6b87226d37aee4612a4b782c7c392a57..0f389bd93a1f35eb35346f7ee99b0b91a9c9b876 100644 (file)
@@ -1,10 +1,13 @@
+# -*- coding:utf-8 -*-
+from __future__ import unicode_literals
+
 from .common import InfoExtractor
 import re
 
 
 class ToypicsIE(InfoExtractor):
     IE_DESC = 'Toypics user profile'
-    _VALID_URL = r'http://videos\.toypics\.net/view/(?P<id>[0-9]+)/.*'
+    _VALID_URL = r'https?://videos\.toypics\.net/view/(?P<id>[0-9]+)/.*'
     _TEST = {
         'url': 'http://videos.toypics.net/view/514/chancebulged,-2-1/',
         'md5': '16e806ad6d6f58079d210fe30985e08b',
@@ -61,7 +64,7 @@ class ToypicsUserIE(InfoExtractor):
                 note='Downloading page %d/%d' % (n, page_count))
             urls.extend(
                 re.findall(
-                    r'<p class="video-entry-title">\n\s*<a href="(http://videos.toypics.net/view/[^"]+)">',
+                    r'<p class="video-entry-title">\s+<a href="(https?://videos.toypics.net/view/[^"]+)">',
                     lpage))
 
         return {
index 54436906875a727ac6f6eeb3de555f0c0cd461bf..2882c1809e0bd55c1e6c8b441c19293aeb64d301 100644 (file)
@@ -1,3 +1,4 @@
+# -*- coding: utf-8 -*-
 from __future__ import unicode_literals
 
 import re
@@ -10,14 +11,27 @@ from ..utils import (
 
 class TumblrIE(InfoExtractor):
     _VALID_URL = r'http://(?P<blog_name>.*?)\.tumblr\.com/((post)|(video))/(?P<id>\d*)($|/)'
-    _TEST = {
+    _TESTS = [{
         'url': 'http://tatianamaslanydaily.tumblr.com/post/54196191430/orphan-black-dvd-extra-behind-the-scenes',
-        'file': '54196191430.mp4',
         'md5': '479bb068e5b16462f5176a6828829767',
         'info_dict': {
-            "title": "tatiana maslany news"
+            'id': '54196191430',
+            'ext': 'mp4',
+            'title': 'tatiana maslany news, Orphan Black || DVD extra - behind the scenes ↳...',
+            'description': 'md5:dfac39636969fe6bf1caa2d50405f069',
+            'thumbnail': 're:http://.*\.jpg',
         }
-    }
+    }, {
+        'url': 'http://5sostrum.tumblr.com/post/90208453769/yall-forgetting-the-greatest-keek-of-them-all',
+        'md5': 'bf348ef8c0ef84fbf1cbd6fa6e000359',
+        'info_dict': {
+            'id': '90208453769',
+            'ext': 'mp4',
+            'title': '5SOS STRUM ;)',
+            'description': 'md5:dba62ac8639482759c8eb10ce474586a',
+            'thumbnail': 're:http://.*\.jpg',
+        }
+    }]
 
     def _real_extract(self, url):
         m_url = re.match(self._VALID_URL, url)
@@ -48,6 +62,7 @@ class TumblrIE(InfoExtractor):
         return [{'id': video_id,
                  'url': video_url,
                  'title': video_title,
+                 'description': self._html_search_meta('description', webpage),
                  'thumbnail': video_thumbnail,
                  'ext': ext
                  }]
index fb132aef68fff7dc9ae3c4098ad2d31a5945825a..a7953a7e7c5d33b154435cd7b4afa354994f4bf5 100644 (file)
@@ -49,6 +49,7 @@ class VeohIE(InfoExtractor):
                 'description': 'md5:f5a11c51f8fb51d2315bca0937526891',
                 'uploader': 'newsy-videos',
             },
+            'skip': 'This video has been deleted.',
         },
     ]
 
index b5034b02f9bb352ad7bb081995ad3e9963c98088..a647807d01f8b3e5a2ed3eab99acc545533327d4 100644 (file)
@@ -4,7 +4,10 @@ import re
 import base64
 
 from .common import InfoExtractor
-from ..utils import unified_strdate
+from ..utils import (
+    unified_strdate,
+    int_or_none,
+)
 
 
 class VideoTtIE(InfoExtractor):
@@ -50,9 +53,9 @@ class VideoTtIE(InfoExtractor):
             'thumbnail': settings['config']['thumbnail'],
             'upload_date': unified_strdate(video['added']),
             'uploader': video['owner'],
-            'view_count': int(video['view_count']),
-            'comment_count': int(video['comment_count']),
-            'like_count': int(video['liked']),
-            'dislike_count': int(video['disliked']),
+            'view_count': int_or_none(video['view_count']),
+            'comment_count': None if video.get('comment_count') == '--' else int_or_none(video['comment_count']),
+            'like_count': int_or_none(video['liked']),
+            'dislike_count': int_or_none(video['disliked']),
             'formats': formats,
         }
\ No newline at end of file
index fb082f36412bb714669e59d071bf609622a5cc56..918bd10988a2c49fac2fc76bb7481e1a3ed7fdf1 100644 (file)
@@ -16,7 +16,7 @@ from ..utils import (
 
 class VKIE(InfoExtractor):
     IE_NAME = 'vk.com'
-    _VALID_URL = r'https?://vk\.com/(?:video_ext\.php\?.*?\boid=(?P<oid>-?\d+).*?\bid=(?P<id>\d+)|(?:videos.*?\?.*?z=)?video(?P<videoid>.*?)(?:\?|%2F|$))'
+    _VALID_URL = r'https?://(?:m\.)?vk\.com/(?:video_ext\.php\?.*?\boid=(?P<oid>-?\d+).*?\bid=(?P<id>\d+)|(?:.+?\?.*?z=)?video(?P<videoid>.*?)(?:\?|%2F|$))'
     _NETRC_MACHINE = 'vk'
 
     _TESTS = [
@@ -27,7 +27,7 @@ class VKIE(InfoExtractor):
                 'id': '162222515',
                 'ext': 'flv',
                 'title': 'ProtivoGunz - Хуёвая песня',
-                'uploader': 'Noize MC',
+                'uploader': 're:Noize MC.*',
                 'duration': 195,
             },
         },
@@ -62,11 +62,47 @@ class VKIE(InfoExtractor):
                 'id': '164049491',
                 'ext': 'mp4',
                 'uploader': 'Триллеры',
-                'title': '► Бойцовский клуб / Fight Club 1999 [HD 720]\u00a0',
+                'title': '► Бойцовский клуб / Fight Club 1999 [HD 720]',
                 'duration': 8352,
             },
             'skip': 'Requires vk account credentials',
         },
+        {
+            'url': 'http://vk.com/feed?z=video-43215063_166094326%2Fbb50cacd3177146d7a',
+            'md5': 'd82c22e449f036282d1d3f7f4d276869',
+            'info_dict': {
+                'id': '166094326',
+                'ext': 'mp4',
+                'uploader': 'Киномания - лучшее из мира кино',
+                'title': 'Запах женщины (1992)',
+                'duration': 9392,
+            },
+            'skip': 'Requires vk account credentials',
+        },
+        {
+            'url': 'http://vk.com/hd_kino_mania?z=video-43215063_168067957%2F15c66b9b533119788d',
+            'md5': '4d7a5ef8cf114dfa09577e57b2993202',
+            'info_dict': {
+                'id': '168067957',
+                'ext': 'mp4',
+                'uploader': 'Киномания - лучшее из мира кино',
+                'title': ' ',
+                'duration': 7291,
+            },
+            'skip': 'Requires vk account credentials',
+        },
+        {
+            'url': 'http://m.vk.com/video-43215063_169084319?list=125c627d1aa1cebb83&from=wall-43215063_2566540',
+            'md5': '0c45586baa71b7cb1d0784ee3f4e00a6',
+            'note': 'ivi.ru embed',
+            'info_dict': {
+                'id': '60690',
+                'ext': 'mp4',
+                'title': 'Книга Илая',
+                'duration': 6771,
+            },
+            'skip': 'Only works from Russia',
+        },
     ]
 
     def _login(self):
@@ -110,6 +146,16 @@ class VKIE(InfoExtractor):
         if m_yt is not None:
             self.to_screen('Youtube video detected')
             return self.url_result(m_yt.group(1), 'Youtube')
+
+        m_opts = re.search(r'(?s)var\s+opts\s*=\s*({.*?});', info_page)
+        if m_opts:
+            m_opts_url = re.search(r"url\s*:\s*'([^']+)", m_opts.group(1))
+            if m_opts_url:
+                opts_url = m_opts_url.group(1)
+                if opts_url.startswith('//'):
+                    opts_url = 'http:' + opts_url
+                return self.url_result(opts_url)
+
         data_json = self._search_regex(r'var vars = ({.*?});', info_page, 'vars')
         data = json.loads(data_json)
 
index feeb44b45ff32b4738957e44a6603cde93c8b9a4..f741ba54007737e132f327178be283a478b6527f 100644 (file)
@@ -1,3 +1,4 @@
+# -*- coding: utf-8 -*-
 from __future__ import unicode_literals
 
 import re
@@ -54,14 +55,14 @@ class WDRIE(InfoExtractor):
             },
         },
         {
-            'url': 'http://www.funkhauseuropa.de/av/audiogrenzenlosleckerbaklava101-audioplayer.html',
-            'md5': 'cfff440d4ee64114083ac44676df5d15',
+            'url': 'http://www.funkhauseuropa.de/av/audiosuepersongsoulbossanova100-audioplayer.html',
+            'md5': '24e83813e832badb0a8d7d1ef9ef0691',
             'info_dict': {
-                'id': 'mdb-363068',
+                'id': 'mdb-463528',
                 'ext': 'mp3',
-                'title': 'Grenzenlos lecker - Baklava',
+                'title': 'Süpersong: Soul Bossa Nova',
                 'description': 'md5:7b29e97e10dfb6e265238b32fa35b23a',
-                'upload_date': '20140311',
+                'upload_date': '20140630',
             },
         },
     ]
@@ -127,9 +128,10 @@ class WDRMobileIE(InfoExtractor):
         'info_dict': {
             'title': '4283021',
             'id': '421735',
+            'ext': 'mp4',
             'age_limit': 0,
         },
-        '_skip': 'Will be depublicized shortly'
+        'skip': 'Problems with loading data.'
     }
 
     def _real_extract(self, url):
@@ -139,6 +141,7 @@ class WDRMobileIE(InfoExtractor):
             'title': mobj.group('title'),
             'age_limit': int(mobj.group('age_limit')),
             'url': url,
+            'ext': determine_ext(url),
             'user_agent': 'mobile',
         }
 
index bc31c2e64f22999adf575e60d59bde3d903bb9cc..e6bfa9e147a2b62e6dbcdd52343675bb9ca52a65 100644 (file)
@@ -1,3 +1,5 @@
+from __future__ import unicode_literals
+
 import json
 import re
 
@@ -5,14 +7,16 @@ from .common import InfoExtractor
 
 
 class WistiaIE(InfoExtractor):
-    _VALID_URL = r'^https?://(?:fast\.)?wistia\.net/embed/iframe/(?P<id>[a-z0-9]+)'
+    _VALID_URL = r'https?://(?:fast\.)?wistia\.net/embed/iframe/(?P<id>[a-z0-9]+)'
 
     _TEST = {
-        u"url": u"http://fast.wistia.net/embed/iframe/sh7fpupwlt",
-        u"file": u"sh7fpupwlt.mov",
-        u"md5": u"cafeb56ec0c53c18c97405eecb3133df",
-        u"info_dict": {
-            u"title": u"cfh_resourceful_zdkh_final_1"
+        'url': 'http://fast.wistia.net/embed/iframe/sh7fpupwlt',
+        'md5': 'cafeb56ec0c53c18c97405eecb3133df',
+        'info_dict': {
+            'id': 'sh7fpupwlt',
+            'ext': 'mov',
+            'title': 'Being Resourceful',
+            'duration': 117,
         },
     }
 
@@ -22,7 +26,7 @@ class WistiaIE(InfoExtractor):
 
         webpage = self._download_webpage(url, video_id)
         data_json = self._html_search_regex(
-            r'Wistia.iframeInit\((.*?), {}\);', webpage, u'video data')
+            r'Wistia\.iframeInit\((.*?), {}\);', webpage, 'video data')
 
         data = json.loads(data_json)
 
@@ -54,4 +58,5 @@ class WistiaIE(InfoExtractor):
             'title': data['name'],
             'formats': formats,
             'thumbnails': thumbnails,
+            'duration': data.get('duration'),
         }
index d45545ee490867d8f2e89d5c36e038364281a72d..6123e12564b7934032ed619b672b6277a75bace0 100644 (file)
@@ -224,6 +224,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
         '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
         '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
         '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
+        '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
 
         # Dash webm audio
         '171': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 48, 'preference': -50},
@@ -864,71 +865,26 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
     def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
         """Turn the encrypted s field into a working signature"""
 
-        if player_url is not None:
-            if player_url.startswith(u'//'):
-                player_url = u'https:' + player_url
-            try:
-                player_id = (player_url, len(s))
-                if player_id not in self._player_cache:
-                    func = self._extract_signature_function(
-                        video_id, player_url, len(s)
-                    )
-                    self._player_cache[player_id] = func
-                func = self._player_cache[player_id]
-                if self._downloader.params.get('youtube_print_sig_code'):
-                    self._print_sig_code(func, len(s))
-                return func(s)
-            except Exception:
-                tb = traceback.format_exc()
-                self._downloader.report_warning(
-                    u'Automatic signature extraction failed: ' + tb)
-
-            self._downloader.report_warning(
-                u'Warning: Falling back to static signature algorithm')
-
-        return self._static_decrypt_signature(
-            s, video_id, player_url, age_gate)
-
-    def _static_decrypt_signature(self, s, video_id, player_url, age_gate):
-        if age_gate:
-            # The videos with age protection use another player, so the
-            # algorithms can be different.
-            if len(s) == 86:
-                return s[2:63] + s[82] + s[64:82] + s[63]
-
-        if len(s) == 93:
-            return s[86:29:-1] + s[88] + s[28:5:-1]
-        elif len(s) == 92:
-            return s[25] + s[3:25] + s[0] + s[26:42] + s[79] + s[43:79] + s[91] + s[80:83]
-        elif len(s) == 91:
-            return s[84:27:-1] + s[86] + s[26:5:-1]
-        elif len(s) == 90:
-            return s[25] + s[3:25] + s[2] + s[26:40] + s[77] + s[41:77] + s[89] + s[78:81]
-        elif len(s) == 89:
-            return s[84:78:-1] + s[87] + s[77:60:-1] + s[0] + s[59:3:-1]
-        elif len(s) == 88:
-            return s[7:28] + s[87] + s[29:45] + s[55] + s[46:55] + s[2] + s[56:87] + s[28]
-        elif len(s) == 87:
-            return s[6:27] + s[4] + s[28:39] + s[27] + s[40:59] + s[2] + s[60:]
-        elif len(s) == 86:
-            return s[80:72:-1] + s[16] + s[71:39:-1] + s[72] + s[38:16:-1] + s[82] + s[15::-1]
-        elif len(s) == 85:
-            return s[3:11] + s[0] + s[12:55] + s[84] + s[56:84]
-        elif len(s) == 84:
-            return s[78:70:-1] + s[14] + s[69:37:-1] + s[70] + s[36:14:-1] + s[80] + s[:14][::-1]
-        elif len(s) == 83:
-            return s[80:63:-1] + s[0] + s[62:0:-1] + s[63]
-        elif len(s) == 82:
-            return s[80:37:-1] + s[7] + s[36:7:-1] + s[0] + s[6:0:-1] + s[37]
-        elif len(s) == 81:
-            return s[56] + s[79:56:-1] + s[41] + s[55:41:-1] + s[80] + s[40:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
-        elif len(s) == 80:
-            return s[1:19] + s[0] + s[20:68] + s[19] + s[69:80]
-        elif len(s) == 79:
-            return s[54] + s[77:54:-1] + s[39] + s[53:39:-1] + s[78] + s[38:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
+        if player_url is None:
+            raise ExtractorError(u'Cannot decrypt signature without player_url')
 
-        else:
-            raise ExtractorError(u'Unable to decrypt signature, key length %d not supported; retrying might work' % (len(s)))
+        if player_url.startswith(u'//'):
+            player_url = u'https:' + player_url
+        try:
+            player_id = (player_url, len(s))
+            if player_id not in self._player_cache:
+                func = self._extract_signature_function(
+                    video_id, player_url, len(s)
+                )
+                self._player_cache[player_id] = func
+            func = self._player_cache[player_id]
+            if self._downloader.params.get('youtube_print_sig_code'):
+                self._print_sig_code(func, len(s))
+            return func(s)
+        except Exception as e:
+            tb = traceback.format_exc()
+            raise ExtractorError(
+                u'Automatic signature extraction failed: ' + tb, cause=e)
 
     def _get_available_subtitles(self, video_id, webpage):
         try:
@@ -1697,14 +1653,14 @@ class YoutubeSearchURLIE(InfoExtractor):
 
         webpage = self._download_webpage(url, query)
         result_code = self._search_regex(
-            r'(?s)<ol id="search-results"(.*?)</ol>', webpage, u'result HTML')
+            r'(?s)<ol class="item-section"(.*?)</ol>', webpage, u'result HTML')
 
         part_codes = re.findall(
             r'(?s)<h3 class="yt-lockup-title">(.*?)</h3>', result_code)
         entries = []
         for part_code in part_codes:
             part_title = self._html_search_regex(
-                r'(?s)title="([^"]+)"', part_code, 'item title', fatal=False)
+                [r'(?s)title="([^"]+)"', r'>([^<]+)</a>'], part_code, 'item title', fatal=False)
             part_url_snippet = self._html_search_regex(
                 r'(?s)href="([^"]+)"', part_code, 'item URL')
             part_url = compat_urlparse.urljoin(
@@ -1824,10 +1780,21 @@ class YoutubeTruncatedURLIE(InfoExtractor):
     IE_NAME = 'youtube:truncated_url'
     IE_DESC = False  # Do not list
     _VALID_URL = r'''(?x)
-        (?:https?://)?[^/]+/watch\?(?:feature=[a-z_]+)?$|
+        (?:https?://)?[^/]+/watch\?(?:
+            feature=[a-z_]+|
+            annotation_id=annotation_[^&]+
+        )?$|
         (?:https?://)?(?:www\.)?youtube\.com/attribution_link\?a=[^&]+$
     '''
 
+    _TESTS = [{
+        'url': 'http://www.youtube.com/watch?annotation_id=annotation_3951667041',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.youtube.com/watch?',
+        'only_matching': True,
+    }]
+
     def _real_extract(self, url):
         raise ExtractorError(
             u'Did you forget to quote the URL? Remember that & is a meta '
index 449482d3cf8dd147c62717ebc3332bbcbf6eb869..3bbb07704128cf1ab27197ca1abbeeaec086b36f 100644 (file)
@@ -59,7 +59,7 @@ class JSInterpreter(object):
             if member == 'split("")':
                 return list(val)
             if member == 'join("")':
-                return u''.join(val)
+                return ''.join(val)
             if member == 'length':
                 return len(val)
             if member == 'reverse()':
@@ -99,7 +99,7 @@ class JSInterpreter(object):
 
     def extract_function(self, funcname):
         func_m = re.search(
-            (r'(?:function %s|%s\s*=\s*function)' % (
+            (r'(?:function %s|[{;]%s\s*=\s*function)' % (
                 re.escape(funcname), re.escape(funcname))) +
             r'\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}',
             self.code)
index b97e62ae9307f7e2380db7ec9c723e8ae8517708..09312e81a485de8d04f7757eb2e26a80a10f161c 100644 (file)
@@ -816,6 +816,9 @@ def unified_strdate(date_str):
         '%d %b %Y',
         '%B %d %Y',
         '%b %d %Y',
+        '%b %dst %Y %I:%M%p',
+        '%b %dnd %Y %I:%M%p',
+        '%b %dth %Y %I:%M%p',
         '%Y-%m-%d',
         '%d.%m.%Y',
         '%d/%m/%Y',
index a332b5a8edf7d3a7bc5d2de32d176927d8a6c18e..d6b05892ca58e4fffd33d275aa9bc43a3c6b7247 100644 (file)
@@ -1,2 +1,2 @@
 
-__version__ = '2014.06.19'
+__version__ = '2014.07.11'