Raphaël G. Git Repositories - youtubedl/commitdiff
Merge tag 'upstream/2014.07.15'
author Rogério Brito <rbrito@ime.usp.br>
Wed, 16 Jul 2014 01:31:36 +0000 (22:31 -0300)
committer Rogério Brito <rbrito@ime.usp.br>
Wed, 16 Jul 2014 01:31:36 +0000 (22:31 -0300)
Upstream version 2014.07.15

31 files changed:
README.md
README.txt
test/test_playlists.py
test/test_subtitles.py
test/test_youtube_signature.py
youtube-dl
youtube-dl.1
youtube_dl/__init__.py
youtube_dl/extractor/__init__.py
youtube_dl/extractor/common.py
youtube_dl/extractor/criterion.py
youtube_dl/extractor/firedrive.py [new file with mode: 0644]
youtube_dl/extractor/gameone.py [new file with mode: 0644]
youtube_dl/extractor/gorillavid.py
youtube_dl/extractor/goshgay.py [new file with mode: 0644]
youtube_dl/extractor/mtv.py
youtube_dl/extractor/ndr.py
youtube_dl/extractor/pyvideo.py
youtube_dl/extractor/reverbnation.py [new file with mode: 0644]
youtube_dl/extractor/ruhd.py [new file with mode: 0644]
youtube_dl/extractor/screencast.py [new file with mode: 0644]
youtube_dl/extractor/soundcloud.py
youtube_dl/extractor/southpark.py [moved from youtube_dl/extractor/southparkstudios.py with 73% similarity]
youtube_dl/extractor/tenplay.py [new file with mode: 0644]
youtube_dl/extractor/tlc.py
youtube_dl/extractor/tutv.py
youtube_dl/extractor/vimple.py [new file with mode: 0644]
youtube_dl/extractor/vodlocker.py [new file with mode: 0644]
youtube_dl/jsinterp.py
youtube_dl/utils.py
youtube_dl/version.py

index dffdaa9dc3b8b334d3cc2e868533961fff0ed356..bc5e0f76df2759b004be225d4419d021a5bcf3a0 100644 (file)
--- a/README.md
+++ b/README.md
@@ -255,7 +255,7 @@ which means you can modify it, redistribute it or use it however you like.
                                      128K (default 5)
     --recode-video FORMAT            Encode the video to another format if
                                      necessary (currently supported:
-                                     mp4|flv|ogg|webm)
+                                     mp4|flv|ogg|webm|mkv)
     -k, --keep-video                 keeps the video file on disk after the
                                      post-processing; the video is erased by
                                      default
index 0f9c4700538f47c8f0228d3dbb5e504225146c5a..5555b2a72b8d6b228e606a862e5e2a13b6698db9 100644 (file)
@@ -283,7 +283,7 @@ Post-processing Options:
                                      128K (default 5)
     --recode-video FORMAT            Encode the video to another format if
                                      necessary (currently supported:
-                                     mp4|flv|ogg|webm)
+                                     mp4|flv|ogg|webm|mkv)
     -k, --keep-video                 keeps the video file on disk after the
                                      post-processing; the video is erased by
                                      default
index 994b1d4b05714634e2028a0433898bbff30da05c..1a38a667b1391ab744fa88e91d79e4eea742bbf6 100644 (file)
@@ -111,7 +111,7 @@ class TestPlaylists(unittest.TestCase):
         ie = VineUserIE(dl)
         result = ie.extract('https://vine.co/Visa')
         self.assertIsPlaylist(result)
-        self.assertTrue(len(result['entries']) >= 50)
+        self.assertTrue(len(result['entries']) >= 47)
 
     def test_ustream_channel(self):
         dl = FakeYDL()
@@ -137,6 +137,14 @@ class TestPlaylists(unittest.TestCase):
         self.assertEqual(result['id'], '9615865')
         self.assertTrue(len(result['entries']) >= 12)
 
+    def test_soundcloud_likes(self):
+        dl = FakeYDL()
+        ie = SoundcloudUserIE(dl)
+        result = ie.extract('https://soundcloud.com/the-concept-band/likes')
+        self.assertIsPlaylist(result)
+        self.assertEqual(result['id'], '9615865')
+        self.assertTrue(len(result['entries']) >= 1)
+
     def test_soundcloud_playlist(self):
         dl = FakeYDL()
         ie = SoundcloudPlaylistIE(dl)
index 5736fe58112fc88b5ae15a53863221aa806ba4eb..48c30219868b1975a8aa19b6c6fd6f2f80da6e98 100644 (file)
@@ -87,7 +87,7 @@ class TestYoutubeSubtitles(BaseTestSubtitles):
 
     def test_youtube_nosubtitles(self):
         self.DL.expect_warning(u'video doesn\'t have subtitles')
-        self.url = 'sAjKT8FhjI8'
+        self.url = 'n5BB19UTcdA'
         self.DL.params['writesubtitles'] = True
         self.DL.params['allsubtitles'] = True
         subtitles = self.getSubtitles()
index 8d46fe10826851cd8e6f72251087495313ed6fc9..d95533959481df9b458f56c14d4857d3c5230252 100644 (file)
@@ -33,6 +33,12 @@ _TESTS = [
         90,
         u']\\[@?>=<;:/.-,+*)(\'&%$#"hZYXWVUTSRQPONMLKJIHGFEDCBAzyxwvutsrqponmlkjiagfedcb39876',
     ),
+    (
+        u'https://s.ytimg.com/yts/jsbin/html5player-en_US-vfl0Cbn9e.js',
+        u'js',
+        84,
+        u'O1I3456789abcde0ghijklmnopqrstuvwxyzABCDEFGHfJKLMN2PQRSTUVW@YZ!"#$%&\'()*+,-./:;<=',
+    ),
     (
         u'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflXGBaUN.js',
         u'js',
index 3d28bb5f02d859062b9e6da302c60f58888139b0..73304fea2d5a7b72201c315456252900fdb584c9 100755 (executable)
Binary files a/youtube-dl and b/youtube-dl differ
index 47899f499a06bfacecbfa7a4ad6bbf78850aa35f..26833e8f082276b9a1f8f0de3fbdc4ccf49242d6 100644 (file)
@@ -278,7 +278,7 @@ redistribute it or use it however you like.
 \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ 128K\ (default\ 5)
 \-\-recode\-video\ FORMAT\ \ \ \ \ \ \ \ \ \ \ \ Encode\ the\ video\ to\ another\ format\ if
 \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ necessary\ (currently\ supported:
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ mp4|flv|ogg|webm)
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ mp4|flv|ogg|webm|mkv)
 \-k,\ \-\-keep\-video\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ keeps\ the\ video\ file\ on\ disk\ after\ the
 \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ post\-processing;\ the\ video\ is\ erased\ by
 \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ default
index 31ed63fcce7c9f1edaa95ce4b446e3defff0768e..5e16a549177255a7bca62292d3ca87233835269c 100644 (file)
@@ -60,6 +60,10 @@ __authors__  = (
     'Georg Jähnig',
     'Ralf Haring',
     'Koki Takahashi',
+    'Ariset Llerena',
+    'Adam Malcontenti-Wilson',
+    'Tobias Bell',
+    'Naglis Jonaitis',
 )
 
 __license__ = 'Public Domain'
@@ -506,7 +510,7 @@ def parseOpts(overrideArguments=None):
     postproc.add_option('--audio-quality', metavar='QUALITY', dest='audioquality', default='5',
             help='ffmpeg/avconv audio quality specification, insert a value between 0 (better) and 9 (worse) for VBR or a specific bitrate like 128K (default 5)')
     postproc.add_option('--recode-video', metavar='FORMAT', dest='recodevideo', default=None,
-            help='Encode the video to another format if necessary (currently supported: mp4|flv|ogg|webm)')
+            help='Encode the video to another format if necessary (currently supported: mp4|flv|ogg|webm|mkv)')
     postproc.add_option('-k', '--keep-video', action='store_true', dest='keepvideo', default=False,
             help='keeps the video file on disk after the post-processing; the video is erased by default')
     postproc.add_option('--no-post-overwrites', action='store_true', dest='nopostoverwrites', default=False,
index 12cca5c2e039826f9a0554dbfda8fead3128ea9b..e49ac3e52783608942b34deed950703d2a85dc39 100644 (file)
@@ -83,6 +83,7 @@ from .extremetube import ExtremeTubeIE
 from .facebook import FacebookIE
 from .faz import FazIE
 from .fc2 import FC2IE
+from .firedrive import FiredriveIE
 from .firstpost import FirstpostIE
 from .firsttv import FirstTVIE
 from .fivemin import FiveMinIE
@@ -105,6 +106,7 @@ from .freesound import FreesoundIE
 from .freespeech import FreespeechIE
 from .funnyordie import FunnyOrDieIE
 from .gamekings import GamekingsIE
+from .gameone import GameOneIE
 from .gamespot import GameSpotIE
 from .gametrailers import GametrailersIE
 from .gdcvault import GDCVaultIE
@@ -112,6 +114,7 @@ from .generic import GenericIE
 from .googleplus import GooglePlusIE
 from .googlesearch import GoogleSearchIE
 from .gorillavid import GorillaVidIE
+from .goshgay import GoshgayIE
 from .hark import HarkIE
 from .helsinki import HelsinkiIE
 from .hentaistigma import HentaiStigmaIE
@@ -229,6 +232,7 @@ from .radiofrance import RadioFranceIE
 from .rai import RaiIE
 from .rbmaradio import RBMARadioIE
 from .redtube import RedTubeIE
+from .reverbnation import ReverbNationIE
 from .ringtv import RingTVIE
 from .ro220 import Ro220IE
 from .rottentomatoes import RottenTomatoesIE
@@ -237,6 +241,7 @@ from .rtbf import RTBFIE
 from .rtlnow import RTLnowIE
 from .rts import RTSIE
 from .rtve import RTVEALaCartaIE
+from .ruhd import RUHDIE
 from .rutube import (
     RutubeIE,
     RutubeChannelIE,
@@ -246,6 +251,7 @@ from .rutube import (
 from .rutv import RUTVIE
 from .savefrom import SaveFromIE
 from .scivee import SciVeeIE
+from .screencast import ScreencastIE
 from .servingsys import ServingSysIE
 from .sina import SinaIE
 from .slideshare import SlideshareIE
@@ -264,8 +270,8 @@ from .soundcloud import (
     SoundcloudPlaylistIE
 )
 from .soundgasm import SoundgasmIE
-from .southparkstudios import (
-    SouthParkStudiosIE,
+from .southpark import (
+    SouthParkIE,
     SouthparkDeIE,
 )
 from .space import SpaceIE
@@ -289,6 +295,7 @@ from .teachingchannel import TeachingChannelIE
 from .teamcoco import TeamcocoIE
 from .techtalks import TechTalksIE
 from .ted import TEDIE
+from .tenplay import TenPlayIE
 from .testurl import TestURLIE
 from .tf1 import TF1IE
 from .theplatform import ThePlatformIE
@@ -336,12 +343,14 @@ from .vimeo import (
     VimeoReviewIE,
     VimeoWatchLaterIE,
 )
+from .vimple import VimpleIE
 from .vine import (
     VineIE,
     VineUserIE,
 )
 from .viki import VikiIE
 from .vk import VKIE
+from .vodlocker import VodlockerIE
 from .vube import VubeIE
 from .vuclip import VuClipIE
 from .vulture import VultureIE
index e4e4feef9ea18787d196b2ca7d3414191409ba97..e68657314ecde5406ec2d27fef005f899341daf1 100644 (file)
@@ -1,11 +1,12 @@
 import base64
 import hashlib
 import json
+import netrc
 import os
 import re
 import socket
 import sys
-import netrc
+import time
 import xml.etree.ElementTree
 
 from ..utils import (
@@ -462,14 +463,14 @@ class InfoExtractor(object):
     def _og_search_url(self, html, **kargs):
         return self._og_search_property('url', html, **kargs)
 
-    def _html_search_meta(self, name, html, display_name=None, fatal=False):
+    def _html_search_meta(self, name, html, display_name=None, fatal=False, **kwargs):
         if display_name is None:
             display_name = name
         return self._html_search_regex(
             r'''(?ix)<meta
                     (?=[^>]+(?:itemprop|name|property)=["\']%s["\'])
                     [^>]+content=["\']([^"\']+)["\']''' % re.escape(name),
-            html, display_name, fatal=fatal)
+            html, display_name, fatal=fatal, **kwargs)
 
     def _dc_search_uploader(self, html):
         return self._html_search_meta('dc.creator', html, 'uploader')
@@ -575,6 +576,13 @@ class InfoExtractor(object):
         else:
             return url
 
+    def _sleep(self, timeout, video_id, msg_template=None):
+        if msg_template is None:
+            msg_template = u'%(video_id)s: Waiting for %(timeout)s seconds'
+        msg = msg_template % {'video_id': video_id, 'timeout': timeout}
+        self.to_screen(msg)
+        time.sleep(timeout)
+
 
 class SearchInfoExtractor(InfoExtractor):
     """
@@ -618,4 +626,3 @@ class SearchInfoExtractor(InfoExtractor):
     @property
     def SEARCH_KEY(self):
         return self._SEARCH_KEY
-
index 31fe3d57b31ccddccadbbcbb66244ef6aea63cd2..4fb1781659b3266b4c475b566911385ec2f7c5b7 100644 (file)
@@ -1,40 +1,43 @@
 # -*- coding: utf-8 -*-
+from __future__ import unicode_literals
 
 import re
 
 from .common import InfoExtractor
-from ..utils import determine_ext
+
 
 class CriterionIE(InfoExtractor):
-    _VALID_URL = r'https?://www\.criterion\.com/films/(\d*)-.+'
+    _VALID_URL = r'https?://www\.criterion\.com/films/(?P<id>[0-9]+)-.+'
     _TEST = {
-        u'url': u'http://www.criterion.com/films/184-le-samourai',
-        u'file': u'184.mp4',
-        u'md5': u'bc51beba55685509883a9a7830919ec3',
-        u'info_dict': {
-            u"title": u"Le Samouraï",
-            u"description" : u'md5:a2b4b116326558149bef81f76dcbb93f',
+        'url': 'http://www.criterion.com/films/184-le-samourai',
+        'md5': 'bc51beba55685509883a9a7830919ec3',
+        'info_dict': {
+            'id': '184',
+            'ext': 'mp4',
+            'title': 'Le Samouraï',
+            'description': 'md5:a2b4b116326558149bef81f76dcbb93f',
         }
     }
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group(1)
+        video_id = mobj.group('id')
         webpage = self._download_webpage(url, video_id)
 
-        final_url = self._search_regex(r'so.addVariable\("videoURL", "(.+?)"\)\;',
-                                webpage, 'video url')
-        title = self._html_search_regex(r'<meta content="(.+?)" property="og:title" />',
-                                webpage, 'video title')
-        description = self._html_search_regex(r'<meta name="description" content="(.+?)" />',
-                                webpage, 'video description')
-        thumbnail = self._search_regex(r'so.addVariable\("thumbnailURL", "(.+?)"\)\;',
-                                webpage, 'thumbnail url')
+        final_url = self._search_regex(
+            r'so.addVariable\("videoURL", "(.+?)"\)\;', webpage, 'video url')
+        title = self._og_search_title(webpage)
+        description = self._html_search_regex(
+            r'<meta name="description" content="(.+?)" />',
+            webpage, 'video description')
+        thumbnail = self._search_regex(
+            r'so.addVariable\("thumbnailURL", "(.+?)"\)\;',
+            webpage, 'thumbnail url')
 
-        return {'id': video_id,
-                'url' : final_url,
-                'title': title,
-                'ext': determine_ext(final_url),
-                'description': description,
-                'thumbnail': thumbnail,
-                }
+        return {
+            'id': video_id,
+            'url': final_url,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+        }
diff --git a/youtube_dl/extractor/firedrive.py b/youtube_dl/extractor/firedrive.py
new file mode 100644 (file)
index 0000000..d26145d
--- /dev/null
@@ -0,0 +1,83 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    compat_urllib_parse,
+    compat_urllib_request,
+    determine_ext,
+)
+
+
+class FiredriveIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?firedrive\.com/' + \
+                 '(?:file|embed)/(?P<id>[0-9a-zA-Z]+)'
+    _FILE_DELETED_REGEX = r'<div class="removed_file_image">'
+
+    _TESTS = [{
+        'url': 'https://www.firedrive.com/file/FEB892FA160EBD01',
+        'md5': 'd5d4252f80ebeab4dc2d5ceaed1b7970',
+        'info_dict': {
+            'id': 'FEB892FA160EBD01',
+            'ext': 'flv',
+            'title': 'bbb_theora_486kbit.flv',
+            'thumbnail': 're:^http://.*\.jpg$',
+        },
+    }]
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        url = 'http://firedrive.com/file/%s' % video_id
+
+        webpage = self._download_webpage(url, video_id)
+
+        if re.search(self._FILE_DELETED_REGEX, webpage) is not None:
+            raise ExtractorError('Video %s does not exist' % video_id,
+                                 expected=True)
+
+        fields = dict(re.findall(r'''(?x)<input\s+
+            type="hidden"\s+
+            name="([^"]+)"\s+
+            (?:id="[^"]+"\s+)?
+            value="([^"]*)"
+            ''', webpage))
+
+        post = compat_urllib_parse.urlencode(fields)
+        req = compat_urllib_request.Request(url, post)
+        req.add_header('Content-type', 'application/x-www-form-urlencoded')
+
+        # Apparently, this header is required for confirmation to work.
+        req.add_header('Host', 'www.firedrive.com')
+
+        webpage = self._download_webpage(req, video_id,
+                                         'Downloading video page')
+
+        title = self._search_regex(r'class="external_title_left">(.+)</div>',
+                                   webpage, 'title')
+        thumbnail = self._search_regex(r'image:\s?"(//[^\"]+)', webpage,
+                                       'thumbnail', fatal=False)
+        if thumbnail is not None:
+            thumbnail = 'http:' + thumbnail
+
+        ext = self._search_regex(r'type:\s?\'([^\']+)\',',
+                                 webpage, 'extension', fatal=False)
+        video_url = self._search_regex(
+            r'file:\s?\'(http[^\']+)\',', webpage, 'file url')
+
+        formats = [{
+            'format_id': 'sd',
+            'url': video_url,
+            'ext': ext,
+        }]
+
+        return {
+            'id': video_id,
+            'title': title,
+            'thumbnail': thumbnail,
+            'formats': formats,
+        }
diff --git a/youtube_dl/extractor/gameone.py b/youtube_dl/extractor/gameone.py
new file mode 100644 (file)
index 0000000..b580f52
--- /dev/null
@@ -0,0 +1,90 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    xpath_with_ns,
+    parse_iso8601
+)
+
+NAMESPACE_MAP = {
+    'media': 'http://search.yahoo.com/mrss/',
+}
+
+# URL prefix to download the mp4 files directly instead of streaming via rtmp
+# Credits go to XBox-Maniac
+# http://board.jdownloader.org/showpost.php?p=185835&postcount=31
+RAW_MP4_URL = 'http://cdn.riptide-mtvn.com/'
+
+
+class GameOneIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?gameone\.de/tv/(?P<id>\d+)'
+    _TEST = {
+        'url': 'http://www.gameone.de/tv/288',
+        'md5': '136656b7fb4c9cb4a8e2d500651c499b',
+        'info_dict': {
+            'id': '288',
+            'ext': 'mp4',
+            'title': 'Game One - Folge 288',
+            'duration': 1238,
+            'thumbnail': 'http://s3.gameone.de/gameone/assets/video_metas/teaser_images/000/643/636/big/640x360.jpg',
+            'description': 'FIFA-Pressepokal 2014, Star Citizen, Kingdom Come: Deliverance, Project Cars, Schöner Trants Nerdquiz Folge 2 Runde 1',
+            'age_limit': 16,
+            'upload_date': '20140513',
+            'timestamp': 1399980122,
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        webpage = self._download_webpage(url, video_id)
+        og_video = self._og_search_video_url(webpage, secure=False)
+        description = self._html_search_meta('description', webpage)
+        age_limit = int(
+            self._search_regex(
+                r'age=(\d+)',
+                self._html_search_meta(
+                    'age-de-meta-label',
+                    webpage),
+                'age_limit',
+                '0'))
+        mrss_url = self._search_regex(r'mrss=([^&]+)', og_video, 'mrss')
+
+        mrss = self._download_xml(mrss_url, video_id, 'Downloading mrss')
+        title = mrss.find('.//item/title').text
+        thumbnail = mrss.find('.//item/image').get('url')
+        timestamp = parse_iso8601(mrss.find('.//pubDate').text, delimiter=' ')
+        content = mrss.find(xpath_with_ns('.//media:content', NAMESPACE_MAP))
+        content_url = content.get('url')
+
+        content = self._download_xml(
+            content_url,
+            video_id,
+            'Downloading media:content')
+        rendition_items = content.findall('.//rendition')
+        duration = int(rendition_items[0].get('duration'))
+        formats = [
+            {
+                'url': re.sub(r'.*/(r2)', RAW_MP4_URL + r'\1', r.find('./src').text),
+                'width': int(r.get('width')),
+                'height': int(r.get('height')),
+                'tbr': int(r.get('bitrate')),
+            }
+            for r in rendition_items
+        ]
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'thumbnail': thumbnail,
+            'duration': duration,
+            'formats': formats,
+            'description': description,
+            'age_limit': age_limit,
+            'timestamp': timestamp,
+        }
index aa15cafc3e169e41708ab2079e20ad0e8b91ce0c..ca5f7c4178e23fb6e10e644807175a65f042f296 100644 (file)
@@ -12,7 +12,12 @@ from ..utils import (
 
 
 class GorillaVidIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?gorillavid\.in/(?:embed-)?(?P<id>[0-9a-zA-Z]+)(?:-[0-9]+x[0-9]+\.html)?'
+    IE_DESC = 'GorillaVid.in and daclips.in'
+    _VALID_URL = r'''(?x)
+        https?://(?P<host>(?:www\.)?
+            (?:daclips\.in|gorillavid\.in))/
+        (?:embed-)?(?P<id>[0-9a-zA-Z]+)(?:-[0-9]+x[0-9]+\.html)?
+    '''
 
     _TESTS = [{
         'url': 'http://gorillavid.in/06y9juieqpmi',
@@ -32,15 +37,22 @@ class GorillaVidIE(InfoExtractor):
             'title': 'Say something nice',
             'thumbnail': 're:http://.*\.jpg',
         },
+    }, {
+        'url': 'http://daclips.in/3rso4kdn6f9m',
+        'md5': '1ad8fd39bb976eeb66004d3a4895f106',
+        'info_dict': {
+            'id': '3rso4kdn6f9m',
+            'ext': 'mp4',
+            'title': 'Micro Pig piglets ready on 16th July 2009',
+            'thumbnail': 're:http://.*\.jpg',
+        },
     }]
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         video_id = mobj.group('id')
 
-        url = 'http://gorillavid.in/%s' % video_id
-
-        webpage = self._download_webpage(url, video_id)
+        webpage = self._download_webpage('http://%s/%s' % (mobj.group('host'), video_id), video_id)
 
         fields = dict(re.findall(r'''(?x)<input\s+
             type="hidden"\s+
diff --git a/youtube_dl/extractor/goshgay.py b/youtube_dl/extractor/goshgay.py
new file mode 100644 (file)
index 0000000..7bca21a
--- /dev/null
@@ -0,0 +1,73 @@
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    compat_urlparse,
+    str_to_int,
+    ExtractorError,
+)
+import json
+
+
+class GoshgayIE(InfoExtractor):
+    _VALID_URL = r'^(?:https?://)www.goshgay.com/video(?P<id>\d+?)($|/)'
+    _TEST = {
+        'url': 'http://www.goshgay.com/video4116282',
+        'md5': '268b9f3c3229105c57859e166dd72b03',
+        'info_dict': {
+            'id': '4116282',
+            'ext': 'flv',
+            'title': 'md5:089833a4790b5e103285a07337f245bf',
+            'thumbnail': 're:http://.*\.jpg',
+            'age_limit': 18,
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        webpage = self._download_webpage(url, video_id)
+        title = self._search_regex(r'class="video-title"><h1>(.+?)<', webpage, 'title')
+
+        player_config = self._search_regex(
+            r'(?s)jwplayer\("player"\)\.setup\(({.+?})\)', webpage, 'config settings')
+        player_vars = json.loads(player_config.replace("'", '"'))
+        width = str_to_int(player_vars.get('width'))
+        height = str_to_int(player_vars.get('height'))
+        config_uri = player_vars.get('config')
+
+        if config_uri is None:
+            raise ExtractorError('Missing config URI')
+        node = self._download_xml(config_uri, video_id, 'Downloading player config XML',
+                                  errnote='Unable to download XML')
+        if node is None:
+            raise ExtractorError('Missing config XML')
+        if node.tag != 'config':
+            raise ExtractorError('Missing config attribute')
+        fns = node.findall('file')
+        imgs = node.findall('image')
+        if len(fns) != 1:
+            raise ExtractorError('Missing media URI')
+        video_url = fns[0].text
+        if len(imgs) < 1:
+            thumbnail = None
+        else:
+            thumbnail = imgs[0].text
+
+        url_comp = compat_urlparse.urlparse(url)
+        ref = "%s://%s%s" % (url_comp[0], url_comp[1], url_comp[2])
+
+        return {
+            'id': video_id,
+            'url': video_url,
+            'title': title,
+            'width': width,
+            'height': height,
+            'thumbnail': thumbnail,
+            'http_referer': ref,
+            'age_limit': 18,
+        }
index af9490cccf05a372134585b8ac8957bb26e1c985..228b42d2b940d8eadd0fa3d5e61d0836fd19b7b7 100644 (file)
@@ -158,6 +158,9 @@ class MTVServicesInfoExtractor(InfoExtractor):
             if mgid.endswith('.swf'):
                 mgid = mgid[:-4]
         except RegexNotFoundError:
+            mgid = None
+
+        if mgid is None or ':' not in mgid:
             mgid = self._search_regex(
                 [r'data-mgid="(.*?)"', r'swfobject.embedSWF\(".*?(mgid:.*?)"'],
                 webpage, u'mgid')
index 3d6096e46fbe6df0f6885fbdae483f05ac07cf6f..94d5ba98289529ee3148fe1460fe67ca463cc6ab 100644 (file)
@@ -18,15 +18,15 @@ class NDRIE(InfoExtractor):
 
     _TESTS = [
         {
-            'url': 'http://www.ndr.de/fernsehen/sendungen/markt/markt7959.html',
-            'md5': 'e7a6079ca39d3568f4996cb858dd6708',
+            'url': 'http://www.ndr.de/fernsehen/media/dienordreportage325.html',
+            'md5': '4a4eeafd17c3058b65f0c8f091355855',
             'note': 'Video file',
             'info_dict': {
-                'id': '7959',
+                'id': '325',
                 'ext': 'mp4',
-                'title': 'Markt - die ganze Sendung',
-                'description': 'md5:af9179cf07f67c5c12dc6d9997e05725',
-                'duration': 2655,
+                'title': 'Blaue Bohnen aus Blocken',
+                'description': 'md5:190d71ba2ccddc805ed01547718963bc',
+                'duration': 1715,
             },
         },
         {
index 0bc0859b466e533419d5647d7f0250988d2f36db..6d5732d45c3d3e22d085319ff45449881ac73ad2 100644 (file)
@@ -46,7 +46,7 @@ class PyvideoIE(InfoExtractor):
             return self.url_result(m_youtube.group(1), 'Youtube')
 
         title = self._html_search_regex(
-            r'<div class="section">.*?<h3(?:\s+class="[^"]*")?>([^>]+?)</h3>',
+            r'<div class="section">\s*<h3(?:\s+class="[^"]*"[^>]*)?>([^>]+?)</h3>',
             webpage, 'title', flags=re.DOTALL)
         video_url = self._search_regex(
             [r'<source src="(.*?)"', r'<dt>Download</dt>.*?<a href="(.+?)"'],
diff --git a/youtube_dl/extractor/reverbnation.py b/youtube_dl/extractor/reverbnation.py
new file mode 100644 (file)
index 0000000..49cf427
--- /dev/null
@@ -0,0 +1,45 @@
+from __future__ import unicode_literals
+
+import re
+import time
+
+from .common import InfoExtractor
+from ..utils import strip_jsonp
+
+
+class ReverbNationIE(InfoExtractor):
+    _VALID_URL = r'^https?://(?:www\.)?reverbnation\.com/.*?/song/(?P<id>\d+).*?$'
+    _TESTS = [{
+        'url': 'http://www.reverbnation.com/alkilados/song/16965047-mona-lisa',
+        'file': '16965047.mp3',
+        'md5': '3da12ebca28c67c111a7f8b262d3f7a7',
+        'info_dict': {
+            "title": "MONA LISA",
+            "uploader": "ALKILADOS",
+            "uploader_id": 216429,
+            "thumbnail": "//gp1.wac.edgecastcdn.net/802892/production_public/Photo/13761700/image/1366002176_AVATAR_MONA_LISA.jpg"
+        },
+    }]
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        song_id = mobj.group('id')
+
+        api_res = self._download_json(
+            'https://api.reverbnation.com/song/%s?callback=api_response_5&_=%d'
+                % (song_id, int(time.time() * 1000)),
+            song_id,
+            transform_source=strip_jsonp,
+            note='Downloading information of song %s' % song_id
+        )
+
+        return {
+            'id': song_id,
+            'title': api_res.get('name'),
+            'url': api_res.get('url'),
+            'uploader': api_res.get('artist', {}).get('name'),
+            'uploader_id': api_res.get('artist', {}).get('id'),
+            'thumbnail': api_res.get('image', api_res.get('thumbnail')),
+            'ext': 'mp3',
+            'vcodec': 'none',
+        }
diff --git a/youtube_dl/extractor/ruhd.py b/youtube_dl/extractor/ruhd.py
new file mode 100644 (file)
index 0000000..55b58e5
--- /dev/null
@@ -0,0 +1,46 @@
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+
+class RUHDIE(InfoExtractor):
+    _VALID_URL = r'http://(?:www\.)?ruhd\.ru/play\.php\?vid=(?P<id>\d+)'
+    _TEST = {
+        'url': 'http://www.ruhd.ru/play.php?vid=207',
+        'md5': 'd1a9ec4edf8598e3fbd92bb16072ba83',
+        'info_dict': {
+            'id': '207',
+            'ext': 'divx',
+            'title': 'КОТ бааааам',
+            'description': 'классный кот)',
+            'thumbnail': 're:^http://.*\.jpg$',
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        webpage = self._download_webpage(url, video_id)
+
+        video_url = self._html_search_regex(
+            r'<param name="src" value="([^"]+)"', webpage, 'video url')
+        title = self._html_search_regex(
+            r'<title>([^<]+)&nbsp;&nbsp; RUHD.ru - Видео Высокого качества №1 в России!</title>', webpage, 'title')
+        description = self._html_search_regex(
+            r'(?s)<div id="longdesc">(.+?)<span id="showlink">', webpage, 'description', fatal=False)
+        thumbnail = self._html_search_regex(
+            r'<param name="previewImage" value="([^"]+)"', webpage, 'thumbnail', fatal=False)
+        if thumbnail:
+            thumbnail = 'http://www.ruhd.ru' + thumbnail
+
+        return {
+            'id': video_id,
+            'url': video_url,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+        }
diff --git a/youtube_dl/extractor/screencast.py b/youtube_dl/extractor/screencast.py
new file mode 100644 (file)
index 0000000..306869e
--- /dev/null
@@ -0,0 +1,129 @@
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    compat_parse_qs,
+    compat_urllib_request,
+)
+
+
+class ScreencastIE(InfoExtractor):
+    """Extractor for screencast.com ``/t/<id>`` pages."""
+
+    _VALID_URL = r'https?://www\.screencast\.com/t/(?P<id>[a-zA-Z0-9]+)'
+    _TESTS = [{
+        'url': 'http://www.screencast.com/t/3ZEjQXlT',
+        'md5': '917df1c13798a3e96211dd1561fded83',
+        'info_dict': {
+            'id': '3ZEjQXlT',
+            'ext': 'm4v',
+            'title': 'Color Measurement with Ocean Optics Spectrometers',
+            'description': 'md5:240369cde69d8bed61349a199c5fb153',
+            'thumbnail': 're:^https?://.*\.(?:gif|jpg)$',
+        }
+    }, {
+        'url': 'http://www.screencast.com/t/V2uXehPJa1ZI',
+        'md5': 'e8e4b375a7660a9e7e35c33973410d34',
+        'info_dict': {
+            'id': 'V2uXehPJa1ZI',
+            'ext': 'mov',
+            'title': 'The Amadeus Spectrometer',
+            'description': 're:^In this video, our friends at.*To learn more about Amadeus, visit',
+            'thumbnail': 're:^https?://.*\.(?:gif|jpg)$',
+        }
+    }, {
+        'url': 'http://www.screencast.com/t/aAB3iowa',
+        'md5': 'dedb2734ed00c9755761ccaee88527cd',
+        'info_dict': {
+            'id': 'aAB3iowa',
+            'ext': 'mp4',
+            'title': 'Google Earth Export',
+            'description': 'Provides a demo of a CommunityViz export to Google Earth, one of the 3D viewing options.',
+            'thumbnail': 're:^https?://.*\.(?:gif|jpg)$',
+        }
+    }, {
+        'url': 'http://www.screencast.com/t/X3ddTrYh',
+        'md5': '669ee55ff9c51988b4ebc0877cc8b159',
+        'info_dict': {
+            'id': 'X3ddTrYh',
+            'ext': 'wmv',
+            'title': 'Toolkit 6 User Group Webinar (2014-03-04) - Default Judgment and First Impression',
+            'description': 'md5:7b9f393bc92af02326a5c5889639eab0',
+            'thumbnail': 're:^https?://.*\.(?:gif|jpg)$',
+        }
+    },
+    ]
+
+    def _real_extract(self, url):
+        """Return the info dict for a screencast.com ``/t/<id>`` page.
+
+        The media URL is looked up in three places, in this order: the
+        ``<embed name="Video">`` tag, the ``content`` entry of the
+        ``flashVars``/``initParams`` player parameter, and the ``src=``
+        value of the ``og:video`` meta tag.
+
+        Raises ExtractorError when none of them yields a URL.
+        """
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        webpage = self._download_webpage(url, video_id)
+
+        video_url = self._html_search_regex(
+            r'<embed name="Video".*?src="([^"]+)"', webpage,
+            'QuickTime embed', default=None)
+
+        if video_url is None:
+            flash_vars_s = self._html_search_regex(
+                r'<param name="flashVars" value="([^"]+)"', webpage, 'flash vars',
+                default=None)
+            if not flash_vars_s:
+                flash_vars_s = self._html_search_regex(
+                    r'<param name="initParams" value="([^"]+)"', webpage, 'flash vars',
+                    default=None)
+                if flash_vars_s:
+                    # initParams separates its entries with commas; rewrite
+                    # them so the value parses as a query string.
+                    flash_vars_s = flash_vars_s.replace(',', '&')
+            if flash_vars_s:
+                flash_vars = compat_parse_qs(flash_vars_s)
+                # A player without a 'content' entry must not abort the
+                # extraction: leave video_url unset so og:video is tried.
+                content = flash_vars.get('content')
+                if content:
+                    video_url_raw = compat_urllib_request.quote(content[0])
+                    # quote() also escapes the scheme's colon; restore it.
+                    video_url = video_url_raw.replace('http%3A', 'http:')
+
+        if video_url is None:
+            video_meta = self._html_search_meta(
+                'og:video', webpage, default=None)
+            if video_meta:
+                video_url = self._search_regex(
+                    r'src=(.*?)(?:$|&)', video_meta,
+                    'meta tag video URL', default=None)
+
+        if video_url is None:
+            raise ExtractorError('Cannot find video')
+
+        title = self._og_search_title(webpage, default=None)
+        if title is None:
+            title = self._html_search_regex(
+                [r'<b>Title:</b> ([^<]*)</div>',
+                 r'class="tabSeperator">></span><span class="tabText">(.*?)<'],
+                webpage, 'title')
+        thumbnail = self._og_search_thumbnail(webpage)
+        description = self._og_search_description(webpage, default=None)
+        if description is None:
+            description = self._html_search_meta('description', webpage)
+
+        return {
+            'id': video_id,
+            'url': video_url,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+        }
index 7aa100fb22fcfe56c09370067c94a953254885a2..8a77c13709e16f9f65dbd3111dafb1fc9ae8451a 100644 (file)
@@ -81,16 +81,16 @@ class SoundcloudIE(InfoExtractor):
         },
         # downloadable song
         {
-            'url': 'https://soundcloud.com/simgretina/just-your-problem-baby-1',
-            'md5': '56a8b69568acaa967b4c49f9d1d52d19',
+            'url': 'https://soundcloud.com/oddsamples/bus-brakes',
+            'md5': 'fee7b8747b09bb755cefd4b853e7249a',
             'info_dict': {
-                'id': '105614606',
+                'id': '128590877',
                 'ext': 'wav',
-                'title': 'Just Your Problem Baby (Acapella)',
-                'description': 'Vocals',
-                'uploader': 'Sim Gretina',
-                'upload_date': '20130815',
-                #'duration': 42,
+                'title': 'Bus Brakes',
+                'description': 'md5:0170be75dd395c96025d210d261c784e',
+                'uploader': 'oddsamples',
+                'upload_date': '20140109',
+                'duration': 17,
             },
         },
     ]
@@ -255,7 +255,7 @@ class SoundcloudSetIE(SoundcloudIE):
 
 
 class SoundcloudUserIE(SoundcloudIE):
-    _VALID_URL = r'https?://(www\.)?soundcloud\.com/(?P<user>[^/]+)(/?(tracks/)?)?(\?.*)?$'
+    _VALID_URL = r'https?://(www\.)?soundcloud\.com/(?P<user>[^/]+)/?((?P<rsrc>tracks|likes)/?)?(\?.*)?$'
     IE_NAME = 'soundcloud:user'
 
     # it's in tests/test_playlists.py
@@ -264,24 +264,31 @@ class SoundcloudUserIE(SoundcloudIE):
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         uploader = mobj.group('user')
+        resource = mobj.group('rsrc')
+        if resource is None:
+            resource = 'tracks'
+        elif resource == 'likes':
+            resource = 'favorites'
 
         url = 'http://soundcloud.com/%s/' % uploader
         resolv_url = self._resolv_url(url)
         user = self._download_json(
             resolv_url, uploader, 'Downloading user info')
-        base_url = 'http://api.soundcloud.com/users/%s/tracks.json?' % uploader
+        base_url = 'http://api.soundcloud.com/users/%s/%s.json?' % (uploader, resource)
 
         entries = []
         for i in itertools.count():
             data = compat_urllib_parse.urlencode({
                 'offset': i * 50,
+                'limit': 50,
                 'client_id': self._CLIENT_ID,
             })
             new_entries = self._download_json(
                 base_url + data, uploader, 'Downloading track page %s' % (i + 1))
-            entries.extend(self._extract_info_dict(e, quiet=True) for e in new_entries)
-            if len(new_entries) < 50:
+            if len(new_entries) == 0:
+                self.to_screen('%s: End page received' % uploader)
                 break
+            entries.extend(self._extract_info_dict(e, quiet=True) for e in new_entries)
 
         return {
             '_type': 'playlist',
similarity index 73%
rename from youtube_dl/extractor/southparkstudios.py
rename to youtube_dl/extractor/southpark.py
index aea8e64393afc9c9e41a1e2d2a09f9d379151992..c20397b3d1bbffb69188ac872facd36cce5b11f7 100644 (file)
@@ -3,24 +3,24 @@ from __future__ import unicode_literals
 from .mtv import MTVServicesInfoExtractor
 
 
-class SouthParkStudiosIE(MTVServicesInfoExtractor):
-    IE_NAME = 'southparkstudios.com'
-    _VALID_URL = r'https?://(www\.)?(?P<url>southparkstudios\.com/(clips|full-episodes)/(?P<id>.+?)(\?|#|$))'
+class SouthParkIE(MTVServicesInfoExtractor):
+    IE_NAME = 'southpark.cc.com'
+    _VALID_URL = r'https?://(www\.)?(?P<url>southpark\.cc\.com/(clips|full-episodes)/(?P<id>.+?)(\?|#|$))'
 
     _FEED_URL = 'http://www.southparkstudios.com/feeds/video-player/mrss'
 
     _TESTS = [{
-        'url': 'http://www.southparkstudios.com/clips/104437/bat-daded#tab=featured',
+        'url': 'http://southpark.cc.com/clips/104437/bat-daded#tab=featured',
         'info_dict': {
             'id': 'a7bff6c2-ed00-11e0-aca6-0026b9414f30',
             'ext': 'mp4',
-            'title': 'Bat Daded',
+            'title': 'South Park|Bat Daded',
             'description': 'Randy disqualifies South Park by getting into a fight with Bat Dad.',
         },
     }]
 
 
-class SouthparkDeIE(SouthParkStudiosIE):
+class SouthparkDeIE(SouthParkIE):
     IE_NAME = 'southpark.de'
     _VALID_URL = r'https?://(www\.)?(?P<url>southpark\.de/(clips|alle-episoden)/(?P<id>.+?)(\?|#|$))'
     _FEED_URL = 'http://www.southpark.de/feeds/video-player/mrss/'
diff --git a/youtube_dl/extractor/tenplay.py b/youtube_dl/extractor/tenplay.py
new file mode 100644 (file)
index 0000000..8477840
--- /dev/null
@@ -0,0 +1,100 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+
+class TenPlayIE(InfoExtractor):
+    """Extractor for ten.com.au and tenplay.com.au pages.
+
+    The page supplies a video id and an API token; metadata and renditions
+    are then requested from the Brightcove library API.
+    """
+
+    _VALID_URL = r'https?://(?:www\.)?ten(play)?\.com\.au/.+'
+    _TEST = {
+        'url': 'http://tenplay.com.au/ten-insider/extra/season-2013/tenplay-tv-your-way',
+        #'md5': 'd68703d9f73dc8fccf3320ab34202590',
+        'info_dict': {
+            'id': '2695695426001',
+            'ext': 'flv',
+            'title': 'TENplay: TV your way',
+            'description': 'Welcome to a new TV experience. Enjoy a taste of the TENplay benefits.',
+            'timestamp': 1380150606.889,
+            'upload_date': '20130925',
+            'uploader': 'TENplay',
+        },
+        'params': {
+            'skip_download': True,  # Requires rtmpdump
+        }
+    }
+
+    # Sent as the video_fields parameter of the API request.
+    _video_fields = [
+        "id", "name", "shortDescription", "longDescription", "creationDate",
+        "publishedDate", "lastModifiedDate", "customFields", "videoStillURL",
+        "thumbnailURL", "referenceId", "length", "playsTotal",
+        "playsTrailingWeek", "renditions", "captioning", "startDate", "endDate"]
+
+    def _real_extract(self, url):
+        """Return the info dict, with one format per Brightcove rendition."""
+        webpage = self._download_webpage(url, url)
+        video_id = self._html_search_regex(
+            r'videoID: "(\d+?)"', webpage, 'video_id')
+        api_token = self._html_search_regex(
+            r'apiToken: "([a-zA-Z0-9-_\.]+?)"', webpage, 'api_token')
+        title = self._html_search_regex(
+            r'<meta property="og:title" content="\s*(.*?)\s*"\s*/?\s*>',
+            webpage, 'title')
+
+        # Named "video" rather than "json" so that the name of the stdlib
+        # module is not shadowed.
+        video = self._download_json(
+            'https://api.brightcove.com/services/library?command=find_video_by_id&video_id=%s&token=%s&video_fields=%s'
+            % (video_id, api_token, ','.join(self._video_fields)), title)
+
+        formats = []
+        for rendition in video['renditions']:
+            # A separate name keeps the page URL argument intact.
+            rendition_url = rendition['remoteUrl'] or rendition['url']
+            protocol = 'rtmp' if rendition_url.startswith('rtmp') else 'http'
+            container = rendition['videoContainer'].lower()
+            vcodec = rendition['videoCodec'].lower()
+
+            if protocol == 'rtmp':
+                rendition_url = rendition_url.replace('&mp4:', '')
+
+            formats.append({
+                # Uses the detected protocol so that HTTP renditions are
+                # not labelled 'rtmp'.
+                'format_id': '_'.join([protocol, container, vcodec]),
+                'width': rendition['frameWidth'],
+                'height': rendition['frameHeight'],
+                'tbr': rendition['encodingRate'] / 1024,
+                'filesize': rendition['size'],
+                'protocol': protocol,
+                'ext': 'flv' if protocol == 'rtmp' else container,
+                'vcodec': vcodec,
+                'container': container,
+                'url': rendition_url,
+            })
+
+        return {
+            'id': video_id,
+            'display_id': video['referenceId'],
+            'title': video['name'],
+            'description': video['shortDescription'] or video['longDescription'],
+            'formats': formats,
+            'thumbnails': [{
+                'url': video['videoStillURL']
+            }, {
+                'url': video['thumbnailURL']
+            }],
+            'thumbnail': video['videoStillURL'],
+            'duration': video['length'] / 1000,
+            'timestamp': float(video['creationDate']) / 1000,
+            'uploader': video['customFields'].get('production_company_distributor', 'TENplay'),
+            'view_count': video['playsTotal']
+        }
index ad175b83ebb6ada365722787932ec80179456560..d848ee1863252dbc155652c997210476c42479e4 100644 (file)
@@ -5,6 +5,7 @@ import re
 from .common import InfoExtractor
 from .brightcove import BrightcoveIE
 from .discovery import DiscoveryIE
+from ..utils import compat_urlparse
 
 
 class TlcIE(DiscoveryIE):
@@ -51,6 +52,10 @@ class TlcDeIE(InfoExtractor):
         # Otherwise we don't get the correct 'BrightcoveExperience' element,
         # example: http://www.tlc.de/sendungen/cake-boss/videos/cake-boss-cannoli-drama/
         iframe_url = iframe_url.replace('.htm?', '.php?')
+        url_fragment = compat_urlparse.urlparse(url).fragment
+        if url_fragment:
+            # Since the fragment is not sent to the server, we always get the same iframe
+            iframe_url = re.sub(r'playlist=(\d+)', 'playlist=%s' % url_fragment, iframe_url)
         iframe = self._download_webpage(iframe_url, title)
 
         return {
index c980153ec9190640e47f3883d086d8c9d1b56bc0..d516b6427bd271fa8f7e1129cdbbcd9dda692ae1 100644 (file)
@@ -1,21 +1,21 @@
 from __future__ import unicode_literals
+
 import base64
 import re
 
 from .common import InfoExtractor
-from ..utils import (
-    compat_parse_qs,
-)
+from ..utils import compat_parse_qs
 
 
 class TutvIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?tu\.tv/videos/(?P<id>[^/?]+)'
     _TEST = {
-        'url': 'http://tu.tv/videos/noah-en-pabellon-cuahutemoc',
-        'file': '2742556.flv',
-        'md5': '5eb766671f69b82e528dc1e7769c5cb2',
+        'url': 'http://tu.tv/videos/robots-futbolistas',
+        'md5': '627c7c124ac2a9b5ab6addb94e0e65f7',
         'info_dict': {
-            'title': 'Noah en pabellon cuahutemoc',
+            'id': '2973058',
+            'ext': 'flv',
+            'title': 'Robots futbolistas',
         },
     }
 
@@ -26,10 +26,9 @@ class TutvIE(InfoExtractor):
         webpage = self._download_webpage(url, video_id)
         internal_id = self._search_regex(r'codVideo=([0-9]+)', webpage, 'internal video ID')
 
-        data_url = 'http://tu.tv/flvurl.php?codVideo=' + str(internal_id)
-        data_content = self._download_webpage(data_url, video_id, note='Downloading video info')
-        data = compat_parse_qs(data_content)
-        video_url = base64.b64decode(data['kpt'][0]).decode('utf-8')
+        data_content = self._download_webpage(
+            'http://tu.tv/flvurl.php?codVideo=%s' % internal_id, video_id, 'Downloading video info')
+        video_url = base64.b64decode(compat_parse_qs(data_content)['kpt'][0]).decode('utf-8')
 
         return {
             'id': internal_id,
diff --git a/youtube_dl/extractor/vimple.py b/youtube_dl/extractor/vimple.py
new file mode 100644 (file)
index 0000000..33d370e
--- /dev/null
@@ -0,0 +1,99 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import base64
+import re
+import xml.etree.ElementTree
+import zlib
+
+from .common import InfoExtractor
+from ..utils import int_or_none
+
+
+class VimpleIE(InfoExtractor):
+    """Extractor for vimple.ru videos (site pages and player iframes).
+
+    The metadata is read from the Flash player referenced by the iframe:
+    the zlib-compressed player embeds it as base64-encoded XML.
+    """
+
+    IE_DESC = 'Vimple.ru'
+    # Dots are escaped so that only the literal host names match.
+    _VALID_URL = r'https?://(player\.vimple\.ru/iframe|vimple\.ru)/(?P<id>[a-f0-9]{10,})'
+    _TESTS = [
+        # Quality: Large, from iframe
+        {
+            'url': 'http://player.vimple.ru/iframe/b132bdfd71b546d3972f9ab9a25f201c',
+            'info_dict': {
+                'id': 'b132bdfd71b546d3972f9ab9a25f201c',
+                'title': 'great-escape-minecraft.flv',
+                'ext': 'mp4',
+                'duration': 352,
+                'webpage_url': 'http://vimple.ru/b132bdfd71b546d3972f9ab9a25f201c',
+            },
+        },
+        # Quality: Medium, from mainpage
+        {
+            'url': 'http://vimple.ru/a15950562888453b8e6f9572dc8600cd',
+            'info_dict': {
+                'id': 'a15950562888453b8e6f9572dc8600cd',
+                'title': 'DB 01',
+                'ext': 'flv',
+                'duration': 1484,
+                'webpage_url': 'http://vimple.ru/a15950562888453b8e6f9572dc8600cd',
+            }
+        },
+    ]
+
+    def _real_extract(self, url):
+        """Return the info dict decoded from the video's Flash player."""
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        # Always fetch the iframe, whichever URL form was given.
+        iframe_url = 'http://player.vimple.ru/iframe/%s' % video_id
+
+        iframe = self._download_webpage(
+            iframe_url, video_id,
+            note='Downloading iframe', errnote='unable to fetch iframe')
+        player_url = self._html_search_regex(
+            r'"(http://player\.vimple\.ru/flash/.+?)"', iframe, 'player url')
+
+        player = self._request_webpage(
+            player_url, video_id, note='Downloading swf player').read()
+
+        # The first 8 bytes (presumably the SWF header) precede the zlib stream.
+        player = zlib.decompress(player[8:])
+
+        # The payload is matched as 500-character runs of base64 characters;
+        # the first and last character of each run are dropped before joining.
+        xml_pieces = re.findall(b'([a-zA-Z0-9 =+/]{500})', player)
+        xml_pieces = [piece[1:-1] for piece in xml_pieces]
+
+        xml_data = b''.join(xml_pieces)
+        xml_data = base64.b64decode(xml_data)
+
+        xml_data = xml.etree.ElementTree.fromstring(xml_data)
+
+        video = xml_data.find('Video')
+        quality = video.get('quality')
+        q_tag = video.find(quality.capitalize())
+
+        formats = [
+            {
+                'url': q_tag.get('url'),
+                # int_or_none tolerates a missing attribute, as for duration.
+                'tbr': int_or_none(q_tag.get('bitrate')),
+                'filesize': int_or_none(q_tag.get('filesize')),
+                'format_id': quality,
+            },
+        ]
+
+        return {
+            'id': video_id,
+            'title': video.find('Title').text,
+            'formats': formats,
+            'thumbnail': video.find('Poster').get('url'),
+            'duration': int_or_none(video.get('duration')),
+            'webpage_url': video.find('Share').get('videoPageUrl'),
+        }
diff --git a/youtube_dl/extractor/vodlocker.py b/youtube_dl/extractor/vodlocker.py
new file mode 100644 (file)
index 0000000..68c5936
--- /dev/null
@@ -0,0 +1,74 @@
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+
+import re
+from .common import InfoExtractor
+from ..utils import (
+    compat_urllib_parse,
+    compat_urllib_request,
+)
+
+
+class VodlockerIE(InfoExtractor):
+    """Extractor for vodlocker.com file pages."""
+
+    # The dot in the host name is escaped so only vodlocker.com matches.
+    _VALID_URL = r'https?://(?:www\.)?vodlocker\.com/(?P<id>[0-9a-zA-Z]+)(?:\..*?)?'
+
+    _TESTS = [{
+        'url': 'http://vodlocker.com/e8wvyzz4sl42',
+        'md5': 'ce0c2d18fa0735f1bd91b69b0e54aacf',
+        'info_dict': {
+            'id': 'e8wvyzz4sl42',
+            'ext': 'mp4',
+            'title': 'Germany vs Brazil',
+            'thumbnail': 're:http://.*\.jpg',
+        },
+    }]
+
+    def _real_extract(self, url):
+        """Return the info dict, submitting the interstitial form if needed.
+
+        When the page's hidden ``op`` field is ``download1`` the hidden form
+        fields are POSTed back to the same URL (after a short pause) and the
+        response is scraped instead.
+        """
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        webpage = self._download_webpage(url, video_id)
+
+        fields = dict(re.findall(r'''(?x)<input\s+
+            type="hidden"\s+
+            name="([^"]+)"\s+
+            (?:id="[^"]+"\s+)?
+            value="([^"]*)"
+            ''', webpage))
+
+        # .get(): a page without the hidden form is scraped as it is.
+        if fields.get('op') == 'download1':
+            self._sleep(3, video_id)  # they do detect when requests happen too fast!
+            # Request data has to be bytes on Python 3.
+            post = compat_urllib_parse.urlencode(fields).encode('ascii')
+            req = compat_urllib_request.Request(url, post)
+            req.add_header('Content-type', 'application/x-www-form-urlencoded')
+            webpage = self._download_webpage(
+                req, video_id, 'Downloading video page')
+
+        title = self._search_regex(
+            r'id="file_title".*?>\s*(.*?)\s*<span', webpage, 'title')
+        thumbnail = self._search_regex(
+            r'image:\s*"(http[^\"]+)",', webpage, 'thumbnail')
+        video_url = self._search_regex(
+            r'file:\s*"(http[^\"]+)",', webpage, 'file url')
+
+        formats = [{
+            'format_id': 'sd',
+            'url': video_url,
+        }]
+
+        return {
+            'id': video_id,
+            'title': title,
+            'thumbnail': thumbnail,
+            'formats': formats,
+        }
index 3bbb07704128cf1ab27197ca1abbeeaec086b36f..ae5bca2e643f1ec4d719e8e895b0a655a98a151a 100644 (file)
@@ -11,6 +11,7 @@ class JSInterpreter(object):
     def __init__(self, code):
         self.code = code
         self._functions = {}
+        self._objects = {}
 
     def interpret_statement(self, stmt, local_vars, allow_recursion=20):
         if allow_recursion < 0:
@@ -55,7 +56,19 @@ class JSInterpreter(object):
         m = re.match(r'^(?P<in>[a-z]+)\.(?P<member>.*)$', expr)
         if m:
             member = m.group('member')
-            val = local_vars[m.group('in')]
+            variable = m.group('in')
+
+            if variable not in local_vars:
+                if variable not in self._objects:
+                    self._objects[variable] = self.extract_object(variable)
+                obj = self._objects[variable]
+                key, args = member.split('(', 1)
+                args = args.strip(')')
+                argvals = [int(v) if v.isdigit() else local_vars[v]
+                           for v in args.split(',')]
+                return obj[key](argvals)
+
+            val = local_vars[variable]
             if member == 'split("")':
                 return list(val)
             if member == 'join("")':
@@ -97,6 +110,37 @@ class JSInterpreter(object):
             return self._functions[fname](argvals)
         raise ExtractorError('Unsupported JS expression %r' % expr)
 
+    def extract_object(self, objname):
+        """Build a dict of callables from the JS object literal *objname*.
+
+        Only ``key: function(args){code}`` members are supported. Every
+        member is compiled with build_function and stored under its key.
+
+        Raises ExtractorError if no matching object literal is found in
+        self.code.
+        """
+        obj = {}
+        obj_m = re.search(
+            (r'(?:var\s+)?%s\s*=\s*\{' % re.escape(objname)) +
+            r'\s*(?P<fields>([a-zA-Z$]+\s*:\s*function\(.*?\)\s*\{.*?\})*)' +
+            r'\}\s*;',
+            self.code)
+        if obj_m is None:
+            # Same failure mode as extract_function, instead of an
+            # AttributeError on None.
+            raise ExtractorError('Could not find JS object %r' % objname)
+        fields = obj_m.group('fields')
+        # Currently, it only supports function definitions
+        fields_m = re.finditer(
+            r'(?P<key>[a-zA-Z$]+)\s*:\s*function'
+            r'\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}',
+            fields)
+        for f in fields_m:
+            argnames = f.group('args').split(',')
+            obj[f.group('key')] = self.build_function(argnames, f.group('code'))
+
+        return obj
+
     def extract_function(self, funcname):
         func_m = re.search(
             (r'(?:function %s|[{;]%s\s*=\s*function)' % (
@@ -107,10 +151,17 @@ class JSInterpreter(object):
             raise ExtractorError('Could not find JS function %r' % funcname)
         argnames = func_m.group('args').split(',')
 
+        return self.build_function(argnames, func_m.group('code'))
+
+    def build_function(self, argnames, code):
+        """Return a callable interpreting the JS statements in *code*.
+
+        The callable takes a list of argument values, binds them to
+        *argnames* and returns the result of the last statement.
+        """
         def resf(args):
             local_vars = dict(zip(argnames, args))
-            for stmt in func_m.group('code').split(';'):
+            for stmt in code.split(';'):
                 res = self.interpret_statement(stmt, local_vars)
             return res
         return resf
-
index 09312e81a485de8d04f7757eb2e26a80a10f161c..64a9618ca62493f893af16b31b3fbd331bbdc1e7 100644 (file)
@@ -775,7 +775,7 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
     https_response = http_response
 
 
-def parse_iso8601(date_str):
+def parse_iso8601(date_str, delimiter='T'):
     """ Return a UNIX timestamp from the given date """
 
     if date_str is None:
@@ -795,8 +795,8 @@ def parse_iso8601(date_str):
             timezone = datetime.timedelta(
                 hours=sign * int(m.group('hours')),
                 minutes=sign * int(m.group('minutes')))
-
-    dt = datetime.datetime.strptime(date_str, '%Y-%m-%dT%H:%M:%S') - timezone
+    date_format =  '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
+    dt = datetime.datetime.strptime(date_str, date_format) - timezone
     return calendar.timegm(dt.timetuple())
 
 
@@ -1428,7 +1428,7 @@ US_RATINGS = {
 
 
 def strip_jsonp(code):
-    return re.sub(r'(?s)^[a-zA-Z_]+\s*\(\s*(.*)\);\s*?\s*$', r'\1', code)
+    return re.sub(r'(?s)^[a-zA-Z0-9_]+\s*\(\s*(.*)\);?\s*?\s*$', r'\1', code)
 
 
 def qualities(quality_ids):
index d6b05892ca58e4fffd33d275aa9bc43a3c6b7247..4d606c3d2333ffbcdbfb64d55f4e8a2a8db3bf75 100644 (file)
@@ -1,2 +1,2 @@
 
-__version__ = '2014.07.11'
+__version__ = '2014.07.15'