Imported Upstream version 2014.06.19

author Rogério Brito <rbrito@ime.usp.br>
Sun, 22 Jun 2014 14:48:31 +0000 (11:48 -0300)
committer Rogério Brito <rbrito@ime.usp.br>
Sun, 22 Jun 2014 14:48:31 +0000 (11:48 -0300)
diff --git a/test/test_playlists.py b/test/test_playlists.py

index 465b07b9e28e48ce9fe3b8a0a477a712b9f06940..ee91e412ab33199fd27996dba25bccb3b646387f 100644 (file)
--- a/test/test_playlists.py
+++ b/test/test_playlists.py
@@ -114,10 +114,10 @@ class TestPlaylists(unittest.TestCase):
      def test_ustream_channel(self):
          dl = FakeYDL()
          ie = UstreamChannelIE(dl)
-        result = ie.extract('http://www.ustream.tv/channel/young-americans-for-liberty')
+        result = ie.extract('http://www.ustream.tv/channel/channeljapan')
          self.assertIsPlaylist(result)
-        self.assertEqual(result['id'], '5124905')
-        self.assertTrue(len(result['entries']) >= 6)
+        self.assertEqual(result['id'], '10874166')
+        self.assertTrue(len(result['entries']) >= 54)
  
      def test_soundcloud_set(self):
          dl = FakeYDL()
diff --git a/youtube-dl b/youtube-dl

index b98d36a19b220dd47c8259ed766b2634f84f9251..4c445a9e9f0168344a24c5eb5f6b23adeef687e0 100755 (executable)

Binary files a/youtube-dl and b/youtube-dl differ
diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py

index 9d407fe6eb81683b19c5671ef7b92050e0a690c3..9f29e2f8110ef09d8bba4c1d57e38acb8da8a52e 100644 (file)
--- a/youtube_dl/downloader/hls.py
+++ b/youtube_dl/downloader/hls.py
@@ -25,7 +25,7 @@ class HlsFD(FileDownloader):
              except (OSError, IOError):
                  pass
          else:
-            self.report_error(u'm3u8 download detected but ffmpeg or avconv could not be found')
+            self.report_error(u'm3u8 download detected but ffmpeg or avconv could not be found. Please install one.')
          cmd = [program] + args
  
          retval = subprocess.call(cmd)
diff --git a/youtube_dl/downloader/rtmp.py b/youtube_dl/downloader/rtmp.py

index cc6a84106b4ccc1221b74da313eb619544c4a8ef..68646709a16cf7c9dcec0ac1c5e09f5643a9a7a2 100644 (file)
--- a/youtube_dl/downloader/rtmp.py
+++ b/youtube_dl/downloader/rtmp.py
@@ -106,7 +106,7 @@ class RtmpFD(FileDownloader):
          try:
              subprocess.call(['rtmpdump', '-h'], stdout=(open(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
          except (OSError, IOError):
-            self.report_error('RTMP download detected but "rtmpdump" could not be run')
+            self.report_error('RTMP download detected but "rtmpdump" could not be run. Please install it.')
              return False
  
          # Download using rtmpdump. rtmpdump returns exit code 2 when
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py

index 15a42ce44246f708a6b06027c366a4b41eab5c7b..dcf64d0344816e971fbe6d45615e2aaae50a2e08 100644 (file)
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -109,6 +109,7 @@ from .gdcvault import GDCVaultIE
  from .generic import GenericIE
  from .googleplus import GooglePlusIE
  from .googlesearch import GoogleSearchIE
+from .gorillavid import GorillaVidIE
  from .hark import HarkIE
  from .helsinki import HelsinkiIE
  from .hentaistigma import HentaiStigmaIE
@@ -216,6 +217,7 @@ from .pornotube import PornotubeIE
  from .prosiebensat1 import ProSiebenSat1IE
  from .pyvideo import PyvideoIE
  from .radiofrance import RadioFranceIE
+from .rai import RaiIE
  from .rbmaradio import RBMARadioIE
  from .redtube import RedTubeIE
  from .ringtv import RingTVIE
@@ -332,6 +334,7 @@ from .viki import VikiIE
  from .vk import VKIE
  from .vube import VubeIE
  from .vuclip import VuClipIE
+from .vulture import VultureIE
  from .washingtonpost import WashingtonPostIE
  from .wat import WatIE
  from .wdr import (
@@ -343,6 +346,7 @@ from .weibo import WeiboIE
  from .wimp import WimpIE
  from .wistia import WistiaIE
  from .worldstarhiphop import WorldStarHipHopIE
+from .wrzuta import WrzutaIE
  from .xbef import XBefIE
  from .xhamster import XHamsterIE
  from .xnxx import XNXXIE
diff --git a/youtube_dl/extractor/ard.py b/youtube_dl/extractor/ard.py

index c6d22c029ef1c8dcdef44df172fe3e9391fea6eb..b36a4d46a6dd435883eb911de2e3530604476c07 100644 (file)
--- a/youtube_dl/extractor/ard.py
+++ b/youtube_dl/extractor/ard.py
@@ -56,7 +56,18 @@ class ARDIE(InfoExtractor):
                  raise ExtractorError('This video is only available after 20:00')
  
          formats = []
+
          for s in streams:
+            if type(s['_stream']) == list:
+                for index, url in enumerate(s['_stream'][::-1]):
+                    quality = s['_quality'] + index
+                    formats.append({
+                        'quality': quality,
+                        'url': url,
+                        'format_id': '%s-%s' % (determine_ext(url), quality)
+                        })
+                continue
+
              format = {
                  'quality': s['_quality'],
                  'url': s['_stream'],
diff --git a/youtube_dl/extractor/bilibili.py b/youtube_dl/extractor/bilibili.py

index 45067b944996f7d3f6ce1ced97b379cd6ce40ff9..0d5889f5d17c17ffa75eeca1f1079efd7f9c2b8f 100644 (file)
--- a/youtube_dl/extractor/bilibili.py
+++ b/youtube_dl/extractor/bilibili.py
@@ -13,7 +13,7 @@ from ..utils import (
  
  
  class BiliBiliIE(InfoExtractor):
-    _VALID_URL = r'http://www\.bilibili\.tv/video/av(?P<id>[0-9]+)/'
+    _VALID_URL = r'http://www\.bilibili\.(?:tv|com)/video/av(?P<id>[0-9]+)/'
  
      _TEST = {
          'url': 'http://www.bilibili.tv/video/av1074402/',
@@ -56,7 +56,7 @@ class BiliBiliIE(InfoExtractor):
              'thumbnailUrl', video_code, 'thumbnail', fatal=False)
  
          player_params = compat_parse_qs(self._html_search_regex(
-            r'<iframe .*?class="player" src="https://secure.bilibili.tv/secure,([^"]+)"',
+            r'<iframe .*?class="player" src="https://secure\.bilibili\.(?:tv|com)/secure,([^"]+)"',
              webpage, 'player params'))
  
          if 'cid' in player_params:
diff --git a/youtube_dl/extractor/blinkx.py b/youtube_dl/extractor/blinkx.py

index 38ccd957f3eb61a761950bb9a70cdbbeec6bea6d..7d558e262ecea44df6b025f0db716b82d975b314 100644 (file)
--- a/youtube_dl/extractor/blinkx.py
+++ b/youtube_dl/extractor/blinkx.py
@@ -4,9 +4,7 @@ import json
  import re
  
  from .common import InfoExtractor
-from ..utils import (
-    remove_start,
-)
+from ..utils import remove_start
  
  
  class BlinkxIE(InfoExtractor):
@@ -15,9 +13,10 @@ class BlinkxIE(InfoExtractor):
  
      _TEST = {
          'url': 'http://www.blinkx.com/ce/8aQUy7GVFYgFzpKhT0oqsilwOGFRVXk3R1ZGWWdGenBLaFQwb3FzaWx3OGFRVXk3R1ZGWWdGenB',
-        'file': '8aQUy7GV.mp4',
          'md5': '2e9a07364af40163a908edbf10bb2492',
          'info_dict': {
+            'id': '8aQUy7GV',
+            'ext': 'mp4',
              'title': 'Police Car Rolls Away',
              'uploader': 'stupidvideos.com',
              'upload_date': '20131215',
@@ -27,6 +26,7 @@ class BlinkxIE(InfoExtractor):
              'thumbnails': [{
                  'width': 100,
                  'height': 76,
+                'resolution': '100x76',
                  'url': 'http://cdn.blinkx.com/stream/b/41/StupidVideos/20131215/1873969261/1873969261_tn_0.jpg',
              }],
          },
@@ -37,7 +37,7 @@ class BlinkxIE(InfoExtractor):
          video_id = m.group('id')
          display_id = video_id[:8]
  
-        api_url = (u'https://apib4.blinkx.com/api.php?action=play_video&' +
+        api_url = ('https://apib4.blinkx.com/api.php?action=play_video&' +
                     'video=%s' % video_id)
          data_json = self._download_webpage(api_url, display_id)
          data = json.loads(data_json)['api']['results'][0]
@@ -55,13 +55,13 @@ class BlinkxIE(InfoExtractor):
                  duration = m['d']
              elif m['type'] == 'youtube':
                  yt_id = m['link']
-                self.to_screen(u'Youtube video detected: %s' % yt_id)
+                self.to_screen('Youtube video detected: %s' % yt_id)
                  return self.url_result(yt_id, 'Youtube', video_id=yt_id)
              elif m['type'] in ('flv', 'mp4'):
                  vcodec = remove_start(m['vcodec'], 'ff')
                  acodec = remove_start(m['acodec'], 'ff')
                  tbr = (int(m['vbr']) + int(m['abr'])) // 1000
-                format_id = u'%s-%sk-%s' % (vcodec, tbr, m['w'])
+                format_id = '%s-%sk-%s' % (vcodec, tbr, m['w'])
                  formats.append({
                      'format_id': format_id,
                      'url': m['link'],
diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py

index 3c02c297a58a32cf536e8ccc972dea68021f650b..419951b6279ae87fb8f0dab1c4f5249ce221a268 100644 (file)
--- a/youtube_dl/extractor/brightcove.py
+++ b/youtube_dl/extractor/brightcove.py
@@ -15,6 +15,7 @@ from ..utils import (
      compat_urllib_request,
      compat_parse_qs,
  
+    determine_ext,
      ExtractorError,
      unsmuggle_url,
      unescapeHTML,
@@ -29,10 +30,11 @@ class BrightcoveIE(InfoExtractor):
          {
              # From http://www.8tv.cat/8aldia/videos/xavier-sala-i-martin-aquesta-tarda-a-8-al-dia/
              'url': 'http://c.brightcove.com/services/viewer/htmlFederated?playerID=1654948606001&flashID=myExperience&%40videoPlayer=2371591881001',
-            'file': '2371591881001.mp4',
              'md5': '5423e113865d26e40624dce2e4b45d95',
              'note': 'Test Brightcove downloads and detection in GenericIE',
              'info_dict': {
+                'id': '2371591881001',
+                'ext': 'mp4',
                  'title': 'Xavier Sala i Martín: “Un banc que no presta és un banc zombi que no serveix per a res”',
                  'uploader': '8TV',
                  'description': 'md5:a950cc4285c43e44d763d036710cd9cd',
@@ -41,8 +43,9 @@ class BrightcoveIE(InfoExtractor):
          {
              # From http://medianetwork.oracle.com/video/player/1785452137001
              'url': 'http://c.brightcove.com/services/viewer/htmlFederated?playerID=1217746023001&flashID=myPlayer&%40videoPlayer=1785452137001',
-            'file': '1785452137001.flv',
              'info_dict': {
+                'id': '1785452137001',
+                'ext': 'flv',
                  'title': 'JVMLS 2012: Arrays 2.0 - Opportunities and Challenges',
                  'description': 'John Rose speaks at the JVM Language Summit, August 1, 2012.',
                  'uploader': 'Oracle',
@@ -70,7 +73,20 @@ class BrightcoveIE(InfoExtractor):
                  'description': 'md5:363109c02998fee92ec02211bd8000df',
                  'uploader': 'National Ballet of Canada',
              },
-        }
+        },
+        {
+            # test flv videos served by akamaihd.net
+            # From http://www.redbull.com/en/bike/stories/1331655643987/replay-uci-dh-world-cup-2014-from-fort-william
+            'url': 'http://c.brightcove.com/services/viewer/htmlFederated?%40videoPlayer=ref%3ABC2996102916001&linkBaseURL=http%3A%2F%2Fwww.redbull.com%2Fen%2Fbike%2Fvideos%2F1331655630249%2Freplay-uci-fort-william-2014-dh&playerKey=AQ%7E%7E%2CAAAApYJ7UqE%7E%2Cxqr_zXk0I-zzNndy8NlHogrCb5QdyZRf&playerID=1398061561001#__youtubedl_smuggle=%7B%22Referer%22%3A+%22http%3A%2F%2Fwww.redbull.com%2Fen%2Fbike%2Fstories%2F1331655643987%2Freplay-uci-dh-world-cup-2014-from-fort-william%22%7D',
+            # The md5 checksum changes on each download
+            'info_dict': {
+                'id': '2996102916001',
+                'ext': 'flv',
+                'title': 'UCI MTB World Cup 2014: Fort William, UK - Downhill Finals',
+                'uploader': 'Red Bull TV',
+                'description': 'UCI MTB World Cup 2014: Fort William, UK - Downhill Finals',
+            },
+        },
      ]
  
      @classmethod
@@ -187,7 +203,7 @@ class BrightcoveIE(InfoExtractor):
          webpage = self._download_webpage(req, video_id)
  
          self.report_extraction(video_id)
-        info = self._search_regex(r'var experienceJSON = ({.*?});', webpage, 'json')
+        info = self._search_regex(r'var experienceJSON = ({.*});', webpage, 'json')
          info = json.loads(info)['data']
          video_info = info['programmedContent']['videoPlayer']['mediaDTO']
          video_info['_youtubedl_adServerURL'] = info.get('adServerURL')
@@ -219,12 +235,26 @@ class BrightcoveIE(InfoExtractor):
  
          renditions = video_info.get('renditions')
          if renditions:
-            renditions = sorted(renditions, key=lambda r: r['size'])
-            info['formats'] = [{
-                'url': rend['defaultURL'],
-                'height': rend.get('frameHeight'),
-                'width': rend.get('frameWidth'),
-            } for rend in renditions]
+            formats = []
+            for rend in renditions:
+                url = rend['defaultURL']
+                if rend['remote']:
+                    # This type of renditions are served through akamaihd.net,
+                    # but they don't use f4m manifests
+                    url = url.replace('control/', '') + '?&v=3.3.0&fp=13&r=FEEFJ&g=RTSJIMBMPFPB'
+                    ext = 'flv'
+                else:
+                    ext = determine_ext(url)
+                size = rend.get('size')
+                formats.append({
+                    'url': url,
+                    'ext': ext,
+                    'height': rend.get('frameHeight'),
+                    'width': rend.get('frameWidth'),
+                    'filesize': size if size != 0 else None,
+                })
+            self._sort_formats(formats)
+            info['formats'] = formats
          elif video_info.get('FLVFullLengthURL') is not None:
              info.update({
                  'url': video_info['FLVFullLengthURL'],
diff --git a/youtube_dl/extractor/dreisat.py b/youtube_dl/extractor/dreisat.py

index 0b11d1f10e18e4358b35f76d0a0e0816b00eaa4c..69ca75423cb1d4692f1958829dc61bcf5c2bac73 100644 (file)
--- a/youtube_dl/extractor/dreisat.py
+++ b/youtube_dl/extractor/dreisat.py
@@ -1,39 +1,37 @@
-# coding: utf-8
+from __future__ import unicode_literals
  
  import re
  
  from .common import InfoExtractor
-from ..utils import (
-    unified_strdate,
-)
+from ..utils import unified_strdate
  
  
  class DreiSatIE(InfoExtractor):
      IE_NAME = '3sat'
      _VALID_URL = r'(?:http://)?(?:www\.)?3sat\.de/mediathek/(?:index\.php)?\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)$'
      _TEST = {
-        u"url": u"http://www.3sat.de/mediathek/index.php?obj=36983",
-        u'file': u'36983.mp4',
-        u'md5': u'9dcfe344732808dbfcc901537973c922',
-        u'info_dict': {
-            u"title": u"Kaffeeland Schweiz",
-            u"description": u"Über 80 Kaffeeröstereien liefern in der Schweiz das Getränk, in das das Land so vernarrt ist: Mehr als 1000 Tassen trinkt ein Schweizer pro Jahr. SCHWEIZWEIT nimmt die Kaffeekultur unter die...", 
-            u"uploader": u"3sat",
-            u"upload_date": u"20130622"
+        'url': 'http://www.3sat.de/mediathek/index.php?obj=36983',
+        'md5': '9dcfe344732808dbfcc901537973c922',
+        'info_dict': {
+            'id': '36983',
+            'ext': 'mp4',
+            'title': 'Kaffeeland Schweiz',
+            'description': 'md5:cc4424b18b75ae9948b13929a0814033',
+            'uploader': '3sat',
+            'upload_date': '20130622'
          }
      }
  
-
      def _real_extract(self, url):
          mobj = re.match(self._VALID_URL, url)
          video_id = mobj.group('id')
          details_url = 'http://www.3sat.de/mediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id
-        details_doc = self._download_xml(details_url, video_id, note=u'Downloading video details')
+        details_doc = self._download_xml(details_url, video_id, 'Downloading video details')
  
          thumbnail_els = details_doc.findall('.//teaserimage')
          thumbnails = [{
-            'width': te.attrib['key'].partition('x')[0],
-            'height': te.attrib['key'].partition('x')[2],
+            'width': int(te.attrib['key'].partition('x')[0]),
+            'height': int(te.attrib['key'].partition('x')[2]),
              'url': te.text,
          } for te in thumbnail_els]
  
diff --git a/youtube_dl/extractor/fc2.py b/youtube_dl/extractor/fc2.py

index 18f91efac450d1ec697652570b6366b1709553d0..c663a0f81d08650b24616d2c3c2daef262c95aa2 100644 (file)
--- a/youtube_dl/extractor/fc2.py
+++ b/youtube_dl/extractor/fc2.py
@@ -50,10 +50,13 @@ class FC2IE(InfoExtractor):
              raise ExtractorError('Error code: %s' % info['err_code'][0])
  
          video_url = info['filepath'][0] + '?mid=' + info['mid'][0]
+        title_info = info.get('title')
+        if title_info:
+            title = title_info[0]
  
          return {
              'id': video_id,
-            'title': info['title'][0],
+            'title': title,
              'url': video_url,
              'ext': 'flv',
              'thumbnail': thumbnail,
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py

index 38a357d3b0406906144e25cbbc45fbe74d2f6c2c..3105b47abf025a690b14d8878cdb6a671ba00217 100644 (file)
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -260,7 +260,24 @@ class GenericIE(InfoExtractor):
                  'uploader': 'Spi0n',
              },
              'add_ie': ['Dailymotion'],
-        }
+        },
+        # YouTube embed
+        {
+            'url': 'http://www.badzine.de/ansicht/datum/2014/06/09/so-funktioniert-die-neue-englische-badminton-liga.html',
+            'info_dict': {
+                'id': 'FXRb4ykk4S0',
+                'ext': 'mp4',
+                'title': 'The NBL Auction 2014',
+                'uploader': 'BADMINTON England',
+                'uploader_id': 'BADMINTONEvents',
+                'upload_date': '20140603',
+                'description': 'md5:9ef128a69f1e262a700ed83edb163a73',
+            },
+            'add_ie': ['Youtube'],
+            'params': {
+                'skip_download': True,
+            }
+        },
      ]
  
      def report_download_webpage(self, video_id):
@@ -478,8 +495,13 @@ class GenericIE(InfoExtractor):
  
          # Look for embedded YouTube player
          matches = re.findall(r'''(?x)
-            (?:<iframe[^>]+?src=|embedSWF\(\s*)
-            (["\'])(?P<url>(?:https?:)?//(?:www\.)?youtube\.com/
+            (?:
+                <iframe[^>]+?src=|
+                <embed[^>]+?src=|
+                embedSWF\(?:\s*
+            )
+            (["\'])
+                (?P<url>(?:https?:)?//(?:www\.)?youtube\.com/
                  (?:embed|v)/.+?)
              \1''', webpage)
          if matches:
@@ -646,6 +668,14 @@ class GenericIE(InfoExtractor):
              url = unescapeHTML(mobj.group('url'))
              return self.url_result(url)
  
+        # Look for embedded vulture.com player
+        mobj = re.search(
+            r'<iframe src="(?P<url>https?://video\.vulture\.com/[^"]+)"',
+            webpage)
+        if mobj is not None:
+            url = unescapeHTML(mobj.group('url'))
+            return self.url_result(url, ie='Vulture')
+
          # Start with something easy: JW Player in SWFObject
          found = re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
          if not found:
diff --git a/youtube_dl/extractor/gorillavid.py b/youtube_dl/extractor/gorillavid.py

new file mode 100644 (file)

index 0000000..aa15caf
--- /dev/null
+++ b/youtube_dl/extractor/gorillavid.py
@@ -0,0 +1,76 @@
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    determine_ext,
+    compat_urllib_parse,
+    compat_urllib_request,
+)
+
+
+class GorillaVidIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?gorillavid\.in/(?:embed-)?(?P<id>[0-9a-zA-Z]+)(?:-[0-9]+x[0-9]+\.html)?'
+
+    _TESTS = [{
+        'url': 'http://gorillavid.in/06y9juieqpmi',
+        'md5': '5ae4a3580620380619678ee4875893ba',
+        'info_dict': {
+            'id': '06y9juieqpmi',
+            'ext': 'flv',
+            'title': 'Rebecca Black My Moment Official Music Video Reaction',
+            'thumbnail': 're:http://.*\.jpg',
+        },
+    }, {
+        'url': 'http://gorillavid.in/embed-z08zf8le23c6-960x480.html',
+        'md5': 'c9e293ca74d46cad638e199c3f3fe604',
+        'info_dict': {
+            'id': 'z08zf8le23c6',
+            'ext': 'mp4',
+            'title': 'Say something nice',
+            'thumbnail': 're:http://.*\.jpg',
+        },
+    }]
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        url = 'http://gorillavid.in/%s' % video_id
+
+        webpage = self._download_webpage(url, video_id)
+
+        fields = dict(re.findall(r'''(?x)<input\s+
+            type="hidden"\s+
+            name="([^"]+)"\s+
+            (?:id="[^"]+"\s+)?
+            value="([^"]*)"
+            ''', webpage))
+        
+        if fields['op'] == 'download1':
+            post = compat_urllib_parse.urlencode(fields)
+
+            req = compat_urllib_request.Request(url, post)
+            req.add_header('Content-type', 'application/x-www-form-urlencoded')
+
+            webpage = self._download_webpage(req, video_id, 'Downloading video page')
+
+        title = self._search_regex(r'style="z-index: [0-9]+;">([0-9a-zA-Z ]+)(?:-.+)?</span>', webpage, 'title')
+        thumbnail = self._search_regex(r'image:\'(http[^\']+)\',', webpage, 'thumbnail')
+        url = self._search_regex(r'file: \'(http[^\']+)\',', webpage, 'file url')
+
+        formats = [{
+            'format_id': 'sd',
+            'url': url,
+            'ext': determine_ext(url),
+            'quality': 1,
+        }]
+
+        return {
+            'id': video_id,
+            'title': title,
+            'thumbnail': thumbnail,
+            'formats': formats,
+        }
diff --git a/youtube_dl/extractor/hypem.py b/youtube_dl/extractor/hypem.py

index 9bd06e7c7913e9c7492f63417760012f1219c875..6d0d847c6d3461a02c6eab71b24848247e9678ab 100644 (file)
--- a/youtube_dl/extractor/hypem.py
+++ b/youtube_dl/extractor/hypem.py
@@ -1,10 +1,11 @@
+from __future__ import unicode_literals
+
  import json
  import re
  import time
  
  from .common import InfoExtractor
  from ..utils import (
-    compat_str,
      compat_urllib_parse,
      compat_urllib_request,
  
@@ -13,59 +14,55 @@ from ..utils import (
  
  
  class HypemIE(InfoExtractor):
-    """Information Extractor for hypem"""
-    _VALID_URL = r'(?:http://)?(?:www\.)?hypem\.com/track/([^/]+)/([^/]+)'
+    _VALID_URL = r'http://(?:www\.)?hypem\.com/track/([^/]+)/([^/]+)'
      _TEST = {
-        u'url': u'http://hypem.com/track/1v6ga/BODYWORK+-+TAME',
-        u'file': u'1v6ga.mp3',
-        u'md5': u'b9cc91b5af8995e9f0c1cee04c575828',
-        u'info_dict': {
-            u"title": u"Tame"
+        'url': 'http://hypem.com/track/1v6ga/BODYWORK+-+TAME',
+        'md5': 'b9cc91b5af8995e9f0c1cee04c575828',
+        'info_dict': {
+            'id': '1v6ga',
+            'ext': 'mp3',
+            'title': 'Tame',
+            'uploader': 'BODYWORK',
          }
      }
  
      def _real_extract(self, url):
          mobj = re.match(self._VALID_URL, url)
-        if mobj is None:
-            raise ExtractorError(u'Invalid URL: %s' % url)
          track_id = mobj.group(1)
  
          data = {'ax': 1, 'ts': time.time()}
          data_encoded = compat_urllib_parse.urlencode(data)
          complete_url = url + "?" + data_encoded
          request = compat_urllib_request.Request(complete_url)
-        response, urlh = self._download_webpage_handle(request, track_id, u'Downloading webpage with the url')
+        response, urlh = self._download_webpage_handle(
+            request, track_id, 'Downloading webpage with the url')
          cookie = urlh.headers.get('Set-Cookie', '')
  
-        self.report_extraction(track_id)
-
-        html_tracks = self._html_search_regex(r'<script type="application/json" id="displayList-data">(.*?)</script>',
-            response, u'tracks', flags=re.MULTILINE|re.DOTALL).strip()
+        html_tracks = self._html_search_regex(
+            r'(?ms)<script type="application/json" id="displayList-data">\s*(.*?)\s*</script>',
+            response, 'tracks')
          try:
              track_list = json.loads(html_tracks)
-            track = track_list[u'tracks'][0]
+            track = track_list['tracks'][0]
          except ValueError:
-            raise ExtractorError(u'Hypemachine contained invalid JSON.')
+            raise ExtractorError('Hypemachine contained invalid JSON.')
  
-        key = track[u"key"]
-        track_id = track[u"id"]
-        artist = track[u"artist"]
-        title = track[u"song"]
+        key = track['key']
+        track_id = track['id']
+        artist = track['artist']
+        title = track['song']
  
-        serve_url = "http://hypem.com/serve/source/%s/%s" % (compat_str(track_id), compat_str(key))
-        request = compat_urllib_request.Request(serve_url, "" , {'Content-Type': 'application/json'})
+        serve_url = "http://hypem.com/serve/source/%s/%s" % (track_id, key)
+        request = compat_urllib_request.Request(
+            serve_url, '', {'Content-Type': 'application/json'})
          request.add_header('cookie', cookie)
-        song_data_json = self._download_webpage(request, track_id, u'Downloading metadata')
-        try:
-            song_data = json.loads(song_data_json)
-        except ValueError:
-            raise ExtractorError(u'Hypemachine contained invalid JSON.')
-        final_url = song_data[u"url"]
+        song_data = self._download_json(request, track_id, 'Downloading metadata')
+        final_url = song_data["url"]
  
-        return [{
-            'id':       track_id,
-            'url':      final_url,
-            'ext':      "mp3",
-            'title':    title,
-            'artist':   artist,
-        }]
+        return {
+            'id': track_id,
+            'url': final_url,
+            'ext': 'mp3',
+            'title': title,
+            'uploader': artist,
+        }
diff --git a/youtube_dl/extractor/lifenews.py b/youtube_dl/extractor/lifenews.py

index 7a431a274abc5b189af8ee8779f6024f430704c7..8d9491f233bf578bc9274a18fe022a1538effad6 100644 (file)
--- a/youtube_dl/extractor/lifenews.py
+++ b/youtube_dl/extractor/lifenews.py
@@ -24,7 +24,7 @@ class LifeNewsIE(InfoExtractor):
              'ext': 'mp4',
              'title': 'МВД разыскивает мужчин, оставивших в IKEA сумку с автоматом',
              'description': 'Камеры наблюдения гипермаркета зафиксировали троих мужчин, спрятавших оружейный арсенал в камере хранения.',
-            'thumbnail': 'http://lifenews.ru/static/posts/2014/1/126342/.video.jpg',
+            'thumbnail': 're:http://.*\.jpg',
              'upload_date': '20140130',
          }
      }
diff --git a/youtube_dl/extractor/livestream.py b/youtube_dl/extractor/livestream.py

index 1dcd1fb2de42894d80c494185caeb600540b02da..5c71f4f091ab9e30baa0700e86829f5928ddcccf 100644 (file)
--- a/youtube_dl/extractor/livestream.py
+++ b/youtube_dl/extractor/livestream.py
@@ -1,3 +1,5 @@
+from __future__ import unicode_literals
+
  import re
  import json
  
@@ -6,31 +8,34 @@ from ..utils import (
      compat_urllib_parse_urlparse,
      compat_urlparse,
      xpath_with_ns,
+    compat_str,
  )
  
  
  class LivestreamIE(InfoExtractor):
-    IE_NAME = u'livestream'
+    IE_NAME = 'livestream'
      _VALID_URL = r'http://new\.livestream\.com/.*?/(?P<event_name>.*?)(/videos/(?P<id>\d+))?/?$'
      _TEST = {
-        u'url': u'http://new.livestream.com/CoheedandCambria/WebsterHall/videos/4719370',
-        u'file': u'4719370.mp4',
-        u'md5': u'0d2186e3187d185a04b3cdd02b828836',
-        u'info_dict': {
-            u'title': u'Live from Webster Hall NYC',
-            u'upload_date': u'20121012',
+        'url': 'http://new.livestream.com/CoheedandCambria/WebsterHall/videos/4719370',
+        'md5': '53274c76ba7754fb0e8d072716f2292b',
+        'info_dict': {
+            'id': '4719370',
+            'ext': 'mp4',
+            'title': 'Live from Webster Hall NYC',
+            'upload_date': '20121012',
          }
      }
  
      def _extract_video_info(self, video_data):
          video_url = video_data.get('progressive_url_hd') or video_data.get('progressive_url')
-        return {'id': video_data['id'],
-                'url': video_url,
-                'ext': 'mp4',
-                'title': video_data['caption'],
-                'thumbnail': video_data['thumbnail_url'],
-                'upload_date': video_data['updated_at'].replace('-','')[:8],
-                }
+        return {
+            'id': compat_str(video_data['id']),
+            'url': video_url,
+            'ext': 'mp4',
+            'title': video_data['caption'],
+            'thumbnail': video_data['thumbnail_url'],
+            'upload_date': video_data['updated_at'].replace('-', '')[:8],
+        }
  
      def _real_extract(self, url):
          mobj = re.match(self._VALID_URL, url)
@@ -40,36 +45,36 @@ class LivestreamIE(InfoExtractor):
  
          if video_id is None:
              # This is an event page:
-            config_json = self._search_regex(r'window.config = ({.*?});',
-                webpage, u'window config')
+            config_json = self._search_regex(
+                r'window.config = ({.*?});', webpage, 'window config')
              info = json.loads(config_json)['event']
              videos = [self._extract_video_info(video_data['data'])
-                for video_data in info['feed']['data'] if video_data['type'] == u'video']
+                for video_data in info['feed']['data'] if video_data['type'] == 'video']
              return self.playlist_result(videos, info['id'], info['full_name'])
          else:
-            og_video = self._og_search_video_url(webpage, name=u'player url')
+            og_video = self._og_search_video_url(webpage, 'player url')
              query_str = compat_urllib_parse_urlparse(og_video).query
              query = compat_urlparse.parse_qs(query_str)
              api_url = query['play_url'][0].replace('.smil', '')
-            info = json.loads(self._download_webpage(api_url, video_id,
-                                                     u'Downloading video info'))
+            info = json.loads(self._download_webpage(
+                api_url, video_id, 'Downloading video info'))
              return self._extract_video_info(info)
  
  
  # The original version of Livestream uses a different system
  class LivestreamOriginalIE(InfoExtractor):
-    IE_NAME = u'livestream:original'
+    IE_NAME = 'livestream:original'
      _VALID_URL = r'https?://www\.livestream\.com/(?P<user>[^/]+)/video\?.*?clipId=(?P<id>.*?)(&|$)'
      _TEST = {
-        u'url': u'http://www.livestream.com/dealbook/video?clipId=pla_8aa4a3f1-ba15-46a4-893b-902210e138fb',
-        u'info_dict': {
-            u'id': u'pla_8aa4a3f1-ba15-46a4-893b-902210e138fb',
-            u'ext': u'flv',
-            u'title': u'Spark 1 (BitCoin) with Cameron Winklevoss & Tyler Winklevoss of Winklevoss Capital',
+        'url': 'http://www.livestream.com/dealbook/video?clipId=pla_8aa4a3f1-ba15-46a4-893b-902210e138fb',
+        'info_dict': {
+            'id': 'pla_8aa4a3f1-ba15-46a4-893b-902210e138fb',
+            'ext': 'flv',
+            'title': 'Spark 1 (BitCoin) with Cameron Winklevoss & Tyler Winklevoss of Winklevoss Capital',
          },
-        u'params': {
+        'params': {
              # rtmp
-            u'skip_download': True,
+            'skip_download': True,
          },
      }
  
@@ -84,7 +89,7 @@ class LivestreamOriginalIE(InfoExtractor):
          ns = {'media': 'http://search.yahoo.com/mrss'}
          thumbnail_url = item.find(xpath_with_ns('media:thumbnail', ns)).attrib['url']
          # Remove the extension and number from the path (like 1.jpg)
-        path = self._search_regex(r'(user-files/.+)_.*?\.jpg$', thumbnail_url, u'path')
+        path = self._search_regex(r'(user-files/.+)_.*?\.jpg$', thumbnail_url, 'path')
  
          return {
              'id': video_id,
diff --git a/youtube_dl/extractor/ndtv.py b/youtube_dl/extractor/ndtv.py

index d81df3c10668492383c11b03cf30db7d797f7c90..95e7d63aade1edbc6a0c300bcc18168d707f8716 100644 (file)
--- a/youtube_dl/extractor/ndtv.py
+++ b/youtube_dl/extractor/ndtv.py
@@ -1,22 +1,28 @@
+from __future__ import unicode_literals
+
  import re
  
  from .common import InfoExtractor
-from ..utils import month_by_name
+from ..utils import (
+    month_by_name,
+    int_or_none,
+)
  
  
  class NDTVIE(InfoExtractor):
      _VALID_URL = r'^https?://(?:www\.)?ndtv\.com/video/player/[^/]*/[^/]*/(?P<id>[a-z0-9]+)'
  
      _TEST = {
-        u"url": u"http://www.ndtv.com/video/player/news/ndtv-exclusive-don-t-need-character-certificate-from-rahul-gandhi-says-arvind-kejriwal/300710",
-        u"file": u"300710.mp4",
-        u"md5": u"39f992dbe5fb531c395d8bbedb1e5e88",
-        u"info_dict": {
-            u"title": u"NDTV exclusive: Don't need character certificate from Rahul Gandhi, says Arvind Kejriwal",
-            u"description": u"In an exclusive interview to NDTV, Aam Aadmi Party's Arvind Kejriwal says it makes no difference to him that Rahul Gandhi said the Congress needs to learn from his party.",
-            u"upload_date": u"20131208",
-            u"duration": 1327,
-            u"thumbnail": u"http://i.ndtvimg.com/video/images/vod/medium/2013-12/big_300710_1386518307.jpg",
+        'url': 'http://www.ndtv.com/video/player/news/ndtv-exclusive-don-t-need-character-certificate-from-rahul-gandhi-says-arvind-kejriwal/300710',
+        'md5': '39f992dbe5fb531c395d8bbedb1e5e88',
+        'info_dict': {
+            'id': '300710',
+            'ext': 'mp4',
+            'title': "NDTV exclusive: Don't need character certificate from Rahul Gandhi, says Arvind Kejriwal",
+            'description': 'md5:ab2d4b4a6056c5cb4caa6d729deabf02',
+            'upload_date': '20131208',
+            'duration': 1327,
+            'thumbnail': 'http://i.ndtvimg.com/video/images/vod/medium/2013-12/big_300710_1386518307.jpg',
          },
      }
  
@@ -27,13 +33,12 @@ class NDTVIE(InfoExtractor):
          webpage = self._download_webpage(url, video_id)
  
          filename = self._search_regex(
-            r"__filename='([^']+)'", webpage, u'video filename')
-        video_url = (u'http://bitcast-b.bitgravity.com/ndtvod/23372/ndtv/%s' %
+            r"__filename='([^']+)'", webpage, 'video filename')
+        video_url = ('http://bitcast-b.bitgravity.com/ndtvod/23372/ndtv/%s' %
                       filename)
  
-        duration_str = filename = self._search_regex(
-            r"__duration='([^']+)'", webpage, u'duration', fatal=False)
-        duration = None if duration_str is None else int(duration_str)
+        duration = int_or_none(self._search_regex(
+            r"__duration='([^']+)'", webpage, 'duration', fatal=False))
  
          date_m = re.search(r'''(?x)
              <p\s+class="vod_dateline">\s*
@@ -41,7 +46,7 @@ class NDTVIE(InfoExtractor):
                  (?P<monthname>[A-Za-z]+)\s+(?P<day>[0-9]+),\s*(?P<year>[0-9]+)
              ''', webpage)
          upload_date = None
-        assert date_m
+
          if date_m is not None:
              month = month_by_name(date_m.group('monthname'))
              if month is not None:
@@ -49,14 +54,19 @@ class NDTVIE(InfoExtractor):
                      date_m.group('year'), month, int(date_m.group('day')))
  
          description = self._og_search_description(webpage)
-        READ_MORE = u' (Read more)'
+        READ_MORE = ' (Read more)'
          if description.endswith(READ_MORE):
              description = description[:-len(READ_MORE)]
  
+        title = self._og_search_title(webpage)
+        TITLE_SUFFIX = ' - NDTV'
+        if title.endswith(TITLE_SUFFIX):
+            title = title[:-len(TITLE_SUFFIX)]
+
          return {
              'id': video_id,
              'url': video_url,
-            'title': self._og_search_title(webpage),
+            'title': title,
              'description': description,
              'thumbnail': self._og_search_thumbnail(webpage),
              'duration': duration,
diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py

index 3a6a7883e31f2ff309157f0e4d27765ef98fdf1b..96f0ae1ebde53402a3e651dcd86c1397d141c542 100644 (file)
--- a/youtube_dl/extractor/nrk.py
+++ b/youtube_dl/extractor/nrk.py
@@ -6,7 +6,7 @@ import re
  from .common import InfoExtractor
  from ..utils import (
      ExtractorError,
-    int_or_none,
+    float_or_none,
      unified_strdate,
  )
  
@@ -72,14 +72,14 @@ class NRKIE(InfoExtractor):
  
  
  class NRKTVIE(InfoExtractor):
-    _VALID_URL = r'http://tv\.nrk(?:super)?\.no/(?:serie/[^/]+|program)/(?P<id>[a-z]{4}\d{8})'
+    _VALID_URL = r'http://tv\.nrk(?:super)?\.no/(?:serie/[^/]+|program)/(?P<id>[a-zA-Z]{4}\d{8})'
  
      _TESTS = [
          {
-            'url': 'http://tv.nrk.no/serie/20-spoersmaal-tv/muhh48000314/23-05-2014',
+            'url': 'http://tv.nrk.no/serie/20-spoersmaal-tv/MUHH48000314/23-05-2014',
              'md5': '7b96112fbae1faf09a6f9ae1aff6cb84',
              'info_dict': {
-                'id': 'muhh48000314',
+                'id': 'MUHH48000314',
                  'ext': 'flv',
                  'title': '20 spørsmål',
                  'description': 'md5:bdea103bc35494c143c6a9acdd84887a',
@@ -89,7 +89,7 @@ class NRKTVIE(InfoExtractor):
          },
          {
              'url': 'http://tv.nrk.no/program/mdfp15000514',
-            'md5': '383650ece2b25ecec996ad7b5bb2a384',
+            'md5': 'af01795a31f1cf7265c8657534d8077b',
              'info_dict': {
                  'id': 'mdfp15000514',
                  'ext': 'flv',
@@ -111,9 +111,8 @@ class NRKTVIE(InfoExtractor):
          description = self._html_search_meta('description', page, 'description')
          thumbnail = self._html_search_regex(r'data-posterimage="([^"]+)"', page, 'thumbnail', fatal=False)
          upload_date = unified_strdate(self._html_search_meta('rightsfrom', page, 'upload date', fatal=False))
-        duration = self._html_search_regex(r'data-duration="([^"]+)"', page, 'duration', fatal=False)
-        if duration:
-            duration = float(duration)
+        duration = float_or_none(
+            self._html_search_regex(r'data-duration="([^"]+)"', page, 'duration', fatal=False))
  
          formats = []
  
@@ -142,4 +141,4 @@ class NRKTVIE(InfoExtractor):
              'upload_date': upload_date,
              'duration': duration,
              'formats': formats,
-        }
-\ No newline at end of file
+        }
diff --git a/youtube_dl/extractor/ntv.py b/youtube_dl/extractor/ntv.py

index 733ed6c264484a4c9bcff2f43a04a5830d36ab59..ed60314eca4f918392da7fe2637e47ed3cdf5ee9 100644 (file)
--- a/youtube_dl/extractor/ntv.py
+++ b/youtube_dl/extractor/ntv.py
@@ -5,7 +5,6 @@ import re
  
  from .common import InfoExtractor
  from ..utils import (
-    ExtractorError,
      unescapeHTML
  )
  
diff --git a/youtube_dl/extractor/nuvid.py b/youtube_dl/extractor/nuvid.py

index e3db9fe8c6c643d49aa8c521921bc3f508010f98..280328b78306e5ab332cbb7111127f832c6c9aba 100644 (file)
--- a/youtube_dl/extractor/nuvid.py
+++ b/youtube_dl/extractor/nuvid.py
@@ -3,6 +3,11 @@ from __future__ import unicode_literals
  import re
  
  from .common import InfoExtractor
+from ..utils import (
+    parse_duration,
+    unified_strdate,
+    compat_urllib_request,
+)
  
  
  class NuvidIE(InfoExtractor):
@@ -13,8 +18,10 @@ class NuvidIE(InfoExtractor):
          'info_dict': {
              'id': '1310741',
              'ext': 'mp4',
-            "title": "Horny babes show their awesome bodeis and",
-            "age_limit": 18,
+            'title': 'Horny babes show their awesome bodeis and',
+            'duration': 129,
+            'upload_date': '20140508',
+            'age_limit': 18,
          }
      }
  
@@ -22,27 +29,41 @@ class NuvidIE(InfoExtractor):
          mobj = re.match(self._VALID_URL, url)
          video_id = mobj.group('id')
  
-        murl = url.replace('://www.', '://m.')
-        webpage = self._download_webpage(murl, video_id)
-
-        title = self._html_search_regex(
-            r'<div class="title">\s+<h2[^>]*>([^<]+)</h2>',
-            webpage, 'title').strip()
+        formats = []
  
-        url_end = self._html_search_regex(
-            r'href="(/[^"]+)"[^>]*data-link_type="mp4"',
-            webpage, 'video_url')
-        video_url = 'http://m.nuvid.com' + url_end
+        for dwnld_speed, format_id in [(0, '3gp'), (5, 'mp4')]:
+            request = compat_urllib_request.Request(
+                'http://m.nuvid.com/play/%s' % video_id)
+            request.add_header('Cookie', 'skip_download_page=1; dwnld_speed=%d; adv_show=1' % dwnld_speed)
+            webpage = self._download_webpage(
+                request, video_id, 'Downloading %s page' % format_id)
+            video_url = self._html_search_regex(
+                r'<a href="([^"]+)"\s*>Continue to watch video', webpage, '%s video URL' % format_id, fatal=False)
+            if not video_url:
+                continue
+            formats.append({
+                'url': video_url,
+                'format_id': format_id,
+            })
  
+        webpage = self._download_webpage(
+            'http://m.nuvid.com/video/%s' % video_id, video_id, 'Downloading video page')
+        title = self._html_search_regex(
+            r'<div class="title">\s+<h2[^>]*>([^<]+)</h2>', webpage, 'title').strip()
          thumbnail = self._html_search_regex(
              r'href="(/thumbs/[^"]+)"[^>]*data-link_type="thumbs"',
              webpage, 'thumbnail URL', fatal=False)
+        duration = parse_duration(self._html_search_regex(
+            r'Length:\s*<span>(\d{2}:\d{2})</span>',webpage, 'duration', fatal=False))
+        upload_date = unified_strdate(self._html_search_regex(
+            r'Added:\s*<span>(\d{4}-\d{2}-\d{2})</span>', webpage, 'upload date', fatal=False))
  
          return {
              'id': video_id,
-            'url': video_url,
-            'ext': 'mp4',
              'title': title,
-            'thumbnail': thumbnail,
+            'thumbnail': 'http://m.nuvid.com%s' % thumbnail,
+            'duration': duration,
+            'upload_date': upload_date,
              'age_limit': 18,
-        }
+            'formats': formats,
+        }
+\ No newline at end of file
diff --git a/youtube_dl/extractor/prosiebensat1.py b/youtube_dl/extractor/prosiebensat1.py

index e4c4ad7145395c1805fa97c6f8e83024fa28fde8..da64a1a7b4c0d8bceb89415894c84d651c7ac566 100644 (file)
--- a/youtube_dl/extractor/prosiebensat1.py
+++ b/youtube_dl/extractor/prosiebensat1.py
@@ -158,19 +158,19 @@ class ProSiebenSat1IE(InfoExtractor):
      _CLIPID_REGEXES = [
          r'"clip_id"\s*:\s+"(\d+)"',
          r'clipid: "(\d+)"',
-        r'clipId=(\d+)',
+        r'clip[iI]d=(\d+)',
      ]
      _TITLE_REGEXES = [
          r'<h2 class="subtitle" itemprop="name">\s*(.+?)</h2>',
          r'<header class="clearfix">\s*<h3>(.+?)</h3>',
          r'<!-- start video -->\s*<h1>(.+?)</h1>',
-        r'<div class="ep-femvideos-pi4-video-txt">\s*<h2>(.+?)</h2>',
+        r'<h1 class="att-name">\s*(.+?)</h1>',
      ]
      _DESCRIPTION_REGEXES = [
          r'<p itemprop="description">\s*(.+?)</p>',
          r'<div class="videoDecription">\s*<p><strong>Beschreibung</strong>: (.+?)</p>',
          r'<div class="g-plusone" data-size="medium"></div>\s*</div>\s*</header>\s*(.+?)\s*<footer>',
-        r'<p>(.+?)</p>\s*<div class="ep-femvideos-pi4-video-footer">',
+        r'<p class="att-description">\s*(.+?)\s*</p>',
      ]
      _UPLOAD_DATE_REGEXES = [
          r'<meta property="og:published_time" content="(.+?)">',
diff --git a/youtube_dl/extractor/rai.py b/youtube_dl/extractor/rai.py

new file mode 100644 (file)

index 0000000..cb43053
--- /dev/null
+++ b/youtube_dl/extractor/rai.py
@@ -0,0 +1,121 @@
+from __future__ import unicode_literals
+
+import re
+
+from .subtitles import SubtitlesInfoExtractor
+from ..utils import (
+    parse_duration,
+    unified_strdate,
+    compat_urllib_parse,
+)
+
+
+class RaiIE(SubtitlesInfoExtractor):
+    _VALID_URL = r'(?P<url>http://(?:.+?\.)?(?:rai\.it|rai\.tv|rainews\.it)/dl/.+?-(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})(?:-.+?)?\.html)'
+    _TESTS = [
+        {
+            'url': 'http://www.rai.tv/dl/RaiTV/programmi/media/ContentItem-cb27157f-9dd0-4aee-b788-b1f67643a391.html',
+            'md5': 'c064c0b2d09c278fb293116ef5d0a32d',
+            'info_dict': {
+                'id': 'cb27157f-9dd0-4aee-b788-b1f67643a391',
+                'ext': 'mp4',
+                'title': 'Report del 07/04/2014',
+                'description': 'md5:f27c544694cacb46a078db84ec35d2d9',
+                'upload_date': '20140407',
+                'duration': 6160,
+            }
+        },
+        {
+            'url': 'http://www.raisport.rai.it/dl/raiSport/media/rassegna-stampa-04a9f4bd-b563-40cf-82a6-aad3529cb4a9.html',
+            'md5': '8bb9c151924ce241b74dd52ef29ceafa',
+            'info_dict': {
+                'id': '04a9f4bd-b563-40cf-82a6-aad3529cb4a9',
+                'ext': 'mp4',
+                'title': 'TG PRIMO TEMPO',
+                'description': '',
+                'upload_date': '20140612',
+                'duration': 1758,
+            }
+        },
+        {
+            'url': 'http://www.rainews.it/dl/rainews/media/state-of-the-net-Antonella-La-Carpia-regole-virali-7aafdea9-0e5d-49d5-88a6-7e65da67ae13.html',
+            'md5': '35cf7c229f22eeef43e48b5cf923bef0',
+            'info_dict': {
+                'id': '7aafdea9-0e5d-49d5-88a6-7e65da67ae13',
+                'ext': 'mp4',
+                'title': 'State of the Net, Antonella La Carpia: regole virali',
+                'description': 'md5:b0ba04a324126903e3da7763272ae63c',
+                'upload_date': '20140613',
+            },
+            'skip': 'Error 404',
+        },
+        {
+            'url': 'http://www.rai.tv/dl/RaiTV/programmi/media/ContentItem-b4a49761-e0cc-4b14-8736-2729f6f73132-tg2.html',
+            'md5': '35694f062977fe6619943f08ed935730',
+            'info_dict': {
+                'id': 'b4a49761-e0cc-4b14-8736-2729f6f73132',
+                'ext': 'mp4',
+                'title': 'Alluvione in Sardegna e dissesto idrogeologico',
+                'description': 'Edizione delle ore 20:30 ',
+            }
+        },
+    ]
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        media = self._download_json('%s?json' % mobj.group('url'), video_id, 'Downloading video JSON')
+
+        title = media.get('name')
+        description = media.get('desc')
+        thumbnail = media.get('image_300') or media.get('image_medium') or media.get('image')
+        duration = parse_duration(media.get('length'))
+        uploader = media.get('author')
+        upload_date = unified_strdate(media.get('date'))
+
+        formats = []
+
+        for format_id in ['wmv', 'm3u8', 'mediaUri', 'h264']:
+            media_url = media.get(format_id)
+            if not media_url:
+                continue
+            formats.append({
+                'url': media_url,
+                'format_id': format_id,
+                'ext': 'mp4',
+            })
+
+        if self._downloader.params.get('listsubtitles', False):
+            page = self._download_webpage(url, video_id)
+            self._list_available_subtitles(video_id, page)
+            return
+
+        subtitles = {}
+        if self._have_to_download_any_subtitles:
+            page = self._download_webpage(url, video_id)
+            subtitles = self.extract_subtitles(video_id, page)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'uploader': uploader,
+            'upload_date': upload_date,
+            'duration': duration,
+            'formats': formats,
+            'subtitles': subtitles,
+        }
+
+    def _get_available_subtitles(self, video_id, webpage):
+        subtitles = {}
+        m = re.search(r'<meta name="closedcaption" content="(?P<captions>[^"]+)"', webpage)
+        if m:
+            captions = m.group('captions')
+            STL_EXT = '.stl'
+            SRT_EXT = '.srt'
+            if captions.endswith(STL_EXT):
+                captions = captions[:-len(STL_EXT)] + SRT_EXT
+            subtitles['it'] = 'http://www.rai.tv%s' % compat_urllib_parse.quote(captions)
+        return subtitles
+\ No newline at end of file
diff --git a/youtube_dl/extractor/slutload.py b/youtube_dl/extractor/slutload.py

index ecc0abfdacf4353035d27f5c451f35341837c70e..e6e7d086503a04a3fda862f601e109003169b9d7 100644 (file)
--- a/youtube_dl/extractor/slutload.py
+++ b/youtube_dl/extractor/slutload.py
@@ -3,9 +3,6 @@ from __future__ import unicode_literals
  import re
  
  from .common import InfoExtractor
-from ..utils import (
-    ExtractorError,
-)
  
  
  class SlutloadIE(InfoExtractor):
diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py

index 25515f0686b0725075005da7f93f17544bd1b1ea..7aa100fb22fcfe56c09370067c94a953254885a2 100644 (file)
--- a/youtube_dl/extractor/soundcloud.py
+++ b/youtube_dl/extractor/soundcloud.py
@@ -1,7 +1,6 @@
  # encoding: utf-8
  from __future__ import unicode_literals
  
-import json
  import re
  import itertools
  
diff --git a/youtube_dl/extractor/steam.py b/youtube_dl/extractor/steam.py

index 1d8d5722468a4dcf763d1c1b36a91ab1cf69b0e3..af689e2c20411ef4e8ce1badc82a9d24f9a6da31 100644 (file)
--- a/youtube_dl/extractor/steam.py
+++ b/youtube_dl/extractor/steam.py
@@ -53,7 +53,7 @@ class SteamIE(InfoExtractor):
              'ext': 'mp4',
              'upload_date': '20140329',
              'title': 'FRONTIERS - Final Greenlight Trailer',
-            'description': "The final trailer for the Steam Greenlight launch. Hooray, progress! Here's the official Greenlight page: http://steamcommunity.com/sharedfiles/filedetails/?id=242472205",
+            'description': 'md5:6df4fe8dd494ae811869672b0767e025',
              'uploader': 'AAD Productions',
              'uploader_id': 'AtomicAgeDogGames',
          }
diff --git a/youtube_dl/extractor/teachertube.py b/youtube_dl/extractor/teachertube.py

index 6d52763f95b26b9e2273a62f7a777841a3f9882f..4d9666c6b14980d6b04ebe1bd07cc3e79a9dd2bd 100644 (file)
--- a/youtube_dl/extractor/teachertube.py
+++ b/youtube_dl/extractor/teachertube.py
@@ -55,11 +55,13 @@ class TeacherTubeIE(InfoExtractor):
  
          quality = qualities(['mp3', 'flv', 'mp4'])
  
+        _, media_urls = zip(*re.findall(r'([\'"])file\1\s*:\s*"([^"]+)"', webpage))
+
          formats = [
              {
                  'url': media_url,
                  'quality': quality(determine_ext(media_url))
-            } for media_url in set(zip(*re.findall(r'([\'"])file\1\s*:\s*"([^"]+)"', webpage))[1])
+            } for media_url in set(media_urls)
          ]
  
          self._sort_formats(formats)
diff --git a/youtube_dl/extractor/ted.py b/youtube_dl/extractor/ted.py

index d260c91c2172deabb697c3ad242cfda24f395d41..bce32a87330731e229c17e267ca7f65342d22952 100644 (file)
--- a/youtube_dl/extractor/ted.py
+++ b/youtube_dl/extractor/ted.py
@@ -27,7 +27,7 @@ class TEDIE(SubtitlesInfoExtractor):
          '''
      _TESTS = [{
          'url': 'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html',
-        'md5': '4ea1dada91e4174b53dac2bb8ace429d',
+        'md5': 'fc94ac279feebbce69f21c0c6ee82810',
          'info_dict': {
              'id': '102',
              'ext': 'mp4',
diff --git a/youtube_dl/extractor/tube8.py b/youtube_dl/extractor/tube8.py

index 36bc36ad8aa7bf3680db6e30d081fec644d61bcd..08a48c05acf34b6cd190ba52d58189935fe6b20f 100644 (file)
--- a/youtube_dl/extractor/tube8.py
+++ b/youtube_dl/extractor/tube8.py
@@ -17,9 +17,10 @@ class Tube8IE(InfoExtractor):
      _VALID_URL = r'https?://(?:www\.)?tube8\.com/(?:[^/]+/){2}(?P<id>\d+)'
      _TEST = {
          'url': 'http://www.tube8.com/teen/kasia-music-video/229795/',
-        'file': '229795.mp4',
-        'md5': 'e9e0b0c86734e5e3766e653509475db0',
+        'md5': '44bf12b98313827dd52d35b8706a4ea0',
          'info_dict': {
+            'id': '229795',
+            'ext': 'mp4',
              'description': 'hot teen Kasia grinding',
              'uploader': 'unknown',
              'title': 'Kasia music video',
diff --git a/youtube_dl/extractor/veoh.py b/youtube_dl/extractor/veoh.py

index d16993daf0ddb8546f838ed59220a7efeb6cdcc6..fb132aef68fff7dc9ae3c4098ad2d31a5945825a 100644 (file)
--- a/youtube_dl/extractor/veoh.py
+++ b/youtube_dl/extractor/veoh.py
@@ -7,6 +7,7 @@ from .common import InfoExtractor
  from ..utils import (
      compat_urllib_request,
      int_or_none,
+    ExtractorError,
  )
  
  
@@ -94,8 +95,12 @@ class VeohIE(InfoExtractor):
          if video_id.startswith('v'):
              rsp = self._download_xml(
                  r'http://www.veoh.com/api/findByPermalink?permalink=%s' % video_id, video_id, 'Downloading video XML')
-            if rsp.get('stat') == 'ok':
+            stat = rsp.get('stat')
+            if stat == 'ok':
                  return self._extract_video(rsp.find('./videoList/video'))
+            elif stat == 'fail':
+                raise ExtractorError(
+                    '%s said: %s' % (self.IE_NAME, rsp.find('./errorList/error').get('errorMessage')), expected=True)
  
          webpage = self._download_webpage(url, video_id)
          age_limit = 0
diff --git a/youtube_dl/extractor/vulture.py b/youtube_dl/extractor/vulture.py

new file mode 100644 (file)

index 0000000..1eb24a3
--- /dev/null
+++ b/youtube_dl/extractor/vulture.py
@@ -0,0 +1,69 @@
+from __future__ import unicode_literals
+
+import json
+import os.path
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    int_or_none,
+    parse_iso8601,
+)
+
+
+class VultureIE(InfoExtractor):
+    IE_NAME = 'vulture.com'
+    _VALID_URL = r'https?://video\.vulture\.com/video/(?P<display_id>[^/]+)/'
+    _TEST = {
+        'url': 'http://video.vulture.com/video/Mindy-Kaling-s-Harvard-Speech/player?layout=compact&read_more=1',
+        'md5': '8d997845642a2b5152820f7257871bc8',
+        'info_dict': {
+            'id': '6GHRQL3RV7MSD1H4',
+            'ext': 'mp4',
+            'title': 'kaling-speech-2-MAGNIFY STANDARD CONTAINER REVISED',
+            'uploader_id': 'Sarah',
+            'thumbnail': 're:^http://.*\.jpg$',
+            'timestamp': 1401288564,
+            'upload_date': '20140528',
+            'description': 'Uplifting and witty, as predicted.',
+            'duration': 1015,
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        display_id = mobj.group('display_id')
+
+        webpage = self._download_webpage(url, display_id)
+        query_string = self._search_regex(
+            r"queryString\s*=\s*'([^']+)'", webpage, 'query string')
+        video_id = self._search_regex(
+            r'content=([^&]+)', query_string, 'video ID')
+        query_url = 'http://video.vulture.com/embed/player/container/1000/1000/?%s' % query_string
+
+        query_webpage = self._download_webpage(
+            query_url, display_id, note='Downloading query page')
+        params_json = self._search_regex(
+            r'(?sm)new MagnifyEmbeddablePlayer\({.*?contentItem:\s*(\{.*?\})\n,\n',
+            query_webpage,
+            'player params')
+        params = json.loads(params_json)
+
+        upload_timestamp = parse_iso8601(params['posted'].replace(' ', 'T'))
+        uploader_id = params.get('user', {}).get('handle')
+
+        media_item = params['media_item']
+        title = os.path.splitext(media_item['title'])[0]
+        duration = int_or_none(media_item.get('duration_seconds'))
+
+        return {
+            'id': video_id,
+            'display_id': display_id,
+            'url': media_item['pipeline_xid'],
+            'title': title,
+            'timestamp': upload_timestamp,
+            'thumbnail': params.get('thumbnail_url'),
+            'uploader_id': uploader_id,
+            'description': params.get('description'),
+            'duration': duration,
+        }
diff --git a/youtube_dl/extractor/wrzuta.py b/youtube_dl/extractor/wrzuta.py

new file mode 100644 (file)

index 0000000..34dd6d9
--- /dev/null
+++ b/youtube_dl/extractor/wrzuta.py
@@ -0,0 +1,81 @@
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    int_or_none,
+    qualities,
+)
+
+
+class WrzutaIE(InfoExtractor):
+    IE_NAME = 'wrzuta.pl'
+
+    _VALID_URL = r'https?://(?P<uploader>[0-9a-zA-Z]+)\.wrzuta\.pl/(?P<typ>film|audio)/(?P<id>[0-9a-zA-Z]+)'
+
+    _TESTS = [{
+        'url': 'http://laboratoriumdextera.wrzuta.pl/film/aq4hIZWrkBu/nike_football_the_last_game',
+        'md5': '9e67e05bed7c03b82488d87233a9efe7',
+        'info_dict': {
+            'id': 'aq4hIZWrkBu',
+            'ext': 'mp4',
+            'title': 'Nike Football: The Last Game',
+            'duration': 307,
+            'uploader_id': 'laboratoriumdextera',
+            'description': 'md5:7fb5ef3c21c5893375fda51d9b15d9cd',
+        },
+    }, {
+        'url': 'http://w729.wrzuta.pl/audio/9oXJqdcndqv/david_guetta_amp_showtek_ft._vassy_-_bad',
+        'md5': '1e546a18e1c22ac6e9adce17b8961ff5',
+        'info_dict': {
+            'id': '9oXJqdcndqv',
+            'ext': 'ogg',
+            'title': 'David Guetta & Showtek ft. Vassy - Bad',
+            'duration': 270,
+            'uploader_id': 'w729',
+            'description': 'md5:4628f01c666bbaaecefa83476cfa794a',
+        },
+    }]
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        typ = mobj.group('typ')
+        uploader = mobj.group('uploader')
+
+        webpage = self._download_webpage(url, video_id)
+
+        quality = qualities(['SD', 'MQ', 'HQ', 'HD'])
+
+        audio_table = {'flv': 'mp3', 'webm': 'ogg'}
+
+        embedpage = self._download_json('http://www.wrzuta.pl/npp/embed/%s/%s' % (uploader, video_id), video_id)
+
+        formats = []
+        for media in embedpage['url']:
+            if typ == 'audio':
+                ext = audio_table[media['type'].split('@')[0]]
+            else:
+                ext = media['type'].split('@')[0]
+
+            formats.append({
+                'format_id': '%s_%s' % (ext, media['quality'].lower()),
+                'url': media['url'],
+                'ext': ext,
+                'quality': quality(media['quality']),
+            })
+
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': self._og_search_title(webpage),
+            'thumbnail': self._og_search_thumbnail(webpage),
+            'formats': formats,
+            'duration': int_or_none(embedpage['duration']),
+            'uploader_id': uploader,
+            'description': self._og_search_description(webpage),
+            'age_limit': embedpage.get('minimalAge', 0),
+        }
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py

index 7c50881c4453eaff4ac69776fcc2dc94feef8d31..d45545ee490867d8f2e89d5c36e038364281a72d 100644 (file)
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -440,7 +440,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
  
      def _parse_sig_js(self, jscode):
          funcname = self._search_regex(
-            r'signature=([a-zA-Z]+)', jscode,
+            r'signature=([$a-zA-Z]+)', jscode,
               u'Initial JS player signature function name')
  
          jsi = JSInterpreter(jscode)
@@ -1386,13 +1386,13 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
                          |  p/
                          )
                          (
-                            (?:PL|EC|UU|FL|RD)?[0-9A-Za-z-_]{10,}
+                            (?:PL|LL|EC|UU|FL|RD)?[0-9A-Za-z-_]{10,}
                              # Top tracks, they can also include dots 
                              |(?:MC)[\w\.]*
                          )
                          .*
                       |
-                        ((?:PL|EC|UU|FL|RD)[0-9A-Za-z-_]{10,})
+                        ((?:PL|LL|EC|UU|FL|RD)[0-9A-Za-z-_]{10,})
                       )"""
      _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
      _MORE_PAGES_INDICATOR = r'data-link-type="next"'
diff --git a/youtube_dl/version.py b/youtube_dl/version.py

index 6fe7c7b257d64cc3c928a445e213c9a79e7d030f..a332b5a8edf7d3a7bc5d2de32d176927d8a6c18e 100644 (file)
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,2 +1,2 @@
  
-__version__ = '2014.06.07'
+__version__ = '2014.06.19'
author	Rogério Brito <rbrito@ime.usp.br>
	Sun, 22 Jun 2014 14:48:31 +0000 (11:48 -0300)
committer	Rogério Brito <rbrito@ime.usp.br>
	Sun, 22 Jun 2014 14:48:31 +0000 (11:48 -0300)
test/test_playlists.py		patch | blob | history
youtube-dl		patch | blob | history
youtube_dl/downloader/hls.py		patch | blob | history
youtube_dl/downloader/rtmp.py		patch | blob | history
youtube_dl/extractor/__init__.py		patch | blob | history
youtube_dl/extractor/ard.py		patch | blob | history
youtube_dl/extractor/bilibili.py		patch | blob | history
youtube_dl/extractor/blinkx.py		patch | blob | history
youtube_dl/extractor/brightcove.py		patch | blob | history
youtube_dl/extractor/dreisat.py		patch | blob | history
youtube_dl/extractor/fc2.py		patch | blob | history
youtube_dl/extractor/generic.py		patch | blob | history
youtube_dl/extractor/gorillavid.py	[new file with mode: 0644]	patch | blob
youtube_dl/extractor/hypem.py		patch | blob | history
youtube_dl/extractor/lifenews.py		patch | blob | history
youtube_dl/extractor/livestream.py		patch | blob | history
youtube_dl/extractor/ndtv.py		patch | blob | history
youtube_dl/extractor/nrk.py		patch | blob | history
youtube_dl/extractor/ntv.py		patch | blob | history
youtube_dl/extractor/nuvid.py		patch | blob | history
youtube_dl/extractor/prosiebensat1.py		patch | blob | history
youtube_dl/extractor/rai.py	[new file with mode: 0644]	patch | blob
youtube_dl/extractor/slutload.py		patch | blob | history
youtube_dl/extractor/soundcloud.py		patch | blob | history
youtube_dl/extractor/steam.py		patch | blob | history
youtube_dl/extractor/teachertube.py		patch | blob | history
youtube_dl/extractor/ted.py		patch | blob | history
youtube_dl/extractor/tube8.py		patch | blob | history
youtube_dl/extractor/veoh.py		patch | blob | history
youtube_dl/extractor/vulture.py	[new file with mode: 0644]	patch | blob
youtube_dl/extractor/wrzuta.py	[new file with mode: 0644]	patch | blob
youtube_dl/extractor/youtube.py		patch | blob | history
youtube_dl/version.py		patch | blob | history