Raphaël G. Git Repositories - youtubedl/commitdiff
Imported Upstream version 2014.11.23
author Rogério Brito <rbrito@ime.usp.br>
Mon, 24 Nov 2014 00:01:05 +0000 (22:01 -0200)
committer Rogério Brito <rbrito@ime.usp.br>
Mon, 24 Nov 2014 00:01:05 +0000 (22:01 -0200)
youtube-dl
youtube_dl/YoutubeDL.py
youtube_dl/extractor/__init__.py
youtube_dl/extractor/folketinget.py [new file with mode: 0644]
youtube_dl/extractor/generic.py
youtube_dl/extractor/rtlnl.py
youtube_dl/extractor/sztvhu.py
youtube_dl/extractor/telebruxelles.py [new file with mode: 0644]
youtube_dl/extractor/youtube.py
youtube_dl/version.py

index 433b5589610cc36dbc2a823a60655d3d5b8c32c9..9cb5e7a3f5b888fd8caab7455ce4f99958a94b24 100755 (executable)
Binary files a/youtube-dl and b/youtube-dl differ
index 0a3569304b113bf0e3ad7ebc6f77a34253fcc76f..fde026fbf695781ac22d936c87c0216e4c12cfd9 100755 (executable)
@@ -624,7 +624,7 @@ class YoutubeDL(object):
 
             return self.process_ie_result(
                 new_result, download=download, extra_info=extra_info)
 
             return self.process_ie_result(
                 new_result, download=download, extra_info=extra_info)
-        elif result_type == 'playlist' or playlist == 'multi_video':
+        elif result_type == 'playlist' or result_type == 'multi_video':
             # We process each entry in the playlist
             playlist = ie_result.get('title', None) or ie_result.get('id', None)
             self.to_screen('[download] Downloading playlist: %s' % playlist)
             # We process each entry in the playlist
             playlist = ie_result.get('title', None) or ie_result.get('id', None)
             self.to_screen('[download] Downloading playlist: %s' % playlist)
index f45ce05ab6587fb5d2906eddcc8546e7ec5729ec..7497a97f5c7ece071b0ddc5427dc3ed746d1421d 100644 (file)
@@ -115,6 +115,7 @@ from .fktv import (
     FKTVPosteckeIE,
 )
 from .flickr import FlickrIE
     FKTVPosteckeIE,
 )
 from .flickr import FlickrIE
+from .folketinget import FolketingetIE
 from .fourtube import FourTubeIE
 from .franceculture import FranceCultureIE
 from .franceinter import FranceInterIE
 from .fourtube import FourTubeIE
 from .franceculture import FranceCultureIE
 from .franceinter import FranceInterIE
@@ -379,6 +380,7 @@ from .teachingchannel import TeachingChannelIE
 from .teamcoco import TeamcocoIE
 from .techtalks import TechTalksIE
 from .ted import TEDIE
 from .teamcoco import TeamcocoIE
 from .techtalks import TechTalksIE
 from .ted import TEDIE
+from .telebruxelles import TeleBruxellesIE
 from .telecinco import TelecincoIE
 from .telemb import TeleMBIE
 from .tenplay import TenPlayIE
 from .telecinco import TelecincoIE
 from .telemb import TeleMBIE
 from .tenplay import TenPlayIE
diff --git a/youtube_dl/extractor/folketinget.py b/youtube_dl/extractor/folketinget.py
new file mode 100644 (file)
index 0000000..68e2db9
--- /dev/null
@@ -0,0 +1,75 @@
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_parse_qs
+from ..utils import (
+    int_or_none,
+    parse_duration,
+    parse_iso8601,
+    xpath_text,
+)
+
+
+class FolketingetIE(InfoExtractor):
+    IE_DESC = 'Folketinget (ft.dk; Danish parliament)'
+    _VALID_URL = r'https?://(?:www\.)?ft\.dk/webtv/video/[^?#]*?\.(?P<id>[0-9]+)\.aspx'
+    _TEST = {
+        'url': 'http://www.ft.dk/webtv/video/20141/eru/td.1165642.aspx?as=1#player',
+        'info_dict': {
+            'id': '1165642',
+            'ext': 'mp4',
+            'title': 'Åbent samråd i Erhvervsudvalget',
+            'description': 'Åbent samråd med erhvervs- og vækstministeren om regeringens politik på teleområdet',
+            'view_count': int,
+            'width': 768,
+            'height': 432,
+            'tbr': 928000,
+            'timestamp': 1416493800,
+            'upload_date': '20141120',
+            'duration': 3960,
+        },
+        'params': {
+            'skip_download': 'rtmpdump required',
+        }
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        title = self._og_search_title(webpage)
+        description = self._html_search_regex(
+            r'(?s)<div class="video-item-agenda"[^>]*>(.*?)<',
+            webpage, 'description', fatal=False)
+
+        player_params = compat_parse_qs(self._search_regex(
+            r'<embed src="http://ft\.arkena\.tv/flash/ftplayer\.swf\?([^"]+)"',
+            webpage, 'player params'))
+        xml_url = player_params['xml'][0]
+        doc = self._download_xml(xml_url, video_id)
+
+        timestamp = parse_iso8601(xpath_text(doc, './/date'))
+        duration = parse_duration(xpath_text(doc, './/duration'))
+        width = int_or_none(xpath_text(doc, './/width'))
+        height = int_or_none(xpath_text(doc, './/height'))
+        view_count = int_or_none(xpath_text(doc, './/views'))
+
+        formats = [{
+            'format_id': n.attrib['bitrate'],
+            'url': xpath_text(n, './url', fatal=True),
+            'tbr': int_or_none(n.attrib['bitrate']),
+        } for n in doc.findall('.//streams/stream')]
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'formats': formats,
+            'description': description,
+            'timestamp': timestamp,
+            'width': width,
+            'height': height,
+            'duration': duration,
+            'view_count': view_count,
+        }
index af769ab619eac9cb9b3d20869ba4a9e009977f6c..c7a824c29b15187becb0bb9758ee94348f2309da 100644 (file)
@@ -979,7 +979,7 @@ class GenericIE(InfoExtractor):
                 found = filter_video(re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage))
         if not found:
             # HTML5 video
                 found = filter_video(re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage))
         if not found:
             # HTML5 video
-            found = re.findall(r'(?s)<video[^<]*(?:>.*?<source[^>]*)?\s+src="([^"]+)"', webpage)
+            found = re.findall(r'(?s)<video[^<]*(?:>.*?<source[^>]*)?\s+src=["\'](.*?)["\']', webpage)
         if not found:
             found = re.search(
                 r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
         if not found:
             found = re.search(
                 r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
index 5daef2fc5656191ea2cc0c411d520b0b0f5cd455..4a188e5d46304ab8059d646cea667edc86cf7cb4 100644 (file)
@@ -28,9 +28,8 @@ class RtlXlIE(InfoExtractor):
         mobj = re.match(self._VALID_URL, url)
         uuid = mobj.group('uuid')
 
         mobj = re.match(self._VALID_URL, url)
         uuid = mobj.group('uuid')
 
-        # Use m3u8 streams (see https://github.com/rg3/youtube-dl/issues/4118)
         info = self._download_json(
         info = self._download_json(
-            'http://www.rtl.nl/system/s4m/vfd/version=2/uuid=%s/d=pc/fmt=adaptive/' % uuid,
+            'http://www.rtl.nl/system/s4m/vfd/version=2/uuid=%s/fmt=flash/' % uuid,
             uuid)
 
         material = info['material'][0]
             uuid)
 
         material = info['material'][0]
@@ -39,12 +38,13 @@ class RtlXlIE(InfoExtractor):
         progname = info['abstracts'][0]['name']
         subtitle = material['title'] or info['episodes'][0]['name']
 
         progname = info['abstracts'][0]['name']
         subtitle = material['title'] or info['episodes'][0]['name']
 
-        videopath = material['videopath']
+        # Use unencrypted m3u8 streams (See https://github.com/rg3/youtube-dl/issues/4118)
+        videopath = material['videopath'].replace('.f4m', '.m3u8')
         m3u8_url = 'http://manifest.us.rtl.nl' + videopath
 
         formats = self._extract_m3u8_formats(m3u8_url, uuid, ext='mp4')
 
         m3u8_url = 'http://manifest.us.rtl.nl' + videopath
 
         formats = self._extract_m3u8_formats(m3u8_url, uuid, ext='mp4')
 
-        video_urlpart = videopath.split('/adaptive/')[1][:-4]
+        video_urlpart = videopath.split('/flash/')[1][:-4]
         PG_URL_TEMPLATE = 'http://pg.us.rtl.nl/rtlxl/network/%s/progressive/%s.mp4'
 
         formats.extend([
         PG_URL_TEMPLATE = 'http://pg.us.rtl.nl/rtlxl/network/%s/progressive/%s.mp4'
 
         formats.extend([
index c9359fafb5c5989923c6320e3e684673b80057d6..aa5964acb6b3f40b0d663bd2169ac6aec0c210ae 100644 (file)
@@ -1,27 +1,24 @@
 # -*- coding: utf-8 -*-
 # -*- coding: utf-8 -*-
-
-import re
+from __future__ import unicode_literals
 
 from .common import InfoExtractor
 
 from .common import InfoExtractor
-from ..utils import determine_ext
 
 
 class SztvHuIE(InfoExtractor):
 
 
 class SztvHuIE(InfoExtractor):
-    _VALID_URL = r'(?:http://)?(?:(?:www\.)?sztv\.hu|www\.tvszombathely\.hu)/(?:[^/]+)/.+-(?P<id>[0-9]+)'
+    _VALID_URL = r'http://(?:(?:www\.)?sztv\.hu|www\.tvszombathely\.hu)/(?:[^/]+)/.+-(?P<id>[0-9]+)'
     _TEST = {
     _TEST = {
-        u'url': u'http://sztv.hu/hirek/cserkeszek-nepszerusitettek-a-kornyezettudatos-eletmodot-a-savaria-teren-20130909',
-        u'file': u'20130909.mp4',
-        u'md5': u'a6df607b11fb07d0e9f2ad94613375cb',
-        u'info_dict': {
-            u"title": u"Cserkészek népszerűsítették a környezettudatos életmódot a Savaria téren",
-            u"description": u'A zöld nap játékos ismeretterjesztő programjait a Magyar Cserkész Szövetség szervezte, akik az ország nyolc városában adják át tudásukat az érdeklődőknek. A PET...',
+        'url': 'http://sztv.hu/hirek/cserkeszek-nepszerusitettek-a-kornyezettudatos-eletmodot-a-savaria-teren-20130909',
+        'md5': 'a6df607b11fb07d0e9f2ad94613375cb',
+        'info_dict': {
+            'id': '20130909',
+            'ext': 'mp4',
+            'title': 'Cserkészek népszerűsítették a környezettudatos életmódot a Savaria téren',
+            'description': 'A zöld nap játékos ismeretterjesztő programjait a Magyar Cserkész Szövetség szervezte, akik az ország nyolc városában adják át tudásukat az érdeklődőknek. A PET...',
         },
         },
-        u'skip': u'Service temporarily disabled as of 2013-11-20'
     }
 
     def _real_extract(self, url):
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
         video_file = self._search_regex(
             r'file: "...:(.*?)",', webpage, 'video file')
         webpage = self._download_webpage(url, video_id)
         video_file = self._search_regex(
             r'file: "...:(.*?)",', webpage, 'video file')
@@ -39,7 +36,6 @@ class SztvHuIE(InfoExtractor):
             'id': video_id,
             'url': video_url,
             'title': title,
             'id': video_id,
             'url': video_url,
             'title': title,
-            'ext': determine_ext(video_url),
             'description': description,
             'thumbnail': thumbnail,
         }
             'description': description,
             'thumbnail': thumbnail,
         }
diff --git a/youtube_dl/extractor/telebruxelles.py b/youtube_dl/extractor/telebruxelles.py
new file mode 100644 (file)
index 0000000..a3d05f9
--- /dev/null
@@ -0,0 +1,60 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class TeleBruxellesIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?telebruxelles\.be/(news|sport|dernier-jt)/?(?P<id>[^/#?]+)'
+    _TESTS = [{
+        'url': 'http://www.telebruxelles.be/news/auditions-devant-parlement-francken-galant-tres-attendus/',
+        'md5': '59439e568c9ee42fb77588b2096b214f',
+        'info_dict': {
+            'id': '11942',
+            'display_id': 'auditions-devant-parlement-francken-galant-tres-attendus',
+            'ext': 'flv',
+            'title': 'Parlement : Francken et Galant répondent aux interpellations de l’opposition',
+            'description': 're:Les auditions des ministres se poursuivent*'
+        },
+        'params': {
+            'skip_download': 'requires rtmpdump'
+        },
+    }, {
+        'url': 'http://www.telebruxelles.be/sport/basket-brussels-bat-mons-80-74/',
+        'md5': '181d3fbdcf20b909309e5aef5c6c6047',
+        'info_dict': {
+            'id': '10091',
+            'display_id': 'basket-brussels-bat-mons-80-74',
+            'ext': 'flv',
+            'title': 'Basket : le Brussels bat Mons 80-74',
+            'description': 're:^Ils l\u2019on fait ! En basket, le B*',
+        },
+        'params': {
+            'skip_download': 'requires rtmpdump'
+        },
+    }]
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+
+        article_id = self._html_search_regex(
+            r"<article id=\"post-(\d+)\"", webpage, 'article ID')
+        title = self._html_search_regex(
+            r'<h1 class=\"entry-title\">(.*?)</h1>', webpage, 'title')
+        description = self._og_search_description(webpage)
+
+        rtmp_url = self._html_search_regex(
+            r"file: \"(rtmp://\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}:\d{1,5}/vod/mp4:\" \+ \"\w+\" \+ \".mp4)\"",
+            webpage, 'RTMP url')
+        rtmp_url = rtmp_url.replace("\" + \"", "")
+
+        return {
+            'id': article_id,
+            'display_id': display_id,
+            'title': title,
+            'description': description,
+            'url': rtmp_url,
+            'ext': 'flv',
+            'rtmp_live': True  # if rtmpdump is not called with "--live" argument, the download is blocked and cannot be completed
+        }
index 461271d3b89cb6b96f3ef3cb5b6b130499e8835a..0cb837afcc7d448ec6ae5a8b3496642cc7f956c0 100644 (file)
@@ -406,6 +406,19 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                 'format': '141',
             },
         },
                 'format': '141',
             },
         },
+        # Controversy video
+        {
+            'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
+            'info_dict': {
+                'id': 'T4XJQO3qol8',
+                'ext': 'mp4',
+                'upload_date': '20100909',
+                'uploader': 'The Amazing Atheist',
+                'uploader_id': 'TheAmazingAtheist',
+                'title': 'Burning Everyone\'s Koran',
+                'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
+            }
+        }
     ]
 
     def __init__(self, *args, **kwargs):
     ]
 
     def __init__(self, *args, **kwargs):
@@ -666,7 +679,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
         video_id = self.extract_id(url)
 
         # Get video webpage
         video_id = self.extract_id(url)
 
         # Get video webpage
-        url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id
+        url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
         pref_cookies = [
             c for c in self._downloader.cookiejar
             if c.domain == '.youtube.com' and c.name == 'PREF']
         pref_cookies = [
             c for c in self._downloader.cookiejar
             if c.domain == '.youtube.com' and c.name == 'PREF']
index 95b05fa95ff42688f8ca405022b7d6808a1972b2..6be5d07c481ae9e4aff6b247a1b5083a2564be54 100644 (file)
@@ -1,2 +1,2 @@
 
 
-__version__ = '2014.11.21'
+__version__ = '2014.11.23'