Raphaël G. Git Repositories - youtubedl/blobdiff - youtube_dl/extractor/tlc.py
Merge tag 'upstream/2017.02.07'
[youtubedl] / youtube_dl / extractor / tlc.py
index d6d038a8d7a80db41ef75f7d13f13fe2ce0411c8..fd145ba429fbc94ec5582b6100660f2897b25f5f 100644 (file)
@@ -1,38 +1,19 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
+
 import re
 
 from .common import InfoExtractor
 from .brightcove import BrightcoveLegacyIE
-from .discovery import DiscoveryIE
-from ..compat import compat_urlparse
-
-
-class TlcIE(DiscoveryIE):
-    IE_NAME = 'tlc.com'
-    _VALID_URL = r'http://www\.tlc\.com\/[a-zA-Z0-9\-]*/[a-zA-Z0-9\-]*/videos/(?P<id>[a-zA-Z0-9\-]*)(.htm)?'
-
-    # DiscoveryIE has _TESTS
-    _TESTS = [{
-        'url': 'http://www.tlc.com/tv-shows/cake-boss/videos/too-big-to-fly.htm',
-        'info_dict': {
-            'id': '104493',
-            'ext': 'mp4',
-            'title': 'Too Big to Fly',
-            'description': 'Buddy has taken on a high flying task.',
-            'duration': 119,
-            'timestamp': 1393365060,
-            'upload_date': '20140225',
-        },
-        'params': {
-            'skip_download': True,  # requires ffmpef
-        },
-    }]
+from ..compat import (
+    compat_parse_qs,
+    compat_urlparse,
+)
 
 
 class TlcDeIE(InfoExtractor):
     IE_NAME = 'tlc.de'
-    _VALID_URL = r'http://www\.tlc\.de/sendungen/[^/]+/videos/(?P<title>[^/?]+)'
+    _VALID_URL = r'https?://(?:www\.)?tlc\.de/(?:[^/]+/)*videos/(?P<title>[^/?#]+)?(?:.*#(?P<id>\d+))?'
 
     _TEST = {
         'url': 'http://www.tlc.de/sendungen/breaking-amish/videos/#3235167922001',
@@ -40,32 +21,23 @@ class TlcDeIE(InfoExtractor):
             'id': '3235167922001',
             'ext': 'mp4',
             'title': 'Breaking Amish: Die Welt da draußen',
-            'uploader': 'Discovery Networks - Germany',
             'description': (
                 'Vier Amische und eine Mennonitin wagen in New York'
                 '  den Sprung in ein komplett anderes Leben. Begleitet sie auf'
                 ' ihrem spannenden Weg.'),
+            'timestamp': 1396598084,
+            'upload_date': '20140404',
+            'uploader_id': '1659832546',
         },
     }
+    BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1659832546/default_default/index.html?videoId=%s'
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
-        title = mobj.group('title')
-        webpage = self._download_webpage(url, title)
-        iframe_url = self._search_regex(
-            '<iframe src="(http://www\.tlc\.de/wp-content/.+?)"', webpage,
-            'iframe url')
-        # Otherwise we don't get the correct 'BrightcoveExperience' element,
-        # example: http://www.tlc.de/sendungen/cake-boss/videos/cake-boss-cannoli-drama/
-        iframe_url = iframe_url.replace('.htm?', '.php?')
-        url_fragment = compat_urlparse.urlparse(url).fragment
-        if url_fragment:
-            # Since the fragment is not send to the server, we always get the same iframe
-            iframe_url = re.sub(r'playlist=(\d+)', 'playlist=%s' % url_fragment, iframe_url)
-        iframe = self._download_webpage(iframe_url, title)
-
-        return {
-            '_type': 'url',
-            'url': BrightcoveLegacyIE._extract_brightcove_url(iframe),
-            'ie': BrightcoveLegacyIE.ie_key(),
-        }
+        brightcove_id = mobj.group('id')
+        if not brightcove_id:
+            title = mobj.group('title')
+            webpage = self._download_webpage(url, title)
+            brightcove_legacy_url = BrightcoveLegacyIE._extract_brightcove_url(webpage)
+            brightcove_id = compat_parse_qs(compat_urlparse.urlparse(brightcove_legacy_url).query)['@videoPlayer'][0]
+        return self.url_result(self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 'BrightcoveNew', brightcove_id)