Raphaël G. Git Repositories - youtubedl/blobdiff - youtube_dl/extractor/ndr.py
Update upstream source from tag 'upstream/2020.05.08'
[youtubedl] / youtube_dl / extractor / ndr.py
index aec2ea1331f3c909957e50d4166e7657618fa1a6..2447c812e021e73991082aefab4bd98e6dd000a1 100644 (file)
@@ -7,8 +7,11 @@ from .common import InfoExtractor
 from ..utils import (
     determine_ext,
     int_or_none,
+    merge_dicts,
     parse_iso8601,
     qualities,
+    try_get,
+    urljoin,
 )
 
 
@@ -85,21 +88,25 @@ class NDRIE(NDRBaseIE):
 
     def _extract_embed(self, webpage, display_id):
         embed_url = self._html_search_meta(
-            'embedURL', webpage, 'embed URL', fatal=True)
+            'embedURL', webpage, 'embed URL',
+            default=None) or self._search_regex(
+            r'\bembedUrl["\']\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
+            'embed URL', group='url')
         description = self._search_regex(
             r'<p[^>]+itemprop="description">([^<]+)</p>',
             webpage, 'description', default=None) or self._og_search_description(webpage)
         timestamp = parse_iso8601(
             self._search_regex(
                 r'<span[^>]+itemprop="(?:datePublished|uploadDate)"[^>]+content="([^"]+)"',
-                webpage, 'upload date', fatal=False))
-        return {
+                webpage, 'upload date', default=None))
+        info = self._search_json_ld(webpage, display_id, default={})
+        return merge_dicts({
             '_type': 'url_transparent',
             'url': embed_url,
             'display_id': display_id,
             'description': description,
             'timestamp': timestamp,
-        }
+        }, info)
 
 
 class NJoyIE(NDRBaseIE):
@@ -220,11 +227,17 @@ class NDREmbedBaseIE(InfoExtractor):
         upload_date = ppjson.get('config', {}).get('publicationDate')
         duration = int_or_none(config.get('duration'))
 
-        thumbnails = [{
-            'id': thumbnail.get('quality') or thumbnail_id,
-            'url': thumbnail['src'],
-            'preference': quality_key(thumbnail.get('quality')),
-        } for thumbnail_id, thumbnail in config.get('poster', {}).items() if thumbnail.get('src')]
+        thumbnails = []
+        poster = try_get(config, lambda x: x['poster'], dict) or {}
+        for thumbnail_id, thumbnail in poster.items():
+            thumbnail_url = urljoin(url, thumbnail.get('src'))
+            if not thumbnail_url:
+                continue
+            thumbnails.append({
+                'id': thumbnail.get('quality') or thumbnail_id,
+                'url': thumbnail_url,
+                'preference': quality_key(thumbnail.get('quality')),
+            })
 
         return {
             'id': video_id,