Prepare to upload

[youtubedl] / youtube_dl / extractor / br.py
diff --git a/youtube_dl/extractor/br.py b/youtube_dl/extractor/br.py

index 11cf498515ba572f8ef8c7f20d5620bf50289827..9bde7f2d82896177612a81f74e499ad4e79985b1 100644 (file)
--- a/youtube_dl/extractor/br.py
+++ b/youtube_dl/extractor/br.py
@@ -1,20 +1,23 @@
  # coding: utf-8
  from __future__ import unicode_literals
  
+import json
  import re
  
  from .common import InfoExtractor
  from ..utils import (
+    determine_ext,
      ExtractorError,
      int_or_none,
      parse_duration,
+    parse_iso8601,
      xpath_element,
      xpath_text,
  )
  
  
  class BRIE(InfoExtractor):
-    IE_DESC = 'Bayerischer Rundfunk Mediathek'
+    IE_DESC = 'Bayerischer Rundfunk'
      _VALID_URL = r'(?P<base_url>https?://(?:www\.)?br(?:-klassik)?\.de)/(?:[a-z0-9\-_]+/)+(?P<id>[a-z0-9\-_]+)\.html'
  
      _TESTS = [
@@ -29,7 +32,8 @@ class BRIE(InfoExtractor):
                  'duration': 180,
                  'uploader': 'Reinhard Weber',
                  'upload_date': '20150422',
-            }
+            },
+            'skip': '404 not found',
          },
          {
              'url': 'http://www.br.de/nachrichten/oberbayern/inhalt/muenchner-polizeipraesident-schreiber-gestorben-100.html',
@@ -40,7 +44,8 @@ class BRIE(InfoExtractor):
                  'title': 'Manfred Schreiber ist tot',
                  'description': 'md5:b454d867f2a9fc524ebe88c3f5092d97',
                  'duration': 26,
-            }
+            },
+            'skip': '404 not found',
          },
          {
              'url': 'https://www.br-klassik.de/audio/peeping-tom-premierenkritik-dance-festival-muenchen-100.html',
@@ -51,7 +56,8 @@ class BRIE(InfoExtractor):
                  'title': 'Kurzweilig und sehr bewegend',
                  'description': 'md5:0351996e3283d64adeb38ede91fac54e',
                  'duration': 296,
-            }
+            },
+            'skip': '404 not found',
          },
          {
              'url': 'http://www.br.de/radio/bayern1/service/team/videos/team-video-erdelt100.html',
@@ -74,7 +80,7 @@ class BRIE(InfoExtractor):
                  'description': 'md5:bb659990e9e59905c3d41e369db1fbe3',
                  'duration': 893,
                  'uploader': 'Eva Maria Steimle',
-                'upload_date': '20140117',
+                'upload_date': '20170208',
              }
          },
      ]
@@ -120,10 +126,10 @@ class BRIE(InfoExtractor):
          for asset in assets.findall('asset'):
              format_url = xpath_text(asset, ['downloadUrl', 'url'])
              asset_type = asset.get('type')
-            if asset_type == 'HDS':
+            if asset_type.startswith('HDS'):
                  formats.extend(self._extract_f4m_formats(
                      format_url + '?hdcore=3.2.0', media_id, f4m_id='hds', fatal=False))
-            elif asset_type == 'HLS':
+            elif asset_type.startswith('HLS'):
                  formats.extend(self._extract_m3u8_formats(
                      format_url, media_id, 'mp4', 'm3u8_native', m3u8_id='hds', fatal=False))
              else:
@@ -166,3 +172,140 @@ class BRIE(InfoExtractor):
          } for variant in variants.findall('variant') if xpath_text(variant, 'url')]
          thumbnails.sort(key=lambda x: x['width'] * x['height'], reverse=True)
          return thumbnails
+
+
+class BRMediathekIE(InfoExtractor):  # extractor for br.de/mediathek/video pages, resolved through a single GraphQL request
+    IE_DESC = 'Bayerischer Rundfunk Mediathek'
+    _VALID_URL = r'https?://(?:www\.)?br\.de/mediathek/video/[^/?&#]*?-(?P<id>av:[0-9a-f]{24})'
+
+    _TESTS = [{
+        'url': 'https://www.br.de/mediathek/video/gesundheit-die-sendung-vom-28112017-av:5a1e6a6e8fce6d001871cc8e',
+        'md5': 'fdc3d485835966d1622587d08ba632ec',
+        'info_dict': {
+            'id': 'av:5a1e6a6e8fce6d001871cc8e',
+            'ext': 'mp4',
+            'title': 'Die Sendung vom 28.11.2017',
+            'description': 'md5:6000cdca5912ab2277e5b7339f201ccc',
+            'timestamp': 1511942766,
+            'upload_date': '20171129',
+        }
+    }]
+
+    def _real_extract(self, url):  # returns the info dict for the clip whose id is embedded in url
+        clip_id = self._match_id(url)
+        # Send the GraphQL query as a JSON-encoded request body and keep only data.viewer.clip of the reply.
+        clip = self._download_json(
+            'https://proxy-base.master.mango.express/graphql',
+            clip_id, data=json.dumps({
+                "query": """{
+  viewer {
+    clip(id: "%s") {
+      title
+      description
+      duration
+      createdAt
+      ageRestriction
+      videoFiles {
+        edges {
+          node {
+            publicLocation
+            fileSize
+            videoProfile {
+              width
+              height
+              bitrate
+              encoding
+            }
+          }
+        }
+      }
+      captionFiles {
+        edges {
+          node {
+            publicLocation
+          }
+        }
+      }
+      teaserImages {
+        edges {
+          node {
+            imageFiles {
+              edges {
+                node {
+                  publicLocation
+                  width
+                  height
+                }
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+}""" % clip_id}).encode(), headers={  # clip_id matches av:[0-9a-f]{24} (see _VALID_URL), so it cannot break out of the quoted argument
+                'Content-Type': 'application/json',
+            })['data']['viewer']['clip']
+        title = clip['title']  # indexed rather than .get(): a reply without a title raises KeyError here
+        # Build formats: m3u8 locations are expanded via _extract_m3u8_formats, any other location becomes one direct entry.
+        formats = []
+        for edge in clip.get('videoFiles', {}).get('edges', []):  # NOTE(review): the {} default covers a missing key only, not a JSON null value - confirm the API never returns null
+            node = edge.get('node', {})
+            n_url = node.get('publicLocation')
+            if not n_url:
+                continue
+            ext = determine_ext(n_url)
+            if ext == 'm3u8':
+                formats.extend(self._extract_m3u8_formats(
+                    n_url, clip_id, 'mp4', 'm3u8_native',
+                    m3u8_id='hls', fatal=False))
+            else:
+                video_profile = node.get('videoProfile', {})
+                tbr = int_or_none(video_profile.get('bitrate'))
+                format_id = 'http'  # extended to 'http-<tbr>' below when a bitrate is available
+                if tbr:
+                    format_id += '-%d' % tbr
+                formats.append({
+                    'format_id': format_id,
+                    'url': n_url,
+                    'width': int_or_none(video_profile.get('width')),
+                    'height': int_or_none(video_profile.get('height')),
+                    'tbr': tbr,
+                    'filesize': int_or_none(node.get('fileSize')),
+                })
+        self._sort_formats(formats)
+        # Every caption file is filed under 'de'; the query above requests no language field for captions.
+        subtitles = {}
+        for edge in clip.get('captionFiles', {}).get('edges', []):
+            node = edge.get('node', {})
+            n_url = node.get('publicLocation')
+            if not n_url:
+                continue
+            subtitles.setdefault('de', []).append({
+                'url': n_url,
+            })
+        # Thumbnails: flatten the teaserImages -> imageFiles edges, skipping nodes without a publicLocation.
+        thumbnails = []
+        for edge in clip.get('teaserImages', {}).get('edges', []):
+            for image_edge in edge.get('node', {}).get('imageFiles', {}).get('edges', []):
+                node = image_edge.get('node', {})
+                n_url = node.get('publicLocation')
+                if not n_url:
+                    continue
+                thumbnails.append({
+                    'url': n_url,
+                    'width': int_or_none(node.get('width')),
+                    'height': int_or_none(node.get('height')),
+                })
+        # Assemble the result; description, duration, timestamp and age_limit are read with clip.get(), so they may be absent upstream.
+        return {
+            'id': clip_id,
+            'title': title,
+            'description': clip.get('description'),
+            'duration': int_or_none(clip.get('duration')),
+            'timestamp': parse_iso8601(clip.get('createdAt')),
+            'age_limit': int_or_none(clip.get('ageRestriction')),
+            'formats': formats,
+            'subtitles': subtitles,
+            'thumbnails': thumbnails,
+        }