New upstream version 2019.09.28

author Rogério Brito <rbrito@ime.usp.br>

Tue, 1 Oct 2019 18:48:03 +0000 (15:48 -0300)

committer Rogério Brito <rbrito@ime.usp.br>

Tue, 1 Oct 2019 18:48:03 +0000 (15:48 -0300)
author Rogério Brito <rbrito@ime.usp.br>
Tue, 1 Oct 2019 18:48:03 +0000 (15:48 -0300)
committer Rogério Brito <rbrito@ime.usp.br>
Tue, 1 Oct 2019 18:48:03 +0000 (15:48 -0300)
diff --git a/ChangeLog b/ChangeLog

index e91e49854d4d7e7b409b7262698d206cd49a686e..80681a9aeb7f5540c7142eaf04ae8828e1a7c233 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,46 @@
+version 2019.09.28
+
+Core
+* [YoutubeDL] Honour all --get-* options with --flat-playlist (#22493)
+
+Extractors
+* [vk] Fix extraction (#22522)
+* [heise] Fix kaltura embeds extraction (#22514)
+* [ted] Check for resources validity and extract subtitled downloads (#22513)
++ [youtube] Add support for
+  owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya.b32.i2p (#22292)
++ [nhk] Add support for clips
+* [nhk] Fix video extraction (#22249, #22353)
+* [byutv] Fix extraction (#22070)
++ [openload] Add support for oload.online (#22304)
++ [youtube] Add support for invidious.drycat.fr (#22451)
+* [jwplatform] Do not match video URLs (#20596, #22148)
+* [youtube:playlist] Unescape playlist uploader (#22483)
++ [bilibili] Add support for audio albums and songs (#21094)
++ [instagram] Add support for tv URLs
++ [mixcloud] Allow uppercase letters in format URLs (#19280)
+* [brightcove] Delegate all supported legacy URLs to new extractor (#11523,
+  #12842, #13912, #15669, #16303)
+* [hotstar] Use native HLS downloader by default
++ [hotstar] Extract more formats (#22323)
+* [9now] Fix extraction (#22361)
+* [zdf] Bypass geo restriction
++ [tv4] Extract series metadata
+* [tv4] Fix extraction (#22443)
+
+
+version 2019.09.12.1
+
+Extractors
+* [youtube] Remove quality and tbr for itag 43 (#22372)
+
+
+version 2019.09.12
+
+Extractors
+* [youtube] Quick extraction tempfix (#22367, #22163)
+
+
  version 2019.09.01
  
  Core
diff --git a/docs/supportedsites.md b/docs/supportedsites.md

index 18bddc1383d9d1e36131585a8ce8536878f41e49..35275278bf68f667e3c19c46d1634072e2bbbe59 100644 (file)
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -98,6 +98,8 @@
   - **Bigflix**
   - **Bild**: Bild.de
   - **BiliBili**
+ - **BilibiliAudio**
+ - **BilibiliAudioAlbum**
   - **BioBioChileTV**
   - **BIQLE**
   - **BitChute**
diff --git a/youtube-dl b/youtube-dl

index 76a8e49b8b8864bb89ef6bccaf7f4289a2beb302..c2e66c3ac5f06b4ee41061572c563b83ea233075 100755 (executable)

Binary files a/youtube-dl and b/youtube-dl differ
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py

index 6a44bc7bab8b79c67d5529f58ab455cc5da5c53c..c3d1407f99ef76ad55ff36fbfae6649fa406d998 100755 (executable)
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -852,8 +852,9 @@ class YoutubeDL(object):
              extract_flat = self.params.get('extract_flat', False)
              if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
                      or extract_flat is True):
-                if self.params.get('forcejson', False):
-                    self.to_stdout(json.dumps(ie_result))
+                self.__forced_printings(
+                    ie_result, self.prepare_filename(ie_result),
+                    incomplete=True)
                  return ie_result
  
          if result_type == 'video':
@@ -1693,6 +1694,36 @@ class YoutubeDL(object):
              subs[lang] = f
          return subs
  
+    def __forced_printings(self, info_dict, filename, incomplete):
+        def print_mandatory(field):
+            if (self.params.get('force%s' % field, False)
+                    and (not incomplete or info_dict.get(field) is not None)):
+                self.to_stdout(info_dict[field])
+
+        def print_optional(field):
+            if (self.params.get('force%s' % field, False)
+                    and info_dict.get(field) is not None):
+                self.to_stdout(info_dict[field])
+
+        print_mandatory('title')
+        print_mandatory('id')
+        if self.params.get('forceurl', False) and not incomplete:
+            if info_dict.get('requested_formats') is not None:
+                for f in info_dict['requested_formats']:
+                    self.to_stdout(f['url'] + f.get('play_path', ''))
+            else:
+                # For RTMP URLs, also include the playpath
+                self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
+        print_optional('thumbnail')
+        print_optional('description')
+        if self.params.get('forcefilename', False) and filename is not None:
+            self.to_stdout(filename)
+        if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
+            self.to_stdout(formatSeconds(info_dict['duration']))
+        print_mandatory('format')
+        if self.params.get('forcejson', False):
+            self.to_stdout(json.dumps(info_dict))
+
      def process_info(self, info_dict):
          """Process a single resolved IE result."""
  
@@ -1703,9 +1734,8 @@ class YoutubeDL(object):
              if self._num_downloads >= int(max_downloads):
                  raise MaxDownloadsReached()
  
+        # TODO: backward compatibility, to be removed
          info_dict['fulltitle'] = info_dict['title']
-        if len(info_dict['title']) > 200:
-            info_dict['title'] = info_dict['title'][:197] + '...'
  
          if 'format' not in info_dict:
              info_dict['format'] = info_dict['ext']
@@ -1720,29 +1750,7 @@ class YoutubeDL(object):
          info_dict['_filename'] = filename = self.prepare_filename(info_dict)
  
          # Forced printings
-        if self.params.get('forcetitle', False):
-            self.to_stdout(info_dict['fulltitle'])
-        if self.params.get('forceid', False):
-            self.to_stdout(info_dict['id'])
-        if self.params.get('forceurl', False):
-            if info_dict.get('requested_formats') is not None:
-                for f in info_dict['requested_formats']:
-                    self.to_stdout(f['url'] + f.get('play_path', ''))
-            else:
-                # For RTMP URLs, also include the playpath
-                self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
-        if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
-            self.to_stdout(info_dict['thumbnail'])
-        if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
-            self.to_stdout(info_dict['description'])
-        if self.params.get('forcefilename', False) and filename is not None:
-            self.to_stdout(filename)
-        if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
-            self.to_stdout(formatSeconds(info_dict['duration']))
-        if self.params.get('forceformat', False):
-            self.to_stdout(info_dict['format'])
-        if self.params.get('forcejson', False):
-            self.to_stdout(json.dumps(info_dict))
+        self.__forced_printings(info_dict, filename, incomplete=False)
  
          # Do nothing else if in simulate mode
          if self.params.get('simulate', False):
diff --git a/youtube_dl/extractor/bilibili.py b/youtube_dl/extractor/bilibili.py

index 3746671d3c110db6ee66f16193159f5ed8897d51..80bd696e21f3a4af3c996e9899ce439116e13d19 100644 (file)
--- a/youtube_dl/extractor/bilibili.py
+++ b/youtube_dl/extractor/bilibili.py
@@ -15,6 +15,7 @@ from ..utils import (
      float_or_none,
      parse_iso8601,
      smuggle_url,
+    str_or_none,
      strip_jsonp,
      unified_timestamp,
      unsmuggle_url,
@@ -306,3 +307,115 @@ class BiliBiliBangumiIE(InfoExtractor):
          return self.playlist_result(
              entries, bangumi_id,
              season_info.get('bangumi_title'), season_info.get('evaluate'))
+
+
+class BilibiliAudioBaseIE(InfoExtractor):
+    def _call_api(self, path, sid, query=None):
+        if not query:
+            query = {'sid': sid}
+        return self._download_json(
+            'https://www.bilibili.com/audio/music-service-c/web/' + path,
+            sid, query=query)['data']
+
+
+class BilibiliAudioIE(BilibiliAudioBaseIE):
+    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/audio/au(?P<id>\d+)'
+    _TEST = {
+        'url': 'https://www.bilibili.com/audio/au1003142',
+        'md5': 'fec4987014ec94ef9e666d4d158ad03b',
+        'info_dict': {
+            'id': '1003142',
+            'ext': 'm4a',
+            'title': '【tsukimi】YELLOW / 神山羊',
+            'artist': 'tsukimi',
+            'comment_count': int,
+            'description': 'YELLOW的mp3版！',
+            'duration': 183,
+            'subtitles': {
+                'origin': [{
+                    'ext': 'lrc',
+                }],
+            },
+            'thumbnail': r're:^https?://.+\.jpg',
+            'timestamp': 1564836614,
+            'upload_date': '20190803',
+            'uploader': 'tsukimi-つきみぐー',
+            'view_count': int,
+        },
+    }
+
+    def _real_extract(self, url):
+        au_id = self._match_id(url)
+
+        play_data = self._call_api('url', au_id)
+        formats = [{
+            'url': play_data['cdns'][0],
+            'filesize': int_or_none(play_data.get('size')),
+        }]
+
+        song = self._call_api('song/info', au_id)
+        title = song['title']
+        statistic = song.get('statistic') or {}
+
+        subtitles = None
+        lyric = song.get('lyric')
+        if lyric:
+            subtitles = {
+                'origin': [{
+                    'url': lyric,
+                }]
+            }
+
+        return {
+            'id': au_id,
+            'title': title,
+            'formats': formats,
+            'artist': song.get('author'),
+            'comment_count': int_or_none(statistic.get('comment')),
+            'description': song.get('intro'),
+            'duration': int_or_none(song.get('duration')),
+            'subtitles': subtitles,
+            'thumbnail': song.get('cover'),
+            'timestamp': int_or_none(song.get('passtime')),
+            'uploader': song.get('uname'),
+            'view_count': int_or_none(statistic.get('play')),
+        }
+
+
+class BilibiliAudioAlbumIE(BilibiliAudioBaseIE):
+    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/audio/am(?P<id>\d+)'
+    _TEST = {
+        'url': 'https://www.bilibili.com/audio/am10624',
+        'info_dict': {
+            'id': '10624',
+            'title': '每日新曲推荐（每日11:00更新）',
+            'description': '每天11:00更新，为你推送最新音乐',
+        },
+        'playlist_count': 19,
+    }
+
+    def _real_extract(self, url):
+        am_id = self._match_id(url)
+
+        songs = self._call_api(
+            'song/of-menu', am_id, {'sid': am_id, 'pn': 1, 'ps': 100})['data']
+
+        entries = []
+        for song in songs:
+            sid = str_or_none(song.get('id'))
+            if not sid:
+                continue
+            entries.append(self.url_result(
+                'https://www.bilibili.com/audio/au' + sid,
+                BilibiliAudioIE.ie_key(), sid))
+
+        if entries:
+            album_data = self._call_api('menu/info', am_id) or {}
+            album_title = album_data.get('title')
+            if album_title:
+                for entry in entries:
+                    entry['album'] = album_title
+                return self.playlist_result(
+                    entries, am_id, album_title, album_data.get('intro'))
+
+        return self.playlist_result(entries, am_id)
diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py

index 58ec5c979e0bc88c98dfafa1ffff81b29af264d8..8e2f7217ab85a81a58d1bb902af02b6e62ec2ab6 100644 (file)
--- a/youtube_dl/extractor/brightcove.py
+++ b/youtube_dl/extractor/brightcove.py
@@ -2,7 +2,6 @@
  from __future__ import unicode_literals
  
  import base64
-import json
  import re
  import struct
  
@@ -11,14 +10,12 @@ from .adobepass import AdobePassIE
  from ..compat import (
      compat_etree_fromstring,
      compat_parse_qs,
-    compat_str,
      compat_urllib_parse_urlparse,
      compat_urlparse,
      compat_xml_parse_error,
      compat_HTTPError,
  )
  from ..utils import (
-    determine_ext,
      ExtractorError,
      extract_attributes,
      find_xpath_attr,
@@ -27,18 +24,19 @@ from ..utils import (
      js_to_json,
      int_or_none,
      parse_iso8601,
+    smuggle_url,
      unescapeHTML,
      unsmuggle_url,
      update_url_query,
      clean_html,
      mimetype2ext,
+    UnsupportedError,
  )
  
  
  class BrightcoveLegacyIE(InfoExtractor):
      IE_NAME = 'brightcove:legacy'
      _VALID_URL = r'(?:https?://.*brightcove\.com/(services|viewer).*?\?|brightcove:)(?P<query>.*)'
-    _FEDERATED_URL = 'http://c.brightcove.com/services/viewer/htmlFederated'
  
      _TESTS = [
          {
@@ -55,7 +53,8 @@ class BrightcoveLegacyIE(InfoExtractor):
                  'timestamp': 1368213670,
                  'upload_date': '20130510',
                  'uploader_id': '1589608506001',
-            }
+            },
+            'skip': 'The player has been deactivated by the content owner',
          },
          {
              # From http://medianetwork.oracle.com/video/player/1785452137001
@@ -70,6 +69,7 @@ class BrightcoveLegacyIE(InfoExtractor):
                  'upload_date': '20120814',
                  'uploader_id': '1460825906',
              },
+            'skip': 'video not playable',
          },
          {
              # From http://mashable.com/2013/10/26/thermoelectric-bracelet-lets-you-control-your-body-temperature/
@@ -79,7 +79,7 @@ class BrightcoveLegacyIE(InfoExtractor):
                  'ext': 'mp4',
                  'title': 'This Bracelet Acts as a Personal Thermostat',
                  'description': 'md5:547b78c64f4112766ccf4e151c20b6a0',
-                'uploader': 'Mashable',
+                # 'uploader': 'Mashable',
                  'timestamp': 1382041798,
                  'upload_date': '20131017',
                  'uploader_id': '1130468786001',
@@ -124,6 +124,7 @@ class BrightcoveLegacyIE(InfoExtractor):
                  'id': '3550319591001',
              },
              'playlist_mincount': 7,
+            'skip': 'Unsupported URL',
          },
          {
              # playlist with 'playlistTab' (https://github.com/ytdl-org/youtube-dl/issues/9965)
@@ -133,6 +134,7 @@ class BrightcoveLegacyIE(InfoExtractor):
                  'title': 'Lesson 08',
              },
              'playlist_mincount': 10,
+            'skip': 'Unsupported URL',
          },
          {
              # playerID inferred from bcpid
@@ -141,12 +143,6 @@ class BrightcoveLegacyIE(InfoExtractor):
              'only_matching': True,  # Tested in GenericIE
          }
      ]
-    FLV_VCODECS = {
-        1: 'SORENSON',
-        2: 'ON2',
-        3: 'H264',
-        4: 'VP8',
-    }
  
      @classmethod
      def _build_brighcove_url(cls, object_str):
@@ -238,7 +234,8 @@ class BrightcoveLegacyIE(InfoExtractor):
  
      @classmethod
      def _make_brightcove_url(cls, params):
-        return update_url_query(cls._FEDERATED_URL, params)
+        return update_url_query(
+            'http://c.brightcove.com/services/viewer/htmlFederated', params)
  
      @classmethod
      def _extract_brightcove_url(cls, webpage):
@@ -297,38 +294,12 @@ class BrightcoveLegacyIE(InfoExtractor):
          videoPlayer = query.get('@videoPlayer')
          if videoPlayer:
              # We set the original url as the default 'Referer' header
-            referer = smuggled_data.get('Referer', url)
+            referer = query.get('linkBaseURL', [None])[0] or smuggled_data.get('Referer', url)
+            video_id = videoPlayer[0]
              if 'playerID' not in query:
                  mobj = re.search(r'/bcpid(\d+)', url)
                  if mobj is not None:
                      query['playerID'] = [mobj.group(1)]
-            return self._get_video_info(
-                videoPlayer[0], query, referer=referer)
-        elif 'playerKey' in query:
-            player_key = query['playerKey']
-            return self._get_playlist_info(player_key[0])
-        else:
-            raise ExtractorError(
-                'Cannot find playerKey= variable. Did you forget quotes in a shell invocation?',
-                expected=True)
-
-    def _brightcove_new_url_result(self, publisher_id, video_id):
-        brightcove_new_url = 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s' % (publisher_id, video_id)
-        return self.url_result(brightcove_new_url, BrightcoveNewIE.ie_key(), video_id)
-
-    def _get_video_info(self, video_id, query, referer=None):
-        headers = {}
-        linkBase = query.get('linkBaseURL')
-        if linkBase is not None:
-            referer = linkBase[0]
-        if referer is not None:
-            headers['Referer'] = referer
-        webpage = self._download_webpage(self._FEDERATED_URL, video_id, headers=headers, query=query)
-
-        error_msg = self._html_search_regex(
-            r"<h1>We're sorry.</h1>([\s\n]*<p>.*?</p>)+", webpage,
-            'error message', default=None)
-        if error_msg is not None:
              publisher_id = query.get('publisherId')
              if publisher_id and publisher_id[0].isdigit():
                  publisher_id = publisher_id[0]
@@ -339,6 +310,9 @@ class BrightcoveLegacyIE(InfoExtractor):
                  else:
                      player_id = query.get('playerID')
                      if player_id and player_id[0].isdigit():
+                        headers = {}
+                        if referer:
+                            headers['Referer'] = referer
                          player_page = self._download_webpage(
                              'http://link.brightcove.com/services/player/bcpid' + player_id[0],
                              video_id, headers=headers, fatal=False)
@@ -349,136 +323,16 @@ class BrightcoveLegacyIE(InfoExtractor):
                  if player_key:
                      enc_pub_id = player_key.split(',')[1].replace('~', '=')
                      publisher_id = struct.unpack('>Q', base64.urlsafe_b64decode(enc_pub_id))[0]
-                if publisher_id:
-                    return self._brightcove_new_url_result(publisher_id, video_id)
-            raise ExtractorError(
-                'brightcove said: %s' % error_msg, expected=True)
-
-        self.report_extraction(video_id)
-        info = self._search_regex(r'var experienceJSON = ({.*});', webpage, 'json')
-        info = json.loads(info)['data']
-        video_info = info['programmedContent']['videoPlayer']['mediaDTO']
-        video_info['_youtubedl_adServerURL'] = info.get('adServerURL')
-
-        return self._extract_video_info(video_info)
-
-    def _get_playlist_info(self, player_key):
-        info_url = 'http://c.brightcove.com/services/json/experience/runtime/?command=get_programming_for_experience&playerKey=%s' % player_key
-        playlist_info = self._download_webpage(
-            info_url, player_key, 'Downloading playlist information')
-
-        json_data = json.loads(playlist_info)
-        if 'videoList' in json_data:
-            playlist_info = json_data['videoList']
-            playlist_dto = playlist_info['mediaCollectionDTO']
-        elif 'playlistTabs' in json_data:
-            playlist_info = json_data['playlistTabs']
-            playlist_dto = playlist_info['lineupListDTO']['playlistDTOs'][0]
-        else:
-            raise ExtractorError('Empty playlist')
-
-        videos = [self._extract_video_info(video_info) for video_info in playlist_dto['videoDTOs']]
-
-        return self.playlist_result(videos, playlist_id='%s' % playlist_info['id'],
-                                    playlist_title=playlist_dto['displayName'])
-
-    def _extract_video_info(self, video_info):
-        video_id = compat_str(video_info['id'])
-        publisher_id = video_info.get('publisherId')
-        info = {
-            'id': video_id,
-            'title': video_info['displayName'].strip(),
-            'description': video_info.get('shortDescription'),
-            'thumbnail': video_info.get('videoStillURL') or video_info.get('thumbnailURL'),
-            'uploader': video_info.get('publisherName'),
-            'uploader_id': compat_str(publisher_id) if publisher_id else None,
-            'duration': float_or_none(video_info.get('length'), 1000),
-            'timestamp': int_or_none(video_info.get('creationDate'), 1000),
-        }
-
-        renditions = video_info.get('renditions', []) + video_info.get('IOSRenditions', [])
-        if renditions:
-            formats = []
-            for rend in renditions:
-                url = rend['defaultURL']
-                if not url:
-                    continue
-                ext = None
-                if rend['remote']:
-                    url_comp = compat_urllib_parse_urlparse(url)
-                    if url_comp.path.endswith('.m3u8'):
-                        formats.extend(
-                            self._extract_m3u8_formats(
-                                url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
-                        continue
-                    elif 'akamaihd.net' in url_comp.netloc:
-                        # This type of renditions are served through
-                        # akamaihd.net, but they don't use f4m manifests
-                        url = url.replace('control/', '') + '?&v=3.3.0&fp=13&r=FEEFJ&g=RTSJIMBMPFPB'
-                        ext = 'flv'
-                if ext is None:
-                    ext = determine_ext(url)
-                tbr = int_or_none(rend.get('encodingRate'), 1000)
-                a_format = {
-                    'format_id': 'http%s' % ('-%s' % tbr if tbr else ''),
-                    'url': url,
-                    'ext': ext,
-                    'filesize': int_or_none(rend.get('size')) or None,
-                    'tbr': tbr,
-                }
-                if rend.get('audioOnly'):
-                    a_format.update({
-                        'vcodec': 'none',
-                    })
-                else:
-                    a_format.update({
-                        'height': int_or_none(rend.get('frameHeight')),
-                        'width': int_or_none(rend.get('frameWidth')),
-                        'vcodec': rend.get('videoCodec'),
-                    })
-
-                # m3u8 manifests with remote == false are media playlists
-                # Not calling _extract_m3u8_formats here to save network traffic
-                if ext == 'm3u8':
-                    a_format.update({
-                        'format_id': 'hls%s' % ('-%s' % tbr if tbr else ''),
-                        'ext': 'mp4',
-                        'protocol': 'm3u8_native',
-                    })
-
-                formats.append(a_format)
-            self._sort_formats(formats)
-            info['formats'] = formats
-        elif video_info.get('FLVFullLengthURL') is not None:
-            info.update({
-                'url': video_info['FLVFullLengthURL'],
-                'vcodec': self.FLV_VCODECS.get(video_info.get('FLVFullCodec')),
-                'filesize': int_or_none(video_info.get('FLVFullSize')),
-            })
-
-        if self._downloader.params.get('include_ads', False):
-            adServerURL = video_info.get('_youtubedl_adServerURL')
-            if adServerURL:
-                ad_info = {
-                    '_type': 'url',
-                    'url': adServerURL,
-                }
-                if 'url' in info:
-                    return {
-                        '_type': 'playlist',
-                        'title': info['title'],
-                        'entries': [ad_info, info],
-                    }
-                else:
-                    return ad_info
-
-        if not info.get('url') and not info.get('formats'):
-            uploader_id = info.get('uploader_id')
-            if uploader_id:
-                info.update(self._brightcove_new_url_result(uploader_id, video_id))
-            else:
-                raise ExtractorError('Unable to extract video url for %s' % video_id)
-        return info
+            if publisher_id:
+                brightcove_new_url = 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s' % (publisher_id, video_id)
+                if referer:
+                    brightcove_new_url = smuggle_url(brightcove_new_url, {'referrer': referer})
+                return self.url_result(brightcove_new_url, BrightcoveNewIE.ie_key(), video_id)
+        # TODO: figure out if it's possible to extract playlistId from playerKey
+        # elif 'playerKey' in query:
+        #     player_key = query['playerKey']
+        #     return self._get_playlist_info(player_key[0])
+        raise UnsupportedError(url)
  
  
  class BrightcoveNewIE(AdobePassIE):
diff --git a/youtube_dl/extractor/byutv.py b/youtube_dl/extractor/byutv.py

index 562c83af91a744428b70d92644008b1ac0de2b3c..0b11bf11fce42168b0aa6dc7592ae0fa259e7c0e 100644 (file)
--- a/youtube_dl/extractor/byutv.py
+++ b/youtube_dl/extractor/byutv.py
@@ -3,7 +3,12 @@ from __future__ import unicode_literals
  import re
  
  from .common import InfoExtractor
-from ..utils import parse_duration
+from ..utils import (
+    determine_ext,
+    merge_dicts,
+    parse_duration,
+    url_or_none,
+)
  
  
  class BYUtvIE(InfoExtractor):
@@ -51,7 +56,7 @@ class BYUtvIE(InfoExtractor):
          video_id = mobj.group('id')
          display_id = mobj.group('display_id') or video_id
  
-        info = self._download_json(
+        video = self._download_json(
              'https://api.byutv.org/api3/catalog/getvideosforcontent',
              display_id, query={
                  'contentid': video_id,
@@ -62,7 +67,7 @@ class BYUtvIE(InfoExtractor):
                  'x-byutv-platformkey': 'xsaaw9c7y5',
              })
  
-        ep = info.get('ooyalaVOD')
+        ep = video.get('ooyalaVOD')
          if ep:
              return {
                  '_type': 'url_transparent',
@@ -75,18 +80,38 @@ class BYUtvIE(InfoExtractor):
                  'thumbnail': ep.get('imageThumbnail'),
              }
  
-        ep = info['dvr']
-        title = ep['title']
-        formats = self._extract_m3u8_formats(
-            ep['videoUrl'], video_id, 'mp4', entry_protocol='m3u8_native',
-            m3u8_id='hls')
+        info = {}
+        formats = []
+        for format_id, ep in video.items():
+            if not isinstance(ep, dict):
+                continue
+            video_url = url_or_none(ep.get('videoUrl'))
+            if not video_url:
+                continue
+            ext = determine_ext(video_url)
+            if ext == 'm3u8':
+                formats.extend(self._extract_m3u8_formats(
+                    video_url, video_id, 'mp4', entry_protocol='m3u8_native',
+                    m3u8_id='hls', fatal=False))
+            elif ext == 'mpd':
+                formats.extend(self._extract_mpd_formats(
+                    video_url, video_id, mpd_id='dash', fatal=False))
+            else:
+                formats.append({
+                    'url': video_url,
+                    'format_id': format_id,
+                })
+            merge_dicts(info, {
+                'title': ep.get('title'),
+                'description': ep.get('description'),
+                'thumbnail': ep.get('imageThumbnail'),
+                'duration': parse_duration(ep.get('length')),
+            })
          self._sort_formats(formats)
-        return {
+
+        return merge_dicts(info, {
              'id': video_id,
              'display_id': display_id,
-            'title': title,
-            'description': ep.get('description'),
-            'thumbnail': ep.get('imageThumbnail'),
-            'duration': parse_duration(ep.get('length')),
+            'title': display_id,
              'formats': formats,
-        }
+        })
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py

index 4adcae1e5a240bc81f53e0ed63f1ae6ca19b9442..44120cae25e386a632f33d621a81ba0520e0f738 100644 (file)
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -104,6 +104,8 @@ from .bild import BildIE
  from .bilibili import (
      BiliBiliIE,
      BiliBiliBangumiIE,
+    BilibiliAudioIE,
+    BilibiliAudioAlbumIE,
  )
  from .biobiochiletv import BioBioChileTVIE
  from .bitchute import (
diff --git a/youtube_dl/extractor/heise.py b/youtube_dl/extractor/heise.py

index d8a2f9d76b027f4bb2e0c34009b9ec499c5681f1..cbe564a3cf96dea94b4ce7b4b35d21e66f79be1d 100644 (file)
--- a/youtube_dl/extractor/heise.py
+++ b/youtube_dl/extractor/heise.py
@@ -105,8 +105,7 @@ class HeiseIE(InfoExtractor):
              webpage, default=None) or self._html_search_meta(
              'description', webpage)
  
-        kaltura_url = KalturaIE._extract_url(webpage)
-        if kaltura_url:
+        def _make_kaltura_result(kaltura_url):
              return {
                  '_type': 'url_transparent',
                  'url': smuggle_url(kaltura_url, {'source_url': url}),
@@ -115,6 +114,16 @@ class HeiseIE(InfoExtractor):
                  'description': description,
              }
  
+        kaltura_url = KalturaIE._extract_url(webpage)
+        if kaltura_url:
+            return _make_kaltura_result(kaltura_url)
+
+        kaltura_id = self._search_regex(
+            r'entry-id=(["\'])(?P<id>(?:(?!\1).)+)\1', webpage, 'kaltura id',
+            default=None, group='id')
+        if kaltura_id:
+            return _make_kaltura_result('kaltura:2238431:%s' % kaltura_id)
+
          yt_urls = YoutubeIE._extract_urls(webpage)
          if yt_urls:
              return self.playlist_from_matches(
diff --git a/youtube_dl/extractor/hotstar.py b/youtube_dl/extractor/hotstar.py

index 79d5bbb2e8bb297e099efb44b2d5451eb06562ca..f9f7c5a645793eb13c44af45914b768b1982fe2d 100644 (file)
--- a/youtube_dl/extractor/hotstar.py
+++ b/youtube_dl/extractor/hotstar.py
@@ -3,6 +3,7 @@ from __future__ import unicode_literals
  
  import hashlib
  import hmac
+import re
  import time
  import uuid
  
@@ -126,6 +127,8 @@ class HotStarIE(HotStarBaseIE):
              format_url = url_or_none(playback_set.get('playbackUrl'))
              if not format_url:
                  continue
+            format_url = re.sub(
+                r'(?<=//staragvod)(\d)', r'web\1', format_url)
              tags = str_or_none(playback_set.get('tagsCombination')) or ''
              if tags and 'encryption:plain' not in tags:
                  continue
@@ -133,7 +136,8 @@ class HotStarIE(HotStarBaseIE):
              try:
                  if 'package:hls' in tags or ext == 'm3u8':
                      formats.extend(self._extract_m3u8_formats(
-                        format_url, video_id, 'mp4', m3u8_id='hls'))
+                        format_url, video_id, 'mp4',
+                        entry_protocol='m3u8_native', m3u8_id='hls'))
                  elif 'package:dash' in tags or ext == 'mpd':
                      formats.extend(self._extract_mpd_formats(
                          format_url, video_id, mpd_id='dash'))
diff --git a/youtube_dl/extractor/instagram.py b/youtube_dl/extractor/instagram.py

index ffd87b55f6d8bc78ab7853892ce92454b89bfe8e..b061850a187567e0d7ed18b5e64ce10e08927244 100644 (file)
--- a/youtube_dl/extractor/instagram.py
+++ b/youtube_dl/extractor/instagram.py
@@ -22,7 +22,7 @@ from ..utils import (
  
  
  class InstagramIE(InfoExtractor):
-    _VALID_URL = r'(?P<url>https?://(?:www\.)?instagram\.com/p/(?P<id>[^/?#&]+))'
+    _VALID_URL = r'(?P<url>https?://(?:www\.)?instagram\.com/(?:p|tv)/(?P<id>[^/?#&]+))'
      _TESTS = [{
          'url': 'https://instagram.com/p/aye83DjauH/?foo=bar#abc',
          'md5': '0d2da106a9d2631273e192b372806516',
@@ -92,6 +92,9 @@ class InstagramIE(InfoExtractor):
      }, {
          'url': 'http://instagram.com/p/9o6LshA7zy/embed/',
          'only_matching': True,
+    }, {
+        'url': 'https://www.instagram.com/tv/aye83DjauH/',
+        'only_matching': True,
      }]
  
      @staticmethod
diff --git a/youtube_dl/extractor/jwplatform.py b/youtube_dl/extractor/jwplatform.py

index 647b905f12ebd7ebbe525d90e5fce747cd42d61a..2aabd98b5bbaf36efecdc76415bf52854705bd1e 100644 (file)
--- a/youtube_dl/extractor/jwplatform.py
+++ b/youtube_dl/extractor/jwplatform.py
@@ -7,7 +7,7 @@ from .common import InfoExtractor
  
  
  class JWPlatformIE(InfoExtractor):
-    _VALID_URL = r'(?:https?://(?:content\.jwplatform|cdn\.jwplayer)\.com/(?:(?:feed|player|thumb|preview|video)s|jw6|v2/media)/|jwplatform:)(?P<id>[a-zA-Z0-9]{8})'
+    _VALID_URL = r'(?:https?://(?:content\.jwplatform|cdn\.jwplayer)\.com/(?:(?:feed|player|thumb|preview)s|jw6|v2/media)/|jwplatform:)(?P<id>[a-zA-Z0-9]{8})'
      _TESTS = [{
          'url': 'http://content.jwplatform.com/players/nPripu9l-ALJ3XQCI.js',
          'md5': 'fa8899fa601eb7c83a64e9d568bdf325',
diff --git a/youtube_dl/extractor/mixcloud.py b/youtube_dl/extractor/mixcloud.py

index bcac13ec5edfdeb83216e732b46ae34a36656205..bf5353ef941d660ff3fe542069a7160afe281369 100644 (file)
--- a/youtube_dl/extractor/mixcloud.py
+++ b/youtube_dl/extractor/mixcloud.py
@@ -164,7 +164,7 @@ class MixcloudIE(InfoExtractor):
              def decrypt_url(f_url):
                  for k in (key, 'IFYOUWANTTHEARTISTSTOGETPAIDDONOTDOWNLOADFROMMIXCLOUD'):
                      decrypted_url = self._decrypt_xor_cipher(k, f_url)
-                    if re.search(r'^https?://[0-9a-z.]+/[0-9A-Za-z/.?=&_-]+$', decrypted_url):
+                    if re.search(r'^https?://[0-9A-Za-z.]+/[0-9A-Za-z/.?=&_-]+$', decrypted_url):
                          return decrypted_url
  
              for url_key in ('url', 'hlsUrl', 'dashUrl'):
diff --git a/youtube_dl/extractor/nhk.py b/youtube_dl/extractor/nhk.py

index 241412f9849645674c5369e4a5d46a6ca0709358..6a2c6cb7bb6d039c56fcf7325de422846c437ab5 100644 (file)
--- a/youtube_dl/extractor/nhk.py
+++ b/youtube_dl/extractor/nhk.py
@@ -10,6 +10,18 @@ class NhkVodIE(InfoExtractor):
      # Content available only for a limited period of time. Visit
      # https://www3.nhk.or.jp/nhkworld/en/ondemand/ for working samples.
      _TESTS = [{
+        # clip
+        'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/9999011/',
+        'md5': '256a1be14f48d960a7e61e2532d95ec3',
+        'info_dict': {
+            'id': 'a95j5iza',
+            'ext': 'mp4',
+            'title': "Dining with the Chef - Chef Saito's Family recipe: MENCHI-KATSU",
+            'description': 'md5:5aee4a9f9d81c26281862382103b0ea5',
+            'timestamp': 1565965194,
+            'upload_date': '20190816',
+        },
+    }, {
          'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/2015173/',
          'only_matching': True,
      }, {
@@ -19,7 +31,7 @@ class NhkVodIE(InfoExtractor):
          'url': 'https://www3.nhk.or.jp/nhkworld/fr/ondemand/audio/plugin-20190404-1/',
          'only_matching': True,
      }]
-    _API_URL_TEMPLATE = 'https://api.nhk.or.jp/nhkworld/%sodesdlist/v7/episode/%s/%s/all%s.json'
+    _API_URL_TEMPLATE = 'https://api.nhk.or.jp/nhkworld/%sod%slist/v7/episode/%s/%s/all%s.json'
  
      def _real_extract(self, url):
          lang, m_type, episode_id = re.match(self._VALID_URL, url).groups()
@@ -28,7 +40,10 @@ class NhkVodIE(InfoExtractor):
  
          is_video = m_type == 'video'
          episode = self._download_json(
-            self._API_URL_TEMPLATE % ('v' if is_video else 'r', episode_id, lang, '/all' if is_video else ''),
+            self._API_URL_TEMPLATE % (
+                'v' if is_video else 'r',
+                'clip' if episode_id[:4] == '9999' else 'esd',
+                episode_id, lang, '/all' if is_video else ''),
              episode_id, query={'apikey': 'EJfK8jdS57GqlupFgAfAAwr573q01y6k'})['data']['episodes'][0]
          title = episode.get('sub_title_clean') or episode['sub_title']
  
@@ -60,8 +75,8 @@ class NhkVodIE(InfoExtractor):
          if is_video:
              info.update({
                  '_type': 'url_transparent',
-                'ie_key': 'Ooyala',
-                'url': 'ooyala:' + episode['vod_id'],
+                'ie_key': 'Piksel',
+                'url': 'https://player.piksel.com/v/refid/nhkworld/prefid/' + episode['vod_id'],
              })
          else:
              audio = episode['audio']
diff --git a/youtube_dl/extractor/nick.py b/youtube_dl/extractor/nick.py

index 5e34d776bd799490ac21282fed822cc52d467f75..2e8b302ac856ecf22baf2b72b0ae721784be9961 100644 (file)
--- a/youtube_dl/extractor/nick.py
+++ b/youtube_dl/extractor/nick.py
@@ -85,7 +85,8 @@ class NickBrIE(MTVServicesInfoExtractor):
                      https?://
                          (?:
                              (?P<domain>(?:www\.)?nickjr|mundonick\.uol)\.com\.br|
-                            (?:www\.)?nickjr\.[a-z]{2}
+                            (?:www\.)?nickjr\.[a-z]{2}|
+                            (?:www\.)?nickelodeonjunior\.fr
                          )
                          /(?:programas/)?[^/]+/videos/(?:episodios/)?(?P<id>[^/?\#.]+)
                      '''
@@ -101,6 +102,9 @@ class NickBrIE(MTVServicesInfoExtractor):
      }, {
          'url': 'http://www.nickjr.de/blaze-und-die-monster-maschinen/videos/f6caaf8f-e4e8-4cc1-b489-9380d6dcd059/',
          'only_matching': True,
+    }, {
+        'url': 'http://www.nickelodeonjunior.fr/paw-patrol-la-pat-patrouille/videos/episode-401-entier-paw-patrol/',
+        'only_matching': True,
      }]
  
      def _real_extract(self, url):
diff --git a/youtube_dl/extractor/ninenow.py b/youtube_dl/extractor/ninenow.py

index f32f530f75b16bec0040ce581b4122bd1166c795..6157dc7c16e785ca7f570a45c36cd8e90bbd05e1 100644 (file)
--- a/youtube_dl/extractor/ninenow.py
+++ b/youtube_dl/extractor/ninenow.py
@@ -45,7 +45,11 @@ class NineNowIE(InfoExtractor):
          webpage = self._download_webpage(url, display_id)
          page_data = self._parse_json(self._search_regex(
              r'window\.__data\s*=\s*({.*?});', webpage,
-            'page data'), display_id)
+            'page data', default='{}'), display_id, fatal=False)
+        if not page_data:
+            page_data = self._parse_json(self._parse_json(self._search_regex(
+                r'window\.__data\s*=\s*JSON\.parse\s*\(\s*(".+?")\s*\)\s*;',
+                webpage, 'page data'), display_id), display_id)
  
          for kind in ('episode', 'clip'):
              current_key = page_data.get(kind, {}).get(
diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py

index 679eaf6c312d0d4de0a3e11bee7076f6ec365039..1fe58178097a1cc1c101409a672b861dbf353561 100644 (file)
--- a/youtube_dl/extractor/openload.py
+++ b/youtube_dl/extractor/openload.py
@@ -246,7 +246,7 @@ class OpenloadIE(InfoExtractor):
      _DOMAINS = r'''
                      (?:
                          openload\.(?:co|io|link|pw)|
-                        oload\.(?:tv|best|biz|stream|site|xyz|win|download|cloud|cc|icu|fun|club|info|press|pw|life|live|space|services|website|vip)|
+                        oload\.(?:tv|best|biz|stream|site|xyz|win|download|cloud|cc|icu|fun|club|info|online|press|pw|life|live|space|services|website|vip)|
                          oladblock\.(?:services|xyz|me)|openloed\.co
                      )
                  '''
@@ -362,6 +362,9 @@ class OpenloadIE(InfoExtractor):
      }, {
          'url': 'https://oload.services/embed/bs1NWj1dCag/',
          'only_matching': True,
+    }, {
+        'url': 'https://oload.online/f/W8o2UfN1vNY/',
+        'only_matching': True,
      }, {
          'url': 'https://oload.press/embed/drTBl1aOTvk/',
          'only_matching': True,
diff --git a/youtube_dl/extractor/piksel.py b/youtube_dl/extractor/piksel.py

index 401298cb877f59ec46e896ee0c087d21f2c93924..88b6859b01a7c51eebe9f129d759f68005c75ce6 100644 (file)
--- a/youtube_dl/extractor/piksel.py
+++ b/youtube_dl/extractor/piksel.py
@@ -15,7 +15,7 @@ from ..utils import (
  
  
  class PikselIE(InfoExtractor):
-    _VALID_URL = r'https?://player\.piksel\.com/v/(?P<id>[a-z0-9]+)'
+    _VALID_URL = r'https?://player\.piksel\.com/v/(?:refid/[^/]+/prefid/)?(?P<id>[a-z0-9_]+)'
      _TESTS = [
          {
              'url': 'http://player.piksel.com/v/ums2867l',
@@ -40,6 +40,11 @@ class PikselIE(InfoExtractor):
                  'timestamp': 1486171129,
                  'upload_date': '20170204'
              }
+        },
+        {
+            # https://www3.nhk.or.jp/nhkworld/en/ondemand/video/2019240/
+            'url': 'http://player.piksel.com/v/refid/nhkworld/prefid/nw_vod_v_en_2019_240_20190823233000_02_1566873477',
+            'only_matching': True,
          }
      ]
  
@@ -52,8 +57,11 @@ class PikselIE(InfoExtractor):
              return mobj.group('url')
  
      def _real_extract(self, url):
-        video_id = self._match_id(url)
-        webpage = self._download_webpage(url, video_id)
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+        video_id = self._search_regex(
+            r'data-de-program-uuid=[\'"]([a-z0-9]+)',
+            webpage, 'program uuid', default=display_id)
          app_token = self._search_regex([
              r'clientAPI\s*:\s*"([^"]+)"',
              r'data-de-api-key\s*=\s*"([^"]+)"'
diff --git a/youtube_dl/extractor/platzi.py b/youtube_dl/extractor/platzi.py

index 557b2b5ade2fcd0269b8b2873fd644015f785c6c..602207bebdd6a01d7f33dbf08302ab5a75ccf207 100644 (file)
--- a/youtube_dl/extractor/platzi.py
+++ b/youtube_dl/extractor/platzi.py
@@ -18,43 +18,10 @@ from ..utils import (
  )
  
  
-class PlatziIE(InfoExtractor):
-    _VALID_URL = r'''(?x)
-                    https?://
-                        (?:
-                            platzi\.com/clases|           # es version
-                            courses\.platzi\.com/classes  # en version
-                        )/[^/]+/(?P<id>\d+)-[^/?\#&]+
-                    '''
+class PlatziBaseIE(InfoExtractor):
      _LOGIN_URL = 'https://platzi.com/login/'
      _NETRC_MACHINE = 'platzi'
  
-    _TESTS = [{
-        'url': 'https://platzi.com/clases/1311-next-js/12074-creando-nuestra-primera-pagina/',
-        'md5': '8f56448241005b561c10f11a595b37e3',
-        'info_dict': {
-            'id': '12074',
-            'ext': 'mp4',
-            'title': 'Creando nuestra primera página',
-            'description': 'md5:4c866e45034fc76412fbf6e60ae008bc',
-            'duration': 420,
-        },
-        'skip': 'Requires platzi account credentials',
-    }, {
-        'url': 'https://courses.platzi.com/classes/1367-communication-codestream/13430-background/',
-        'info_dict': {
-            'id': '13430',
-            'ext': 'mp4',
-            'title': 'Background',
-            'description': 'md5:49c83c09404b15e6e71defaf87f6b305',
-            'duration': 360,
-        },
-        'skip': 'Requires platzi account credentials',
-        'params': {
-            'skip_download': True,
-        },
-    }]
-
      def _real_initialize(self):
          self._login()
  
@@ -97,6 +64,42 @@ class PlatziIE(InfoExtractor):
                      'Unable to login: %s' % error, expected=True)
          raise ExtractorError('Unable to log in')
  
+
+class PlatziIE(PlatziBaseIE):
+    _VALID_URL = r'''(?x)
+                    https?://
+                        (?:
+                            platzi\.com/clases|           # es version
+                            courses\.platzi\.com/classes  # en version
+                        )/[^/]+/(?P<id>\d+)-[^/?\#&]+
+                    '''
+
+    _TESTS = [{
+        'url': 'https://platzi.com/clases/1311-next-js/12074-creando-nuestra-primera-pagina/',
+        'md5': '8f56448241005b561c10f11a595b37e3',
+        'info_dict': {
+            'id': '12074',
+            'ext': 'mp4',
+            'title': 'Creando nuestra primera página',
+            'description': 'md5:4c866e45034fc76412fbf6e60ae008bc',
+            'duration': 420,
+        },
+        'skip': 'Requires platzi account credentials',
+    }, {
+        'url': 'https://courses.platzi.com/classes/1367-communication-codestream/13430-background/',
+        'info_dict': {
+            'id': '13430',
+            'ext': 'mp4',
+            'title': 'Background',
+            'description': 'md5:49c83c09404b15e6e71defaf87f6b305',
+            'duration': 360,
+        },
+        'skip': 'Requires platzi account credentials',
+        'params': {
+            'skip_download': True,
+        },
+    }]
+
      def _real_extract(self, url):
          lecture_id = self._match_id(url)
  
@@ -104,7 +107,11 @@ class PlatziIE(InfoExtractor):
  
          data = self._parse_json(
              self._search_regex(
-                r'client_data\s*=\s*({.+?})\s*;', webpage, 'client data'),
+                # client_data may itself contain "};", so we have to try the
+                # stricter regex first
+                (r'client_data\s*=\s*({.+?})\s*;\s*\n',
+                 r'client_data\s*=\s*({.+?})\s*;'),
+                webpage, 'client data'),
              lecture_id)
  
          material = data['initialState']['material']
@@ -146,7 +153,7 @@ class PlatziIE(InfoExtractor):
          }
  
  
-class PlatziCourseIE(InfoExtractor):
+class PlatziCourseIE(PlatziBaseIE):
      _VALID_URL = r'''(?x)
                      https?://
                          (?:
diff --git a/youtube_dl/extractor/ted.py b/youtube_dl/extractor/ted.py

index db5a4f44e6b9be254729256fc741d6f3eb3693f7..63e2455b20b2a4f3fc639b14c9b04e2c122f505f 100644 (file)
--- a/youtube_dl/extractor/ted.py
+++ b/youtube_dl/extractor/ted.py
@@ -182,20 +182,29 @@ class TEDIE(InfoExtractor):
  
          title = talk_info['title'].strip()
  
-        native_downloads = try_get(
-            talk_info,
-            (lambda x: x['downloads']['nativeDownloads'],
-             lambda x: x['nativeDownloads']),
-            dict) or {}
+        downloads = talk_info.get('downloads') or {}
+        native_downloads = downloads.get('nativeDownloads') or talk_info.get('nativeDownloads') or {}
  
          formats = [{
              'url': format_url,
              'format_id': format_id,
-            'format': format_id,
          } for (format_id, format_url) in native_downloads.items() if format_url is not None]
+
+        subtitled_downloads = downloads.get('subtitledDownloads') or {}
+        for lang, subtitled_download in subtitled_downloads.items():
+            for q in self._NATIVE_FORMATS:
+                q_url = subtitled_download.get(q)
+                if not q_url:
+                    continue
+                formats.append({
+                    'url': q_url,
+                    'format_id': '%s-%s' % (q, lang),
+                    'language': lang,
+                })
+
          if formats:
              for f in formats:
-                finfo = self._NATIVE_FORMATS.get(f['format_id'])
+                finfo = self._NATIVE_FORMATS.get(f['format_id'].split('-')[0])
                  if finfo:
                      f.update(finfo)
  
@@ -215,34 +224,7 @@ class TEDIE(InfoExtractor):
  
          http_url = None
          for format_id, resources in resources_.items():
-            if format_id == 'h264':
-                for resource in resources:
-                    h264_url = resource.get('file')
-                    if not h264_url:
-                        continue
-                    bitrate = int_or_none(resource.get('bitrate'))
-                    formats.append({
-                        'url': h264_url,
-                        'format_id': '%s-%sk' % (format_id, bitrate),
-                        'tbr': bitrate,
-                    })
-                    if re.search(r'\d+k', h264_url):
-                        http_url = h264_url
-            elif format_id == 'rtmp':
-                streamer = talk_info.get('streamer')
-                if not streamer:
-                    continue
-                for resource in resources:
-                    formats.append({
-                        'format_id': '%s-%s' % (format_id, resource.get('name')),
-                        'url': streamer,
-                        'play_path': resource['file'],
-                        'ext': 'flv',
-                        'width': int_or_none(resource.get('width')),
-                        'height': int_or_none(resource.get('height')),
-                        'tbr': int_or_none(resource.get('bitrate')),
-                    })
-            elif format_id == 'hls':
+            if format_id == 'hls':
                  if not isinstance(resources, dict):
                      continue
                  stream_url = url_or_none(resources.get('stream'))
@@ -251,6 +233,36 @@ class TEDIE(InfoExtractor):
                  formats.extend(self._extract_m3u8_formats(
                      stream_url, video_name, 'mp4', m3u8_id=format_id,
                      fatal=False))
+            else:
+                if not isinstance(resources, list):
+                    continue
+                if format_id == 'h264':
+                    for resource in resources:
+                        h264_url = resource.get('file')
+                        if not h264_url:
+                            continue
+                        bitrate = int_or_none(resource.get('bitrate'))
+                        formats.append({
+                            'url': h264_url,
+                            'format_id': '%s-%sk' % (format_id, bitrate),
+                            'tbr': bitrate,
+                        })
+                        if re.search(r'\d+k', h264_url):
+                            http_url = h264_url
+                elif format_id == 'rtmp':
+                    streamer = talk_info.get('streamer')
+                    if not streamer:
+                        continue
+                    for resource in resources:
+                        formats.append({
+                            'format_id': '%s-%s' % (format_id, resource.get('name')),
+                            'url': streamer,
+                            'play_path': resource['file'],
+                            'ext': 'flv',
+                            'width': int_or_none(resource.get('width')),
+                            'height': int_or_none(resource.get('height')),
+                            'tbr': int_or_none(resource.get('bitrate')),
+                        })
  
          m3u8_formats = list(filter(
              lambda f: f.get('protocol') == 'm3u8' and f.get('vcodec') != 'none',
diff --git a/youtube_dl/extractor/tv4.py b/youtube_dl/extractor/tv4.py

index 51923e44afcc7913c32406c940bba6aec9edbf13..a819d048c613929b79f090facc4a82a097e1cb73 100644 (file)
--- a/youtube_dl/extractor/tv4.py
+++ b/youtube_dl/extractor/tv4.py
@@ -72,8 +72,13 @@ class TV4IE(InfoExtractor):
          video_id = self._match_id(url)
  
          info = self._download_json(
-            'http://www.tv4play.se/player/assets/%s.json' % video_id,
-            video_id, 'Downloading video info JSON')
+            'https://playback-api.b17g.net/asset/%s' % video_id,
+            video_id, 'Downloading video info JSON', query={
+                'service': 'tv4',
+                'device': 'browser',
+                'protocol': 'hls,dash',
+                'drm': 'widevine',
+            })['metadata']
  
          title = info['title']
  
@@ -111,5 +116,9 @@ class TV4IE(InfoExtractor):
              'timestamp': parse_iso8601(info.get('broadcast_date_time')),
              'duration': int_or_none(info.get('duration')),
              'thumbnail': info.get('image'),
-            'is_live': info.get('is_live') is True,
+            'is_live': info.get('isLive') is True,
+            'series': info.get('seriesTitle'),
+            'season_number': int_or_none(info.get('seasonNumber')),
+            'episode': info.get('episodeTitle'),
+            'episode_number': int_or_none(info.get('episodeNumber')),
          }
diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py

index f57ed228870b1d094c8c5b42e88f6983671670f6..8b6dc0e244982f46cbc0d65bf5133b3505779fb0 100644 (file)
--- a/youtube_dl/extractor/vk.py
+++ b/youtube_dl/extractor/vk.py
@@ -403,8 +403,17 @@ class VKIE(VKBaseIE):
              data = self._parse_json(
                  self._search_regex(
                      r'var\s+playerParams\s*=\s*({.+?})\s*;\s*\n', info_page,
-                    'player params'),
-                video_id)['params'][0]
+                    'player params', default='{}'),
+                video_id)
+            if data:
+                data = data['params'][0]
+
+        # Fallback: player params embedded as JSON in an HTML comment, <!--{...}
+        if not data:
+            data = self._parse_json(
+                self._search_regex(
+                    r'<!--\s*({.+})', info_page, 'payload'),
+                video_id)['payload'][-1][-1]['player']['params'][0]
  
          title = unescapeHTML(data['md_title'])
  
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py

index 25d056b3c21ea3f98c5c36384e86d243dfd4a913..a3364a14ed0ebb9f273915414dda78eaae258a33 100644 (file)
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -41,7 +41,6 @@ from ..utils import (
      orderedSet,
      parse_codecs,
      parse_duration,
-    qualities,
      remove_quotes,
      remove_start,
      smuggle_url,
@@ -388,9 +387,17 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                              (?:www\.)?invidious\.13ad\.de/|
                              (?:www\.)?invidious\.mastodon\.host/|
                              (?:www\.)?invidious\.nixnet\.xyz/|
+                            (?:www\.)?invidious\.drycat\.fr/|
                              (?:www\.)?tube\.poal\.co/|
                              (?:www\.)?vid\.wxzm\.sx/|
                              (?:www\.)?yt\.elukerio\.org/|
+                            (?:www\.)?kgg2m7yk5aybusll\.onion/|
+                            (?:www\.)?qklhadlycap4cnod\.onion/|
+                            (?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion/|
+                            (?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion/|
+                            (?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion/|
+                            (?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion/|
+                            (?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p/|
                              youtube\.googleapis\.com/)                        # the various hostnames, with wildcard subdomains
                           (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
                           (?:                                                  # the various things that can precede the ID:
@@ -1909,6 +1916,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
              return int_or_none(self._search_regex(
                  r'\bclen[=/](\d+)', media_url, 'filesize', default=None))
  
+        streaming_formats = try_get(player_response, lambda x: x['streamingData']['formats'], list) or []
+        streaming_formats.extend(try_get(player_response, lambda x: x['streamingData']['adaptiveFormats'], list) or [])
+
          if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
              self.report_rtmp_download()
              formats = [{
@@ -1917,10 +1927,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                  'url': video_info['conn'][0],
                  'player_url': player_url,
              }]
-        elif not is_live and (len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1):
+        elif not is_live and (streaming_formats or len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1):
              encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
              if 'rtmpe%3Dyes' in encoded_url_map:
                  raise ExtractorError('rtmpe downloads are not supported, see https://github.com/ytdl-org/youtube-dl/issues/343 for more information.', expected=True)
+            formats = []
              formats_spec = {}
              fmt_list = video_info.get('fmt_list', [''])[0]
              if fmt_list:
@@ -1934,91 +1945,104 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                                  'width': int_or_none(width_height[0]),
                                  'height': int_or_none(width_height[1]),
                              }
-            q = qualities(['small', 'medium', 'hd720'])
-            streaming_formats = try_get(player_response, lambda x: x['streamingData']['formats'], list)
-            if streaming_formats:
-                for fmt in streaming_formats:
-                    itag = str_or_none(fmt.get('itag'))
-                    if not itag:
-                        continue
-                    quality = fmt.get('quality')
-                    quality_label = fmt.get('qualityLabel') or quality
-                    formats_spec[itag] = {
-                        'asr': int_or_none(fmt.get('audioSampleRate')),
-                        'filesize': int_or_none(fmt.get('contentLength')),
-                        'format_note': quality_label,
-                        'fps': int_or_none(fmt.get('fps')),
-                        'height': int_or_none(fmt.get('height')),
-                        'quality': q(quality),
-                        # bitrate for itag 43 is always 2147483647
-                        'tbr': float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000) if itag != '43' else None,
-                        'width': int_or_none(fmt.get('width')),
-                    }
-            formats = []
-            for url_data_str in encoded_url_map.split(','):
-                url_data = compat_parse_qs(url_data_str)
-                if 'itag' not in url_data or 'url' not in url_data or url_data.get('drm_families'):
+            for fmt in streaming_formats:
+                itag = str_or_none(fmt.get('itag'))
+                if not itag:
                      continue
+                quality = fmt.get('quality')
+                quality_label = fmt.get('qualityLabel') or quality
+                formats_spec[itag] = {
+                    'asr': int_or_none(fmt.get('audioSampleRate')),
+                    'filesize': int_or_none(fmt.get('contentLength')),
+                    'format_note': quality_label,
+                    'fps': int_or_none(fmt.get('fps')),
+                    'height': int_or_none(fmt.get('height')),
+                    # bitrate for itag 43 is always 2147483647
+                    'tbr': float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000) if itag != '43' else None,
+                    'width': int_or_none(fmt.get('width')),
+                }
+
+            for fmt in streaming_formats:
+                if fmt.get('drm_families'):
+                    continue
+                url = url_or_none(fmt.get('url'))
+
+                if not url:
+                    cipher = fmt.get('cipher')
+                    if not cipher:
+                        continue
+                    url_data = compat_parse_qs(cipher)
+                    url = url_or_none(try_get(url_data, lambda x: x['url'][0], compat_str))
+                    if not url:
+                        continue
+                else:
+                    cipher = None
+                    url_data = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
+
                  stream_type = int_or_none(try_get(url_data, lambda x: x['stream_type'][0]))
                  # Unsupported FORMAT_STREAM_TYPE_OTF
                  if stream_type == 3:
                      continue
-                format_id = url_data['itag'][0]
-                url = url_data['url'][0]
-
-                if 's' in url_data or self._downloader.params.get('youtube_include_dash_manifest', True):
-                    ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")'
-                    jsplayer_url_json = self._search_regex(
-                        ASSETS_RE,
-                        embed_webpage if age_gate else video_webpage,
-                        'JS player URL (1)', default=None)
-                    if not jsplayer_url_json and not age_gate:
-                        # We need the embed website after all
-                        if embed_webpage is None:
-                            embed_url = proto + '://www.youtube.com/embed/%s' % video_id
-                            embed_webpage = self._download_webpage(
-                                embed_url, video_id, 'Downloading embed webpage')
-                        jsplayer_url_json = self._search_regex(
-                            ASSETS_RE, embed_webpage, 'JS player URL')
-
-                    player_url = json.loads(jsplayer_url_json)
-                    if player_url is None:
-                        player_url_json = self._search_regex(
-                            r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
-                            video_webpage, 'age gate player URL')
-                        player_url = json.loads(player_url_json)
  
-                if 'sig' in url_data:
-                    url += '&signature=' + url_data['sig'][0]
-                elif 's' in url_data:
-                    encrypted_sig = url_data['s'][0]
+                format_id = fmt.get('itag') or url_data['itag'][0]
+                if not format_id:
+                    continue
+                format_id = compat_str(format_id)
  
-                    if self._downloader.params.get('verbose'):
+                if cipher:
+                    if 's' in url_data or self._downloader.params.get('youtube_include_dash_manifest', True):
+                        ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")'
+                        jsplayer_url_json = self._search_regex(
+                            ASSETS_RE,
+                            embed_webpage if age_gate else video_webpage,
+                            'JS player URL (1)', default=None)
+                        if not jsplayer_url_json and not age_gate:
+                            # We need the embed website after all
+                            if embed_webpage is None:
+                                embed_url = proto + '://www.youtube.com/embed/%s' % video_id
+                                embed_webpage = self._download_webpage(
+                                    embed_url, video_id, 'Downloading embed webpage')
+                            jsplayer_url_json = self._search_regex(
+                                ASSETS_RE, embed_webpage, 'JS player URL')
+
+                        player_url = json.loads(jsplayer_url_json)
                          if player_url is None:
-                            player_version = 'unknown'
-                            player_desc = 'unknown'
-                        else:
-                            if player_url.endswith('swf'):
-                                player_version = self._search_regex(
-                                    r'-(.+?)(?:/watch_as3)?\.swf$', player_url,
-                                    'flash player', fatal=False)
-                                player_desc = 'flash player %s' % player_version
+                            player_url_json = self._search_regex(
+                                r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
+                                video_webpage, 'age gate player URL')
+                            player_url = json.loads(player_url_json)
+
+                    if 'sig' in url_data:
+                        url += '&signature=' + url_data['sig'][0]
+                    elif 's' in url_data:
+                        encrypted_sig = url_data['s'][0]
+
+                        if self._downloader.params.get('verbose'):
+                            if player_url is None:
+                                player_version = 'unknown'
+                                player_desc = 'unknown'
                              else:
-                                player_version = self._search_regex(
-                                    [r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js',
-                                     r'(?:www|player(?:_ias)?)-([^/]+)(?:/[a-z]{2,3}_[A-Z]{2})?/base\.js'],
-                                    player_url,
-                                    'html5 player', fatal=False)
-                                player_desc = 'html5 player %s' % player_version
-
-                        parts_sizes = self._signature_cache_id(encrypted_sig)
-                        self.to_screen('{%s} signature length %s, %s' %
-                                       (format_id, parts_sizes, player_desc))
-
-                    signature = self._decrypt_signature(
-                        encrypted_sig, video_id, player_url, age_gate)
-                    sp = try_get(url_data, lambda x: x['sp'][0], compat_str) or 'signature'
-                    url += '&%s=%s' % (sp, signature)
+                                if player_url.endswith('swf'):
+                                    player_version = self._search_regex(
+                                        r'-(.+?)(?:/watch_as3)?\.swf$', player_url,
+                                        'flash player', fatal=False)
+                                    player_desc = 'flash player %s' % player_version
+                                else:
+                                    player_version = self._search_regex(
+                                        [r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js',
+                                         r'(?:www|player(?:_ias)?)-([^/]+)(?:/[a-z]{2,3}_[A-Z]{2})?/base\.js'],
+                                        player_url,
+                                        'html5 player', fatal=False)
+                                    player_desc = 'html5 player %s' % player_version
+
+                            parts_sizes = self._signature_cache_id(encrypted_sig)
+                            self.to_screen('{%s} signature length %s, %s' %
+                                           (format_id, parts_sizes, player_desc))
+
+                        signature = self._decrypt_signature(
+                            encrypted_sig, video_id, player_url, age_gate)
+                        sp = try_get(url_data, lambda x: x['sp'][0], compat_str) or 'signature'
+                        url += '&%s=%s' % (sp, signature)
                  if 'ratebypass' not in url:
                      url += '&ratebypass=yes'
  
@@ -2038,24 +2062,33 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                  mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])
                  width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None)
  
+                if width is None:
+                    width = int_or_none(fmt.get('width'))
+                if height is None:
+                    height = int_or_none(fmt.get('height'))
+
                  filesize = int_or_none(url_data.get(
                      'clen', [None])[0]) or _extract_filesize(url)
  
-                quality = url_data.get('quality', [None])[0]
+                quality = url_data.get('quality', [None])[0] or fmt.get('quality')
+                quality_label = url_data.get('quality_label', [None])[0] or fmt.get('qualityLabel')
+
+                tbr = (float_or_none(url_data.get('bitrate', [None])[0], 1000)
+                       or float_or_none(fmt.get('bitrate'), 1000)) if format_id != '43' else None
+                fps = int_or_none(url_data.get('fps', [None])[0]) or int_or_none(fmt.get('fps'))
  
                  more_fields = {
                      'filesize': filesize,
-                    'tbr': float_or_none(url_data.get('bitrate', [None])[0], 1000),
+                    'tbr': tbr,
                      'width': width,
                      'height': height,
-                    'fps': int_or_none(url_data.get('fps', [None])[0]),
-                    'format_note': url_data.get('quality_label', [None])[0] or quality,
-                    'quality': q(quality),
+                    'fps': fps,
+                    'format_note': quality_label or quality,
                  }
                  for key, value in more_fields.items():
                      if value:
                          dct[key] = value
-                type_ = url_data.get('type', [None])[0]
+                type_ = url_data.get('type', [None])[0] or fmt.get('mimeType')
                  if type_:
                      type_split = type_.split(';')
                      kind_ext = type_split[0].split('/')
@@ -2709,7 +2742,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
              page, 'title', default=None)
  
          _UPLOADER_BASE = r'class=["\']pl-header-details[^>]+>\s*<li>\s*<a[^>]+\bhref='
-        uploader = self._search_regex(
+        uploader = self._html_search_regex(
              r'%s["\']/(?:user|channel)/[^>]+>([^<]+)' % _UPLOADER_BASE,
              page, 'uploader', default=None)
          mobj = re.search(
diff --git a/youtube_dl/extractor/zdf.py b/youtube_dl/extractor/zdf.py

index afa3f6c47f17a52f40903751cad5ff4293411715..145c123a42fee5e67c0fd8c2750ea13562632666 100644 (file)
--- a/youtube_dl/extractor/zdf.py
+++ b/youtube_dl/extractor/zdf.py
@@ -41,6 +41,7 @@ class ZDFBaseIE(InfoExtractor):
  class ZDFIE(ZDFBaseIE):
      _VALID_URL = r'https?://www\.zdf\.de/(?:[^/]+/)*(?P<id>[^/?]+)\.html'
      _QUALITIES = ('auto', 'low', 'med', 'high', 'veryhigh')
+    _GEO_COUNTRIES = ['DE']
  
      _TESTS = [{
          'url': 'https://www.zdf.de/dokumentation/terra-x/die-magie-der-farben-von-koenigspurpur-und-jeansblau-100.html',
diff --git a/youtube_dl/version.py b/youtube_dl/version.py

index 98fa3228606cde087ee04e696711958571711d23..c3eafb068b21766290dcc0ae53771688abe2b999 100644 (file)
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,3 +1,3 @@
  from __future__ import unicode_literals
  
-__version__ = '2019.09.01'
+__version__ = '2019.09.28'
author	Rogério Brito <rbrito@ime.usp.br>
	Tue, 1 Oct 2019 18:48:03 +0000 (15:48 -0300)
committer	Rogério Brito <rbrito@ime.usp.br>
	Tue, 1 Oct 2019 18:48:03 +0000 (15:48 -0300)
ChangeLog		patch \| blob \| history
docs/supportedsites.md		patch \| blob \| history
youtube-dl		patch \| blob \| history
youtube_dl/YoutubeDL.py		patch \| blob \| history
youtube_dl/extractor/bilibili.py		patch \| blob \| history
youtube_dl/extractor/brightcove.py		patch \| blob \| history
youtube_dl/extractor/byutv.py		patch \| blob \| history
youtube_dl/extractor/extractors.py		patch \| blob \| history
youtube_dl/extractor/heise.py		patch \| blob \| history
youtube_dl/extractor/hotstar.py		patch \| blob \| history
youtube_dl/extractor/instagram.py		patch \| blob \| history
youtube_dl/extractor/jwplatform.py		patch \| blob \| history
youtube_dl/extractor/mixcloud.py		patch \| blob \| history
youtube_dl/extractor/nhk.py		patch \| blob \| history
youtube_dl/extractor/nick.py		patch \| blob \| history
youtube_dl/extractor/ninenow.py		patch \| blob \| history
youtube_dl/extractor/openload.py		patch \| blob \| history
youtube_dl/extractor/piksel.py		patch \| blob \| history
youtube_dl/extractor/platzi.py		patch \| blob \| history
youtube_dl/extractor/ted.py		patch \| blob \| history
youtube_dl/extractor/tv4.py		patch \| blob \| history
youtube_dl/extractor/vk.py		patch \| blob \| history
youtube_dl/extractor/youtube.py		patch \| blob \| history
youtube_dl/extractor/zdf.py		patch \| blob \| history
youtube_dl/version.py		patch \| blob \| history