Raphaël G. Git Repositories - youtubedl/commitdiff
New upstream version 2020.03.24
author Rogério Brito <rbrito@ime.usp.br>
Fri, 10 Apr 2020 13:28:52 +0000 (10:28 -0300)
committer Rogério Brito <rbrito@ime.usp.br>
Fri, 10 Apr 2020 13:28:52 +0000 (10:28 -0300)
66 files changed:
ChangeLog
README.md
README.txt
docs/supportedsites.md
test/test_YoutubeDL.py
test/test_subtitles.py
youtube-dl
youtube-dl.1
youtube_dl/YoutubeDL.py
youtube_dl/compat.py
youtube_dl/extractor/abc.py
youtube_dl/extractor/bilibili.py
youtube_dl/extractor/cbc.py
youtube_dl/extractor/eporner.py
youtube_dl/extractor/extractors.py
youtube_dl/extractor/franceculture.py
youtube_dl/extractor/generic.py
youtube_dl/extractor/hellporno.py
youtube_dl/extractor/imdb.py
youtube_dl/extractor/jpopsukitv.py [deleted file]
youtube_dl/extractor/lecturio.py
youtube_dl/extractor/limelight.py
youtube_dl/extractor/linuxacademy.py
youtube_dl/extractor/mediaset.py
youtube_dl/extractor/mediasite.py
youtube_dl/extractor/ndr.py
youtube_dl/extractor/nhk.py
youtube_dl/extractor/nova.py
youtube_dl/extractor/npr.py
youtube_dl/extractor/nytimes.py
youtube_dl/extractor/peertube.py
youtube_dl/extractor/platzi.py
youtube_dl/extractor/pokemon.py
youtube_dl/extractor/popcorntimes.py [new file with mode: 0644]
youtube_dl/extractor/pornhd.py
youtube_dl/extractor/pornhub.py
youtube_dl/extractor/safari.py
youtube_dl/extractor/servus.py
youtube_dl/extractor/soundcloud.py
youtube_dl/extractor/sportdeutschland.py
youtube_dl/extractor/svt.py
youtube_dl/extractor/teachable.py
youtube_dl/extractor/telecinco.py
youtube_dl/extractor/telequebec.py
youtube_dl/extractor/tfo.py
youtube_dl/extractor/thisoldhouse.py
youtube_dl/extractor/toggle.py
youtube_dl/extractor/tumblr.py
youtube_dl/extractor/tv2dk.py
youtube_dl/extractor/tv5mondeplus.py
youtube_dl/extractor/tva.py
youtube_dl/extractor/twentyfourvideo.py
youtube_dl/extractor/twitch.py
youtube_dl/extractor/viewlift.py
youtube_dl/extractor/vimeo.py
youtube_dl/extractor/wistia.py
youtube_dl/extractor/xhamster.py
youtube_dl/extractor/xtube.py
youtube_dl/extractor/youjizz.py
youtube_dl/extractor/youtube.py
youtube_dl/extractor/zapiks.py
youtube_dl/extractor/zdf.py
youtube_dl/options.py
youtube_dl/update.py
youtube_dl/utils.py
youtube_dl/version.py

index 94aa9f327c73a5c8ee3e792b8435a445de1432b1..f753972c46ae0df3951139cab26dafa155c05507 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,104 @@
+version 2020.03.24
+
+Core
+- [utils] Revert support for cookie files with spaces used instead of tabs
+
+Extractors
+* [teachable] Update upskillcourses and gns3 domains
+* [generic] Look for teachable embeds before wistia
++ [teachable] Extract chapter metadata (#24421)
++ [bilibili] Add support for player.bilibili.com (#24402)
++ [bilibili] Add support for new URL schema with BV ids (#24439, #24442)
+* [limelight] Remove disabled API requests (#24255)
+* [soundcloud] Fix download URL extraction (#24394)
++ [cbc:watch] Add support for authentication (#19160)
+* [hellporno] Fix extraction (#24399)
+* [xtube] Fix formats extraction (#24348)
+* [ndr] Fix extraction (#24326)
+* [nhk] Update m3u8 URL and use native HLS downloader (#24329)
+- [nhk] Remove obsolete rtmp formats (#24329)
+* [nhk] Relax URL regular expression (#24329)
+- [vimeo] Revert fix showcase password protected video extraction (#24224)
+
+
+version 2020.03.08
+
+Core
++ [utils] Add support for cookie files with spaces used instead of tabs
+
+Extractors
++ [pornhub] Add support for pornhubpremium.com (#24288)
+- [youtube] Remove outdated code and unnecessary requests
+* [youtube] Improve extraction in 429 HTTP error conditions (#24283)
+* [nhk] Update API version (#24270)
+
+
+version 2020.03.06
+
+Extractors
+* [youtube] Fix age-gated videos support without login (#24248)
+* [vimeo] Fix showcase password protected video extraction (#24224)
+* [pornhub] Improve title extraction (#24184)
+* [peertube] Improve extraction (#23657)
++ [servus] Add support for new URL schema (#23475, #23583, #24142)
+* [vimeo] Fix subtitles URLs (#24209)
+
+
+version 2020.03.01
+
+Core
+* [YoutubeDL] Force redirect URL to unicode on python 2
+- [options] Remove duplicate short option -v for --version (#24162)
+
+Extractors
+* [xhamster] Fix extraction (#24205)
+* [franceculture] Fix extraction (#24204)
++ [telecinco] Add support for article opening videos
+* [telecinco] Fix extraction (#24195)
+* [xtube] Fix metadata extraction (#21073, #22455)
+* [youjizz] Fix extraction (#24181)
+- Remove no longer needed compat_str around geturl
+* [pornhd] Fix extraction (#24128)
++ [teachable] Add support for multiple videos per lecture (#24101)
++ [wistia] Add support for multiple generic embeds (#8347, #11385)
+* [imdb] Fix extraction (#23443)
+* [tv2dk:bornholm:play] Fix extraction (#24076)
+
+
+version 2020.02.16
+
+Core
+* [YoutubeDL] Fix playlist entry indexing with --playlist-items (#10591,
+  #10622)
+* [update] Fix updating via symlinks (#23991)
++ [compat] Introduce compat_realpath (#23991)
+
+Extractors
++ [npr] Add support for streams (#24042)
++ [24video] Add support for porn.24video.net (#23779, #23784)
+- [jpopsuki] Remove extractor (#23858)
+* [nova] Improve extraction (#23690)
+* [nova:embed] Improve (#23690)
+* [nova:embed] Fix extraction (#23672)
++ [abc:iview] Add support for 720p (#22907, #22921)
+* [nytimes] Improve format sorting (#24010)
++ [toggle] Add support for mewatch.sg (#23895, #23930)
+* [thisoldhouse] Fix extraction (#23951)
++ [popcorntimes] Add support for popcorntimes.tv (#23949)
+* [sportdeutschland] Update to new API
+* [twitch:stream] Lowercase channel id for stream request (#23917)
+* [tv5mondeplus] Fix extraction (#23907, #23911)
+* [tva] Relax URL regular expression (#23903)
+* [vimeo] Fix album extraction (#23864)
+* [viewlift] Improve extraction
+    * Fix extraction (#23851)
+    + Add support for authentication
+    + Add support for more domains
+* [svt] Fix series extraction (#22297)
+* [svt] Fix article extraction (#22897, #22919)
+* [soundcloud] Improve private playlist/set tracks extraction (#3707)
+
+
 version 2020.01.24
 
 Extractors
index 01f975958c8370016a39c9f3fb872241c977c62b..4f54a52401b4b6083147701a31a35217aa1000e6 100644 (file)
--- a/README.md
+++ b/README.md
@@ -835,7 +835,9 @@ In February 2015, the new YouTube player contained a character sequence in a str
 
 ### HTTP Error 429: Too Many Requests or 402: Payment Required
 
-These two error codes indicate that the service is blocking your IP address because of overuse. Contact the service and ask them to unblock your IP address, or - if you have acquired a whitelisted IP address already - use the [`--proxy` or `--source-address` options](#network-options) to select another IP address.
+These two error codes indicate that the service is blocking your IP address because of overuse. Usually this is a soft block meaning that you can gain access again after solving CAPTCHA. Just open a browser and solve a CAPTCHA the service suggests you and after that [pass cookies](#how-do-i-pass-cookies-to-youtube-dl) to youtube-dl. Note that if your machine has multiple external IPs then you should also pass exactly the same IP you've used for solving CAPTCHA with [`--source-address`](#network-options). Also you may need to pass a `User-Agent` HTTP header of your browser with [`--user-agent`](#workarounds).
+
+If this is not the case (no CAPTCHA suggested to solve by the service) then you can contact the service and ask them to unblock your IP address, or - if you have acquired a whitelisted IP address already - use the [`--proxy` or `--source-address` options](#network-options) to select another IP address.
 
 ### SyntaxError: Non-ASCII character
 
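
As a concrete sketch of the advice above (illustrative only; the cookie file, bind address, and User-Agent value are placeholders): the CLI options map onto youtube-dl's embedding API as --cookies -> cookiefile, --source-address -> source_address, and --user-agent -> utils.std_headers.

    # Hedged sketch of the CAPTCHA workaround via the embedding API;
    # every concrete value below is a placeholder.
    import youtube_dl
    from youtube_dl.utils import std_headers

    # the User-Agent of the browser that solved the CAPTCHA (--user-agent)
    std_headers['User-Agent'] = 'Mozilla/5.0 (X11; Linux x86_64; rv:68.0) Gecko/20100101 Firefox/68.0'

    ydl_opts = {
        'cookiefile': 'cookies.txt',       # cookies exported from that browser (--cookies)
        'source_address': '203.0.113.17',  # the external IP the CAPTCHA was solved from (--source-address)
    }
    with youtube_dl.YoutubeDL(ydl_opts) as ydl:
        ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])
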
index cc86a1be504693d38cc15f08a567a4c8be01b872..168b1315cc72c3e4143346d34638840aeec09676 100644 (file)
--- a/README.txt
+++ b/README.txt
@@ -1101,10 +1101,19 @@ above for how to update youtube-dl.
 HTTP Error 429: Too Many Requests or 402: Payment Required
 
 These two error codes indicate that the service is blocking your IP
-address because of overuse. Contact the service and ask them to unblock
-your IP address, or - if you have acquired a whitelisted IP address
-already - use the --proxy or --source-address options to select another
-IP address.
+address because of overuse. Usually this is a soft block meaning that
+you can gain access again after solving CAPTCHA. Just open a browser and
+solve a CAPTCHA the service suggests you and after that pass cookies to
+youtube-dl. Note that if your machine has multiple external IPs then you
+should also pass exactly the same IP you've used for solving CAPTCHA
+with --source-address. Also you may need to pass a User-Agent HTTP
+header of your browser with --user-agent.
+
+If this is not the case (no CAPTCHA suggested to solve by the service)
+then you can contact the service and ask them to unblock your IP
+address, or - if you have acquired a whitelisted IP address already -
+use the --proxy or --source-address options to select another IP
+address.
 
 SyntaxError: Non-ASCII character
 
index e9a8cc27adfc8a3da64f5a41ad8319e1563d5059..174b83bf3b6e1e75e2f8198b271cf6426431dc75 100644 (file)
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -98,6 +98,7 @@
  - **BiliBili**
  - **BilibiliAudio**
  - **BilibiliAudioAlbum**
+ - **BiliBiliPlayer**
  - **BioBioChileTV**
  - **BIQLE**
  - **BitChute**
  - **JeuxVideo**
  - **Joj**
  - **Jove**
- - **jpopsuki.tv**
  - **JWPlatform**
  - **Kakao**
  - **Kaltura**
  - **Pokemon**
  - **PolskieRadio**
  - **PolskieRadioCategory**
+ - **Popcorntimes**
  - **PopcornTV**
  - **PornCom**
  - **PornerBros**
  - **Vidzi**
  - **vier**: vier.be and vijf.be
  - **vier:videos**
- - **ViewLift**
- - **ViewLiftEmbed**
+ - **viewlift**
+ - **viewlift:embed**
  - **Viidea**
  - **viki**
  - **viki:channel**
index ce96661716c42ae0bf9c6a8ccb9ddf48c715e0a2..1e204e551b499edead22ce65e371787ca22ffc35 100644 (file)
--- a/test/test_YoutubeDL.py
+++ b/test/test_YoutubeDL.py
@@ -816,11 +816,15 @@ class TestYoutubeDL(unittest.TestCase):
             'webpage_url': 'http://example.com',
         }
 
-        def get_ids(params):
+        def get_downloaded_info_dicts(params):
             ydl = YDL(params)
-            # make a copy because the dictionary can be modified
-            ydl.process_ie_result(playlist.copy())
-            return [int(v['id']) for v in ydl.downloaded_info_dicts]
+            # make a deep copy because the dictionary and nested entries
+            # can be modified
+            ydl.process_ie_result(copy.deepcopy(playlist))
+            return ydl.downloaded_info_dicts
+
+        def get_ids(params):
+            return [int(v['id']) for v in get_downloaded_info_dicts(params)]
 
         result = get_ids({})
         self.assertEqual(result, [1, 2, 3, 4])
@@ -852,6 +856,22 @@ class TestYoutubeDL(unittest.TestCase):
         result = get_ids({'playlist_items': '2-4,3-4,3'})
         self.assertEqual(result, [2, 3, 4])
 
+        # Tests for https://github.com/ytdl-org/youtube-dl/issues/10591
+        # @{
+        result = get_downloaded_info_dicts({'playlist_items': '2-4,3-4,3'})
+        self.assertEqual(result[0]['playlist_index'], 2)
+        self.assertEqual(result[1]['playlist_index'], 3)
+
+        result = get_downloaded_info_dicts({'playlist_items': '2-4,3-4,3'})
+        self.assertEqual(result[0]['playlist_index'], 2)
+        self.assertEqual(result[1]['playlist_index'], 3)
+        self.assertEqual(result[2]['playlist_index'], 4)
+
+        result = get_downloaded_info_dicts({'playlist_items': '4,2'})
+        self.assertEqual(result[0]['playlist_index'], 4)
+        self.assertEqual(result[1]['playlist_index'], 2)
+        # @}
+
     def test_urlopen_no_file_protocol(self):
         # see https://github.com/ytdl-org/youtube-dl/issues/8227
         ydl = YDL()
index 7d57a628e5ef79c5e12d13ccd0a2b515548ffa60..17aaaf20d9a002336af43d1afa2f7d49a186ac9a 100644 (file)
--- a/test/test_subtitles.py
+++ b/test/test_subtitles.py
@@ -26,7 +26,6 @@ from youtube_dl.extractor import (
     ThePlatformIE,
     ThePlatformFeedIE,
     RTVEALaCartaIE,
-    FunnyOrDieIE,
     DemocracynowIE,
 )
 
@@ -322,18 +321,6 @@ class TestRtveSubtitles(BaseTestSubtitles):
         self.assertEqual(md5(subtitles['es']), '69e70cae2d40574fb7316f31d6eb7fca')
 
 
-class TestFunnyOrDieSubtitles(BaseTestSubtitles):
-    url = 'http://www.funnyordie.com/videos/224829ff6d/judd-apatow-will-direct-your-vine'
-    IE = FunnyOrDieIE
-
-    def test_allsubtitles(self):
-        self.DL.params['writesubtitles'] = True
-        self.DL.params['allsubtitles'] = True
-        subtitles = self.getSubtitles()
-        self.assertEqual(set(subtitles.keys()), set(['en']))
-        self.assertEqual(md5(subtitles['en']), 'c5593c193eacd353596c11c2d4f9ecc4')
-
-
 class TestDemocracynowSubtitles(BaseTestSubtitles):
     url = 'http://www.democracynow.org/shows/2015/7/3'
     IE = DemocracynowIE
index 748d6a0deb8f348b9d682f478fb36db4c0515ebc..fc830df0c89108cde3ec3a54c1355c09c1def66d 100755 (executable)
Binary files a/youtube-dl and b/youtube-dl differ
index 251810f21fd7545613c8ce9ef0ade45afdbff652..84ee44c659744e4f888a04742c1c3f890a5595fe 100644 (file)
--- a/youtube-dl.1
+++ b/youtube-dl.1
@@ -1711,10 +1711,21 @@ See above for how to update youtube\-dl.
 .PP
 These two error codes indicate that the service is blocking your IP
 address because of overuse.
-Contact the service and ask them to unblock your IP address, or \- if
-you have acquired a whitelisted IP address already \- use the
-\f[C]\-\-proxy\f[] or \f[C]\-\-source\-address\f[] options to select
-another IP address.
+Usually this is a soft block meaning that you can gain access again
+after solving CAPTCHA.
+Just open a browser and solve a CAPTCHA the service suggests you and
+after that pass cookies to youtube\-dl.
+Note that if your machine has multiple external IPs then you should also
+pass exactly the same IP you\[aq]ve used for solving CAPTCHA with
+\f[C]\-\-source\-address\f[].
+Also you may need to pass a \f[C]User\-Agent\f[] HTTP header of your
+browser with \f[C]\-\-user\-agent\f[].
+.PP
+If this is not the case (no CAPTCHA suggested to solve by the service)
+then you can contact the service and ask them to unblock your IP
+address, or \- if you have acquired a whitelisted IP address already \-
+use the \f[C]\-\-proxy\f[] or \f[C]\-\-source\-address\f[] options to
+select another IP address.
 .SS SyntaxError: Non\-ASCII character
 .PP
 The error
index f5cb46308198e4c65316fba10ccf30d2f3e14b6a..19370f62b0d3ddb91c74ae4b6cf6c569341fbdc5 100755 (executable)
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -92,6 +92,7 @@ from .utils import (
     YoutubeDLCookieJar,
     YoutubeDLCookieProcessor,
     YoutubeDLHandler,
+    YoutubeDLRedirectHandler,
 )
 from .cache import Cache
 from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER
@@ -990,7 +991,7 @@ class YoutubeDL(object):
                     'playlist_title': ie_result.get('title'),
                     'playlist_uploader': ie_result.get('uploader'),
                     'playlist_uploader_id': ie_result.get('uploader_id'),
-                    'playlist_index': i + playliststart,
+                    'playlist_index': playlistitems[i - 1] if playlistitems else i + playliststart,
                     'extractor': ie_result['extractor'],
                     'webpage_url': ie_result['webpage_url'],
                     'webpage_url_basename': url_basename(ie_result['webpage_url']),
@@ -2343,6 +2344,7 @@ class YoutubeDL(object):
         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
+        redirect_handler = YoutubeDLRedirectHandler()
         data_handler = compat_urllib_request_DataHandler()
 
         # When passing our own FileHandler instance, build_opener won't add the
@@ -2356,7 +2358,7 @@ class YoutubeDL(object):
         file_handler.file_open = file_open
 
         opener = compat_urllib_request.build_opener(
-            proxy_handler, https_handler, cookie_processor, ydlh, data_handler, file_handler)
+            proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
 
         # Delete the default user-agent header, which would otherwise apply in
         # cases where our custom HTTP handler doesn't come into play
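
To make the playlist_index change above concrete, a simplified standalone illustration (playliststart handling omitted) for --playlist-items 4,2, matching the new assertions in test_YoutubeDL.py:

    # Before the fix both downloaded entries reported their batch
    # position (1, 2); now they keep their original playlist positions.
    playlistitems = [4, 2]   # parsed from --playlist-items 4,2
    for i in (1, 2):         # 1-based position within the downloaded batch
        playlist_index = playlistitems[i - 1] if playlistitems else i
        print(i, '->', playlist_index)  # 1 -> 4, then 2 -> 2
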
index c75ab131b9955cec1367ec42aa41d8dadde423da..d1b86bd13df40676f76879e8e9ea314e1fb3c9be 100644 (file)
--- a/youtube_dl/compat.py
+++ b/youtube_dl/compat.py
@@ -2754,6 +2754,17 @@ else:
         compat_expanduser = os.path.expanduser
 
 
+if compat_os_name == 'nt' and sys.version_info < (3, 8):
+    # os.path.realpath on Windows does not follow symbolic links
+    # prior to Python 3.8 (see https://bugs.python.org/issue9949)
+    def compat_realpath(path):
+        while os.path.islink(path):
+            path = os.path.abspath(os.readlink(path))
+        return path
+else:
+    compat_realpath = os.path.realpath
+
+
 if sys.version_info < (3, 0):
     def compat_print(s):
         from .utils import preferredencoding
@@ -2998,6 +3009,7 @@ __all__ = [
     'compat_os_name',
     'compat_parse_qs',
     'compat_print',
+    'compat_realpath',
     'compat_setenv',
     'compat_shlex_quote',
     'compat_shlex_split',
index 4ac323bf6de6d17016c2425c133aad460072cadd..6637f4f3537591b46870cb7ec3f35d1167cb3cb7 100644 (file)
--- a/youtube_dl/extractor/abc.py
+++ b/youtube_dl/extractor/abc.py
@@ -110,17 +110,17 @@ class ABCIViewIE(InfoExtractor):
 
     # ABC iview programs are normally available for 14 days only.
     _TESTS = [{
-        'url': 'https://iview.abc.net.au/show/ben-and-hollys-little-kingdom/series/0/video/ZX9371A050S00',
-        'md5': 'cde42d728b3b7c2b32b1b94b4a548afc',
+        'url': 'https://iview.abc.net.au/show/gruen/series/11/video/LE1927H001S00',
+        'md5': '67715ce3c78426b11ba167d875ac6abf',
         'info_dict': {
-            'id': 'ZX9371A050S00',
+            'id': 'LE1927H001S00',
             'ext': 'mp4',
             'ext': 'mp4',
-            'title': "Gaston's Birthday",
-            'series': "Ben And Holly's Little Kingdom",
-            'description': 'md5:f9de914d02f226968f598ac76f105bcf',
-            'upload_date': '20180604',
-            'uploader_id': 'abc4kids',
-            'timestamp': 1528140219,
+            'title': "Series 11 Ep 1",
+            'series': "Gruen",
+            'description': 'md5:52cc744ad35045baf6aded2ce7287f67',
+            'upload_date': '20190925',
+            'uploader_id': 'abc1',
+            'timestamp': 1569445289,
         },
         'params': {
             'skip_download': True,
@@ -148,7 +148,7 @@ class ABCIViewIE(InfoExtractor):
                 'hdnea': token,
             })
 
-        for sd in ('sd', 'sd-low'):
+        for sd in ('720', 'sd', 'sd-low'):
             sd_url = try_get(
                 stream, lambda x: x['streams']['hls'][sd], compat_str)
             if not sd_url:
index 80bd696e21f3a4af3c996e9899ce439116e13d19..4dc597e160bcea4f0821719dc82e2908f20d1972 100644 (file)
--- a/youtube_dl/extractor/bilibili.py
+++ b/youtube_dl/extractor/bilibili.py
@@ -24,7 +24,18 @@ from ..utils import (
 
 
 class BiliBiliIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.|bangumi\.|)bilibili\.(?:tv|com)/(?:video/av|anime/(?P<anime_id>\d+)/play#)(?P<id>\d+)'
+    _VALID_URL = r'''(?x)
+                    https?://
+                        (?:(?:www|bangumi)\.)?
+                        bilibili\.(?:tv|com)/
+                        (?:
+                            (?:
+                                video/[aA][vV]|
+                                anime/(?P<anime_id>\d+)/play\#
+                            )(?P<id_bv>\d+)|
+                            video/[bB][vV](?P<id>[^/?#&]+)
+                        )
+                    '''
 
     _TESTS = [{
         'url': 'http://www.bilibili.tv/video/av1074402/',
 
     _TESTS = [{
         'url': 'http://www.bilibili.tv/video/av1074402/',
@@ -92,6 +103,10 @@ class BiliBiliIE(InfoExtractor):
                 'skip_download': True,  # Test metadata only
             },
         }]
+    }, {
+        # new BV video id format
+        'url': 'https://www.bilibili.com/video/BV1JE411F741',
+        'only_matching': True,
     }]
 
     _APP_KEY = 'iVGUTjsxvpLeuDCf'
@@ -109,7 +124,7 @@ class BiliBiliIE(InfoExtractor):
         url, smuggled_data = unsmuggle_url(url, {})
 
         mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = mobj.group('id') or mobj.group('id_bv')
         anime_id = mobj.group('anime_id')
         webpage = self._download_webpage(url, video_id)
 
@@ -419,3 +434,17 @@ class BilibiliAudioAlbumIE(BilibiliAudioBaseIE):
                     entries, am_id, album_title, album_data.get('intro'))
 
         return self.playlist_result(entries, am_id)
+
+
+class BiliBiliPlayerIE(InfoExtractor):
+    _VALID_URL = r'https?://player\.bilibili\.com/player\.html\?.*?\baid=(?P<id>\d+)'
+    _TEST = {
+        'url': 'http://player.bilibili.com/player.html?aid=92494333&cid=157926707&page=1',
+        'only_matching': True,
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        return self.url_result(
+            'http://www.bilibili.tv/video/av%s/' % video_id,
+            ie=BiliBiliIE.ie_key(), video_id=video_id)
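
A quick, illustrative sanity check of the reworked pattern, using the two test URLs from the hunks above. Note the group naming: id_bv captures the numeric av id and id captures the new BV id, which is why _real_extract now reads mobj.group('id') or mobj.group('id_bv'):

    import re

    _VALID_URL = r'''(?x)
                    https?://
                        (?:(?:www|bangumi)\.)?
                        bilibili\.(?:tv|com)/
                        (?:
                            (?:
                                video/[aA][vV]|
                                anime/(?P<anime_id>\d+)/play\#
                            )(?P<id_bv>\d+)|
                            video/[bB][vV](?P<id>[^/?#&]+)
                        )
                    '''
    for url in ('http://www.bilibili.tv/video/av1074402/',
                'https://www.bilibili.com/video/BV1JE411F741'):
        mobj = re.match(_VALID_URL, url)
        print(url, '->', mobj.group('id') or mobj.group('id_bv'))
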
index 751a3a8f26c94ecb19c130503593515312bac6c7..fd5ec6033b80513012cf2615fc56e80c7e82cadc 100644 (file)
--- a/youtube_dl/extractor/cbc.py
+++ b/youtube_dl/extractor/cbc.py
@@ -1,8 +1,10 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
+import hashlib
 import json
 import re
+from xml.sax.saxutils import escape
 
 from .common import InfoExtractor
 from ..compat import (
@@ -216,6 +218,29 @@ class CBCWatchBaseIE(InfoExtractor):
         'clearleap': 'http://www.clearleap.com/namespace/clearleap/1.0/',
     }
     _GEO_COUNTRIES = ['CA']
+    _LOGIN_URL = 'https://api.loginradius.com/identity/v2/auth/login'
+    _TOKEN_URL = 'https://cloud-api.loginradius.com/sso/jwt/api/token'
+    _API_KEY = '3f4beddd-2061-49b0-ae80-6f1f2ed65b37'
+    _NETRC_MACHINE = 'cbcwatch'
+
+    def _signature(self, email, password):
+        data = json.dumps({
+            'email': email,
+            'password': password,
+        }).encode()
+        headers = {'content-type': 'application/json'}
+        query = {'apikey': self._API_KEY}
+        resp = self._download_json(self._LOGIN_URL, None, data=data, headers=headers, query=query)
+        access_token = resp['access_token']
+
+        # token
+        query = {
+            'access_token': access_token,
+            'apikey': self._API_KEY,
+            'jwtapp': 'jwt',
+        }
+        resp = self._download_json(self._TOKEN_URL, None, headers=headers, query=query)
+        return resp['signature']
 
     def _call_api(self, path, video_id):
         url = path if path.startswith('http') else self._API_BASE_URL + path
 
     def _call_api(self, path, video_id):
         url = path if path.startswith('http') else self._API_BASE_URL + path
@@ -239,7 +264,8 @@ class CBCWatchBaseIE(InfoExtractor):
     def _real_initialize(self):
         if self._valid_device_token():
             return
     def _real_initialize(self):
         if self._valid_device_token():
             return
-        device = self._downloader.cache.load('cbcwatch', 'device') or {}
+        device = self._downloader.cache.load(
+            'cbcwatch', self._cache_device_key()) or {}
         self._device_id, self._device_token = device.get('id'), device.get('token')
         if self._valid_device_token():
             return
         self._device_id, self._device_token = device.get('id'), device.get('token')
         if self._valid_device_token():
             return
@@ -248,16 +274,30 @@ class CBCWatchBaseIE(InfoExtractor):
     def _valid_device_token(self):
         return self._device_id and self._device_token
 
     def _valid_device_token(self):
         return self._device_id and self._device_token
 
+    def _cache_device_key(self):
+        email, _ = self._get_login_info()
+        return '%s_device' % hashlib.sha256(email.encode()).hexdigest() if email else 'device'
+
     def _register_device(self):
     def _register_device(self):
-        self._device_id = self._device_token = None
         result = self._download_xml(
             self._API_BASE_URL + 'device/register',
             None, 'Acquiring device token',
             data=b'<device><type>web</type></device>')
         self._device_id = xpath_text(result, 'deviceId', fatal=True)
         result = self._download_xml(
             self._API_BASE_URL + 'device/register',
             None, 'Acquiring device token',
             data=b'<device><type>web</type></device>')
         self._device_id = xpath_text(result, 'deviceId', fatal=True)
-        self._device_token = xpath_text(result, 'deviceToken', fatal=True)
+        email, password = self._get_login_info()
+        if email and password:
+            signature = self._signature(email, password)
+            data = '<login><token>{0}</token><device><deviceId>{1}</deviceId><type>web</type></device></login>'.format(
+                escape(signature), escape(self._device_id)).encode()
+            url = self._API_BASE_URL + 'device/login'
+            result = self._download_xml(
+                url, None, data=data,
+                headers={'content-type': 'application/xml'})
+            self._device_token = xpath_text(result, 'token', fatal=True)
+        else:
+            self._device_token = xpath_text(result, 'deviceToken', fatal=True)
         self._downloader.cache.store(
-            'cbcwatch', 'device', {
+            'cbcwatch', self._cache_device_key(), {
                 'id': self._device_id,
                 'token': self._device_token,
             })
index c050bf9df3fb7ececed5b3e03a70aea9b2c37417..fe42821c731c711e8f0974fd4ce48f5c9aee8e8f 100644 (file)
--- a/youtube_dl/extractor/eporner.py
+++ b/youtube_dl/extractor/eporner.py
@@ -4,7 +4,6 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..compat import compat_str
 from ..utils import (
     encode_base_n,
     ExtractorError,
@@ -55,7 +54,7 @@ class EpornerIE(InfoExtractor):
 
         webpage, urlh = self._download_webpage_handle(url, display_id)
 
-        video_id = self._match_id(compat_str(urlh.geturl()))
+        video_id = self._match_id(urlh.geturl())
 
         hash = self._search_regex(
             r'hash\s*:\s*["\']([\da-f]{32})', webpage, 'hash')
index 1cab440f46e9733e6182563b36a464d77b3610c7..ef803b8a78d00544f4286f21d2c43acde751940b 100644 (file)
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -105,6 +105,7 @@ from .bilibili import (
     BiliBiliBangumiIE,
     BilibiliAudioIE,
     BilibiliAudioAlbumIE,
+    BiliBiliPlayerIE,
 )
 from .biobiochiletv import BioBioChileTVIE
 from .bitchute import (
@@ -497,7 +498,6 @@ from .jeuxvideo import JeuxVideoIE
 from .jove import JoveIE
 from .joj import JojIE
 from .jwplatform import JWPlatformIE
-from .jpopsukitv import JpopsukiIE
 from .kakao import KakaoIE
 from .kaltura import KalturaIE
 from .kanalplay import KanalPlayIE
@@ -850,6 +850,7 @@ from .polskieradio import (
     PolskieRadioIE,
     PolskieRadioCategoryIE,
 )
+from .popcorntimes import PopcorntimesIE
 from .popcorntv import PopcornTVIE
 from .porn91 import Porn91IE
 from .porncom import PornComIE
index b8fa175880f47d6050e4fa3908994bd9777131ac..306b45fc99a4c3495a233d8fb3c649032641d87a 100644 (file)
--- a/youtube_dl/extractor/franceculture.py
+++ b/youtube_dl/extractor/franceculture.py
@@ -31,7 +31,13 @@ class FranceCultureIE(InfoExtractor):
         webpage = self._download_webpage(url, display_id)
 
         video_data = extract_attributes(self._search_regex(
-            r'(?s)<div[^>]+class="[^"]*?(?:title-zone-diffusion|heading-zone-(?:wrapper|player-button))[^"]*?"[^>]*>.*?(<button[^>]+data-asset-source="[^"]+"[^>]+>)',
+            r'''(?sx)
+                (?:
+                    </h1>|
+                    <div[^>]+class="[^"]*?(?:title-zone-diffusion|heading-zone-(?:wrapper|player-button))[^"]*?"[^>]*>
+                ).*?
+                (<button[^>]+data-asset-source="[^"]+"[^>]+>)
+            ''',
             webpage, 'video data'))
 
         video_url = video_data['data-asset-source']
index 3c002472f795b3600c8f721e76c446f85f3f9bf5..a495ee15aaedc2a64fad86bde03e0be4cab64e85 100644 (file)
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -2287,7 +2287,7 @@ class GenericIE(InfoExtractor):
 
         if head_response is not False:
             # Check for redirect
-            new_url = compat_str(head_response.geturl())
+            new_url = head_response.geturl()
             if url != new_url:
                 self.report_following_redirect(new_url)
                 if force_videoid:
@@ -2387,12 +2387,12 @@ class GenericIE(InfoExtractor):
                 return self.playlist_result(
                     self._parse_xspf(
                         doc, video_id, xspf_url=url,
-                        xspf_base_url=compat_str(full_response.geturl())),
+                        xspf_base_url=full_response.geturl()),
                     video_id)
             elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag):
                 info_dict['formats'] = self._parse_mpd_formats(
                     doc,
-                    mpd_base_url=compat_str(full_response.geturl()).rpartition('/')[0],
+                    mpd_base_url=full_response.geturl().rpartition('/')[0],
                     mpd_url=url)
                 self._sort_formats(info_dict['formats'])
                 return info_dict
@@ -2536,15 +2536,21 @@ class GenericIE(InfoExtractor):
             return self.playlist_from_matches(
                 dailymail_urls, video_id, video_title, ie=DailyMailIE.ie_key())
 
+        # Look for Teachable embeds, must be before Wistia
+        teachable_url = TeachableIE._extract_url(webpage, url)
+        if teachable_url:
+            return self.url_result(teachable_url)
+
         # Look for embedded Wistia player
         # Look for embedded Wistia player
-        wistia_url = WistiaIE._extract_url(webpage)
-        if wistia_url:
-            return {
-                '_type': 'url_transparent',
-                'url': self._proto_relative_url(wistia_url),
-                'ie_key': WistiaIE.ie_key(),
-                'uploader': video_uploader,
-            }
+        wistia_urls = WistiaIE._extract_urls(webpage)
+        if wistia_urls:
+            playlist = self.playlist_from_matches(wistia_urls, video_id, video_title, ie=WistiaIE.ie_key())
+            for entry in playlist['entries']:
+                entry.update({
+                    '_type': 'url_transparent',
+                    'uploader': video_uploader,
+                })
+            return playlist
 
         # Look for SVT player
         svt_url = SVTIE._extract_url(webpage)
@@ -3140,10 +3146,6 @@ class GenericIE(InfoExtractor):
             return self.playlist_from_matches(
                 peertube_urls, video_id, video_title, ie=PeerTubeIE.ie_key())
 
-        teachable_url = TeachableIE._extract_url(webpage, url)
-        if teachable_url:
-            return self.url_result(teachable_url)
-
         indavideo_urls = IndavideoEmbedIE._extract_urls(webpage)
         if indavideo_urls:
             return self.playlist_from_matches(
index 0ee8ea712c72e618a4d7544f26c376e94fcaf70d..fae4251034da29dd252557f86528531b8fcb9235 100644 (file)
--- a/youtube_dl/extractor/hellporno.py
+++ b/youtube_dl/extractor/hellporno.py
@@ -1,12 +1,11 @@
 from __future__ import unicode_literals
 
-import re
-
 from .common import InfoExtractor
 from ..utils import (
-    js_to_json,
+    int_or_none,
+    merge_dicts,
     remove_end,
-    determine_ext,
+    unified_timestamp,
 )
 
 
@@ -14,15 +13,21 @@ class HellPornoIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?hellporno\.(?:com/videos|net/v)/(?P<id>[^/]+)'
     _TESTS = [{
         'url': 'http://hellporno.com/videos/dixie-is-posing-with-naked-ass-very-erotic/',
-        'md5': '1fee339c610d2049699ef2aa699439f1',
+        'md5': 'f0a46ebc0bed0c72ae8fe4629f7de5f3',
         'info_dict': {
             'id': '149116',
             'display_id': 'dixie-is-posing-with-naked-ass-very-erotic',
             'ext': 'mp4',
             'title': 'Dixie is posing with naked ass very erotic',
+            'description': 'md5:9a72922749354edb1c4b6e540ad3d215',
+            'categories': list,
             'thumbnail': r're:https?://.*\.jpg$',
             'thumbnail': r're:https?://.*\.jpg$',
+            'duration': 240,
+            'timestamp': 1398762720,
+            'upload_date': '20140429',
+            'view_count': int,
             'age_limit': 18,
             'age_limit': 18,
-        }
+        },
     }, {
         'url': 'http://hellporno.net/v/186271/',
         'only_matching': True,
     }, {
         'url': 'http://hellporno.net/v/186271/',
         'only_matching': True,
@@ -36,40 +41,36 @@ class HellPornoIE(InfoExtractor):
         title = remove_end(self._html_search_regex(
             r'<title>([^<]+)</title>', webpage, 'title'), ' - Hell Porno')
 
         title = remove_end(self._html_search_regex(
             r'<title>([^<]+)</title>', webpage, 'title'), ' - Hell Porno')
 
-        flashvars = self._parse_json(self._search_regex(
-            r'var\s+flashvars\s*=\s*({.+?});', webpage, 'flashvars'),
-            display_id, transform_source=js_to_json)
-
-        video_id = flashvars.get('video_id')
-        thumbnail = flashvars.get('preview_url')
-        ext = determine_ext(flashvars.get('postfix'), 'mp4')
-
-        formats = []
-        for video_url_key in ['video_url', 'video_alt_url']:
-            video_url = flashvars.get(video_url_key)
-            if not video_url:
-                continue
-            video_text = flashvars.get('%s_text' % video_url_key)
-            fmt = {
-                'url': video_url,
-                'ext': ext,
-                'format_id': video_text,
-            }
-            m = re.search(r'^(?P<height>\d+)[pP]', video_text)
-            if m:
-                fmt['height'] = int(m.group('height'))
-            formats.append(fmt)
-        self._sort_formats(formats)
+        info = self._parse_html5_media_entries(url, webpage, display_id)[0]
+        self._sort_formats(info['formats'])
 
-        categories = self._html_search_meta(
-            'keywords', webpage, 'categories', default='').split(',')
+        video_id = self._search_regex(
+            (r'chs_object\s*=\s*["\'](\d+)',
+             r'params\[["\']video_id["\']\]\s*=\s*(\d+)'), webpage, 'video id',
+            default=display_id)
+        description = self._search_regex(
+            r'class=["\']desc_video_view_v2[^>]+>([^<]+)', webpage,
+            'description', fatal=False)
+        categories = [
+            c.strip()
+            for c in self._html_search_meta(
+                'keywords', webpage, 'categories', default='').split(',')
+            if c.strip()]
+        duration = int_or_none(self._og_search_property(
+            'video:duration', webpage, fatal=False))
+        timestamp = unified_timestamp(self._og_search_property(
+            'video:release_date', webpage, fatal=False))
+        view_count = int_or_none(self._search_regex(
+            r'>Views\s+(\d+)', webpage, 'view count', fatal=False))
 
-        return {
+        return merge_dicts(info, {
             'id': video_id,
             'display_id': display_id,
             'title': title,
-            'thumbnail': thumbnail,
+            'description': description,
             'categories': categories,
+            'duration': duration,
+            'timestamp': timestamp,
+            'view_count': view_count,
             'age_limit': 18,
-            'formats': formats,
-        }
+        })
index 436759da5480da347a578aba6cb388cb01e97448..a31301985b0c7d212886a2e6e495c7d705714041 100644 (file)
--- a/youtube_dl/extractor/imdb.py
+++ b/youtube_dl/extractor/imdb.py
@@ -1,5 +1,7 @@
 from __future__ import unicode_literals
 
+import base64
+import json
 import re
 
 from .common import InfoExtractor
@@ -8,6 +10,7 @@ from ..utils import (
     mimetype2ext,
     parse_duration,
     qualities,
+    try_get,
     url_or_none,
 )
 
@@ -15,15 +18,16 @@ from ..utils import (
 class ImdbIE(InfoExtractor):
     IE_NAME = 'imdb'
     IE_DESC = 'Internet Movie Database trailers'
-    _VALID_URL = r'https?://(?:www|m)\.imdb\.com/(?:video|title|list).+?[/-]vi(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www|m)\.imdb\.com/(?:video|title|list).*?[/-]vi(?P<id>\d+)'
 
     _TESTS = [{
         'url': 'http://www.imdb.com/video/imdb/vi2524815897',
         'info_dict': {
             'id': '2524815897',
             'ext': 'mp4',
-            'title': 'No. 2 from Ice Age: Continental Drift (2012)',
+            'title': 'No. 2',
             'description': 'md5:87bd0bdc61e351f21f20d2d7441cb4e7',
+            'duration': 152,
         }
     }, {
         'url': 'http://www.imdb.com/video/_/vi2524815897',
@@ -47,21 +51,23 @@ class ImdbIE(InfoExtractor):
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
-        webpage = self._download_webpage(
-            'https://www.imdb.com/videoplayer/vi' + video_id, video_id)
-        video_metadata = self._parse_json(self._search_regex(
-            r'window\.IMDbReactInitialState\.push\(({.+?})\);', webpage,
-            'video metadata'), video_id)['videos']['videoMetadata']['vi' + video_id]
-        title = self._html_search_meta(
-            ['og:title', 'twitter:title'], webpage) or self._html_search_regex(
-            r'<title>(.+?)</title>', webpage, 'title', fatal=False) or video_metadata['title']
+
+        data = self._download_json(
+            'https://www.imdb.com/ve/data/VIDEO_PLAYBACK_DATA', video_id,
+            query={
+                'key': base64.b64encode(json.dumps({
+                    'type': 'VIDEO_PLAYER',
+                    'subType': 'FORCE_LEGACY',
+                    'id': 'vi%s' % video_id,
+                }).encode()).decode(),
+            })[0]
 
         quality = qualities(('SD', '480p', '720p', '1080p'))
         formats = []
-        for encoding in video_metadata.get('encodings', []):
+        for encoding in data['videoLegacyEncodings']:
             if not encoding or not isinstance(encoding, dict):
                 continue
-            video_url = url_or_none(encoding.get('videoUrl'))
+            video_url = url_or_none(encoding.get('url'))
             if not video_url:
                 continue
             ext = mimetype2ext(encoding.get(
@@ -69,7 +75,7 @@ class ImdbIE(InfoExtractor):
             if ext == 'm3u8':
                 formats.extend(self._extract_m3u8_formats(
                     video_url, video_id, 'mp4', entry_protocol='m3u8_native',
-                    m3u8_id='hls', fatal=False))
+                    preference=1, m3u8_id='hls', fatal=False))
                 continue
             format_id = encoding.get('definition')
             formats.append({
@@ -80,13 +86,33 @@ class ImdbIE(InfoExtractor):
             })
         self._sort_formats(formats)
 
+        webpage = self._download_webpage(
+            'https://www.imdb.com/video/vi' + video_id, video_id)
+        video_metadata = self._parse_json(self._search_regex(
+            r'args\.push\(\s*({.+?})\s*\)\s*;', webpage,
+            'video metadata'), video_id)
+
+        video_info = video_metadata.get('VIDEO_INFO')
+        if video_info and isinstance(video_info, dict):
+            info = try_get(
+                video_info, lambda x: x[list(video_info.keys())[0]][0], dict)
+        else:
+            info = {}
+
+        title = self._html_search_meta(
+            ['og:title', 'twitter:title'], webpage) or self._html_search_regex(
+            r'<title>(.+?)</title>', webpage, 'title',
+            default=None) or info['videoTitle']
+
         return {
             'id': video_id,
             'title': title,
+            'alt_title': info.get('videoSubTitle'),
             'formats': formats,
-            'description': video_metadata.get('description'),
-            'thumbnail': video_metadata.get('slate', {}).get('url'),
-            'duration': parse_duration(video_metadata.get('duration')),
+            'description': info.get('videoDescription'),
+            'thumbnail': url_or_none(try_get(
+                video_metadata, lambda x: x['videoSlate']['source'])),
+            'duration': parse_duration(info.get('videoRuntime')),
         }
 
 
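
For reference, the key the rewritten extractor sends to the VIDEO_PLAYBACK_DATA endpoint is just base64-encoded JSON; a self-contained reconstruction from the hunk above, using the test video id:

    import base64
    import json

    video_id = '2524815897'  # id from the test case above
    key = base64.b64encode(json.dumps({
        'type': 'VIDEO_PLAYER',
        'subType': 'FORCE_LEGACY',
        'id': 'vi%s' % video_id,
    }).encode()).decode()
    print(key)  # value passed as ?key=... to /ve/data/VIDEO_PLAYBACK_DATA
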
diff --git a/youtube_dl/extractor/jpopsukitv.py b/youtube_dl/extractor/jpopsukitv.py
deleted file mode 100644 (file)
index 4b5f346..0000000
--- a/youtube_dl/extractor/jpopsukitv.py
+++ /dev/null
@@ -1,68 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-from ..utils import (
-    int_or_none,
-    unified_strdate,
-)
-
-
-class JpopsukiIE(InfoExtractor):
-    IE_NAME = 'jpopsuki.tv'
-    _VALID_URL = r'https?://(?:www\.)?jpopsuki\.tv/(?:category/)?video/[^/]+/(?P<id>\S+)'
-
-    _TEST = {
-        'url': 'http://www.jpopsuki.tv/video/ayumi-hamasaki---evolution/00be659d23b0b40508169cdee4545771',
-        'md5': '88018c0c1a9b1387940e90ec9e7e198e',
-        'info_dict': {
-            'id': '00be659d23b0b40508169cdee4545771',
-            'ext': 'mp4',
-            'title': 'ayumi hamasaki - evolution',
-            'description': 'Release date: 2001.01.31\r\n浜崎あゆみ - evolution',
-            'thumbnail': 'http://www.jpopsuki.tv/cache/89722c74d2a2ebe58bcac65321c115b2.jpg',
-            'uploader': 'plama_chan',
-            'uploader_id': '404',
-            'upload_date': '20121101'
-        }
-    }
-
-    def _real_extract(self, url):
-        video_id = self._match_id(url)
-
-        webpage = self._download_webpage(url, video_id)
-
-        video_url = 'http://www.jpopsuki.tv' + self._html_search_regex(
-            r'<source src="(.*?)" type', webpage, 'video url')
-
-        video_title = self._og_search_title(webpage)
-        description = self._og_search_description(webpage)
-        thumbnail = self._og_search_thumbnail(webpage)
-        uploader = self._html_search_regex(
-            r'<li>from: <a href="/user/view/user/(.*?)/uid/',
-            webpage, 'video uploader', fatal=False)
-        uploader_id = self._html_search_regex(
-            r'<li>from: <a href="/user/view/user/\S*?/uid/(\d*)',
-            webpage, 'video uploader_id', fatal=False)
-        upload_date = unified_strdate(self._html_search_regex(
-            r'<li>uploaded: (.*?)</li>', webpage, 'video upload_date',
-            fatal=False))
-        view_count_str = self._html_search_regex(
-            r'<li>Hits: ([0-9]+?)</li>', webpage, 'video view_count',
-            fatal=False)
-        comment_count_str = self._html_search_regex(
-            r'<h2>([0-9]+?) comments</h2>', webpage, 'video comment_count',
-            fatal=False)
-
-        return {
-            'id': video_id,
-            'url': video_url,
-            'title': video_title,
-            'description': description,
-            'thumbnail': thumbnail,
-            'uploader': uploader,
-            'uploader_id': uploader_id,
-            'upload_date': upload_date,
-            'view_count': int_or_none(view_count_str),
-            'comment_count': int_or_none(comment_count_str),
-        }
index 6ed7da4abaa7a2a45f924b4bf9f919261a40bec9..1b2dcef46621237fd7c7ce376165a6bc5c674606 100644 (file)
--- a/youtube_dl/extractor/lecturio.py
+++ b/youtube_dl/extractor/lecturio.py
@@ -4,7 +4,6 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..compat import compat_str
 from ..utils import (
     clean_html,
     determine_ext,
@@ -36,7 +35,7 @@ class LecturioBaseIE(InfoExtractor):
             self._LOGIN_URL, None, 'Downloading login popup')
 
         def is_logged(url_handle):
-            return self._LOGIN_URL not in compat_str(url_handle.geturl())
+            return self._LOGIN_URL not in url_handle.geturl()
 
         # Already logged in
         if is_logged(urlh):
index 729d8de50fab70cd69bab41fae9db0cba4d7da9b..39f74d2822bc7296df8a5c16e5edfce3298e82ab 100644 (file)
--- a/youtube_dl/extractor/limelight.py
+++ b/youtube_dl/extractor/limelight.py
@@ -18,7 +18,6 @@ from ..utils import (
 
 class LimelightBaseIE(InfoExtractor):
     _PLAYLIST_SERVICE_URL = 'http://production-ps.lvp.llnw.net/r/PlaylistService/%s/%s/%s'
-    _API_URL = 'http://api.video.limelight.com/rest/organizations/%s/%s/%s/%s.json'
 
     @classmethod
     def _extract_urls(cls, webpage, source_url):
@@ -70,7 +69,8 @@ class LimelightBaseIE(InfoExtractor):
         try:
             return self._download_json(
                 self._PLAYLIST_SERVICE_URL % (self._PLAYLIST_SERVICE_PATH, item_id, method),
-                item_id, 'Downloading PlaylistService %s JSON' % method, fatal=fatal, headers=headers)
+                item_id, 'Downloading PlaylistService %s JSON' % method,
+                fatal=fatal, headers=headers)
         except ExtractorError as e:
             if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
                 error = self._parse_json(e.cause.read().decode(), item_id)['detail']['contentAccessPermission']
@@ -79,22 +79,22 @@ class LimelightBaseIE(InfoExtractor):
                 raise ExtractorError(error, expected=True)
             raise
 
-    def _call_api(self, organization_id, item_id, method):
-        return self._download_json(
-            self._API_URL % (organization_id, self._API_PATH, item_id, method),
-            item_id, 'Downloading API %s JSON' % method)
-
-    def _extract(self, item_id, pc_method, mobile_method, meta_method, referer=None):
+    def _extract(self, item_id, pc_method, mobile_method, referer=None):
         pc = self._call_playlist_service(item_id, pc_method, referer=referer)
-        metadata = self._call_api(pc['orgId'], item_id, meta_method)
-        mobile = self._call_playlist_service(item_id, mobile_method, fatal=False, referer=referer)
-        return pc, mobile, metadata
+        mobile = self._call_playlist_service(
+            item_id, mobile_method, fatal=False, referer=referer)
+        return pc, mobile
+
+    def _extract_info(self, pc, mobile, i, referer):
+        get_item = lambda x, y: try_get(x, lambda x: x[y][i], dict) or {}
+        pc_item = get_item(pc, 'playlistItems')
+        mobile_item = get_item(mobile, 'mediaList')
+        video_id = pc_item.get('mediaId') or mobile_item['mediaId']
+        title = pc_item.get('title') or mobile_item['title']
 
-    def _extract_info(self, streams, mobile_urls, properties):
-        video_id = properties['media_id']
         formats = []
         urls = []
-        for stream in streams:
+        for stream in pc_item.get('streams', []):
             stream_url = stream.get('url')
             if not stream_url or stream.get('drmProtected') or stream_url in urls:
                 continue
@@ -155,7 +155,7 @@ class LimelightBaseIE(InfoExtractor):
                     })
                 formats.append(fmt)
 
-        for mobile_url in mobile_urls:
+        for mobile_url in mobile_item.get('mobileUrls', []):
             media_url = mobile_url.get('mobileUrl')
             format_id = mobile_url.get('targetMediaPlatform')
             if not media_url or format_id in ('Widevine', 'SmoothStreaming') or media_url in urls:
@@ -179,54 +179,34 @@ class LimelightBaseIE(InfoExtractor):
 
         self._sort_formats(formats)
 
-        title = properties['title']
-        description = properties.get('description')
-        timestamp = int_or_none(properties.get('publish_date') or properties.get('create_date'))
-        duration = float_or_none(properties.get('duration_in_milliseconds'), 1000)
-        filesize = int_or_none(properties.get('total_storage_in_bytes'))
-        categories = [properties.get('category')]
-        tags = properties.get('tags', [])
-        thumbnails = [{
-            'url': thumbnail['url'],
-            'width': int_or_none(thumbnail.get('width')),
-            'height': int_or_none(thumbnail.get('height')),
-        } for thumbnail in properties.get('thumbnails', []) if thumbnail.get('url')]
-
         subtitles = {}
-        for caption in properties.get('captions', []):
-            lang = caption.get('language_code')
-            subtitles_url = caption.get('url')
-            if lang and subtitles_url:
-                subtitles.setdefault(lang, []).append({
-                    'url': subtitles_url,
-                })
-        closed_captions_url = properties.get('closed_captions_url')
-        if closed_captions_url:
-            subtitles.setdefault('en', []).append({
-                'url': closed_captions_url,
-                'ext': 'ttml',
-            })
+        for flag in mobile_item.get('flags'):
+            if flag == 'ClosedCaptions':
+                closed_captions = self._call_playlist_service(
+                    video_id, 'getClosedCaptionsDetailsByMediaId',
+                    False, referer) or []
+                for cc in closed_captions:
+                    cc_url = cc.get('webvttFileUrl')
+                    if not cc_url:
+                        continue
+                    lang = cc.get('languageCode') or self._search_regex(r'/[a-z]{2}\.vtt', cc_url, 'lang', default='en')
+                    subtitles.setdefault(lang, []).append({
+                        'url': cc_url,
+                    })
+                break
+
+        get_meta = lambda x: pc_item.get(x) or mobile_item.get(x)
 
         return {
             'id': video_id,
             'title': title,
-            'description': description,
+            'description': get_meta('description'),
             'formats': formats,
-            'timestamp': timestamp,
-            'duration': duration,
-            'filesize': filesize,
-            'categories': categories,
-            'tags': tags,
-            'thumbnails': thumbnails,
+            'duration': float_or_none(get_meta('durationInMilliseconds'), 1000),
+            'thumbnail': get_meta('previewImageUrl') or get_meta('thumbnailImageUrl'),
             'subtitles': subtitles,
         }
 
-    def _extract_info_helper(self, pc, mobile, i, metadata):
-        return self._extract_info(
-            try_get(pc, lambda x: x['playlistItems'][i]['streams'], list) or [],
-            try_get(mobile, lambda x: x['mediaList'][i]['mobileUrls'], list) or [],
-            metadata)
-
 
 class LimelightMediaIE(LimelightBaseIE):
     IE_NAME = 'limelight'
@@ -251,8 +231,6 @@ class LimelightMediaIE(LimelightBaseIE):
             'description': 'md5:8005b944181778e313d95c1237ddb640',
             'thumbnail': r're:^https?://.*\.jpeg$',
             'duration': 144.23,
-            'timestamp': 1244136834,
-            'upload_date': '20090604',
         },
         'params': {
             # m3u8 download
@@ -268,30 +246,29 @@ class LimelightMediaIE(LimelightBaseIE):
             'title': '3Play Media Overview Video',
             'thumbnail': r're:^https?://.*\.jpeg$',
             'duration': 78.101,
-            'timestamp': 1338929955,
-            'upload_date': '20120605',
-            'subtitles': 'mincount:9',
+            # TODO: extract all languages that were accessible via API
+            # 'subtitles': 'mincount:9',
+            'subtitles': 'mincount:1',
         },
     }, {
         'url': 'https://assets.delvenetworks.com/player/loader.swf?mediaId=8018a574f08d416e95ceaccae4ba0452',
         'only_matching': True,
     }]
     _PLAYLIST_SERVICE_PATH = 'media'
         },
 
     def _real_extract(self, url):
         url, smuggled_data = unsmuggle_url(url, {})
         video_id = self._match_id(url)
 
         self._initialize_geo_bypass({
             'countries': smuggled_data.get('geo_countries'),
         })
 
         self._initialize_geo_bypass({
             'countries': smuggled_data.get('geo_countries'),
         })
 
-        pc, mobile, metadata = self._extract(
+        pc, mobile = self._extract(
             video_id, 'getPlaylistByMediaId',
             video_id, 'getPlaylistByMediaId',
-            'getMobilePlaylistByMediaId', 'properties',
-            smuggled_data.get('source_url'))
+            'getMobilePlaylistByMediaId', source_url)
 
 
+        return self._extract_info(pc, mobile, 0, source_url)
 
 
 class LimelightChannelIE(LimelightBaseIE):
 
         'info_dict': {
             'id': 'ab6a524c379342f9b23642917020c082',
             'title': 'Javascript Sample Code',
         'info_dict': {
             'id': 'ab6a524c379342f9b23642917020c082',
             'title': 'Javascript Sample Code',
+            'description': 'Javascript Sample Code - http://www.delvenetworks.com/sample-code/playerCode-demo.html',
         },
         'playlist_mincount': 3,
     }, {
         },
         'playlist_mincount': 3,
     }, {
@@ -320,22 +298,23 @@ class LimelightChannelIE(LimelightBaseIE):
         'only_matching': True,
     }]
     _PLAYLIST_SERVICE_PATH = 'channel'
         'only_matching': True,
     }]
     _PLAYLIST_SERVICE_PATH = 'channel'
-    _API_PATH = 'channels'
 
     def _real_extract(self, url):
         url, smuggled_data = unsmuggle_url(url, {})
         channel_id = self._match_id(url)
 
 
 
+        pc, mobile = self._extract(
             channel_id, 'getPlaylistByChannelId',
             'getMobilePlaylistWithNItemsByChannelId?begin=0&count=-1',
             channel_id, 'getPlaylistByChannelId',
             'getMobilePlaylistWithNItemsByChannelId?begin=0&count=-1',
-            'media', smuggled_data.get('source_url'))
+            source_url)
 
         entries = [
 
-            for i in range(len(medias['media_list']))]
+            self._extract_info(pc, mobile, i, source_url)
+            for i in range(len(pc['playlistItems']))]
 
-        return self.playlist_result(entries, channel_id, pc['title'])
+        return self.playlist_result(
+            entries, channel_id, pc.get('title'), mobile.get('description'))
 
 
 class LimelightChannelListIE(LimelightBaseIE):
@@ -368,10 +347,12 @@ class LimelightChannelListIE(LimelightBaseIE):
     def _real_extract(self, url):
         channel_list_id = self._match_id(url)
 
-        channel_list = self._call_playlist_service(channel_list_id, 'getMobileChannelListById')
+        channel_list = self._call_playlist_service(
+            channel_list_id, 'getMobileChannelListById')
 
         entries = [
             self.url_result('limelight:channel:%s' % channel['id'], 'LimelightChannel')
             for channel in channel_list['channelList']]
 
-        return self.playlist_result(entries, channel_list_id, channel_list['title'])
+        return self.playlist_result(
+            entries, channel_list_id, channel_list['title'])
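
Note on the Limelight refactor above: the separate media/channels API (_call_api, _API_PATH) is gone, and _extract_info now reads everything from the two PlaylistService responses. A minimal standalone sketch of the accessor pattern it relies on (try_get is the real youtube_dl.utils helper; the sample dicts are illustrative, not real API output):

    from youtube_dl.utils import try_get

    pc = {'playlistItems': [{'mediaId': 'abc', 'title': 'PC title'}]}
    mobile = {'mediaList': [{'mediaId': 'abc', 'title': 'Mobile title'}]}
    i = 0

    get_item = lambda x, y: try_get(x, lambda x: x[y][i], dict) or {}
    pc_item = get_item(pc, 'playlistItems')
    mobile_item = get_item(mobile, 'mediaList')
    # PlaylistService metadata wins; the mobile playlist is the fallback,
    # and a mediaId missing from both raises KeyError, as in the diff.
    video_id = pc_item.get('mediaId') or mobile_item['mediaId']
    title = pc_item.get('title') or mobile_item['title']
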
index a78c6556e105220a09b66dac94d8fc27780e3dc5..23ca965d977b1ec682101f048684f20f1b70834c 100644 (file)
--- a/youtube_dl/extractor/linuxacademy.py
+++ b/youtube_dl/extractor/linuxacademy.py
@@ -8,7 +8,6 @@ from .common import InfoExtractor
 from ..compat import (
     compat_b64decode,
     compat_HTTPError,
-    compat_str,
 )
 from ..utils import (
     ExtractorError,
@@ -99,7 +98,7 @@ class LinuxAcademyIE(InfoExtractor):
             'sso': 'true',
         })
 
-        login_state_url = compat_str(urlh.geturl())
+        login_state_url = urlh.geturl()
 
         try:
             login_page = self._download_webpage(
@@ -129,7 +128,7 @@ class LinuxAcademyIE(InfoExtractor):
             })
 
         access_token = self._search_regex(
-            r'access_token=([^=&]+)', compat_str(urlh.geturl()),
+            r'access_token=([^=&]+)', urlh.geturl(),
             'access token')
 
         self._download_webpage(
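
Note: this commit drops the compat_str() wrapper around urlh.geturl() here and in several extractors below (mediaset, mediasite, platzi, safari); the change assumes geturl() already yields a usable string. A network-free, stdlib-only check of that on Python 3 (youtube-dl also runs on Python 2, where its compat layer papers over the str/unicode split):

    import io
    from urllib.response import addinfourl

    urlh = addinfourl(io.BytesIO(b''), headers={}, url='https://example.com/x')
    assert isinstance(urlh.geturl(), str)
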
index 027a790b8b182541dd8b592b183b0dbaff505322..933df14952d5cc16857485e306be07f2d32384d3 100644 (file)
--- a/youtube_dl/extractor/mediaset.py
+++ b/youtube_dl/extractor/mediaset.py
@@ -6,7 +6,6 @@ import re
 from .theplatform import ThePlatformBaseIE
 from ..compat import (
     compat_parse_qs,
-    compat_str,
     compat_urllib_parse_urlparse,
 )
 from ..utils import (
@@ -114,7 +113,7 @@ class MediasetIE(ThePlatformBaseIE):
                 continue
             urlh = ie._request_webpage(
                 embed_url, video_id, note='Following embed URL redirect')
-            embed_url = compat_str(urlh.geturl())
+            embed_url = urlh.geturl()
             program_guid = _program_guid(_qs(embed_url))
             if program_guid:
                 entries.append(embed_url)
index 694a264d672288b47c2700b9265bfc0635158ff2..d6eb1574065dece67e28a4b36fa43478dd48dfa3 100644 (file)
--- a/youtube_dl/extractor/mediasite.py
+++ b/youtube_dl/extractor/mediasite.py
@@ -129,7 +129,7 @@ class MediasiteIE(InfoExtractor):
         query = mobj.group('query')
 
         webpage, urlh = self._download_webpage_handle(url, resource_id)  # XXX: add UrlReferrer?
-        redirect_url = compat_str(urlh.geturl())
+        redirect_url = urlh.geturl()
 
         # XXX: might have also extracted UrlReferrer and QueryString from the html
         service_path = compat_urlparse.urljoin(redirect_url, self._html_search_regex(
index 9c8bf05af10b26f0c45da23ddeb5fa42ec51f307..2447c812e021e73991082aefab4bd98e6dd000a1 100644 (file)
--- a/youtube_dl/extractor/ndr.py
+++ b/youtube_dl/extractor/ndr.py
@@ -7,6 +7,7 @@ from .common import InfoExtractor
 from ..utils import (
     determine_ext,
     int_or_none,
+    merge_dicts,
     parse_iso8601,
     qualities,
     try_get,
@@ -87,21 +88,25 @@ class NDRIE(NDRBaseIE):
 
     def _extract_embed(self, webpage, display_id):
         embed_url = self._html_search_meta(
-            'embedURL', webpage, 'embed URL', fatal=True)
+            'embedURL', webpage, 'embed URL',
+            default=None) or self._search_regex(
+            r'\bembedUrl["\']\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
+            'embed URL', group='url')
         description = self._search_regex(
             r'<p[^>]+itemprop="description">([^<]+)</p>',
             webpage, 'description', default=None) or self._og_search_description(webpage)
         timestamp = parse_iso8601(
             self._search_regex(
                 r'<span[^>]+itemprop="(?:datePublished|uploadDate)"[^>]+content="([^"]+)"',
-                webpage, 'upload date', fatal=False))
-        return {
+                webpage, 'upload date', default=None))
+        info = self._search_json_ld(webpage, display_id, default={})
+        return merge_dicts({
             '_type': 'url_transparent',
             'url': embed_url,
             'display_id': display_id,
             'description': description,
             'timestamp': timestamp,
-        }
+        }, info)
 
 
 class NJoyIE(NDRBaseIE):
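
Note on the merge_dicts usage above: the fields _extract_embed computes itself take priority, and the JSON-LD info from _search_json_ld only fills in values that are missing. A standalone sketch of that behaviour (merge_dicts is the real youtube_dl.utils helper; the sample values are illustrative):

    from youtube_dl.utils import merge_dicts

    explicit = {'description': 'from itemprop markup', 'timestamp': None}
    json_ld = {'description': 'from JSON-LD', 'timestamp': 1585000000}
    merged = merge_dicts(explicit, json_ld)
    # None values are skipped, so the JSON-LD timestamp fills the gap
    # while the explicitly extracted description is kept.
    assert merged == {'description': 'from itemprop markup',
                      'timestamp': 1585000000}
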
index 6a2c6cb7bb6d039c56fcf7325de422846c437ab5..de6a707c4265c4fc61a57db117a432a95468ab54 100644 (file)
--- a/youtube_dl/extractor/nhk.py
+++ b/youtube_dl/extractor/nhk.py
@@ -6,7 +6,7 @@ from .common import InfoExtractor
 
 
 class NhkVodIE(InfoExtractor):
-    _VALID_URL = r'https?://www3\.nhk\.or\.jp/nhkworld/(?P<lang>[a-z]{2})/ondemand/(?P<type>video|audio)/(?P<id>\d{7}|[a-z]+-\d{8}-\d+)'
+    _VALID_URL = r'https?://www3\.nhk\.or\.jp/nhkworld/(?P<lang>[a-z]{2})/ondemand/(?P<type>video|audio)/(?P<id>\d{7}|[^/]+?-\d{8}-\d+)'
     # Content available only for a limited period of time. Visit
     # https://www3.nhk.or.jp/nhkworld/en/ondemand/ for working samples.
     _TESTS = [{
@@ -30,8 +30,11 @@ class NhkVodIE(InfoExtractor):
     }, {
         'url': 'https://www3.nhk.or.jp/nhkworld/fr/ondemand/audio/plugin-20190404-1/',
         'only_matching': True,
+    }, {
+        'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/audio/j_art-20150903-1/',
+        'only_matching': True,
     }]
-    _API_URL_TEMPLATE = 'https://api.nhk.or.jp/nhkworld/%sod%slist/v7/episode/%s/%s/all%s.json'
+    _API_URL_TEMPLATE = 'https://api.nhk.or.jp/nhkworld/%sod%slist/v7a/episode/%s/%s/all%s.json'
 
     def _real_extract(self, url):
         lang, m_type, episode_id = re.match(self._VALID_URL, url).groups()
@@ -82,15 +85,9 @@ class NhkVodIE(InfoExtractor):
             audio = episode['audio']
             audio_path = audio['audio']
             info['formats'] = self._extract_m3u8_formats(
-                'https://nhks-vh.akamaihd.net/i%s/master.m3u8' % audio_path,
-                episode_id, 'm4a', m3u8_id='hls', fatal=False)
-            for proto in ('rtmpt', 'rtmp'):
-                info['formats'].append({
-                    'ext': 'flv',
-                    'format_id': proto,
-                    'url': '%s://flv.nhk.or.jp/ondemand/mp4:flv%s' % (proto, audio_path),
-                    'vcodec': 'none',
-                })
+                'https://nhkworld-vh.akamaihd.net/i%s/master.m3u8' % audio_path,
+                episode_id, 'm4a', entry_protocol='m3u8_native',
+                m3u8_id='hls', fatal=False)
             for f in info['formats']:
                 f['language'] = lang
         return info
index 901f44b54f40c2c02e120c636a80b0b5bfb4ea2e..2850af5dbe14231df3ee075d3868744e587d0c14 100644 (file)
--- a/youtube_dl/extractor/nova.py
+++ b/youtube_dl/extractor/nova.py
@@ -18,7 +18,7 @@ class NovaEmbedIE(InfoExtractor):
     _VALID_URL = r'https?://media\.cms\.nova\.cz/embed/(?P<id>[^/?#&]+)'
     _TEST = {
         'url': 'https://media.cms.nova.cz/embed/8o0n0r?autoplay=1',
-        'md5': 'b3834f6de5401baabf31ed57456463f7',
+        'md5': 'ee009bafcc794541570edd44b71cbea3',
         'info_dict': {
             'id': '8o0n0r',
             'ext': 'mp4',
@@ -44,11 +44,17 @@ class NovaEmbedIE(InfoExtractor):
         formats = []
         for format_id, format_list in bitrates.items():
             if not isinstance(format_list, list):
-                continue
+                format_list = [format_list]
             for format_url in format_list:
                 format_url = url_or_none(format_url)
                 if not format_url:
                     continue
+                if format_id == 'hls':
+                    formats.extend(self._extract_m3u8_formats(
+                        format_url, video_id, ext='mp4',
+                        entry_protocol='m3u8_native', m3u8_id='hls',
+                        fatal=False))
+                    continue
                 f = {
                     'url': format_url,
                 }
@@ -91,7 +97,7 @@ class NovaIE(InfoExtractor):
     _VALID_URL = r'https?://(?:[^.]+\.)?(?P<site>tv(?:noviny)?|tn|novaplus|vymena|fanda|krasna|doma|prask)\.nova\.cz/(?:[^/]+/)+(?P<id>[^/]+?)(?:\.html|/|$)'
     _TESTS = [{
         'url': 'http://tn.nova.cz/clanek/tajemstvi-ukryte-v-podzemi-specialni-nemocnice-v-prazske-krci.html#player_13260',
-        'md5': '1dd7b9d5ea27bc361f110cd855a19bd3',
+        'md5': '249baab7d0104e186e78b0899c7d5f28',
         'info_dict': {
             'id': '1757139',
             'display_id': 'tajemstvi-ukryte-v-podzemi-specialni-nemocnice-v-prazske-krci',
@@ -113,7 +119,8 @@ class NovaIE(InfoExtractor):
         'params': {
             # rtmp download
             'skip_download': True,
-        }
+        },
+        'skip': 'gone',
     }, {
         # media.cms.nova.cz embed
         'url': 'https://novaplus.nova.cz/porad/ulice/epizoda/18760-2180-dil',
@@ -128,6 +135,7 @@ class NovaIE(InfoExtractor):
             'skip_download': True,
         },
         'add_ie': [NovaEmbedIE.ie_key()],
+        'skip': 'CHYBA 404: STRÁNKA NENALEZENA',
     }, {
         'url': 'http://sport.tn.nova.cz/clanek/sport/hokej/nhl/zivot-jde-dal-hodnotil-po-vyrazeni-z-playoff-jiri-sekac.html',
         'only_matching': True,
@@ -152,14 +160,29 @@ class NovaIE(InfoExtractor):
 
         webpage = self._download_webpage(url, display_id)
 
+        description = clean_html(self._og_search_description(webpage, default=None))
+        if site == 'novaplus':
+            upload_date = unified_strdate(self._search_regex(
+                r'(\d{1,2}-\d{1,2}-\d{4})$', display_id, 'upload date', default=None))
+        elif site == 'fanda':
+            upload_date = unified_strdate(self._search_regex(
+                r'<span class="date_time">(\d{1,2}\.\d{1,2}\.\d{4})', webpage, 'upload date', default=None))
+        else:
+            upload_date = None
+
         # novaplus
         embed_id = self._search_regex(
             r'<iframe[^>]+\bsrc=["\'](?:https?:)?//media\.cms\.nova\.cz/embed/([^/?#&]+)',
             webpage, 'embed url', default=None)
         if embed_id:
-            return self.url_result(
-                'https://media.cms.nova.cz/embed/%s' % embed_id,
-                ie=NovaEmbedIE.ie_key(), video_id=embed_id)
+            return {
+                '_type': 'url_transparent',
+                'url': 'https://media.cms.nova.cz/embed/%s' % embed_id,
+                'ie_key': NovaEmbedIE.ie_key(),
+                'id': embed_id,
+                'description': description,
+                'upload_date': upload_date
+            }
 
         video_id = self._search_regex(
             [r"(?:media|video_id)\s*:\s*'(\d+)'",
@@ -233,18 +256,8 @@ class NovaIE(InfoExtractor):
         self._sort_formats(formats)
 
         title = mediafile.get('meta', {}).get('title') or self._og_search_title(webpage)
-        description = clean_html(self._og_search_description(webpage, default=None))
         thumbnail = config.get('poster')
 
-        if site == 'novaplus':
-            upload_date = unified_strdate(self._search_regex(
-                r'(\d{1,2}-\d{1,2}-\d{4})$', display_id, 'upload date', default=None))
-        elif site == 'fanda':
-            upload_date = unified_strdate(self._search_regex(
-                r'<span class="date_time">(\d{1,2}\.\d{1,2}\.\d{4})', webpage, 'upload date', default=None))
-        else:
-            upload_date = None
-
         return {
             'id': video_id,
             'display_id': display_id,
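
Note on the NovaEmbedIE change above: scalar entries in the `bitrates` dict used to be skipped and are now wrapped into one-element lists, with 'hls' entries expanded from the master playlist. A standalone sketch of that normalisation (sample data is illustrative; the real code calls _extract_m3u8_formats where this prints):

    bitrates = {
        'hls': 'https://example.com/master.m3u8',
        'mp4': ['https://example.com/video-720.mp4'],
    }
    for format_id, format_list in bitrates.items():
        if not isinstance(format_list, list):
            format_list = [format_list]  # was: continue
        for format_url in format_list:
            if format_id == 'hls':
                print('expand HLS master playlist:', format_url)
            else:
                print('direct format:', format_url)
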
index a5e8baa7e2542f4e8d6a8c83dea7bddecf82413d..53acc6e574c0743a223d552d95d8806dc071ed41 100644 (file)
--- a/youtube_dl/extractor/npr.py
+++ b/youtube_dl/extractor/npr.py
@@ -4,6 +4,7 @@ from .common import InfoExtractor
 from ..utils import (
     int_or_none,
     qualities,
+    url_or_none,
 )
 
 
@@ -48,6 +49,10 @@ class NprIE(InfoExtractor):
             },
         }],
         'expected_warnings': ['Failed to download m3u8 information'],
+    }, {
+        # multimedia, no formats, stream
+        'url': 'https://www.npr.org/2020/02/14/805476846/laura-stevenson-tiny-desk-concert',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
@@ -95,6 +100,17 @@ class NprIE(InfoExtractor):
                             'format_id': format_id,
                             'quality': quality(format_id),
                         })
+            for stream_id, stream_entry in media.get('stream', {}).items():
+                if not isinstance(stream_entry, dict):
+                    continue
+                if stream_id != 'hlsUrl':
+                    continue
+                stream_url = url_or_none(stream_entry.get('$text'))
+                if not stream_url:
+                    continue
+                formats.extend(self._extract_m3u8_formats(
+                    stream_url, stream_id, 'mp4', 'm3u8_native',
+                    m3u8_id='hls', fatal=False))
             self._sort_formats(formats)
 
             entries.append({
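
Note on the NprIE addition above: multimedia entries that expose no direct formats can still carry an HLS stream; only the 'hlsUrl' member of a media item's `stream` dict is used, and its '$text' value must survive url_or_none. A standalone sketch with illustrative data:

    media = {'stream': {
        'hlsUrl': {'$text': 'https://example.com/master.m3u8'},
        'other': 'ignored',
    }}
    for stream_id, stream_entry in media.get('stream', {}).items():
        if not isinstance(stream_entry, dict) or stream_id != 'hlsUrl':
            continue
        stream_url = stream_entry.get('$text')
        if stream_url:
            print('feed to the HLS downloader:', stream_url)
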
index 2bb77ab249239163d8318a57e8fd0fdb57d2e32a..fc78ca56c90d37b00c1f396aee7c896d54fb91c9 100644 (file)
--- a/youtube_dl/extractor/nytimes.py
+++ b/youtube_dl/extractor/nytimes.py
@@ -69,10 +69,10 @@ class NYTimesBaseIE(InfoExtractor):
                     'width': int_or_none(video.get('width')),
                     'height': int_or_none(video.get('height')),
                     'filesize': get_file_size(video.get('file_size') or video.get('fileSize')),
-                    'tbr': int_or_none(video.get('bitrate'), 1000),
+                    'tbr': int_or_none(video.get('bitrate'), 1000) or None,
                     'ext': ext,
                 })
-        self._sort_formats(formats)
+        self._sort_formats(formats, ('height', 'width', 'filesize', 'tbr', 'fps', 'format_id'))
 
         thumbnails = []
         for image in video_data.get('images', []):
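
Note on the NYTimesBaseIE change above: zero bitrates are normalised to None, and an explicit field preference replaces the default format-ordering heuristics. A rough standalone approximation of what ordering by such a tuple does (youtube-dl sorts ascending, best format last; the sample formats are illustrative and the real _sort_formats applies more normalisation):

    formats = [
        {'height': 720, 'width': 1280, 'filesize': 0, 'tbr': 800, 'fps': 0, 'format_id': 'b'},
        {'height': 1080, 'width': 1920, 'filesize': 0, 'tbr': 500, 'fps': 0, 'format_id': 'a'},
    ]
    order = ('height', 'width', 'filesize', 'tbr', 'fps', 'format_id')
    formats.sort(key=lambda f: tuple(f.get(k) or 0 for k in order))
    print([f['format_id'] for f in formats])  # ['b', 'a']: 1080p sorts best
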
index d3a83ea2bb5215e34e72ad85bf99867697e2e1b2..48fb9541693c35878317f22ed9dd6e2da4412ced 100644 (file)
--- a/youtube_dl/extractor/peertube.py
+++ b/youtube_dl/extractor/peertube.py
@@ -8,6 +8,7 @@ from ..compat import compat_str
 from ..utils import (
     int_or_none,
     parse_resolution,
+    str_or_none,
     try_get,
     unified_timestamp,
     url_or_none,
@@ -415,6 +416,7 @@ class PeerTubeIE(InfoExtractor):
                             peertube\.cpy\.re
                         )'''
     _UUID_RE = r'[\da-fA-F]{8}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{12}'
+    _API_BASE = 'https://%s/api/v1/videos/%s/%s'
     _VALID_URL = r'''(?x)
                     (?:
                         peertube:(?P<host>[^:]+):|
@@ -423,26 +425,30 @@ class PeerTubeIE(InfoExtractor):
                     (?P<id>%s)
                     ''' % (_INSTANCES_RE, _UUID_RE)
     _TESTS = [{
-        'url': 'https://peertube.cpy.re/videos/watch/2790feb0-8120-4e63-9af3-c943c69f5e6c',
-        'md5': '80f24ff364cc9d333529506a263e7feb',
+        'url': 'https://framatube.org/videos/watch/9c9de5e8-0a1e-484a-b099-e80766180a6d',
+        'md5': '9bed8c0137913e17b86334e5885aacff',
         'info_dict': {
-            'id': '2790feb0-8120-4e63-9af3-c943c69f5e6c',
+            'id': '9c9de5e8-0a1e-484a-b099-e80766180a6d',
             'ext': 'mp4',
-            'title': 'wow',
-            'description': 'wow such video, so gif',
+            'title': 'What is PeerTube?',
+            'description': 'md5:3fefb8dde2b189186ce0719fda6f7b10',
             'thumbnail': r're:https?://.*\.(?:jpg|png)',
-            'timestamp': 1519297480,
-            'upload_date': '20180222',
-            'uploader': 'Luclu7',
-            'uploader_id': '7fc42640-efdb-4505-a45d-a15b1a5496f1',
-            'uploder_url': 'https://peertube.nsa.ovh/accounts/luclu7',
-            'license': 'Unknown',
-            'duration': 3,
+            'timestamp': 1538391166,
+            'upload_date': '20181001',
+            'uploader': 'Framasoft',
+            'uploader_id': '3',
+            'uploader_url': 'https://framatube.org/accounts/framasoft',
+            'channel': 'Les vidéos de Framasoft',
+            'channel_id': '2',
+            'channel_url': 'https://framatube.org/video-channels/bf54d359-cfad-4935-9d45-9d6be93f63e8',
+            'language': 'en',
+            'license': 'Attribution - Share Alike',
+            'duration': 113,
             'view_count': int,
             'like_count': int,
             'dislike_count': int,
-            'tags': list,
-            'categories': list,
+            'tags': ['framasoft', 'peertube'],
+            'categories': ['Science & Technology'],
         }
     }, {
         'url': 'https://peertube.tamanoir.foucry.net/videos/watch/0b04f13d-1e18-4f1d-814e-4979aa7c9c44',
@@ -484,13 +490,38 @@ class PeerTubeIE(InfoExtractor):
                 entries = [peertube_url]
         return entries
 
+    def _call_api(self, host, video_id, path, note=None, errnote=None, fatal=True):
+        return self._download_json(
+            self._API_BASE % (host, video_id, path), video_id,
+            note=note, errnote=errnote, fatal=fatal)
+
+    def _get_subtitles(self, host, video_id):
+        captions = self._call_api(
+            host, video_id, 'captions', note='Downloading captions JSON',
+            fatal=False)
+        if not isinstance(captions, dict):
+            return
+        data = captions.get('data')
+        if not isinstance(data, list):
+            return
+        subtitles = {}
+        for e in data:
+            language_id = try_get(e, lambda x: x['language']['id'], compat_str)
+            caption_url = urljoin('https://%s' % host, e.get('captionPath'))
+            if not caption_url:
+                continue
+            subtitles.setdefault(language_id or 'en', []).append({
+                'url': caption_url,
+            })
+        return subtitles
+
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         host = mobj.group('host') or mobj.group('host_2')
         video_id = mobj.group('id')
 
-        video = self._download_json(
-            'https://%s/api/v1/videos/%s' % (host, video_id), video_id)
+        video = self._call_api(
+            host, video_id, '', note='Downloading video JSON')
 
         title = video['name']
 
@@ -513,10 +544,28 @@ class PeerTubeIE(InfoExtractor):
             formats.append(f)
         self._sort_formats(formats)
 
-        def account_data(field):
-            return try_get(video, lambda x: x['account'][field], compat_str)
+        full_description = self._call_api(
+            host, video_id, 'description', note='Downloading description JSON',
+            fatal=False)
+
+        description = None
+        if isinstance(full_description, dict):
+            description = str_or_none(full_description.get('description'))
+        if not description:
+            description = video.get('description')
+
+        subtitles = self.extract_subtitles(host, video_id)
+
+        def data(section, field, type_):
+            return try_get(video, lambda x: x[section][field], type_)
+
+        def account_data(field, type_):
+            return data('account', field, type_)
+
+        def channel_data(field, type_):
+            return data('channel', field, type_)
 
-        category = try_get(video, lambda x: x['category']['label'], compat_str)
+        category = data('category', 'label', compat_str)
         categories = [category] if category else None
 
         nsfw = video.get('nsfw')
@@ -528,14 +577,17 @@ class PeerTubeIE(InfoExtractor):
         return {
             'id': video_id,
             'title': title,
-            'description': video.get('description'),
+            'description': description,
             'thumbnail': urljoin(url, video.get('thumbnailPath')),
             'timestamp': unified_timestamp(video.get('publishedAt')),
-            'uploader': account_data('displayName'),
-            'uploader_id': account_data('uuid'),
-            'uploder_url': account_data('url'),
-            'license': try_get(
-                video, lambda x: x['licence']['label'], compat_str),
+            'uploader': account_data('displayName', compat_str),
+            'uploader_id': str_or_none(account_data('id', int)),
+            'uploader_url': url_or_none(account_data('url', compat_str)),
+            'channel': channel_data('displayName', compat_str),
+            'channel_id': str_or_none(channel_data('id', int)),
+            'channel_url': url_or_none(channel_data('url', compat_str)),
+            'language': data('language', 'id', compat_str),
+            'license': data('licence', 'label', compat_str),
             'duration': int_or_none(video.get('duration')),
             'view_count': int_or_none(video.get('views')),
             'like_count': int_or_none(video.get('likes')),
@@ -544,4 +596,5 @@ class PeerTubeIE(InfoExtractor):
             'tags': try_get(video, lambda x: x['tags'], list),
             'categories': categories,
             'formats': formats,
+            'subtitles': subtitles
         }
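
Note on the PeerTubeIE changes above: the full description and the captions now come from dedicated API endpoints (/description and /captions) instead of the bare video JSON. A standalone sketch of the captions lookup _get_subtitles performs (stdlib only, makes one network request; host and UUID are taken from the test data above):

    import json
    from urllib.parse import urljoin
    from urllib.request import urlopen

    host = 'framatube.org'
    video_id = '9c9de5e8-0a1e-484a-b099-e80766180a6d'
    captions = json.load(urlopen(
        'https://%s/api/v1/videos/%s/captions' % (host, video_id)))
    subtitles = {}
    for e in captions.get('data', []):
        path = e.get('captionPath')
        if not path:
            continue
        lang = (e.get('language') or {}).get('id') or 'en'
        subtitles.setdefault(lang, []).append(
            {'url': urljoin('https://%s' % host, path)})
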
index 602207bebdd6a01d7f33dbf08302ab5a75ccf207..23c8256b59dab4a92ae79ef48dc8e3b0adf0ff68 100644 (file)
--- a/youtube_dl/extractor/platzi.py
+++ b/youtube_dl/extractor/platzi.py
@@ -46,7 +46,7 @@ class PlatziBaseIE(InfoExtractor):
             headers={'Referer': self._LOGIN_URL})
 
         # login succeeded
-        if 'platzi.com/login' not in compat_str(urlh.geturl()):
+        if 'platzi.com/login' not in urlh.geturl():
             return
 
         login_error = self._webpage_read_content(
index dd5f17f1192c3543636f6ff24624b0c9cc9a0bd6..80222d42831b8e58116449f96a615ea250b2985f 100644 (file)
--- a/youtube_dl/extractor/pokemon.py
+++ b/youtube_dl/extractor/pokemon.py
@@ -20,20 +20,16 @@ class PokemonIE(InfoExtractor):
             'ext': 'mp4',
             'title': 'The Ol’ Raise and Switch!',
             'description': 'md5:7db77f7107f98ba88401d3adc80ff7af',
-            'timestamp': 1511824728,
-            'upload_date': '20171127',
         },
         'add_id': ['LimelightMedia'],
     }, {
         # no data-video-title
-        'url': 'https://www.pokemon.com/us/pokemon-episodes/pokemon-movies/pokemon-the-rise-of-darkrai-2008',
+        'url': 'https://www.pokemon.com/fr/episodes-pokemon/films-pokemon/pokemon-lascension-de-darkrai-2008',
         'info_dict': {
         'info_dict': {
-            'id': '99f3bae270bf4e5097274817239ce9c8',
+            'id': 'dfbaf830d7e54e179837c50c0c6cc0e1',
             'ext': 'mp4',
             'ext': 'mp4',
-            'title': 'Pokémon: The Rise of Darkrai',
-            'description': 'md5:ea8fbbf942e1e497d54b19025dd57d9d',
-            'timestamp': 1417778347,
-            'upload_date': '20141205',
+            'title': "Pokémon : L'ascension de Darkrai",
+            'description': 'md5:d1dbc9e206070c3e14a06ff557659fb5',
         },
         'add_id': ['LimelightMedia'],
         'params': {
diff --git a/youtube_dl/extractor/popcorntimes.py b/youtube_dl/extractor/popcorntimes.py
new file mode 100644 (file)
index 0000000..7bf7f98
--- /dev/null
+++ b/youtube_dl/extractor/popcorntimes.py
@@ -0,0 +1,99 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+    compat_b64decode,
+    compat_chr,
+)
+from ..utils import int_or_none
+
+
+class PopcorntimesIE(InfoExtractor):
+    _VALID_URL = r'https?://popcorntimes\.tv/[^/]+/m/(?P<id>[^/]+)/(?P<display_id>[^/?#&]+)'
+    _TEST = {
+        'url': 'https://popcorntimes.tv/de/m/A1XCFvz/haensel-und-gretel-opera-fantasy',
+        'md5': '93f210991ad94ba8c3485950a2453257',
+        'info_dict': {
+            'id': 'A1XCFvz',
+            'display_id': 'haensel-und-gretel-opera-fantasy',
+            'ext': 'mp4',
+            'title': 'Hänsel und Gretel',
+            'description': 'md5:1b8146791726342e7b22ce8125cf6945',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'creator': 'John Paul',
+            'release_date': '19541009',
+            'duration': 4260,
+            'tbr': 5380,
+            'width': 720,
+            'height': 540,
+        },
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id, display_id = mobj.group('id', 'display_id')
+
+        webpage = self._download_webpage(url, display_id)
+
+        title = self._search_regex(
+            r'<h1>([^<]+)', webpage, 'title',
+            default=None) or self._html_search_meta(
+            'ya:ovs:original_name', webpage, 'title', fatal=True)
+
+        loc = self._search_regex(
+            r'PCTMLOC\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1', webpage, 'loc',
+            group='value')
+
+        loc_b64 = ''
+        for c in loc:
+            c_ord = ord(c)
+            if ord('a') <= c_ord <= ord('z') or ord('A') <= c_ord <= ord('Z'):
+                upper = ord('Z') if c_ord <= ord('Z') else ord('z')
+                c_ord += 13
+                if upper < c_ord:
+                    c_ord -= 26
+            loc_b64 += compat_chr(c_ord)
+
+        video_url = compat_b64decode(loc_b64).decode('utf-8')
+
+        description = self._html_search_regex(
+            r'(?s)<div[^>]+class=["\']pt-movie-desc[^>]+>(.+?)</div>', webpage,
+            'description', fatal=False)
+
+        thumbnail = self._search_regex(
+            r'<img[^>]+class=["\']video-preview[^>]+\bsrc=(["\'])(?P<value>(?:(?!\1).)+)\1',
+            webpage, 'thumbnail', default=None,
+            group='value') or self._og_search_thumbnail(webpage)
+
+        creator = self._html_search_meta(
+            'video:director', webpage, 'creator', default=None)
+
+        release_date = self._html_search_meta(
+            'video:release_date', webpage, default=None)
+        if release_date:
+            release_date = release_date.replace('-', '')
+
+        def int_meta(name):
+            return int_or_none(self._html_search_meta(
+                name, webpage, default=None))
+
+        return {
+            'id': video_id,
+            'display_id': display_id,
+            'url': video_url,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'creator': creator,
+            'release_date': release_date,
+            'duration': int_meta('video:duration'),
+            'tbr': int_meta('ya:ovs:bitrate'),
+            'width': int_meta('og:video:width'),
+            'height': int_meta('og:video:height'),
+            'http_headers': {
+                'Referer': url,
+            },
+        }
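
Note on the new PopcorntimesIE above: the ord()-based loop is plain ROT13 over the ASCII letters of PCTMLOC, which holds ROT13-encoded base64 of the direct video URL. An equivalent standalone decoder, for illustration:

    import base64
    import codecs

    def decode_pctmloc(loc):
        # codecs' rot_13 shifts only ASCII letters, matching the loop above;
        # digits and '+/=' in the base64 alphabet pass through unchanged.
        return base64.b64decode(codecs.decode(loc, 'rot_13')).decode('utf-8')
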
index 27d65d4b9cdcf1e068b0d6971502eaa0caccf895..c6052ac9f966f332d0cfb1f7acfe68b0a143d2b7 100644 (file)
--- a/youtube_dl/extractor/pornhd.py
+++ b/youtube_dl/extractor/pornhd.py
@@ -8,6 +8,7 @@ from ..utils import (
     ExtractorError,
     int_or_none,
     js_to_json,
+    merge_dicts,
     urljoin,
 )
 
@@ -27,23 +28,22 @@ class PornHdIE(InfoExtractor):
             'view_count': int,
             'like_count': int,
             'age_limit': 18,
-        }
+        },
+        'skip': 'HTTP Error 404: Not Found',
     }, {
-        # removed video
         'url': 'http://www.pornhd.com/videos/1962/sierra-day-gets-his-cum-all-over-herself-hd-porn-video',
-        'md5': '956b8ca569f7f4d8ec563e2c41598441',
+        'md5': '1b7b3a40b9d65a8e5b25f7ab9ee6d6de',
         'info_dict': {
             'id': '1962',
             'display_id': 'sierra-day-gets-his-cum-all-over-herself-hd-porn-video',
             'ext': 'mp4',
-            'title': 'Sierra loves doing laundry',
+            'title': 'md5:98c6f8b2d9c229d0f0fde47f61a1a759',
             'description': 'md5:8ff0523848ac2b8f9b065ba781ccf294',
             'thumbnail': r're:^https?://.*\.jpg',
             'view_count': int,
             'like_count': int,
             'age_limit': 18,
         },
-        'skip': 'Not available anymore',
     }]
 
     def _real_extract(self, url):
@@ -61,7 +61,13 @@ class PornHdIE(InfoExtractor):
             r"(?s)sources'?\s*[:=]\s*(\{.+?\})",
             webpage, 'sources', default='{}')), video_id)
 
+        info = {}
         if not sources:
+            entries = self._parse_html5_media_entries(url, webpage, video_id)
+            if entries:
+                info = entries[0]
+
+        if not sources and not info:
             message = self._html_search_regex(
                 r'(?s)<(div|p)[^>]+class="no-video"[^>]*>(?P<value>.+?)</\1',
                 webpage, 'error message', group='value')
@@ -80,23 +86,29 @@ class PornHdIE(InfoExtractor):
                 'format_id': format_id,
                 'height': height,
             })
-        self._sort_formats(formats)
+        if formats:
+            info['formats'] = formats
+        self._sort_formats(info['formats'])
 
         description = self._html_search_regex(
-            r'<(div|p)[^>]+class="description"[^>]*>(?P<value>[^<]+)</\1',
-            webpage, 'description', fatal=False, group='value')
+            (r'(?s)<section[^>]+class=["\']video-description[^>]+>(?P<value>.+?)</section>',
+             r'<(div|p)[^>]+class="description"[^>]*>(?P<value>[^<]+)</\1'),
+            webpage, 'description', fatal=False,
+            group='value') or self._html_search_meta(
+            'description', webpage, default=None) or self._og_search_description(webpage)
         view_count = int_or_none(self._html_search_regex(
             r'(\d+) views\s*<', webpage, 'view count', fatal=False))
         thumbnail = self._search_regex(
             r"poster'?\s*:\s*([\"'])(?P<url>(?:(?!\1).)+)\1", webpage,
-            'thumbnail', fatal=False, group='url')
+            'thumbnail', default=None, group='url')
 
         like_count = int_or_none(self._search_regex(
-            (r'(\d+)\s*</11[^>]+>(?:&nbsp;|\s)*\blikes',
+            (r'(\d+)</span>\s*likes',
+             r'(\d+)\s*</11[^>]+>(?:&nbsp;|\s)*\blikes',
              r'class=["\']save-count["\'][^>]*>\s*(\d+)'),
             webpage, 'like count', fatal=False))
 
-        return {
+        return merge_dicts(info, {
             'id': video_id,
             'display_id': display_id,
             'title': title,
@@ -106,4 +118,4 @@ class PornHdIE(InfoExtractor):
             'like_count': like_count,
             'formats': formats,
             'age_limit': 18,
-        }
+        })
index b3251ccd9b2300188f7efce561c3c5d1fbff702e..3567a32839eef2f75123a3f1b939038cf3eaf678 100644 (file)
--- a/youtube_dl/extractor/pornhub.py
+++ b/youtube_dl/extractor/pornhub.py
@@ -52,7 +52,7 @@ class PornHubIE(PornHubBaseIE):
     _VALID_URL = r'''(?x)
                     https?://
                         (?:
-                            (?:[^/]+\.)?(?P<host>pornhub\.(?:com|net))/(?:(?:view_video\.php|video/show)\?viewkey=|embed/)|
+                            (?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net))/(?:(?:view_video\.php|video/show)\?viewkey=|embed/)|
                             (?:www\.)?thumbzilla\.com/video/
                         )
                         (?P<id>[\da-z]+)
@@ -149,6 +149,9 @@ class PornHubIE(PornHubBaseIE):
     }, {
         'url': 'https://www.pornhub.net/view_video.php?viewkey=203640933',
         'only_matching': True,
+    }, {
+        'url': 'https://www.pornhubpremium.com/view_video.php?viewkey=ph5e4acdae54a82',
+        'only_matching': True,
     }]
 
     @staticmethod
@@ -166,6 +169,13 @@ class PornHubIE(PornHubBaseIE):
         host = mobj.group('host') or 'pornhub.com'
         video_id = mobj.group('id')
 
+        if 'premium' in host:
+            if not self._downloader.params.get('cookiefile'):
+                raise ExtractorError(
+                    'PornHub Premium requires authentication.'
+                    ' You may want to use --cookies.',
+                    expected=True)
+
         self._set_cookie(host, 'age_verified', '1')
 
         def dl_webpage(platform):
@@ -189,10 +199,10 @@ class PornHubIE(PornHubBaseIE):
         # http://www.pornhub.com/view_video.php?viewkey=1331683002), not relying
         # on that anymore.
         title = self._html_search_meta(
-            'twitter:title', webpage, default=None) or self._search_regex(
-            (r'<h1[^>]+class=["\']title["\'][^>]*>(?P<title>[^<]+)',
-             r'<div[^>]+data-video-title=(["\'])(?P<title>.+?)\1',
-             r'shareTitle\s*=\s*(["\'])(?P<title>.+?)\1'),
+            'twitter:title', webpage, default=None) or self._html_search_regex(
+            (r'(?s)<h1[^>]+class=["\']title["\'][^>]*>(?P<title>.+?)</h1>',
+             r'<div[^>]+data-video-title=(["\'])(?P<title>(?:(?!\1).)+)\1',
+             r'shareTitle["\']\s*[=:]\s*(["\'])(?P<title>(?:(?!\1).)+)\1'),
             webpage, 'title', group='title')
 
         video_urls = []
@@ -405,7 +415,7 @@ class PornHubPlaylistBaseIE(PornHubBaseIE):
 
 
 class PornHubUserIE(PornHubPlaylistBaseIE):
-    _VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?pornhub\.(?:com|net)/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/?#&]+))(?:[?#&]|/(?!videos)|$)'
+    _VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net))/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/?#&]+))(?:[?#&]|/(?!videos)|$)'
     _TESTS = [{
         'url': 'https://www.pornhub.com/model/zoe_ph',
         'playlist_mincount': 118,
@@ -473,7 +483,7 @@ class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE):
 
 
 class PornHubPagedVideoListIE(PornHubPagedPlaylistBaseIE):
-    _VALID_URL = r'https?://(?:[^/]+\.)?(?P<host>pornhub\.(?:com|net))/(?P<id>(?:[^/]+/)*[^/?#&]+)'
+    _VALID_URL = r'https?://(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net))/(?P<id>(?:[^/]+/)*[^/?#&]+)'
     _TESTS = [{
         'url': 'https://www.pornhub.com/model/zoe_ph/videos',
         'only_matching': True,
@@ -588,7 +598,7 @@ class PornHubPagedVideoListIE(PornHubPagedPlaylistBaseIE):
 
 
 class PornHubUserVideosUploadIE(PornHubPagedPlaylistBaseIE):
-    _VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?(?P<host>pornhub\.(?:com|net))/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/]+)/videos/upload)'
+    _VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net))/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/]+)/videos/upload)'
     _TESTS = [{
         'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos/upload',
         'info_dict': {
     _TESTS = [{
         'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos/upload',
         'info_dict': {
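
Note on the PornHub Premium support above: premium hosts are refused unless a cookie file is supplied, so an authenticated browser session has to be exported and passed in. A usage sketch via the embedding API (the cookies path is a placeholder; 'cookiefile' is the very option the new check reads, and the URL is the test URL from the diff):

    import youtube_dl

    ydl_opts = {'cookiefile': 'pornhubpremium-cookies.txt'}
    with youtube_dl.YoutubeDL(ydl_opts) as ydl:
        ydl.download(['https://www.pornhubpremium.com/'
                      'view_video.php?viewkey=ph5e4acdae54a82'])
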
index 4942437c7dbe918bd408bf805f8b76cc1d7967e4..2cc66512241dbc6f65589e52d842cf70b7250ccb 100644 (file)
--- a/youtube_dl/extractor/safari.py
+++ b/youtube_dl/extractor/safari.py
@@ -8,7 +8,6 @@ from .common import InfoExtractor
 
 from ..compat import (
     compat_parse_qs,
-    compat_str,
     compat_urlparse,
 )
 from ..utils import (
@@ -39,13 +38,13 @@ class SafariBaseIE(InfoExtractor):
             'Downloading login page')
 
         def is_logged(urlh):
-            return 'learning.oreilly.com/home/' in compat_str(urlh.geturl())
+            return 'learning.oreilly.com/home/' in urlh.geturl()
 
         if is_logged(urlh):
             self.LOGGED_IN = True
             return
 
-        redirect_url = compat_str(urlh.geturl())
+        redirect_url = urlh.geturl()
         parsed_url = compat_urlparse.urlparse(redirect_url)
         qs = compat_parse_qs(parsed_url.query)
         next_uri = compat_urlparse.urljoin(
index e579d42cf525b56500b98c84272b67b279e36baa..9401bf2cf7fcdad2eb218f5a3d072399932fea9a 100644 (file)
--- a/youtube_dl/extractor/servus.py
+++ b/youtube_dl/extractor/servus.py
@@ -7,9 +7,18 @@ from .common import InfoExtractor
 
 
 class ServusIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?servus\.com/(?:(?:at|de)/p/[^/]+|tv/videos)/(?P<id>[aA]{2}-\w+|\d+-\d+)'
+    _VALID_URL = r'''(?x)
+                    https?://
+                        (?:www\.)?
+                        (?:
+                            servus\.com/(?:(?:at|de)/p/[^/]+|tv/videos)|
+                            servustv\.com/videos
+                        )
+                        /(?P<id>[aA]{2}-\w+|\d+-\d+)
+                    '''
     _TESTS = [{
-        'url': 'https://www.servus.com/de/p/Die-Gr%C3%BCnen-aus-Sicht-des-Volkes/AA-1T6VBU5PW1W12/',
+        # new URL schema
+        'url': 'https://www.servustv.com/videos/aa-1t6vbu5pw1w12/',
         'md5': '3e1dd16775aa8d5cbef23628cfffc1f4',
         'info_dict': {
             'id': 'AA-1T6VBU5PW1W12',
@@ -18,6 +27,10 @@ class ServusIE(InfoExtractor):
             'description': 'md5:1247204d85783afe3682644398ff2ec4',
             'thumbnail': r're:^https?://.*\.jpg',
         }
+    }, {
+        # old URL schema
+        'url': 'https://www.servus.com/de/p/Die-Gr%C3%BCnen-aus-Sicht-des-Volkes/AA-1T6VBU5PW1W12/',
+        'only_matching': True,
     }, {
         'url': 'https://www.servus.com/at/p/Wie-das-Leben-beginnt/1309984137314-381415152/',
         'only_matching': True,
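
Note on the ServusIE change above: the single-line _VALID_URL becomes a verbose regex that also accepts the new servustv.com schema. A standalone check that both schemas from the tests match (regex copied from the hunk):

    import re

    _VALID_URL = r'''(?x)
                    https?://
                        (?:www\.)?
                        (?:
                            servus\.com/(?:(?:at|de)/p/[^/]+|tv/videos)|
                            servustv\.com/videos
                        )
                        /(?P<id>[aA]{2}-\w+|\d+-\d+)
                    '''
    for url in (
        'https://www.servustv.com/videos/aa-1t6vbu5pw1w12/',
        'https://www.servus.com/de/p/Die-Gr%C3%BCnen-aus-Sicht-des-Volkes/AA-1T6VBU5PW1W12/',
    ):
        print(re.match(_VALID_URL, url).group('id'))
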
index a0b09f5b1747e3b1b9b3a17b10d482b47512143a..ff6be0b545a79260341ec25e6059d306fe859ef8 100644 (file)
--- a/youtube_dl/extractor/soundcloud.py
+++ b/youtube_dl/extractor/soundcloud.py
@@ -27,6 +27,7 @@ from ..utils import (
     unified_timestamp,
     update_url_query,
     url_or_none,
+    urlhandle_detect_ext,
 )
 
 
@@ -96,7 +97,7 @@ class SoundcloudIE(InfoExtractor):
                 'repost_count': int,
             }
         },
-        # not streamable song, preview
+        # geo-restricted
         {
             'url': 'https://soundcloud.com/the-concept-band/goldrushed-mastered?in=the-concept-band/sets/the-royal-concept-ep',
             'info_dict': {
@@ -108,17 +109,13 @@ class SoundcloudIE(InfoExtractor):
                 'uploader_id': '9615865',
                 'timestamp': 1337635207,
                 'upload_date': '20120521',
-                'duration': 30,
+                'duration': 227.155,
                 'license': 'all-rights-reserved',
                 'view_count': int,
                 'like_count': int,
                 'comment_count': int,
                 'repost_count': int,
             },
-            'params': {
-                # rtmp
-                'skip_download': True,
-            },
         },
         # private link
         {
@@ -229,7 +226,6 @@ class SoundcloudIE(InfoExtractor):
                 'skip_download': True,
             },
         },
-        # not available via api.soundcloud.com/i1/tracks/id/streams
         {
             'url': 'https://soundcloud.com/giovannisarani/mezzo-valzer',
             'md5': 'e22aecd2bc88e0e4e432d7dcc0a1abf7',
@@ -238,7 +234,7 @@ class SoundcloudIE(InfoExtractor):
                 'ext': 'mp3',
                 'title': 'Mezzo Valzer',
                 'description': 'md5:4138d582f81866a530317bae316e8b61',
-                'uploader': 'Giovanni Sarani',
+                'uploader': 'Micronie',
                 'uploader_id': '3352531',
                 'timestamp': 1551394171,
                 'upload_date': '20190228',
@@ -250,11 +246,9 @@ class SoundcloudIE(InfoExtractor):
                 'comment_count': int,
                 'repost_count': int,
             },
-            'expected_warnings': ['Unable to download JSON metadata'],
         }
     ]
 
-    _API_BASE = 'https://api.soundcloud.com/'
     _API_V2_BASE = 'https://api-v2.soundcloud.com/'
     _BASE_URL = 'https://soundcloud.com/'
     _IMAGE_REPL_RE = r'-([0-9a-z]+)\.jpg'
@@ -316,10 +310,9 @@ class SoundcloudIE(InfoExtractor):
     def _resolv_url(cls, url):
         return SoundcloudIE._API_V2_BASE + 'resolve?url=' + url
 
-    def _extract_info_dict(self, info, full_title=None, secret_token=None, version=2):
+    def _extract_info_dict(self, info, full_title=None, secret_token=None):
         track_id = compat_str(info['id'])
         title = info['title']
-        track_base_url = self._API_BASE + 'tracks/%s' % track_id
 
         format_urls = set()
         formats = []
@@ -328,21 +321,22 @@ class SoundcloudIE(InfoExtractor):
             query['secret_token'] = secret_token
 
         if info.get('downloadable') and info.get('has_downloads_left'):
-            format_url = update_url_query(
-                info.get('download_url') or track_base_url + '/download', query)
-            format_urls.add(format_url)
-            if version == 2:
-                v1_info = self._download_json(
-                    track_base_url, track_id, query=query, fatal=False) or {}
-            else:
-                v1_info = info
-            formats.append({
-                'format_id': 'download',
-                'ext': v1_info.get('original_format') or 'mp3',
-                'filesize': int_or_none(v1_info.get('original_content_size')),
-                'url': format_url,
-                'preference': 10,
-            })
+            download_url = update_url_query(
+                self._API_V2_BASE + 'tracks/' + track_id + '/download', query)
+            redirect_url = (self._download_json(download_url, track_id, fatal=False) or {}).get('redirectUri')
+            if redirect_url:
+                urlh = self._request_webpage(
+                    HEADRequest(redirect_url), track_id, fatal=False)
+                if urlh:
+                    format_url = urlh.geturl()
+                    format_urls.add(format_url)
+                    formats.append({
+                        'format_id': 'download',
+                        'ext': urlhandle_detect_ext(urlh) or 'mp3',
+                        'filesize': int_or_none(urlh.headers.get('Content-Length')),
+                        'url': format_url,
+                        'preference': 10,
+                    })
 
         def invalid_url(url):
             return not url or url in format_urls
@@ -406,42 +400,11 @@ class SoundcloudIE(InfoExtractor):
             }, 'http' if protocol == 'progressive' else protocol,
                 t.get('snipped') or '/preview/' in format_url)
 
-        if not formats:
-            # Old API, does not work for some tracks (e.g.
-            # https://soundcloud.com/giovannisarani/mezzo-valzer)
-            # and might serve preview URLs (e.g.
-            # http://www.soundcloud.com/snbrn/ele)
-            format_dict = self._download_json(
-                track_base_url + '/streams', track_id,
-                'Downloading track url', query=query, fatal=False) or {}
-
-            for key, stream_url in format_dict.items():
-                if invalid_url(stream_url):
-                    continue
-                format_urls.add(stream_url)
-                mobj = re.search(r'(http|hls)_([^_]+)_(\d+)_url', key)
-                if mobj:
-                    protocol, ext, abr = mobj.groups()
-                    add_format({
-                        'abr': abr,
-                        'ext': ext,
-                        'url': stream_url,
-                    }, protocol)
-
-        if not formats:
-            # We fallback to the stream_url in the original info, this
-            # cannot be always used, sometimes it can give an HTTP 404 error
-            urlh = self._request_webpage(
-                HEADRequest(info.get('stream_url') or track_base_url + '/stream'),
-                track_id, query=query, fatal=False)
-            if urlh:
-                stream_url = urlh.geturl()
-                if not invalid_url(stream_url):
-                    add_format({'url': stream_url}, 'http')
-
         for f in formats:
             f['vcodec'] = 'none'
 
+        if not formats and info.get('policy') == 'BLOCK':
+            self.raise_geo_restricted()
         self._sort_formats(formats)
 
         user = info.get('user') or {}
@@ -511,20 +474,24 @@ class SoundcloudIE(InfoExtractor):
                 resolve_title += '/%s' % token
             info_json_url = self._resolv_url(self._BASE_URL + resolve_title)
 
-        version = 2
         info = self._download_json(
-            info_json_url, full_title, 'Downloading info JSON', query=query, fatal=False)
-        if not info:
-            info = self._download_json(
-                info_json_url.replace(self._API_V2_BASE, self._API_BASE),
-                full_title, 'Downloading info JSON', query=query)
-            version = 1
+            info_json_url, full_title, 'Downloading info JSON', query=query)
 
-        return self._extract_info_dict(info, full_title, token, version)
+        return self._extract_info_dict(info, full_title, token)
 
 
 class SoundcloudPlaylistBaseIE(SoundcloudIE):
-    def _extract_track_entries(self, tracks, token=None):
+    def _extract_set(self, playlist, token=None):
+        playlist_id = compat_str(playlist['id'])
+        tracks = playlist.get('tracks') or []
+        if not all([t.get('permalink_url') for t in tracks]) and token:
+            tracks = self._download_json(
+                self._API_V2_BASE + 'tracks', playlist_id,
+                'Downloading tracks', query={
+                    'ids': ','.join([compat_str(t['id']) for t in tracks]),
+                    'playlistId': playlist_id,
+                    'playlistSecretToken': token,
+                })
         entries = []
         for track in tracks:
             track_id = str_or_none(track.get('id'))
@@ -537,7 +504,10 @@ class SoundcloudPlaylistBaseIE(SoundcloudIE):
                     url += '?secret_token=' + token
             entries.append(self.url_result(
                 url, SoundcloudIE.ie_key(), track_id))
-        return entries
+        return self.playlist_result(
+            entries, playlist_id,
+            playlist.get('title'),
+            playlist.get('description'))
 
 
 class SoundcloudSetIE(SoundcloudPlaylistBaseIE):
@@ -548,6 +518,7 @@ class SoundcloudSetIE(SoundcloudPlaylistBaseIE):
         'info_dict': {
             'id': '2284613',
             'title': 'The Royal Concept EP',
+            'description': 'md5:71d07087c7a449e8941a70a29e34671e',
         },
         'playlist_mincount': 5,
     }, {
@@ -570,13 +541,10 @@ class SoundcloudSetIE(SoundcloudPlaylistBaseIE):
             msgs = (compat_str(err['error_message']) for err in info['errors'])
             raise ExtractorError('unable to download video webpage: %s' % ','.join(msgs))
 
-        entries = self._extract_track_entries(info['tracks'], token)
-
-        return self.playlist_result(
-            entries, str_or_none(info.get('id')), info.get('title'))
+        return self._extract_set(info, token)
 
 
-class SoundcloudPagedPlaylistBaseIE(SoundcloudPlaylistBaseIE):
+class SoundcloudPagedPlaylistBaseIE(SoundcloudIE):
     def _extract_playlist(self, base_url, playlist_id, playlist_title):
         COMMON_QUERY = {
             'limit': 2000000000,
@@ -774,10 +742,7 @@ class SoundcloudPlaylistIE(SoundcloudPlaylistBaseIE):
             self._API_V2_BASE + 'playlists/' + playlist_id,
             playlist_id, 'Downloading playlist', query=query)
 
-        entries = self._extract_track_entries(data['tracks'], token)
-
-        return self.playlist_result(
-            entries, playlist_id, data.get('title'), data.get('description'))
+        return self._extract_set(data, token)
 
 
 class SoundcloudSearchIE(SearchInfoExtractor, SoundcloudIE):
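The replacement download flow above has two steps that are easy to miss: api-v2 returns a JSON body whose redirectUri points at the real file, and a HEAD request on that URI is what supplies the final URL, extension and file size. A minimal standalone sketch of the same idea (endpoint shape taken from the diff; the client_id handling and error checking are assumptions, not part of the patch):

    import json
    import urllib.request

    API_V2 = 'https://api-v2.soundcloud.com/'

    def probe_original_download(track_id, client_id):
        url = API_V2 + 'tracks/%s/download?client_id=%s' % (track_id, client_id)
        with urllib.request.urlopen(url) as resp:
            redirect_uri = json.load(resp).get('redirectUri')
        if not redirect_uri:
            return None
        # HEAD request: learn final URL, type and size without downloading
        req = urllib.request.Request(redirect_uri, method='HEAD')
        with urllib.request.urlopen(req) as head:
            return {
                'url': head.geturl(),
                'mime': head.headers.get('Content-Type'),
                'filesize': head.headers.get('Content-Length'),
            }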
index a3c35a899a2186f1e937771cd0e34df408b2d361..378fc75686313f92a846aaa30579049e9a29eccc 100644 (file)
@@ -13,36 +13,18 @@ from ..utils import (
 class SportDeutschlandIE(InfoExtractor):
     _VALID_URL = r'https?://sportdeutschland\.tv/(?P<sport>[^/?#]+)/(?P<id>[^?#/]+)(?:$|[?#])'
     _TESTS = [{
-        'url': 'http://sportdeutschland.tv/badminton/live-li-ning-badminton-weltmeisterschaft-2014-kopenhagen',
+        'url': 'https://sportdeutschland.tv/badminton/re-live-deutsche-meisterschaften-2020-halbfinals?playlistId=0',
         'info_dict': {
-            'id': 'live-li-ning-badminton-weltmeisterschaft-2014-kopenhagen',
+            'id': 're-live-deutsche-meisterschaften-2020-halbfinals',
             'ext': 'mp4',
-            'title': 're:Li-Ning Badminton Weltmeisterschaft 2014 Kopenhagen',
-            'categories': ['Badminton'],
+            'title': 're:Re-live: Deutsche Meisterschaften 2020.*Halbfinals',
+            'categories': ['Badminton-Deutschland'],
             'view_count': int,
-            'thumbnail': r're:^https?://.*\.jpg$',
-            'description': r're:Die Badminton-WM 2014 aus Kopenhagen bei Sportdeutschland\.TV',
+            'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
             'timestamp': int,
-            'upload_date': 're:^201408[23][0-9]$',
+            'upload_date': '20200201',
+            'description': 're:.*',  # meaningless description for THIS video
         },
-        'params': {
-            'skip_download': 'Live stream',
-        },
-    }, {
-        'url': 'http://sportdeutschland.tv/li-ning-badminton-wm-2014/lee-li-ning-badminton-weltmeisterschaft-2014-kopenhagen-herren-einzel-wei-vs',
-        'info_dict': {
-            'id': 'lee-li-ning-badminton-weltmeisterschaft-2014-kopenhagen-herren-einzel-wei-vs',
-            'ext': 'mp4',
-            'upload_date': '20140825',
-            'description': 'md5:60a20536b57cee7d9a4ec005e8687504',
-            'timestamp': 1408976060,
-            'duration': 2732,
-            'title': 'Li-Ning Badminton Weltmeisterschaft 2014 Kopenhagen: Herren Einzel, Wei Lee vs. Keun Lee',
-            'thumbnail': r're:^https?://.*\.jpg$',
-            'view_count': int,
-            'categories': ['Li-Ning Badminton WM 2014'],
-
-        }
     }]
 
     def _real_extract(self, url):
@@ -50,7 +32,7 @@ class SportDeutschlandIE(InfoExtractor):
         video_id = mobj.group('id')
         sport_id = mobj.group('sport')
 
-        api_url = 'http://proxy.vidibusdynamic.net/sportdeutschland.tv/api/permalinks/%s/%s?access_token=true' % (
+        api_url = 'https://proxy.vidibusdynamic.net/ssl/backend.sportdeutschland.tv/api/permalinks/%s/%s?access_token=true' % (
             sport_id, video_id)
         req = sanitized_Request(api_url, headers={
             'Accept': 'application/vnd.vidibus.v2.html+json',
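The API move above only changes the host; the request shape stays the same: a plain GET against the permalink endpoint with a vendor-specific Accept header. Reproduced standalone (URL and header value from the diff, the rest is a sketch):

    import urllib.request

    def fetch_permalink(sport_id, video_id):
        url = ('https://proxy.vidibusdynamic.net/ssl/backend.sportdeutschland.tv'
               '/api/permalinks/%s/%s?access_token=true' % (sport_id, video_id))
        req = urllib.request.Request(url, headers={
            'Accept': 'application/vnd.vidibus.v2.html+json',
        })
        return urllib.request.urlopen(req).read()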
index 0901c3163e6cab4723d451b30df8574e359ba899..e12389cad80a83612e10d052b7f36bceba0f1fbf 100644 (file)
@@ -4,19 +4,14 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..compat import (
-    compat_parse_qs,
-    compat_urllib_parse_urlparse,
-)
+from ..compat import compat_str
 from ..utils import (
     determine_ext,
     dict_get,
     int_or_none,
-    orderedSet,
+    str_or_none,
     strip_or_none,
     try_get,
-    urljoin,
-    compat_str,
 )
 
 
@@ -237,23 +232,23 @@ class SVTPlayIE(SVTPlayBaseIE):
 
 
 class SVTSeriesIE(SVTPlayBaseIE):
-    _VALID_URL = r'https?://(?:www\.)?svtplay\.se/(?P<id>[^/?&#]+)'
+    _VALID_URL = r'https?://(?:www\.)?svtplay\.se/(?P<id>[^/?&#]+)(?:.+?\btab=(?P<season_slug>[^&#]+))?'
     _TESTS = [{
         'url': 'https://www.svtplay.se/rederiet',
         'info_dict': {
-            'id': 'rederiet',
+            'id': '14445680',
             'title': 'Rederiet',
-            'description': 'md5:505d491a58f4fcf6eb418ecab947e69e',
+            'description': 'md5:d9fdfff17f5d8f73468176ecd2836039',
         },
         'playlist_mincount': 318,
     }, {
-        'url': 'https://www.svtplay.se/rederiet?tab=sasong2',
+        'url': 'https://www.svtplay.se/rederiet?tab=season-2-14445680',
         'info_dict': {
-            'id': 'rederiet-sasong2',
+            'id': 'season-2-14445680',
             'title': 'Rederiet - Säsong 2',
-            'description': 'md5:505d491a58f4fcf6eb418ecab947e69e',
+            'description': 'md5:d9fdfff17f5d8f73468176ecd2836039',
         },
-        'playlist_count': 12,
+        'playlist_mincount': 12,
     }]
 
     @classmethod
@@ -261,83 +256,87 @@ class SVTSeriesIE(SVTPlayBaseIE):
         return False if SVTIE.suitable(url) or SVTPlayIE.suitable(url) else super(SVTSeriesIE, cls).suitable(url)
 
     def _real_extract(self, url):
-        series_id = self._match_id(url)
-
-        qs = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
-        season_slug = qs.get('tab', [None])[0]
-
-        if season_slug:
-            series_id += '-%s' % season_slug
-
-        webpage = self._download_webpage(
-            url, series_id, 'Downloading series page')
-
-        root = self._parse_json(
-            self._search_regex(
-                self._SVTPLAY_RE, webpage, 'content', group='json'),
-            series_id)
+        series_slug, season_id = re.match(self._VALID_URL, url).groups()
+
+        series = self._download_json(
+            'https://api.svt.se/contento/graphql', series_slug,
+            'Downloading series page', query={
+                'query': '''{
+  listablesBySlug(slugs: ["%s"]) {
+    associatedContent(include: [productionPeriod, season]) {
+      items {
+        item {
+          ... on Episode {
+            videoSvtId
+          }
+        }
+      }
+      id
+      name
+    }
+    id
+    longDescription
+    name
+    shortDescription
+  }
+}''' % series_slug,
+            })['data']['listablesBySlug'][0]
 
         season_name = None
 
         entries = []
-        for season in root['relatedVideoContent']['relatedVideosAccordion']:
+        for season in series['associatedContent']:
             if not isinstance(season, dict):
                 continue
-            if season_slug:
-                if season.get('slug') != season_slug:
+            if season_id:
+                if season.get('id') != season_id:
                     continue
                 season_name = season.get('name')
-            videos = season.get('videos')
-            if not isinstance(videos, list):
+            items = season.get('items')
+            if not isinstance(items, list):
                 continue
-            for video in videos:
-                content_url = video.get('contentUrl')
-                if not content_url or not isinstance(content_url, compat_str):
+            for item in items:
+                video = item.get('item') or {}
+                content_id = video.get('videoSvtId')
+                if not content_id or not isinstance(content_id, compat_str):
                     continue
-                entries.append(
-                    self.url_result(
-                        urljoin(url, content_url),
-                        ie=SVTPlayIE.ie_key(),
-                        video_title=video.get('title')
-                    ))
-
-        metadata = root.get('metaData')
-        if not isinstance(metadata, dict):
-            metadata = {}
+                entries.append(self.url_result(
+                    'svt:' + content_id, SVTPlayIE.ie_key(), content_id))
 
-        title = metadata.get('title')
-        season_name = season_name or season_slug
+        title = series.get('name')
+        season_name = season_name or season_id
 
         if title and season_name:
             title = '%s - %s' % (title, season_name)
-        elif season_slug:
-            title = season_slug
+        elif season_id:
+            title = season_id
 
         return self.playlist_result(
-            entries, series_id, title, metadata.get('description'))
+            entries, season_id or series.get('id'), title,
+            dict_get(series, ('longDescription', 'shortDescription')))
 
 
 class SVTPageIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?svt\.se/(?:[^/]+/)*(?P<id>[^/?&#]+)'
+    _VALID_URL = r'https?://(?:www\.)?svt\.se/(?P<path>(?:[^/]+/)*(?P<id>[^/?&#]+))'
     _TESTS = [{
-        'url': 'https://www.svt.se/sport/oseedat/guide-sommartraningen-du-kan-gora-var-och-nar-du-vill',
+        'url': 'https://www.svt.se/sport/ishockey/bakom-masken-lehners-kamp-mot-mental-ohalsa',
         'info_dict': {
-            'id': 'guide-sommartraningen-du-kan-gora-var-och-nar-du-vill',
-            'title': 'GUIDE: Sommarträning du kan göra var och när du vill',
+            'id': '25298267',
+            'title': 'Bakom masken – Lehners kamp mot mental ohälsa',
         },
-        'playlist_count': 7,
+        'playlist_count': 4,
     }, {
-        'url': 'https://www.svt.se/nyheter/inrikes/ebba-busch-thor-kd-har-delvis-ratt-om-no-go-zoner',
+        'url': 'https://www.svt.se/nyheter/utrikes/svenska-andrea-ar-en-mil-fran-branderna-i-kalifornien',
         'info_dict': {
-            'id': 'ebba-busch-thor-kd-har-delvis-ratt-om-no-go-zoner',
-            'title': 'Ebba Busch Thor har bara delvis rätt om ”no-go-zoner”',
+            'id': '24243746',
+            'title': 'Svenska Andrea redo att fly sitt hem i Kalifornien',
         },
-        'playlist_count': 1,
+        'playlist_count': 2,
     }, {
         # only programTitle
         'url': 'http://www.svt.se/sport/ishockey/jagr-tacklar-giroux-under-intervjun',
         'info_dict': {
-            'id': '2900353',
+            'id': '8439V2K',
             'ext': 'mp4',
             'title': 'Stjärnorna skojar till det - under SVT-intervjun',
             'duration': 27,
@@ -356,16 +355,26 @@ class SVTPageIE(InfoExtractor):
         return False if SVTIE.suitable(url) else super(SVTPageIE, cls).suitable(url)
 
     def _real_extract(self, url):
-        playlist_id = self._match_id(url)
+        path, display_id = re.match(self._VALID_URL, url).groups()
 
-        webpage = self._download_webpage(url, playlist_id)
+        article = self._download_json(
+            'https://api.svt.se/nss-api/page/' + path, display_id,
+            query={'q': 'articles'})['articles']['content'][0]
 
-        entries = [
-            self.url_result(
-                'svt:%s' % video_id, ie=SVTPlayIE.ie_key(), video_id=video_id)
-            for video_id in orderedSet(re.findall(
-                r'data-video-id=["\'](\d+)', webpage))]
+        entries = []
 
-        title = strip_or_none(self._og_search_title(webpage, default=None))
+        def _process_content(content):
+            if content.get('_type') in ('VIDEOCLIP', 'VIDEOEPISODE'):
+                video_id = compat_str(content['image']['svtId'])
+                entries.append(self.url_result(
+                    'svt:' + video_id, SVTPlayIE.ie_key(), video_id))
 
-        return self.playlist_result(entries, playlist_id, title)
+        for media in article.get('media', []):
+            _process_content(media)
+
+        for obj in article.get('structuredBody', []):
+            _process_content(obj.get('content') or {})
+
+        return self.playlist_result(
+            entries, str_or_none(article.get('id')),
+            strip_or_none(article.get('title')))
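Both SVT rewrites above replace webpage scraping with JSON endpoints. For the series case, the GraphQL document travels URL-encoded in a plain 'query' parameter, so the request can be reproduced with nothing but the standard library (endpoint and field names copied from the diff; the trimmed query and helper are an illustrative sketch, not the extractor's code):

    import json
    import urllib.parse
    import urllib.request

    QUERY = '{ listablesBySlug(slugs: ["%s"]) { id name longDescription } }'

    def fetch_series(slug):
        qs = urllib.parse.urlencode({'query': QUERY % slug})
        url = 'https://api.svt.se/contento/graphql?' + qs
        with urllib.request.urlopen(url) as resp:
            return json.load(resp)['data']['listablesBySlug'][0]

    # fetch_series('rederiet').get('name') would give 'Rederiet'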
index 6b7f13b437e3f3671b355b7dabcea9fd5056969e..a75369dbe8a3582595ae339d58887eaefd220536 100644 (file)
@@ -4,11 +4,12 @@ import re
 
 from .common import InfoExtractor
 from .wistia import WistiaIE
-from ..compat import compat_str
 from ..utils import (
     clean_html,
     ExtractorError,
+    int_or_none,
     get_element_by_class,
+    strip_or_none,
     urlencode_postdata,
     urljoin,
 )
@@ -20,8 +21,8 @@ class TeachableBaseIE(InfoExtractor):
 
     _SITES = {
         # Only notable ones here
-        'upskillcourses.com': 'upskill',
-        'academy.gns3.com': 'gns3',
+        'v1.upskillcourses.com': 'upskill',
+        'gns3.teachable.com': 'gns3',
         'academyhacker.com': 'academyhacker',
         'stackskills.com': 'stackskills',
         'market.saleshacker.com': 'saleshacker',
@@ -58,7 +59,7 @@ class TeachableBaseIE(InfoExtractor):
             self._logged_in = True
             return
 
-        login_url = compat_str(urlh.geturl())
+        login_url = urlh.geturl()
 
         login_form = self._hidden_inputs(login_page)
 
@@ -110,27 +111,29 @@ class TeachableIE(TeachableBaseIE):
                     ''' % TeachableBaseIE._VALID_URL_SUB_TUPLE
 
     _TESTS = [{
-        'url': 'http://upskillcourses.com/courses/essential-web-developer-course/lectures/1747100',
+        'url': 'https://gns3.teachable.com/courses/gns3-certified-associate/lectures/6842364',
         'info_dict': {
-            'id': 'uzw6zw58or',
-            'ext': 'mp4',
-            'title': 'Welcome to the Course!',
-            'description': 'md5:65edb0affa582974de4625b9cdea1107',
-            'duration': 138.763,
-            'timestamp': 1479846621,
-            'upload_date': '20161122',
+            'id': 'untlgzk1v7',
+            'ext': 'bin',
+            'title': 'Overview',
+            'description': 'md5:071463ff08b86c208811130ea1c2464c',
+            'duration': 736.4,
+            'timestamp': 1542315762,
+            'upload_date': '20181115',
+            'chapter': 'Welcome',
+            'chapter_number': 1,
         },
         'params': {
             'skip_download': True,
         },
     }, {
-        'url': 'http://upskillcourses.com/courses/119763/lectures/1747100',
+        'url': 'http://v1.upskillcourses.com/courses/119763/lectures/1747100',
         'only_matching': True,
     }, {
-        'url': 'https://academy.gns3.com/courses/423415/lectures/6885939',
+        'url': 'https://gns3.teachable.com/courses/423415/lectures/6885939',
         'only_matching': True,
     }, {
-        'url': 'teachable:https://upskillcourses.com/courses/essential-web-developer-course/lectures/1747100',
+        'url': 'teachable:https://v1.upskillcourses.com/courses/essential-web-developer-course/lectures/1747100',
         'only_matching': True,
     }]
 
@@ -160,8 +163,8 @@ class TeachableIE(TeachableBaseIE):
 
         webpage = self._download_webpage(url, video_id)
 
-        wistia_url = WistiaIE._extract_url(webpage)
-        if not wistia_url:
+        wistia_urls = WistiaIE._extract_urls(webpage)
+        if not wistia_urls:
             if any(re.search(p, webpage) for p in (
                     r'class=["\']lecture-contents-locked',
                     r'>\s*Lecture contents locked',
@@ -174,12 +177,37 @@ class TeachableIE(TeachableBaseIE):
 
         title = self._og_search_title(webpage, default=None)
 
-        return {
+        chapter = None
+        chapter_number = None
+        section_item = self._search_regex(
+            r'(?s)(?P<li><li[^>]+\bdata-lecture-id=["\']%s[^>]+>.+?</li>)' % video_id,
+            webpage, 'section item', default=None, group='li')
+        if section_item:
+            chapter_number = int_or_none(self._search_regex(
+                r'data-ss-position=["\'](\d+)', section_item, 'section id',
+                default=None))
+            if chapter_number is not None:
+                sections = []
+                for s in re.findall(
+                        r'(?s)<div[^>]+\bclass=["\']section-title[^>]+>(.+?)</div>', webpage):
+                    section = strip_or_none(clean_html(s))
+                    if not section:
+                        sections = []
+                        break
+                    sections.append(section)
+                if chapter_number <= len(sections):
+                    chapter = sections[chapter_number - 1]
+
+        entries = [{
             '_type': 'url_transparent',
             'url': wistia_url,
             'ie_key': WistiaIE.ie_key(),
             'title': title,
-        }
+            'chapter': chapter,
+            'chapter_number': chapter_number,
+        } for wistia_url in wistia_urls]
+
+        return self.playlist_result(entries, video_id, title)
 
 
 class TeachableCourseIE(TeachableBaseIE):
@@ -191,20 +219,20 @@ class TeachableCourseIE(TeachableBaseIE):
                         /(?:courses|p)/(?:enrolled/)?(?P<id>[^/?#&]+)
                     ''' % TeachableBaseIE._VALID_URL_SUB_TUPLE
     _TESTS = [{
-        'url': 'http://upskillcourses.com/courses/essential-web-developer-course/',
+        'url': 'http://v1.upskillcourses.com/courses/essential-web-developer-course/',
         'info_dict': {
             'id': 'essential-web-developer-course',
             'title': 'The Essential Web Developer Course (Free)',
         },
         'playlist_count': 192,
     }, {
         'info_dict': {
             'id': 'essential-web-developer-course',
             'title': 'The Essential Web Developer Course (Free)',
         },
         'playlist_count': 192,
     }, {
-        'url': 'http://upskillcourses.com/courses/119763/',
+        'url': 'http://v1.upskillcourses.com/courses/119763/',
         'only_matching': True,
     }, {
         'only_matching': True,
     }, {
-        'url': 'http://upskillcourses.com/courses/enrolled/119763',
+        'url': 'http://v1.upskillcourses.com/courses/enrolled/119763',
         'only_matching': True,
     }, {
         'only_matching': True,
     }, {
-        'url': 'https://academy.gns3.com/courses/enrolled/423415',
+        'url': 'https://gns3.teachable.com/courses/enrolled/423415',
         'only_matching': True,
     }, {
         'url': 'teachable:https://learn.vrdev.school/p/gear-vr-developer-mini',
         'only_matching': True,
     }, {
         'url': 'teachable:https://learn.vrdev.school/p/gear-vr-developer-mini',
index d37e1b0557cf3ba241a25e7e56d28c8dc679b1d0..9ba3da341dac65d18a599a790bff9c95b0e52eb8 100644 (file)
@@ -11,6 +11,7 @@ from ..utils import (
     determine_ext,
     int_or_none,
     str_or_none,
+    try_get,
     urljoin,
 )
 
@@ -24,7 +25,7 @@ class TelecincoIE(InfoExtractor):
         'info_dict': {
             'id': '1876350223',
             'title': 'Bacalao con kokotxas al pil-pil',
-            'description': 'md5:1382dacd32dd4592d478cbdca458e5bb',
+            'description': 'md5:716caf5601e25c3c5ab6605b1ae71529',
         },
         'playlist': [{
             'md5': 'adb28c37238b675dad0f042292f209a7',
@@ -55,6 +56,26 @@ class TelecincoIE(InfoExtractor):
             'description': 'md5:2771356ff7bfad9179c5f5cd954f1477',
             'duration': 50,
         },
+    }, {
+        # video in opening's content
+        'url': 'https://www.telecinco.es/vivalavida/fiorella-sobrina-edmundo-arrocet-entrevista_18_2907195140.html',
+        'info_dict': {
+            'id': '2907195140',
+            'title': 'La surrealista entrevista a la sobrina de Edmundo Arrocet: "No puedes venir aquí y tomarnos por tontos"',
+            'description': 'md5:73f340a7320143d37ab895375b2bf13a',
+        },
+        'playlist': [{
+            'md5': 'adb28c37238b675dad0f042292f209a7',
+            'info_dict': {
+                'id': 'TpI2EttSDAReWpJ1o0NVh2',
+                'ext': 'mp4',
+                'title': 'La surrealista entrevista a la sobrina de Edmundo Arrocet: "No puedes venir aquí y tomarnos por tontos"',
+                'duration': 1015,
+            },
+        }],
+        'params': {
+            'skip_download': True,
+        },
     }, {
         'url': 'http://www.telecinco.es/informativos/nacional/Pablo_Iglesias-Informativos_Telecinco-entrevista-Pedro_Piqueras_2_1945155182.html',
         'only_matching': True,
@@ -135,17 +156,28 @@ class TelecincoIE(InfoExtractor):
         display_id = self._match_id(url)
         webpage = self._download_webpage(url, display_id)
         article = self._parse_json(self._search_regex(
-            r'window\.\$REACTBASE_STATE\.article\s*=\s*({.+})',
+            r'window\.\$REACTBASE_STATE\.article(?:_multisite)?\s*=\s*({.+})',
             webpage, 'article'), display_id)['article']
         title = article.get('title')
-        description = clean_html(article.get('leadParagraph'))
+        description = clean_html(article.get('leadParagraph')) or ''
         if article.get('editorialType') != 'VID':
             entries = []
-            for p in article.get('body', []):
+            body = [article.get('opening')]
+            body.extend(try_get(article, lambda x: x['body'], list) or [])
+            for p in body:
+                if not isinstance(p, dict):
+                    continue
                 content = p.get('content')
-                if p.get('type') != 'video' or not content:
+                if not content:
+                    continue
+                type_ = p.get('type')
+                if type_ == 'paragraph':
+                    content_str = str_or_none(content)
+                    if content_str:
+                        description += content_str
                     continue
-                entries.append(self._parse_content(content, url))
+                if type_ == 'video' and isinstance(content, dict):
+                    entries.append(self._parse_content(content, url))
             return self.playlist_result(
                 entries, str_or_none(article.get('id')), title, description)
         content = article['opening']['content']
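After this change the article walker treats the opening block as just another body item: paragraph content is folded into the description, and only video blocks become playlist entries. Stripped of extractor plumbing, the traversal amounts to this (article structure inferred from the code above; the helper is a sketch, not the extractor's API):

    def walk_article(article):
        description = article.get('leadParagraph') or ''
        videos = []
        for p in [article.get('opening')] + (article.get('body') or []):
            if not isinstance(p, dict):
                continue
            content = p.get('content')
            if not content:
                continue
            if p.get('type') == 'paragraph' and isinstance(content, str):
                description += content        # text grows the description
            elif p.get('type') == 'video' and isinstance(content, dict):
                videos.append(content)        # videos become entries
        return description, videos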
index ae9f66787439462967baa63dc58f39870fb89382..c82c94b3a0009da2cf0938c92910feec84de018b 100644 (file)
@@ -38,8 +38,6 @@ class TeleQuebecIE(TeleQuebecBaseIE):
             'ext': 'mp4',
             'title': 'Un petit choc et puis repart!',
             'description': 'md5:b04a7e6b3f74e32d7b294cffe8658374',
-            'upload_date': '20180222',
-            'timestamp': 1519326631,
         },
         'params': {
             'skip_download': True,
index 0e2370cd828f78a2e1a708852a392a05d96e3039..0631cb7aba8a7068a291fb8e67de0d5e04acf482 100644 (file)
@@ -17,14 +17,12 @@ class TFOIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?tfo\.org/(?:en|fr)/(?:[^/]+/){2}(?P<id>\d+)'
     _TEST = {
         'url': 'http://www.tfo.org/en/universe/tfo-247/100463871/video-game-hackathon',
-        'md5': '47c987d0515561114cf03d1226a9d4c7',
+        'md5': 'cafbe4f47a8dae0ca0159937878100d6',
         'info_dict': {
-            'id': '100463871',
+            'id': '7da3d50e495c406b8fc0b997659cc075',
             'ext': 'mp4',
             'title': 'Video Game Hackathon',
             'description': 'md5:558afeba217c6c8d96c60e5421795c07',
-            'upload_date': '20160212',
-            'timestamp': 1455310233,
         }
     }
 
index 6ab147ad726306ba9250599d34491a50e64e82d0..387f955eee5752ac8797c85375070ba77a897075 100644 (file)
@@ -2,43 +2,42 @@
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
-from ..compat import compat_str
-from ..utils import try_get
 
 
 class ThisOldHouseIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?thisoldhouse\.com/(?:watch|how-to|tv-episode)/(?P<id>[^/?#]+)'
+    _VALID_URL = r'https?://(?:www\.)?thisoldhouse\.com/(?:watch|how-to|tv-episode|(?:[^/]+/)?\d+)/(?P<id>[^/?#]+)'
     _TESTS = [{
         'url': 'https://www.thisoldhouse.com/how-to/how-to-build-storage-bench',
-        'md5': '568acf9ca25a639f0c4ff905826b662f',
         'info_dict': {
-            'id': '2REGtUDQ',
+            'id': '5dcdddf673c3f956ef5db202',
             'ext': 'mp4',
             'title': 'How to Build a Storage Bench',
             'description': 'In the workshop, Tom Silva and Kevin O\'Connor build a storage bench for an entryway.',
             'timestamp': 1442548800,
             'upload_date': '20150918',
-        }
+        },
+        'params': {
+            'skip_download': True,
+        },
     }, {
         'url': 'https://www.thisoldhouse.com/watch/arlington-arts-crafts-arts-and-crafts-class-begins',
         'only_matching': True,
     }, {
         'url': 'https://www.thisoldhouse.com/tv-episode/ask-toh-shelf-rough-electric',
         'only_matching': True,
+    }, {
+        'url': 'https://www.thisoldhouse.com/furniture/21017078/how-to-build-a-storage-bench',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.thisoldhouse.com/21113884/s41-e13-paradise-lost',
+        'only_matching': True,
     }]
     }]
+    _ZYPE_TMPL = 'https://player.zype.com/embed/%s.html?api_key=hsOk_yMSPYNrT22e9pu8hihLXjaZf0JW5jsOWv4ZqyHJFvkJn6rtToHl09tbbsbe'
 
     def _real_extract(self, url):
         display_id = self._match_id(url)
         webpage = self._download_webpage(url, display_id)
         video_id = self._search_regex(
 
-             r'id=(["\'])inline-video-player-(?P<id>(?:(?!\1).)+)\1'),
-            webpage, 'video id', default=None, group='id')
-        if not video_id:
-            drupal_settings = self._parse_json(self._search_regex(
-                r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);',
-                webpage, 'drupal settings'), display_id)
-            video_id = try_get(
-                drupal_settings, lambda x: x['jwplatform']['video_id'],
-                compat_str) or list(drupal_settings['comScore'])[0]
-        return self.url_result('jwplatform:' + video_id, 'JWPlatform', video_id)
+            r'<iframe[^>]+src=[\'"](?:https?:)?//thisoldhouse\.chorus\.build/videos/zype/([0-9a-f]{24})',
+            webpage, 'video id')
+        return self.url_result(self._ZYPE_TMPL % video_id, 'Zype', video_id)
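The rewrite above drops the JW Platform/Drupal detection entirely: the page now embeds a Zype iframe whose 24-hex-digit id slots into a fixed player URL. In isolation (regex and template from the diff; the sample markup is invented for illustration):

    import re

    ZYPE_TMPL = 'https://player.zype.com/embed/%s.html?api_key=hsOk_yMSPYNrT22e9pu8hihLXjaZf0JW5jsOWv4ZqyHJFvkJn6rtToHl09tbbsbe'

    webpage = '<iframe src="//thisoldhouse.chorus.build/videos/zype/5dcdddf673c3f956ef5db202">'
    video_id = re.search(
        r'<iframe[^>]+src=[\'"](?:https?:)?//thisoldhouse\.chorus\.build/videos/zype/([0-9a-f]{24})',
        webpage).group(1)
    print(ZYPE_TMPL % video_id)  # hands the id to the Zype extractor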
index 5e5efda0f0780fb98b7c37b788ad2734a837e90d..ca2e36efe4216ad66d46252c662ea4cc5395c3ca 100644 (file)
@@ -17,9 +17,9 @@ from ..utils import (
 
 class ToggleIE(InfoExtractor):
     IE_NAME = 'toggle'
-    _VALID_URL = r'https?://video\.toggle\.sg/(?:en|zh)/(?:[^/]+/){2,}(?P<id>[0-9]+)'
+    _VALID_URL = r'https?://(?:(?:www\.)?mewatch|video\.toggle)\.sg/(?:en|zh)/(?:[^/]+/){2,}(?P<id>[0-9]+)'
     _TESTS = [{
-        'url': 'http://video.toggle.sg/en/series/lion-moms-tif/trailers/lion-moms-premier/343115',
+        'url': 'http://www.mewatch.sg/en/series/lion-moms-tif/trailers/lion-moms-premier/343115',
         'info_dict': {
             'id': '343115',
             'ext': 'mp4',
         'info_dict': {
             'id': '343115',
             'ext': 'mp4',
@@ -33,7 +33,7 @@ class ToggleIE(InfoExtractor):
         }
     }, {
         'note': 'DRM-protected video',
         }
     }, {
         'note': 'DRM-protected video',
-        'url': 'http://video.toggle.sg/en/movies/dug-s-special-mission/341413',
+        'url': 'http://www.mewatch.sg/en/movies/dug-s-special-mission/341413',
         'info_dict': {
             'id': '341413',
             'ext': 'wvm',
         'info_dict': {
             'id': '341413',
             'ext': 'wvm',
@@ -48,7 +48,7 @@ class ToggleIE(InfoExtractor):
     }, {
         # this also tests correct video id extraction
         'note': 'm3u8 links are geo-restricted, but Android/mp4 is okay',
     }, {
         # this also tests correct video id extraction
         'note': 'm3u8 links are geo-restricted, but Android/mp4 is okay',
-        'url': 'http://video.toggle.sg/en/series/28th-sea-games-5-show/28th-sea-games-5-show-ep11/332861',
+        'url': 'http://www.mewatch.sg/en/series/28th-sea-games-5-show/28th-sea-games-5-show-ep11/332861',
         'info_dict': {
             'id': '332861',
             'ext': 'mp4',
         'info_dict': {
             'id': '332861',
             'ext': 'mp4',
@@ -65,19 +65,22 @@ class ToggleIE(InfoExtractor):
         'url': 'http://video.toggle.sg/en/clips/seraph-sun-aloysius-will-suddenly-sing-some-old-songs-in-high-pitch-on-set/343331',
         'only_matching': True,
     }, {
         'url': 'http://video.toggle.sg/en/clips/seraph-sun-aloysius-will-suddenly-sing-some-old-songs-in-high-pitch-on-set/343331',
         'only_matching': True,
     }, {
-        'url': 'http://video.toggle.sg/zh/series/zero-calling-s2-hd/ep13/336367',
+        'url': 'http://www.mewatch.sg/en/clips/seraph-sun-aloysius-will-suddenly-sing-some-old-songs-in-high-pitch-on-set/343331',
         'only_matching': True,
     }, {
-        'url': 'http://video.toggle.sg/en/series/vetri-s2/webisodes/jeeva-is-an-orphan-vetri-s2-webisode-7/342302',
+        'url': 'http://www.mewatch.sg/zh/series/zero-calling-s2-hd/ep13/336367',
         'only_matching': True,
     }, {
-        'url': 'http://video.toggle.sg/en/movies/seven-days/321936',
+        'url': 'http://www.mewatch.sg/en/series/vetri-s2/webisodes/jeeva-is-an-orphan-vetri-s2-webisode-7/342302',
         'only_matching': True,
     }, {
-        'url': 'https://video.toggle.sg/en/tv-show/news/may-2017-cna-singapore-tonight/fri-19-may-2017/512456',
+        'url': 'http://www.mewatch.sg/en/movies/seven-days/321936',
         'only_matching': True,
     }, {
-        'url': 'http://video.toggle.sg/en/channels/eleven-plus/401585',
+        'url': 'https://www.mewatch.sg/en/tv-show/news/may-2017-cna-singapore-tonight/fri-19-may-2017/512456',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.mewatch.sg/en/channels/eleven-plus/401585',
         'only_matching': True,
     }]
 
index edbb0aa6944ba82b36415875f2d99e570b3373fc..ae584ad697bdf3f460eff033b8f43e75776942ee 100644 (file)
@@ -4,7 +4,6 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..compat import compat_str
 from ..utils import (
     ExtractorError,
     int_or_none,
@@ -151,7 +150,7 @@ class TumblrIE(InfoExtractor):
         url = 'http://%s.tumblr.com/post/%s/' % (blog, video_id)
         webpage, urlh = self._download_webpage_handle(url, video_id)
 
-        redirect_url = compat_str(urlh.geturl())
+        redirect_url = urlh.geturl()
         if 'tumblr.com/safe-mode' in redirect_url or redirect_url.startswith('/safe-mode'):
             raise ExtractorError(
                 'This Tumblr may contain sensitive media. '
index 611fdc0c6c7002c1669200c7ace75bf498a85c6c..8bda9348d723073b894d2d77b6556b51d89dad80 100644 (file)
@@ -106,7 +106,7 @@ class TV2DKBornholmPlayIE(InfoExtractor):
         video_id = self._match_id(url)
 
         video = self._download_json(
-            'http://play.tv2bornholm.dk/controls/AJAX.aspx/specifikVideo', video_id,
+            'https://play.tv2bornholm.dk/controls/AJAX.aspx/specifikVideo', video_id,
             data=json.dumps({
                 'playlist_id': video_id,
                 'serienavn': '',
index 88b6baa316b54eb58e3deb5d69f2fd04c1795bba..b7fe082b9c00e6600b6bf9f6e29849a3fd4eb3b6 100644 (file)
@@ -3,31 +3,51 @@ from __future__ import unicode_literals
 
 from .common import InfoExtractor
 from ..utils import (
-    clean_html,
     determine_ext,
     extract_attributes,
-    get_element_by_class,
     int_or_none,
     parse_duration,
-    parse_iso8601,
 )
 
 
 class TV5MondePlusIE(InfoExtractor):
     IE_DESC = 'TV5MONDE+'
-    _VALID_URL = r'https?://(?:www\.)?tv5mondeplus\.com/toutes-les-videos/[^/]+/(?P<id>[^/?#]+)'
-    _TEST = {
-        'url': 'http://www.tv5mondeplus.com/toutes-les-videos/documentaire/tdah-mon-amour-tele-quebec-tdah-mon-amour-ep001-enfants',
-        'md5': '12130fc199f020673138a83466542ec6',
+    _VALID_URL = r'https?://(?:www\.)?(?:tv5mondeplus|revoir\.tv5monde)\.com/toutes-les-videos/[^/]+/(?P<id>[^/?#]+)'
+    _TESTS = [{
+        # movie
+        'url': 'https://revoir.tv5monde.com/toutes-les-videos/cinema/rendez-vous-a-atlit',
+        'md5': '8cbde5ea7b296cf635073e27895e227f',
         'info_dict': {
-            'id': 'tdah-mon-amour-tele-quebec-tdah-mon-amour-ep001-enfants',
+            'id': '822a4756-0712-7329-1859-a13ac7fd1407',
+            'display_id': 'rendez-vous-a-atlit',
             'ext': 'mp4',
-            'title': 'Tdah, mon amour - Enfants',
-            'description': 'md5:230e3aca23115afcf8006d1bece6df74',
-            'upload_date': '20170401',
-            'timestamp': 1491022860,
-        }
-    }
+            'title': 'Rendez-vous à Atlit',
+            'description': 'md5:2893a4c5e1dbac3eedff2d87956e4efb',
+            'upload_date': '20200130',
+        },
+    }, {
+        # series episode
+        'url': 'https://revoir.tv5monde.com/toutes-les-videos/series-fictions/c-est-la-vie-ennemie-juree',
+        'info_dict': {
+            'id': '0df7007c-4900-3936-c601-87a13a93a068',
+            'display_id': 'c-est-la-vie-ennemie-juree',
+            'ext': 'mp4',
+            'title': "C'est la vie - Ennemie jurée",
+            'description': 'md5:dfb5c63087b6f35fe0cc0af4fe44287e',
+            'upload_date': '20200130',
+            'series': "C'est la vie",
+            'episode': 'Ennemie jurée',
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }, {
+        'url': 'https://revoir.tv5monde.com/toutes-les-videos/series-fictions/neuf-jours-en-hiver-neuf-jours-en-hiver',
+        'only_matching': True,
+    }, {
+        'url': 'https://revoir.tv5monde.com/toutes-les-videos/info-societe/le-journal-de-la-rts-edition-du-30-01-20-19h30',
+        'only_matching': True,
+    }]
     _GEO_BYPASS = False
 
     def _real_extract(self, url):
@@ -37,11 +57,7 @@ class TV5MondePlusIE(InfoExtractor):
         if ">Ce programme n'est malheureusement pas disponible pour votre zone géographique.<" in webpage:
             self.raise_geo_restricted(countries=['FR'])
 
-        series = get_element_by_class('video-detail__title', webpage)
-        title = episode = get_element_by_class(
-            'video-detail__subtitle', webpage) or series
-        if series and series != title:
-            title = '%s - %s' % (series, title)
+        title = episode = self._html_search_regex(r'<h1>([^<]+)', webpage, 'title')
         vpl_data = extract_attributes(self._search_regex(
             r'(<[^>]+class="video_player_loader"[^>]+>)',
             webpage, 'video player loader'))
@@ -65,15 +81,37 @@ class TV5MondePlusIE(InfoExtractor):
                 })
         self._sort_formats(formats)
 
+        description = self._html_search_regex(
+            r'(?s)<div[^>]+class=["\']episode-texte[^>]+>(.+?)</div>', webpage,
+            'description', fatal=False)
+
+        series = self._html_search_regex(
+            r'<p[^>]+class=["\']episode-emission[^>]+>([^<]+)', webpage,
+            'series', default=None)
+
+        if series and series != title:
+            title = '%s - %s' % (series, title)
+
+        upload_date = self._search_regex(
+            r'(?:date_publication|publish_date)["\']\s*:\s*["\'](\d{4}_\d{2}_\d{2})',
+            webpage, 'upload date', default=None)
+        if upload_date:
+            upload_date = upload_date.replace('_', '')
+
+        video_id = self._search_regex(
+            (r'data-guid=["\']([\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})',
+             r'id_contenu["\']\s:\s*(\d+)'), webpage, 'video id',
+            default=display_id)
+
         return {
-            'id': display_id,
+            'id': video_id,
             'display_id': display_id,
             'title': title,
-            'description': clean_html(get_element_by_class('video-detail__description', webpage)),
+            'description': description,
             'thumbnail': vpl_data.get('data-image'),
             'duration': int_or_none(vpl_data.get('data-duration')) or parse_duration(self._html_search_meta('duration', webpage)),
-            'timestamp': parse_iso8601(self._html_search_meta('uploadDate', webpage)),
+            'upload_date': upload_date,
             'formats': formats,
-            'episode': episode,
             'series': series,
+            'episode': episode,
         }
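Two small details in the rewrite above are worth calling out: the publish date is embedded as 2020_01_30 and only needs its underscores stripped to become a valid upload_date, and the video id falls back from a data-guid GUID to a numeric id_contenu to the display id. For the date part (regex from the diff; the sample input is invented):

    import re

    page = '{"date_publication": "2020_01_30"}'
    m = re.search(
        r'(?:date_publication|publish_date)["\']\s*:\s*["\'](\d{4}_\d{2}_\d{2})',
        page)
    print(m.group(1).replace('_', ''))  # 20200130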
index 0b863df2ff4ad214162c6187ac7aaa65fe3fc6c9..443f46e8a3537165d620c2db8863634e9f922ab6 100644 (file)
@@ -9,8 +9,8 @@ from ..utils import (
 
 
 class TVAIE(InfoExtractor):
-    _VALID_URL = r'https?://videos\.tva\.ca/details/_(?P<id>\d+)'
-    _TEST = {
+    _VALID_URL = r'https?://videos?\.tva\.ca/details/_(?P<id>\d+)'
+    _TESTS = [{
         'url': 'https://videos.tva.ca/details/_5596811470001',
         'info_dict': {
             'id': '5596811470001',
@@ -24,7 +24,10 @@ class TVAIE(InfoExtractor):
             # m3u8 download
             'skip_download': True,
         }
-    }
+    }, {
+        'url': 'https://video.tva.ca/details/_5596811470001',
+        'only_matching': True,
+    }]
     BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/5481942443001/default_default/index.html?videoId=%s'
 
     def _real_extract(self, url):
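
The TVA change above is a one-character regex widening: 'videos\.' becomes 'videos?\.', so both the videos. and video. subdomains match. A quick standalone check of that behaviour:

    import re

    VALID_URL = r'https?://videos?\.tva\.ca/details/_(?P<id>\d+)'

    for url in ('https://videos.tva.ca/details/_5596811470001',
                'https://video.tva.ca/details/_5596811470001'):
        print(re.match(VALID_URL, url).group('id'))  # 5596811470001, twice
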
index 2830c212ebbf9c6daa819667cc994001f8dc798a..74d14049b482a702bf464a40f2e5f361dc7cd72a 100644 (file)
--- a/youtube_dl/extractor/twentyfourvideo.py
+++ b/youtube_dl/extractor/twentyfourvideo.py
@@ -17,7 +17,7 @@ class TwentyFourVideoIE(InfoExtractor):
     _VALID_URL = r'''(?x)
                     https?://
                         (?P<host>
-                            (?:(?:www|porno)\.)?24video\.
+                            (?:(?:www|porno?)\.)?24video\.
                             (?:net|me|xxx|sexy?|tube|adult|site|vip)
                         )/
                         (?:
@@ -62,6 +62,9 @@ class TwentyFourVideoIE(InfoExtractor):
     }, {
         'url': 'https://www.24video.vip/video/view/1044982',
         'only_matching': True,
+    }, {
+        'url': 'https://porn.24video.net/video/2640421-vsya-takay',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
index a8c2502af8132834a34b8ef9c8ade935dd432604..0db2dca41c7e7e914c66956987d2f921b81b1791 100644 (file)
--- a/youtube_dl/extractor/twitch.py
+++ b/youtube_dl/extractor/twitch.py
@@ -575,8 +575,8 @@ class TwitchStreamIE(TwitchBaseIE):
         channel_id = self._match_id(url)
 
         stream = self._call_api(
-            'kraken/streams/%s?stream_type=all' % channel_id, channel_id,
-            'Downloading stream JSON').get('stream')
+            'kraken/streams/%s?stream_type=all' % channel_id.lower(),
+            channel_id, 'Downloading stream JSON').get('stream')
 
         if not stream:
             raise ExtractorError('%s is offline' % channel_id, expected=True)
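
The Twitch fix above lowercases the channel name only inside the kraken API path, while the original casing is kept for the error message; presumably the endpoint keys streams by the lowercase login. A sketch of that split:

    def build_stream_path(channel_id):
        # Lowercase for the API path only; callers keep the original
        # casing for display and error messages.
        return 'kraken/streams/%s?stream_type=all' % channel_id.lower()

    print(build_stream_path('SomeStreamer'))
    # kraken/streams/somestreamer?stream_type=all
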
index 851ad936cfc012b02c3125c9a0a3e898f9c6f005..d6b92b1c833072bab2bbacb8503693b6da156427 100644 (file)
--- a/youtube_dl/extractor/viewlift.py
+++ b/youtube_dl/extractor/viewlift.py
@@ -1,28 +1,62 @@
 from __future__ import unicode_literals
 
-import base64
+import json
 import re
 
 from .common import InfoExtractor
-from ..compat import compat_urllib_parse_unquote
+from ..compat import compat_HTTPError
 from ..utils import (
     ExtractorError,
-    clean_html,
-    determine_ext,
     int_or_none,
-    js_to_json,
     parse_age_limit,
-    parse_duration,
-    try_get,
 )
 
 
 class ViewLiftBaseIE(InfoExtractor):
-    _DOMAINS_REGEX = r'(?:(?:main\.)?snagfilms|snagxtreme|funnyforfree|kiddovid|winnersview|(?:monumental|lax)sportsnetwork|vayafilm)\.com|hoichoi\.tv'
+    _API_BASE = 'https://prod-api.viewlift.com/'
+    _DOMAINS_REGEX = r'(?:(?:main\.)?snagfilms|snagxtreme|funnyforfree|kiddovid|winnersview|(?:monumental|lax)sportsnetwork|vayafilm|failarmy|ftfnext|lnppass\.legapallacanestro|moviespree|app\.myoutdoortv|neoufitness|pflmma|theidentitytb)\.com|(?:hoichoi|app\.horseandcountry|kronon|marquee|supercrosslive)\.tv'
+    _SITE_MAP = {
+        'ftfnext': 'lax',
+        'funnyforfree': 'snagfilms',
+        'hoichoi': 'hoichoitv',
+        'kiddovid': 'snagfilms',
+        'laxsportsnetwork': 'lax',
+        'legapallacanestro': 'lnp',
+        'marquee': 'marquee-tv',
+        'monumentalsportsnetwork': 'monumental-network',
+        'moviespree': 'bingeflix',
+        'pflmma': 'pfl',
+        'snagxtreme': 'snagfilms',
+        'theidentitytb': 'tampabay',
+        'vayafilm': 'snagfilms',
+    }
+    _TOKENS = {}
+
+    def _call_api(self, site, path, video_id, query):
+        token = self._TOKENS.get(site)
+        if not token:
+            token_query = {'site': site}
+            email, password = self._get_login_info(netrc_machine=site)
+            if email:
+                resp = self._download_json(
+                    self._API_BASE + 'identity/signin', video_id,
+                    'Logging in', query=token_query, data=json.dumps({
+                        'email': email,
+                        'password': password,
+                    }).encode())
+            else:
+                resp = self._download_json(
+                    self._API_BASE + 'identity/anonymous-token', video_id,
+                    'Downloading authorization token', query=token_query)
+            self._TOKENS[site] = token = resp['authorizationToken']
+        return self._download_json(
+            self._API_BASE + path, video_id,
+            headers={'Authorization': token}, query=query)
 
 
 class ViewLiftEmbedIE(ViewLiftBaseIE):
-    _VALID_URL = r'https?://(?:(?:www|embed)\.)?(?:%s)/embed/player\?.*\bfilmId=(?P<id>[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12})' % ViewLiftBaseIE._DOMAINS_REGEX
+    IE_NAME = 'viewlift:embed'
+    _VALID_URL = r'https?://(?:(?:www|embed)\.)?(?P<domain>%s)/embed/player\?.*\bfilmId=(?P<id>[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12})' % ViewLiftBaseIE._DOMAINS_REGEX
     _TESTS = [{
         'url': 'http://embed.snagfilms.com/embed/player?filmId=74849a00-85a9-11e1-9660-123139220831&w=500',
         'md5': '2924e9215c6eff7a55ed35b72276bd93',
@@ -30,6 +64,9 @@ class ViewLiftEmbedIE(ViewLiftBaseIE):
             'id': '74849a00-85a9-11e1-9660-123139220831',
             'ext': 'mp4',
             'title': '#whilewewatch',
+            'description': 'md5:b542bef32a6f657dadd0df06e26fb0c8',
+            'timestamp': 1334350096,
+            'upload_date': '20120413',
         }
     }, {
         # invalid labels, 360p is better that 480p
@@ -39,7 +76,8 @@ class ViewLiftEmbedIE(ViewLiftBaseIE):
             'id': '17ca0950-a74a-11e0-a92a-0026bb61d036',
             'ext': 'mp4',
             'title': 'Life in Limbo',
-        }
+        },
+        'skip': 'The video does not exist',
     }, {
         'url': 'http://www.snagfilms.com/embed/player?filmId=0000014c-de2f-d5d6-abcf-ffef58af0017',
         'only_matching': True,
@@ -54,67 +92,68 @@ class ViewLiftEmbedIE(ViewLiftBaseIE):
             return mobj.group('url')
 
     def _real_extract(self, url):
-        video_id = self._match_id(url)
-
-        webpage = self._download_webpage(url, video_id)
-
-        if '>This film is not playable in your area.<' in webpage:
-            raise ExtractorError(
-                'Film %s is not playable in your area.' % video_id, expected=True)
+        domain, film_id = re.match(self._VALID_URL, url).groups()
+        site = domain.split('.')[-2]
+        if site in self._SITE_MAP:
+            site = self._SITE_MAP[site]
+        try:
+            content_data = self._call_api(
+                site, 'entitlement/video/status', film_id, {
+                    'id': film_id
+                })['video']
+        except ExtractorError as e:
+            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
+                error_message = self._parse_json(e.cause.read().decode(), film_id).get('errorMessage')
+                if error_message == 'User does not have a valid subscription or has not purchased this content.':
+                    self.raise_login_required()
+                raise ExtractorError(error_message, expected=True)
+            raise
+        gist = content_data['gist']
+        title = gist['title']
+        video_assets = content_data['streamingInfo']['videoAssets']
 
         formats = []
-        has_bitrate = False
-        sources = self._parse_json(self._search_regex(
-            r'(?s)sources:\s*(\[.+?\]),', webpage,
-            'sources', default='[]'), video_id, js_to_json)
-        for source in sources:
-            file_ = source.get('file')
-            if not file_:
+        mpeg_video_assets = video_assets.get('mpeg') or []
+        for video_asset in mpeg_video_assets:
+            video_asset_url = video_asset.get('url')
+            if not video_asset:
                 continue
-            type_ = source.get('type')
-            ext = determine_ext(file_)
-            format_id = source.get('label') or ext
-            if all(v in ('m3u8', 'hls') for v in (type_, ext)):
-                formats.extend(self._extract_m3u8_formats(
-                    file_, video_id, 'mp4', 'm3u8_native',
-                    m3u8_id='hls', fatal=False))
-            else:
-                bitrate = int_or_none(self._search_regex(
-                    [r'(\d+)kbps', r'_\d{1,2}x\d{1,2}_(\d{3,})\.%s' % ext],
-                    file_, 'bitrate', default=None))
-                if not has_bitrate and bitrate:
-                    has_bitrate = True
-                height = int_or_none(self._search_regex(
-                    r'^(\d+)[pP]$', format_id, 'height', default=None))
-                formats.append({
-                    'url': file_,
-                    'format_id': 'http-%s%s' % (format_id, ('-%dk' % bitrate if bitrate else '')),
-                    'tbr': bitrate,
-                    'height': height,
-                })
-        if not formats:
-            hls_url = self._parse_json(self._search_regex(
-                r'filmInfo\.src\s*=\s*({.+?});',
-                webpage, 'src'), video_id, js_to_json)['src']
-            formats = self._extract_m3u8_formats(
-                hls_url, video_id, 'mp4', 'm3u8_native',
-                m3u8_id='hls', fatal=False)
-        field_preference = None if has_bitrate else ('height', 'tbr', 'format_id')
-        self._sort_formats(formats, field_preference)
-
-        title = self._search_regex(
-            [r"title\s*:\s*'([^']+)'", r'<title>([^<]+)</title>'],
-            webpage, 'title')
-
-        return {
-            'id': video_id,
+            bitrate = int_or_none(video_asset.get('bitrate'))
+            height = int_or_none(self._search_regex(
+                r'^_?(\d+)[pP]$', video_asset.get('renditionValue'),
+                'height', default=None))
+            formats.append({
+                'url': video_asset_url,
+                'format_id': 'http%s' % ('-%d' % bitrate if bitrate else ''),
+                'tbr': bitrate,
+                'height': height,
+                'vcodec': video_asset.get('codec'),
+            })
+
+        hls_url = video_assets.get('hls')
+        if hls_url:
+            formats.extend(self._extract_m3u8_formats(
+                hls_url, film_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
+        self._sort_formats(formats, ('height', 'tbr', 'format_id'))
+
+        info = {
+            'id': film_id,
             'title': title,
+            'description': gist.get('description'),
+            'thumbnail': gist.get('videoImageUrl'),
+            'duration': int_or_none(gist.get('runtime')),
+            'age_limit': parse_age_limit(content_data.get('parentalRating')),
+            'timestamp': int_or_none(gist.get('publishDate'), 1000),
             'formats': formats,
         }
+        for k in ('categories', 'tags'):
+            info[k] = [v['title'] for v in content_data.get(k, []) if v.get('title')]
+        return info
 
 
 class ViewLiftIE(ViewLiftBaseIE):
-    _VALID_URL = r'https?://(?:www\.)?(?P<domain>%s)(?:/(?:films/title|show|(?:news/)?videos?))?/(?P<id>[^?#]+)' % ViewLiftBaseIE._DOMAINS_REGEX
+    IE_NAME = 'viewlift'
+    _VALID_URL = r'https?://(?:www\.)?(?P<domain>%s)(?P<path>(?:/(?:films/title|show|(?:news/)?videos?|watch))?/(?P<id>[^?#]+))' % ViewLiftBaseIE._DOMAINS_REGEX
     _TESTS = [{
         'url': 'http://www.snagfilms.com/films/title/lost_for_life',
         'md5': '19844f897b35af219773fd63bdec2942',
@@ -151,10 +190,13 @@ class ViewLiftIE(ViewLiftBaseIE):
             'id': '00000148-7b53-de26-a9fb-fbf306f70020',
             'display_id': 'augie_alone/s_2_ep_12_love',
             'ext': 'mp4',
-            'title': 'Augie, Alone:S. 2 Ep. 12 - Love',
-            'description': 'md5:db2a5c72d994f16a780c1eb353a8f403',
+            'title': 'S. 2 Ep. 12 - Love',
+            'description': 'Augie finds love.',
             'thumbnail': r're:^https?://.*\.jpg',
             'duration': 107,
+            'upload_date': '20141012',
+            'timestamp': 1413129540,
+            'age_limit': 17,
         },
         'params': {
             'skip_download': True,
@@ -177,6 +219,9 @@ class ViewLiftIE(ViewLiftBaseIE):
         # Was once Kaltura embed
         'url': 'https://www.monumentalsportsnetwork.com/videos/john-carlson-postgame-2-25-15',
         'only_matching': True,
+    }, {
+        'url': 'https://www.marquee.tv/watch/sadlerswells-sacredmonsters',
+        'only_matching': True,
     }]
 
     @classmethod
@@ -184,119 +229,22 @@ class ViewLiftIE(ViewLiftBaseIE):
         return False if ViewLiftEmbedIE.suitable(url) else super(ViewLiftIE, cls).suitable(url)
 
     def _real_extract(self, url):
-        domain, display_id = re.match(self._VALID_URL, url).groups()
-
-        webpage = self._download_webpage(url, display_id)
-
-        if ">Sorry, the Film you're looking for is not available.<" in webpage:
-            raise ExtractorError(
-                'Film %s is not available.' % display_id, expected=True)
-
-        initial_store_state = self._search_regex(
-            r"window\.initialStoreState\s*=.*?JSON\.parse\(unescape\(atob\('([^']+)'\)\)\)",
-            webpage, 'Initial Store State', default=None)
-        if initial_store_state:
-            modules = self._parse_json(compat_urllib_parse_unquote(base64.b64decode(
-                initial_store_state).decode()), display_id)['page']['data']['modules']
-            content_data = next(m['contentData'][0] for m in modules if m.get('moduleType') == 'VideoDetailModule')
-            gist = content_data['gist']
-            film_id = gist['id']
-            title = gist['title']
-            video_assets = try_get(
-                content_data, lambda x: x['streamingInfo']['videoAssets'], dict)
-            if not video_assets:
-                token = self._download_json(
-                    'https://prod-api.viewlift.com/identity/anonymous-token',
-                    film_id, 'Downloading authorization token',
-                    query={'site': 'snagfilms'})['authorizationToken']
-                video_assets = self._download_json(
-                    'https://prod-api.viewlift.com/entitlement/video/status',
-                    film_id, headers={
-                        'Authorization': token,
-                        'Referer': url,
-                    }, query={
-                        'id': film_id
-                    })['video']['streamingInfo']['videoAssets']
-
-            formats = []
-            mpeg_video_assets = video_assets.get('mpeg') or []
-            for video_asset in mpeg_video_assets:
-                video_asset_url = video_asset.get('url')
-                if not video_asset:
-                    continue
-                bitrate = int_or_none(video_asset.get('bitrate'))
-                height = int_or_none(self._search_regex(
-                    r'^_?(\d+)[pP]$', video_asset.get('renditionValue'),
-                    'height', default=None))
-                formats.append({
-                    'url': video_asset_url,
-                    'format_id': 'http%s' % ('-%d' % bitrate if bitrate else ''),
-                    'tbr': bitrate,
-                    'height': height,
-                    'vcodec': video_asset.get('codec'),
-                })
-
-            hls_url = video_assets.get('hls')
-            if hls_url:
-                formats.extend(self._extract_m3u8_formats(
-                    hls_url, film_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
-            self._sort_formats(formats, ('height', 'tbr', 'format_id'))
-
-            info = {
-                'id': film_id,
-                'display_id': display_id,
-                'title': title,
-                'description': gist.get('description'),
-                'thumbnail': gist.get('videoImageUrl'),
-                'duration': int_or_none(gist.get('runtime')),
-                'age_limit': parse_age_limit(content_data.get('parentalRating')),
-                'timestamp': int_or_none(gist.get('publishDate'), 1000),
-                'formats': formats,
-            }
-            for k in ('categories', 'tags'):
-                info[k] = [v['title'] for v in content_data.get(k, []) if v.get('title')]
-            return info
-        else:
-            film_id = self._search_regex(r'filmId=([\da-f-]{36})"', webpage, 'film id')
-
-            snag = self._parse_json(
-                self._search_regex(
-                    r'Snag\.page\.data\s*=\s*(\[.+?\]);', webpage, 'snag', default='[]'),
-                display_id)
-
-            for item in snag:
-                if item.get('data', {}).get('film', {}).get('id') == film_id:
-                    data = item['data']['film']
-                    title = data['title']
-                    description = clean_html(data.get('synopsis'))
-                    thumbnail = data.get('image')
-                    duration = int_or_none(data.get('duration') or data.get('runtime'))
-                    categories = [
-                        category['title'] for category in data.get('categories', [])
-                        if category.get('title')]
-                    break
-            else:
-                title = self._html_search_regex(
-                    (r'itemprop="title">([^<]+)<',
-                     r'(?s)itemprop="title">(.+?)<div'), webpage, 'title')
-                description = self._html_search_regex(
-                    r'(?s)<div itemprop="description" class="film-synopsis-inner ">(.+?)</div>',
-                    webpage, 'description', default=None) or self._og_search_description(webpage)
-                thumbnail = self._og_search_thumbnail(webpage)
-                duration = parse_duration(self._search_regex(
-                    r'<span itemprop="duration" class="film-duration strong">([^<]+)<',
-                    webpage, 'duration', fatal=False))
-                categories = re.findall(r'<a href="/movies/[^"]+">([^<]+)</a>', webpage)
-
-            return {
-                '_type': 'url_transparent',
-                'url': 'http://%s/embed/player?filmId=%s' % (domain, film_id),
-                'id': film_id,
-                'display_id': display_id,
-                'title': title,
-                'description': description,
-                'thumbnail': thumbnail,
-                'duration': duration,
-                'categories': categories,
-                'ie_key': 'ViewLiftEmbed',
-            }
+        domain, path, display_id = re.match(self._VALID_URL, url).groups()
+        site = domain.split('.')[-2]
+        if site in self._SITE_MAP:
+            site = self._SITE_MAP[site]
+        modules = self._call_api(
+            site, 'content/pages', display_id, {
+                'includeContent': 'true',
+                'moduleOffset': 1,
+                'path': path,
+                'site': site,
+            })['modules']
+        film_id = next(m['contentData'][0]['gist']['id'] for m in modules if m.get('moduleType') == 'VideoDetailModule')
+        return {
+            '_type': 'url_transparent',
+            'url': 'http://%s/embed/player?filmId=%s' % (domain, film_id),
+            'id': film_id,
+            'display_id': display_id,
+            'ie_key': 'ViewLiftEmbed',
+        }
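
The new ViewLift base class above authenticates once per site (identity/signin when credentials are configured, identity/anonymous-token otherwise) and caches the result in _TOKENS so every later API call reuses it. A self-contained sketch of that caching pattern; fetch_json is a hypothetical stand-in for youtube-dl's _download_json:

    _TOKENS = {}

    def get_token(site, fetch_json):
        # One network round-trip per site; later calls hit the cache.
        token = _TOKENS.get(site)
        if not token:
            resp = fetch_json(
                'https://prod-api.viewlift.com/identity/anonymous-token',
                {'site': site})
            _TOKENS[site] = token = resp['authorizationToken']
        return token

    # Offline usage with a fake fetcher:
    fake = lambda url, query: {'authorizationToken': 'tok-' + query['site']}
    print(get_token('snagfilms', fake))  # fetched
    print(get_token('snagfilms', fake))  # served from the cache
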
index baa46d5f3513cbde337f144c7143a9c501455ff4..8cd611e1e42f177e7331ca4c3ea08a350a613e7f 100644 (file)
--- a/youtube_dl/extractor/vimeo.py
+++ b/youtube_dl/extractor/vimeo.py
@@ -33,6 +33,7 @@ from ..utils import (
     unified_timestamp,
     unsmuggle_url,
     urlencode_postdata,
+    urljoin,
     unescapeHTML,
 )
 
@@ -191,7 +192,7 @@ class VimeoBaseInfoExtractor(InfoExtractor):
             for tt in text_tracks:
                 subtitles[tt['lang']] = [{
                     'ext': 'vtt',
-                    'url': 'https://vimeo.com' + tt['url'],
+                    'url': urljoin('https://vimeo.com', tt['url']),
                 }]
 
         thumbnails = []
@@ -591,7 +592,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
             # Retrieve video webpage to extract further information
             webpage, urlh = self._download_webpage_handle(
                 url, video_id, headers=headers)
-            redirect_url = compat_str(urlh.geturl())
+            redirect_url = urlh.geturl()
         except ExtractorError as ee:
             if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 403:
                 errmsg = ee.cause.read()
@@ -841,33 +842,6 @@ class VimeoChannelIE(VimeoBaseInfoExtractor):
         return self._TITLE or self._html_search_regex(
             self._TITLE_RE, webpage, 'list title', fatal=False)
 
-    def _login_list_password(self, page_url, list_id, webpage):
-        login_form = self._search_regex(
-            r'(?s)<form[^>]+?id="pw_form"(.*?)</form>',
-            webpage, 'login form', default=None)
-        if not login_form:
-            return webpage
-
-        password = self._downloader.params.get('videopassword')
-        if password is None:
-            raise ExtractorError('This album is protected by a password, use the --video-password option', expected=True)
-        fields = self._hidden_inputs(login_form)
-        token, vuid = self._extract_xsrft_and_vuid(webpage)
-        fields['token'] = token
-        fields['password'] = password
-        post = urlencode_postdata(fields)
-        password_path = self._search_regex(
-            r'action="([^"]+)"', login_form, 'password URL')
-        password_url = compat_urlparse.urljoin(page_url, password_path)
-        password_request = sanitized_Request(password_url, post)
-        password_request.add_header('Content-type', 'application/x-www-form-urlencoded')
-        self._set_vimeo_cookie('vuid', vuid)
-        self._set_vimeo_cookie('xsrft', token)
-
-        return self._download_webpage(
-            password_request, list_id,
-            'Verifying the password', 'Wrong password')
-
     def _title_and_entries(self, list_id, base_url):
         for pagenum in itertools.count(1):
             page_url = self._page_url(base_url, pagenum)
@@ -876,7 +850,6 @@ class VimeoChannelIE(VimeoBaseInfoExtractor):
                 'Downloading page %s' % pagenum)
 
             if pagenum == 1:
-                webpage = self._login_list_password(page_url, list_id, webpage)
                 yield self._extract_list_title(webpage)
 
             # Try extracting href first since not all videos are available via
@@ -923,7 +896,7 @@ class VimeoUserIE(VimeoChannelIE):
     _BASE_URL_TEMPL = 'https://vimeo.com/%s'
 
 
-class VimeoAlbumIE(VimeoChannelIE):
+class VimeoAlbumIE(VimeoBaseInfoExtractor):
     IE_NAME = 'vimeo:album'
     _VALID_URL = r'https://vimeo\.com/(?:album|showcase)/(?P<id>\d+)(?:$|[?#]|/(?!video))'
     _TITLE_RE = r'<header id="page_header">\n\s*<h1>(.*?)</h1>'
@@ -973,13 +946,39 @@ class VimeoAlbumIE(VimeoChannelIE):
     def _real_extract(self, url):
         album_id = self._match_id(url)
         webpage = self._download_webpage(url, album_id)
-        webpage = self._login_list_password(url, album_id, webpage)
-        api_config = self._extract_vimeo_config(webpage, album_id)['api']
+        viewer = self._parse_json(self._search_regex(
+            r'bootstrap_data\s*=\s*({.+?})</script>',
+            webpage, 'bootstrap data'), album_id)['viewer']
+        jwt = viewer['jwt']
+        album = self._download_json(
+            'https://api.vimeo.com/albums/' + album_id,
+            album_id, headers={'Authorization': 'jwt ' + jwt},
+            query={'fields': 'description,name,privacy'})
+        hashed_pass = None
+        if try_get(album, lambda x: x['privacy']['view']) == 'password':
+            password = self._downloader.params.get('videopassword')
+            if not password:
+                raise ExtractorError(
+                    'This album is protected by a password, use the --video-password option',
+                    expected=True)
+            self._set_vimeo_cookie('vuid', viewer['vuid'])
+            try:
+                hashed_pass = self._download_json(
+                    'https://vimeo.com/showcase/%s/auth' % album_id,
+                    album_id, 'Verifying the password', data=urlencode_postdata({
+                        'password': password,
+                        'token': viewer['xsrft'],
+                    }), headers={
+                        'X-Requested-With': 'XMLHttpRequest',
+                    })['hashed_pass']
+            except ExtractorError as e:
+                if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
+                    raise ExtractorError('Wrong password', expected=True)
+                raise
         entries = OnDemandPagedList(functools.partial(
-            self._fetch_page, album_id, api_config['jwt'],
-            api_config.get('hashed_pass')), self._PAGE_SIZE)
-        return self.playlist_result(entries, album_id, self._html_search_regex(
-            r'<title>\s*(.+?)(?:\s+on Vimeo)?</title>', webpage, 'title', fatal=False))
+            self._fetch_page, album_id, jwt, hashed_pass), self._PAGE_SIZE)
+        return self.playlist_result(
+            entries, album_id, album.get('name'), album.get('description'))
 
 
 class VimeoGroupsIE(VimeoChannelIE):
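
The VimeoAlbumIE rewrite above reads the viewer's jwt/xsrft/vuid from the page's bootstrap data, asks the albums API whether the showcase is password-protected, and only then POSTs the password to the /auth endpoint to obtain the hashed_pass used for paging. A sketch of that decision flow; endpoint and field names are taken from the diff, post_json is a hypothetical stand-in for the urlencoded-POST download helper:

    def unlock_album(album_id, album, viewer, password, post_json):
        # Returns hashed_pass, or None when the album is open.
        if (album.get('privacy') or {}).get('view') != 'password':
            return None
        if not password:
            raise ValueError('password required (--video-password)')
        resp = post_json(
            'https://vimeo.com/showcase/%s/auth' % album_id,
            data={'password': password, 'token': viewer['xsrft']},
            headers={'X-Requested-With': 'XMLHttpRequest'})
        return resp['hashed_pass']

    fake = lambda url, data, headers: {'hashed_pass': 'abc123'}
    album = {'privacy': {'view': 'password'}}
    print(unlock_album('123', album, {'xsrft': 't'}, 'pw', fake))  # abc123
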
index 085514d470bcee6d5719a9260ceefdfc3a9cdb17..168e5e90152b44d76dcbbbeb1b274db5dcbf5827 100644 (file)
--- a/youtube_dl/extractor/wistia.py
+++ b/youtube_dl/extractor/wistia.py
@@ -45,22 +45,23 @@ class WistiaIE(InfoExtractor):
     # https://wistia.com/support/embed-and-share/video-on-your-website
     @staticmethod
     def _extract_url(webpage):
-        match = re.search(
-            r'<(?:meta[^>]+?content|(?:iframe|script)[^>]+?src)=["\'](?P<url>(?:https?:)?//(?:fast\.)?wistia\.(?:net|com)/embed/(?:iframe|medias)/[a-z0-9]{10})', webpage)
-        if match:
-            return unescapeHTML(match.group('url'))
+        urls = WistiaIE._extract_urls(webpage)
+        return urls[0] if urls else None
 
-        match = re.search(
-            r'''(?sx)
-                <script[^>]+src=(["'])(?:https?:)?//fast\.wistia\.com/assets/external/E-v1\.js\1[^>]*>.*?
-                <div[^>]+class=(["']).*?\bwistia_async_(?P<id>[a-z0-9]{10})\b.*?\2
-            ''', webpage)
-        if match:
-            return 'wistia:%s' % match.group('id')
-
-        match = re.search(r'(?:data-wistia-?id=["\']|Wistia\.embed\(["\']|id=["\']wistia_)(?P<id>[a-z0-9]{10})', webpage)
-        if match:
-            return 'wistia:%s' % match.group('id')
+    @staticmethod
+    def _extract_urls(webpage):
+        urls = []
+        for match in re.finditer(
+                r'<(?:meta[^>]+?content|(?:iframe|script)[^>]+?src)=["\'](?P<url>(?:https?:)?//(?:fast\.)?wistia\.(?:net|com)/embed/(?:iframe|medias)/[a-z0-9]{10})', webpage):
+            urls.append(unescapeHTML(match.group('url')))
+        for match in re.finditer(
+                r'''(?sx)
+                    <div[^>]+class=(["']).*?\bwistia_async_(?P<id>[a-z0-9]{10})\b.*?\2
+                ''', webpage):
+            urls.append('wistia:%s' % match.group('id'))
+        for match in re.finditer(r'(?:data-wistia-?id=["\']|Wistia\.embed\(["\']|id=["\']wistia_)(?P<id>[a-z0-9]{10})', webpage):
+            urls.append('wistia:%s' % match.group('id'))
+        return urls
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
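
_extract_url above is now a thin wrapper over the new _extract_urls, which scans the whole page with re.finditer and returns every Wistia embed instead of stopping at the first match, so generic pages with several embeds yield several videos. A trimmed, standalone version of the iframe branch, run against a hypothetical two-embed page:

    import re

    def extract_wistia_iframe_urls(webpage):
        # Iframe/script-src branch only; the real method also collects
        # wistia_async_<id> divs and data-wistia-id hooks.
        return [m.group('url') for m in re.finditer(
            r'<(?:iframe|script)[^>]+?src=["\'](?P<url>(?:https?:)?//(?:fast\.)?wistia\.(?:net|com)/embed/iframe/[a-z0-9]{10})',
            webpage)]

    page = ('<iframe src="//fast.wistia.net/embed/iframe/abcde12345"></iframe>'
            '<iframe src="//fast.wistia.net/embed/iframe/fghij67890"></iframe>')
    print(extract_wistia_iframe_urls(page))  # both embed URLs
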
index a5b94d2794166d452464b728ec40b2b258459c64..0f7be6a7d93adc3a4fea8c6995cd8b58a084b9b4 100644 (file)
--- a/youtube_dl/extractor/xhamster.py
+++ b/youtube_dl/extractor/xhamster.py
@@ -113,7 +113,7 @@ class XHamsterIE(InfoExtractor):
         display_id = mobj.group('display_id') or mobj.group('display_id_2')
 
         desktop_url = re.sub(r'^(https?://(?:.+?\.)?)m\.', r'\1', url)
-        webpage = self._download_webpage(desktop_url, video_id)
+        webpage, urlh = self._download_webpage_handle(desktop_url, video_id)
 
         error = self._html_search_regex(
             r'<div[^>]+id=["\']videoClosed["\'][^>]*>(.+?)</div>',
@@ -161,6 +161,9 @@ class XHamsterIE(InfoExtractor):
                         'ext': determine_ext(format_url, 'mp4'),
                         'height': get_height(quality),
                         'filesize': filesize,
+                        'http_headers': {
+                            'Referer': urlh.geturl(),
+                        },
                     })
             self._sort_formats(formats)
 
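
The XHamster change above switches to _download_webpage_handle so the URL actually served (after any redirect) is known, and sends it back as a Referer header on every format entry. A minimal sketch of the shape of that per-format header; the URLs are hypothetical:

    def format_entry(format_url, final_page_url):
        # final_page_url should be urlh.geturl(), the post-redirect
        # page URL, not the URL originally requested.
        return {
            'url': format_url,
            'http_headers': {'Referer': final_page_url},
        }

    print(format_entry('https://cdn.example/v.mp4',
                       'https://xhamster.com/videos/some-video-123'))
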
index c6c0b3291c8320064fa0a7529be5b5d78f14461c..01b253dcb1e8c92232a06c0b2b4153a545dabcc1 100644 (file)
--- a/youtube_dl/extractor/xtube.py
+++ b/youtube_dl/extractor/xtube.py
@@ -47,7 +47,7 @@ class XTubeIE(InfoExtractor):
             'display_id': 'A-Super-Run-Part-1-YT',
             'ext': 'flv',
             'title': 'A Super Run - Part 1 (YT)',
-            'description': 'md5:ca0d47afff4a9b2942e4b41aa970fd93',
+            'description': 'md5:4cc3af1aa1b0413289babc88f0d4f616',
             'uploader': 'tshirtguy59',
             'duration': 579,
             'view_count': int,
@@ -87,10 +87,24 @@ class XTubeIE(InfoExtractor):
                 'Cookie': 'age_verified=1; cookiesAccepted=1',
             })
 
-        sources = self._parse_json(self._search_regex(
-            r'(["\'])?sources\1?\s*:\s*(?P<sources>{.+?}),',
-            webpage, 'sources', group='sources'), video_id,
-            transform_source=js_to_json)
+        title, thumbnail, duration = [None] * 3
+
+        config = self._parse_json(self._search_regex(
+            r'playerConf\s*=\s*({.+?})\s*,\s*\n', webpage, 'config',
+            default='{}'), video_id, transform_source=js_to_json, fatal=False)
+        if config:
+            config = config.get('mainRoll')
+            if isinstance(config, dict):
+                title = config.get('title')
+                thumbnail = config.get('poster')
+                duration = int_or_none(config.get('duration'))
+                sources = config.get('sources') or config.get('format')
+
+        if not isinstance(sources, dict):
+            sources = self._parse_json(self._search_regex(
+                r'(["\'])?sources\1?\s*:\s*(?P<sources>{.+?}),',
+                webpage, 'sources', group='sources'), video_id,
+                transform_source=js_to_json)
 
         formats = []
         for format_id, format_url in sources.items():
@@ -102,20 +116,25 @@ class XTubeIE(InfoExtractor):
         self._remove_duplicate_formats(formats)
         self._sort_formats(formats)
 
-        title = self._search_regex(
-            (r'<h1>\s*(?P<title>[^<]+?)\s*</h1>', r'videoTitle\s*:\s*(["\'])(?P<title>.+?)\1'),
-            webpage, 'title', group='title')
-        description = self._search_regex(
+        if not title:
+            title = self._search_regex(
+                (r'<h1>\s*(?P<title>[^<]+?)\s*</h1>', r'videoTitle\s*:\s*(["\'])(?P<title>.+?)\1'),
+                webpage, 'title', group='title')
+        description = self._og_search_description(
+            webpage, default=None) or self._html_search_meta(
+            'twitter:description', webpage, default=None) or self._search_regex(
             r'</h1>\s*<p>([^<]+)', webpage, 'description', fatal=False)
         uploader = self._search_regex(
             (r'<input[^>]+name="contentOwnerId"[^>]+value="([^"]+)"',
              r'<span[^>]+class="nickname"[^>]*>([^<]+)'),
             webpage, 'uploader', fatal=False)
-        duration = parse_duration(self._search_regex(
-            r'<dt>Runtime:?</dt>\s*<dd>([^<]+)</dd>',
-            webpage, 'duration', fatal=False))
+        if not duration:
+            duration = parse_duration(self._search_regex(
+                r'<dt>Runtime:?</dt>\s*<dd>([^<]+)</dd>',
+                webpage, 'duration', fatal=False))
         view_count = str_to_int(self._search_regex(
-            r'<dt>Views:?</dt>\s*<dd>([\d,\.]+)</dd>',
+            (r'["\']viewsCount["\'][^>]*>(\d+)\s+views',
+             r'<dt>Views:?</dt>\s*<dd>([\d,\.]+)</dd>'),
             webpage, 'view count', fatal=False))
         comment_count = str_to_int(self._html_search_regex(
             r'>Comments? \(([\d,\.]+)\)<',
@@ -126,6 +145,7 @@ class XTubeIE(InfoExtractor):
             'display_id': display_id,
             'title': title,
             'description': description,
+            'thumbnail': thumbnail,
             'uploader': uploader,
             'duration': duration,
             'view_count': view_count,
@@ -144,7 +164,7 @@ class XTubeUserIE(InfoExtractor):
             'id': 'greenshowers-4056496',
             'age_limit': 18,
         },
-        'playlist_mincount': 155,
+        'playlist_mincount': 154,
     }
 
     def _real_extract(self, url):
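
The XTube rework above prefers the newer playerConf JSON (title, poster, duration, sources) and only falls back to the legacy sources regex when it is absent. A condensed sketch of that two-tier lookup against a hypothetical page snippet; the real code runs the blob through js_to_json first, here it is assumed to be valid JSON already:

    import json
    import re

    page = ('playerConf = {"mainRoll": {"title": "A Super Run", '
            '"duration": 579, "sources": {"720p": '
            '"https://cdn.example/720.mp4"}}},\n')

    m = re.search(r'playerConf\s*=\s*({.+?})\s*,\s*\n', page)
    config = (json.loads(m.group(1)) if m else {}).get('mainRoll') or {}

    sources = config.get('sources') or config.get('format')
    if not isinstance(sources, dict):
        sources = {}  # legacy pages: parse the old sources blob here
    print(config.get('title'), sources)
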
index dff69fcb7aca250373fc0e70b2f8278ed2661755..88aabd272c98e944523f3b333174342ba23c9fe1 100644 (file)
--- a/youtube_dl/extractor/youjizz.py
+++ b/youtube_dl/extractor/youjizz.py
@@ -44,7 +44,7 @@ class YouJizzIE(InfoExtractor):
 
         encodings = self._parse_json(
             self._search_regex(
-                r'encodings\s*=\s*(\[.+?\]);\n', webpage, 'encodings',
+                r'[Ee]ncodings\s*=\s*(\[.+?\]);\n', webpage, 'encodings',
                 default='[]'),
             video_id, fatal=False)
         for encoding in encodings:
index eacaa5ecdb70d2a16748b4c2e58edc14d7d69484..908defecd3f24ef4070c46f6e8d195aea625ab4b 100644 (file)
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -29,7 +29,6 @@ from ..compat import (
 from ..utils import (
     bool_or_none,
     clean_html,
-    dict_get,
     error_to_compat_str,
     extract_attributes,
     ExtractorError,
@@ -570,7 +569,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'upload_date': '20120506',
                 'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
                 'alt_title': 'I Love It (feat. Charli XCX)',
-                'description': 'md5:f3ceb5ef83a08d95b9d146f973157cc8',
+                'description': 'md5:19a2f98d9032b9311e686ed039564f63',
                 'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli',
                          'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop',
                          'iconic ep', 'iconic', 'love', 'it'],
@@ -685,12 +684,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'id': 'nfWlot6h_JM',
                 'ext': 'm4a',
                 'title': 'Taylor Swift - Shake It Off',
-                'description': 'md5:bec2185232c05479482cb5a9b82719bf',
+                'description': 'md5:307195cd21ff7fa352270fe884570ef0',
                 'duration': 242,
                 'uploader': 'TaylorSwiftVEVO',
                 'uploader_id': 'TaylorSwiftVEVO',
                 'upload_date': '20140818',
-                'creator': 'Taylor Swift',
             },
             'params': {
                 'youtube_include_dash_manifest': True,
@@ -755,11 +753,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'upload_date': '20100430',
                 'uploader_id': 'deadmau5',
                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
-                'creator': 'deadmau5',
+                'creator': 'Dada Life, deadmau5',
                 'description': 'md5:12c56784b8032162bb936a5f76d55360',
                 'uploader': 'deadmau5',
                 'title': 'Deadmau5 - Some Chords (HD)',
-                'alt_title': 'Some Chords',
+                'alt_title': 'This Machine Kills Some Chords',
             },
             'expected_warnings': [
                 'DASH manifest missing',
@@ -1135,6 +1133,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'skip_download': True,
                 'youtube_include_dash_manifest': False,
             },
+            'skip': 'not actual anymore',
         },
         {
             # Youtube Music Auto-generated description
@@ -1145,8 +1144,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'title': 'Voyeur Girl',
                 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
                 'upload_date': '20190312',
-                'uploader': 'Various Artists - Topic',
-                'uploader_id': 'UCVWKBi1ELZn0QX2CBLSkiyw',
+                'uploader': 'Stephen - Topic',
+                'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
                 'artist': 'Stephen',
                 'track': 'Voyeur Girl',
                 'album': 'it\'s too much love to know my dear',
@@ -1210,7 +1209,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'id': '-hcAI0g-f5M',
                 'ext': 'mp4',
                 'title': 'Put It On Me',
-                'description': 'md5:93c55acc682ae7b0c668f2e34e1c069e',
+                'description': 'md5:f6422397c07c4c907c6638e1fee380a5',
                 'upload_date': '20180426',
                 'uploader': 'Matt Maeson - Topic',
                 'uploader_id': 'UCnEkIGqtGcQMLk73Kp-Q5LQ',
@@ -1256,7 +1255,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
 
     def _extract_signature_function(self, video_id, player_url, example_sig):
         id_m = re.match(
-            r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player(?:-new)?|(?:/[a-z]{2,3}_[A-Z]{2})?/base)?\.(?P<ext>[a-z]+)$',
+            r'.*?[-.](?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player(?:-new)?|(?:/[a-z]{2,3}_[A-Z]{2})?/base)?\.(?P<ext>[a-z]+)$',
             player_url)
         if not id_m:
             raise ExtractorError('Cannot identify player %r' % player_url)
@@ -1708,9 +1707,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         def extract_view_count(v_info):
             return int_or_none(try_get(v_info, lambda x: x['view_count'][0]))
 
-        def extract_token(v_info):
-            return dict_get(v_info, ('account_playback_token', 'accountPlaybackToken', 'token'))
-
         def extract_player_response(player_response, video_id):
             pl_response = str_or_none(player_response)
             if not pl_response:
@@ -1723,6 +1719,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         player_response = {}
 
         # Get video info
+        video_info = {}
         embed_webpage = None
         if re.search(r'player-age-gate-content">', video_webpage) is not None:
             age_gate = True
@@ -1737,19 +1734,21 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                     r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
             })
             video_info_url = proto + '://www.youtube.com/get_video_info?' + data
-            video_info_webpage = self._download_webpage(
-                video_info_url, video_id,
-                note='Refetching age-gated info webpage',
-                errnote='unable to download video info webpage')
-            video_info = compat_parse_qs(video_info_webpage)
-            pl_response = video_info.get('player_response', [None])[0]
-            player_response = extract_player_response(pl_response, video_id)
-            add_dash_mpd(video_info)
-            view_count = extract_view_count(video_info)
+            try:
+                video_info_webpage = self._download_webpage(
+                    video_info_url, video_id,
+                    note='Refetching age-gated info webpage',
+                    errnote='unable to download video info webpage')
+            except ExtractorError:
+                video_info_webpage = None
+            if video_info_webpage:
+                video_info = compat_parse_qs(video_info_webpage)
+                pl_response = video_info.get('player_response', [None])[0]
+                player_response = extract_player_response(pl_response, video_id)
+                add_dash_mpd(video_info)
+                view_count = extract_view_count(video_info)
         else:
             age_gate = False
-            video_info = None
-            sts = None
             # Try looking directly into the video webpage
             ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
             if ytplayer_config:
@@ -1766,61 +1765,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                         args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
                 if args.get('livestream') == '1' or args.get('live_playback') == 1:
                     is_live = True
-                sts = ytplayer_config.get('sts')
                 if not player_response:
                     player_response = extract_player_response(args.get('player_response'), video_id)
             if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
                 add_dash_mpd_pr(player_response)
-                # We also try looking in get_video_info since it may contain different dashmpd
-                # URL that points to a DASH manifest with possibly different itag set (some itags
-                # are missing from DASH manifest pointed by webpage's dashmpd, some - from DASH
-                # manifest pointed by get_video_info's dashmpd).
-                # The general idea is to take a union of itags of both DASH manifests (for example
-                # video with such 'manifest behavior' see https://github.com/ytdl-org/youtube-dl/issues/6093)
-                self.report_video_info_webpage_download(video_id)
-                for el in ('embedded', 'detailpage', 'vevo', ''):
-                    query = {
-                        'video_id': video_id,
-                        'ps': 'default',
-                        'eurl': '',
-                        'gl': 'US',
-                        'hl': 'en',
-                    }
-                    if el:
-                        query['el'] = el
-                    if sts:
-                        query['sts'] = sts
-                    video_info_webpage = self._download_webpage(
-                        '%s://www.youtube.com/get_video_info' % proto,
-                        video_id, note=False,
-                        errnote='unable to download video info webpage',
-                        fatal=False, query=query)
-                    if not video_info_webpage:
-                        continue
-                    get_video_info = compat_parse_qs(video_info_webpage)
-                    if not player_response:
-                        pl_response = get_video_info.get('player_response', [None])[0]
-                        player_response = extract_player_response(pl_response, video_id)
-                    add_dash_mpd(get_video_info)
-                    if view_count is None:
-                        view_count = extract_view_count(get_video_info)
-                    if not video_info:
-                        video_info = get_video_info
-                    get_token = extract_token(get_video_info)
-                    if get_token:
-                        # Different get_video_info requests may report different results, e.g.
-                        # some may report video unavailability, but some may serve it without
-                        # any complaint (see https://github.com/ytdl-org/youtube-dl/issues/7362,
-                        # the original webpage as well as el=info and el=embedded get_video_info
-                        # requests report video unavailability due to geo restriction while
-                        # el=detailpage succeeds and returns valid data). This is probably
-                        # due to YouTube measures against IP ranges of hosting providers.
-                        # Working around by preferring the first succeeded video_info containing
-                        # the token if no such video_info yet was found.
-                        token = extract_token(video_info)
-                        if not token:
-                            video_info = get_video_info
-                        break
 
         def extract_unavailable_message():
             messages = []
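
In the hunk above, the age-gated get_video_info fetch is wrapped in try/except, so a failed request no longer aborts extraction: video_info just stays empty and the later "if not video_info and not player_response" check decides whether anything usable is left. The same degrade-gracefully shape in isolation, with a stand-in downloader and error type:

    def fetch_video_info(download_webpage, url):
        # Swallow the download failure and return an empty mapping;
        # the caller falls back to player_response data.
        try:
            raw = download_webpage(url)
        except IOError:  # ExtractorError in the real code
            return {}
        return {'raw': raw}  # compat_parse_qs(raw) in the real code

    def failing(url):
        raise IOError('403 Forbidden')

    print(fetch_video_info(failing, 'https://www.youtube.com/get_video_info'))  # {}
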
@@ -1833,13 +1781,16 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             if messages:
                 return '\n'.join(messages)
 
-        if not video_info:
+        if not video_info and not player_response:
             unavailable_message = extract_unavailable_message()
             if not unavailable_message:
                 unavailable_message = 'Unable to extract video data'
             raise ExtractorError(
                 'YouTube said: %s' % unavailable_message, expected=True, video_id=video_id)
 
+        if not isinstance(video_info, dict):
+            video_info = {}
+
         video_details = try_get(
             player_response, lambda x: x['videoDetails'], dict) or {}
 
@@ -2035,7 +1986,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                                 else:
                                     player_version = self._search_regex(
                                         [r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js',
-                                         r'(?:www|player(?:_ias)?)-([^/]+)(?:/[a-z]{2,3}_[A-Z]{2})?/base\.js'],
+                                         r'(?:www|player(?:_ias)?)[-.]([^/]+)(?:/[a-z]{2,3}_[A-Z]{2})?/base\.js'],
                                         player_url,
                                         'html5 player', fatal=False)
                                     player_desc = 'html5 player %s' % player_version
@@ -2392,30 +2343,23 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                         f['stretched_ratio'] = ratio
 
         if not formats:
-            token = extract_token(video_info)
-            if not token:
-                if 'reason' in video_info:
-                    if 'The uploader has not made this video available in your country.' in video_info['reason']:
-                        regions_allowed = self._html_search_meta(
-                            'regionsAllowed', video_webpage, default=None)
-                        countries = regions_allowed.split(',') if regions_allowed else None
-                        self.raise_geo_restricted(
-                            msg=video_info['reason'][0], countries=countries)
-                    reason = video_info['reason'][0]
-                    if 'Invalid parameters' in reason:
-                        unavailable_message = extract_unavailable_message()
-                        if unavailable_message:
-                            reason = unavailable_message
-                    raise ExtractorError(
-                        'YouTube said: %s' % reason,
-                        expected=True, video_id=video_id)
-                else:
-                    raise ExtractorError(
-                        '"token" parameter not in video info for unknown reason',
-                        video_id=video_id)
-
-        if not formats and (video_info.get('license_info') or try_get(player_response, lambda x: x['streamingData']['licenseInfos'])):
-            raise ExtractorError('This video is DRM protected.', expected=True)
+            if 'reason' in video_info:
+                if 'The uploader has not made this video available in your country.' in video_info['reason']:
+                    regions_allowed = self._html_search_meta(
+                        'regionsAllowed', video_webpage, default=None)
+                    countries = regions_allowed.split(',') if regions_allowed else None
+                    self.raise_geo_restricted(
+                        msg=video_info['reason'][0], countries=countries)
+                reason = video_info['reason'][0]
+                if 'Invalid parameters' in reason:
+                    unavailable_message = extract_unavailable_message()
+                    if unavailable_message:
+                        reason = unavailable_message
+                raise ExtractorError(
+                    'YouTube said: %s' % reason,
+                    expected=True, video_id=video_id)
+            if video_info.get('license_info') or try_get(player_response, lambda x: x['streamingData']['licenseInfos']):
+                raise ExtractorError('This video is DRM protected.', expected=True)
 
         self._sort_formats(formats)
 
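The countries hint passed to raise_geo_restricted above comes from splitting the page's regionsAllowed meta value on commas; when the meta tag is absent, no hint is passed. A toy version of that parsing (meta values invented):

    for regions_allowed in ('DE,AT,CH', None):
        countries = regions_allowed.split(',') if regions_allowed else None
        print(countries)
    # prints: ['DE', 'AT', 'CH'], then None
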
@@ -2495,20 +2439,23 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
     _VIDEO_RE = _VIDEO_RE_TPL % r'(?P<id>[0-9A-Za-z_-]{11})'
     IE_NAME = 'youtube:playlist'
     _TESTS = [{
-        'url': 'https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
+        'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
         'info_dict': {
-            'title': 'ytdl test PL',
-            'id': 'PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
+            'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
+            'uploader': 'Sergey M.',
+            'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
+            'title': 'youtube-dl public playlist',
         },
-        'playlist_count': 3,
+        'playlist_count': 1,
     }, {
-        'url': 'https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
+        'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
         'info_dict': {
-            'id': 'PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
-            'title': 'YDL_Empty_List',
+            'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
+            'uploader': 'Sergey M.',
+            'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
+            'title': 'youtube-dl empty playlist',
         },
         'playlist_count': 0,
-        'skip': 'This playlist is private',
     }, {
         'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
         'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
@@ -2518,7 +2465,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
             'uploader': 'Christiaan008',
             'uploader_id': 'ChRiStIaAn008',
         },
-        'playlist_count': 95,
+        'playlist_count': 96,
     }, {
         'note': 'issue #673',
         'url': 'PLBB231211A4F62143',
index bacb82eeeb2a549edbb0cbf6d0a67e07f28b595b..f6496f5168cf057c9c415cc7461105462ad66370 100644 (file)
--- a/youtube_dl/extractor/zapiks.py
+++ b/youtube_dl/extractor/zapiks.py
@@ -29,7 +29,6 @@ class ZapiksIE(InfoExtractor):
                 'timestamp': 1359044972,
                 'upload_date': '20130124',
                 'view_count': int,
-                'comment_count': int,
             },
         },
         {
index 145c123a42fee5e67c0fd8c2750ea13562632666..656864b2ed8a9982c1da934b01aff6bcb7126acf 100644 (file)
--- a/youtube_dl/extractor/zdf.py
+++ b/youtube_dl/extractor/zdf.py
@@ -244,14 +244,14 @@ class ZDFChannelIE(ZDFBaseIE):
             'id': 'das-aktuelle-sportstudio',
             'title': 'das aktuelle sportstudio | ZDF',
         },
-        'playlist_count': 21,
+        'playlist_mincount': 23,
     }, {
         'url': 'https://www.zdf.de/dokumentation/planet-e',
         'info_dict': {
             'id': 'planet-e',
             'title': 'planet e.',
         },
-        'playlist_count': 4,
+        'playlist_mincount': 50,
     }, {
         'url': 'https://www.zdf.de/filme/taunuskrimi/',
         'only_matching': True,
index 1ffabc62bedacb42aeb34f585d04ed7bc3ff8045..8826b382c3cc70ab21bacef8de94314ec193948b 100644 (file)
--- a/youtube_dl/options.py
+++ b/youtube_dl/options.py
@@ -134,7 +134,7 @@ def parseOpts(overrideArguments=None):
         action='help',
         help='Print this help text and exit')
     general.add_option(
-        '-v', '--version',
+        '--version',
         action='version',
         help='Print program version and exit')
     general.add_option(
index 002ea7f3386215c61bcf3bc60419d0059abf2bc2..84c9646171e0b8f8d6a6397bff5339205cdadcd7 100644 (file)
--- a/youtube_dl/update.py
+++ b/youtube_dl/update.py
@@ -9,6 +9,7 @@ import subprocess
 import sys
 from zipimport import zipimporter
 
+from .compat import compat_realpath
 from .utils import encode_compat_str
 
 from .version import __version__
@@ -84,7 +85,9 @@ def update_self(to_screen, verbose, opener):
     print_notes(to_screen, versions_info['versions'])
 
     # sys.executable is set to the full pathname of the exe-file for py2exe
-    filename = sys.executable if hasattr(sys, 'frozen') else sys.argv[0]
+    # though symlinks are not followed, so we need to resolve them
+    # manually with the help of realpath
+    filename = compat_realpath(sys.executable if hasattr(sys, 'frozen') else sys.argv[0])
 
     if not os.access(filename, os.W_OK):
         to_screen('ERROR: no write permissions on %s' % filename)
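compat_realpath stands in for os.path.realpath here; the point of the change is that the writability check must be performed on the real file behind any symlink, not on the symlink itself. An illustration with assumed paths:

    import os

    # Assumed layout: /usr/local/bin/youtube-dl is a symlink to the
    # actual script installed elsewhere.
    filename = os.path.realpath('/usr/local/bin/youtube-dl')

    # The permission check now targets the resolved file:
    if not os.access(filename, os.W_OK):
        print('ERROR: no write permissions on %s' % filename)
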
index f6204692a81002cdfc44b02d183126e755283bd9..38262bee4cd08f97e4f5009d7066d1466bde25a4 100644 (file)
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -2729,6 +2729,11 @@ class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
 
 
 class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
+    """
+    See [1] for cookie file format.
+
+    1. https://curl.haxx.se/docs/http-cookies.html
+    """
     _HTTPONLY_PREFIX = '#HttpOnly_'
 
     def save(self, filename=None, ignore_discard=False, ignore_expires=False):
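For reference, the curl document cited in the new docstring describes the Netscape cookie file layout: one cookie per line, seven tab-separated fields (domain, include-subdomains flag, path, secure flag, expiry timestamp, name, value), with the #HttpOnly_ prefix marking HTTP-only cookies. A minimal file with invented values (fields separated by tabs):

    # Netscape HTTP Cookie File
    .example.com	TRUE	/	FALSE	1712750000	session_id	abc123
    #HttpOnly_.example.com	TRUE	/	TRUE	1712750000	auth_token	xyz789
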
@@ -2795,6 +2800,15 @@ class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
     https_response = http_response
 
 
+class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
+    if sys.version_info[0] < 3:
+        def redirect_request(self, req, fp, code, msg, headers, newurl):
+            # On Python 2, urlh.geturl() may sometimes return the redirect
+            # URL as a byte string instead of unicode. This workaround
+            # forces it to always return unicode.
+            return compat_urllib_request.HTTPRedirectHandler.redirect_request(self, req, fp, code, msg, headers, compat_str(newurl))
+
+
 def extract_timezone(date_str):
     m = re.search(
         r'^.{8,}?(?P<tz>Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
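A self-contained sketch of how a handler like the YoutubeDLRedirectHandler added above slots into an opener chain (Python 2 only, since that is where the workaround applies; the URL is invented):

    import urllib2  # what compat_urllib_request resolves to on Python 2

    class UnicodeRedirectHandler(urllib2.HTTPRedirectHandler):
        # Same idea as above: coerce the redirect target to unicode
        # before the base class builds the follow-up request.
        def redirect_request(self, req, fp, code, msg, headers, newurl):
            return urllib2.HTTPRedirectHandler.redirect_request(
                self, req, fp, code, msg, headers, unicode(newurl))

    opener = urllib2.build_opener(UnicodeRedirectHandler())
    response = opener.open('http://example.com/old-location')
    print(type(response.geturl()))  # unicode, thanks to the coercion
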
index fa6f7289a0e11c045f741090bfaaef6f006e5ed6..5aedd32688ef2c9c2d6409d85091e4f2e4afe618 100644 (file)
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,3 +1,3 @@
 from __future__ import unicode_literals
 
-__version__ = '2020.01.24'
+__version__ = '2020.03.24'