Raphaël G. Git Repositories - youtubedl/commitdiff
New upstream version 2018.11.07
authorRogério Brito <rbrito@ime.usp.br>
Sun, 11 Nov 2018 01:56:55 +0000 (23:56 -0200)
committerRogério Brito <rbrito@ime.usp.br>
Sun, 11 Nov 2018 01:56:55 +0000 (23:56 -0200)
60 files changed:
ChangeLog
README.md
README.txt
docs/supportedsites.md
test/helper.py
test/test_InfoExtractor.py
test/test_downloader_http.py
test/test_http.py
youtube-dl
youtube-dl.1
youtube_dl/extractor/adobepass.py
youtube_dl/extractor/aparat.py
youtube_dl/extractor/asiancrush.py
youtube_dl/extractor/azmedien.py
youtube_dl/extractor/brightcove.py
youtube_dl/extractor/cliphunter.py
youtube_dl/extractor/cnbc.py
youtube_dl/extractor/common.py
youtube_dl/extractor/crunchyroll.py
youtube_dl/extractor/cwtv.py
youtube_dl/extractor/dailymail.py
youtube_dl/extractor/dailymotion.py
youtube_dl/extractor/eporner.py
youtube_dl/extractor/extractors.py
youtube_dl/extractor/facebook.py
youtube_dl/extractor/fourtube.py
youtube_dl/extractor/generic.py
youtube_dl/extractor/hotstar.py
youtube_dl/extractor/ivi.py
youtube_dl/extractor/jamendo.py
youtube_dl/extractor/laola1tv.py
youtube_dl/extractor/linkedin.py [new file with mode: 0644]
youtube_dl/extractor/mediaset.py
youtube_dl/extractor/njpwworld.py
youtube_dl/extractor/openload.py
youtube_dl/extractor/orf.py
youtube_dl/extractor/patreon.py
youtube_dl/extractor/philharmoniedeparis.py
youtube_dl/extractor/pluralsight.py
youtube_dl/extractor/popcorntv.py
youtube_dl/extractor/pornhub.py
youtube_dl/extractor/rai.py
youtube_dl/extractor/rutube.py
youtube_dl/extractor/screencast.py
youtube_dl/extractor/spike.py
youtube_dl/extractor/sportbox.py
youtube_dl/extractor/ted.py
youtube_dl/extractor/theplatform.py
youtube_dl/extractor/tube8.py
youtube_dl/extractor/tv3.py [deleted file]
youtube_dl/extractor/twitcasting.py [new file with mode: 0644]
youtube_dl/extractor/twitch.py
youtube_dl/extractor/udemy.py
youtube_dl/extractor/viewster.py
youtube_dl/extractor/vimeo.py
youtube_dl/extractor/vrv.py
youtube_dl/extractor/vzaar.py
youtube_dl/extractor/youtube.py
youtube_dl/extractor/zattoo.py
youtube_dl/version.py

index d184f69eee1d8818e499c60d64605c4157e56067..fa5de8b0416c9224454aa185c4a72b25f57a8573 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,112 @@
+version 2018.11.07
+
+Extractors
++ [youtube] Add another JS signature function name regex (#18091, #18093,
+  #18094)
+* [facebook] Fix tahoe request (#17171)
+* [cliphunter] Fix extraction (#18083)
++ [youtube:playlist] Add support for invidio.us (#18077)
+* [zattoo] Arrange API hosts for derived extractors (#18035)
++ [youtube] Add fallback metadata extraction from videoDetails (#18052)
+
+
+version 2018.11.03
+
+Core
+* [extractor/common] Ensure response handle is not prematurely closed before
+  it can be read if it matches expected_status (#17195, #17846, #17447)
+
+Extractors
+* [laola1tv:embed] Set correct stream access URL scheme (#16341)
++ [ehftv] Add support for ehftv.com (#15408)
+* [azmedien] Adapt to major site redesign (#17745, #17746)
++ [twitcasting] Add support for twitcasting.tv (#17981)
+* [orf:tvthek] Fix extraction (#17737, #17956, #18024)
++ [openload] Add support for oload.fun (#18045)
+* [njpwworld] Fix authentication (#17427)
++ [linkedin:learning] Add support for linkedin.com/learning (#13545)
+* [theplatform] Improve error detection (#13222)
+* [cnbc] Simplify extraction (#14280, #17110)
++ [cnbc] Add support for new URL schema (#14193)
+* [aparat] Improve extraction and extract more metadata (#17445, #18008)
+* [aparat] Fix extraction
+
+
+version 2018.10.29
+
+Core
++ [extractor/common] Add validation for JSON-LD URLs
+
+Extractors
++ [sportbox] Add support for matchtv.ru
+* [sportbox] Fix extraction (#17978)
+* [screencast] Fix extraction (#14590, #14617, #17990)
++ [openload] Add support for oload.icu
++ [ivi] Add support for ivi.tv
+* [crunchyroll] Improve extraction failsafeness (#17991)
+* [dailymail] Fix formats extraction (#17976)
+* [viewster] Reduce format requests
+* [cwtv] Handle API errors (#17905)
++ [rutube] Use geo verification headers (#17897)
++ [brightcove:legacy] Add fallbacks to brightcove:new (#13912)
+- [tv3] Remove extractor (#10461, #15339)
+* [ted] Fix extraction for HTTP and RTMP formats (#5941, #17572, #17894)
++ [openload] Add support for oload.cc (#17823)
++ [patreon] Extract post_file URL (#17792)
+* [patreon] Fix extraction (#14502, #10471)
+
+
+version 2018.10.05
+
+Extractors
+* [pluralsight] Improve authentication (#17762)
+* [dailymotion] Fix extraction (#17699)
+* [crunchyroll] Switch to HTTPS for RpcApi (#17749)
++ [philharmoniedeparis] Add support for pad.philharmoniedeparis.fr (#17705)
+* [philharmoniedeparis] Fix extraction (#17705)
++ [jamendo] Add support for licensing.jamendo.com (#17724)
++ [openload] Add support for oload.cloud (#17710)
+* [pluralsight] Fix subtitles extraction (#17726, #17728)
++ [vimeo] Add another config regular expression (#17690)
+* [spike] Fix Paramount Network extraction (#17677)
+* [hotstar] Fix extraction (#14694, #14931, #17637)
+
+
+version 2018.09.26
+
+Extractors
+* [pluralsight] Fix subtitles extraction (#17671)
+* [mediaset] Improve embed support (#17668)
++ [youtube] Add support for invidio.us (#17613)
++ [zattoo] Add support for more zattoo platform sites
+* [zattoo] Fix extraction (#17175, #17542)
+
+
+version 2018.09.18
+
+Core
++ [extractor/common] Introduce channel meta fields
+
+Extractors
+* [adobepass] Don't pollute default headers dict
+* [udemy] Don't pollute default headers dict
+* [twitch] Don't pollute default headers dict
+* [youtube] Don't pollute default query dict (#17593)
+* [crunchyroll] Prefer hardsubless formats and formats in locale language
+* [vrv] Make format ids deterministic
+* [vimeo] Fix ondemand playlist extraction (#14591)
++ [pornhub] Extract upload date (#17574)
++ [porntube] Extract channel meta fields
++ [vimeo] Extract channel meta fields
++ [youtube] Extract channel meta fields (#9676, #12939)
+* [porntube] Fix extraction (#17541)
+* [asiancrush] Fix extraction (#15630)
++ [twitch:clips] Extend URL regular expression (closes #17559)
++ [vzaar] Add support for HLS
+* [tube8] Fix metadata extraction (#17520)
+* [eporner] Extract JSON-LD (#17519)
+
+
 version 2018.09.10
 
 Core
 version 2018.09.10
 
 Core
index dd068a462bebd3fb8a6982181b364c967ceff083..35c3de5127455792bedc26d032d987806faaec0b 100644 (file)
--- a/README.md
+++ b/README.md
@@ -511,6 +511,8 @@ The basic usage is not to set any template arguments when downloading a single f
  - `timestamp` (numeric): UNIX timestamp of the moment the video became available
  - `upload_date` (string): Video upload date (YYYYMMDD)
  - `uploader_id` (string): Nickname or id of the video uploader
  - `timestamp` (numeric): UNIX timestamp of the moment the video became available
  - `upload_date` (string): Video upload date (YYYYMMDD)
  - `uploader_id` (string): Nickname or id of the video uploader
+ - `channel` (string): Full name of the channel the video is uploaded on
+ - `channel_id` (string): Id of the channel
  - `location` (string): Physical location where the video was filmed
  - `duration` (numeric): Length of the video in seconds
  - `view_count` (numeric): How many users have watched the video on the platform
  - `location` (string): Physical location where the video was filmed
  - `duration` (numeric): Length of the video in seconds
  - `view_count` (numeric): How many users have watched the video on the platform
@@ -1166,7 +1168,28 @@ title = self._search_regex(
 
 ### Use safe conversion functions
 
 
 ### Use safe conversion functions
 
-Wrap all extracted numeric data into safe functions from `utils`: `int_or_none`, `float_or_none`. Use them for string to number conversions as well.
+Wrap all extracted numeric data into safe functions from [`youtube_dl/utils.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/utils.py): `int_or_none`, `float_or_none`. Use them for string to number conversions as well.
+
+Use `url_or_none` for safe URL processing.
+
+Use `try_get` for safe metadata extraction from parsed JSON.
+
+Explore [`youtube_dl/utils.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/utils.py) for more useful convenience functions.
+
+#### More examples
+
+##### Safely extract optional description from parsed JSON
+```python
+description = try_get(response, lambda x: x['result']['video'][0]['summary'], compat_str)
+```
+
+##### Safely extract more optional metadata
+```python
+video = try_get(response, lambda x: x['result']['video'][0], dict) or {}
+description = video.get('summary')
+duration = float_or_none(video.get('durationMs'), scale=1000)
+view_count = int_or_none(video.get('views'))
+```
 
 # EMBEDDING YOUTUBE-DL
 
 
 # EMBEDDING YOUTUBE-DL
 
index a0f20fdb246486f7d7f8e39322c0f4536a8e5c81..19988f992080121c96ce4ed6218bd3137403f374 100644 (file)
@@ -594,6 +594,8 @@ with sequence type are:
     available
 -   upload_date (string): Video upload date (YYYYMMDD)
 -   uploader_id (string): Nickname or id of the video uploader
     available
 -   upload_date (string): Video upload date (YYYYMMDD)
 -   uploader_id (string): Nickname or id of the video uploader
+-   channel (string): Full name of the channel the video is uploaded on
+-   channel_id (string): Id of the channel
 -   location (string): Physical location where the video was filmed
 -   duration (numeric): Length of the video in seconds
 -   view_count (numeric): How many users have watched the video on the
 -   location (string): Physical location where the video was filmed
 -   duration (numeric): Length of the video in seconds
 -   view_count (numeric): How many users have watched the video on the
@@ -1589,9 +1591,28 @@ The code definitely should not look like:
 
 Use safe conversion functions
 
 
 Use safe conversion functions
 
-Wrap all extracted numeric data into safe functions from utils:
-int_or_none, float_or_none. Use them for string to number conversions as
-well.
+Wrap all extracted numeric data into safe functions from
+youtube_dl/utils.py: int_or_none, float_or_none. Use them for string to
+number conversions as well.
+
+Use url_or_none for safe URL processing.
+
+Use try_get for safe metadata extraction from parsed JSON.
+
+Explore youtube_dl/utils.py for more useful convenience functions.
+
+More examples
+
+Safely extract optional description from parsed JSON
+
+    description = try_get(response, lambda x: x['result']['video'][0]['summary'], compat_str)
+
+Safely extract more optional metadata
+
+    video = try_get(response, lambda x: x['result']['video'][0], dict) or {}
+    description = video.get('summary')
+    duration = float_or_none(video.get('durationMs'), scale=1000)
+    view_count = int_or_none(video.get('views'))
 
 
 
 
 
 
index 9b86017519043125ffc1851399ac6d0585ab415c..24c3254c36ac37e756996cbdf6ca5d2d78a565d2 100644 (file)
@@ -84,8 +84,6 @@
  - **awaan:season**
  - **awaan:video**
  - **AZMedien**: AZ Medien videos
  - **awaan:season**
  - **awaan:video**
  - **AZMedien**: AZ Medien videos
- - **AZMedienPlaylist**: AZ Medien playlists
- - **AZMedienShowPlaylist**: AZ Medien show playlists
  - **BaiduVideo**: 百度视频
  - **bambuser**
  - **bambuser:channel**
  - **BaiduVideo**: 百度视频
  - **bambuser**
  - **bambuser:channel**
@@ -98,6 +96,7 @@
  - **bbc.co.uk:article**: BBC articles
  - **bbc.co.uk:iplayer:playlist**
  - **bbc.co.uk:playlist**
  - **bbc.co.uk:article**: BBC articles
  - **bbc.co.uk:iplayer:playlist**
  - **bbc.co.uk:playlist**
+ - **BBVTV**
  - **Beatport**
  - **Beeg**
  - **BehindKink**
  - **Beatport**
  - **Beeg**
  - **BehindKink**
  - **Clyp**
  - **cmt.com**
  - **CNBC**
  - **Clyp**
  - **cmt.com**
  - **CNBC**
+ - **CNBCVideo**
  - **CNN**
  - **CNNArticle**
  - **CNNBlogs**
  - **CNN**
  - **CNNArticle**
  - **CNNBlogs**
  - **EchoMsk**
  - **egghead:course**: egghead.io course
  - **egghead:lesson**: egghead.io lesson
  - **EchoMsk**
  - **egghead:course**: egghead.io course
  - **egghead:lesson**: egghead.io lesson
+ - **ehftv**
  - **eHow**
  - **eHow**
+ - **EinsUndEinsTV**
  - **Einthusan**
  - **eitb.tv**
  - **EllenTube**
  - **Einthusan**
  - **eitb.tv**
  - **EllenTube**
  - **EsriVideo**
  - **Europa**
  - **EveryonesMixtape**
  - **EsriVideo**
  - **Europa**
  - **EveryonesMixtape**
+ - **EWETV**
  - **ExpoTV**
  - **Expressen**
  - **ExtremeTube**
  - **ExpoTV**
  - **Expressen**
  - **ExtremeTube**
  - **Gfycat**
  - **GiantBomb**
  - **Giga**
  - **Gfycat**
  - **GiantBomb**
  - **Giga**
+ - **GlattvisionTV**
  - **Glide**: Glide mobile video messages (glide.me)
  - **Globo**
  - **GloboArticle**
  - **Glide**: Glide mobile video messages (glide.me)
  - **Globo**
  - **GloboArticle**
  - **HitRecord**
  - **HornBunny**
  - **HotNewHipHop**
  - **HitRecord**
  - **HornBunny**
  - **HotNewHipHop**
- - **HotStar**
+ - **hotstar**
  - **hotstar:playlist**
  - **Howcast**
  - **HowStuffWorks**
  - **hotstar:playlist**
  - **Howcast**
  - **HowStuffWorks**
  - **limelight:channel**
  - **limelight:channel_list**
  - **LineTV**
  - **limelight:channel**
  - **limelight:channel_list**
  - **LineTV**
+ - **linkedin:learning**
+ - **linkedin:learning:course**
  - **LiTV**
  - **LiveLeak**
  - **LiveLeakEmbed**
  - **LiTV**
  - **LiveLeak**
  - **LiveLeakEmbed**
  - **Mixer:vod**
  - **MLB**
  - **Mnet**
  - **Mixer:vod**
  - **MLB**
  - **Mnet**
+ - **MNetTV**
  - **MoeVideo**: LetitBit video services: moevideo.net, playreplay.net and videochart.net
  - **Mofosex**
  - **Mojvideo**
  - **MoeVideo**: LetitBit video services: moevideo.net, playreplay.net and videochart.net
  - **Mofosex**
  - **Mojvideo**
  - **Myvi**
  - **MyVidster**
  - **MyviEmbed**
  - **Myvi**
  - **MyVidster**
  - **MyviEmbed**
+ - **MyVisionTV**
  - **n-tv.de**
  - **natgeo**
  - **natgeo:episodeguide**
  - **n-tv.de**
  - **natgeo**
  - **natgeo:episodeguide**
  - **netease:program**: 网易云音乐 - 电台节目
  - **netease:singer**: 网易云音乐 - 歌手
  - **netease:song**: 网易云音乐
  - **netease:program**: 网易云音乐 - 电台节目
  - **netease:singer**: 网易云音乐 - 歌手
  - **netease:song**: 网易云音乐
+ - **NetPlus**
  - **Netzkino**
  - **Newgrounds**
  - **NewgroundsPlaylist**
  - **Netzkino**
  - **Newgrounds**
  - **NewgroundsPlaylist**
  - **orf:iptv**: iptv.ORF.at
  - **orf:oe1**: Radio Österreich 1
  - **orf:tvthek**: ORF TVthek
  - **orf:iptv**: iptv.ORF.at
  - **orf:oe1**: Radio Österreich 1
  - **orf:tvthek**: ORF TVthek
+ - **OsnatelTV**
  - **PacktPub**
  - **PacktPubCourse**
  - **PandaTV**: 熊猫TV
  - **PacktPub**
  - **PacktPubCourse**
  - **PandaTV**: 熊猫TV
  - **qqmusic:playlist**: QQ音乐 - 歌单
  - **qqmusic:singer**: QQ音乐 - 歌手
  - **qqmusic:toplist**: QQ音乐 - 排行榜
  - **qqmusic:playlist**: QQ音乐 - 歌单
  - **qqmusic:singer**: QQ音乐 - 歌手
  - **qqmusic:toplist**: QQ音乐 - 排行榜
+ - **QuantumTV**
  - **Quickline**
  - **QuicklineLive**
  - **R7**
  - **Quickline**
  - **QuicklineLive**
  - **R7**
  - **safari**: safaribooksonline.com online video
  - **safari:api**
  - **safari:course**: safaribooksonline.com online courses
  - **safari**: safaribooksonline.com online video
  - **safari:api**
  - **safari:course**: safaribooksonline.com online courses
+ - **SAKTV**
  - **Sapo**: SAPO Vídeos
  - **savefrom.net**
  - **SBS**: sbs.com.au
  - **Sapo**: SAPO Vídeos
  - **savefrom.net**
  - **SBS**: sbs.com.au
  - **Spiegeltv**
  - **sport.francetvinfo.fr**
  - **Sport5**
  - **Spiegeltv**
  - **sport.francetvinfo.fr**
  - **Sport5**
- - **SportBoxEmbed**
+ - **SportBox**
  - **SportDeutschland**
  - **SpringboardPlatform**
  - **Sprout**
  - **SportDeutschland**
  - **SpringboardPlatform**
  - **Sprout**
  - **TV2**
  - **tv2.hu**
  - **TV2Article**
  - **TV2**
  - **tv2.hu**
  - **TV2Article**
- - **TV3**
  - **TV4**: tv4.se and tv4play.se
  - **TV5MondePlus**: TV5MONDE+
  - **TVA**
  - **TV4**: tv4.se and tv4play.se
  - **TV5MondePlus**: TV5MONDE+
  - **TVA**
  - **TVPlayer**
  - **TVPlayHome**
  - **Tweakers**
  - **TVPlayer**
  - **TVPlayHome**
  - **Tweakers**
+ - **TwitCasting**
  - **twitch:chapter**
  - **twitch:clips**
  - **twitch:profile**
  - **twitch:chapter**
  - **twitch:clips**
  - **twitch:profile**
  - **vrv**
  - **vrv:series**
  - **VShare**
  - **vrv**
  - **vrv:series**
  - **VShare**
+ - **VTXTV**
  - **vube**: Vube.com
  - **VuClip**
  - **VVVVID**
  - **VyboryMos**
  - **Vzaar**
  - **Walla**
  - **vube**: Vube.com
  - **VuClip**
  - **VVVVID**
  - **VyboryMos**
  - **Vzaar**
  - **Walla**
+ - **WalyTV**
  - **washingtonpost**
  - **washingtonpost:article**
  - **wat.tv**
  - **washingtonpost**
  - **washingtonpost:article**
  - **wat.tv**
index dfee217a9b8acb64e426c3ce8fc5c11a9c5a0121..aa9a1c9b2aadcd3a9eaeb1170c2e8d90afabb0b8 100644 (file)
@@ -7,6 +7,7 @@ import json
 import os.path
 import re
 import types
 import os.path
 import re
 import types
+import ssl
 import sys
 
 import youtube_dl.extractor
 import sys
 
 import youtube_dl.extractor
@@ -244,3 +245,12 @@ def expect_warnings(ydl, warnings_re):
             real_warning(w)
 
     ydl.report_warning = _report_warning
             real_warning(w)
 
     ydl.report_warning = _report_warning
+
+
+def http_server_port(httpd):
+    if os.name == 'java' and isinstance(httpd.socket, ssl.SSLSocket):
+        # In Jython SSLSocket is not a subclass of socket.socket
+        sock = httpd.socket.sock
+    else:
+        sock = httpd.socket
+    return sock.getsockname()[1]
index 4833396a521bf1d7a072db8ad425bed333235248..06be726166c164e29c9af54f4a66590001846e53 100644 (file)
@@ -9,11 +9,30 @@ import sys
 import unittest
 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
 import unittest
 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
-from test.helper import FakeYDL, expect_dict, expect_value
-from youtube_dl.compat import compat_etree_fromstring
+from test.helper import FakeYDL, expect_dict, expect_value, http_server_port
+from youtube_dl.compat import compat_etree_fromstring, compat_http_server
 from youtube_dl.extractor.common import InfoExtractor
 from youtube_dl.extractor import YoutubeIE, get_info_extractor
 from youtube_dl.utils import encode_data_uri, strip_jsonp, ExtractorError, RegexNotFoundError
 from youtube_dl.extractor.common import InfoExtractor
 from youtube_dl.extractor import YoutubeIE, get_info_extractor
 from youtube_dl.utils import encode_data_uri, strip_jsonp, ExtractorError, RegexNotFoundError
+import threading
+
+
+TEAPOT_RESPONSE_STATUS = 418
+TEAPOT_RESPONSE_BODY = "<h1>418 I'm a teapot</h1>"
+
+
+class InfoExtractorTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
+    def log_message(self, format, *args):
+        pass
+
+    def do_GET(self):
+        if self.path == '/teapot':
+            self.send_response(TEAPOT_RESPONSE_STATUS)
+            self.send_header('Content-Type', 'text/html; charset=utf-8')
+            self.end_headers()
+            self.wfile.write(TEAPOT_RESPONSE_BODY.encode())
+        else:
+            assert False
 
 
 class TestIE(InfoExtractor):
 
 
 class TestIE(InfoExtractor):
@@ -743,6 +762,25 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
                 for i in range(len(entries)):
                     expect_dict(self, entries[i], expected_entries[i])
 
                 for i in range(len(entries)):
                     expect_dict(self, entries[i], expected_entries[i])
 
+    def test_response_with_expected_status_returns_content(self):
+        # Checks for mitigations against the effects of
+        # <https://bugs.python.org/issue15002> that affect Python 3.4.1+, which
+        # manifest as `_download_webpage`, `_download_xml`, `_download_json`,
+        # or the underlying `_download_webpage_handle` returning no content
+        # when a response matches `expected_status`.
+
+        httpd = compat_http_server.HTTPServer(
+            ('127.0.0.1', 0), InfoExtractorTestRequestHandler)
+        port = http_server_port(httpd)
+        server_thread = threading.Thread(target=httpd.serve_forever)
+        server_thread.daemon = True
+        server_thread.start()
+
+        (content, urlh) = self.ie._download_webpage_handle(
+            'http://127.0.0.1:%d/teapot' % port, None,
+            expected_status=TEAPOT_RESPONSE_STATUS)
+        self.assertEqual(content, TEAPOT_RESPONSE_BODY)
+
 
 if __name__ == '__main__':
     unittest.main()
 
 if __name__ == '__main__':
     unittest.main()
index 5cf2bf1a56212ed1888ee5cb8728c68b01d555fa..7504722810b4e706f6b1143c7a36208ee0478749 100644 (file)
@@ -9,26 +9,16 @@ import sys
 import unittest
 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
 import unittest
 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
-from test.helper import try_rm
+from test.helper import http_server_port, try_rm
 from youtube_dl import YoutubeDL
 from youtube_dl.compat import compat_http_server
 from youtube_dl.downloader.http import HttpFD
 from youtube_dl.utils import encodeFilename
 from youtube_dl import YoutubeDL
 from youtube_dl.compat import compat_http_server
 from youtube_dl.downloader.http import HttpFD
 from youtube_dl.utils import encodeFilename
-import ssl
 import threading
 
 TEST_DIR = os.path.dirname(os.path.abspath(__file__))
 
 
 import threading
 
 TEST_DIR = os.path.dirname(os.path.abspath(__file__))
 
 
-def http_server_port(httpd):
-    if os.name == 'java' and isinstance(httpd.socket, ssl.SSLSocket):
-        # In Jython SSLSocket is not a subclass of socket.socket
-        sock = httpd.socket.sock
-    else:
-        sock = httpd.socket
-    return sock.getsockname()[1]
-
-
 TEST_SIZE = 10 * 1024
 
 
 TEST_SIZE = 10 * 1024
 
 
index 409fec9c8a377a79f05b86b4472106c237cdb629..3ee0a5dda8df4446f915391e031f6d13da486150 100644 (file)
@@ -8,6 +8,7 @@ import sys
 import unittest
 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
 import unittest
 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
+from test.helper import http_server_port
 from youtube_dl import YoutubeDL
 from youtube_dl.compat import compat_http_server, compat_urllib_request
 import ssl
 from youtube_dl import YoutubeDL
 from youtube_dl.compat import compat_http_server, compat_urllib_request
 import ssl
@@ -16,15 +17,6 @@ import threading
 TEST_DIR = os.path.dirname(os.path.abspath(__file__))
 
 
 TEST_DIR = os.path.dirname(os.path.abspath(__file__))
 
 
-def http_server_port(httpd):
-    if os.name == 'java' and isinstance(httpd.socket, ssl.SSLSocket):
-        # In Jython SSLSocket is not a subclass of socket.socket
-        sock = httpd.socket.sock
-    else:
-        sock = httpd.socket
-    return sock.getsockname()[1]
-
-
 class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
     def log_message(self, format, *args):
         pass
 class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
     def log_message(self, format, *args):
         pass
index 8879627e81e648ba6e37e920db3fba323e2ea16d..0bab846a651b32c28ca28948fa6dcb812a6e01c8 100755 (executable)
Binary files a/youtube-dl and b/youtube-dl differ
index 61ee72ff416faf3d55e97160e65a1f1bd9c55af0..8ce66300d1bb2e15beb87c2bc173c7cd3fd78d93 100644 (file)
@@ -1064,6 +1064,11 @@ became available
 .IP \[bu] 2
 \f[C]uploader_id\f[] (string): Nickname or id of the video uploader
 .IP \[bu] 2
 .IP \[bu] 2
 \f[C]uploader_id\f[] (string): Nickname or id of the video uploader
 .IP \[bu] 2
+\f[C]channel\f[] (string): Full name of the channel the video is
+uploaded on
+.IP \[bu] 2
+\f[C]channel_id\f[] (string): Id of the channel
+.IP \[bu] 2
 \f[C]location\f[] (string): Physical location where the video was filmed
 .IP \[bu] 2
 \f[C]duration\f[] (numeric): Length of the video in seconds
 \f[C]location\f[] (string): Physical location where the video was filmed
 .IP \[bu] 2
 \f[C]duration\f[] (numeric): Length of the video in seconds
@@ -2328,9 +2333,36 @@ title\ =\ self._search_regex(
 .fi
 .SS Use safe conversion functions
 .PP
 .fi
 .SS Use safe conversion functions
 .PP
-Wrap all extracted numeric data into safe functions from \f[C]utils\f[]:
+Wrap all extracted numeric data into safe functions from
+\f[C]youtube_dl/utils.py\f[] (https://github.com/rg3/youtube-dl/blob/master/youtube_dl/utils.py):
 \f[C]int_or_none\f[], \f[C]float_or_none\f[].
 Use them for string to number conversions as well.
 \f[C]int_or_none\f[], \f[C]float_or_none\f[].
 Use them for string to number conversions as well.
+.PP
+Use \f[C]url_or_none\f[] for safe URL processing.
+.PP
+Use \f[C]try_get\f[] for safe metadata extraction from parsed JSON.
+.PP
+Explore
+\f[C]youtube_dl/utils.py\f[] (https://github.com/rg3/youtube-dl/blob/master/youtube_dl/utils.py)
+for more useful convenience functions.
+.SS More examples
+.SS Safely extract optional description from parsed JSON
+.IP
+.nf
+\f[C]
+description\ =\ try_get(response,\ lambda\ x:\ x[\[aq]result\[aq]][\[aq]video\[aq]][0][\[aq]summary\[aq]],\ compat_str)
+\f[]
+.fi
+.SS Safely extract more optional metadata
+.IP
+.nf
+\f[C]
+video\ =\ try_get(response,\ lambda\ x:\ x[\[aq]result\[aq]][\[aq]video\[aq]][0],\ dict)\ or\ {}
+description\ =\ video.get(\[aq]summary\[aq])
+duration\ =\ float_or_none(video.get(\[aq]durationMs\[aq]),\ scale=1000)
+view_count\ =\ int_or_none(video.get(\[aq]views\[aq]))
+\f[]
+.fi
 .SH EMBEDDING YOUTUBE\-DL
 .PP
 youtube\-dl makes the best effort to be a good command\-line program,
 .SH EMBEDDING YOUTUBE\-DL
 .PP
 youtube\-dl makes the best effort to be a good command\-line program,
index b83b51efb624a876bbb46658e50b6f6714e10048..1cf2dcbf35567bc6a47664e1bbf05234eecaf2fb 100644 (file)
@@ -1325,8 +1325,8 @@ class AdobePassIE(InfoExtractor):
     _DOWNLOADING_LOGIN_PAGE = 'Downloading Provider Login Page'
 
     def _download_webpage_handle(self, *args, **kwargs):
     _DOWNLOADING_LOGIN_PAGE = 'Downloading Provider Login Page'
 
     def _download_webpage_handle(self, *args, **kwargs):
-        headers = kwargs.get('headers', {})
-        headers.update(self.geo_verification_headers())
+        headers = self.geo_verification_headers()
+        headers.update(kwargs.get('headers', {}))
         kwargs['headers'] = headers
         return super(AdobePassIE, self)._download_webpage_handle(
             *args, **compat_kwargs(kwargs))
         kwargs['headers'] = headers
         return super(AdobePassIE, self)._download_webpage_handle(
             *args, **compat_kwargs(kwargs))
index 6eb8bbb6e989d0310a0b447ef1928ba081567a6f..883dcee7aa4cae953fff16dbca8cbc5fbf07e64e 100644 (file)
@@ -4,6 +4,7 @@ from __future__ import unicode_literals
 from .common import InfoExtractor
 from ..utils import (
     int_or_none,
 from .common import InfoExtractor
 from ..utils import (
     int_or_none,
+    merge_dicts,
     mimetype2ext,
     url_or_none,
 )
     mimetype2ext,
     url_or_none,
 )
@@ -12,59 +13,83 @@ from ..utils import (
 class AparatIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?aparat\.com/(?:v/|video/video/embed/videohash/)(?P<id>[a-zA-Z0-9]+)'
 
 class AparatIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?aparat\.com/(?:v/|video/video/embed/videohash/)(?P<id>[a-zA-Z0-9]+)'
 
-    _TEST = {
+    _TESTS = [{
         'url': 'http://www.aparat.com/v/wP8On',
         'md5': '131aca2e14fe7c4dcb3c4877ba300c89',
         'info_dict': {
             'id': 'wP8On',
             'ext': 'mp4',
             'title': 'تیم گلکسی 11 - زومیت',
         'url': 'http://www.aparat.com/v/wP8On',
         'md5': '131aca2e14fe7c4dcb3c4877ba300c89',
         'info_dict': {
             'id': 'wP8On',
             'ext': 'mp4',
             'title': 'تیم گلکسی 11 - زومیت',
-            'age_limit': 0,
+            'description': 'md5:096bdabcdcc4569f2b8a5e903a3b3028',
+            'duration': 231,
+            'timestamp': 1387394859,
+            'upload_date': '20131218',
+            'view_count': int,
         },
         },
-        # 'skip': 'Extremely unreliable',
-    }
+    }, {
+        # multiple formats
+        'url': 'https://www.aparat.com/v/8dflw/',
+        'only_matching': True,
+    }]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
-        # Note: There is an easier-to-parse configuration at
-        # http://www.aparat.com/video/video/config/videohash/%video_id
-        # but the URL in there does not work
-        webpage = self._download_webpage(
-            'http://www.aparat.com/video/video/embed/vt/frame/showvideo/yes/videohash/' + video_id,
-            video_id)
+        # Provides more metadata
+        webpage = self._download_webpage(url, video_id, fatal=False)
 
 
-        title = self._search_regex(r'\s+title:\s*"([^"]+)"', webpage, 'title')
+        if not webpage:
+            # Note: There is an easier-to-parse configuration at
+            # http://www.aparat.com/video/video/config/videohash/%video_id
+            # but the URL in there does not work
+            webpage = self._download_webpage(
+                'http://www.aparat.com/video/video/embed/vt/frame/showvideo/yes/videohash/' + video_id,
+                video_id)
 
 
-        file_list = self._parse_json(
+        options = self._parse_json(
             self._search_regex(
             self._search_regex(
-                r'fileList\s*=\s*JSON\.parse\(\'([^\']+)\'\)', webpage,
-                'file list'),
+                r'options\s*=\s*JSON\.parse\(\s*(["\'])(?P<value>(?:(?!\1).)+)\1\s*\)',
+                webpage, 'options', group='value'),
             video_id)
 
             video_id)
 
+        player = options['plugins']['sabaPlayerPlugin']
+
         formats = []
         formats = []
-        for item in file_list[0]:
-            file_url = url_or_none(item.get('file'))
-            if not file_url:
-                continue
-            ext = mimetype2ext(item.get('type'))
-            label = item.get('label')
-            formats.append({
-                'url': file_url,
-                'ext': ext,
-                'format_id': label or ext,
-                'height': int_or_none(self._search_regex(
-                    r'(\d+)[pP]', label or '', 'height', default=None)),
-            })
-        self._sort_formats(formats)
+        for sources in player['multiSRC']:
+            for item in sources:
+                if not isinstance(item, dict):
+                    continue
+                file_url = url_or_none(item.get('src'))
+                if not file_url:
+                    continue
+                item_type = item.get('type')
+                if item_type == 'application/vnd.apple.mpegurl':
+                    formats.extend(self._extract_m3u8_formats(
+                        file_url, video_id, 'mp4',
+                        entry_protocol='m3u8_native', m3u8_id='hls',
+                        fatal=False))
+                else:
+                    ext = mimetype2ext(item.get('type'))
+                    label = item.get('label')
+                    formats.append({
+                        'url': file_url,
+                        'ext': ext,
+                        'format_id': 'http-%s' % (label or ext),
+                        'height': int_or_none(self._search_regex(
+                            r'(\d+)[pP]', label or '', 'height',
+                            default=None)),
+                    })
+        self._sort_formats(
+            formats, field_preference=('height', 'width', 'tbr', 'format_id'))
+
+        info = self._search_json_ld(webpage, video_id, default={})
 
 
-        thumbnail = self._search_regex(
-            r'image:\s*"([^"]+)"', webpage, 'thumbnail', fatal=False)
+        if not info.get('title'):
+            info['title'] = player['title']
 
 
-        return {
+        return merge_dicts(info, {
             'id': video_id,
             'id': video_id,
-            'title': title,
-            'thumbnail': thumbnail,
-            'age_limit': self._family_friendly_search(webpage),
+            'thumbnail': url_or_none(options.get('poster')),
+            'duration': int_or_none(player.get('duration')),
             'formats': formats,
             'formats': formats,
-        }
+        })
index 594c88c9cd94e3b94cf10dba6c2f6826a82bc406..6d71c5ad5f8dc6a0277688ea947d64d03b06a0d0 100644 (file)
@@ -8,7 +8,6 @@ from .kaltura import KalturaIE
 from ..utils import (
     extract_attributes,
     remove_end,
 from ..utils import (
     extract_attributes,
     remove_end,
-    urlencode_postdata,
 )
 
 
 )
 
 
@@ -34,19 +33,40 @@ class AsianCrushIE(InfoExtractor):
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
-        data = self._download_json(
-            'https://www.asiancrush.com/wp-admin/admin-ajax.php', video_id,
-            data=urlencode_postdata({
-                'postid': video_id,
-                'action': 'get_channel_kaltura_vars',
-            }))
+        webpage = self._download_webpage(url, video_id)
 
 
-        entry_id = data['entry_id']
+        entry_id, partner_id, title = [None] * 3
+
+        vars = self._parse_json(
+            self._search_regex(
+                r'iEmbedVars\s*=\s*({.+?})', webpage, 'embed vars',
+                default='{}'), video_id, fatal=False)
+        if vars:
+            entry_id = vars.get('entry_id')
+            partner_id = vars.get('partner_id')
+            title = vars.get('vid_label')
+
+        if not entry_id:
+            entry_id = self._search_regex(
+                r'\bentry_id["\']\s*:\s*["\'](\d+)', webpage, 'entry id')
+
+        player = self._download_webpage(
+            'https://api.asiancrush.com/embeddedVideoPlayer', video_id,
+            query={'id': entry_id})
+
+        kaltura_id = self._search_regex(
+            r'entry_id["\']\s*:\s*(["\'])(?P<id>(?:(?!\1).)+)\1', player,
+            'kaltura id', group='id')
+
+        if not partner_id:
+            partner_id = self._search_regex(
+                r'/p(?:artner_id)?/(\d+)', player, 'partner id',
+                default='513551')
 
         return self.url_result(
 
         return self.url_result(
-            'kaltura:%s:%s' % (data['partner_id'], entry_id),
-            ie=KalturaIE.ie_key(), video_id=entry_id,
-            video_title=data.get('vid_label'))
+            'kaltura:%s:%s' % (partner_id, kaltura_id),
+            ie=KalturaIE.ie_key(), video_id=kaltura_id,
+            video_title=title)
 
 
 class AsianCrushPlaylistIE(InfoExtractor):
 
 
 class AsianCrushPlaylistIE(InfoExtractor):
index 68f26e2cad635bd8eb23a14ced3e1b90b2b320fc..a57a5f114c825f80e3f5ec9dc9f3b6a7511c67e9 100644 (file)
 # coding: utf-8
 from __future__ import unicode_literals
 
 # coding: utf-8
 from __future__ import unicode_literals
 
+import json
 import re
 
 from .common import InfoExtractor
 from .kaltura import KalturaIE
 import re
 
 from .common import InfoExtractor
 from .kaltura import KalturaIE
-from ..utils import (
-    get_element_by_class,
-    get_element_by_id,
-    strip_or_none,
-    urljoin,
-)
 
 
 
 
-class AZMedienBaseIE(InfoExtractor):
-    def _kaltura_video(self, partner_id, entry_id):
-        return self.url_result(
-            'kaltura:%s:%s' % (partner_id, entry_id), ie=KalturaIE.ie_key(),
-            video_id=entry_id)
-
-
-class AZMedienIE(AZMedienBaseIE):
+class AZMedienIE(InfoExtractor):
     IE_DESC = 'AZ Medien videos'
     _VALID_URL = r'''(?x)
                     https?://
                         (?:www\.)?
     IE_DESC = 'AZ Medien videos'
     _VALID_URL = r'''(?x)
                     https?://
                         (?:www\.)?
-                        (?:
+                        (?P<host>
                             telezueri\.ch|
                             telebaern\.tv|
                             telem1\.ch
                         )/
                             telezueri\.ch|
                             telebaern\.tv|
                             telem1\.ch
                         )/
-                        [0-9]+-show-[^/\#]+
-                        (?:
-                            /[0-9]+-episode-[^/\#]+
-                            (?:
-                                /[0-9]+-segment-(?:[^/\#]+\#)?|
-                                \#
-                            )|
-                            \#
+                        [^/]+/
+                        (?P<id>
+                            [^/]+-(?P<article_id>\d+)
                         )
                         )
-                        (?P<id>[^\#]+)
+                        (?:
+                            \#video=
+                            (?P<kaltura_id>
+                                [_0-9a-z]+
+                            )
+                        )?
                     '''
 
     _TESTS = [{
                     '''
 
     _TESTS = [{
-        # URL with 'segment'
-        'url': 'http://www.telezueri.ch/62-show-zuerinews/13772-episode-sonntag-18-dezember-2016/32419-segment-massenabweisungen-beim-hiltl-club-wegen-pelzboom',
+        'url': 'https://www.telezueri.ch/sonntalk/bundesrats-vakanzen-eu-rahmenabkommen-133214569',
         'info_dict': {
         'info_dict': {
-            'id': '1_2444peh4',
+            'id': '1_anruz3wy',
             'ext': 'mp4',
             'ext': 'mp4',
-            'title': 'Massenabweisungen beim Hiltl Club wegen Pelzboom',
-            'description': 'md5:9ea9dd1b159ad65b36ddcf7f0d7c76a8',
-            'uploader_id': 'TeleZ?ri',
-            'upload_date': '20161218',
-            'timestamp': 1482084490,
+            'title': 'Bundesrats-Vakanzen / EU-Rahmenabkommen',
+            'description': 'md5:dd9f96751ec9c35e409a698a328402f3',
+            'uploader_id': 'TVOnline',
+            'upload_date': '20180930',
+            'timestamp': 1538328802,
         },
         'params': {
             'skip_download': True,
         },
     }, {
         },
         'params': {
             'skip_download': True,
         },
     }, {
-        # URL with 'segment' and fragment:
-        'url': 'http://www.telebaern.tv/118-show-news/14240-episode-dienstag-17-januar-2017/33666-segment-achtung-gefahr#zu-wenig-pflegerinnen-und-pfleger',
-        'only_matching': True
-    }, {
-        # URL with 'episode' and fragment:
-        'url': 'http://www.telem1.ch/47-show-sonntalk/13986-episode-soldaten-fuer-grenzschutz-energiestrategie-obama-bilanz#soldaten-fuer-grenzschutz-energiestrategie-obama-bilanz',
-        'only_matching': True
-    }, {
-        # URL with 'show' and fragment:
-        'url': 'http://www.telezueri.ch/66-show-sonntalk#burka-plakate-trump-putin-china-besuch',
+        'url': 'https://www.telebaern.tv/telebaern-news/montag-1-oktober-2018-ganze-sendung-133531189#video=0_7xjo9lf1',
         'only_matching': True
     }]
 
         'only_matching': True
     }]
 
-    def _real_extract(self, url):
-        video_id = self._match_id(url)
-
-        webpage = self._download_webpage(url, video_id)
-
-        partner_id = self._search_regex(
-            r'<script[^>]+src=["\'](?:https?:)?//(?:[^/]+\.)?kaltura\.com(?:/[^/]+)*/(?:p|partner_id)/([0-9]+)',
-            webpage, 'kaltura partner id')
-        entry_id = self._html_search_regex(
-            r'<a[^>]+data-id=(["\'])(?P<id>(?:(?!\1).)+)\1[^>]+data-slug=["\']%s'
-            % re.escape(video_id), webpage, 'kaltura entry id', group='id')
-
-        return self._kaltura_video(partner_id, entry_id)
-
-
-class AZMedienPlaylistIE(AZMedienBaseIE):
-    IE_DESC = 'AZ Medien playlists'
-    _VALID_URL = r'''(?x)
-                    https?://
-                        (?:www\.)?
-                        (?:
-                            telezueri\.ch|
-                            telebaern\.tv|
-                            telem1\.ch
-                        )/
-                        (?P<id>[0-9]+-
-                            (?:
-                                show|
-                                topic|
-                                themen
-                            )-[^/\#]+
-                            (?:
-                                /[0-9]+-episode-[^/\#]+
-                            )?
-                        )$
-                    '''
-
-    _TESTS = [{
-        # URL with 'episode'
-        'url': 'http://www.telebaern.tv/118-show-news/13735-episode-donnerstag-15-dezember-2016',
-        'info_dict': {
-            'id': '118-show-news/13735-episode-donnerstag-15-dezember-2016',
-            'title': 'News - Donnerstag, 15. Dezember 2016',
-        },
-        'playlist_count': 9,
-    }, {
-        # URL with 'themen'
-        'url': 'http://www.telem1.ch/258-themen-tele-m1-classics',
-        'info_dict': {
-            'id': '258-themen-tele-m1-classics',
-            'title': 'Tele M1 Classics',
-        },
-        'playlist_mincount': 15,
-    }, {
-        # URL with 'topic', contains nested playlists
-        'url': 'http://www.telezueri.ch/219-topic-aera-trump-hat-offiziell-begonnen',
-        'only_matching': True,
-    }, {
-        # URL with 'show' only
-        'url': 'http://www.telezueri.ch/86-show-talktaeglich',
-        'only_matching': True
-    }]
+    _PARTNER_ID = '1719221'
 
     def _real_extract(self, url):
 
     def _real_extract(self, url):
-        show_id = self._match_id(url)
-        webpage = self._download_webpage(url, show_id)
-
-        entries = []
-
-        partner_id = self._search_regex(
-            r'src=["\'](?:https?:)?//(?:[^/]+\.)kaltura\.com/(?:[^/]+/)*(?:p|partner_id)/(\d+)',
-            webpage, 'kaltura partner id', default=None)
-
-        if partner_id:
-            entries = [
-                self._kaltura_video(partner_id, m.group('id'))
-                for m in re.finditer(
-                    r'data-id=(["\'])(?P<id>(?:(?!\1).)+)\1', webpage)]
-
-        if not entries:
-            entries = [
-                self.url_result(m.group('url'), ie=AZMedienIE.ie_key())
-                for m in re.finditer(
-                    r'<a[^>]+data-real=(["\'])(?P<url>http.+?)\1', webpage)]
-
-        if not entries:
-            entries = [
-                # May contain nested playlists (e.g. [1]) thus no explicit
-                # ie_key
-                # 1. http://www.telezueri.ch/219-topic-aera-trump-hat-offiziell-begonnen)
-                self.url_result(urljoin(url, m.group('url')))
-                for m in re.finditer(
-                    r'<a[^>]+name=[^>]+href=(["\'])(?P<url>/.+?)\1', webpage)]
-
-        title = self._search_regex(
-            r'episodeShareTitle\s*=\s*(["\'])(?P<title>(?:(?!\1).)+)\1',
-            webpage, 'title',
-            default=strip_or_none(get_element_by_id(
-                'video-title', webpage)), group='title')
-
-        return self.playlist_result(entries, show_id, title)
-
-
-class AZMedienShowPlaylistIE(AZMedienBaseIE):
-    IE_DESC = 'AZ Medien show playlists'
-    _VALID_URL = r'''(?x)
-                    https?://
-                        (?:www\.)?
-                        (?:
-                            telezueri\.ch|
-                            telebaern\.tv|
-                            telem1\.ch
-                        )/
-                        (?:
-                            all-episodes|
-                            alle-episoden
-                        )/
-                        (?P<id>[^/?#&]+)
-                    '''
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        entry_id = mobj.group('kaltura_id')
+
+        if not entry_id:
+            webpage = self._download_webpage(url, video_id)
+            api_path = self._search_regex(
+                r'["\']apiPath["\']\s*:\s*["\']([^"^\']+)["\']',
+                webpage, 'api path')
+            api_url = 'https://www.%s%s' % (mobj.group('host'), api_path)
+            payload = {
+                'query': '''query VideoContext($articleId: ID!) {
+                    article: node(id: $articleId) {
+                      ... on Article {
+                        mainAssetRelation {
+                          asset {
+                            ... on VideoAsset {
+                              kalturaId
+                            }
+                          }
+                        }
+                      }
+                    }
+                  }''',
+                'variables': {'articleId': 'Article:%s' % mobj.group('article_id')},
+            }
+            json_data = self._download_json(
+                api_url, video_id, headers={
+                    'Content-Type': 'application/json',
+                },
+                data=json.dumps(payload).encode())
+            entry_id = json_data['data']['article']['mainAssetRelation']['asset']['kalturaId']
 
 
-    _TEST = {
-        'url': 'http://www.telezueri.ch/all-episodes/astrotalk',
-        'info_dict': {
-            'id': 'astrotalk',
-            'title': 'TeleZüri: AstroTalk - alle episoden',
-            'description': 'md5:4c0f7e7d741d906004266e295ceb4a26',
-        },
-        'playlist_mincount': 13,
-    }
-
-    def _real_extract(self, url):
-        playlist_id = self._match_id(url)
-        webpage = self._download_webpage(url, playlist_id)
-        episodes = get_element_by_class('search-mobile-box', webpage)
-        entries = [self.url_result(
-            urljoin(url, m.group('url'))) for m in re.finditer(
-                r'<a[^>]+href=(["\'])(?P<url>(?:(?!\1).)+)\1', episodes)]
-        title = self._og_search_title(webpage, fatal=False)
-        description = self._og_search_description(webpage)
-        return self.playlist_result(entries, playlist_id, title, description)
+        return self.url_result(
+            'kaltura:%s:%s' % (self._PARTNER_ID, entry_id),
+            ie=KalturaIE.ie_key(), video_id=entry_id)
index 14f9a14edf417d996fa1cbe3c80bcd06f2ee9ef0..465ae396eb9c690d84dd00a64f7439a74c74ba33 100644 (file)
@@ -1,8 +1,10 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
 # coding: utf-8
 from __future__ import unicode_literals
 
-import re
+import base64
 import json
 import json
+import re
+import struct
 
 from .common import InfoExtractor
 from .adobepass import AdobePassIE
 
 from .common import InfoExtractor
 from .adobepass import AdobePassIE
@@ -310,6 +312,10 @@ class BrightcoveLegacyIE(InfoExtractor):
                 'Cannot find playerKey= variable. Did you forget quotes in a shell invocation?',
                 expected=True)
 
                 'Cannot find playerKey= variable. Did you forget quotes in a shell invocation?',
                 expected=True)
 
+    def _brightcove_new_url_result(self, publisher_id, video_id):
+        brightcove_new_url = 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s' % (publisher_id, video_id)
+        return self.url_result(brightcove_new_url, BrightcoveNewIE.ie_key(), video_id)
+
     def _get_video_info(self, video_id, query, referer=None):
         headers = {}
         linkBase = query.get('linkBaseURL')
     def _get_video_info(self, video_id, query, referer=None):
         headers = {}
         linkBase = query.get('linkBaseURL')
@@ -323,6 +329,28 @@ class BrightcoveLegacyIE(InfoExtractor):
             r"<h1>We're sorry.</h1>([\s\n]*<p>.*?</p>)+", webpage,
             'error message', default=None)
         if error_msg is not None:
             r"<h1>We're sorry.</h1>([\s\n]*<p>.*?</p>)+", webpage,
             'error message', default=None)
         if error_msg is not None:
+            publisher_id = query.get('publisherId')
+            if publisher_id and publisher_id[0].isdigit():
+                publisher_id = publisher_id[0]
+            if not publisher_id:
+                player_key = query.get('playerKey')
+                if player_key and ',' in player_key[0]:
+                    player_key = player_key[0]
+                else:
+                    player_id = query.get('playerID')
+                    if player_id and player_id[0].isdigit():
+                        player_page = self._download_webpage(
+                            'http://link.brightcove.com/services/player/bcpid' + player_id[0],
+                            video_id, headers=headers, fatal=False)
+                        if player_page:
+                            player_key = self._search_regex(
+                                r'<param\s+name="playerKey"\s+value="([\w~,-]+)"',
+                                player_page, 'player key', fatal=False)
+                if player_key:
+                    enc_pub_id = player_key.split(',')[1].replace('~', '=')
+                    publisher_id = struct.unpack('>Q', base64.urlsafe_b64decode(enc_pub_id))[0]
+                if publisher_id:
+                    return self._brightcove_new_url_result(publisher_id, video_id)
             raise ExtractorError(
                 'brightcove said: %s' % error_msg, expected=True)
 
             raise ExtractorError(
                 'brightcove said: %s' % error_msg, expected=True)
 
@@ -444,8 +472,12 @@ class BrightcoveLegacyIE(InfoExtractor):
                 else:
                     return ad_info
 
                 else:
                     return ad_info
 
-        if 'url' not in info and not info.get('formats'):
-            raise ExtractorError('Unable to extract video url for %s' % video_id)
+        if not info.get('url') and not info.get('formats'):
+            uploader_id = info.get('uploader_id')
+            if uploader_id:
+                info.update(self._brightcove_new_url_result(uploader_id, video_id))
+            else:
+                raise ExtractorError('Unable to extract video url for %s' % video_id)
         return info
 
 
         return info
 
 
index ab651d1c8632fe08c29d44334cb4ba4a6ea2fddf..f2ca7a337df88aab5ba7f7b070537b7b9fec3172 100644 (file)
@@ -1,19 +1,10 @@
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
-from ..utils import int_or_none
-
-
-_translation_table = {
-    'a': 'h', 'd': 'e', 'e': 'v', 'f': 'o', 'g': 'f', 'i': 'd', 'l': 'n',
-    'm': 'a', 'n': 'm', 'p': 'u', 'q': 't', 'r': 's', 'v': 'p', 'x': 'r',
-    'y': 'l', 'z': 'i',
-    '$': ':', '&': '.', '(': '=', '^': '&', '=': '/',
-}
-
-
-def _decode(s):
-    return ''.join(_translation_table.get(c, c) for c in s)
+from ..utils import (
+    int_or_none,
+    url_or_none,
+)
 
 
 class CliphunterIE(InfoExtractor):
 
 
 class CliphunterIE(InfoExtractor):
@@ -60,14 +51,14 @@ class CliphunterIE(InfoExtractor):
 
         formats = []
         for format_id, f in gexo_files.items():
 
         formats = []
         for format_id, f in gexo_files.items():
-            video_url = f.get('url')
+            video_url = url_or_none(f.get('url'))
             if not video_url:
                 continue
             fmt = f.get('fmt')
             height = f.get('h')
             format_id = '%s_%sp' % (fmt, height) if fmt and height else format_id
             formats.append({
             if not video_url:
                 continue
             fmt = f.get('fmt')
             height = f.get('h')
             format_id = '%s_%sp' % (fmt, height) if fmt and height else format_id
             formats.append({
-                'url': _decode(video_url),
+                'url': video_url,
                 'format_id': format_id,
                 'width': int_or_none(f.get('w')),
                 'height': int_or_none(height),
                 'format_id': format_id,
                 'width': int_or_none(f.get('w')),
                 'height': int_or_none(height),
index d354d9f9584426658e468452cb54d3eaa8e3478a..6889b0f401b13a04855b3afef1882aa1c0b7da3b 100644 (file)
@@ -1,6 +1,7 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
 # coding: utf-8
 from __future__ import unicode_literals
 
+
 from .common import InfoExtractor
 from ..utils import smuggle_url
 
 from .common import InfoExtractor
 from ..utils import smuggle_url
 
@@ -34,3 +35,32 @@ class CNBCIE(InfoExtractor):
                 {'force_smil_url': True}),
             'id': video_id,
         }
                 {'force_smil_url': True}),
             'id': video_id,
         }
+
+
+class CNBCVideoIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?cnbc\.com/video/(?:[^/]+/)+(?P<id>[^./?#&]+)'
+    _TEST = {
+        'url': 'https://www.cnbc.com/video/2018/07/19/trump-i-dont-necessarily-agree-with-raising-rates.html',
+        'info_dict': {
+            'id': '7000031301',
+            'ext': 'mp4',
+            'title': "Trump: I don't necessarily agree with raising rates",
+            'description': 'md5:878d8f0b4ebb5bb1dda3514b91b49de3',
+            'timestamp': 1531958400,
+            'upload_date': '20180719',
+            'uploader': 'NBCU-CNBC',
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+        video_id = self._search_regex(
+            r'content_id["\']\s*:\s*["\'](\d+)', webpage, display_id,
+            'video id')
+        return self.url_result(
+            'http://video.cnbc.com/gallery/?video=%s' % video_id,
+            CNBCIE.ie_key())
index b8bbaf81a22aab883b1740f25edd91b78473916c..e5f8136fc1511d573978298393f5aba5d94d7af8 100644 (file)
@@ -69,6 +69,7 @@ from ..utils import (
     update_url_query,
     urljoin,
     url_basename,
     update_url_query,
     urljoin,
     url_basename,
+    url_or_none,
     xpath_element,
     xpath_text,
     xpath_with_ns,
     xpath_element,
     xpath_text,
     xpath_with_ns,
@@ -211,6 +212,11 @@ class InfoExtractor(object):
                     If not explicitly set, calculated from timestamp.
     uploader_id:    Nickname or id of the video uploader.
     uploader_url:   Full URL to a personal webpage of the video uploader.
                     If not explicitly set, calculated from timestamp.
     uploader_id:    Nickname or id of the video uploader.
     uploader_url:   Full URL to a personal webpage of the video uploader.
+    channel:        Full name of the channel the video is uploaded on.
+                    Note that channel fields may or may not repeat uploader
+                    fields. This depends on a particular extractor.
+    channel_id:     Id of the channel.
+    channel_url:    Full URL to a channel webpage.
     location:       Physical location where the video was filmed.
     subtitles:      The available subtitles as a dictionary in the format
                     {tag: subformats}. "tag" is usually a language code, and
     location:       Physical location where the video was filmed.
     subtitles:      The available subtitles as a dictionary in the format
                     {tag: subformats}. "tag" is usually a language code, and
@@ -600,6 +606,11 @@ class InfoExtractor(object):
         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
             if isinstance(err, compat_urllib_error.HTTPError):
                 if self.__can_accept_status_code(err, expected_status):
         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
             if isinstance(err, compat_urllib_error.HTTPError):
                 if self.__can_accept_status_code(err, expected_status):
+                    # Retain reference to error to prevent file object from
+                    # being closed before it can be read. Works around the
+                    # effects of <https://bugs.python.org/issue15002>
+                    # introduced in Python 3.4.1.
+                    err.fp._error = err
                     return err.fp
 
             if errnote is False:
                     return err.fp
 
             if errnote is False:
@@ -1208,10 +1219,10 @@ class InfoExtractor(object):
         def extract_video_object(e):
             assert e['@type'] == 'VideoObject'
             info.update({
         def extract_video_object(e):
             assert e['@type'] == 'VideoObject'
             info.update({
-                'url': e.get('contentUrl'),
+                'url': url_or_none(e.get('contentUrl')),
                 'title': unescapeHTML(e.get('name')),
                 'description': unescapeHTML(e.get('description')),
                 'title': unescapeHTML(e.get('name')),
                 'description': unescapeHTML(e.get('description')),
-                'thumbnail': e.get('thumbnailUrl') or e.get('thumbnailURL'),
+                'thumbnail': url_or_none(e.get('thumbnailUrl') or e.get('thumbnailURL')),
                 'duration': parse_duration(e.get('duration')),
                 'timestamp': unified_timestamp(e.get('uploadDate')),
                 'filesize': float_or_none(e.get('contentSize')),
                 'duration': parse_duration(e.get('duration')),
                 'timestamp': unified_timestamp(e.get('uploadDate')),
                 'filesize': float_or_none(e.get('contentSize')),
@@ -1701,9 +1712,9 @@ class InfoExtractor(object):
                 # However, this is not always respected, for example, [2]
                 # contains EXT-X-STREAM-INF tag which references AUDIO
                 # rendition group but does not have CODECS and despite
                 # However, this is not always respected, for example, [2]
                 # contains EXT-X-STREAM-INF tag which references AUDIO
                 # rendition group but does not have CODECS and despite
-                # referencing audio group an audio group, it represents
-                # a complete (with audio and video) format. So, for such cases
-                # we will ignore references to rendition groups and treat them
+                # referencing an audio group it represents a complete
+                # (with audio and video) format. So, for such cases we will
+                # ignore references to rendition groups and treat them
                 # as complete formats.
                 if audio_group_id and codecs and f.get('vcodec') != 'none':
                     audio_group = groups.get(audio_group_id)
                 # as complete formats.
                 if audio_group_id and codecs and f.get('vcodec') != 'none':
                     audio_group = groups.get(audio_group_id)
index ba8b9fa7eff3105fb657cf322fdacab17da632eb..4a68d092b0c842c1869b485a3de229364e18545a 100644 (file)
@@ -3,6 +3,7 @@ from __future__ import unicode_literals
 
 import re
 import json
 
 import re
 import json
+import xml.etree.ElementTree as etree
 import zlib
 
 from hashlib import sha1
 import zlib
 
 from hashlib import sha1
@@ -45,7 +46,7 @@ class CrunchyrollBaseIE(InfoExtractor):
         data['req'] = 'RpcApi' + method
         data = compat_urllib_parse_urlencode(data).encode('utf-8')
         return self._download_xml(
         data['req'] = 'RpcApi' + method
         data = compat_urllib_parse_urlencode(data).encode('utf-8')
         return self._download_xml(
-            'http://www.crunchyroll.com/xml/',
+            'https://www.crunchyroll.com/xml/',
             video_id, note, fatal=False, data=data, headers={
                 'Content-Type': 'application/x-www-form-urlencoded',
             })
             video_id, note, fatal=False, data=data, headers={
                 'Content-Type': 'application/x-www-form-urlencoded',
             })
@@ -398,7 +399,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
                 'Downloading subtitles for ' + sub_name, data={
                     'subtitle_script_id': sub_id,
                 })
                 'Downloading subtitles for ' + sub_name, data={
                     'subtitle_script_id': sub_id,
                 })
-            if sub_doc is None:
+            if not isinstance(sub_doc, etree.Element):
                 continue
             sid = sub_doc.get('id')
             iv = xpath_text(sub_doc, 'iv', 'subtitle iv')
                 continue
             sid = sub_doc.get('id')
             iv = xpath_text(sub_doc, 'iv', 'subtitle iv')
@@ -445,6 +446,10 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
             webpage, 'vilos media', default='{}'), video_id)
         media_metadata = media.get('metadata') or {}
 
             webpage, 'vilos media', default='{}'), video_id)
         media_metadata = media.get('metadata') or {}
 
+        language = self._search_regex(
+            r'(?:vilos\.config\.player\.language|LOCALE)\s*=\s*(["\'])(?P<lang>(?:(?!\1).)+)\1',
+            webpage, 'language', default=None, group='lang')
+
         video_title = self._html_search_regex(
             r'(?s)<h1[^>]*>((?:(?!<h1).)*?<span[^>]+itemprop=["\']title["\'][^>]*>(?:(?!<h1).)+?)</h1>',
             webpage, 'video_title')
         video_title = self._html_search_regex(
             r'(?s)<h1[^>]*>((?:(?!<h1).)*?<span[^>]+itemprop=["\']title["\'][^>]*>(?:(?!<h1).)+?)</h1>',
             webpage, 'video_title')
@@ -466,9 +471,22 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
 
         formats = []
         for stream in media.get('streams', []):
 
         formats = []
         for stream in media.get('streams', []):
-            formats.extend(self._extract_vrv_formats(
+            audio_lang = stream.get('audio_lang')
+            hardsub_lang = stream.get('hardsub_lang')
+            vrv_formats = self._extract_vrv_formats(
                 stream.get('url'), video_id, stream.get('format'),
                 stream.get('url'), video_id, stream.get('format'),
-                stream.get('audio_lang'), stream.get('hardsub_lang')))
+                audio_lang, hardsub_lang)
+            for f in vrv_formats:
+                if not hardsub_lang:
+                    f['preference'] = 1
+                language_preference = 0
+                if audio_lang == language:
+                    language_preference += 1
+                if hardsub_lang == language:
+                    language_preference += 1
+                if language_preference:
+                    f['language_preference'] = language_preference
+            formats.extend(vrv_formats)
         if not formats:
             available_fmts = []
             for a, fmt in re.findall(r'(<a[^>]+token=["\']showmedia\.([0-9]{3,4})p["\'][^>]+>)', webpage):
         if not formats:
             available_fmts = []
             for a, fmt in re.findall(r'(<a[^>]+token=["\']showmedia\.([0-9]{3,4})p["\'][^>]+>)', webpage):
@@ -498,7 +516,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
                         'video_quality': stream_quality,
                         'current_page': url,
                     })
                         'video_quality': stream_quality,
                         'current_page': url,
                     })
-                if streamdata is not None:
+                if isinstance(streamdata, etree.Element):
                     stream_info = streamdata.find('./{default}preload/stream_info')
                     if stream_info is not None:
                         stream_infos.append(stream_info)
                     stream_info = streamdata.find('./{default}preload/stream_info')
                     if stream_info is not None:
                         stream_infos.append(stream_info)
@@ -509,7 +527,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
                         'video_format': stream_format,
                         'video_encode_quality': stream_quality,
                     })
                         'video_format': stream_format,
                         'video_encode_quality': stream_quality,
                     })
-                if stream_info is not None:
+                if isinstance(stream_info, etree.Element):
                     stream_infos.append(stream_info)
                 for stream_info in stream_infos:
                     video_encode_id = xpath_text(stream_info, './video_encode_id')
                     stream_infos.append(stream_info)
                 for stream_info in stream_infos:
                     video_encode_id = xpath_text(stream_info, './video_encode_id')
@@ -557,7 +575,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
                         'ext': 'flv',
                     })
                     formats.append(format_info)
                         'ext': 'flv',
                     })
                     formats.append(format_info)
-        self._sort_formats(formats, ('height', 'width', 'tbr', 'fps'))
+        self._sort_formats(formats, ('preference', 'language_preference', 'height', 'width', 'tbr', 'fps'))
 
         metadata = self._call_rpc_api(
             'VideoPlayer_GetMediaMetadata', video_id,
 
         metadata = self._call_rpc_api(
             'VideoPlayer_GetMediaMetadata', video_id,
@@ -581,10 +599,22 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
         series = self._html_search_regex(
             r'(?s)<h\d[^>]+\bid=["\']showmedia_about_episode_num[^>]+>(.+?)</h\d',
             webpage, 'series', fatal=False)
         series = self._html_search_regex(
             r'(?s)<h\d[^>]+\bid=["\']showmedia_about_episode_num[^>]+>(.+?)</h\d',
             webpage, 'series', fatal=False)
-        season = xpath_text(metadata, 'series_title')
 
 
-        episode = xpath_text(metadata, 'episode_title') or media_metadata.get('title')
-        episode_number = int_or_none(xpath_text(metadata, 'episode_number') or media_metadata.get('episode_number'))
+        season = episode = episode_number = duration = thumbnail = None
+
+        if isinstance(metadata, etree.Element):
+            season = xpath_text(metadata, 'series_title')
+            episode = xpath_text(metadata, 'episode_title')
+            episode_number = int_or_none(xpath_text(metadata, 'episode_number'))
+            duration = float_or_none(media_metadata.get('duration'), 1000)
+            thumbnail = xpath_text(metadata, 'episode_image_url')
+
+        if not episode:
+            episode = media_metadata.get('title')
+        if not episode_number:
+            episode_number = int_or_none(media_metadata.get('episode_number'))
+        if not thumbnail:
+            thumbnail = media_metadata.get('thumbnail', {}).get('url')
 
         season_number = int_or_none(self._search_regex(
             r'(?s)<h\d[^>]+id=["\']showmedia_about_episode_num[^>]+>.+?</h\d>\s*<h4>\s*Season (\d+)',
 
         season_number = int_or_none(self._search_regex(
             r'(?s)<h\d[^>]+id=["\']showmedia_about_episode_num[^>]+>.+?</h\d>\s*<h4>\s*Season (\d+)',
@@ -594,8 +624,8 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
             'id': video_id,
             'title': video_title,
             'description': video_description,
             'id': video_id,
             'title': video_title,
             'description': video_description,
-            'duration': float_or_none(media_metadata.get('duration'), 1000),
-            'thumbnail': xpath_text(metadata, 'episode_image_url') or media_metadata.get('thumbnail', {}).get('url'),
+            'duration': duration,
+            'thumbnail': thumbnail,
             'uploader': video_uploader,
             'upload_date': video_upload_date,
             'series': series,
             'uploader': video_uploader,
             'upload_date': video_upload_date,
             'series': series,
index 224a1fb5d4abcd49d3285a1aed7724d5de6a17e9..f9bd535f6d228b137b78e90a765f1042c0f5c925 100644 (file)
@@ -3,6 +3,7 @@ from __future__ import unicode_literals
 
 from .common import InfoExtractor
 from ..utils import (
 
 from .common import InfoExtractor
 from ..utils import (
+    ExtractorError,
     int_or_none,
     parse_age_limit,
     parse_iso8601,
     int_or_none,
     parse_age_limit,
     parse_iso8601,
@@ -66,9 +67,12 @@ class CWTVIE(InfoExtractor):
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
-        video_data = self._download_json(
+        data = self._download_json(
             'http://images.cwtv.com/feed/mobileapp/video-meta/apiversion_8/guid_' + video_id,
             'http://images.cwtv.com/feed/mobileapp/video-meta/apiversion_8/guid_' + video_id,
-            video_id)['video']
+            video_id)
+        if data.get('result') != 'ok':
+            raise ExtractorError(data['msg'], expected=True)
+        video_data = data['video']
         title = video_data['title']
         mpx_url = video_data.get('mpx_url') or 'http://link.theplatform.com/s/cwtv/media/guid/2703454149/%s?formats=M3U' % video_id
 
         title = video_data['title']
         mpx_url = video_data.get('mpx_url') or 'http://link.theplatform.com/s/cwtv/media/guid/2703454149/%s?formats=M3U' % video_id
 
index af39780353ac1e044ffb07464dfa64032ba18f0e..4f75a2a307169c91e141e8ee73d71f68fa20a66e 100644 (file)
@@ -49,6 +49,9 @@ class DailyMailIE(InfoExtractor):
             'http://www.dailymail.co.uk/api/player/%s/video-sources.json' % video_id)
 
         video_sources = self._download_json(sources_url, video_id)
             'http://www.dailymail.co.uk/api/player/%s/video-sources.json' % video_id)
 
         video_sources = self._download_json(sources_url, video_id)
+        body = video_sources.get('body')
+        if body:
+            video_sources = body
 
         formats = []
         for rendition in video_sources['renditions']:
 
         formats = []
         for rendition in video_sources['renditions']:
index 040f0bd02e94992bb1e4248d425f564ce7c81c75..1816c559e7481b659b5d91fd3944a03eb18f0f9e 100644 (file)
@@ -22,7 +22,10 @@ from ..utils import (
     parse_iso8601,
     sanitized_Request,
     str_to_int,
     parse_iso8601,
     sanitized_Request,
     str_to_int,
+    try_get,
     unescapeHTML,
     unescapeHTML,
+    update_url_query,
+    url_or_none,
     urlencode_postdata,
 )
 
     urlencode_postdata,
 )
 
@@ -171,10 +174,25 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
              r'__PLAYER_CONFIG__\s*=\s*({.+?});'],
             webpage, 'player v5', default=None)
         if player_v5:
              r'__PLAYER_CONFIG__\s*=\s*({.+?});'],
             webpage, 'player v5', default=None)
         if player_v5:
-            player = self._parse_json(player_v5, video_id)
-            metadata = player['metadata']
-
-            if metadata.get('error', {}).get('type') == 'password_protected':
+            player = self._parse_json(player_v5, video_id, fatal=False) or {}
+            metadata = try_get(player, lambda x: x['metadata'], dict)
+            if not metadata:
+                metadata_url = url_or_none(try_get(
+                    player, lambda x: x['context']['metadata_template_url1']))
+                if metadata_url:
+                    metadata_url = metadata_url.replace(':videoId', video_id)
+                else:
+                    metadata_url = update_url_query(
+                        'https://www.dailymotion.com/player/metadata/video/%s'
+                        % video_id, {
+                            'embedder': url,
+                            'integration': 'inline',
+                            'GK_PV5_NEON': '1',
+                        })
+                metadata = self._download_json(
+                    metadata_url, video_id, 'Downloading metadata JSON')
+
+            if try_get(metadata, lambda x: x['error']['type']) == 'password_protected':
                 password = self._downloader.params.get('videopassword')
                 if password:
                     r = int(metadata['id'][1:], 36)
                 password = self._downloader.params.get('videopassword')
                 if password:
                     r = int(metadata['id'][1:], 36)
index 6d03d7095822d079d780cc8fb571adb06b7314d3..c050bf9df3fb7ececed5b3e03a70aea9b2c37417 100644 (file)
@@ -9,6 +9,7 @@ from ..utils import (
     encode_base_n,
     ExtractorError,
     int_or_none,
     encode_base_n,
     ExtractorError,
     int_or_none,
+    merge_dicts,
     parse_duration,
     str_to_int,
     url_or_none,
     parse_duration,
     str_to_int,
     url_or_none,
@@ -25,10 +26,16 @@ class EpornerIE(InfoExtractor):
             'display_id': 'Infamous-Tiffany-Teen-Strip-Tease-Video',
             'ext': 'mp4',
             'title': 'Infamous Tiffany Teen Strip Tease Video',
             'display_id': 'Infamous-Tiffany-Teen-Strip-Tease-Video',
             'ext': 'mp4',
             'title': 'Infamous Tiffany Teen Strip Tease Video',
+            'description': 'md5:764f39abf932daafa37485eb46efa152',
+            'timestamp': 1232520922,
+            'upload_date': '20090121',
             'duration': 1838,
             'view_count': int,
             'age_limit': 18,
         },
             'duration': 1838,
             'view_count': int,
             'age_limit': 18,
         },
+        'params': {
+            'proxy': '127.0.0.1:8118'
+        }
     }, {
         # New (May 2016) URL layout
         'url': 'http://www.eporner.com/hd-porn/3YRUtzMcWn0/Star-Wars-XXX-Parody/',
     }, {
         # New (May 2016) URL layout
         'url': 'http://www.eporner.com/hd-porn/3YRUtzMcWn0/Star-Wars-XXX-Parody/',
@@ -104,12 +111,15 @@ class EpornerIE(InfoExtractor):
                     })
         self._sort_formats(formats)
 
                     })
         self._sort_formats(formats)
 
-        duration = parse_duration(self._html_search_meta('duration', webpage))
+        json_ld = self._search_json_ld(webpage, display_id, default={})
+
+        duration = parse_duration(self._html_search_meta(
+            'duration', webpage, default=None))
         view_count = str_to_int(self._search_regex(
             r'id="cinemaviews">\s*([0-9,]+)\s*<small>views',
             webpage, 'view count', fatal=False))
 
         view_count = str_to_int(self._search_regex(
             r'id="cinemaviews">\s*([0-9,]+)\s*<small>views',
             webpage, 'view count', fatal=False))
 
-        return {
+        return merge_dicts(json_ld, {
             'id': video_id,
             'display_id': display_id,
             'title': title,
             'id': video_id,
             'display_id': display_id,
             'title': title,
@@ -117,4 +127,4 @@ class EpornerIE(InfoExtractor):
             'view_count': view_count,
             'formats': formats,
             'age_limit': 18,
             'view_count': view_count,
             'formats': formats,
             'age_limit': 18,
-        }
+        })
index 7dc56972498325b428a7c134115dc78ac5f81556..e5488cce45e9180d46daddc072ea08e7269c0d9e 100644 (file)
@@ -88,11 +88,7 @@ from .awaan import (
     AWAANLiveIE,
     AWAANSeasonIE,
 )
     AWAANLiveIE,
     AWAANSeasonIE,
 )
-from .azmedien import (
-    AZMedienIE,
-    AZMedienPlaylistIE,
-    AZMedienShowPlaylistIE,
-)
+from .azmedien import AZMedienIE
 from .baidu import BaiduVideoIE
 from .bambuser import BambuserIE, BambuserChannelIE
 from .bandcamp import BandcampIE, BandcampAlbumIE, BandcampWeeklyIE
 from .baidu import BaiduVideoIE
 from .bambuser import BambuserIE, BambuserChannelIE
 from .bandcamp import BandcampIE, BandcampAlbumIE, BandcampWeeklyIE
@@ -209,7 +205,10 @@ from .cloudy import CloudyIE
 from .clubic import ClubicIE
 from .clyp import ClypIE
 from .cmt import CMTIE
 from .clubic import ClubicIE
 from .clyp import ClypIE
 from .cmt import CMTIE
-from .cnbc import CNBCIE
+from .cnbc import (
+    CNBCIE,
+    CNBCVideoIE,
+)
 from .cnn import (
     CNNIE,
     CNNBlogsIE,
 from .cnn import (
     CNNIE,
     CNNBlogsIE,
@@ -540,6 +539,7 @@ from .la7 import LA7IE
 from .laola1tv import (
     Laola1TvEmbedIE,
     Laola1TvIE,
 from .laola1tv import (
     Laola1TvEmbedIE,
     Laola1TvIE,
+    EHFTVIE,
     ITTFIE,
 )
 from .lci import LCIIE
     ITTFIE,
 )
 from .lci import LCIIE
@@ -569,6 +569,10 @@ from .limelight import (
     LimelightChannelListIE,
 )
 from .line import LineTVIE
     LimelightChannelListIE,
 )
 from .line import LineTVIE
+from .linkedin import (
+    LinkedInLearningIE,
+    LinkedInLearningCourseIE,
+)
 from .litv import LiTVIE
 from .liveleak import (
     LiveLeakIE,
 from .litv import LiTVIE
 from .liveleak import (
     LiveLeakIE,
@@ -1043,7 +1047,7 @@ from .spike import (
 )
 from .stitcher import StitcherIE
 from .sport5 import Sport5IE
 )
 from .stitcher import StitcherIE
 from .sport5 import Sport5IE
-from .sportbox import SportBoxEmbedIE
+from .sportbox import SportBoxIE
 from .sportdeutschland import SportDeutschlandIE
 from .springboardplatform import SpringboardPlatformIE
 from .sprout import SproutIE
 from .sportdeutschland import SportDeutschlandIE
 from .springboardplatform import SpringboardPlatformIE
 from .sprout import SproutIE
@@ -1153,7 +1157,6 @@ from .tv2 import (
     TV2ArticleIE,
 )
 from .tv2hu import TV2HuIE
     TV2ArticleIE,
 )
 from .tv2hu import TV2HuIE
-from .tv3 import TV3IE
 from .tv4 import TV4IE
 from .tv5mondeplus import TV5MondePlusIE
 from .tva import TVAIE
 from .tv4 import TV4IE
 from .tv5mondeplus import TV5MondePlusIE
 from .tva import TVAIE
@@ -1190,6 +1193,7 @@ from .tweakers import TweakersIE
 from .twentyfourvideo import TwentyFourVideoIE
 from .twentymin import TwentyMinutenIE
 from .twentythreevideo import TwentyThreeVideoIE
 from .twentyfourvideo import TwentyFourVideoIE
 from .twentymin import TwentyMinutenIE
 from .twentythreevideo import TwentyThreeVideoIE
+from .twitcasting import TwitCastingIE
 from .twitch import (
     TwitchVideoIE,
     TwitchChapterIE,
 from .twitch import (
     TwitchVideoIE,
     TwitchChapterIE,
@@ -1455,8 +1459,20 @@ from .youtube import (
 from .zapiks import ZapiksIE
 from .zaq1 import Zaq1IE
 from .zattoo import (
 from .zapiks import ZapiksIE
 from .zaq1 import Zaq1IE
 from .zattoo import (
+    BBVTVIE,
+    EinsUndEinsTVIE,
+    EWETVIE,
+    GlattvisionTVIE,
+    MNetTVIE,
+    MyVisionTVIE,
+    NetPlusIE,
+    OsnatelTVIE,
+    QuantumTVIE,
     QuicklineIE,
     QuicklineLiveIE,
     QuicklineIE,
     QuicklineLiveIE,
+    SAKTVIE,
+    VTXTVIE,
+    WalyTVIE,
     ZattooIE,
     ZattooLiveIE,
 )
     ZattooIE,
     ZattooLiveIE,
 )
index 97cfe0fc38ed53f32fd868fd8faa6276a3362dfe..74954049dc79e6e9d0682a73550c52a4d010a06d 100644 (file)
@@ -57,7 +57,7 @@ class FacebookIE(InfoExtractor):
     _CHROME_USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.97 Safari/537.36'
 
     _VIDEO_PAGE_TEMPLATE = 'https://www.facebook.com/video/video.php?v=%s'
     _CHROME_USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.97 Safari/537.36'
 
     _VIDEO_PAGE_TEMPLATE = 'https://www.facebook.com/video/video.php?v=%s'
-    _VIDEO_PAGE_TAHOE_TEMPLATE = 'https://www.facebook.com/video/tahoe/async/%s/?chain=true&isvideo=true'
+    _VIDEO_PAGE_TAHOE_TEMPLATE = 'https://www.facebook.com/video/tahoe/async/%s/?chain=true&isvideo=true&payloadtype=primary'
 
     _TESTS = [{
         'url': 'https://www.facebook.com/video.php?v=637842556329505&fref=nf',
 
     _TESTS = [{
         'url': 'https://www.facebook.com/video.php?v=637842556329505&fref=nf',
index ad273a0e70c3fbd9087779d33829b817d9d70127..a9a1f911e0b98ab81970a2e4fed09b40606c0e14 100644 (file)
@@ -3,15 +3,45 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
 import re
 
 from .common import InfoExtractor
-from ..compat import compat_urlparse
+from ..compat import (
+    compat_b64decode,
+    compat_str,
+    compat_urllib_parse_unquote,
+    compat_urlparse,
+)
 from ..utils import (
 from ..utils import (
+    int_or_none,
     parse_duration,
     parse_iso8601,
     parse_duration,
     parse_iso8601,
+    str_or_none,
     str_to_int,
     str_to_int,
+    try_get,
+    unified_timestamp,
+    url_or_none,
 )
 
 
 class FourTubeBaseIE(InfoExtractor):
 )
 
 
 class FourTubeBaseIE(InfoExtractor):
+    _TKN_HOST = 'tkn.kodicdn.com'
+
+    def _extract_formats(self, url, video_id, media_id, sources):
+        token_url = 'https://%s/%s/desktop/%s' % (
+            self._TKN_HOST, media_id, '+'.join(sources))
+
+        parsed_url = compat_urlparse.urlparse(url)
+        tokens = self._download_json(token_url, video_id, data=b'', headers={
+            'Origin': '%s://%s' % (parsed_url.scheme, parsed_url.hostname),
+            'Referer': url,
+        })
+        formats = [{
+            'url': tokens[format]['token'],
+            'format_id': format + 'p',
+            'resolution': format + 'p',
+            'quality': int(format),
+        } for format in sources]
+        self._sort_formats(formats)
+        return formats
+
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         kind, video_id, display_id = mobj.group('kind', 'id', 'display_id')
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         kind, video_id, display_id = mobj.group('kind', 'id', 'display_id')
@@ -68,21 +98,7 @@ class FourTubeBaseIE(InfoExtractor):
             media_id = params[0]
             sources = ['%s' % p for p in params[2]]
 
             media_id = params[0]
             sources = ['%s' % p for p in params[2]]
 
-        token_url = 'https://tkn.kodicdn.com/{0}/desktop/{1}'.format(
-            media_id, '+'.join(sources))
-
-        parsed_url = compat_urlparse.urlparse(url)
-        tokens = self._download_json(token_url, video_id, data=b'', headers={
-            'Origin': '%s://%s' % (parsed_url.scheme, parsed_url.hostname),
-            'Referer': url,
-        })
-        formats = [{
-            'url': tokens[format]['token'],
-            'format_id': format + 'p',
-            'resolution': format + 'p',
-            'quality': int(format),
-        } for format in sources]
-        self._sort_formats(formats)
+        formats = self._extract_formats(url, video_id, media_id, sources)
 
         return {
             'id': video_id,
 
         return {
             'id': video_id,
@@ -164,6 +180,7 @@ class FuxIE(FourTubeBaseIE):
 class PornTubeIE(FourTubeBaseIE):
     _VALID_URL = r'https?://(?:(?P<kind>www|m)\.)?porntube\.com/(?:videos/(?P<display_id>[^/]+)_|embed/)(?P<id>\d+)'
     _URL_TEMPLATE = 'https://www.porntube.com/videos/video_%s'
 class PornTubeIE(FourTubeBaseIE):
     _VALID_URL = r'https?://(?:(?P<kind>www|m)\.)?porntube\.com/(?:videos/(?P<display_id>[^/]+)_|embed/)(?P<id>\d+)'
     _URL_TEMPLATE = 'https://www.porntube.com/videos/video_%s'
+    _TKN_HOST = 'tkn.porntube.com'
     _TESTS = [{
         'url': 'https://www.porntube.com/videos/teen-couple-doing-anal_7089759',
         'info_dict': {
     _TESTS = [{
         'url': 'https://www.porntube.com/videos/teen-couple-doing-anal_7089759',
         'info_dict': {
@@ -171,13 +188,32 @@ class PornTubeIE(FourTubeBaseIE):
             'ext': 'mp4',
             'title': 'Teen couple doing anal',
             'uploader': 'Alexy',
             'ext': 'mp4',
             'title': 'Teen couple doing anal',
             'uploader': 'Alexy',
-            'uploader_id': 'Alexy',
+            'uploader_id': '91488',
             'upload_date': '20150606',
             'timestamp': 1433595647,
             'duration': 5052,
             'view_count': int,
             'like_count': int,
             'upload_date': '20150606',
             'timestamp': 1433595647,
             'duration': 5052,
             'view_count': int,
             'like_count': int,
-            'categories': list,
+            'age_limit': 18,
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }, {
+        'url': 'https://www.porntube.com/videos/squirting-teen-ballerina-ecg_1331406',
+        'info_dict': {
+            'id': '1331406',
+            'ext': 'mp4',
+            'title': 'Squirting Teen Ballerina on ECG',
+            'uploader': 'Exploited College Girls',
+            'uploader_id': '665',
+            'channel': 'Exploited College Girls',
+            'channel_id': '665',
+            'upload_date': '20130920',
+            'timestamp': 1379685485,
+            'duration': 851,
+            'view_count': int,
+            'like_count': int,
             'age_limit': 18,
         },
         'params': {
             'age_limit': 18,
         },
         'params': {
@@ -191,6 +227,55 @@ class PornTubeIE(FourTubeBaseIE):
         'only_matching': True,
     }]
 
         'only_matching': True,
     }]
 
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id, display_id = mobj.group('id', 'display_id')
+
+        webpage = self._download_webpage(url, display_id)
+
+        video = self._parse_json(
+            self._search_regex(
+                r'INITIALSTATE\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1',
+                webpage, 'data', group='value'), video_id,
+            transform_source=lambda x: compat_urllib_parse_unquote(
+                compat_b64decode(x).decode('utf-8')))['page']['video']
+
+        title = video['title']
+        media_id = video['mediaId']
+        sources = [compat_str(e['height'])
+                   for e in video['encodings'] if e.get('height')]
+        formats = self._extract_formats(url, video_id, media_id, sources)
+
+        thumbnail = url_or_none(video.get('masterThumb'))
+        uploader = try_get(video, lambda x: x['user']['username'], compat_str)
+        uploader_id = str_or_none(try_get(
+            video, lambda x: x['user']['id'], int))
+        channel = try_get(video, lambda x: x['channel']['name'], compat_str)
+        channel_id = str_or_none(try_get(
+            video, lambda x: x['channel']['id'], int))
+        like_count = int_or_none(video.get('likes'))
+        dislike_count = int_or_none(video.get('dislikes'))
+        view_count = int_or_none(video.get('playsQty'))
+        duration = int_or_none(video.get('durationInSeconds'))
+        timestamp = unified_timestamp(video.get('publishedAt'))
+
+        return {
+            'id': video_id,
+            'title': title,
+            'formats': formats,
+            'thumbnail': thumbnail,
+            'uploader': uploader or channel,
+            'uploader_id': uploader_id or channel_id,
+            'channel': channel,
+            'channel_id': channel_id,
+            'timestamp': timestamp,
+            'like_count': like_count,
+            'dislike_count': dislike_count,
+            'view_count': view_count,
+            'duration': duration,
+            'age_limit': 18,
+        }
+
 
 class PornerBrosIE(FourTubeBaseIE):
     _VALID_URL = r'https?://(?:(?P<kind>www|m)\.)?pornerbros\.com/(?:videos/(?P<display_id>[^/]+)_|embed/)(?P<id>\d+)'
 
 class PornerBrosIE(FourTubeBaseIE):
     _VALID_URL = r'https?://(?:(?P<kind>www|m)\.)?pornerbros\.com/(?:videos/(?P<display_id>[^/]+)_|embed/)(?P<id>\d+)'
index 76ef01332feef1619a6bde5e915893cf0196fe57..545e033711995d04544aa1dafea11110084fa58f 100644 (file)
@@ -47,7 +47,7 @@ from .nbc import NBCSportsVPlayerIE
 from .ooyala import OoyalaIE
 from .rutv import RUTVIE
 from .tvc import TVCIE
 from .ooyala import OoyalaIE
 from .rutv import RUTVIE
 from .tvc import TVCIE
-from .sportbox import SportBoxEmbedIE
+from .sportbox import SportBoxIE
 from .smotri import SmotriIE
 from .myvi import MyviIE
 from .condenast import CondeNastIE
 from .smotri import SmotriIE
 from .myvi import MyviIE
 from .condenast import CondeNastIE
@@ -2636,9 +2636,9 @@ class GenericIE(InfoExtractor):
             return self.url_result(tvc_url, 'TVC')
 
         # Look for embedded SportBox player
             return self.url_result(tvc_url, 'TVC')
 
         # Look for embedded SportBox player
-        sportbox_urls = SportBoxEmbedIE._extract_urls(webpage)
+        sportbox_urls = SportBoxIE._extract_urls(webpage)
         if sportbox_urls:
         if sportbox_urls:
-            return self.playlist_from_matches(sportbox_urls, video_id, video_title, ie='SportBoxEmbed')
+            return self.playlist_from_matches(sportbox_urls, video_id, video_title, ie=SportBoxIE.ie_key())
 
         # Look for embedded XHamster player
         xhamster_urls = XHamsterEmbedIE._extract_urls(webpage)
 
         # Look for embedded XHamster player
         xhamster_urls = XHamsterEmbedIE._extract_urls(webpage)
@@ -3023,7 +3023,7 @@ class GenericIE(InfoExtractor):
                 wapo_urls, video_id, video_title, ie=WashingtonPostIE.ie_key())
 
         # Look for Mediaset embeds
                 wapo_urls, video_id, video_title, ie=WashingtonPostIE.ie_key())
 
         # Look for Mediaset embeds
-        mediaset_urls = MediasetIE._extract_urls(webpage)
+        mediaset_urls = MediasetIE._extract_urls(self, webpage)
         if mediaset_urls:
             return self.playlist_from_matches(
                 mediaset_urls, video_id, video_title, ie=MediasetIE.ie_key())
         if mediaset_urls:
             return self.playlist_from_matches(
                 mediaset_urls, video_id, video_title, ie=MediasetIE.ie_key())
index d28af36ec1704a8d4c508c2036962c8584ea38aa..bf5717f1bf1f8366e45222e716951fb11069e8f6 100644 (file)
@@ -1,49 +1,55 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
 # coding: utf-8
 from __future__ import unicode_literals
 
-import re
+import hashlib
+import hmac
+import time
 
 from .common import InfoExtractor
 
 from .common import InfoExtractor
-from ..compat import compat_str
+from ..compat import compat_HTTPError
 from ..utils import (
     determine_ext,
     ExtractorError,
     int_or_none,
 from ..utils import (
     determine_ext,
     ExtractorError,
     int_or_none,
+    try_get,
 )
 
 
 class HotStarBaseIE(InfoExtractor):
 )
 
 
 class HotStarBaseIE(InfoExtractor):
-    _GEO_COUNTRIES = ['IN']
-
-    def _download_json(self, *args, **kwargs):
-        response = super(HotStarBaseIE, self)._download_json(*args, **kwargs)
-        if response['resultCode'] != 'OK':
-            if kwargs.get('fatal'):
-                raise ExtractorError(
-                    response['errorDescription'], expected=True)
-            return None
-        return response['resultObj']
-
-    def _download_content_info(self, content_id):
-        return self._download_json(
-            'https://account.hotstar.com/AVS/besc', content_id, query={
-                'action': 'GetAggregatedContentDetails',
-                'appVersion': '5.0.40',
-                'channel': 'PCTV',
-                'contentId': content_id,
-            })['contentInfo'][0]
+    _AKAMAI_ENCRYPTION_KEY = b'\x05\xfc\x1a\x01\xca\xc9\x4b\xc4\x12\xfc\x53\x12\x07\x75\xf9\xee'
+
+    def _call_api(self, path, video_id, query_name='contentId'):
+        st = int(time.time())
+        exp = st + 6000
+        auth = 'st=%d~exp=%d~acl=/*' % (st, exp)
+        auth += '~hmac=' + hmac.new(self._AKAMAI_ENCRYPTION_KEY, auth.encode(), hashlib.sha256).hexdigest()
+        response = self._download_json(
+            'https://api.hotstar.com/' + path,
+            video_id, headers={
+                'hotstarauth': auth,
+                'x-country-code': 'IN',
+                'x-platform-code': 'JIO',
+            }, query={
+                query_name: video_id,
+                'tas': 10000,
+            })
+        if response['statusCode'] != 'OK':
+            raise ExtractorError(
+                response['body']['message'], expected=True)
+        return response['body']['results']
 
 
 class HotStarIE(HotStarBaseIE):
 
 
 class HotStarIE(HotStarBaseIE):
+    IE_NAME = 'hotstar'
     _VALID_URL = r'https?://(?:www\.)?hotstar\.com/(?:.+?[/-])?(?P<id>\d{10})'
     _TESTS = [{
     _VALID_URL = r'https?://(?:www\.)?hotstar\.com/(?:.+?[/-])?(?P<id>\d{10})'
     _TESTS = [{
-        'url': 'http://www.hotstar.com/on-air-with-aib--english-1000076273',
+        'url': 'https://www.hotstar.com/can-you-not-spread-rumours/1000076273',
         'info_dict': {
             'id': '1000076273',
             'ext': 'mp4',
         'info_dict': {
             'id': '1000076273',
             'ext': 'mp4',
-            'title': 'On Air With AIB',
+            'title': 'Can You Not Spread Rumours?',
             'description': 'md5:c957d8868e9bc793ccb813691cc4c434',
             'description': 'md5:c957d8868e9bc793ccb813691cc4c434',
-            'timestamp': 1447227000,
+            'timestamp': 1447248600,
             'upload_date': '20151111',
             'duration': 381,
         },
             'upload_date': '20151111',
             'duration': 381,
         },
@@ -58,47 +64,47 @@ class HotStarIE(HotStarBaseIE):
         'url': 'http://www.hotstar.com/1000000515',
         'only_matching': True,
     }]
         'url': 'http://www.hotstar.com/1000000515',
         'only_matching': True,
     }]
+    _GEO_BYPASS = False
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
-        video_data = self._download_content_info(video_id)
+        webpage = self._download_webpage(url, video_id)
+        app_state = self._parse_json(self._search_regex(
+            r'<script>window\.APP_STATE\s*=\s*({.+?})</script>',
+            webpage, 'app state'), video_id)
+        video_data = {}
+        for v in app_state.values():
+            content = try_get(v, lambda x: x['initialState']['contentData']['content'], dict)
+            if content and content.get('contentId') == video_id:
+                video_data = content
 
 
-        title = video_data['episodeTitle']
+        title = video_data['title']
 
 
-        if video_data.get('encrypted') == 'Y':
+        if video_data.get('drmProtected'):
             raise ExtractorError('This video is DRM protected.', expected=True)
 
         formats = []
             raise ExtractorError('This video is DRM protected.', expected=True)
 
         formats = []
-        for f in ('JIO',):
-            format_data = self._download_json(
-                'http://getcdn.hotstar.com/AVS/besc',
-                video_id, 'Downloading %s JSON metadata' % f,
-                fatal=False, query={
-                    'action': 'GetCDN',
-                    'asJson': 'Y',
-                    'channel': f,
-                    'id': video_id,
-                    'type': 'VOD',
-                })
-            if format_data:
-                format_url = format_data.get('src')
-                if not format_url:
-                    continue
-                ext = determine_ext(format_url)
-                if ext == 'm3u8':
-                    formats.extend(self._extract_m3u8_formats(
-                        format_url, video_id, 'mp4',
-                        m3u8_id='hls', fatal=False))
-                elif ext == 'f4m':
-                    # produce broken files
-                    continue
-                else:
-                    formats.append({
-                        'url': format_url,
-                        'width': int_or_none(format_data.get('width')),
-                        'height': int_or_none(format_data.get('height')),
-                    })
+        format_data = self._call_api('h/v1/play', video_id)['item']
+        format_url = format_data['playbackUrl']
+        ext = determine_ext(format_url)
+        if ext == 'm3u8':
+            try:
+                formats.extend(self._extract_m3u8_formats(
+                    format_url, video_id, 'mp4', m3u8_id='hls'))
+            except ExtractorError as e:
+                if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
+                    self.raise_geo_restricted(countries=['IN'])
+                raise
+        elif ext == 'f4m':
+            # produce broken files
+            pass
+        else:
+            formats.append({
+                'url': format_url,
+                'width': int_or_none(format_data.get('width')),
+                'height': int_or_none(format_data.get('height')),
+            })
         self._sort_formats(formats)
 
         return {
         self._sort_formats(formats)
 
         return {
@@ -106,57 +112,43 @@ class HotStarIE(HotStarBaseIE):
             'title': title,
             'description': video_data.get('description'),
             'duration': int_or_none(video_data.get('duration')),
             'title': title,
             'description': video_data.get('description'),
             'duration': int_or_none(video_data.get('duration')),
-            'timestamp': int_or_none(video_data.get('broadcastDate')),
+            'timestamp': int_or_none(video_data.get('broadcastDate') or video_data.get('startDate')),
             'formats': formats,
             'formats': formats,
+            'channel': video_data.get('channelName'),
+            'channel_id': video_data.get('channelId'),
+            'series': video_data.get('showName'),
+            'season': video_data.get('seasonName'),
+            'season_number': int_or_none(video_data.get('seasonNo')),
+            'season_id': video_data.get('seasonId'),
             'episode': title,
             'episode': title,
-            'episode_number': int_or_none(video_data.get('episodeNumber')),
-            'series': video_data.get('contentTitle'),
+            'episode_number': int_or_none(video_data.get('episodeNo')),
         }
 
 
 class HotStarPlaylistIE(HotStarBaseIE):
     IE_NAME = 'hotstar:playlist'
         }
 
 
 class HotStarPlaylistIE(HotStarBaseIE):
     IE_NAME = 'hotstar:playlist'
-    _VALID_URL = r'(?P<url>https?://(?:www\.)?hotstar\.com/tv/[^/]+/(?P<content_id>\d+))/(?P<type>[^/]+)/(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www\.)?hotstar\.com/tv/[^/]+/s-\w+/list/[^/]+/t-(?P<id>\w+)'
     _TESTS = [{
     _TESTS = [{
-        'url': 'http://www.hotstar.com/tv/pratidaan/14982/episodes/14812/9993',
+        'url': 'https://www.hotstar.com/tv/savdhaan-india/s-26/list/popular-clips/t-3_2_26',
         'info_dict': {
         'info_dict': {
-            'id': '14812',
+            'id': '3_2_26',
         },
         },
-        'playlist_mincount': 75,
+        'playlist_mincount': 20,
     }, {
     }, {
-        'url': 'http://www.hotstar.com/tv/pratidaan/14982/popular-clips/9998/9998',
+        'url': 'https://www.hotstar.com/tv/savdhaan-india/s-26/list/extras/t-2480',
         'only_matching': True,
     }]
         'only_matching': True,
     }]
-    _ITEM_TYPES = {
-        'episodes': 'EPISODE',
-        'popular-clips': 'CLIPS',
-    }
 
     def _real_extract(self, url):
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        base_url = mobj.group('url')
-        content_id = mobj.group('content_id')
-        playlist_type = mobj.group('type')
-
-        content_info = self._download_content_info(content_id)
-        playlist_id = compat_str(content_info['categoryId'])
-
-        collection = self._download_json(
-            'https://search.hotstar.com/AVS/besc', playlist_id, query={
-                'action': 'SearchContents',
-                'appVersion': '5.0.40',
-                'channel': 'PCTV',
-                'moreFilters': 'series:%s;' % playlist_id,
-                'query': '*',
-                'searchOrder': 'last_broadcast_date desc,year desc,title asc',
-                'type': self._ITEM_TYPES.get(playlist_type, 'EPISODE'),
-            })
+        playlist_id = self._match_id(url)
+
+        collection = self._call_api('o/v1/tray/find', playlist_id, 'uqId')
 
         entries = [
             self.url_result(
 
         entries = [
             self.url_result(
-                '%s/_/%s' % (base_url, video['contentId']),
+                'https://www.hotstar.com/%s' % video['contentId'],
                 ie=HotStarIE.ie_key(), video_id=video['contentId'])
                 ie=HotStarIE.ie_key(), video_id=video['contentId'])
-            for video in collection['response']['docs']
+            for video in collection['assets']['items']
             if video.get('contentId')]
 
         return self.playlist_result(entries, playlist_id)
             if video.get('contentId')]
 
         return self.playlist_result(entries, playlist_id)
index cb51cef2d45f9096a50a4716874219abaae63642..86c014b076bfaa1d7da143bdd7cb29c10438d2f3 100644 (file)
@@ -15,7 +15,7 @@ from ..utils import (
 class IviIE(InfoExtractor):
     IE_DESC = 'ivi.ru'
     IE_NAME = 'ivi'
 class IviIE(InfoExtractor):
     IE_DESC = 'ivi.ru'
     IE_NAME = 'ivi'
-    _VALID_URL = r'https?://(?:www\.)?ivi\.ru/(?:watch/(?:[^/]+/)?|video/player\?.*?videoId=)(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www\.)?ivi\.(?:ru|tv)/(?:watch/(?:[^/]+/)?|video/player\?.*?videoId=)(?P<id>\d+)'
     _GEO_BYPASS = False
     _GEO_COUNTRIES = ['RU']
 
     _GEO_BYPASS = False
     _GEO_COUNTRIES = ['RU']
 
@@ -65,7 +65,11 @@ class IviIE(InfoExtractor):
                 'thumbnail': r're:^https?://.*\.jpg$',
             },
             'skip': 'Only works from Russia',
                 'thumbnail': r're:^https?://.*\.jpg$',
             },
             'skip': 'Only works from Russia',
-        }
+        },
+        {
+            'url': 'https://www.ivi.tv/watch/33560/',
+            'only_matching': True,
+        },
     ]
 
     # Sorted by quality
     ]
 
     # Sorted by quality
index 595d7a5b75a25d7e5ac41b29c9052e22cc531e66..c218276184ab4acf9086404cdb679a74e24f353b 100644 (file)
@@ -26,8 +26,15 @@ class JamendoBaseIE(InfoExtractor):
 
 
 class JamendoIE(JamendoBaseIE):
 
 
 class JamendoIE(JamendoBaseIE):
-    _VALID_URL = r'https?://(?:www\.)?jamendo\.com/track/(?P<id>[0-9]+)/(?P<display_id>[^/?#&]+)'
-    _TEST = {
+    _VALID_URL = r'''(?x)
+                    https?://
+                        (?:
+                            licensing\.jamendo\.com/[^/]+|
+                            (?:www\.)?jamendo\.com
+                        )
+                        /track/(?P<id>[0-9]+)/(?P<display_id>[^/?#&]+)
+                    '''
+    _TESTS = [{
         'url': 'https://www.jamendo.com/track/196219/stories-from-emona-i',
         'md5': '6e9e82ed6db98678f171c25a8ed09ffd',
         'info_dict': {
         'url': 'https://www.jamendo.com/track/196219/stories-from-emona-i',
         'md5': '6e9e82ed6db98678f171c25a8ed09ffd',
         'info_dict': {
@@ -40,14 +47,19 @@ class JamendoIE(JamendoBaseIE):
             'duration': 210,
             'thumbnail': r're:^https?://.*\.jpg'
         }
             'duration': 210,
             'thumbnail': r're:^https?://.*\.jpg'
         }
-    }
+    }, {
+        'url': 'https://licensing.jamendo.com/en/track/1496667/energetic-rock',
+        'only_matching': True,
+    }]
 
     def _real_extract(self, url):
         mobj = self._VALID_URL_RE.match(url)
         track_id = mobj.group('id')
         display_id = mobj.group('display_id')
 
 
     def _real_extract(self, url):
         mobj = self._VALID_URL_RE.match(url)
         track_id = mobj.group('id')
         display_id = mobj.group('display_id')
 
-        webpage = self._download_webpage(url, display_id)
+        webpage = self._download_webpage(
+            'https://www.jamendo.com/track/%s/%s' % (track_id, display_id),
+            display_id)
 
         title, artist, track = self._extract_meta(webpage)
 
 
         title, artist, track = self._extract_meta(webpage)
 
index c7f813370162bb8582db8abd9a3f49e3e4bb1bc1..fa217365a37853f471440d8138d0e20bc4115d52 100644 (file)
@@ -2,6 +2,7 @@
 from __future__ import unicode_literals
 
 import json
 from __future__ import unicode_literals
 
 import json
+import re
 
 from .common import InfoExtractor
 from ..utils import (
 
 from .common import InfoExtractor
 from ..utils import (
@@ -32,7 +33,8 @@ class Laola1TvEmbedIE(InfoExtractor):
 
     def _extract_token_url(self, stream_access_url, video_id, data):
         return self._download_json(
 
     def _extract_token_url(self, stream_access_url, video_id, data):
         return self._download_json(
-            stream_access_url, video_id, headers={
+            self._proto_relative_url(stream_access_url, 'https:'), video_id,
+            headers={
                 'Content-Type': 'application/json',
             }, data=json.dumps(data).encode())['data']['stream-access'][0]
 
                 'Content-Type': 'application/json',
             }, data=json.dumps(data).encode())['data']['stream-access'][0]
 
@@ -119,9 +121,59 @@ class Laola1TvEmbedIE(InfoExtractor):
         }
 
 
         }
 
 
-class Laola1TvIE(Laola1TvEmbedIE):
+class Laola1TvBaseIE(Laola1TvEmbedIE):
+    def _extract_video(self, url):
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+
+        if 'Dieser Livestream ist bereits beendet.' in webpage:
+            raise ExtractorError('This live stream has already finished.', expected=True)
+
+        conf = self._parse_json(self._search_regex(
+            r'(?s)conf\s*=\s*({.+?});', webpage, 'conf'),
+            display_id,
+            transform_source=lambda s: js_to_json(re.sub(r'shareurl:.+,', '', s)))
+        video_id = conf['videoid']
+
+        config = self._download_json(conf['configUrl'], video_id, query={
+            'videoid': video_id,
+            'partnerid': conf['partnerid'],
+            'language': conf.get('language', ''),
+            'portal': conf.get('portalid', ''),
+        })
+        error = config.get('error')
+        if error:
+            raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True)
+
+        video_data = config['video']
+        title = video_data['title']
+        is_live = video_data.get('isLivestream') and video_data.get('isLive')
+        meta = video_data.get('metaInformation')
+        sports = meta.get('sports')
+        categories = sports.split(',') if sports else []
+
+        token_url = self._extract_token_url(
+            video_data['streamAccess'], video_id,
+            video_data['abo']['required'])
+
+        formats = self._extract_formats(token_url, video_id)
+
+        return {
+            'id': video_id,
+            'display_id': display_id,
+            'title': self._live_title(title) if is_live else title,
+            'description': video_data.get('description'),
+            'thumbnail': video_data.get('image'),
+            'categories': categories,
+            'formats': formats,
+            'is_live': is_live,
+        }
+
+
+class Laola1TvIE(Laola1TvBaseIE):
     IE_NAME = 'laola1tv'
     _VALID_URL = r'https?://(?:www\.)?laola1\.tv/[a-z]+-[a-z]+/[^/]+/(?P<id>[^/?#&]+)'
     IE_NAME = 'laola1tv'
     _VALID_URL = r'https?://(?:www\.)?laola1\.tv/[a-z]+-[a-z]+/[^/]+/(?P<id>[^/?#&]+)'
+
     _TESTS = [{
         'url': 'http://www.laola1.tv/de-de/video/straubing-tigers-koelner-haie/227883.html',
         'info_dict': {
     _TESTS = [{
         'url': 'http://www.laola1.tv/de-de/video/straubing-tigers-koelner-haie/227883.html',
         'info_dict': {
@@ -169,52 +221,30 @@ class Laola1TvIE(Laola1TvEmbedIE):
     }]
 
     def _real_extract(self, url):
     }]
 
     def _real_extract(self, url):
-        display_id = self._match_id(url)
+        return self._extract_video(url)
 
 
-        webpage = self._download_webpage(url, display_id)
 
 
-        if 'Dieser Livestream ist bereits beendet.' in webpage:
-            raise ExtractorError('This live stream has already finished.', expected=True)
-
-        conf = self._parse_json(self._search_regex(
-            r'(?s)conf\s*=\s*({.+?});', webpage, 'conf'),
-            display_id, js_to_json)
-
-        video_id = conf['videoid']
-
-        config = self._download_json(conf['configUrl'], video_id, query={
-            'videoid': video_id,
-            'partnerid': conf['partnerid'],
-            'language': conf.get('language', ''),
-            'portal': conf.get('portalid', ''),
-        })
-        error = config.get('error')
-        if error:
-            raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True)
-
-        video_data = config['video']
-        title = video_data['title']
-        is_live = video_data.get('isLivestream') and video_data.get('isLive')
-        meta = video_data.get('metaInformation')
-        sports = meta.get('sports')
-        categories = sports.split(',') if sports else []
-
-        token_url = self._extract_token_url(
-            video_data['streamAccess'], video_id,
-            video_data['abo']['required'])
+class EHFTVIE(Laola1TvBaseIE):
+    IE_NAME = 'ehftv'
+    _VALID_URL = r'https?://(?:www\.)?ehftv\.com/[a-z]+(?:-[a-z]+)?/[^/]+/(?P<id>[^/?#&]+)'
 
 
-        formats = self._extract_formats(token_url, video_id)
+    _TESTS = [{
+        'url': 'https://www.ehftv.com/int/video/paris-saint-germain-handball-pge-vive-kielce/1166761',
+        'info_dict': {
+            'id': '1166761',
+            'display_id': 'paris-saint-germain-handball-pge-vive-kielce',
+            'ext': 'mp4',
+            'title': 'Paris Saint-Germain Handball - PGE Vive Kielce',
+            'is_live': False,
+            'categories': ['Handball'],
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }]
 
 
-        return {
-            'id': video_id,
-            'display_id': display_id,
-            'title': self._live_title(title) if is_live else title,
-            'description': video_data.get('description'),
-            'thumbnail': video_data.get('image'),
-            'categories': categories,
-            'formats': formats,
-            'is_live': is_live,
-        }
+    def _real_extract(self, url):
+        return self._extract_video(url)
 
 
 class ITTFIE(InfoExtractor):
 
 
 class ITTFIE(InfoExtractor):
diff --git a/youtube_dl/extractor/linkedin.py b/youtube_dl/extractor/linkedin.py
new file mode 100644 (file)
index 0000000..259fc4c
--- /dev/null
@@ -0,0 +1,175 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    float_or_none,
+    int_or_none,
+    urlencode_postdata,
+)
+
+
+class LinkedInLearningBaseIE(InfoExtractor):
+    _NETRC_MACHINE = 'linkedin'
+
+    def _call_api(self, course_slug, fields, video_slug=None, resolution=None):
+        query = {
+            'courseSlug': course_slug,
+            'fields': fields,
+            'q': 'slugs',
+        }
+        sub = ''
+        if video_slug:
+            query.update({
+                'videoSlug': video_slug,
+                'resolution': '_%s' % resolution,
+            })
+            sub = ' %dp' % resolution
+        api_url = 'https://www.linkedin.com/learning-api/detailedCourses'
+        return self._download_json(
+            api_url, video_slug, 'Downloading%s JSON metadata' % sub, headers={
+                'Csrf-Token': self._get_cookies(api_url)['JSESSIONID'].value,
+            }, query=query)['elements'][0]
+
+    def _get_video_id(self, urn, course_slug, video_slug):
+        if urn:
+            mobj = re.search(r'urn:li:lyndaCourse:\d+,(\d+)', urn)
+            if mobj:
+                return mobj.group(1)
+        return '%s/%s' % (course_slug, video_slug)
+
+    def _real_initialize(self):
+        email, password = self._get_login_info()
+        if email is None:
+            return
+
+        login_page = self._download_webpage(
+            'https://www.linkedin.com/uas/login?trk=learning',
+            None, 'Downloading login page')
+        action_url = self._search_regex(
+            r'<form[^>]+action=(["\'])(?P<url>.+?)\1', login_page, 'post url',
+            default='https://www.linkedin.com/uas/login-submit', group='url')
+        data = self._hidden_inputs(login_page)
+        data.update({
+            'session_key': email,
+            'session_password': password,
+        })
+        login_submit_page = self._download_webpage(
+            action_url, None, 'Logging in',
+            data=urlencode_postdata(data))
+        error = self._search_regex(
+            r'<span[^>]+class="error"[^>]*>\s*(.+?)\s*</span>',
+            login_submit_page, 'error', default=None)
+        if error:
+            raise ExtractorError(error, expected=True)
+
+
+class LinkedInLearningIE(LinkedInLearningBaseIE):
+    IE_NAME = 'linkedin:learning'
+    _VALID_URL = r'https?://(?:www\.)?linkedin\.com/learning/(?P<course_slug>[^/]+)/(?P<id>[^/?#]+)'
+    _TEST = {
+        'url': 'https://www.linkedin.com/learning/programming-foundations-fundamentals/welcome?autoplay=true',
+        'md5': 'a1d74422ff0d5e66a792deb996693167',
+        'info_dict': {
+            'id': '90426',
+            'ext': 'mp4',
+            'title': 'Welcome',
+            'timestamp': 1430396150.82,
+            'upload_date': '20150430',
+        },
+    }
+
+    def _real_extract(self, url):
+        course_slug, video_slug = re.match(self._VALID_URL, url).groups()
+
+        video_data = None
+        formats = []
+        for width, height in ((640, 360), (960, 540), (1280, 720)):
+            video_data = self._call_api(
+                course_slug, 'selectedVideo', video_slug, height)['selectedVideo']
+
+            video_url_data = video_data.get('url') or {}
+            progressive_url = video_url_data.get('progressiveUrl')
+            if progressive_url:
+                formats.append({
+                    'format_id': 'progressive-%dp' % height,
+                    'url': progressive_url,
+                    'height': height,
+                    'width': width,
+                    'source_preference': 1,
+                })
+
+        title = video_data['title']
+
+        audio_url = video_data.get('audio', {}).get('progressiveUrl')
+        if audio_url:
+            formats.append({
+                'abr': 64,
+                'ext': 'm4a',
+                'format_id': 'audio',
+                'url': audio_url,
+                'vcodec': 'none',
+            })
+
+        streaming_url = video_url_data.get('streamingUrl')
+        if streaming_url:
+            formats.extend(self._extract_m3u8_formats(
+                streaming_url, video_slug, 'mp4',
+                'm3u8_native', m3u8_id='hls', fatal=False))
+
+        self._sort_formats(formats, ('width', 'height', 'source_preference', 'tbr', 'abr'))
+
+        return {
+            'id': self._get_video_id(video_data.get('urn'), course_slug, video_slug),
+            'title': title,
+            'formats': formats,
+            'thumbnail': video_data.get('defaultThumbnail'),
+            'timestamp': float_or_none(video_data.get('publishedOn'), 1000),
+            'duration': int_or_none(video_data.get('durationInSeconds')),
+        }
+
+
+class LinkedInLearningCourseIE(LinkedInLearningBaseIE):
+    IE_NAME = 'linkedin:learning:course'
+    _VALID_URL = r'https?://(?:www\.)?linkedin\.com/learning/(?P<id>[^/?#]+)'
+    _TEST = {
+        'url': 'https://www.linkedin.com/learning/programming-foundations-fundamentals',
+        'info_dict': {
+            'id': 'programming-foundations-fundamentals',
+            'title': 'Programming Foundations: Fundamentals',
+            'description': 'md5:76e580b017694eb89dc8e8923fff5c86',
+        },
+        'playlist_mincount': 61,
+    }
+
+    @classmethod
+    def suitable(cls, url):
+        return False if LinkedInLearningIE.suitable(url) else super(LinkedInLearningCourseIE, cls).suitable(url)
+
+    def _real_extract(self, url):
+        course_slug = self._match_id(url)
+        course_data = self._call_api(course_slug, 'chapters,description,title')
+
+        entries = []
+        for chapter in course_data.get('chapters', []):
+            chapter_title = chapter.get('title')
+            for video in chapter.get('videos', []):
+                video_slug = video.get('slug')
+                if not video_slug:
+                    continue
+                entries.append({
+                    '_type': 'url_transparent',
+                    'id': self._get_video_id(video.get('urn'), course_slug, video_slug),
+                    'title': video.get('title'),
+                    'url': 'https://www.linkedin.com/learning/%s/%s' % (course_slug, video_slug),
+                    'chapter': chapter_title,
+                    'ie_key': LinkedInLearningIE.ie_key(),
+                })
+
+        return self.playlist_result(
+            entries, course_slug,
+            course_data.get('title'),
+            course_data.get('description'))
index 57f97409da5f15e1575fe0fb317078dbfb3c33b6..df3748798e9f943c16ddd29cead9515b0dd44c57 100644 (file)
@@ -4,6 +4,11 @@ from __future__ import unicode_literals
 import re
 
 from .theplatform import ThePlatformBaseIE
 import re
 
 from .theplatform import ThePlatformBaseIE
+from ..compat import (
+    compat_parse_qs,
+    compat_str,
+    compat_urllib_parse_urlparse,
+)
 from ..utils import (
     ExtractorError,
     int_or_none,
 from ..utils import (
     ExtractorError,
     int_or_none,
@@ -76,12 +81,33 @@ class MediasetIE(ThePlatformBaseIE):
     }]
 
     @staticmethod
     }]
 
     @staticmethod
-    def _extract_urls(webpage):
-        return [
-            mobj.group('url')
-            for mobj in re.finditer(
-                r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>https?://(?:www\.)?video\.mediaset\.it/player/playerIFrame(?:Twitter)?\.shtml\?.*?\bid=\d+.*?)\1',
-                webpage)]
+    def _extract_urls(ie, webpage):
+        def _qs(url):
+            return compat_parse_qs(compat_urllib_parse_urlparse(url).query)
+
+        def _program_guid(qs):
+            return qs.get('programGuid', [None])[0]
+
+        entries = []
+        for mobj in re.finditer(
+                r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:www\.)?video\.mediaset\.it/player/playerIFrame(?:Twitter)?\.shtml.*?)\1',
+                webpage):
+            embed_url = mobj.group('url')
+            embed_qs = _qs(embed_url)
+            program_guid = _program_guid(embed_qs)
+            if program_guid:
+                entries.append(embed_url)
+                continue
+            video_id = embed_qs.get('id', [None])[0]
+            if not video_id:
+                continue
+            urlh = ie._request_webpage(
+                embed_url, video_id, note='Following embed URL redirect')
+            embed_url = compat_str(urlh.geturl())
+            program_guid = _program_guid(_qs(embed_url))
+            if program_guid:
+                entries.append(embed_url)
+        return entries
 
     def _real_extract(self, url):
         guid = self._match_id(url)
 
     def _real_extract(self, url):
         guid = self._match_id(url)
index febef097af5f3cb31b2e8c4f7ef1981a7285f88e..025c5d249c4a0daa2032cf639b64c5c27f2974fb 100644 (file)
@@ -31,6 +31,8 @@ class NJPWWorldIE(InfoExtractor):
         'skip': 'Requires login',
     }
 
         'skip': 'Requires login',
     }
 
+    _LOGIN_URL = 'https://front.njpwworld.com/auth/login'
+
     def _real_initialize(self):
         self._login()
 
     def _real_initialize(self):
         self._login()
 
@@ -40,13 +42,17 @@ class NJPWWorldIE(InfoExtractor):
         if not username:
             return True
 
         if not username:
             return True
 
+        # Setup session (will set necessary cookies)
+        self._request_webpage(
+            'https://njpwworld.com/', None, note='Setting up session')
+
         webpage, urlh = self._download_webpage_handle(
         webpage, urlh = self._download_webpage_handle(
-            'https://njpwworld.com/auth/login', None,
+            self._LOGIN_URL, None,
             note='Logging in', errnote='Unable to login',
             data=urlencode_postdata({'login_id': username, 'pw': password}),
             note='Logging in', errnote='Unable to login',
             data=urlencode_postdata({'login_id': username, 'pw': password}),
-            headers={'Referer': 'https://njpwworld.com/auth'})
+            headers={'Referer': 'https://front.njpwworld.com/auth'})
         # /auth/login will return 302 for successful logins
         # /auth/login will return 302 for successful logins
-        if urlh.geturl() == 'https://njpwworld.com/auth/login':
+        if urlh.geturl() == self._LOGIN_URL:
             self.report_warning('unable to login')
             return False
 
             self.report_warning('unable to login')
             return False
 
index d264fe20664523a2d0f6387fe1de1f242663b831..2473536fd8ac03a09afce39a3de4dfe771356dcb 100644 (file)
@@ -243,7 +243,7 @@ class PhantomJSwrapper(object):
 
 
 class OpenloadIE(InfoExtractor):
 
 
 class OpenloadIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?(?:openload\.(?:co|io|link)|oload\.(?:tv|stream|site|xyz|win|download))/(?:f|embed)/(?P<id>[a-zA-Z0-9-_]+)'
+    _VALID_URL = r'https?://(?:www\.)?(?:openload\.(?:co|io|link)|oload\.(?:tv|stream|site|xyz|win|download|cloud|cc|icu|fun))/(?:f|embed)/(?P<id>[a-zA-Z0-9-_]+)'
 
     _TESTS = [{
         'url': 'https://openload.co/f/kUEfGclsU9o',
 
     _TESTS = [{
         'url': 'https://openload.co/f/kUEfGclsU9o',
@@ -307,10 +307,22 @@ class OpenloadIE(InfoExtractor):
     }, {
         'url': 'https://oload.download/f/kUEfGclsU9o',
         'only_matching': True,
     }, {
         'url': 'https://oload.download/f/kUEfGclsU9o',
         'only_matching': True,
+    }, {
+        'url': 'https://oload.cloud/f/4ZDnBXRWiB8',
+        'only_matching': True,
     }, {
         # Its title has not got its extension but url has it
         'url': 'https://oload.download/f/N4Otkw39VCw/Tomb.Raider.2018.HDRip.XviD.AC3-EVO.avi.mp4',
         'only_matching': True,
     }, {
         # Its title has not got its extension but url has it
         'url': 'https://oload.download/f/N4Otkw39VCw/Tomb.Raider.2018.HDRip.XviD.AC3-EVO.avi.mp4',
         'only_matching': True,
+    }, {
+        'url': 'https://oload.cc/embed/5NEAbI2BDSk',
+        'only_matching': True,
+    }, {
+        'url': 'https://oload.icu/f/-_i4y_F_Hs8',
+        'only_matching': True,
+    }, {
+        'url': 'https://oload.fun/f/gb6G1H4sHXY',
+        'only_matching': True,
     }]
 
     _USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36'
     }]
 
     _USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36'
index c1fb580ca8d9a20d785d71194d2822e51b2ee1ae..d432e344977d1fbef9a52049ce0ba16fd1573489 100644 (file)
@@ -15,6 +15,7 @@ from ..utils import (
     strip_jsonp,
     unescapeHTML,
     unified_strdate,
     strip_jsonp,
     unescapeHTML,
     unified_strdate,
+    url_or_none,
 )
 
 
 )
 
 
@@ -68,26 +69,35 @@ class ORFTVthekIE(InfoExtractor):
                 webpage, 'playlist', group='json'),
             playlist_id, transform_source=unescapeHTML)['playlist']['videos']
 
                 webpage, 'playlist', group='json'),
             playlist_id, transform_source=unescapeHTML)['playlist']['videos']
 
-        def quality_to_int(s):
-            m = re.search('([0-9]+)', s)
-            if m is None:
-                return -1
-            return int(m.group(1))
-
         entries = []
         for sd in data_jsb:
             video_id, title = sd.get('id'), sd.get('title')
             if not video_id or not title:
                 continue
             video_id = compat_str(video_id)
         entries = []
         for sd in data_jsb:
             video_id, title = sd.get('id'), sd.get('title')
             if not video_id or not title:
                 continue
             video_id = compat_str(video_id)
-            formats = [{
-                'preference': -10 if fd['delivery'] == 'hls' else None,
-                'format_id': '%s-%s-%s' % (
-                    fd['delivery'], fd['quality'], fd['quality_string']),
-                'url': fd['src'],
-                'protocol': fd['protocol'],
-                'quality': quality_to_int(fd['quality']),
-            } for fd in sd['sources']]
+            formats = []
+            for fd in sd['sources']:
+                src = url_or_none(fd.get('src'))
+                if not src:
+                    continue
+                format_id_list = []
+                for key in ('delivery', 'quality', 'quality_string'):
+                    value = fd.get(key)
+                    if value:
+                        format_id_list.append(value)
+                format_id = '-'.join(format_id_list)
+                if determine_ext(fd['src']) == 'm3u8':
+                    formats.extend(self._extract_m3u8_formats(
+                        fd['src'], video_id, 'mp4', m3u8_id=format_id))
+                elif determine_ext(fd['src']) == 'f4m':
+                    formats.extend(self._extract_f4m_formats(
+                        fd['src'], video_id, f4m_id=format_id))
+                else:
+                    formats.append({
+                        'format_id': format_id,
+                        'url': src,
+                        'protocol': fd.get('protocol'),
+                    })
 
             # Check for geoblocking.
             # There is a property is_geoprotection, but that's always false
 
             # Check for geoblocking.
             # There is a property is_geoprotection, but that's always false
index 9eb0276795356af12571e98982f35edec490d0ba..426dd812158686a4a86e06c0846dd257fcee6748 100644 (file)
@@ -2,52 +2,63 @@
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
-from ..utils import js_to_json
+from ..utils import (
+    clean_html,
+    determine_ext,
+    int_or_none,
+    parse_iso8601,
+)
 
 
 class PatreonIE(InfoExtractor):
 
 
 class PatreonIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?patreon\.com/creation\?hid=(?P<id>[^&#]+)'
-    _TESTS = [
-        {
-            'url': 'http://www.patreon.com/creation?hid=743933',
-            'md5': 'e25505eec1053a6e6813b8ed369875cc',
-            'info_dict': {
-                'id': '743933',
-                'ext': 'mp3',
-                'title': 'Episode 166: David Smalley of Dogma Debate',
-                'uploader': 'Cognitive Dissonance Podcast',
-                'thumbnail': 're:^https?://.*$',
-            },
+    _VALID_URL = r'https?://(?:www\.)?patreon\.com/(?:creation\?hid=|posts/(?:[\w-]+-)?)(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'http://www.patreon.com/creation?hid=743933',
+        'md5': 'e25505eec1053a6e6813b8ed369875cc',
+        'info_dict': {
+            'id': '743933',
+            'ext': 'mp3',
+            'title': 'Episode 166: David Smalley of Dogma Debate',
+            'description': 'md5:713b08b772cd6271b9f3906683cfacdf',
+            'uploader': 'Cognitive Dissonance Podcast',
+            'thumbnail': 're:^https?://.*$',
+            'timestamp': 1406473987,
+            'upload_date': '20140727',
+        },
+    }, {
+        'url': 'http://www.patreon.com/creation?hid=754133',
+        'md5': '3eb09345bf44bf60451b8b0b81759d0a',
+        'info_dict': {
+            'id': '754133',
+            'ext': 'mp3',
+            'title': 'CD 167 Extra',
+            'uploader': 'Cognitive Dissonance Podcast',
+            'thumbnail': 're:^https?://.*$',
         },
         },
-        {
-            'url': 'http://www.patreon.com/creation?hid=754133',
-            'md5': '3eb09345bf44bf60451b8b0b81759d0a',
-            'info_dict': {
-                'id': '754133',
-                'ext': 'mp3',
-                'title': 'CD 167 Extra',
-                'uploader': 'Cognitive Dissonance Podcast',
-                'thumbnail': 're:^https?://.*$',
-            },
+        'skip': 'Patron-only content',
+    }, {
+        'url': 'https://www.patreon.com/creation?hid=1682498',
+        'info_dict': {
+            'id': 'SU4fj_aEMVw',
+            'ext': 'mp4',
+            'title': 'I\'m on Patreon!',
+            'uploader': 'TraciJHines',
+            'thumbnail': 're:^https?://.*$',
+            'upload_date': '20150211',
+            'description': 'md5:c5a706b1f687817a3de09db1eb93acd4',
+            'uploader_id': 'TraciJHines',
         },
         },
-        {
-            'url': 'https://www.patreon.com/creation?hid=1682498',
-            'info_dict': {
-                'id': 'SU4fj_aEMVw',
-                'ext': 'mp4',
-                'title': 'I\'m on Patreon!',
-                'uploader': 'TraciJHines',
-                'thumbnail': 're:^https?://.*$',
-                'upload_date': '20150211',
-                'description': 'md5:c5a706b1f687817a3de09db1eb93acd4',
-                'uploader_id': 'TraciJHines',
-            },
-            'params': {
-                'noplaylist': True,
-                'skip_download': True,
-            }
+        'params': {
+            'noplaylist': True,
+            'skip_download': True,
         }
         }
-    ]
+    }, {
+        'url': 'https://www.patreon.com/posts/episode-166-of-743933',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.patreon.com/posts/743933',
+        'only_matching': True,
+    }]
 
     # Currently Patreon exposes download URL via hidden CSS, so login is not
     # needed. Keeping this commented for when this inevitably changes.
 
     # Currently Patreon exposes download URL via hidden CSS, so login is not
     # needed. Keeping this commented for when this inevitably changes.
@@ -78,38 +89,48 @@ class PatreonIE(InfoExtractor):
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
-        webpage = self._download_webpage(url, video_id)
-        title = self._og_search_title(webpage).strip()
-
-        attach_fn = self._html_search_regex(
-            r'<div class="attach"><a target="_blank" href="([^"]+)">',
-            webpage, 'attachment URL', default=None)
-        embed = self._html_search_regex(
-            r'<div[^>]+id="watchCreation"[^>]*>\s*<iframe[^>]+src="([^"]+)"',
-            webpage, 'embedded URL', default=None)
-
-        if attach_fn is not None:
-            video_url = 'http://www.patreon.com' + attach_fn
-            thumbnail = self._og_search_thumbnail(webpage)
-            uploader = self._html_search_regex(
-                r'<strong>(.*?)</strong> is creating', webpage, 'uploader')
-        elif embed is not None:
-            return self.url_result(embed)
-        else:
-            playlist = self._parse_json(self._search_regex(
-                r'(?s)new\s+jPlayerPlaylist\(\s*\{\s*[^}]*},\s*(\[.*?,?\s*\])',
-                webpage, 'playlist JSON'),
-                video_id, transform_source=js_to_json)
-            data = playlist[0]
-            video_url = self._proto_relative_url(data['mp3'])
-            thumbnail = self._proto_relative_url(data.get('cover'))
-            uploader = data.get('artist')
-
-        return {
+        post = self._download_json(
+            'https://www.patreon.com/api/posts/' + video_id, video_id)
+        attributes = post['data']['attributes']
+        title = attributes['title'].strip()
+        image = attributes.get('image') or {}
+        info = {
             'id': video_id,
             'id': video_id,
-            'url': video_url,
-            'ext': 'mp3',
             'title': title,
             'title': title,
-            'uploader': uploader,
-            'thumbnail': thumbnail,
+            'description': clean_html(attributes.get('content')),
+            'thumbnail': image.get('large_url') or image.get('url'),
+            'timestamp': parse_iso8601(attributes.get('published_at')),
+            'like_count': int_or_none(attributes.get('like_count')),
+            'comment_count': int_or_none(attributes.get('comment_count')),
         }
         }
+
+        def add_file(file_data):
+            file_url = file_data.get('url')
+            if file_url:
+                info.update({
+                    'url': file_url,
+                    'ext': determine_ext(file_data.get('name'), 'mp3'),
+                })
+
+        for i in post.get('included', []):
+            i_type = i.get('type')
+            if i_type == 'attachment':
+                add_file(i.get('attributes') or {})
+            elif i_type == 'user':
+                user_attributes = i.get('attributes')
+                if user_attributes:
+                    info.update({
+                        'uploader': user_attributes.get('full_name'),
+                        'uploader_url': user_attributes.get('url'),
+                    })
+
+        if not info.get('url'):
+            add_file(attributes.get('post_file') or {})
+
+        if not info.get('url'):
+            info.update({
+                '_type': 'url',
+                'url': attributes['embed']['url'],
+            })
+
+        return info
index f1008ae514f78f6c843e399031135afb00f5f23f..f723a2b3b507d5aa59428dd4f44057f7bbe3f655 100644 (file)
@@ -2,31 +2,38 @@
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
+from ..compat import compat_str
 from ..utils import (
 from ..utils import (
-    float_or_none,
-    int_or_none,
-    parse_iso8601,
-    xpath_text,
+    try_get,
+    urljoin,
 )
 
 
 class PhilharmonieDeParisIE(InfoExtractor):
     IE_DESC = 'Philharmonie de Paris'
 )
 
 
 class PhilharmonieDeParisIE(InfoExtractor):
     IE_DESC = 'Philharmonie de Paris'
-    _VALID_URL = r'https?://live\.philharmoniedeparis\.fr/(?:[Cc]oncert/|misc/Playlist\.ashx\?id=)(?P<id>\d+)'
+    _VALID_URL = r'''(?x)
+                    https?://
+                        (?:
+                            live\.philharmoniedeparis\.fr/(?:[Cc]oncert/|misc/Playlist\.ashx\?id=)|
+                            pad\.philharmoniedeparis\.fr/doc/CIMU/
+                        )
+                        (?P<id>\d+)
+                    '''
     _TESTS = [{
     _TESTS = [{
+        'url': 'http://pad.philharmoniedeparis.fr/doc/CIMU/1086697/jazz-a-la-villette-knower',
+        'md5': 'a0a4b195f544645073631cbec166a2c2',
+        'info_dict': {
+            'id': '1086697',
+            'ext': 'mp4',
+            'title': 'Jazz à la Villette : Knower',
+        },
+    }, {
         'url': 'http://live.philharmoniedeparis.fr/concert/1032066.html',
         'info_dict': {
             'id': '1032066',
         'url': 'http://live.philharmoniedeparis.fr/concert/1032066.html',
         'info_dict': {
             'id': '1032066',
-            'ext': 'flv',
-            'title': 'md5:d1f5585d87d041d07ce9434804bc8425',
-            'timestamp': 1428179400,
-            'upload_date': '20150404',
-            'duration': 6592.278,
+            'title': 'md5:0a031b81807b3593cffa3c9a87a167a0',
         },
         },
-        'params': {
-            # rtmp download
-            'skip_download': True,
-        }
+        'playlist_mincount': 2,
     }, {
         'url': 'http://live.philharmoniedeparis.fr/Concert/1030324.html',
         'only_matching': True,
     }, {
         'url': 'http://live.philharmoniedeparis.fr/Concert/1030324.html',
         'only_matching': True,
@@ -34,45 +41,60 @@ class PhilharmonieDeParisIE(InfoExtractor):
         'url': 'http://live.philharmoniedeparis.fr/misc/Playlist.ashx?id=1030324&track=&lang=fr',
         'only_matching': True,
     }]
         'url': 'http://live.philharmoniedeparis.fr/misc/Playlist.ashx?id=1030324&track=&lang=fr',
         'only_matching': True,
     }]
+    _LIVE_URL = 'https://live.philharmoniedeparis.fr'
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
-        concert = self._download_xml(
-            'http://live.philharmoniedeparis.fr/misc/Playlist.ashx?id=%s' % video_id,
-            video_id).find('./concert')
+        config = self._download_json(
+            '%s/otoPlayer/config.ashx' % self._LIVE_URL, video_id, query={
+                'id': video_id,
+                'lang': 'fr-FR',
+            })
 
 
-        formats = []
-        info_dict = {
-            'id': video_id,
-            'title': xpath_text(concert, './titre', 'title', fatal=True),
-            'formats': formats,
-        }
-
-        fichiers = concert.find('./fichiers')
-        stream = fichiers.attrib['serveurstream']
-        for fichier in fichiers.findall('./fichier'):
-            info_dict['duration'] = float_or_none(fichier.get('timecodefin'))
-            for quality, (format_id, suffix) in enumerate([('lq', ''), ('hq', '_hd')]):
-                format_url = fichier.get('url%s' % suffix)
-                if not format_url:
+        def extract_entry(source):
+            if not isinstance(source, dict):
+                return
+            title = source.get('title')
+            if not title:
+                return
+            files = source.get('files')
+            if not isinstance(files, dict):
+                return
+            format_urls = set()
+            formats = []
+            for format_id in ('mobile', 'desktop'):
+                format_url = try_get(
+                    files, lambda x: x[format_id]['file'], compat_str)
+                if not format_url or format_url in format_urls:
                     continue
                     continue
-                formats.append({
-                    'url': stream,
-                    'play_path': format_url,
-                    'ext': 'flv',
-                    'format_id': format_id,
-                    'width': int_or_none(concert.get('largeur%s' % suffix)),
-                    'height': int_or_none(concert.get('hauteur%s' % suffix)),
-                    'quality': quality,
-                })
-        self._sort_formats(formats)
+                format_urls.add(format_url)
+                m3u8_url = urljoin(self._LIVE_URL, format_url)
+                formats.extend(self._extract_m3u8_formats(
+                    m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native',
+                    m3u8_id='hls', fatal=False))
+            if not formats:
+                return
+            self._sort_formats(formats)
+            return {
+                'title': title,
+                'formats': formats,
+            }
+
+        thumbnail = urljoin(self._LIVE_URL, config.get('image'))
+
+        info = extract_entry(config)
+        if info:
+            info.update({
+                'id': video_id,
+                'thumbnail': thumbnail,
+            })
+            return info
 
 
-        date, hour = concert.get('date'), concert.get('heure')
-        if date and hour:
-            info_dict['timestamp'] = parse_iso8601(
-                '%s-%s-%sT%s:00' % (date[0:4], date[4:6], date[6:8], hour))
-        elif date:
-            info_dict['upload_date'] = date
+        entries = []
+        for num, chapter in enumerate(config['chapters'], start=1):
+            entry = extract_entry(chapter)
+            entry['id'] = '%s-%d' % (video_id, num)
+            entries.append(entry)
 
 
-        return info_dict
+        return self.playlist_result(entries, video_id, config.get('title'))
index 1257841e4bbcffbae999cd402f9a6c7982a3fb30..eafe56897dde68c8dcb44d2c315d8c63dd2c805d 100644 (file)
@@ -4,6 +4,7 @@ import collections
 import json
 import os
 import random
 import json
 import os
 import random
+import re
 
 from .common import InfoExtractor
 from ..compat import (
 
 from .common import InfoExtractor
 from ..compat import (
@@ -196,7 +197,10 @@ query viewClip {
         if error:
             raise ExtractorError('Unable to login: %s' % error, expected=True)
 
         if error:
             raise ExtractorError('Unable to login: %s' % error, expected=True)
 
-        if all(p not in response for p in ('__INITIAL_STATE__', '"currentUser"')):
+        if all(not re.search(p, response) for p in (
+                r'__INITIAL_STATE__', r'["\']currentUser["\']',
+                # new layout?
+                r'>\s*Sign out\s*<')):
             BLOCKED = 'Your account has been blocked due to suspicious activity'
             if BLOCKED in response:
                 raise ExtractorError(
             BLOCKED = 'Your account has been blocked due to suspicious activity'
             if BLOCKED in response:
                 raise ExtractorError(
@@ -210,18 +214,26 @@ query viewClip {
 
             raise ExtractorError('Unable to log in')
 
 
             raise ExtractorError('Unable to log in')
 
-    def _get_subtitles(self, author, clip_idx, lang, name, duration, video_id):
-        captions_post = {
-            'a': author,
-            'cn': clip_idx,
-            'lc': lang,
-            'm': name,
-        }
-        captions = self._download_json(
-            '%s/player/retrieve-captions' % self._API_BASE, video_id,
-            'Downloading captions JSON', 'Unable to download captions JSON',
-            fatal=False, data=json.dumps(captions_post).encode('utf-8'),
-            headers={'Content-Type': 'application/json;charset=utf-8'})
+    def _get_subtitles(self, author, clip_idx, clip_id, lang, name, duration, video_id):
+        captions = None
+        if clip_id:
+            captions = self._download_json(
+                '%s/transcript/api/v1/caption/json/%s/%s'
+                % (self._API_BASE, clip_id, lang), video_id,
+                'Downloading captions JSON', 'Unable to download captions JSON',
+                fatal=False)
+        if not captions:
+            captions_post = {
+                'a': author,
+                'cn': int(clip_idx),
+                'lc': lang,
+                'm': name,
+            }
+            captions = self._download_json(
+                '%s/player/retrieve-captions' % self._API_BASE, video_id,
+                'Downloading captions JSON', 'Unable to download captions JSON',
+                fatal=False, data=json.dumps(captions_post).encode('utf-8'),
+                headers={'Content-Type': 'application/json;charset=utf-8'})
         if captions:
             return {
                 lang: [{
         if captions:
             return {
                 lang: [{
@@ -413,7 +425,7 @@ query viewClip {
 
         # TODO: other languages?
         subtitles = self.extract_subtitles(
 
         # TODO: other languages?
         subtitles = self.extract_subtitles(
-            author, clip_idx, 'en', name, duration, display_id)
+            author, clip_idx, clip.get('clipId'), 'en', name, duration, display_id)
 
         return {
             'id': clip_id,
 
         return {
             'id': clip_id,
index ac901f42655a7f78225be3d9532caef41782f457..9f834fb6ce6b8da641da4dc7df64cba0f83811fd 100644 (file)
@@ -58,8 +58,6 @@ class PopcornTVIE(InfoExtractor):
         thumbnail = self._og_search_thumbnail(webpage)
         timestamp = unified_timestamp(self._html_search_meta(
             'uploadDate', webpage, 'timestamp'))
         thumbnail = self._og_search_thumbnail(webpage)
         timestamp = unified_timestamp(self._html_search_meta(
             'uploadDate', webpage, 'timestamp'))
-        print(self._html_search_meta(
-            'duration', webpage))
         duration = int_or_none(self._html_search_meta(
             'duration', webpage), invscale=60)
         view_count = int_or_none(self._html_search_meta(
         duration = int_or_none(self._html_search_meta(
             'duration', webpage), invscale=60)
         view_count = int_or_none(self._html_search_meta(
index 6782848d90c289d02aeef76bb2d0925351d64121..19eaf389f829c2c9b0956e89615119dbab6f0593 100644 (file)
@@ -40,6 +40,7 @@ class PornHubIE(InfoExtractor):
             'ext': 'mp4',
             'title': 'Seductive Indian beauty strips down and fingers her pink pussy',
             'uploader': 'Babes',
             'ext': 'mp4',
             'title': 'Seductive Indian beauty strips down and fingers her pink pussy',
             'uploader': 'Babes',
+            'upload_date': '20130628',
             'duration': 361,
             'view_count': int,
             'like_count': int,
             'duration': 361,
             'view_count': int,
             'like_count': int,
@@ -57,6 +58,7 @@ class PornHubIE(InfoExtractor):
             'ext': 'mp4',
             'title': '重庆婷婷女王足交',
             'uploader': 'Unknown',
             'ext': 'mp4',
             'title': '重庆婷婷女王足交',
             'uploader': 'Unknown',
+            'upload_date': '20150213',
             'duration': 1753,
             'view_count': int,
             'like_count': int,
             'duration': 1753,
             'view_count': int,
             'like_count': int,
@@ -237,8 +239,14 @@ class PornHubIE(InfoExtractor):
                 video_urls.append((video_url, None))
                 video_urls_set.add(video_url)
 
                 video_urls.append((video_url, None))
                 video_urls_set.add(video_url)
 
+        upload_date = None
         formats = []
         for video_url, height in video_urls:
         formats = []
         for video_url, height in video_urls:
+            if not upload_date:
+                upload_date = self._search_regex(
+                    r'/(\d{6}/\d{2})/', video_url, 'upload data', default=None)
+                if upload_date:
+                    upload_date = upload_date.replace('/', '')
             tbr = None
             mobj = re.search(r'(?P<height>\d+)[pP]?_(?P<tbr>\d+)[kK]', video_url)
             if mobj:
             tbr = None
             mobj = re.search(r'(?P<height>\d+)[pP]?_(?P<tbr>\d+)[kK]', video_url)
             if mobj:
@@ -278,6 +286,7 @@ class PornHubIE(InfoExtractor):
         return {
             'id': video_id,
             'uploader': video_uploader,
         return {
             'id': video_id,
             'uploader': video_uploader,
+            'upload_date': upload_date,
             'title': title,
             'thumbnail': thumbnail,
             'duration': duration,
             'title': title,
             'thumbnail': thumbnail,
             'duration': duration,
index f916b26195ed18106655ed84f2be01a032af84c4..548a6553b5b2a426aaf11fba95fb97fd0c898db9 100644 (file)
@@ -274,7 +274,6 @@ class RaiPlayPlaylistIE(InfoExtractor):
             ('programma', 'nomeProgramma'), webpage, 'title')
         description = unescapeHTML(self._html_search_meta(
             ('description', 'og:description'), webpage, 'description'))
             ('programma', 'nomeProgramma'), webpage, 'title')
         description = unescapeHTML(self._html_search_meta(
             ('description', 'og:description'), webpage, 'description'))
-        print(description)
 
         entries = []
         for mobj in re.finditer(
 
         entries = []
         for mobj in re.finditer(
index 261bcbb83ece85610a709701155ab4e244400f10..10ac8ed1f22ad943dc210430d0363d9a4aa0b483 100644 (file)
@@ -103,7 +103,8 @@ class RutubeIE(RutubeBaseIE):
 
         options = self._download_json(
             'http://rutube.ru/api/play/options/%s/?format=json' % video_id,
 
         options = self._download_json(
             'http://rutube.ru/api/play/options/%s/?format=json' % video_id,
-            video_id, 'Downloading options JSON')
+            video_id, 'Downloading options JSON',
+            headers=self.geo_verification_headers())
 
         formats = []
         for format_id, format_url in options['video_balancer'].items():
 
         formats = []
         for format_id, format_url in options['video_balancer'].items():
index 62a6a8337ccf5d247a38be29cf93b5b72f36dfd7..69a0d01f39c24f9fdcef0d31dd4d9d7a2f082c85 100644 (file)
@@ -90,6 +90,15 @@ class ScreencastIE(InfoExtractor):
                     r'src=(.*?)(?:$|&)', video_meta,
                     'meta tag video URL', default=None)
 
                     r'src=(.*?)(?:$|&)', video_meta,
                     'meta tag video URL', default=None)
 
+        if video_url is None:
+            video_url = self._html_search_regex(
+                r'MediaContentUrl["\']\s*:(["\'])(?P<url>(?:(?!\1).)+)\1',
+                webpage, 'video url', default=None, group='url')
+
+        if video_url is None:
+            video_url = self._html_search_meta(
+                'og:video', webpage, default=None)
+
         if video_url is None:
             raise ExtractorError('Cannot find video')
 
         if video_url is None:
             raise ExtractorError('Cannot find video')
 
index e76522b45d2e83f537b057cd9eb4d4b61f7bc403..6090e00662b5517bad5442576c375ad07590a384 100644 (file)
@@ -44,3 +44,10 @@ class ParamountNetworkIE(MTVServicesInfoExtractor):
 
     _FEED_URL = 'http://www.paramountnetwork.com/feeds/mrss/'
     _GEO_COUNTRIES = ['US']
 
     _FEED_URL = 'http://www.paramountnetwork.com/feeds/mrss/'
     _GEO_COUNTRIES = ['US']
+
+    def _extract_mgid(self, webpage):
+        cs = self._parse_json(self._search_regex(
+            r'window\.__DATA__\s*=\s*({.+})',
+            webpage, 'data'), None)['children']
+        c = next(c for c in cs if c.get('type') == 'VideoPlayer')
+        return c['props']['media']['video']['config']['uri']
index 54497c880ec2cc9cbfc5ff20756cbc19d4ee6c65..b9017fd2ab67df5f3e1b855eb30047f790b53765 100644 (file)
@@ -8,20 +8,24 @@ from ..utils import (
     determine_ext,
     int_or_none,
     js_to_json,
     determine_ext,
     int_or_none,
     js_to_json,
+    merge_dicts,
 )
 
 
 )
 
 
-class SportBoxEmbedIE(InfoExtractor):
-    _VALID_URL = r'https?://news\.sportbox\.ru/vdl/player(?:/[^/]+/|\?.*?\bn?id=)(?P<id>\d+)'
+class SportBoxIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:news\.sportbox|matchtv)\.ru/vdl/player(?:/[^/]+/|\?.*?\bn?id=)(?P<id>\d+)'
     _TESTS = [{
         'url': 'http://news.sportbox.ru/vdl/player/ci/211355',
         'info_dict': {
     _TESTS = [{
         'url': 'http://news.sportbox.ru/vdl/player/ci/211355',
         'info_dict': {
-            'id': '211355',
+            'id': '109158',
             'ext': 'mp4',
             'ext': 'mp4',
-            'title': '211355',
+            'title': 'В Новороссийске прошел детский турнир «Поле славы боевой»',
+            'description': 'В Новороссийске прошел детский турнир «Поле славы боевой»',
             'thumbnail': r're:^https?://.*\.jpg$',
             'duration': 292,
             'view_count': int,
             'thumbnail': r're:^https?://.*\.jpg$',
             'duration': 292,
             'view_count': int,
+            'timestamp': 1426237001,
+            'upload_date': '20150313',
         },
         'params': {
             # m3u8 download
         },
         'params': {
             # m3u8 download
@@ -33,12 +37,18 @@ class SportBoxEmbedIE(InfoExtractor):
     }, {
         'url': 'https://news.sportbox.ru/vdl/player/media/193095',
         'only_matching': True,
     }, {
         'url': 'https://news.sportbox.ru/vdl/player/media/193095',
         'only_matching': True,
+    }, {
+        'url': 'https://news.sportbox.ru/vdl/player/media/109158',
+        'only_matching': True,
+    }, {
+        'url': 'https://matchtv.ru/vdl/player/media/109158',
+        'only_matching': True,
     }]
 
     @staticmethod
     def _extract_urls(webpage):
         return re.findall(
     }]
 
     @staticmethod
     def _extract_urls(webpage):
         return re.findall(
-            r'<iframe[^>]+src="(https?://news\.sportbox\.ru/vdl/player[^"]+)"',
+            r'<iframe[^>]+src="(https?://(?:news\.sportbox|matchtv)\.ru/vdl/player[^"]+)"',
             webpage)
 
     def _real_extract(self, url):
             webpage)
 
     def _real_extract(self, url):
@@ -46,13 +56,14 @@ class SportBoxEmbedIE(InfoExtractor):
 
         webpage = self._download_webpage(url, video_id)
 
 
         webpage = self._download_webpage(url, video_id)
 
-        wjplayer_data = self._parse_json(
+        sources = self._parse_json(
             self._search_regex(
             self._search_regex(
-                r'(?s)wjplayer\(({.+?})\);', webpage, 'wjplayer settings'),
+                r'(?s)playerOptions\.sources(?:WithRes)?\s*=\s*(\[.+?\])\s*;\s*\n',
+                webpage, 'sources'),
             video_id, transform_source=js_to_json)
 
         formats = []
             video_id, transform_source=js_to_json)
 
         formats = []
-        for source in wjplayer_data['sources']:
+        for source in sources:
             src = source.get('src')
             if not src:
                 continue
             src = source.get('src')
             if not src:
                 continue
@@ -66,14 +77,23 @@ class SportBoxEmbedIE(InfoExtractor):
                 })
         self._sort_formats(formats)
 
                 })
         self._sort_formats(formats)
 
+        player = self._parse_json(
+            self._search_regex(
+                r'(?s)playerOptions\s*=\s*({.+?})\s*;\s*\n', webpage,
+                'player options', default='{}'),
+            video_id, transform_source=js_to_json)
+        media_id = player['mediaId']
+
+        info = self._search_json_ld(webpage, media_id, default={})
+
         view_count = int_or_none(self._search_regex(
             r'Просмотров\s*:\s*(\d+)', webpage, 'view count', default=None))
 
         view_count = int_or_none(self._search_regex(
             r'Просмотров\s*:\s*(\d+)', webpage, 'view count', default=None))
 
-        return {
-            'id': video_id,
-            'title': video_id,
-            'thumbnail': wjplayer_data.get('poster'),
-            'duration': int_or_none(wjplayer_data.get('duration')),
+        return merge_dicts(info, {
+            'id': media_id,
+            'title': self._og_search_title(webpage, default=None) or media_id,
+            'thumbnail': player.get('poster'),
+            'duration': int_or_none(player.get('duration')),
             'view_count': view_count,
             'formats': formats,
             'view_count': view_count,
             'formats': formats,
-        }
+        })
index 212ac80abb52ec98c887b9652234fdb50bb81717..f9b6aa48f03d3b3a8cf49f80573eb9d24d115384 100644 (file)
@@ -212,8 +212,6 @@ class TEDIE(InfoExtractor):
 
         http_url = None
         for format_id, resources in resources_.items():
 
         http_url = None
         for format_id, resources in resources_.items():
-            if not isinstance(resources, dict):
-                continue
             if format_id == 'h264':
                 for resource in resources:
                     h264_url = resource.get('file')
             if format_id == 'h264':
                 for resource in resources:
                     h264_url = resource.get('file')
@@ -242,6 +240,8 @@ class TEDIE(InfoExtractor):
                         'tbr': int_or_none(resource.get('bitrate')),
                     })
             elif format_id == 'hls':
                         'tbr': int_or_none(resource.get('bitrate')),
                     })
             elif format_id == 'hls':
+                if not isinstance(resources, dict):
+                    continue
                 stream_url = url_or_none(resources.get('stream'))
                 if not stream_url:
                     continue
                 stream_url = url_or_none(resources.get('stream'))
                 if not stream_url:
                     continue
index ffef5bf06bc0de32f3249474e960e52bf603ff05..18162061578f93d10b0c66c1cc11cae5b76fac6b 100644 (file)
@@ -39,9 +39,17 @@ class ThePlatformBaseIE(OnceIE):
             smil_url, video_id, note=note, query={'format': 'SMIL'},
             headers=self.geo_verification_headers())
         error_element = find_xpath_attr(meta, _x('.//smil:ref'), 'src')
             smil_url, video_id, note=note, query={'format': 'SMIL'},
             headers=self.geo_verification_headers())
         error_element = find_xpath_attr(meta, _x('.//smil:ref'), 'src')
-        if error_element is not None and error_element.attrib['src'].startswith(
-                'http://link.theplatform.%s/s/errorFiles/Unavailable.' % self._TP_TLD):
-            raise ExtractorError(error_element.attrib['abstract'], expected=True)
+        if error_element is not None:
+            exception = find_xpath_attr(
+                error_element, _x('.//smil:param'), 'name', 'exception')
+            if exception is not None:
+                if exception.get('value') == 'GeoLocationBlocked':
+                    self.raise_geo_restricted(error_element.attrib['abstract'])
+                elif error_element.attrib['src'].startswith(
+                        'http://link.theplatform.%s/s/errorFiles/Unavailable.'
+                        % self._TP_TLD):
+                    raise ExtractorError(
+                        error_element.attrib['abstract'], expected=True)
 
         smil_formats = self._parse_smil_formats(
             meta, smil_url, video_id, namespace=default_ns,
 
         smil_formats = self._parse_smil_formats(
             meta, smil_url, video_id, namespace=default_ns,
index 368c45729af533eca244f4d0e8a29bfe5f4f7a8a..db93b018252d23bfa66c34b4161c56e1d14c568e 100644 (file)
@@ -45,7 +45,7 @@ class Tube8IE(KeezMoviesIE):
                 r'videoTitle\s*=\s*"([^"]+)', webpage, 'title')
 
         description = self._html_search_regex(
                 r'videoTitle\s*=\s*"([^"]+)', webpage, 'title')
 
         description = self._html_search_regex(
-            r'>Description:</strong>\s*(.+?)\s*<', webpage, 'description', fatal=False)
+            r'(?s)Description:</dt>\s*<dd>(.+?)</dd>', webpage, 'description', fatal=False)
         uploader = self._html_search_regex(
             r'<span class="username">\s*(.+?)\s*<',
             webpage, 'uploader', fatal=False)
         uploader = self._html_search_regex(
             r'<span class="username">\s*(.+?)\s*<',
             webpage, 'uploader', fatal=False)
@@ -55,19 +55,19 @@ class Tube8IE(KeezMoviesIE):
         dislike_count = int_or_none(self._search_regex(
             r'rdownVar\s*=\s*"(\d+)"', webpage, 'dislike count', fatal=False))
         view_count = str_to_int(self._search_regex(
         dislike_count = int_or_none(self._search_regex(
             r'rdownVar\s*=\s*"(\d+)"', webpage, 'dislike count', fatal=False))
         view_count = str_to_int(self._search_regex(
-            r'<strong>Views: </strong>([\d,\.]+)\s*</li>',
+            r'Views:\s*</dt>\s*<dd>([\d,\.]+)',
             webpage, 'view count', fatal=False))
         comment_count = str_to_int(self._search_regex(
             r'<span id="allCommentsCount">(\d+)</span>',
             webpage, 'comment count', fatal=False))
 
         category = self._search_regex(
             webpage, 'view count', fatal=False))
         comment_count = str_to_int(self._search_regex(
             r'<span id="allCommentsCount">(\d+)</span>',
             webpage, 'comment count', fatal=False))
 
         category = self._search_regex(
-            r'Category:\s*</strong>\s*<a[^>]+href=[^>]+>([^<]+)',
+            r'Category:\s*</dt>\s*<dd>\s*<a[^>]+href=[^>]+>([^<]+)',
             webpage, 'category', fatal=False)
         categories = [category] if category else None
 
         tags_str = self._search_regex(
             webpage, 'category', fatal=False)
         categories = [category] if category else None
 
         tags_str = self._search_regex(
-            r'(?s)Tags:\s*</strong>(.+?)</(?!a)',
+            r'(?s)Tags:\s*</dt>\s*<dd>(.+?)</(?!a)',
             webpage, 'tags', fatal=False)
         tags = [t for t in re.findall(
             r'<a[^>]+href=[^>]+>([^<]+)', tags_str)] if tags_str else None
             webpage, 'tags', fatal=False)
         tags = [t for t in re.findall(
             r'<a[^>]+href=[^>]+>([^<]+)', tags_str)] if tags_str else None
diff --git a/youtube_dl/extractor/tv3.py b/youtube_dl/extractor/tv3.py
deleted file mode 100644 (file)
index 3867ec9..0000000
+++ /dev/null
@@ -1,34 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-
-
-class TV3IE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?tv3\.co\.nz/(?P<id>[^/]+)/tabid/\d+/articleID/\d+/MCat/\d+/Default\.aspx'
-    _TEST = {
-        'url': 'http://www.tv3.co.nz/MOTORSPORT-SRS-SsangYong-Hampton-Downs-Round-3/tabid/3692/articleID/121615/MCat/2915/Default.aspx',
-        'info_dict': {
-            'id': '4659127992001',
-            'ext': 'mp4',
-            'title': 'CRC Motorsport: SRS SsangYong Hampton Downs Round 3 - S2015 Ep3',
-            'description': 'SsangYong Racing Series returns for Round 3 with drivers from New Zealand and Australia taking to the grid at Hampton Downs raceway.',
-            'uploader_id': '3812193411001',
-            'upload_date': '20151213',
-            'timestamp': 1449975272,
-        },
-        'expected_warnings': [
-            'Failed to download MPD manifest'
-        ],
-        'params': {
-            # m3u8 download
-            'skip_download': True,
-        },
-    }
-    BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/3812193411001/default_default/index.html?videoId=%s'
-
-    def _real_extract(self, url):
-        display_id = self._match_id(url)
-        webpage = self._download_webpage(url, display_id)
-        brightcove_id = self._search_regex(r'<param\s*name="@videoPlayer"\s*value="(\d+)"', webpage, 'brightcove id')
-        return self.url_result(self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 'BrightcoveNew', brightcove_id)
diff --git a/youtube_dl/extractor/twitcasting.py b/youtube_dl/extractor/twitcasting.py
new file mode 100644 (file)
index 0000000..05f8aa9
--- /dev/null
@@ -0,0 +1,60 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+import re
+
+
+class TwitCastingIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:[^/]+\.)?twitcasting\.tv/(?P<uploader_id>[^/]+)/movie/(?P<id>\d+)'
+    _TEST = {
+        'url': 'https://twitcasting.tv/ivetesangalo/movie/2357609',
+        'md5': '745243cad58c4681dc752490f7540d7f',
+        'info_dict': {
+            'id': '2357609',
+            'ext': 'mp4',
+            'title': 'Recorded Live #2357609',
+            'uploader_id': 'ivetesangalo',
+            'description': "Moi! I'm live on TwitCasting from my iPhone.",
+            'thumbnail': r're:^https?://.*\.jpg$',
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        uploader_id = mobj.group('uploader_id')
+
+        webpage = self._download_webpage(url, video_id)
+
+        title = self._html_search_regex(
+            r'(?s)<[^>]+id=["\']movietitle[^>]+>(.+?)</',
+            webpage, 'title', default=None) or self._html_search_meta(
+            'twitter:title', webpage, fatal=True)
+
+        m3u8_url = self._search_regex(
+            (r'data-movie-url=(["\'])(?P<url>(?:(?!\1).)+)\1',
+             r'(["\'])(?P<url>http.+?\.m3u8.*?)\1'),
+            webpage, 'm3u8 url', group='url')
+
+        formats = self._extract_m3u8_formats(
+            m3u8_url, video_id, ext='mp4', entry_protocol='m3u8_native',
+            m3u8_id='hls')
+
+        thumbnail = self._og_search_thumbnail(webpage)
+        description = self._og_search_description(
+            webpage, default=None) or self._html_search_meta(
+            'twitter:description', webpage)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'uploader_id': uploader_id,
+            'formats': formats,
+        }
index b39972b1efbfd46aab00a2f05e1a82605b8772b8..401615683dc9b22e4110a6db06dd959c524d96f9 100644 (file)
@@ -51,7 +51,9 @@ class TwitchBaseIE(InfoExtractor):
                 expected=True)
 
     def _call_api(self, path, item_id, *args, **kwargs):
                 expected=True)
 
     def _call_api(self, path, item_id, *args, **kwargs):
-        kwargs.setdefault('headers', {})['Client-ID'] = self._CLIENT_ID
+        headers = kwargs.get('headers', {}).copy()
+        headers['Client-ID'] = self._CLIENT_ID
+        kwargs['headers'] = headers
         response = self._download_json(
             '%s/%s' % (self._API_BASE, path), item_id,
             *args, **compat_kwargs(kwargs))
         response = self._download_json(
             '%s/%s' % (self._API_BASE, path), item_id,
             *args, **compat_kwargs(kwargs))
@@ -559,7 +561,8 @@ class TwitchStreamIE(TwitchBaseIE):
                     TwitchAllVideosIE,
                     TwitchUploadsIE,
                     TwitchPastBroadcastsIE,
                     TwitchAllVideosIE,
                     TwitchUploadsIE,
                     TwitchPastBroadcastsIE,
-                    TwitchHighlightsIE))
+                    TwitchHighlightsIE,
+                    TwitchClipsIE))
                 else super(TwitchStreamIE, cls).suitable(url))
 
     def _real_extract(self, url):
                 else super(TwitchStreamIE, cls).suitable(url))
 
     def _real_extract(self, url):
@@ -633,7 +636,7 @@ class TwitchStreamIE(TwitchBaseIE):
 
 class TwitchClipsIE(TwitchBaseIE):
     IE_NAME = 'twitch:clips'
 
 class TwitchClipsIE(TwitchBaseIE):
     IE_NAME = 'twitch:clips'
-    _VALID_URL = r'https?://clips\.twitch\.tv/(?:[^/]+/)*(?P<id>[^/?#&]+)'
+    _VALID_URL = r'https?://(?:clips\.twitch\.tv/(?:[^/]+/)*|(?:www\.)?twitch\.tv/[^/]+/clip/)(?P<id>[^/?#&]+)'
 
     _TESTS = [{
         'url': 'https://clips.twitch.tv/FaintLightGullWholeWheat',
 
     _TESTS = [{
         'url': 'https://clips.twitch.tv/FaintLightGullWholeWheat',
@@ -653,6 +656,9 @@ class TwitchClipsIE(TwitchBaseIE):
         # multiple formats
         'url': 'https://clips.twitch.tv/rflegendary/UninterestedBeeDAESuppy',
         'only_matching': True,
         # multiple formats
         'url': 'https://clips.twitch.tv/rflegendary/UninterestedBeeDAESuppy',
         'only_matching': True,
+    }, {
+        'url': 'https://www.twitch.tv/sergeynixon/clip/StormyThankfulSproutFutureMan',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
     }]
 
     def _real_extract(self, url):
index 79c45f80e0f827d07a2ef362597b6e7fbab122e7..105826e9bb04d8b0e39e2126f637607cc8f6159b 100644 (file)
@@ -122,7 +122,9 @@ class UdemyIE(InfoExtractor):
             raise ExtractorError(error_str, expected=True)
 
     def _download_webpage_handle(self, *args, **kwargs):
             raise ExtractorError(error_str, expected=True)
 
     def _download_webpage_handle(self, *args, **kwargs):
-        kwargs.setdefault('headers', {})['User-Agent'] = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_5) AppleWebKit/603.2.4 (KHTML, like Gecko) Version/10.1.1 Safari/603.2.4'
+        headers = kwargs.get('headers', {}).copy()
+        headers['User-Agent'] = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_5) AppleWebKit/603.2.4 (KHTML, like Gecko) Version/10.1.1 Safari/603.2.4'
+        kwargs['headers'] = headers
         return super(UdemyIE, self)._download_webpage_handle(
             *args, **compat_kwargs(kwargs))
 
         return super(UdemyIE, self)._download_webpage_handle(
             *args, **compat_kwargs(kwargs))
 
index d5d5b4c69ff466bbd245289d5e83fc68bc92069e..6e318479c866288dc877fdf4731a3486308ac46c 100644 (file)
@@ -130,16 +130,16 @@ class ViewsterIE(InfoExtractor):
             def concat(suffix, sep='-'):
                 return (base_format_id + '%s%s' % (sep, suffix)) if base_format_id else suffix
 
             def concat(suffix, sep='-'):
                 return (base_format_id + '%s%s' % (sep, suffix)) if base_format_id else suffix
 
-            for media_type in ('application/f4m+xml', 'application/x-mpegURL', 'video/mp4'):
-                media = self._download_json(
-                    'https://public-api.viewster.com/movies/%s/video' % entry_id,
-                    video_id, 'Downloading %s JSON' % concat(media_type, ' '), fatal=False, query={
-                        'mediaType': media_type,
-                        'language': audio,
-                        'subtitle': subtitle,
-                    })
-                if not media:
-                    continue
+            medias = self._download_json(
+                'https://public-api.viewster.com/movies/%s/videos' % entry_id,
+                video_id, fatal=False, query={
+                    'mediaTypes': ['application/f4m+xml', 'application/x-mpegURL', 'video/mp4'],
+                    'language': audio,
+                    'subtitle': subtitle,
+                })
+            if not medias:
+                continue
+            for media in medias:
                 video_url = media.get('Uri')
                 if not video_url:
                     continue
                 video_url = media.get('Uri')
                 if not video_url:
                     continue
index e49b233f2b98ebf5cf559e9e80d8a06a0687381f..88f4d99794e5df73c6383d06b4b5f143a9f76739 100644 (file)
@@ -299,10 +299,13 @@ class VimeoIE(VimeoBaseInfoExtractor):
                 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/atencio',
                 'uploader_id': 'atencio',
                 'uploader': 'Peter Atencio',
                 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/atencio',
                 'uploader_id': 'atencio',
                 'uploader': 'Peter Atencio',
+                'channel_id': 'keypeele',
+                'channel_url': r're:https?://(?:www\.)?vimeo\.com/channels/keypeele',
                 'timestamp': 1380339469,
                 'upload_date': '20130928',
                 'duration': 187,
             },
                 'timestamp': 1380339469,
                 'upload_date': '20130928',
                 'duration': 187,
             },
+            'expected_warnings': ['Unable to download JSON metadata'],
         },
         {
             'url': 'http://vimeo.com/76979871',
         },
         {
             'url': 'http://vimeo.com/76979871',
@@ -355,11 +358,13 @@ class VimeoIE(VimeoBaseInfoExtractor):
             'url': 'https://vimeo.com/channels/tributes/6213729',
             'info_dict': {
                 'id': '6213729',
             'url': 'https://vimeo.com/channels/tributes/6213729',
             'info_dict': {
                 'id': '6213729',
-                'ext': 'mov',
+                'ext': 'mp4',
                 'title': 'Vimeo Tribute: The Shining',
                 'uploader': 'Casey Donahue',
                 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/caseydonahue',
                 'uploader_id': 'caseydonahue',
                 'title': 'Vimeo Tribute: The Shining',
                 'uploader': 'Casey Donahue',
                 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/caseydonahue',
                 'uploader_id': 'caseydonahue',
+                'channel_url': r're:https?://(?:www\.)?vimeo\.com/channels/tributes',
+                'channel_id': 'tributes',
                 'timestamp': 1250886430,
                 'upload_date': '20090821',
                 'description': 'md5:bdbf314014e58713e6e5b66eb252f4a6',
                 'timestamp': 1250886430,
                 'upload_date': '20090821',
                 'description': 'md5:bdbf314014e58713e6e5b66eb252f4a6',
@@ -465,6 +470,9 @@ class VimeoIE(VimeoBaseInfoExtractor):
         if 'Referer' not in headers:
             headers['Referer'] = url
 
         if 'Referer' not in headers:
             headers['Referer'] = url
 
+        channel_id = self._search_regex(
+            r'vimeo\.com/channels/([^/]+)', url, 'channel id', default=None)
+
         # Extract ID from URL
         mobj = re.match(self._VALID_URL, url)
         video_id = mobj.group('id')
         # Extract ID from URL
         mobj = re.match(self._VALID_URL, url)
         video_id = mobj.group('id')
@@ -543,6 +551,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
                 else:
                     config_re = [r' = {config:({.+?}),assets:', r'(?:[abc])=({.+?});']
                 config_re.append(r'\bvar\s+r\s*=\s*({.+?})\s*;')
                 else:
                     config_re = [r' = {config:({.+?}),assets:', r'(?:[abc])=({.+?});']
                 config_re.append(r'\bvar\s+r\s*=\s*({.+?})\s*;')
+                config_re.append(r'\bconfig\s*=\s*({.+?})\s*;')
                 config = self._search_regex(config_re, webpage, 'info section',
                                             flags=re.DOTALL)
                 config = json.loads(config)
                 config = self._search_regex(config_re, webpage, 'info section',
                                             flags=re.DOTALL)
                 config = json.loads(config)
@@ -563,19 +572,23 @@ class VimeoIE(VimeoBaseInfoExtractor):
             if config.get('view') == 4:
                 config = self._verify_player_video_password(redirect_url, video_id)
 
             if config.get('view') == 4:
                 config = self._verify_player_video_password(redirect_url, video_id)
 
+        vod = config.get('video', {}).get('vod', {})
+
         def is_rented():
             if '>You rented this title.<' in webpage:
                 return True
             if config.get('user', {}).get('purchased'):
                 return True
         def is_rented():
             if '>You rented this title.<' in webpage:
                 return True
             if config.get('user', {}).get('purchased'):
                 return True
-            label = try_get(
-                config, lambda x: x['video']['vod']['purchase_options'][0]['label_string'], compat_str)
-            if label and label.startswith('You rented this'):
-                return True
+            for purchase_option in vod.get('purchase_options', []):
+                if purchase_option.get('purchased'):
+                    return True
+                label = purchase_option.get('label_string')
+                if label and (label.startswith('You rented this') or label.endswith(' remaining')):
+                    return True
             return False
 
             return False
 
-        if is_rented():
-            feature_id = config.get('video', {}).get('vod', {}).get('feature_id')
+        if is_rented() and vod.get('is_trailer'):
+            feature_id = vod.get('feature_id')
             if feature_id and not data.get('force_feature_id', False):
                 return self.url_result(smuggle_url(
                     'https://player.vimeo.com/player/%s' % feature_id,
             if feature_id and not data.get('force_feature_id', False):
                 return self.url_result(smuggle_url(
                     'https://player.vimeo.com/player/%s' % feature_id,
@@ -652,6 +665,8 @@ class VimeoIE(VimeoBaseInfoExtractor):
                 r'<link[^>]+rel=["\']license["\'][^>]+href=(["\'])(?P<license>(?:(?!\1).)+)\1',
                 webpage, 'license', default=None, group='license')
 
                 r'<link[^>]+rel=["\']license["\'][^>]+href=(["\'])(?P<license>(?:(?!\1).)+)\1',
                 webpage, 'license', default=None, group='license')
 
+        channel_url = 'https://vimeo.com/channels/%s' % channel_id if channel_id else None
+
         info_dict = {
             'id': video_id,
             'formats': formats,
         info_dict = {
             'id': video_id,
             'formats': formats,
@@ -662,6 +677,8 @@ class VimeoIE(VimeoBaseInfoExtractor):
             'like_count': like_count,
             'comment_count': comment_count,
             'license': cc_license,
             'like_count': like_count,
             'comment_count': comment_count,
             'license': cc_license,
+            'channel_id': channel_id,
+            'channel_url': channel_url,
         }
 
         info_dict = merge_dicts(info_dict, info_dict_config, json_ld)
         }
 
         info_dict = merge_dicts(info_dict, info_dict_config, json_ld)
index 921e9e172496975a2b7ff78d7f08871b6c27f8f7..ac0819c7c10a9bc0db09b76993a4c79c08a02c4b 100644 (file)
@@ -90,7 +90,13 @@ class VRVIE(VRVBaseIE):
     def _extract_vrv_formats(self, url, video_id, stream_format, audio_lang, hardsub_lang):
         if not url or stream_format not in ('hls', 'dash'):
             return []
     def _extract_vrv_formats(self, url, video_id, stream_format, audio_lang, hardsub_lang):
         if not url or stream_format not in ('hls', 'dash'):
             return []
-        stream_id = hardsub_lang or audio_lang
+        assert audio_lang or hardsub_lang
+        stream_id_list = []
+        if audio_lang:
+            stream_id_list.append('audio-%s' % audio_lang)
+        if hardsub_lang:
+            stream_id_list.append('hardsub-%s' % hardsub_lang)
+        stream_id = '-'.join(stream_id_list)
         format_id = '%s-%s' % (stream_format, stream_id)
         if stream_format == 'hls':
             adaptive_formats = self._extract_m3u8_formats(
         format_id = '%s-%s' % (stream_format, stream_id)
         if stream_format == 'hls':
             adaptive_formats = self._extract_m3u8_formats(
index 02fcd52c74c268ea8274359af46810a6beeb0636..6000671c31bc399a405f03a0064996287d46fe3e 100644 (file)
@@ -4,15 +4,19 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
 import re
 
 from .common import InfoExtractor
+from ..compat import compat_str
 from ..utils import (
     int_or_none,
     float_or_none,
 from ..utils import (
     int_or_none,
     float_or_none,
+    unified_timestamp,
+    url_or_none,
 )
 
 
 class VzaarIE(InfoExtractor):
     _VALID_URL = r'https?://(?:(?:www|view)\.)?vzaar\.com/(?:videos/)?(?P<id>\d+)'
     _TESTS = [{
 )
 
 
 class VzaarIE(InfoExtractor):
     _VALID_URL = r'https?://(?:(?:www|view)\.)?vzaar\.com/(?:videos/)?(?P<id>\d+)'
     _TESTS = [{
+        # HTTP and HLS
         'url': 'https://vzaar.com/videos/1152805',
         'md5': 'bde5ddfeb104a6c56a93a06b04901dbf',
         'info_dict': {
         'url': 'https://vzaar.com/videos/1152805',
         'md5': 'bde5ddfeb104a6c56a93a06b04901dbf',
         'info_dict': {
@@ -40,24 +44,48 @@ class VzaarIE(InfoExtractor):
         video_id = self._match_id(url)
         video_data = self._download_json(
             'http://view.vzaar.com/v2/%s/video' % video_id, video_id)
         video_id = self._match_id(url)
         video_data = self._download_json(
             'http://view.vzaar.com/v2/%s/video' % video_id, video_id)
-        source_url = video_data['sourceUrl']
 
 
-        info = {
+        title = video_data['videoTitle']
+
+        formats = []
+
+        source_url = url_or_none(video_data.get('sourceUrl'))
+        if source_url:
+            f = {
+                'url': source_url,
+                'format_id': 'http',
+            }
+            if 'audio' in source_url:
+                f.update({
+                    'vcodec': 'none',
+                    'ext': 'mp3',
+                })
+            else:
+                f.update({
+                    'width': int_or_none(video_data.get('width')),
+                    'height': int_or_none(video_data.get('height')),
+                    'ext': 'mp4',
+                    'fps': float_or_none(video_data.get('fps')),
+                })
+            formats.append(f)
+
+        video_guid = video_data.get('guid')
+        usp = video_data.get('usp')
+        if isinstance(video_guid, compat_str) and isinstance(usp, dict):
+            m3u8_url = ('http://fable.vzaar.com/v4/usp/%s/%s.ism/.m3u8?'
+                        % (video_guid, video_id)) + '&'.join(
+                '%s=%s' % (k, v) for k, v in usp.items())
+            formats.extend(self._extract_m3u8_formats(
+                m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native',
+                m3u8_id='hls', fatal=False))
+
+        self._sort_formats(formats)
+
+        return {
             'id': video_id,
             'id': video_id,
-            'title': video_data['videoTitle'],
-            'url': source_url,
+            'title': title,
             'thumbnail': self._proto_relative_url(video_data.get('poster')),
             'duration': float_or_none(video_data.get('videoDuration')),
             'thumbnail': self._proto_relative_url(video_data.get('poster')),
             'duration': float_or_none(video_data.get('videoDuration')),
+            'timestamp': unified_timestamp(video_data.get('ts')),
+            'formats': formats,
         }
         }
-        if 'audio' in source_url:
-            info.update({
-                'vcodec': 'none',
-                'ext': 'mp3',
-            })
-        else:
-            info.update({
-                'width': int_or_none(video_data.get('width')),
-                'height': int_or_none(video_data.get('height')),
-                'ext': 'mp4',
-            })
-        return info
index 27047425db6a121c0b5e39e11f597e82db26301a..3f49f3889e6bcde1bd0f641f9f421caf8d560b1c 100644 (file)
@@ -41,6 +41,7 @@ from ..utils import (
     remove_quotes,
     remove_start,
     smuggle_url,
     remove_quotes,
     remove_start,
     smuggle_url,
+    str_or_none,
     str_to_int,
     try_get,
     unescapeHTML,
     str_to_int,
     try_get,
     unescapeHTML,
@@ -259,7 +260,9 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
         return True
 
     def _download_webpage_handle(self, *args, **kwargs):
         return True
 
     def _download_webpage_handle(self, *args, **kwargs):
-        kwargs.setdefault('query', {})['disable_polymer'] = 'true'
+        query = kwargs.get('query', {}).copy()
+        query['disable_polymer'] = 'true'
+        kwargs['query'] = query
         return super(YoutubeBaseInfoExtractor, self)._download_webpage_handle(
             *args, **compat_kwargs(kwargs))
 
         return super(YoutubeBaseInfoExtractor, self)._download_webpage_handle(
             *args, **compat_kwargs(kwargs))
 
@@ -347,6 +350,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                             (?:www\.)?hooktube\.com/|
                             (?:www\.)?yourepeat\.com/|
                             tube\.majestyc\.net/|
                             (?:www\.)?hooktube\.com/|
                             (?:www\.)?yourepeat\.com/|
                             tube\.majestyc\.net/|
+                            (?:www\.)?invidio\.us/|
                             youtube\.googleapis\.com/)                        # the various hostnames, with wildcard subdomains
                          (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
                          (?:                                                  # the various things that can precede the ID:
                             youtube\.googleapis\.com/)                        # the various hostnames, with wildcard subdomains
                          (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
                          (?:                                                  # the various things that can precede the ID:
@@ -490,12 +494,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'uploader': 'Philipp Hagemeister',
                 'uploader_id': 'phihag',
                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
                 'uploader': 'Philipp Hagemeister',
                 'uploader_id': 'phihag',
                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
+                'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
+                'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
                 'upload_date': '20121002',
                 'license': 'Standard YouTube License',
                 'description': 'test chars:  "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
                 'categories': ['Science & Technology'],
                 'tags': ['youtube-dl'],
                 'duration': 10,
                 'upload_date': '20121002',
                 'license': 'Standard YouTube License',
                 'description': 'test chars:  "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
                 'categories': ['Science & Technology'],
                 'tags': ['youtube-dl'],
                 'duration': 10,
+                'view_count': int,
                 'like_count': int,
                 'dislike_count': int,
                 'start_time': 1,
                 'like_count': int,
                 'dislike_count': int,
                 'start_time': 1,
@@ -578,6 +585,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'categories': ['Science & Technology'],
                 'tags': ['youtube-dl'],
                 'duration': 10,
                 'categories': ['Science & Technology'],
                 'tags': ['youtube-dl'],
                 'duration': 10,
+                'view_count': int,
                 'like_count': int,
                 'dislike_count': int,
             },
                 'like_count': int,
                 'dislike_count': int,
             },
@@ -1064,6 +1072,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
             'only_matching': True,
         },
             'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
             'only_matching': True,
         },
+        {
+            'url': 'https://invidio.us/watch?v=BaW_jenozKc',
+            'only_matching': True,
+        },
     ]
 
     def __init__(self, *args, **kwargs):
     ]
 
     def __init__(self, *args, **kwargs):
@@ -1180,7 +1192,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             (r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
              r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
              r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*c\s*&&\s*d\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             (r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
              r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
              r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*c\s*&&\s*d\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
-             r'\bc\s*&&\s*d\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\('),
+             r'\bc\s*&&\s*d\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
+             r'\bc\s*&&\s*d\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
             jscode, 'Initial JS player signature function name', group='sig')
 
         jsi = JSInterpreter(jscode)
             jscode, 'Initial JS player signature function name', group='sig')
 
         jsi = JSInterpreter(jscode)
@@ -1529,6 +1542,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         def extract_view_count(v_info):
             return int_or_none(try_get(v_info, lambda x: x['view_count'][0]))
 
         def extract_view_count(v_info):
             return int_or_none(try_get(v_info, lambda x: x['view_count'][0]))
 
+        player_response = {}
+
         # Get video info
         embed_webpage = None
         if re.search(r'player-age-gate-content">', video_webpage) is not None:
         # Get video info
         embed_webpage = None
         if re.search(r'player-age-gate-content">', video_webpage) is not None:
@@ -1571,6 +1586,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 if args.get('livestream') == '1' or args.get('live_playback') == 1:
                     is_live = True
                 sts = ytplayer_config.get('sts')
                 if args.get('livestream') == '1' or args.get('live_playback') == 1:
                     is_live = True
                 sts = ytplayer_config.get('sts')
+                if not player_response:
+                    pl_response = str_or_none(args.get('player_response'))
+                    if pl_response:
+                        pl_response = self._parse_json(pl_response, video_id, fatal=False)
+                        if isinstance(pl_response, dict):
+                            player_response = pl_response
             if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
                 # We also try looking in get_video_info since it may contain different dashmpd
                 # URL that points to a DASH manifest with possibly different itag set (some itags
             if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
                 # We also try looking in get_video_info since it may contain different dashmpd
                 # URL that points to a DASH manifest with possibly different itag set (some itags
@@ -1599,6 +1620,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                     if not video_info_webpage:
                         continue
                     get_video_info = compat_parse_qs(video_info_webpage)
                     if not video_info_webpage:
                         continue
                     get_video_info = compat_parse_qs(video_info_webpage)
+                    if not player_response:
+                        pl_response = get_video_info.get('player_response', [None])[0]
+                        if isinstance(pl_response, dict):
+                            player_response = pl_response
                     add_dash_mpd(get_video_info)
                     if view_count is None:
                         view_count = extract_view_count(get_video_info)
                     add_dash_mpd(get_video_info)
                     if view_count is None:
                         view_count = extract_view_count(get_video_info)
@@ -1644,9 +1669,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                     '"token" parameter not in video info for unknown reason',
                     video_id=video_id)
 
                     '"token" parameter not in video info for unknown reason',
                     video_id=video_id)
 
+        video_details = try_get(
+            player_response, lambda x: x['videoDetails'], dict) or {}
+
         # title
         if 'title' in video_info:
             video_title = video_info['title'][0]
         # title
         if 'title' in video_info:
             video_title = video_info['title'][0]
+        elif 'title' in player_response:
+            video_title = video_details['title']
         else:
             self._downloader.report_warning('Unable to extract video title')
             video_title = '_'
         else:
             self._downloader.report_warning('Unable to extract video title')
             video_title = '_'
@@ -1709,6 +1739,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
 
         if view_count is None:
             view_count = extract_view_count(video_info)
 
         if view_count is None:
             view_count = extract_view_count(video_info)
+        if view_count is None and video_details:
+            view_count = int_or_none(video_details.get('viewCount'))
 
         # Check for "rental" videos
         if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
 
         # Check for "rental" videos
         if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
@@ -1889,7 +1921,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             raise ExtractorError('no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')
 
         # uploader
             raise ExtractorError('no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')
 
         # uploader
-        video_uploader = try_get(video_info, lambda x: x['author'][0], compat_str)
+        video_uploader = try_get(
+            video_info, lambda x: x['author'][0],
+            compat_str) or str_or_none(video_details.get('author'))
         if video_uploader:
             video_uploader = compat_urllib_parse_unquote_plus(video_uploader)
         else:
         if video_uploader:
             video_uploader = compat_urllib_parse_unquote_plus(video_uploader)
         else:
@@ -1907,6 +1941,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         else:
             self._downloader.report_warning('unable to extract uploader nickname')
 
         else:
             self._downloader.report_warning('unable to extract uploader nickname')
 
+        channel_id = self._html_search_meta(
+            'channelId', video_webpage, 'channel id')
+        channel_url = 'http://www.youtube.com/channel/%s' % channel_id if channel_id else None
+
         # thumbnail image
         # We try first to get a high quality image:
         m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
         # thumbnail image
         # We try first to get a high quality image:
         m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
@@ -1998,12 +2036,19 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         like_count = _extract_count('like')
         dislike_count = _extract_count('dislike')
 
         like_count = _extract_count('like')
         dislike_count = _extract_count('dislike')
 
+        if view_count is None:
+            view_count = str_to_int(self._search_regex(
+                r'<[^>]+class=["\']watch-view-count[^>]+>\s*([\d,\s]+)', video_webpage,
+                'view count', default=None))
+
         # subtitles
         video_subtitles = self.extract_subtitles(video_id, video_webpage)
         automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
 
         video_duration = try_get(
             video_info, lambda x: int_or_none(x['length_seconds'][0]))
         # subtitles
         video_subtitles = self.extract_subtitles(video_id, video_webpage)
         automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
 
         video_duration = try_get(
             video_info, lambda x: int_or_none(x['length_seconds'][0]))
+        if not video_duration:
+            video_duration = int_or_none(video_details.get('lengthSeconds'))
         if not video_duration:
             video_duration = parse_duration(self._html_search_meta(
                 'duration', video_webpage, 'video duration'))
         if not video_duration:
             video_duration = parse_duration(self._html_search_meta(
                 'duration', video_webpage, 'video duration'))
@@ -2078,6 +2123,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             'uploader': video_uploader,
             'uploader_id': video_uploader_id,
             'uploader_url': video_uploader_url,
             'uploader': video_uploader,
             'uploader_id': video_uploader_id,
             'uploader_url': video_uploader_url,
+            'channel_id': channel_id,
+            'channel_url': channel_url,
             'upload_date': upload_date,
             'license': video_license,
             'creator': video_creator or artist,
             'upload_date': upload_date,
             'license': video_license,
             'creator': video_creator or artist,
@@ -2116,7 +2163,11 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
                         (?:https?://)?
                         (?:\w+\.)?
                         (?:
                         (?:https?://)?
                         (?:\w+\.)?
                         (?:
-                            youtube\.com/
+                            (?:
+                                youtube\.com|
+                                invidio\.us
+                            )
+                            /
                             (?:
                                (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/(?:videoseries|[0-9A-Za-z_-]{11}))
                                \? (?:.*?[&;])*? (?:p|a|list)=
                             (?:
                                (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/(?:videoseries|[0-9A-Za-z_-]{11}))
                                \? (?:.*?[&;])*? (?:p|a|list)=
@@ -2229,6 +2280,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
             'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
             'categories': ['People & Blogs'],
             'tags': list,
             'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
             'categories': ['People & Blogs'],
             'tags': list,
+            'view_count': int,
             'like_count': int,
             'dislike_count': int,
         },
             'like_count': int,
             'dislike_count': int,
         },
@@ -2267,6 +2319,9 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
         # music album playlist
         'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
         'only_matching': True,
         # music album playlist
         'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
         'only_matching': True,
+    }, {
+        'url': 'https://invidio.us/playlist?list=PLDIoUOhQQPlXr63I_vwF9GD8sAKh77dWU',
+        'only_matching': True,
     }]
 
     def _real_initialize(self):
     }]
 
     def _real_initialize(self):
@@ -2409,7 +2464,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
 
 class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
     IE_DESC = 'YouTube.com channels'
 
 class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
     IE_DESC = 'YouTube.com channels'
-    _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/(?P<id>[0-9A-Za-z_-]+)'
+    _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com|(?:www\.)?invidio\.us)/channel/(?P<id>[0-9A-Za-z_-]+)'
     _TEMPLATE_URL = 'https://www.youtube.com/channel/%s/videos'
     _VIDEO_RE = r'(?:title="(?P<title>[^"]+)"[^>]+)?href="/watch\?v=(?P<id>[0-9A-Za-z_-]+)&?'
     IE_NAME = 'youtube:channel'
     _TEMPLATE_URL = 'https://www.youtube.com/channel/%s/videos'
     _VIDEO_RE = r'(?:title="(?P<title>[^"]+)"[^>]+)?href="/watch\?v=(?P<id>[0-9A-Za-z_-]+)&?'
     IE_NAME = 'youtube:channel'
@@ -2430,6 +2485,9 @@ class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
             'id': 'UUs0ifCMCm1icqRbqhUINa0w',
             'title': 'Uploads from Deus Ex',
         },
             'id': 'UUs0ifCMCm1icqRbqhUINa0w',
             'title': 'Uploads from Deus Ex',
         },
+    }, {
+        'url': 'https://invidio.us/channel/UC23qupoDRn9YOAVzeoxjOQA',
+        'only_matching': True,
     }]
 
     @classmethod
     }]
 
     @classmethod
index fb167c1985527ada9d06e5f482e20c88ba0a4559..896276301e49a53010bf9b6a07b4ea545b1e64fd 100644 (file)
@@ -18,12 +18,12 @@ from ..utils import (
 )
 
 
 )
 
 
-class ZattooBaseIE(InfoExtractor):
-    _NETRC_MACHINE = 'zattoo'
-    _HOST_URL = 'https://zattoo.com'
-
+class ZattooPlatformBaseIE(InfoExtractor):
     _power_guide_hash = None
 
     _power_guide_hash = None
 
+    def _host_url(self):
+        return 'https://%s' % (self._API_HOST if hasattr(self, '_API_HOST') else self._HOST)
+
     def _login(self):
         username, password = self._get_login_info()
         if not username or not password:
     def _login(self):
         username, password = self._get_login_info()
         if not username or not password:
@@ -33,13 +33,13 @@ class ZattooBaseIE(InfoExtractor):
 
         try:
             data = self._download_json(
 
         try:
             data = self._download_json(
-                '%s/zapi/v2/account/login' % self._HOST_URL, None, 'Logging in',
+                '%s/zapi/v2/account/login' % self._host_url(), None, 'Logging in',
                 data=urlencode_postdata({
                     'login': username,
                     'password': password,
                     'remember': 'true',
                 }), headers={
                 data=urlencode_postdata({
                     'login': username,
                     'password': password,
                     'remember': 'true',
                 }), headers={
-                    'Referer': '%s/login' % self._HOST_URL,
+                    'Referer': '%s/login' % self._host_url(),
                     'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
                 })
         except ExtractorError as e:
                     'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
                 })
         except ExtractorError as e:
@@ -53,7 +53,7 @@ class ZattooBaseIE(InfoExtractor):
 
     def _real_initialize(self):
         webpage = self._download_webpage(
 
     def _real_initialize(self):
         webpage = self._download_webpage(
-            self._HOST_URL, None, 'Downloading app token')
+            self._host_url(), None, 'Downloading app token')
         app_token = self._html_search_regex(
             r'appToken\s*=\s*(["\'])(?P<token>(?:(?!\1).)+?)\1',
             webpage, 'app token', group='token')
         app_token = self._html_search_regex(
             r'appToken\s*=\s*(["\'])(?P<token>(?:(?!\1).)+?)\1',
             webpage, 'app token', group='token')
@@ -62,7 +62,7 @@ class ZattooBaseIE(InfoExtractor):
 
         # Will setup appropriate cookies
         self._request_webpage(
 
         # Will setup appropriate cookies
         self._request_webpage(
-            '%s/zapi/v2/session/hello' % self._HOST_URL, None,
+            '%s/zapi/v2/session/hello' % self._host_url(), None,
             'Opening session', data=urlencode_postdata({
                 'client_app_token': app_token,
                 'uuid': compat_str(uuid4()),
             'Opening session', data=urlencode_postdata({
                 'client_app_token': app_token,
                 'uuid': compat_str(uuid4()),
@@ -75,7 +75,7 @@ class ZattooBaseIE(InfoExtractor):
 
     def _extract_cid(self, video_id, channel_name):
         channel_groups = self._download_json(
 
     def _extract_cid(self, video_id, channel_name):
         channel_groups = self._download_json(
-            '%s/zapi/v2/cached/channels/%s' % (self._HOST_URL,
+            '%s/zapi/v2/cached/channels/%s' % (self._host_url(),
                                                self._power_guide_hash),
             video_id, 'Downloading channel list',
             query={'details': False})['channel_groups']
                                                self._power_guide_hash),
             video_id, 'Downloading channel list',
             query={'details': False})['channel_groups']
@@ -93,28 +93,30 @@ class ZattooBaseIE(InfoExtractor):
 
     def _extract_cid_and_video_info(self, video_id):
         data = self._download_json(
 
     def _extract_cid_and_video_info(self, video_id):
         data = self._download_json(
-            '%s/zapi/program/details' % self._HOST_URL,
+            '%s/zapi/v2/cached/program/power_details/%s' % (
+                self._host_url(), self._power_guide_hash),
             video_id,
             'Downloading video information',
             query={
             video_id,
             'Downloading video information',
             query={
-                'program_id': video_id,
-                'complete': True
+                'program_ids': video_id,
+                'complete': True,
             })
 
             })
 
-        p = data['program']
+        p = data['programs'][0]
         cid = p['cid']
 
         info_dict = {
             'id': video_id,
         cid = p['cid']
 
         info_dict = {
             'id': video_id,
-            'title': p.get('title') or p['episode_title'],
-            'description': p.get('description'),
-            'thumbnail': p.get('image_url'),
+            'title': p.get('t') or p['et'],
+            'description': p.get('d'),
+            'thumbnail': p.get('i_url'),
             'creator': p.get('channel_name'),
             'creator': p.get('channel_name'),
-            'episode': p.get('episode_title'),
-            'episode_number': int_or_none(p.get('episode_number')),
-            'season_number': int_or_none(p.get('season_number')),
+            'episode': p.get('et'),
+            'episode_number': int_or_none(p.get('e_no')),
+            'season_number': int_or_none(p.get('s_no')),
             'release_year': int_or_none(p.get('year')),
             'release_year': int_or_none(p.get('year')),
-            'categories': try_get(p, lambda x: x['categories'], list),
+            'categories': try_get(p, lambda x: x['c'], list),
+            'tags': try_get(p, lambda x: x['g'], list)
         }
 
         return cid, info_dict
         }
 
         return cid, info_dict
@@ -126,11 +128,11 @@ class ZattooBaseIE(InfoExtractor):
 
         if is_live:
             postdata_common.update({'timeshift': 10800})
 
         if is_live:
             postdata_common.update({'timeshift': 10800})
-            url = '%s/zapi/watch/live/%s' % (self._HOST_URL, cid)
+            url = '%s/zapi/watch/live/%s' % (self._host_url(), cid)
         elif record_id:
         elif record_id:
-            url = '%s/zapi/watch/recording/%s' % (self._HOST_URL, record_id)
+            url = '%s/zapi/watch/recording/%s' % (self._host_url(), record_id)
         else:
         else:
-            url = '%s/zapi/watch/recall/%s/%s' % (self._HOST_URL, cid, video_id)
+            url = '%s/zapi/watch/recall/%s/%s' % (self._host_url(), cid, video_id)
 
         formats = []
         for stream_type in ('dash', 'hls', 'hls5', 'hds'):
 
         formats = []
         for stream_type in ('dash', 'hls', 'hls5', 'hds'):
@@ -201,13 +203,13 @@ class ZattooBaseIE(InfoExtractor):
         return info_dict
 
 
         return info_dict
 
 
-class QuicklineBaseIE(ZattooBaseIE):
+class QuicklineBaseIE(ZattooPlatformBaseIE):
     _NETRC_MACHINE = 'quickline'
     _NETRC_MACHINE = 'quickline'
-    _HOST_URL = 'https://mobiltv.quickline.com'
+    _HOST = 'mobiltv.quickline.com'
 
 
 class QuicklineIE(QuicklineBaseIE):
 
 
 class QuicklineIE(QuicklineBaseIE):
-    _VALID_URL = r'https?://(?:www\.)?mobiltv\.quickline\.com/watch/(?P<channel>[^/]+)/(?P<id>[0-9]+)'
+    _VALID_URL = r'https?://(?:www\.)?%s/watch/(?P<channel>[^/]+)/(?P<id>[0-9]+)' % re.escape(QuicklineBaseIE._HOST)
 
     _TEST = {
         'url': 'https://mobiltv.quickline.com/watch/prosieben/130671867-maze-runner-die-auserwaehlten-in-der-brandwueste',
 
     _TEST = {
         'url': 'https://mobiltv.quickline.com/watch/prosieben/130671867-maze-runner-die-auserwaehlten-in-der-brandwueste',
@@ -220,7 +222,7 @@ class QuicklineIE(QuicklineBaseIE):
 
 
 class QuicklineLiveIE(QuicklineBaseIE):
 
 
 class QuicklineLiveIE(QuicklineBaseIE):
-    _VALID_URL = r'https?://(?:www\.)?mobiltv\.quickline\.com/watch/(?P<id>[^/]+)'
+    _VALID_URL = r'https?://(?:www\.)?%s/watch/(?P<id>[^/]+)' % re.escape(QuicklineBaseIE._HOST)
 
     _TEST = {
         'url': 'https://mobiltv.quickline.com/watch/srf1',
 
     _TEST = {
         'url': 'https://mobiltv.quickline.com/watch/srf1',
@@ -236,8 +238,18 @@ class QuicklineLiveIE(QuicklineBaseIE):
         return self._extract_video(channel_name, video_id, is_live=True)
 
 
         return self._extract_video(channel_name, video_id, is_live=True)
 
 
+class ZattooBaseIE(ZattooPlatformBaseIE):
+    _NETRC_MACHINE = 'zattoo'
+    _HOST = 'zattoo.com'
+
+
+def _make_valid_url(tmpl, host):
+    return tmpl % re.escape(host)
+
+
 class ZattooIE(ZattooBaseIE):
 class ZattooIE(ZattooBaseIE):
-    _VALID_URL = r'https?://(?:www\.)?zattoo\.com/watch/(?P<channel>[^/]+?)/(?P<id>[0-9]+)[^/]+(?:/(?P<recid>[0-9]+))?'
+    _VALID_URL_TEMPLATE = r'https?://(?:www\.)?%s/watch/(?P<channel>[^/]+?)/(?P<id>[0-9]+)[^/]+(?:/(?P<recid>[0-9]+))?'
+    _VALID_URL = _make_valid_url(_VALID_URL_TEMPLATE, ZattooBaseIE._HOST)
 
     # Since regular videos are only available for 7 days and recorded videos
     # are only available for a specific user, we cannot have detailed tests.
 
     # Since regular videos are only available for 7 days and recorded videos
     # are only available for a specific user, we cannot have detailed tests.
@@ -269,3 +281,142 @@ class ZattooLiveIE(ZattooBaseIE):
     def _real_extract(self, url):
         channel_name = video_id = self._match_id(url)
         return self._extract_video(channel_name, video_id, is_live=True)
     def _real_extract(self, url):
         channel_name = video_id = self._match_id(url)
         return self._extract_video(channel_name, video_id, is_live=True)
+
+
+class NetPlusIE(ZattooIE):
+    _NETRC_MACHINE = 'netplus'
+    _HOST = 'netplus.tv'
+    _API_HOST = 'www.%s' % _HOST
+    _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)
+
+    _TESTS = [{
+        'url': 'https://www.netplus.tv/watch/abc/123-abc',
+        'only_matching': True,
+    }]
+
+
+class MNetTVIE(ZattooIE):
+    _NETRC_MACHINE = 'mnettv'
+    _HOST = 'tvplus.m-net.de'
+    _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)
+
+    _TESTS = [{
+        'url': 'https://tvplus.m-net.de/watch/abc/123-abc',
+        'only_matching': True,
+    }]
+
+
+class WalyTVIE(ZattooIE):
+    _NETRC_MACHINE = 'walytv'
+    _HOST = 'player.waly.tv'
+    _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)
+
+    _TESTS = [{
+        'url': 'https://player.waly.tv/watch/abc/123-abc',
+        'only_matching': True,
+    }]
+
+
+class BBVTVIE(ZattooIE):
+    _NETRC_MACHINE = 'bbvtv'
+    _HOST = 'bbv-tv.net'
+    _API_HOST = 'www.%s' % _HOST
+    _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)
+
+    _TESTS = [{
+        'url': 'https://www.bbv-tv.net/watch/abc/123-abc',
+        'only_matching': True,
+    }]
+
+
+class VTXTVIE(ZattooIE):
+    _NETRC_MACHINE = 'vtxtv'
+    _HOST = 'vtxtv.ch'
+    _API_HOST = 'www.%s' % _HOST
+    _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)
+
+    _TESTS = [{
+        'url': 'https://www.vtxtv.ch/watch/abc/123-abc',
+        'only_matching': True,
+    }]
+
+
+class MyVisionTVIE(ZattooIE):
+    _NETRC_MACHINE = 'myvisiontv'
+    _HOST = 'myvisiontv.ch'
+    _API_HOST = 'www.%s' % _HOST
+    _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)
+
+    _TESTS = [{
+        'url': 'https://www.myvisiontv.ch/watch/abc/123-abc',
+        'only_matching': True,
+    }]
+
+
+class GlattvisionTVIE(ZattooIE):
+    _NETRC_MACHINE = 'glattvisiontv'
+    _HOST = 'iptv.glattvision.ch'
+    _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)
+
+    _TESTS = [{
+        'url': 'https://iptv.glattvision.ch/watch/abc/123-abc',
+        'only_matching': True,
+    }]
+
+
+class SAKTVIE(ZattooIE):
+    _NETRC_MACHINE = 'saktv'
+    _HOST = 'saktv.ch'
+    _API_HOST = 'www.%s' % _HOST
+    _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)
+
+    _TESTS = [{
+        'url': 'https://www.saktv.ch/watch/abc/123-abc',
+        'only_matching': True,
+    }]
+
+
+class EWETVIE(ZattooIE):
+    _NETRC_MACHINE = 'ewetv'
+    _HOST = 'tvonline.ewe.de'
+    _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)
+
+    _TESTS = [{
+        'url': 'https://tvonline.ewe.de/watch/abc/123-abc',
+        'only_matching': True,
+    }]
+
+
+class QuantumTVIE(ZattooIE):
+    _NETRC_MACHINE = 'quantumtv'
+    _HOST = 'quantum-tv.com'
+    _API_HOST = 'www.%s' % _HOST
+    _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)
+
+    _TESTS = [{
+        'url': 'https://www.quantum-tv.com/watch/abc/123-abc',
+        'only_matching': True,
+    }]
+
+
+class OsnatelTVIE(ZattooIE):
+    _NETRC_MACHINE = 'osnateltv'
+    _HOST = 'tvonline.osnatel.de'
+    _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)
+
+    _TESTS = [{
+        'url': 'https://tvonline.osnatel.de/watch/abc/123-abc',
+        'only_matching': True,
+    }]
+
+
+class EinsUndEinsTVIE(ZattooIE):
+    _NETRC_MACHINE = '1und1tv'
+    _HOST = '1und1.tv'
+    _API_HOST = 'www.%s' % _HOST
+    _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)
+
+    _TESTS = [{
+        'url': 'https://www.1und1.tv/watch/abc/123-abc',
+        'only_matching': True,
+    }]
index b078c4993ca68661dd6b93c669c945ff704d7f7b..7f32ad36c4350bd1e137cb464c9b0dfb32f85303 100644 (file)
@@ -1,3 +1,3 @@
 from __future__ import unicode_literals
 
 from __future__ import unicode_literals
 
-__version__ = '2018.09.10'
+__version__ = '2018.11.07'