]> Raphaël G. Git Repositories - youtubedl/commitdiff
New upstream version 2018.03.14
authorRogério Brito <rbrito@ime.usp.br>
Fri, 16 Mar 2018 15:37:39 +0000 (12:37 -0300)
committerRogério Brito <rbrito@ime.usp.br>
Fri, 16 Mar 2018 15:37:39 +0000 (12:37 -0300)
96 files changed:
AUTHORS
ChangeLog
README.md
README.txt
docs/supportedsites.md
setup.cfg
test/test_downloader_http.py [new file with mode: 0644]
test/test_http.py
test/test_utils.py
youtube-dl
youtube-dl.1
youtube-dl.bash-completion
youtube-dl.fish
youtube-dl.zsh
youtube_dl/YoutubeDL.py
youtube_dl/__init__.py
youtube_dl/compat.py
youtube_dl/downloader/common.py
youtube_dl/downloader/hls.py
youtube_dl/downloader/http.py
youtube_dl/downloader/ism.py
youtube_dl/extractor/abcnews.py
youtube_dl/extractor/adn.py
youtube_dl/extractor/aenetworks.py
youtube_dl/extractor/afreecatv.py
youtube_dl/extractor/amcnetworks.py
youtube_dl/extractor/archiveorg.py
youtube_dl/extractor/ard.py
youtube_dl/extractor/brightcove.py
youtube_dl/extractor/canvas.py
youtube_dl/extractor/cbc.py
youtube_dl/extractor/cbsinteractive.py
youtube_dl/extractor/common.py
youtube_dl/extractor/discovery.py
youtube_dl/extractor/dplay.py
youtube_dl/extractor/dvtv.py
youtube_dl/extractor/extractors.py
youtube_dl/extractor/francetv.py
youtube_dl/extractor/funk.py
youtube_dl/extractor/fusion.py
youtube_dl/extractor/gameinformer.py
youtube_dl/extractor/generic.py
youtube_dl/extractor/heise.py
youtube_dl/extractor/hidive.py [new file with mode: 0644]
youtube_dl/extractor/la7.py
youtube_dl/extractor/line.py [new file with mode: 0644]
youtube_dl/extractor/mailru.py
youtube_dl/extractor/myvi.py
youtube_dl/extractor/nationalgeographic.py
youtube_dl/extractor/nbc.py
youtube_dl/extractor/newgrounds.py
youtube_dl/extractor/nexx.py
youtube_dl/extractor/nick.py
youtube_dl/extractor/ninegag.py
youtube_dl/extractor/njpwworld.py
youtube_dl/extractor/npo.py
youtube_dl/extractor/periscope.py
youtube_dl/extractor/pladform.py
youtube_dl/extractor/pokemon.py
youtube_dl/extractor/pornhub.py
youtube_dl/extractor/prosiebensat1.py
youtube_dl/extractor/raywenderlich.py [new file with mode: 0644]
youtube_dl/extractor/redbulltv.py
youtube_dl/extractor/reddit.py
youtube_dl/extractor/redtube.py
youtube_dl/extractor/rtlnl.py
youtube_dl/extractor/ruutu.py
youtube_dl/extractor/seznamzpravy.py
youtube_dl/extractor/sixplay.py
youtube_dl/extractor/sonyliv.py
youtube_dl/extractor/soundcloud.py
youtube_dl/extractor/spankbang.py
youtube_dl/extractor/streamango.py
youtube_dl/extractor/telebruxelles.py
youtube_dl/extractor/telequebec.py
youtube_dl/extractor/tennistv.py [new file with mode: 0644]
youtube_dl/extractor/toggle.py
youtube_dl/extractor/tvnow.py
youtube_dl/extractor/udemy.py
youtube_dl/extractor/veoh.py
youtube_dl/extractor/vice.py
youtube_dl/extractor/viceland.py [deleted file]
youtube_dl/extractor/vidio.py
youtube_dl/extractor/vidlii.py [new file with mode: 0644]
youtube_dl/extractor/vidzi.py
youtube_dl/extractor/vimeo.py
youtube_dl/extractor/vk.py
youtube_dl/extractor/xhamster.py
youtube_dl/extractor/xnxx.py
youtube_dl/extractor/yapfiles.py [new file with mode: 0644]
youtube_dl/extractor/youtube.py
youtube_dl/extractor/zdf.py
youtube_dl/options.py
youtube_dl/postprocessor/embedthumbnail.py
youtube_dl/utils.py
youtube_dl/version.py

diff --git a/AUTHORS b/AUTHORS
index 40215a5cf83f52e0e1252b9bba5fe80f9025c076..6223212aad6eaeee4cc0a0d5a4b3ffbb3a9ff2fe 100644 (file)
--- a/AUTHORS
+++ b/AUTHORS
@@ -233,3 +233,6 @@ Daniel Weber
 Kay Bouché
 Yang Hongbo
 Lei Wang
 Kay Bouché
 Yang Hongbo
 Lei Wang
+Petr Novák
+Leonardo Taccari
+Martin Weinelt
index 00c5c9c6be8f39a759ad1d0b2437b7139709114b..47736e076e0925fb6540ad4cc01fa997b2bd2af1 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,169 @@
+version 2018.03.14
+
+Extractors
+* [soundcloud] Update client id (#15866)
++ [tennistv] Add support for tennistv.com
++ [line] Add support for tv.line.me (#9427)
+* [xnxx] Fix extraction (#15817)
+* [njpwworld] Fix authentication (#15815)
+
+
+version 2018.03.10
+
+Core
+* [downloader/hls] Skip uplynk ad fragments (#15748)
+
+Extractors
+* [pornhub] Don't override session cookies (#15697)
++ [raywenderlich] Add support for videos.raywenderlich.com (#15251)
+* [funk] Fix extraction and rework extractors (#15792)
+* [nexx] Restore reverse engineered approach
++ [heise] Add support for kaltura embeds (#14961, #15728)
++ [tvnow] Extract series metadata (#15774)
+* [ruutu] Continue formats extraction on NOT-USED URLs (#15775)
+* [vrtnu] Use redirect URL for building video JSON URL (#15767, #15769)
+* [vimeo] Modernize login code and improve error messaging
+* [archiveorg] Fix extraction (#15770, #15772)
++ [hidive] Add support for hidive.com (#15494)
+* [afreecatv] Detect deleted videos
+* [afreecatv] Fix extraction (#15755)
+* [vice] Fix extraction and rework extractors (#11101, #13019, #13622, #13778)
++ [vidzi] Add support for vidzi.si (#15751)
+* [npo] Fix typo
+
+
+version 2018.03.03
+
+Core
++ [utils] Add parse_resolution
+Revert respect --prefer-insecure while updating
+
+Extractors
++ [yapfiles] Add support for yapfiles.ru (#15726, #11085)
+* [spankbang] Fix formats extraction (#15727)
+* [adn] Fix extraction (#15716)
++ [toggle] Extract DASH and ISM formats (#15721)
++ [nickelodeon] Add support for nickelodeon.com.tr (#15706)
+* [npo] Validate and filter format URLs (#15709)
+
+
+version 2018.02.26
+
+Extractors
+* [udemy] Use custom User-Agent (#15571)
+
+
+version 2018.02.25
+
+Core
+* [postprocessor/embedthumbnail] Skip embedding when there aren't any
+  thumbnails (#12573)
+* [extractor/common] Improve jwplayer subtitles extraction (#15695)
+
+Extractors
++ [vidlii] Add support for vidlii.com (#14472, #14512, #14779)
++ [streamango] Capture and output error messages
+* [streamango] Fix extraction (#14160, #14256)
++ [telequebec] Add support for emissions (#14649, #14655)
++ [telequebec:live] Add support for live streams (#15688)
++ [mailru:music] Add support for mail.ru/music (#15618)
+* [aenetworks] Switch to akamai HLS formats (#15612)
+* [ytsearch] Fix flat title extraction (#11260, #15681)
+
+
+version 2018.02.22
+
+Core
++ [utils] Fixup some common URL typos in sanitize_url (#15649)
+* Respect --prefer-insecure while updating (#15497)
+
+Extractors
+* [vidio] Fix HLS URL extraction (#15675)
++ [nexx] Add support for arc.nexx.cloud URLs
+* [nexx] Switch to arc API (#15652)
+* [redtube] Fix duration extraction (#15659)
++ [sonyliv] Respect referrer (#15648)
++ [brightcove:new] Use referrer for formats' HTTP headers
++ [cbc] Add support for olympics.cbc.ca (#15535)
++ [fusion] Add support for fusion.tv (#15628)
+* [npo] Improve quality metadata extraction
+* [npo] Relax URL regular expression (#14987, #14994)
++ [npo] Capture and output error message
++ [pornhub] Add support for channels (#15613)
+* [youtube] Handle shared URLs with generic extractor (#14303)
+
+
+version 2018.02.11
+
+Core
++ [YoutubeDL] Add support for filesize_approx in format selector (#15550)
+
+Extractors
++ [francetv] Add support for live streams (#13689)
++ [francetv] Add support for zouzous.fr and ludo.fr (#10454, #13087, #13103,
+  #15012)
+* [francetv] Separate main extractor and rework others to delegate to it
+* [francetv] Improve manifest URL signing (#15536)
++ [francetv] Sign m3u8 manifest URLs (#15565)
++ [veoh] Add support for embed URLs (#15561)
+* [afreecatv] Fix extraction (#15556)
+* [periscope] Use accessVideoPublic endpoint (#15554)
+* [discovery] Fix auth request (#15542)
++ [6play] Extract subtitles (#15541)
+* [newgrounds] Fix metadata extraction (#15531)
++ [nbc] Add support for stream.nbcolympics.com (#10295)
+* [dvtv] Fix live streams extraction (#15442)
+
+
+version 2018.02.08
+
+Extractors
++ [myvi] Extend URL regular expression
++ [myvi:embed] Add support for myvi.tv embeds (#15521)
++ [prosiebensat1] Extend URL regular expression (#15520)
+* [pokemon] Relax URL regular expression and extend title extraction (#15518)
++ [gameinformer] Use geo verification headers
+* [la7] Fix extraction (#15501, #15502)
+* [gameinformer] Fix brightcove id extraction (#15416)
++ [afreecatv] Pass referrer to video info request (#15507)
++ [telebruxelles] Add support for live streams
+* [telebruxelles] Relax URL regular expression
+* [telebruxelles] Fix extraction (#15504)
+* [extractor/common] Respect secure schemes in _extract_wowza_formats
+
+
+version 2018.02.04
+
+Core
+* [downloader/http] Randomize HTTP chunk size
++ [downloader/http] Add ability to pass downloader options via info dict
+* [downloader/http] Fix 302 infinite loops by not reusing requests
++ Document http_chunk_size
+
+Extractors
++ [brightcove] Pass embed page URL as referrer (#15486)
++ [youtube] Enforce using chunked HTTP downloading for DASH formats
+
+
+version 2018.02.03
+
+Core
++ Introduce --http-chunk-size for chunk-based HTTP downloading
++ Add support for IronPython
+* [downloader/ism] Fix Python 3.2 support
+
+Extractors
+* [redbulltv] Fix extraction (#15481)
+* [redtube] Fix metadata extraction (#15472)
+* [pladform] Respect platform id and extract HLS formats (#15468)
+- [rtlnl] Remove progressive formats (#15459)
+* [6play] Do no modify asset URLs with a token (#15248)
+* [nationalgeographic] Relax URL regular expression
+* [dplay] Relax URL regular expression (#15458)
+* [cbsinteractive] Fix data extraction (#15451)
++ [amcnetworks] Add support for sundancetv.com (#9260)
+
+
 version 2018.01.27
 
 Core
 version 2018.01.27
 
 Core
index eb05f848f73327eff3cde5f99c5715f23ef70b4b..7dba5775d9d910d396175eeefb51f38b71c6c831 100644 (file)
--- a/README.md
+++ b/README.md
@@ -198,6 +198,11 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
                                      size. By default, the buffer size is
                                      automatically resized from an initial value
                                      of SIZE.
                                      size. By default, the buffer size is
                                      automatically resized from an initial value
                                      of SIZE.
+    --http-chunk-size SIZE           Size of a chunk for chunk-based HTTP
+                                     downloading (e.g. 10485760 or 10M) (default
+                                     is disabled). May be useful for bypassing
+                                     bandwidth throttling imposed by a webserver
+                                     (experimental)
     --playlist-reverse               Download playlist videos in reverse order
     --playlist-random                Download playlist videos in random order
     --xattr-set-filesize             Set file xattribute ytdl.filesize with
     --playlist-reverse               Download playlist videos in reverse order
     --playlist-random                Download playlist videos in random order
     --xattr-set-filesize             Set file xattribute ytdl.filesize with
index 54b6137fbc9587ac2792fa7105d711dcbe1a874b..24959f03234926da60136026cc16348bfbdb812a 100644 (file)
@@ -227,6 +227,11 @@ Download Options:
                                      size. By default, the buffer size is
                                      automatically resized from an initial value
                                      of SIZE.
                                      size. By default, the buffer size is
                                      automatically resized from an initial value
                                      of SIZE.
+    --http-chunk-size SIZE           Size of a chunk for chunk-based HTTP
+                                     downloading (e.g. 10485760 or 10M) (default
+                                     is disabled). May be useful for bypassing
+                                     bandwidth throttling imposed by a webserver
+                                     (experimental)
     --playlist-reverse               Download playlist videos in reverse order
     --playlist-random                Download playlist videos in random order
     --xattr-set-filesize             Set file xattribute ytdl.filesize with
     --playlist-reverse               Download playlist videos in reverse order
     --playlist-random                Download playlist videos in random order
     --xattr-set-filesize             Set file xattribute ytdl.filesize with
index c15b5eec5b57557f853a4c1188faf1aa3cea31ca..80358bb147d64dd8520f008c12d5ec45ccef8c33 100644 (file)
  - **CarambaTVPage**
  - **CartoonNetwork**
  - **cbc.ca**
  - **CarambaTVPage**
  - **CartoonNetwork**
  - **cbc.ca**
+ - **cbc.ca:olympics**
  - **cbc.ca:player**
  - **cbc.ca:watch**
  - **cbc.ca:watch:video**
  - **cbc.ca:player**
  - **cbc.ca:watch**
  - **cbc.ca:watch:video**
  - **CSpan**: C-SPAN
  - **CtsNews**: 華視新聞
  - **CTVNews**
  - **CSpan**: C-SPAN
  - **CtsNews**: 華視新聞
  - **CTVNews**
- - **culturebox.francetvinfo.fr**
+ - **Culturebox**
  - **CultureUnplugged**
  - **curiositystream**
  - **curiositystream:collection**
  - **CultureUnplugged**
  - **curiositystream**
  - **curiositystream:collection**
  - **FranceTV**
  - **FranceTVEmbed**
  - **francetvinfo.fr**
  - **FranceTV**
  - **FranceTVEmbed**
  - **francetvinfo.fr**
+ - **FranceTVJeunesse**
+ - **FranceTVSite**
  - **Freesound**
  - **freespeech.org**
  - **FreshLive**
  - **Funimation**
  - **Freesound**
  - **freespeech.org**
  - **FreshLive**
  - **Funimation**
- - **Funk**
+ - **FunkChannel**
+ - **FunkMix**
  - **FunnyOrDie**
  - **Fusion**
  - **Fux**
  - **FunnyOrDie**
  - **Fusion**
  - **Fux**
  - **HentaiStigma**
  - **hetklokhuis**
  - **hgtv.com:show**
  - **HentaiStigma**
  - **hetklokhuis**
  - **hgtv.com:show**
+ - **HiDive**
  - **HistoricFilms**
  - **history:topic**: History.com Topic
  - **hitbox**
  - **HistoricFilms**
  - **history:topic**: History.com Topic
  - **hitbox**
  - **limelight**
  - **limelight:channel**
  - **limelight:channel_list**
  - **limelight**
  - **limelight:channel**
  - **limelight:channel_list**
+ - **LineTV**
  - **LiTV**
  - **LiveLeak**
  - **LiveLeakEmbed**
  - **LiTV**
  - **LiveLeak**
  - **LiveLeakEmbed**
  - **m6**
  - **macgamestore**: MacGameStore trailers
  - **mailru**: Видео@Mail.Ru
  - **m6**
  - **macgamestore**: MacGameStore trailers
  - **mailru**: Видео@Mail.Ru
+ - **mailru:music**: Музыка@Mail.Ru
+ - **mailru:music:search**: Музыка@Mail.Ru
  - **MakersChannel**
  - **MakerTV**
  - **mangomolo:live**
  - **MakersChannel**
  - **MakerTV**
  - **mangomolo:live**
  - **MySpass**
  - **Myvi**
  - **MyVidster**
  - **MySpass**
  - **Myvi**
  - **MyVidster**
+ - **MyviEmbed**
  - **n-tv.de**
  - **natgeo**
  - **natgeo:episodeguide**
  - **n-tv.de**
  - **natgeo**
  - **natgeo:episodeguide**
  - **NBA**
  - **NBC**
  - **NBCNews**
  - **NBA**
  - **NBC**
  - **NBCNews**
- - **NBCOlympics**
+ - **nbcolympics**
+ - **nbcolympics:stream**
  - **NBCSports**
  - **NBCSportsVPlayer**
  - **ndr**: NDR.de - Norddeutscher Rundfunk
  - **NBCSports**
  - **NBCSportsVPlayer**
  - **ndr**: NDR.de - Norddeutscher Rundfunk
  - **RaiPlay**
  - **RaiPlayLive**
  - **RaiPlayPlaylist**
  - **RaiPlay**
  - **RaiPlayLive**
  - **RaiPlayPlaylist**
+ - **RayWenderlich**
  - **RBMARadio**
  - **RDS**: RDS.ca
  - **RedBullTV**
  - **RBMARadio**
  - **RDS**: RDS.ca
  - **RedBullTV**
  - **Telegraaf**
  - **TeleMB**
  - **TeleQuebec**
  - **Telegraaf**
  - **TeleMB**
  - **TeleQuebec**
+ - **TeleQuebecEmission**
+ - **TeleQuebecLive**
  - **TeleTask**
  - **Telewebion**
  - **TeleTask**
  - **Telewebion**
+ - **TennisTV**
  - **TF1**
  - **TFO**
  - **TheIntercept**
  - **TF1**
  - **TFO**
  - **TheIntercept**
  - **vice**
  - **vice:article**
  - **vice:show**
  - **vice**
  - **vice:article**
  - **vice:show**
- - **Viceland**
  - **Vidbit**
  - **Viddler**
  - **Videa**
  - **Vidbit**
  - **Viddler**
  - **Videa**
  - **VideoPress**
  - **videoweed**: VideoWeed
  - **Vidio**
  - **VideoPress**
  - **videoweed**: VideoWeed
  - **Vidio**
+ - **VidLii**
  - **vidme**
  - **vidme:user**
  - **vidme:user:likes**
  - **vidme**
  - **vidme:user**
  - **vidme:user:likes**
  - **yandexmusic:album**: Яндекс.Музыка - Альбом
  - **yandexmusic:playlist**: Яндекс.Музыка - Плейлист
  - **yandexmusic:track**: Яндекс.Музыка - Трек
  - **yandexmusic:album**: Яндекс.Музыка - Альбом
  - **yandexmusic:playlist**: Яндекс.Музыка - Плейлист
  - **yandexmusic:track**: Яндекс.Музыка - Трек
+ - **YapFiles**
  - **YesJapan**
  - **yinyuetai:video**: 音悦Tai
  - **Ynet**
  - **YesJapan**
  - **yinyuetai:video**: 音悦Tai
  - **Ynet**
index 2dc06ffe413f76f4d776fe44780f327a170d7801..5208f7ae234fa15b52a1aac67b7d909c9d53de11 100644 (file)
--- a/setup.cfg
+++ b/setup.cfg
@@ -3,4 +3,4 @@ universal = True
 
 [flake8]
 exclude = youtube_dl/extractor/__init__.py,devscripts/buildserver.py,devscripts/lazy_load_template.py,devscripts/make_issue_template.py,setup.py,build,.git
 
 [flake8]
 exclude = youtube_dl/extractor/__init__.py,devscripts/buildserver.py,devscripts/lazy_load_template.py,devscripts/make_issue_template.py,setup.py,build,.git
-ignore = E402,E501,E731
+ignore = E402,E501,E731,E741
diff --git a/test/test_downloader_http.py b/test/test_downloader_http.py
new file mode 100644 (file)
index 0000000..5cf2bf1
--- /dev/null
@@ -0,0 +1,125 @@
+#!/usr/bin/env python
+# coding: utf-8
+from __future__ import unicode_literals
+
+# Allow direct execution
+import os
+import re
+import sys
+import unittest
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from test.helper import try_rm
+from youtube_dl import YoutubeDL
+from youtube_dl.compat import compat_http_server
+from youtube_dl.downloader.http import HttpFD
+from youtube_dl.utils import encodeFilename
+import ssl
+import threading
+
+TEST_DIR = os.path.dirname(os.path.abspath(__file__))
+
+
+def http_server_port(httpd):
+    if os.name == 'java' and isinstance(httpd.socket, ssl.SSLSocket):
+        # In Jython SSLSocket is not a subclass of socket.socket
+        sock = httpd.socket.sock
+    else:
+        sock = httpd.socket
+    return sock.getsockname()[1]
+
+
+TEST_SIZE = 10 * 1024
+
+
+class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
+    def log_message(self, format, *args):
+        pass
+
+    def send_content_range(self, total=None):
+        range_header = self.headers.get('Range')
+        start = end = None
+        if range_header:
+            mobj = re.search(r'^bytes=(\d+)-(\d+)', range_header)
+            if mobj:
+                start = int(mobj.group(1))
+                end = int(mobj.group(2))
+        valid_range = start is not None and end is not None
+        if valid_range:
+            content_range = 'bytes %d-%d' % (start, end)
+            if total:
+                content_range += '/%d' % total
+            self.send_header('Content-Range', content_range)
+        return (end - start + 1) if valid_range else total
+
+    def serve(self, range=True, content_length=True):
+        self.send_response(200)
+        self.send_header('Content-Type', 'video/mp4')
+        size = TEST_SIZE
+        if range:
+            size = self.send_content_range(TEST_SIZE)
+        if content_length:
+            self.send_header('Content-Length', size)
+        self.end_headers()
+        self.wfile.write(b'#' * size)
+
+    def do_GET(self):
+        if self.path == '/regular':
+            self.serve()
+        elif self.path == '/no-content-length':
+            self.serve(content_length=False)
+        elif self.path == '/no-range':
+            self.serve(range=False)
+        elif self.path == '/no-range-no-content-length':
+            self.serve(range=False, content_length=False)
+        else:
+            assert False
+
+
+class FakeLogger(object):
+    def debug(self, msg):
+        pass
+
+    def warning(self, msg):
+        pass
+
+    def error(self, msg):
+        pass
+
+
+class TestHttpFD(unittest.TestCase):
+    def setUp(self):
+        self.httpd = compat_http_server.HTTPServer(
+            ('127.0.0.1', 0), HTTPTestRequestHandler)
+        self.port = http_server_port(self.httpd)
+        self.server_thread = threading.Thread(target=self.httpd.serve_forever)
+        self.server_thread.daemon = True
+        self.server_thread.start()
+
+    def download(self, params, ep):
+        params['logger'] = FakeLogger()
+        ydl = YoutubeDL(params)
+        downloader = HttpFD(ydl, params)
+        filename = 'testfile.mp4'
+        try_rm(encodeFilename(filename))
+        self.assertTrue(downloader.real_download(filename, {
+            'url': 'http://127.0.0.1:%d/%s' % (self.port, ep),
+        }))
+        self.assertEqual(os.path.getsize(encodeFilename(filename)), TEST_SIZE)
+        try_rm(encodeFilename(filename))
+
+    def download_all(self, params):
+        for ep in ('regular', 'no-content-length', 'no-range', 'no-range-no-content-length'):
+            self.download(params, ep)
+
+    def test_regular(self):
+        self.download_all({})
+
+    def test_chunked(self):
+        self.download_all({
+            'http_chunk_size': 1000,
+        })
+
+
+if __name__ == '__main__':
+    unittest.main()
index 7a7a3510ffb46e2791153dff5e4157bb21433056..409fec9c8a377a79f05b86b4472106c237cdb629 100644 (file)
@@ -47,7 +47,7 @@ class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
                 self.end_headers()
                 return
 
                 self.end_headers()
                 return
 
-            new_url = 'http://localhost:%d/中文.html' % http_server_port(self.server)
+            new_url = 'http://127.0.0.1:%d/中文.html' % http_server_port(self.server)
             self.send_response(302)
             self.send_header(b'Location', new_url.encode('utf-8'))
             self.end_headers()
             self.send_response(302)
             self.send_header(b'Location', new_url.encode('utf-8'))
             self.end_headers()
@@ -74,7 +74,7 @@ class FakeLogger(object):
 class TestHTTP(unittest.TestCase):
     def setUp(self):
         self.httpd = compat_http_server.HTTPServer(
 class TestHTTP(unittest.TestCase):
     def setUp(self):
         self.httpd = compat_http_server.HTTPServer(
-            ('localhost', 0), HTTPTestRequestHandler)
+            ('127.0.0.1', 0), HTTPTestRequestHandler)
         self.port = http_server_port(self.httpd)
         self.server_thread = threading.Thread(target=self.httpd.serve_forever)
         self.server_thread.daemon = True
         self.port = http_server_port(self.httpd)
         self.server_thread = threading.Thread(target=self.httpd.serve_forever)
         self.server_thread.daemon = True
@@ -86,15 +86,15 @@ class TestHTTP(unittest.TestCase):
             return
 
         ydl = YoutubeDL({'logger': FakeLogger()})
             return
 
         ydl = YoutubeDL({'logger': FakeLogger()})
-        r = ydl.extract_info('http://localhost:%d/302' % self.port)
-        self.assertEqual(r['entries'][0]['url'], 'http://localhost:%d/vid.mp4' % self.port)
+        r = ydl.extract_info('http://127.0.0.1:%d/302' % self.port)
+        self.assertEqual(r['entries'][0]['url'], 'http://127.0.0.1:%d/vid.mp4' % self.port)
 
 
 class TestHTTPS(unittest.TestCase):
     def setUp(self):
         certfn = os.path.join(TEST_DIR, 'testcert.pem')
         self.httpd = compat_http_server.HTTPServer(
 
 
 class TestHTTPS(unittest.TestCase):
     def setUp(self):
         certfn = os.path.join(TEST_DIR, 'testcert.pem')
         self.httpd = compat_http_server.HTTPServer(
-            ('localhost', 0), HTTPTestRequestHandler)
+            ('127.0.0.1', 0), HTTPTestRequestHandler)
         self.httpd.socket = ssl.wrap_socket(
             self.httpd.socket, certfile=certfn, server_side=True)
         self.port = http_server_port(self.httpd)
         self.httpd.socket = ssl.wrap_socket(
             self.httpd.socket, certfile=certfn, server_side=True)
         self.port = http_server_port(self.httpd)
@@ -107,11 +107,11 @@ class TestHTTPS(unittest.TestCase):
             ydl = YoutubeDL({'logger': FakeLogger()})
             self.assertRaises(
                 Exception,
             ydl = YoutubeDL({'logger': FakeLogger()})
             self.assertRaises(
                 Exception,
-                ydl.extract_info, 'https://localhost:%d/video.html' % self.port)
+                ydl.extract_info, 'https://127.0.0.1:%d/video.html' % self.port)
 
         ydl = YoutubeDL({'logger': FakeLogger(), 'nocheckcertificate': True})
 
         ydl = YoutubeDL({'logger': FakeLogger(), 'nocheckcertificate': True})
-        r = ydl.extract_info('https://localhost:%d/video.html' % self.port)
-        self.assertEqual(r['entries'][0]['url'], 'https://localhost:%d/vid.mp4' % self.port)
+        r = ydl.extract_info('https://127.0.0.1:%d/video.html' % self.port)
+        self.assertEqual(r['entries'][0]['url'], 'https://127.0.0.1:%d/vid.mp4' % self.port)
 
 
 def _build_proxy_handler(name):
 
 
 def _build_proxy_handler(name):
@@ -132,23 +132,23 @@ def _build_proxy_handler(name):
 class TestProxy(unittest.TestCase):
     def setUp(self):
         self.proxy = compat_http_server.HTTPServer(
 class TestProxy(unittest.TestCase):
     def setUp(self):
         self.proxy = compat_http_server.HTTPServer(
-            ('localhost', 0), _build_proxy_handler('normal'))
+            ('127.0.0.1', 0), _build_proxy_handler('normal'))
         self.port = http_server_port(self.proxy)
         self.proxy_thread = threading.Thread(target=self.proxy.serve_forever)
         self.proxy_thread.daemon = True
         self.proxy_thread.start()
 
         self.geo_proxy = compat_http_server.HTTPServer(
         self.port = http_server_port(self.proxy)
         self.proxy_thread = threading.Thread(target=self.proxy.serve_forever)
         self.proxy_thread.daemon = True
         self.proxy_thread.start()
 
         self.geo_proxy = compat_http_server.HTTPServer(
-            ('localhost', 0), _build_proxy_handler('geo'))
+            ('127.0.0.1', 0), _build_proxy_handler('geo'))
         self.geo_port = http_server_port(self.geo_proxy)
         self.geo_proxy_thread = threading.Thread(target=self.geo_proxy.serve_forever)
         self.geo_proxy_thread.daemon = True
         self.geo_proxy_thread.start()
 
     def test_proxy(self):
         self.geo_port = http_server_port(self.geo_proxy)
         self.geo_proxy_thread = threading.Thread(target=self.geo_proxy.serve_forever)
         self.geo_proxy_thread.daemon = True
         self.geo_proxy_thread.start()
 
     def test_proxy(self):
-        geo_proxy = 'localhost:{0}'.format(self.geo_port)
+        geo_proxy = '127.0.0.1:{0}'.format(self.geo_port)
         ydl = YoutubeDL({
         ydl = YoutubeDL({
-            'proxy': 'localhost:{0}'.format(self.port),
+            'proxy': '127.0.0.1:{0}'.format(self.port),
             'geo_verification_proxy': geo_proxy,
         })
         url = 'http://foo.com/bar'
             'geo_verification_proxy': geo_proxy,
         })
         url = 'http://foo.com/bar'
@@ -162,7 +162,7 @@ class TestProxy(unittest.TestCase):
 
     def test_proxy_with_idn(self):
         ydl = YoutubeDL({
 
     def test_proxy_with_idn(self):
         ydl = YoutubeDL({
-            'proxy': 'localhost:{0}'.format(self.port),
+            'proxy': '127.0.0.1:{0}'.format(self.port),
         })
         url = 'http://中文.tw/'
         response = ydl.urlopen(url).read().decode('utf-8')
         })
         url = 'http://中文.tw/'
         response = ydl.urlopen(url).read().decode('utf-8')
index fdf6031f7db157b4989aafc345380ce184f91458..a1fe6fdb2cad56151ff1b21f210112269740cff1 100644 (file)
@@ -53,10 +53,12 @@ from youtube_dl.utils import (
     parse_filesize,
     parse_count,
     parse_iso8601,
     parse_filesize,
     parse_count,
     parse_iso8601,
+    parse_resolution,
     pkcs1pad,
     read_batch_urls,
     sanitize_filename,
     sanitize_path,
     pkcs1pad,
     read_batch_urls,
     sanitize_filename,
     sanitize_path,
+    sanitize_url,
     expand_path,
     prepend_extension,
     replace_extension,
     expand_path,
     prepend_extension,
     replace_extension,
@@ -219,6 +221,12 @@ class TestUtil(unittest.TestCase):
         self.assertEqual(sanitize_path('./abc'), 'abc')
         self.assertEqual(sanitize_path('./../abc'), '..\\abc')
 
         self.assertEqual(sanitize_path('./abc'), 'abc')
         self.assertEqual(sanitize_path('./../abc'), '..\\abc')
 
+    def test_sanitize_url(self):
+        self.assertEqual(sanitize_url('//foo.bar'), 'http://foo.bar')
+        self.assertEqual(sanitize_url('httpss://foo.bar'), 'https://foo.bar')
+        self.assertEqual(sanitize_url('rmtps://foo.bar'), 'rtmps://foo.bar')
+        self.assertEqual(sanitize_url('https://foo.bar'), 'https://foo.bar')
+
     def test_expand_path(self):
         def env(var):
             return '%{0}%'.format(var) if sys.platform == 'win32' else '${0}'.format(var)
     def test_expand_path(self):
         def env(var):
             return '%{0}%'.format(var) if sys.platform == 'win32' else '${0}'.format(var)
@@ -344,6 +352,7 @@ class TestUtil(unittest.TestCase):
         self.assertEqual(unified_timestamp('2017-03-30T17:52:41Q'), 1490896361)
         self.assertEqual(unified_timestamp('Sep 11, 2013 | 5:49 AM'), 1378878540)
         self.assertEqual(unified_timestamp('December 15, 2017 at 7:49 am'), 1513324140)
         self.assertEqual(unified_timestamp('2017-03-30T17:52:41Q'), 1490896361)
         self.assertEqual(unified_timestamp('Sep 11, 2013 | 5:49 AM'), 1378878540)
         self.assertEqual(unified_timestamp('December 15, 2017 at 7:49 am'), 1513324140)
+        self.assertEqual(unified_timestamp('2018-03-14T08:32:43.1493874+00:00'), 1521016363)
 
     def test_determine_ext(self):
         self.assertEqual(determine_ext('http://example.com/foo/bar.mp4/?download'), 'mp4')
 
     def test_determine_ext(self):
         self.assertEqual(determine_ext('http://example.com/foo/bar.mp4/?download'), 'mp4')
@@ -975,6 +984,16 @@ class TestUtil(unittest.TestCase):
         self.assertEqual(parse_count('1.1kk '), 1100000)
         self.assertEqual(parse_count('1.1kk views'), 1100000)
 
         self.assertEqual(parse_count('1.1kk '), 1100000)
         self.assertEqual(parse_count('1.1kk views'), 1100000)
 
+    def test_parse_resolution(self):
+        self.assertEqual(parse_resolution(None), {})
+        self.assertEqual(parse_resolution(''), {})
+        self.assertEqual(parse_resolution('1920x1080'), {'width': 1920, 'height': 1080})
+        self.assertEqual(parse_resolution('1920×1080'), {'width': 1920, 'height': 1080})
+        self.assertEqual(parse_resolution('1920 x 1080'), {'width': 1920, 'height': 1080})
+        self.assertEqual(parse_resolution('720p'), {'height': 720})
+        self.assertEqual(parse_resolution('4k'), {'height': 2160})
+        self.assertEqual(parse_resolution('8K'), {'height': 4320})
+
     def test_version_tuple(self):
         self.assertEqual(version_tuple('1'), (1,))
         self.assertEqual(version_tuple('10.23.344'), (10, 23, 344))
     def test_version_tuple(self):
         self.assertEqual(version_tuple('1'), (1,))
         self.assertEqual(version_tuple('10.23.344'), (10, 23, 344))
index fb4e30dda41667da0ab86275791106f0e5717519..56daa4b63bc52c7df059c5f00e2303143c887c85 100755 (executable)
Binary files a/youtube-dl and b/youtube-dl differ
index 3800a96a6515698da8f9d019bb7dbd64615abb09..b859b1de59c61f51f5cd0e76466aab6ebf3f0b42 100644 (file)
@@ -331,6 +331,14 @@ value of SIZE.
 .RS
 .RE
 .TP
 .RS
 .RE
 .TP
+.B \-\-http\-chunk\-size \f[I]SIZE\f[]
+Size of a chunk for chunk\-based HTTP downloading (e.g.
+10485760 or 10M) (default is disabled).
+May be useful for bypassing bandwidth throttling imposed by a webserver
+(experimental)
+.RS
+.RE
+.TP
 .B \-\-playlist\-reverse
 Download playlist videos in reverse order
 .RS
 .B \-\-playlist\-reverse
 Download playlist videos in reverse order
 .RS
index 2cf75de27f62e464ac7c4ef7bb7a35ec5f6c881d..c1b86f2bf790754cae415486f8dd51bea9906712 100644 (file)
@@ -4,7 +4,7 @@ __youtube_dl()
     COMPREPLY=()
     cur="${COMP_WORDS[COMP_CWORD]}"
     prev="${COMP_WORDS[COMP_CWORD-1]}"
     COMPREPLY=()
     cur="${COMP_WORDS[COMP_CWORD]}"
     prev="${COMP_WORDS[COMP_CWORD-1]}"
-    opts="--help --version --update --ignore-errors --abort-on-error --dump-user-agent --list-extractors --extractor-descriptions --force-generic-extractor --default-search --ignore-config --config-location --flat-playlist --mark-watched --no-mark-watched --no-color --proxy --socket-timeout --source-address --force-ipv4 --force-ipv6 --geo-verification-proxy --cn-verification-proxy --geo-bypass --no-geo-bypass --geo-bypass-country --playlist-start --playlist-end --playlist-items --match-title --reject-title --max-downloads --min-filesize --max-filesize --date --datebefore --dateafter --min-views --max-views --match-filter --no-playlist --yes-playlist --age-limit --download-archive --include-ads --limit-rate --retries --fragment-retries --skip-unavailable-fragments --abort-on-unavailable-fragment --keep-fragments --buffer-size --no-resize-buffer --test --playlist-reverse --playlist-random --xattr-set-filesize --hls-prefer-native --hls-prefer-ffmpeg --hls-use-mpegts --external-downloader --external-downloader-args --batch-file --id --output --autonumber-size --autonumber-start --restrict-filenames --auto-number --title --literal --no-overwrites --continue --no-continue --no-part --no-mtime --write-description --write-info-json --write-annotations --load-info-json --cookies --cache-dir --no-cache-dir --rm-cache-dir --write-thumbnail --write-all-thumbnails --list-thumbnails --quiet --no-warnings --simulate --skip-download --get-url --get-title --get-id --get-thumbnail --get-description --get-duration --get-filename --get-format --dump-json --dump-single-json --print-json --newline --no-progress --console-title --verbose --dump-pages --write-pages --youtube-print-sig-code --print-traffic --call-home --no-call-home --encoding --no-check-certificate --prefer-insecure --user-agent --referer --add-header --bidi-workaround --sleep-interval --max-sleep-interval --format --all-formats --prefer-free-formats --list-formats --youtube-include-dash-manifest 
--youtube-skip-dash-manifest --merge-output-format --write-sub --write-auto-sub --all-subs --list-subs --sub-format --sub-lang --username --password --twofactor --netrc --video-password --ap-mso --ap-username --ap-password --ap-list-mso --extract-audio --audio-format --audio-quality --recode-video --postprocessor-args --keep-video --no-post-overwrites --embed-subs --embed-thumbnail --add-metadata --metadata-from-title --xattrs --fixup --prefer-avconv --prefer-ffmpeg --ffmpeg-location --exec --convert-subs"
+    opts="--help --version --update --ignore-errors --abort-on-error --dump-user-agent --list-extractors --extractor-descriptions --force-generic-extractor --default-search --ignore-config --config-location --flat-playlist --mark-watched --no-mark-watched --no-color --proxy --socket-timeout --source-address --force-ipv4 --force-ipv6 --geo-verification-proxy --cn-verification-proxy --geo-bypass --no-geo-bypass --geo-bypass-country --playlist-start --playlist-end --playlist-items --match-title --reject-title --max-downloads --min-filesize --max-filesize --date --datebefore --dateafter --min-views --max-views --match-filter --no-playlist --yes-playlist --age-limit --download-archive --include-ads --limit-rate --retries --fragment-retries --skip-unavailable-fragments --abort-on-unavailable-fragment --keep-fragments --buffer-size --no-resize-buffer --http-chunk-size --test --playlist-reverse --playlist-random --xattr-set-filesize --hls-prefer-native --hls-prefer-ffmpeg --hls-use-mpegts --external-downloader --external-downloader-args --batch-file --id --output --autonumber-size --autonumber-start --restrict-filenames --auto-number --title --literal --no-overwrites --continue --no-continue --no-part --no-mtime --write-description --write-info-json --write-annotations --load-info-json --cookies --cache-dir --no-cache-dir --rm-cache-dir --write-thumbnail --write-all-thumbnails --list-thumbnails --quiet --no-warnings --simulate --skip-download --get-url --get-title --get-id --get-thumbnail --get-description --get-duration --get-filename --get-format --dump-json --dump-single-json --print-json --newline --no-progress --console-title --verbose --dump-pages --write-pages --youtube-print-sig-code --print-traffic --call-home --no-call-home --encoding --no-check-certificate --prefer-insecure --user-agent --referer --add-header --bidi-workaround --sleep-interval --max-sleep-interval --format --all-formats --prefer-free-formats --list-formats --youtube-include-dash-manifest 
--youtube-skip-dash-manifest --merge-output-format --write-sub --write-auto-sub --all-subs --list-subs --sub-format --sub-lang --username --password --twofactor --netrc --video-password --ap-mso --ap-username --ap-password --ap-list-mso --extract-audio --audio-format --audio-quality --recode-video --postprocessor-args --keep-video --no-post-overwrites --embed-subs --embed-thumbnail --add-metadata --metadata-from-title --xattrs --fixup --prefer-avconv --prefer-ffmpeg --ffmpeg-location --exec --convert-subs"
     keywords=":ytfavorites :ytrecommended :ytsubscriptions :ytwatchlater :ythistory"
     fileopts="-a|--batch-file|--download-archive|--cookies|--load-info"
     diropts="--cache-dir"
     keywords=":ytfavorites :ytrecommended :ytsubscriptions :ytwatchlater :ythistory"
     fileopts="-a|--batch-file|--download-archive|--cookies|--load-info"
     diropts="--cache-dir"
index 00d18452b8a08b414504417b0447ea633d896796..aa7c59b8af3ce645d8a3f493be2039501ef18579 100644 (file)
@@ -52,6 +52,7 @@ complete --command youtube-dl --long-option abort-on-unavailable-fragment --desc
 complete --command youtube-dl --long-option keep-fragments --description 'Keep downloaded fragments on disk after downloading is finished; fragments are erased by default'
 complete --command youtube-dl --long-option buffer-size --description 'Size of download buffer (e.g. 1024 or 16K) (default is %default)'
 complete --command youtube-dl --long-option no-resize-buffer --description 'Do not automatically adjust the buffer size. By default, the buffer size is automatically resized from an initial value of SIZE.'
 complete --command youtube-dl --long-option keep-fragments --description 'Keep downloaded fragments on disk after downloading is finished; fragments are erased by default'
 complete --command youtube-dl --long-option buffer-size --description 'Size of download buffer (e.g. 1024 or 16K) (default is %default)'
 complete --command youtube-dl --long-option no-resize-buffer --description 'Do not automatically adjust the buffer size. By default, the buffer size is automatically resized from an initial value of SIZE.'
+complete --command youtube-dl --long-option http-chunk-size --description 'Size of a chunk for chunk-based HTTP downloading (e.g. 10485760 or 10M) (default is disabled). May be useful for bypassing bandwidth throttling imposed by a webserver (experimental)'
 complete --command youtube-dl --long-option test
 complete --command youtube-dl --long-option playlist-reverse --description 'Download playlist videos in reverse order'
 complete --command youtube-dl --long-option playlist-random --description 'Download playlist videos in random order'
 complete --command youtube-dl --long-option test
 complete --command youtube-dl --long-option playlist-reverse --description 'Download playlist videos in reverse order'
 complete --command youtube-dl --long-option playlist-random --description 'Download playlist videos in random order'
index 2d670ee569a468396833fafc074fc5a0abd98878..1f573a57973f290089b7950f5a7f6c11d114162e 100644 (file)
@@ -19,7 +19,7 @@ __youtube_dl() {
             elif [[ ${prev} == "--recode-video" ]]; then
                 _arguments '*: :(mp4 flv ogg webm mkv)'
             else
             elif [[ ${prev} == "--recode-video" ]]; then
                 _arguments '*: :(mp4 flv ogg webm mkv)'
             else
-                _arguments '*: :(--help --version --update --ignore-errors --abort-on-error --dump-user-agent --list-extractors --extractor-descriptions --force-generic-extractor --default-search --ignore-config --config-location --flat-playlist --mark-watched --no-mark-watched --no-color --proxy --socket-timeout --source-address --force-ipv4 --force-ipv6 --geo-verification-proxy --cn-verification-proxy --geo-bypass --no-geo-bypass --geo-bypass-country --playlist-start --playlist-end --playlist-items --match-title --reject-title --max-downloads --min-filesize --max-filesize --date --datebefore --dateafter --min-views --max-views --match-filter --no-playlist --yes-playlist --age-limit --download-archive --include-ads --limit-rate --retries --fragment-retries --skip-unavailable-fragments --abort-on-unavailable-fragment --keep-fragments --buffer-size --no-resize-buffer --test --playlist-reverse --playlist-random --xattr-set-filesize --hls-prefer-native --hls-prefer-ffmpeg --hls-use-mpegts --external-downloader --external-downloader-args --batch-file --id --output --autonumber-size --autonumber-start --restrict-filenames --auto-number --title --literal --no-overwrites --continue --no-continue --no-part --no-mtime --write-description --write-info-json --write-annotations --load-info-json --cookies --cache-dir --no-cache-dir --rm-cache-dir --write-thumbnail --write-all-thumbnails --list-thumbnails --quiet --no-warnings --simulate --skip-download --get-url --get-title --get-id --get-thumbnail --get-description --get-duration --get-filename --get-format --dump-json --dump-single-json --print-json --newline --no-progress --console-title --verbose --dump-pages --write-pages --youtube-print-sig-code --print-traffic --call-home --no-call-home --encoding --no-check-certificate --prefer-insecure --user-agent --referer --add-header --bidi-workaround --sleep-interval --max-sleep-interval --format --all-formats --prefer-free-formats --list-formats --youtube-include-dash-manifest 
--youtube-skip-dash-manifest --merge-output-format --write-sub --write-auto-sub --all-subs --list-subs --sub-format --sub-lang --username --password --twofactor --netrc --video-password --ap-mso --ap-username --ap-password --ap-list-mso --extract-audio --audio-format --audio-quality --recode-video --postprocessor-args --keep-video --no-post-overwrites --embed-subs --embed-thumbnail --add-metadata --metadata-from-title --xattrs --fixup --prefer-avconv --prefer-ffmpeg --ffmpeg-location --exec --convert-subs)'
+                _arguments '*: :(--help --version --update --ignore-errors --abort-on-error --dump-user-agent --list-extractors --extractor-descriptions --force-generic-extractor --default-search --ignore-config --config-location --flat-playlist --mark-watched --no-mark-watched --no-color --proxy --socket-timeout --source-address --force-ipv4 --force-ipv6 --geo-verification-proxy --cn-verification-proxy --geo-bypass --no-geo-bypass --geo-bypass-country --playlist-start --playlist-end --playlist-items --match-title --reject-title --max-downloads --min-filesize --max-filesize --date --datebefore --dateafter --min-views --max-views --match-filter --no-playlist --yes-playlist --age-limit --download-archive --include-ads --limit-rate --retries --fragment-retries --skip-unavailable-fragments --abort-on-unavailable-fragment --keep-fragments --buffer-size --no-resize-buffer --http-chunk-size --test --playlist-reverse --playlist-random --xattr-set-filesize --hls-prefer-native --hls-prefer-ffmpeg --hls-use-mpegts --external-downloader --external-downloader-args --batch-file --id --output --autonumber-size --autonumber-start --restrict-filenames --auto-number --title --literal --no-overwrites --continue --no-continue --no-part --no-mtime --write-description --write-info-json --write-annotations --load-info-json --cookies --cache-dir --no-cache-dir --rm-cache-dir --write-thumbnail --write-all-thumbnails --list-thumbnails --quiet --no-warnings --simulate --skip-download --get-url --get-title --get-id --get-thumbnail --get-description --get-duration --get-filename --get-format --dump-json --dump-single-json --print-json --newline --no-progress --console-title --verbose --dump-pages --write-pages --youtube-print-sig-code --print-traffic --call-home --no-call-home --encoding --no-check-certificate --prefer-insecure --user-agent --referer --add-header --bidi-workaround --sleep-interval --max-sleep-interval --format --all-formats --prefer-free-formats --list-formats 
--youtube-include-dash-manifest --youtube-skip-dash-manifest --merge-output-format --write-sub --write-auto-sub --all-subs --list-subs --sub-format --sub-lang --username --password --twofactor --netrc --video-password --ap-mso --ap-username --ap-password --ap-list-mso --extract-audio --audio-format --audio-quality --recode-video --postprocessor-args --keep-video --no-post-overwrites --embed-subs --embed-thumbnail --add-metadata --metadata-from-title --xattrs --fixup --prefer-avconv --prefer-ffmpeg --ffmpeg-location --exec --convert-subs)'
             fi
         ;;
     esac
             fi
         ;;
     esac
index 97bd9c526dc60d71d569b625e2cebd7c6af46bd0..523dd1f7daf80839839d7b8a014f86897640b425 100755 (executable)
@@ -298,7 +298,8 @@ class YoutubeDL(object):
     the downloader (see youtube_dl/downloader/common.py):
     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
     noresizebuffer, retries, continuedl, noprogress, consoletitle,
     the downloader (see youtube_dl/downloader/common.py):
     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
     noresizebuffer, retries, continuedl, noprogress, consoletitle,
-    xattr_set_filesize, external_downloader_args, hls_use_mpegts.
+    xattr_set_filesize, external_downloader_args, hls_use_mpegts,
+    http_chunk_size.
 
     The following options are used by the post processors:
     prefer_ffmpeg:     If True, use ffmpeg instead of avconv if both are available,
 
     The following options are used by the post processors:
     prefer_ffmpeg:     If True, use ffmpeg instead of avconv if both are available,
@@ -1032,7 +1033,7 @@ class YoutubeDL(object):
             '!=': operator.ne,
         }
         operator_rex = re.compile(r'''(?x)\s*
             '!=': operator.ne,
         }
         operator_rex = re.compile(r'''(?x)\s*
-            (?P<key>width|height|tbr|abr|vbr|asr|filesize|fps)
+            (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)
             \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
             $
             \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
             $
index ba684a0754bb4a70148148334d1bee474a1d234b..9bb952457e149f3687efd0ca925aed8d69996366 100644 (file)
@@ -191,6 +191,11 @@ def _real_main(argv=None):
         if numeric_buffersize is None:
             parser.error('invalid buffer size specified')
         opts.buffersize = numeric_buffersize
         if numeric_buffersize is None:
             parser.error('invalid buffer size specified')
         opts.buffersize = numeric_buffersize
+    if opts.http_chunk_size is not None:
+        numeric_chunksize = FileDownloader.parse_bytes(opts.http_chunk_size)
+        if not numeric_chunksize:
+            parser.error('invalid http chunk size specified')
+        opts.http_chunk_size = numeric_chunksize
     if opts.playliststart <= 0:
         raise ValueError('Playlist start must be positive')
     if opts.playlistend not in (-1, None) and opts.playlistend < opts.playliststart:
     if opts.playliststart <= 0:
         raise ValueError('Playlist start must be positive')
     if opts.playlistend not in (-1, None) and opts.playlistend < opts.playliststart:
@@ -346,6 +351,7 @@ def _real_main(argv=None):
         'keep_fragments': opts.keep_fragments,
         'buffersize': opts.buffersize,
         'noresizebuffer': opts.noresizebuffer,
         'keep_fragments': opts.keep_fragments,
         'buffersize': opts.buffersize,
         'noresizebuffer': opts.noresizebuffer,
+        'http_chunk_size': opts.http_chunk_size,
         'continuedl': opts.continue_dl,
         'noprogress': opts.noprogress,
         'progress_with_newline': opts.progress_with_newline,
         'continuedl': opts.continue_dl,
         'noprogress': opts.noprogress,
         'progress_with_newline': opts.progress_with_newline,
index 646c9d79ccc8826a624c805853d5c02faf63f59d..4a611f183408ce224404c2fce360ee4d50792dac 100644 (file)
@@ -2897,9 +2897,24 @@ except TypeError:
         if isinstance(spec, compat_str):
             spec = spec.encode('ascii')
         return struct.unpack(spec, *args)
         if isinstance(spec, compat_str):
             spec = spec.encode('ascii')
         return struct.unpack(spec, *args)
+
+    class compat_Struct(struct.Struct):
+        def __init__(self, fmt):
+            if isinstance(fmt, compat_str):
+                fmt = fmt.encode('ascii')
+            super(compat_Struct, self).__init__(fmt)
 else:
     compat_struct_pack = struct.pack
     compat_struct_unpack = struct.unpack
 else:
     compat_struct_pack = struct.pack
     compat_struct_unpack = struct.unpack
+    if platform.python_implementation() == 'IronPython' and sys.version_info < (2, 7, 8):
+        class compat_Struct(struct.Struct):
+            def unpack(self, string):
+                if not isinstance(string, buffer):  # noqa: F821
+                    string = buffer(string)  # noqa: F821
+                return super(compat_Struct, self).unpack(string)
+    else:
+        compat_Struct = struct.Struct
+
 
 try:
     from future_builtins import zip as compat_zip
 
 try:
     from future_builtins import zip as compat_zip
@@ -2941,6 +2956,7 @@ __all__ = [
     'compat_HTMLParseError',
     'compat_HTMLParser',
     'compat_HTTPError',
     'compat_HTMLParseError',
     'compat_HTMLParser',
     'compat_HTTPError',
+    'compat_Struct',
     'compat_b64decode',
     'compat_basestring',
     'compat_chr',
     'compat_b64decode',
     'compat_basestring',
     'compat_chr',
index 75b8166c514485bad26fa87a90ca08ab330d654f..cc16bbb83fb5da958df1564a85e50f04dca30ce0 100644 (file)
@@ -49,6 +49,9 @@ class FileDownloader(object):
     external_downloader_args:  A list of additional command-line arguments for the
                         external downloader.
     hls_use_mpegts:     Use the mpegts container for HLS videos.
     external_downloader_args:  A list of additional command-line arguments for the
                         external downloader.
     hls_use_mpegts:     Use the mpegts container for HLS videos.
+    http_chunk_size:    Size of a chunk for chunk-based HTTP downloading. May be
+                        useful for bypassing bandwidth throttling imposed by
+                        a webserver (experimental)
 
     Subclasses of this one must re-define the real_download method.
     """
 
     Subclasses of this one must re-define the real_download method.
     """
index 4dc3ab46aa534132862ef0c29c34c871996ea2e4..fd304527e854e768038880bf785d84f211f4471b 100644 (file)
@@ -75,8 +75,9 @@ class HlsFD(FragmentFD):
                 fd.add_progress_hook(ph)
             return fd.real_download(filename, info_dict)
 
                 fd.add_progress_hook(ph)
             return fd.real_download(filename, info_dict)
 
-        def anvato_ad(s):
-            return s.startswith('#ANVATO-SEGMENT-INFO') and 'type=ad' in s
+        def is_ad_fragment(s):
+            return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=ad' in s or
+                    s.startswith('#UPLYNK-SEGMENT') and s.endswith(',ad'))
 
         media_frags = 0
         ad_frags = 0
 
         media_frags = 0
         ad_frags = 0
@@ -86,7 +87,7 @@ class HlsFD(FragmentFD):
             if not line:
                 continue
             if line.startswith('#'):
             if not line:
                 continue
             if line.startswith('#'):
-                if anvato_ad(line):
+                if is_ad_fragment(line):
                     ad_frags += 1
                     ad_frag_next = True
                 continue
                     ad_frags += 1
                     ad_frag_next = True
                 continue
@@ -195,7 +196,7 @@ class HlsFD(FragmentFD):
                         'start': sub_range_start,
                         'end': sub_range_start + int(splitted_byte_range[0]),
                     }
                         'start': sub_range_start,
                         'end': sub_range_start + int(splitted_byte_range[0]),
                     }
-                elif anvato_ad(line):
+                elif is_ad_fragment(line):
                     ad_frag_next = True
 
         self._finish_frag_download(ctx)
                     ad_frag_next = True
 
         self._finish_frag_download(ctx)
index 3ff26ff7086afbf9d1245a144406f891e9ff393c..a22875f6988eef76837c0b6ac62795d50b1e3d9a 100644 (file)
@@ -4,13 +4,18 @@ import errno
 import os
 import socket
 import time
 import os
 import socket
 import time
+import random
 import re
 
 from .common import FileDownloader
 import re
 
 from .common import FileDownloader
-from ..compat import compat_urllib_error
+from ..compat import (
+    compat_str,
+    compat_urllib_error,
+)
 from ..utils import (
     ContentTooShortError,
     encodeFilename,
 from ..utils import (
     ContentTooShortError,
     encodeFilename,
+    int_or_none,
     sanitize_open,
     sanitized_Request,
     write_xattr,
     sanitize_open,
     sanitized_Request,
     write_xattr,
@@ -38,21 +43,26 @@ class HttpFD(FileDownloader):
         add_headers = info_dict.get('http_headers')
         if add_headers:
             headers.update(add_headers)
         add_headers = info_dict.get('http_headers')
         if add_headers:
             headers.update(add_headers)
-        basic_request = sanitized_Request(url, None, headers)
-        request = sanitized_Request(url, None, headers)
 
         is_test = self.params.get('test', False)
 
         is_test = self.params.get('test', False)
-
-        if is_test:
-            request.add_header('Range', 'bytes=0-%s' % str(self._TEST_FILE_SIZE - 1))
+        chunk_size = self._TEST_FILE_SIZE if is_test else (
+            info_dict.get('downloader_options', {}).get('http_chunk_size') or
+            self.params.get('http_chunk_size') or 0)
 
         ctx.open_mode = 'wb'
         ctx.resume_len = 0
 
         ctx.open_mode = 'wb'
         ctx.resume_len = 0
+        ctx.data_len = None
+        ctx.block_size = self.params.get('buffersize', 1024)
+        ctx.start_time = time.time()
+        ctx.chunk_size = None
 
         if self.params.get('continuedl', True):
             # Establish possible resume length
             if os.path.isfile(encodeFilename(ctx.tmpfilename)):
 
         if self.params.get('continuedl', True):
             # Establish possible resume length
             if os.path.isfile(encodeFilename(ctx.tmpfilename)):
-                ctx.resume_len = os.path.getsize(encodeFilename(ctx.tmpfilename))
+                ctx.resume_len = os.path.getsize(
+                    encodeFilename(ctx.tmpfilename))
+
+        ctx.is_resume = ctx.resume_len > 0
 
         count = 0
         retries = self.params.get('retries', 0)
 
         count = 0
         retries = self.params.get('retries', 0)
@@ -64,11 +74,36 @@ class HttpFD(FileDownloader):
             def __init__(self, source_error):
                 self.source_error = source_error
 
             def __init__(self, source_error):
                 self.source_error = source_error
 
+        class NextFragment(Exception):
+            pass
+
+        def set_range(req, start, end):
+            range_header = 'bytes=%d-' % start
+            if end:
+                range_header += compat_str(end)
+            req.add_header('Range', range_header)
+
         def establish_connection():
         def establish_connection():
-            if ctx.resume_len != 0:
-                self.report_resuming_byte(ctx.resume_len)
-                request.add_header('Range', 'bytes=%d-' % ctx.resume_len)
+            ctx.chunk_size = (random.randint(int(chunk_size * 0.95), chunk_size)
+                              if not is_test and chunk_size else chunk_size)
+            if ctx.resume_len > 0:
+                range_start = ctx.resume_len
+                if ctx.is_resume:
+                    self.report_resuming_byte(ctx.resume_len)
                 ctx.open_mode = 'ab'
                 ctx.open_mode = 'ab'
+            elif ctx.chunk_size > 0:
+                range_start = 0
+            else:
+                range_start = None
+            ctx.is_resume = False
+            range_end = range_start + ctx.chunk_size - 1 if ctx.chunk_size else None
+            if range_end and ctx.data_len is not None and range_end >= ctx.data_len:
+                range_end = ctx.data_len - 1
+            has_range = range_start is not None
+            ctx.has_range = has_range
+            request = sanitized_Request(url, None, headers)
+            if has_range:
+                set_range(request, range_start, range_end)
             # Establish connection
             try:
                 ctx.data = self.ydl.urlopen(request)
             # Establish connection
             try:
                 ctx.data = self.ydl.urlopen(request)
@@ -77,29 +112,40 @@ class HttpFD(FileDownloader):
                 # that don't support resuming and serve a whole file with no Content-Range
                 # set in response despite of requested Range (see
                 # https://github.com/rg3/youtube-dl/issues/6057#issuecomment-126129799)
                 # that don't support resuming and serve a whole file with no Content-Range
                 # set in response despite of requested Range (see
                 # https://github.com/rg3/youtube-dl/issues/6057#issuecomment-126129799)
-                if ctx.resume_len > 0:
+                if has_range:
                     content_range = ctx.data.headers.get('Content-Range')
                     if content_range:
                     content_range = ctx.data.headers.get('Content-Range')
                     if content_range:
-                        content_range_m = re.search(r'bytes (\d+)-', content_range)
+                        content_range_m = re.search(r'bytes (\d+)-(\d+)?(?:/(\d+))?', content_range)
                         # Content-Range is present and matches requested Range, resume is possible
                         # Content-Range is present and matches requested Range, resume is possible
-                        if content_range_m and ctx.resume_len == int(content_range_m.group(1)):
-                            return
+                        if content_range_m:
+                            if range_start == int(content_range_m.group(1)):
+                                content_range_end = int_or_none(content_range_m.group(2))
+                                content_len = int_or_none(content_range_m.group(3))
+                                accept_content_len = (
+                                    # Non-chunked download
+                                    not ctx.chunk_size or
+                                    # Chunked download and requested piece or
+                                    # its part is promised to be served
+                                    content_range_end == range_end or
+                                    content_len < range_end)
+                                if accept_content_len:
+                                    ctx.data_len = content_len
+                                    return
                     # Content-Range is either not present or invalid. Assuming remote webserver is
                     # trying to send the whole file, resume is not possible, so wiping the local file
                     # and performing entire redownload
                     self.report_unable_to_resume()
                     ctx.resume_len = 0
                     ctx.open_mode = 'wb'
                     # Content-Range is either not present or invalid. Assuming remote webserver is
                     # trying to send the whole file, resume is not possible, so wiping the local file
                     # and performing entire redownload
                     self.report_unable_to_resume()
                     ctx.resume_len = 0
                     ctx.open_mode = 'wb'
+                ctx.data_len = int_or_none(ctx.data.info().get('Content-length', None))
                 return
             except (compat_urllib_error.HTTPError, ) as err:
                 return
             except (compat_urllib_error.HTTPError, ) as err:
-                if (err.code < 500 or err.code >= 600) and err.code != 416:
-                    # Unexpected HTTP error
-                    raise
-                elif err.code == 416:
+                if err.code == 416:
                     # Unable to resume (requested range not satisfiable)
                     try:
                         # Open the connection again without the range header
                     # Unable to resume (requested range not satisfiable)
                     try:
                         # Open the connection again without the range header
-                        ctx.data = self.ydl.urlopen(basic_request)
+                        ctx.data = self.ydl.urlopen(
+                            sanitized_Request(url, None, headers))
                         content_length = ctx.data.info()['Content-Length']
                     except (compat_urllib_error.HTTPError, ) as err:
                         if err.code < 500 or err.code >= 600:
                         content_length = ctx.data.info()['Content-Length']
                     except (compat_urllib_error.HTTPError, ) as err:
                         if err.code < 500 or err.code >= 600:
@@ -130,6 +176,9 @@ class HttpFD(FileDownloader):
                             ctx.resume_len = 0
                             ctx.open_mode = 'wb'
                             return
                             ctx.resume_len = 0
                             ctx.open_mode = 'wb'
                             return
+                elif err.code < 500 or err.code >= 600:
+                    # Unexpected HTTP error
+                    raise
                 raise RetryDownload(err)
             except socket.error as err:
                 if err.errno != errno.ECONNRESET:
                 raise RetryDownload(err)
             except socket.error as err:
                 if err.errno != errno.ECONNRESET:
@@ -160,7 +209,7 @@ class HttpFD(FileDownloader):
                     return False
 
             byte_counter = 0 + ctx.resume_len
                     return False
 
             byte_counter = 0 + ctx.resume_len
-            block_size = self.params.get('buffersize', 1024)
+            block_size = ctx.block_size
             start = time.time()
 
             # measure time over whole while-loop, so slow_down() and best_block_size() work together properly
             start = time.time()
 
             # measure time over whole while-loop, so slow_down() and best_block_size() work together properly
@@ -233,25 +282,30 @@ class HttpFD(FileDownloader):
 
                 # Progress message
                 speed = self.calc_speed(start, now, byte_counter - ctx.resume_len)
 
                 # Progress message
                 speed = self.calc_speed(start, now, byte_counter - ctx.resume_len)
-                if data_len is None:
+                if ctx.data_len is None:
                     eta = None
                 else:
                     eta = None
                 else:
-                    eta = self.calc_eta(start, time.time(), data_len - ctx.resume_len, byte_counter - ctx.resume_len)
+                    eta = self.calc_eta(start, time.time(), ctx.data_len - ctx.resume_len, byte_counter - ctx.resume_len)
 
                 self._hook_progress({
                     'status': 'downloading',
                     'downloaded_bytes': byte_counter,
 
                 self._hook_progress({
                     'status': 'downloading',
                     'downloaded_bytes': byte_counter,
-                    'total_bytes': data_len,
+                    'total_bytes': ctx.data_len,
                     'tmpfilename': ctx.tmpfilename,
                     'filename': ctx.filename,
                     'eta': eta,
                     'speed': speed,
                     'tmpfilename': ctx.tmpfilename,
                     'filename': ctx.filename,
                     'eta': eta,
                     'speed': speed,
-                    'elapsed': now - start,
+                    'elapsed': now - ctx.start_time,
                 })
 
                 if is_test and byte_counter == data_len:
                     break
 
                 })
 
                 if is_test and byte_counter == data_len:
                     break
 
+            if not is_test and ctx.chunk_size and ctx.data_len is not None and byte_counter < ctx.data_len:
+                ctx.resume_len = byte_counter
+                # ctx.block_size = block_size
+                raise NextFragment()
+
             if ctx.stream is None:
                 self.to_stderr('\n')
                 self.report_error('Did not get any data blocks')
             if ctx.stream is None:
                 self.to_stderr('\n')
                 self.report_error('Did not get any data blocks')
@@ -276,7 +330,7 @@ class HttpFD(FileDownloader):
                 'total_bytes': byte_counter,
                 'filename': ctx.filename,
                 'status': 'finished',
                 'total_bytes': byte_counter,
                 'filename': ctx.filename,
                 'status': 'finished',
-                'elapsed': time.time() - start,
+                'elapsed': time.time() - ctx.start_time,
             })
 
             return True
             })
 
             return True
@@ -290,6 +344,8 @@ class HttpFD(FileDownloader):
                 if count <= retries:
                     self.report_retry(e.source_error, count, retries)
                 continue
                 if count <= retries:
                     self.report_retry(e.source_error, count, retries)
                 continue
+            except NextFragment:
+                continue
             except SucceedDownload:
                 return True
 
             except SucceedDownload:
                 return True
 
index 9b001ecff4f407a94f74b59a8e1505ed27fd9d03..063fcf4446447d566785c24f94df94d95553de16 100644 (file)
@@ -1,25 +1,27 @@
 from __future__ import unicode_literals
 
 import time
 from __future__ import unicode_literals
 
 import time
-import struct
 import binascii
 import io
 
 from .fragment import FragmentFD
 import binascii
 import io
 
 from .fragment import FragmentFD
-from ..compat import compat_urllib_error
+from ..compat import (
+    compat_Struct,
+    compat_urllib_error,
+)
 
 
 
 
-u8 = struct.Struct(b'>B')
-u88 = struct.Struct(b'>Bx')
-u16 = struct.Struct(b'>H')
-u1616 = struct.Struct(b'>Hxx')
-u32 = struct.Struct(b'>I')
-u64 = struct.Struct(b'>Q')
+u8 = compat_Struct('>B')
+u88 = compat_Struct('>Bx')
+u16 = compat_Struct('>H')
+u1616 = compat_Struct('>Hxx')
+u32 = compat_Struct('>I')
+u64 = compat_Struct('>Q')
 
 
-s88 = struct.Struct(b'>bx')
-s16 = struct.Struct(b'>h')
-s1616 = struct.Struct(b'>hxx')
-s32 = struct.Struct(b'>i')
+s88 = compat_Struct('>bx')
+s16 = compat_Struct('>h')
+s1616 = compat_Struct('>hxx')
+s32 = compat_Struct('>i')
 
 unity_matrix = (s32.pack(0x10000) + s32.pack(0) * 3) * 2 + s32.pack(0x40000000)
 
 
 unity_matrix = (s32.pack(0x10000) + s32.pack(0) * 3) * 2 + s32.pack(0x40000000)
 
@@ -139,7 +141,7 @@ def write_piff_header(stream, params):
         sample_entry_payload += u16.pack(0x18)  # depth
         sample_entry_payload += s16.pack(-1)  # pre defined
 
         sample_entry_payload += u16.pack(0x18)  # depth
         sample_entry_payload += s16.pack(-1)  # pre defined
 
-        codec_private_data = binascii.unhexlify(params['codec_private_data'])
+        codec_private_data = binascii.unhexlify(params['codec_private_data'].encode('utf-8'))
         if fourcc in ('H264', 'AVC1'):
             sps, pps = codec_private_data.split(u32.pack(1))[1:]
             avcc_payload = u8.pack(1)  # configuration version
         if fourcc in ('H264', 'AVC1'):
             sps, pps = codec_private_data.split(u32.pack(1))[1:]
             avcc_payload = u8.pack(1)  # configuration version
index f770fe901369e85d3df881cf651313f540544b03..cd29aca7789cd677081c0293848ff4b85d74db21 100644 (file)
@@ -66,7 +66,7 @@ class AbcNewsIE(InfoExtractor):
     _TESTS = [{
         'url': 'http://abcnews.go.com/Blotter/News/dramatic-video-rare-death-job-america/story?id=10498713#.UIhwosWHLjY',
         'info_dict': {
     _TESTS = [{
         'url': 'http://abcnews.go.com/Blotter/News/dramatic-video-rare-death-job-america/story?id=10498713#.UIhwosWHLjY',
         'info_dict': {
-            'id': '10498713',
+            'id': '10505354',
             'ext': 'flv',
             'display_id': 'dramatic-video-rare-death-job-america',
             'title': 'Occupational Hazards',
             'ext': 'flv',
             'display_id': 'dramatic-video-rare-death-job-america',
             'title': 'Occupational Hazards',
@@ -79,7 +79,7 @@ class AbcNewsIE(InfoExtractor):
     }, {
         'url': 'http://abcnews.go.com/Entertainment/justin-timberlake-performs-stop-feeling-eurovision-2016/story?id=39125818',
         'info_dict': {
     }, {
         'url': 'http://abcnews.go.com/Entertainment/justin-timberlake-performs-stop-feeling-eurovision-2016/story?id=39125818',
         'info_dict': {
-            'id': '39125818',
+            'id': '38897857',
             'ext': 'mp4',
             'display_id': 'justin-timberlake-performs-stop-feeling-eurovision-2016',
             'title': 'Justin Timberlake Drops Hints For Secret Single',
             'ext': 'mp4',
             'display_id': 'justin-timberlake-performs-stop-feeling-eurovision-2016',
             'title': 'Justin Timberlake Drops Hints For Secret Single',
index 64fb755da0f663670778453590afa75974f8895d..041c61aff7cafc01cf30a9c714b2d35d1af978ee 100644 (file)
@@ -51,7 +51,7 @@ class ADNIE(InfoExtractor):
         # http://animedigitalnetwork.fr/components/com_vodvideo/videojs/adn-vjs.min.js
         dec_subtitles = intlist_to_bytes(aes_cbc_decrypt(
             bytes_to_intlist(compat_b64decode(enc_subtitles[24:])),
         # http://animedigitalnetwork.fr/components/com_vodvideo/videojs/adn-vjs.min.js
         dec_subtitles = intlist_to_bytes(aes_cbc_decrypt(
             bytes_to_intlist(compat_b64decode(enc_subtitles[24:])),
-            bytes_to_intlist(b'\x1b\xe0\x29\x61\x38\x94\x24\x00\x12\xbd\xc5\x80\xac\xce\xbe\xb0'),
+            bytes_to_intlist(b'\xc8\x6e\x06\xbc\xbe\xc6\x49\xf5\x88\x0d\xc8\x47\xc4\x27\x0c\x60'),
             bytes_to_intlist(compat_b64decode(enc_subtitles[:24]))
         ))
         subtitles_json = self._parse_json(
             bytes_to_intlist(compat_b64decode(enc_subtitles[:24]))
         ))
         subtitles_json = self._parse_json(
@@ -107,15 +107,18 @@ class ADNIE(InfoExtractor):
 
         options = player_config.get('options') or {}
         metas = options.get('metas') or {}
 
         options = player_config.get('options') or {}
         metas = options.get('metas') or {}
-        title = metas.get('title') or video_info['title']
         links = player_config.get('links') or {}
         links = player_config.get('links') or {}
+        sub_path = player_config.get('subtitles')
         error = None
         if not links:
         error = None
         if not links:
-            links_url = player_config['linksurl']
+            links_url = player_config.get('linksurl') or options['videoUrl']
             links_data = self._download_json(urljoin(
                 self._BASE_URL, links_url), video_id)
             links = links_data.get('links') or {}
             links_data = self._download_json(urljoin(
                 self._BASE_URL, links_url), video_id)
             links = links_data.get('links') or {}
+            metas = metas or links_data.get('meta') or {}
+            sub_path = sub_path or links_data.get('subtitles')
             error = links_data.get('error')
             error = links_data.get('error')
+        title = metas.get('title') or video_info['title']
 
         formats = []
         for format_id, qualities in links.items():
 
         formats = []
         for format_id, qualities in links.items():
@@ -146,7 +149,7 @@ class ADNIE(InfoExtractor):
             'description': strip_or_none(metas.get('summary') or video_info.get('resume')),
             'thumbnail': video_info.get('image'),
             'formats': formats,
             'description': strip_or_none(metas.get('summary') or video_info.get('resume')),
             'thumbnail': video_info.get('image'),
             'formats': formats,
-            'subtitles': self.extract_subtitles(player_config.get('subtitles'), video_id),
+            'subtitles': self.extract_subtitles(sub_path, video_id),
             'episode': metas.get('subtitle') or video_info.get('videoTitle'),
             'series': video_info.get('playlistTitle'),
         }
             'episode': metas.get('subtitle') or video_info.get('videoTitle'),
             'series': video_info.get('playlistTitle'),
         }
index da1b566c20eb6c4477e86f26dfec21b281a5f07c..398e56ea301f1fb88eeb481c33c801371c09e430 100644 (file)
@@ -122,7 +122,8 @@ class AENetworksIE(AENetworksBaseIE):
 
         query = {
             'mbr': 'true',
 
         query = {
             'mbr': 'true',
-            'assetTypes': 'high_video_s3'
+            'assetTypes': 'high_video_ak',
+            'switch': 'hls_high_ak',
         }
         video_id = self._html_search_meta('aetn:VideoID', webpage)
         media_url = self._search_regex(
         }
         video_id = self._html_search_meta('aetn:VideoID', webpage)
         media_url = self._search_regex(
index 513dd81df5cd8c044eaba800facdf76cca3cc509..df2a3fc4a2a24d2feeb81083d2414d8dc09b682b 100644 (file)
@@ -175,10 +175,27 @@ class AfreecaTVIE(InfoExtractor):
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
+        webpage = self._download_webpage(url, video_id)
+
+        if re.search(r'alert\(["\']This video has been deleted', webpage):
+            raise ExtractorError(
+                'Video %s has been deleted' % video_id, expected=True)
+
+        station_id = self._search_regex(
+            r'nStationNo\s*=\s*(\d+)', webpage, 'station')
+        bbs_id = self._search_regex(
+            r'nBbsNo\s*=\s*(\d+)', webpage, 'bbs')
+        video_id = self._search_regex(
+            r'nTitleNo\s*=\s*(\d+)', webpage, 'title', default=video_id)
+
         video_xml = self._download_xml(
             'http://afbbs.afreecatv.com:8080/api/video/get_video_info.php',
         video_xml = self._download_xml(
             'http://afbbs.afreecatv.com:8080/api/video/get_video_info.php',
-            video_id, query={
+            video_id, headers={
+                'Referer': 'http://vod.afreecatv.com/embed.php',
+            }, query={
                 'nTitleNo': video_id,
                 'nTitleNo': video_id,
+                'nStationNo': station_id,
+                'nBbsNo': bbs_id,
                 'partialView': 'SKIP_ADULT',
             })
 
                 'partialView': 'SKIP_ADULT',
             })
 
@@ -187,10 +204,10 @@ class AfreecaTVIE(InfoExtractor):
             raise ExtractorError(
                 '%s said: %s' % (self.IE_NAME, flag), expected=True)
 
             raise ExtractorError(
                 '%s said: %s' % (self.IE_NAME, flag), expected=True)
 
-        video_element = video_xml.findall(compat_xpath('./track/video'))[1]
+        video_element = video_xml.findall(compat_xpath('./track/video'))[-1]
         if video_element is None or video_element.text is None:
         if video_element is None or video_element.text is None:
-            raise ExtractorError('Specified AfreecaTV video does not exist',
-                                 expected=True)
+            raise ExtractorError(
+                'Video %s video does not exist' % video_id, expected=True)
 
         video_url = video_element.text.strip()
 
 
         video_url = video_element.text.strip()
 
index dd3b18d72d05f3deaab902b75cb6064e4b9d16ac..6fb3d6c53fe8e25382b16f93257b9f98b05594cd 100644 (file)
@@ -11,7 +11,7 @@ from ..utils import (
 
 
 class AMCNetworksIE(ThePlatformIE):
 
 
 class AMCNetworksIE(ThePlatformIE):
-    _VALID_URL = r'https?://(?:www\.)?(?:amc|bbcamerica|ifc|wetv)\.com/(?:movies|shows(?:/[^/]+)+)/(?P<id>[^/?#]+)'
+    _VALID_URL = r'https?://(?:www\.)?(?:amc|bbcamerica|ifc|(?:we|sundance)tv)\.com/(?:movies|shows(?:/[^/]+)+)/(?P<id>[^/?#]+)'
     _TESTS = [{
         'url': 'http://www.ifc.com/shows/maron/season-04/episode-01/step-1',
         'md5': '',
     _TESTS = [{
         'url': 'http://www.ifc.com/shows/maron/season-04/episode-01/step-1',
         'md5': '',
@@ -51,6 +51,9 @@ class AMCNetworksIE(ThePlatformIE):
     }, {
         'url': 'http://www.wetv.com/shows/la-hair/videos/season-05/episode-09-episode-9-2/episode-9-sneak-peek-3',
         'only_matching': True,
     }, {
         'url': 'http://www.wetv.com/shows/la-hair/videos/season-05/episode-09-episode-9-2/episode-9-sneak-peek-3',
         'only_matching': True,
+    }, {
+        'url': 'https://www.sundancetv.com/shows/riviera/full-episodes/season-1/episode-01-episode-1',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
     }]
 
     def _real_extract(self, url):
index 3c7d7250b611a7e3a7bf7a6777a53932858588d7..c79c58e828150b9608bc2ac74c34a3428deb821a 100644 (file)
@@ -41,7 +41,7 @@ class ArchiveOrgIE(InfoExtractor):
         webpage = self._download_webpage(
             'http://archive.org/embed/' + video_id, video_id)
         jwplayer_playlist = self._parse_json(self._search_regex(
         webpage = self._download_webpage(
             'http://archive.org/embed/' + video_id, video_id)
         jwplayer_playlist = self._parse_json(self._search_regex(
-            r"(?s)Play\('[^']+'\s*,\s*(\[.+\])\s*,\s*{.*?}\);",
+            r"(?s)Play\('[^']+'\s*,\s*(\[.+\])\s*,\s*{.*?}\)",
             webpage, 'jwplayer playlist'), video_id)
         info = self._parse_jwplayer_data(
             {'playlist': jwplayer_playlist}, video_id, base_url=url)
             webpage, 'jwplayer playlist'), video_id)
         info = self._parse_jwplayer_data(
             {'playlist': jwplayer_playlist}, video_id, base_url=url)
index ef73d5a933f2ae08bfff028f0bd58afc0433ce54..86951d975dd1a11926c630b59f7c608921e077de 100644 (file)
@@ -24,57 +24,30 @@ class ARDMediathekIE(InfoExtractor):
     _VALID_URL = r'^https?://(?:(?:www\.)?ardmediathek\.de|mediathek\.(?:daserste|rbb-online)\.de)/(?:.*/)(?P<video_id>[0-9]+|[^0-9][^/\?]+)[^/\?]*(?:\?.*)?'
 
     _TESTS = [{
     _VALID_URL = r'^https?://(?:(?:www\.)?ardmediathek\.de|mediathek\.(?:daserste|rbb-online)\.de)/(?:.*/)(?P<video_id>[0-9]+|[^0-9][^/\?]+)[^/\?]*(?:\?.*)?'
 
     _TESTS = [{
-        'url': 'http://www.ardmediathek.de/tv/Dokumentation-und-Reportage/Ich-liebe-das-Leben-trotzdem/rbb-Fernsehen/Video?documentId=29582122&bcastId=3822114',
+        # available till 26.07.2022
+        'url': 'http://www.ardmediathek.de/tv/S%C3%9CDLICHT/Was-ist-die-Kunst-der-Zukunft-liebe-Ann/BR-Fernsehen/Video?bcastId=34633636&documentId=44726822',
         'info_dict': {
         'info_dict': {
-            'id': '29582122',
+            'id': '44726822',
             'ext': 'mp4',
             'ext': 'mp4',
-            'title': 'Ich liebe das Leben trotzdem',
-            'description': 'md5:45e4c225c72b27993314b31a84a5261c',
-            'duration': 4557,
+            'title': 'Was ist die Kunst der Zukunft, liebe Anna McCarthy?',
+            'description': 'md5:4ada28b3e3b5df01647310e41f3a62f5',
+            'duration': 1740,
         },
         'params': {
             # m3u8 download
             'skip_download': True,
         },
         'params': {
             # m3u8 download
             'skip_download': True,
-        },
-        'skip': 'HTTP Error 404: Not Found',
-    }, {
-        'url': 'http://www.ardmediathek.de/tv/Tatort/Tatort-Scheinwelten-H%C3%B6rfassung-Video/Das-Erste/Video?documentId=29522730&bcastId=602916',
-        'md5': 'f4d98b10759ac06c0072bbcd1f0b9e3e',
-        'info_dict': {
-            'id': '29522730',
-            'ext': 'mp4',
-            'title': 'Tatort: Scheinwelten - Hörfassung (Video tgl. ab 20 Uhr)',
-            'description': 'md5:196392e79876d0ac94c94e8cdb2875f1',
-            'duration': 5252,
-        },
-        'skip': 'HTTP Error 404: Not Found',
+        }
     }, {
         # audio
         'url': 'http://www.ardmediathek.de/tv/WDR-H%C3%B6rspiel-Speicher/Tod-eines-Fu%C3%9Fballers/WDR-3/Audio-Podcast?documentId=28488308&bcastId=23074086',
     }, {
         # audio
         'url': 'http://www.ardmediathek.de/tv/WDR-H%C3%B6rspiel-Speicher/Tod-eines-Fu%C3%9Fballers/WDR-3/Audio-Podcast?documentId=28488308&bcastId=23074086',
-        'md5': '219d94d8980b4f538c7fcb0865eb7f2c',
-        'info_dict': {
-            'id': '28488308',
-            'ext': 'mp3',
-            'title': 'Tod eines Fußballers',
-            'description': 'md5:f6e39f3461f0e1f54bfa48c8875c86ef',
-            'duration': 3240,
-        },
-        'skip': 'HTTP Error 404: Not Found',
+        'only_matching': True,
     }, {
         'url': 'http://mediathek.daserste.de/sendungen_a-z/328454_anne-will/22429276_vertrauen-ist-gut-spionieren-ist-besser-geht',
         'only_matching': True,
     }, {
         # audio
         'url': 'http://mediathek.rbb-online.de/radio/Hörspiel/Vor-dem-Fest/kulturradio/Audio?documentId=30796318&topRessort=radio&bcastId=9839158',
     }, {
         'url': 'http://mediathek.daserste.de/sendungen_a-z/328454_anne-will/22429276_vertrauen-ist-gut-spionieren-ist-besser-geht',
         'only_matching': True,
     }, {
         # audio
         'url': 'http://mediathek.rbb-online.de/radio/Hörspiel/Vor-dem-Fest/kulturradio/Audio?documentId=30796318&topRessort=radio&bcastId=9839158',
-        'md5': '4e8f00631aac0395fee17368ac0e9867',
-        'info_dict': {
-            'id': '30796318',
-            'ext': 'mp3',
-            'title': 'Vor dem Fest',
-            'description': 'md5:c0c1c8048514deaed2a73b3a60eecacb',
-            'duration': 3287,
-        },
-        'skip': 'Video is no longer available',
+        'only_matching': True,
     }]
 
     def _extract_media_info(self, media_info_url, webpage, video_id):
     }]
 
     def _extract_media_info(self, media_info_url, webpage, video_id):
@@ -252,20 +225,23 @@ class ARDMediathekIE(InfoExtractor):
 
 class ARDIE(InfoExtractor):
     _VALID_URL = r'(?P<mainurl>https?://(www\.)?daserste\.de/[^?#]+/videos/(?P<display_id>[^/?#]+)-(?P<id>[0-9]+))\.html'
 
 class ARDIE(InfoExtractor):
     _VALID_URL = r'(?P<mainurl>https?://(www\.)?daserste\.de/[^?#]+/videos/(?P<display_id>[^/?#]+)-(?P<id>[0-9]+))\.html'
-    _TEST = {
-        'url': 'http://www.daserste.de/information/reportage-dokumentation/dokus/videos/die-story-im-ersten-mission-unter-falscher-flagge-100.html',
-        'md5': 'd216c3a86493f9322545e045ddc3eb35',
+    _TESTS = [{
+        # available till 14.02.2019
+        'url': 'http://www.daserste.de/information/talk/maischberger/videos/das-groko-drama-zerlegen-sich-die-volksparteien-video-102.html',
+        'md5': '8e4ec85f31be7c7fc08a26cdbc5a1f49',
         'info_dict': {
         'info_dict': {
-            'display_id': 'die-story-im-ersten-mission-unter-falscher-flagge',
-            'id': '100',
+            'display_id': 'das-groko-drama-zerlegen-sich-die-volksparteien-video',
+            'id': '102',
             'ext': 'mp4',
             'ext': 'mp4',
-            'duration': 2600,
-            'title': 'Die Story im Ersten: Mission unter falscher Flagge',
-            'upload_date': '20140804',
+            'duration': 4435.0,
+            'title': 'Das GroKo-Drama: Zerlegen sich die Volksparteien?',
+            'upload_date': '20180214',
             'thumbnail': r're:^https?://.*\.jpg$',
         },
             'thumbnail': r're:^https?://.*\.jpg$',
         },
-        'skip': 'HTTP Error 404: Not Found',
-    }
+    }, {
+        'url': 'http://www.daserste.de/information/reportage-dokumentation/dokus/videos/die-story-im-ersten-mission-unter-falscher-flagge-100.html',
+        'only_matching': True,
+    }]
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
index f04505011c39fbfe315763ce8182daf6c2b9f663..0e4eaef659105df0248d6ef5171f5f444660f1ee 100644 (file)
@@ -564,7 +564,7 @@ class BrightcoveNewIE(AdobePassIE):
 
         return entries
 
 
         return entries
 
-    def _parse_brightcove_metadata(self, json_data, video_id):
+    def _parse_brightcove_metadata(self, json_data, video_id, headers={}):
         title = json_data['name'].strip()
 
         formats = []
         title = json_data['name'].strip()
 
         formats = []
@@ -638,6 +638,9 @@ class BrightcoveNewIE(AdobePassIE):
 
         self._sort_formats(formats)
 
 
         self._sort_formats(formats)
 
+        for f in formats:
+            f.setdefault('http_headers', {}).update(headers)
+
         subtitles = {}
         for text_track in json_data.get('text_tracks', []):
             if text_track.get('src'):
         subtitles = {}
         for text_track in json_data.get('text_tracks', []):
             if text_track.get('src'):
@@ -690,10 +693,17 @@ class BrightcoveNewIE(AdobePassIE):
                 webpage, 'policy key', group='pk')
 
         api_url = 'https://edge.api.brightcove.com/playback/v1/accounts/%s/videos/%s' % (account_id, video_id)
                 webpage, 'policy key', group='pk')
 
         api_url = 'https://edge.api.brightcove.com/playback/v1/accounts/%s/videos/%s' % (account_id, video_id)
-        try:
-            json_data = self._download_json(api_url, video_id, headers={
-                'Accept': 'application/json;pk=%s' % policy_key
+        headers = {
+            'Accept': 'application/json;pk=%s' % policy_key,
+        }
+        referrer = smuggled_data.get('referrer')
+        if referrer:
+            headers.update({
+                'Referer': referrer,
+                'Origin': re.search(r'https?://[^/]+', referrer).group(0),
             })
             })
+        try:
+            json_data = self._download_json(api_url, video_id, headers=headers)
         except ExtractorError as e:
             if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
                 json_data = self._parse_json(e.cause.read().decode(), video_id)[0]
         except ExtractorError as e:
             if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
                 json_data = self._parse_json(e.cause.read().decode(), video_id)[0]
@@ -717,4 +727,5 @@ class BrightcoveNewIE(AdobePassIE):
                     'tveToken': tve_token,
                 })
 
                     'tveToken': tve_token,
                 })
 
-        return self._parse_brightcove_metadata(json_data, video_id)
+        return self._parse_brightcove_metadata(
+            json_data, video_id, headers=headers)
index 3faa76076318813d8b16f005f9a87ce5df986034..8ac62c1a680ccb0ab8a8c038bf0c7270c510d3e6 100644 (file)
@@ -246,7 +246,7 @@ class VrtNUIE(GigyaBaseIE):
     def _real_extract(self, url):
         display_id = self._match_id(url)
 
     def _real_extract(self, url):
         display_id = self._match_id(url)
 
-        webpage = self._download_webpage(url, display_id)
+        webpage, urlh = self._download_webpage_handle(url, display_id)
 
         title = self._html_search_regex(
             r'(?ms)<h1 class="content__heading">(.+?)</h1>',
 
         title = self._html_search_regex(
             r'(?ms)<h1 class="content__heading">(.+?)</h1>',
@@ -276,7 +276,7 @@ class VrtNUIE(GigyaBaseIE):
             webpage, 'release_date', default=None))
 
         # If there's a ? or a # in the URL, remove them and everything after
             webpage, 'release_date', default=None))
 
         # If there's a ? or a # in the URL, remove them and everything after
-        clean_url = url.split('?')[0].split('#')[0].strip('/')
+        clean_url = urlh.geturl().split('?')[0].split('#')[0].strip('/')
         securevideo_url = clean_url + '.mssecurevideo.json'
 
         try:
         securevideo_url = clean_url + '.mssecurevideo.json'
 
         try:
index 9faf4022758c46e8ebbd38db74881ad26e692f19..3be0c646bb8b4e431ea47fa5db4b46e13208dcd6 100644 (file)
@@ -1,6 +1,7 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
 # coding: utf-8
 from __future__ import unicode_literals
 
+import json
 import re
 
 from .common import InfoExtractor
 import re
 
 from .common import InfoExtractor
@@ -13,6 +14,7 @@ from ..utils import (
     xpath_element,
     xpath_with_ns,
     find_xpath_attr,
     xpath_element,
     xpath_with_ns,
     find_xpath_attr,
+    parse_duration,
     parse_iso8601,
     parse_age_limit,
     int_or_none,
     parse_iso8601,
     parse_age_limit,
     int_or_none,
@@ -359,3 +361,63 @@ class CBCWatchIE(CBCWatchBaseIE):
         video_id = self._match_id(url)
         rss = self._call_api('web/browse/' + video_id, video_id)
         return self._parse_rss_feed(rss)
         video_id = self._match_id(url)
         rss = self._call_api('web/browse/' + video_id, video_id)
         return self._parse_rss_feed(rss)
+
+
+class CBCOlympicsIE(InfoExtractor):
+    IE_NAME = 'cbc.ca:olympics'
+    _VALID_URL = r'https?://olympics\.cbc\.ca/video/[^/]+/(?P<id>[^/?#]+)'
+    _TESTS = [{
+        'url': 'https://olympics.cbc.ca/video/whats-on-tv/olympic-morning-featuring-the-opening-ceremony/',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+        video_id = self._hidden_inputs(webpage)['videoId']
+        video_doc = self._download_xml(
+            'https://olympics.cbc.ca/videodata/%s.xml' % video_id, video_id)
+        title = xpath_text(video_doc, 'title', fatal=True)
+        is_live = xpath_text(video_doc, 'kind') == 'Live'
+        if is_live:
+            title = self._live_title(title)
+
+        formats = []
+        for video_source in video_doc.findall('videoSources/videoSource'):
+            uri = xpath_text(video_source, 'uri')
+            if not uri:
+                continue
+            tokenize = self._download_json(
+                'https://olympics.cbc.ca/api/api-akamai/tokenize',
+                video_id, data=json.dumps({
+                    'VideoSource': uri,
+                }).encode(), headers={
+                    'Content-Type': 'application/json',
+                    'Referer': url,
+                    # d3.VideoPlayer._init in https://olympics.cbc.ca/components/script/base.js
+                    'Cookie': '_dvp=TK:C0ObxjerU',  # AKAMAI CDN cookie
+                }, fatal=False)
+            if not tokenize:
+                continue
+            content_url = tokenize['ContentUrl']
+            video_source_format = video_source.get('format')
+            if video_source_format == 'IIS':
+                formats.extend(self._extract_ism_formats(
+                    content_url, video_id, ism_id=video_source_format, fatal=False))
+            else:
+                formats.extend(self._extract_m3u8_formats(
+                    content_url, video_id, 'mp4',
+                    'm3u8' if is_live else 'm3u8_native',
+                    m3u8_id=video_source_format, fatal=False))
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'display_id': display_id,
+            'title': title,
+            'description': xpath_text(video_doc, 'description'),
+            'thumbnail': xpath_text(video_doc, 'thumbnailUrl'),
+            'duration': parse_duration(xpath_text(video_doc, 'duration')),
+            'formats': formats,
+            'is_live': is_live,
+        }
index 681d63e29222715a0bb0d45462169edde0787330..6596e98a691d3013d0375f071e0978e16f73e71a 100644 (file)
@@ -75,10 +75,10 @@ class CBSInteractiveIE(CBSIE):
         webpage = self._download_webpage(url, display_id)
 
         data_json = self._html_search_regex(
         webpage = self._download_webpage(url, display_id)
 
         data_json = self._html_search_regex(
-            r"data-(?:cnet|zdnet)-video(?:-uvp(?:js)?)?-options='([^']+)'",
+            r"data(?:-(?:cnet|zdnet))?-video(?:-(?:uvp(?:js)?|player))?-options='([^']+)'",
             webpage, 'data json')
         data = self._parse_json(data_json, display_id)
             webpage, 'data json')
         data = self._parse_json(data_json, display_id)
-        vdata = data.get('video') or data['videos'][0]
+        vdata = data.get('video') or (data.get('videos') or data.get('playlist'))[0]
 
         video_id = vdata['mpxRefId']
 
 
         video_id = vdata['mpxRefId']
 
index deafb48508fc7a0def88e5bd23fab558d37d8213..fcdd0fd14a85a12690031b409058d932a3d4e4db 100644 (file)
@@ -174,6 +174,8 @@ class InfoExtractor(object):
                                  width : height ratio as float.
                     * no_resume  The server does not support resuming the
                                  (HTTP or RTMP) download. Boolean.
                                  width : height ratio as float.
                     * no_resume  The server does not support resuming the
                                  (HTTP or RTMP) download. Boolean.
+                    * downloader_options  A dictionary of downloader options as
+                                 described in FileDownloader
 
     url:            Final video URL.
     ext:            Video filename extension.
 
     url:            Final video URL.
     ext:            Video filename extension.
@@ -2248,9 +2250,10 @@ class InfoExtractor(object):
     def _extract_wowza_formats(self, url, video_id, m3u8_entry_protocol='m3u8_native', skip_protocols=[]):
         query = compat_urlparse.urlparse(url).query
         url = re.sub(r'/(?:manifest|playlist|jwplayer)\.(?:m3u8|f4m|mpd|smil)', '', url)
     def _extract_wowza_formats(self, url, video_id, m3u8_entry_protocol='m3u8_native', skip_protocols=[]):
         query = compat_urlparse.urlparse(url).query
         url = re.sub(r'/(?:manifest|playlist|jwplayer)\.(?:m3u8|f4m|mpd|smil)', '', url)
-        url_base = self._search_regex(
-            r'(?:(?:https?|rtmp|rtsp):)?(//[^?]+)', url, 'format url')
-        http_base_url = '%s:%s' % ('http', url_base)
+        mobj = re.search(
+            r'(?:(?:http|rtmp|rtsp)(?P<s>s)?:)?(?P<url>//[^?]+)', url)
+        url_base = mobj.group('url')
+        http_base_url = '%s%s:%s' % ('http', mobj.group('s') or '', url_base)
         formats = []
 
         def manifest_url(manifest):
         formats = []
 
         def manifest_url(manifest):
@@ -2350,7 +2353,10 @@ class InfoExtractor(object):
                 for track in tracks:
                     if not isinstance(track, dict):
                         continue
                 for track in tracks:
                     if not isinstance(track, dict):
                         continue
-                    if track.get('kind') != 'captions':
+                    track_kind = track.get('kind')
+                    if not track_kind or not isinstance(track_kind, compat_str):
+                        continue
+                    if track_kind.lower() not in ('captions', 'subtitles'):
                         continue
                     track_url = urljoin(base_url, track.get('file'))
                     if not track_url:
                         continue
                     track_url = urljoin(base_url, track.get('file'))
                     if not track_url:
index f9cec1d23db9e0ff389a9fa5ffd24ff9c8558d4b..91449dcd8549e992afed748651ad0e3312812721 100644 (file)
@@ -5,15 +5,16 @@ import re
 import string
 
 from .discoverygo import DiscoveryGoBaseIE
 import string
 
 from .discoverygo import DiscoveryGoBaseIE
+from ..compat import compat_str
 from ..utils import (
     ExtractorError,
 from ..utils import (
     ExtractorError,
-    update_url_query,
+    try_get,
 )
 from ..compat import compat_HTTPError
 
 
 class DiscoveryIE(DiscoveryGoBaseIE):
 )
 from ..compat import compat_HTTPError
 
 
 class DiscoveryIE(DiscoveryGoBaseIE):
-    _VALID_URL = r'''(?x)https?://(?:www\.)?(?:
+    _VALID_URL = r'''(?x)https?://(?:www\.)?(?P<site>
             discovery|
             investigationdiscovery|
             discoverylife|
             discovery|
             investigationdiscovery|
             discoverylife|
@@ -44,7 +45,7 @@ class DiscoveryIE(DiscoveryGoBaseIE):
     _GEO_BYPASS = False
 
     def _real_extract(self, url):
     _GEO_BYPASS = False
 
     def _real_extract(self, url):
-        path, display_id = re.match(self._VALID_URL, url).groups()
+        site, path, display_id = re.match(self._VALID_URL, url).groups()
         webpage = self._download_webpage(url, display_id)
 
         react_data = self._parse_json(self._search_regex(
         webpage = self._download_webpage(url, display_id)
 
         react_data = self._parse_json(self._search_regex(
@@ -55,14 +56,13 @@ class DiscoveryIE(DiscoveryGoBaseIE):
         video_id = video['id']
 
         access_token = self._download_json(
         video_id = video['id']
 
         access_token = self._download_json(
-            'https://www.discovery.com/anonymous', display_id, query={
-                'authLink': update_url_query(
-                    'https://login.discovery.com/v1/oauth2/authorize', {
-                        'client_id': react_data['application']['apiClientId'],
-                        'redirect_uri': 'https://fusion.ddmcdn.com/app/mercury-sdk/180/redirectHandler.html',
-                        'response_type': 'anonymous',
-                        'state': 'nonce,' + ''.join([random.choice(string.ascii_letters) for _ in range(32)]),
-                    })
+            'https://www.%s.com/anonymous' % site, display_id, query={
+                'authRel': 'authorization',
+                'client_id': try_get(
+                    react_data, lambda x: x['application']['apiClientId'],
+                    compat_str) or '3020a40c2356a645b4b4',
+                'nonce': ''.join([random.choice(string.ascii_letters) for _ in range(32)]),
+                'redirectUri': 'https://fusion.ddmcdn.com/app/mercury-sdk/180/redirectHandler.html?https://www.%s.com' % site,
             })['access_token']
 
         try:
             })['access_token']
 
         try:
index a08dace4318a05dfacd9c6e9ad94508934b615ca..b734467734c30a880badab5ff72cfac7543b3b56 100644 (file)
@@ -26,7 +26,7 @@ from ..utils import (
 
 
 class DPlayIE(InfoExtractor):
 
 
 class DPlayIE(InfoExtractor):
-    _VALID_URL = r'https?://(?P<domain>www\.(?P<host>dplay\.(?P<country>dk|se|no)))/(?:videoer/)?(?P<id>[^/]+/[^/?#]+)'
+    _VALID_URL = r'https?://(?P<domain>www\.(?P<host>dplay\.(?P<country>dk|se|no)))/(?:video(?:er|s)/)?(?P<id>[^/]+/[^/?#]+)'
 
     _TESTS = [{
         # non geo restricted, via secure api, unsigned download hls URL
 
     _TESTS = [{
         # non geo restricted, via secure api, unsigned download hls URL
@@ -89,9 +89,12 @@ class DPlayIE(InfoExtractor):
             'skip_download': True,
         },
     }, {
             'skip_download': True,
         },
     }, {
-        # geo restricted, bypassable via X-Forwarded-For
+
         'url': 'https://www.dplay.dk/videoer/singleliv/season-5-episode-3',
         'only_matching': True,
         'url': 'https://www.dplay.dk/videoer/singleliv/season-5-episode-3',
         'only_matching': True,
+    }, {
+        'url': 'https://www.dplay.se/videos/sofias-anglar/sofias-anglar-1001',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
     }]
 
     def _real_extract(self, url):
index e85c58bd5669bd14cb069690d43277f77cea49d3..3f760888e6060e1522c735a0d688fe790becb9e0 100644 (file)
@@ -32,7 +32,7 @@ class DVTVIE(InfoExtractor):
     }, {
         'url': 'http://video.aktualne.cz/dvtv/dvtv-16-12-2014-utok-talibanu-boj-o-kliniku-uprchlici/r~973eb3bc854e11e498be002590604f2e/',
         'info_dict': {
     }, {
         'url': 'http://video.aktualne.cz/dvtv/dvtv-16-12-2014-utok-talibanu-boj-o-kliniku-uprchlici/r~973eb3bc854e11e498be002590604f2e/',
         'info_dict': {
-            'title': 'DVTV 16. 12. 2014: útok Talibanu, boj o kliniku, uprchlíci',
+            'title': r're:^DVTV 16\. 12\. 2014: útok Talibanu, boj o kliniku, uprchlíci',
             'id': '973eb3bc854e11e498be002590604f2e',
         },
         'playlist': [{
             'id': '973eb3bc854e11e498be002590604f2e',
         },
         'playlist': [{
@@ -91,10 +91,24 @@ class DVTVIE(InfoExtractor):
     }, {
         'url': 'http://video.aktualne.cz/v-cechach-poprve-zazni-zelenkova-zrestaurovana-mse/r~45b4b00483ec11e4883b002590604f2e/',
         'only_matching': True,
     }, {
         'url': 'http://video.aktualne.cz/v-cechach-poprve-zazni-zelenkova-zrestaurovana-mse/r~45b4b00483ec11e4883b002590604f2e/',
         'only_matching': True,
+    }, {
+        'url': 'https://video.aktualne.cz/dvtv/babis-a-zeman-nesou-vinu-za-to-ze-nemame-jasno-v-tom-kdo-bud/r~026afb54fad711e79704ac1f6b220ee8/',
+        'md5': '87defe16681b1429c91f7a74809823c6',
+        'info_dict': {
+            'id': 'f5ae72f6fad611e794dbac1f6b220ee8',
+            'ext': 'mp4',
+            'title': 'Babiš a Zeman nesou vinu za to, že nemáme jasno v tom, kdo bude vládnout, říká Pekarová Adamová',
+        },
+        'params': {
+            'skip_download': True,
+        },
     }]
 
     }]
 
-    def _parse_video_metadata(self, js, video_id):
+    def _parse_video_metadata(self, js, video_id, live_js=None):
         data = self._parse_json(js, video_id, transform_source=js_to_json)
         data = self._parse_json(js, video_id, transform_source=js_to_json)
+        if live_js:
+            data.update(self._parse_json(
+                live_js, video_id, transform_source=js_to_json))
 
         title = unescapeHTML(data['title'])
 
 
         title = unescapeHTML(data['title'])
 
@@ -142,13 +156,18 @@ class DVTVIE(InfoExtractor):
 
         webpage = self._download_webpage(url, video_id)
 
 
         webpage = self._download_webpage(url, video_id)
 
+        # live content
+        live_item = self._search_regex(
+            r'(?s)embedData[0-9a-f]{32}\.asset\.liveStarter\s*=\s*(\{.+?\});',
+            webpage, 'video', default=None)
+
         # single video
         item = self._search_regex(
             r'(?s)embedData[0-9a-f]{32}\[["\']asset["\']\]\s*=\s*(\{.+?\});',
         # single video
         item = self._search_regex(
             r'(?s)embedData[0-9a-f]{32}\[["\']asset["\']\]\s*=\s*(\{.+?\});',
-            webpage, 'video', default=None, fatal=False)
+            webpage, 'video', default=None)
 
         if item:
 
         if item:
-            return self._parse_video_metadata(item, video_id)
+            return self._parse_video_metadata(item, video_id, live_item)
 
         # playlist
         items = re.findall(
 
         # playlist
         items = re.findall(
index b442256fee258b573fcc28e392c0d0ccb7a8ea1c..3bde40eb3cef5ad6c9a22ad894de334e03ea0ce2 100644 (file)
@@ -162,6 +162,7 @@ from .cbc import (
     CBCPlayerIE,
     CBCWatchVideoIE,
     CBCWatchIE,
     CBCPlayerIE,
     CBCWatchVideoIE,
     CBCWatchIE,
+    CBCOlympicsIE,
 )
 from .cbs import CBSIE
 from .cbslocal import CBSLocalIE
 )
 from .cbs import CBSIE
 from .cbslocal import CBSLocalIE
@@ -373,8 +374,10 @@ from .franceculture import FranceCultureIE
 from .franceinter import FranceInterIE
 from .francetv import (
     FranceTVIE,
 from .franceinter import FranceInterIE
 from .francetv import (
     FranceTVIE,
+    FranceTVSiteIE,
     FranceTVEmbedIE,
     FranceTVInfoIE,
     FranceTVEmbedIE,
     FranceTVInfoIE,
+    FranceTVJeunesseIE,
     GenerationWhatIE,
     CultureboxIE,
 )
     GenerationWhatIE,
     CultureboxIE,
 )
@@ -382,7 +385,10 @@ from .freesound import FreesoundIE
 from .freespeech import FreespeechIE
 from .freshlive import FreshLiveIE
 from .funimation import FunimationIE
 from .freespeech import FreespeechIE
 from .freshlive import FreshLiveIE
 from .funimation import FunimationIE
-from .funk import FunkIE
+from .funk import (
+    FunkMixIE,
+    FunkChannelIE,
+)
 from .funnyordie import FunnyOrDieIE
 from .fusion import FusionIE
 from .fxnetworks import FXNetworksIE
 from .funnyordie import FunnyOrDieIE
 from .fusion import FusionIE
 from .fxnetworks import FXNetworksIE
@@ -426,6 +432,7 @@ from .hellporno import HellPornoIE
 from .helsinki import HelsinkiIE
 from .hentaistigma import HentaiStigmaIE
 from .hgtv import HGTVComShowIE
 from .helsinki import HelsinkiIE
 from .hentaistigma import HentaiStigmaIE
 from .hgtv import HGTVComShowIE
+from .hidive import HiDiveIE
 from .historicfilms import HistoricFilmsIE
 from .hitbox import HitboxIE, HitboxLiveIE
 from .hitrecord import HitRecordIE
 from .historicfilms import HistoricFilmsIE
 from .hitbox import HitboxIE, HitboxLiveIE
 from .hitrecord import HitRecordIE
@@ -543,6 +550,7 @@ from .limelight import (
     LimelightChannelIE,
     LimelightChannelListIE,
 )
     LimelightChannelIE,
     LimelightChannelListIE,
 )
+from .line import LineTVIE
 from .litv import LiTVIE
 from .liveleak import (
     LiveLeakIE,
 from .litv import LiTVIE
 from .liveleak import (
     LiveLeakIE,
@@ -563,7 +571,11 @@ from .lynda import (
 )
 from .m6 import M6IE
 from .macgamestore import MacGameStoreIE
 )
 from .m6 import M6IE
 from .macgamestore import MacGameStoreIE
-from .mailru import MailRuIE
+from .mailru import (
+    MailRuIE,
+    MailRuMusicIE,
+    MailRuMusicSearchIE,
+)
 from .makerschannel import MakersChannelIE
 from .makertv import MakerTVIE
 from .mangomolo import (
 from .makerschannel import MakersChannelIE
 from .makertv import MakerTVIE
 from .mangomolo import (
@@ -630,7 +642,10 @@ from .musicplayon import MusicPlayOnIE
 from .mwave import MwaveIE, MwaveMeetGreetIE
 from .myspace import MySpaceIE, MySpaceAlbumIE
 from .myspass import MySpassIE
 from .mwave import MwaveIE, MwaveMeetGreetIE
 from .myspace import MySpaceIE, MySpaceAlbumIE
 from .myspass import MySpassIE
-from .myvi import MyviIE
+from .myvi import (
+    MyviIE,
+    MyviEmbedIE,
+)
 from .myvidster import MyVidsterIE
 from .nationalgeographic import (
     NationalGeographicVideoIE,
 from .myvidster import MyVidsterIE
 from .nationalgeographic import (
     NationalGeographicVideoIE,
@@ -644,6 +659,7 @@ from .nbc import (
     NBCIE,
     NBCNewsIE,
     NBCOlympicsIE,
     NBCIE,
     NBCNewsIE,
     NBCOlympicsIE,
+    NBCOlympicsStreamIE,
     NBCSportsIE,
     NBCSportsVPlayerIE,
 )
     NBCSportsIE,
     NBCSportsVPlayerIE,
 )
@@ -860,6 +876,7 @@ from .rai import (
     RaiPlayPlaylistIE,
     RaiIE,
 )
     RaiPlayPlaylistIE,
     RaiIE,
 )
+from .raywenderlich import RayWenderlichIE
 from .rbmaradio import RBMARadioIE
 from .rds import RDSIE
 from .redbulltv import RedBullTVIE
 from .rbmaradio import RBMARadioIE
 from .rds import RDSIE
 from .redbulltv import RedBullTVIE
@@ -1038,9 +1055,14 @@ from .telebruxelles import TeleBruxellesIE
 from .telecinco import TelecincoIE
 from .telegraaf import TelegraafIE
 from .telemb import TeleMBIE
 from .telecinco import TelecincoIE
 from .telegraaf import TelegraafIE
 from .telemb import TeleMBIE
-from .telequebec import TeleQuebecIE
+from .telequebec import (
+    TeleQuebecIE,
+    TeleQuebecEmissionIE,
+    TeleQuebecLiveIE,
+)
 from .teletask import TeleTaskIE
 from .telewebion import TelewebionIE
 from .teletask import TeleTaskIE
 from .telewebion import TelewebionIE
+from .tennistv import TennisTVIE
 from .testurl import TestURLIE
 from .tf1 import TF1IE
 from .tfo import TFOIE
 from .testurl import TestURLIE
 from .tf1 import TF1IE
 from .tfo import TFOIE
@@ -1195,7 +1217,6 @@ from .vice import (
     ViceArticleIE,
     ViceShowIE,
 )
     ViceArticleIE,
     ViceShowIE,
 )
-from .viceland import VicelandIE
 from .vidbit import VidbitIE
 from .viddler import ViddlerIE
 from .videa import VideaIE
 from .vidbit import VidbitIE
 from .viddler import ViddlerIE
 from .videa import VideaIE
@@ -1210,6 +1231,7 @@ from .videomore import (
 from .videopremium import VideoPremiumIE
 from .videopress import VideoPressIE
 from .vidio import VidioIE
 from .videopremium import VideoPremiumIE
 from .videopress import VideoPressIE
 from .vidio import VidioIE
+from .vidlii import VidLiiIE
 from .vidme import (
     VidmeIE,
     VidmeUserIE,
 from .vidme import (
     VidmeIE,
     VidmeUserIE,
@@ -1353,6 +1375,7 @@ from .yandexmusic import (
     YandexMusicPlaylistIE,
 )
 from .yandexdisk import YandexDiskIE
     YandexMusicPlaylistIE,
 )
 from .yandexdisk import YandexDiskIE
+from .yapfiles import YapFilesIE
 from .yesjapan import YesJapanIE
 from .yinyuetai import YinYueTaiIE
 from .ynet import YnetIE
 from .yesjapan import YesJapanIE
 from .yinyuetai import YinYueTaiIE
 from .ynet import YnetIE
index 095bb3954c523852be8d7d09d498d324d429c826..c02cd03de1c59452ac1ff2432f9e5ea54134e7de 100644 (file)
@@ -5,19 +5,89 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
 import re
 
 from .common import InfoExtractor
-from ..compat import compat_urlparse
+from ..compat import (
+    compat_str,
+    compat_urlparse,
+)
 from ..utils import (
     clean_html,
 from ..utils import (
     clean_html,
+    determine_ext,
     ExtractorError,
     int_or_none,
     parse_duration,
     ExtractorError,
     int_or_none,
     parse_duration,
-    determine_ext,
+    try_get,
 )
 from .dailymotion import DailymotionIE
 
 
 class FranceTVBaseInfoExtractor(InfoExtractor):
 )
 from .dailymotion import DailymotionIE
 
 
 class FranceTVBaseInfoExtractor(InfoExtractor):
+    def _make_url_result(self, video_or_full_id, catalog=None):
+        full_id = 'francetv:%s' % video_or_full_id
+        if '@' not in video_or_full_id and catalog:
+            full_id += '@%s' % catalog
+        return self.url_result(
+            full_id, ie=FranceTVIE.ie_key(),
+            video_id=video_or_full_id.split('@')[0])
+
+
+class FranceTVIE(InfoExtractor):
+    _VALID_URL = r'''(?x)
+                    (?:
+                        https?://
+                            sivideo\.webservices\.francetelevisions\.fr/tools/getInfosOeuvre/v2/\?
+                            .*?\bidDiffusion=[^&]+|
+                        (?:
+                            https?://videos\.francetv\.fr/video/|
+                            francetv:
+                        )
+                        (?P<id>[^@]+)(?:@(?P<catalog>.+))?
+                    )
+                    '''
+
+    _TESTS = [{
+        # without catalog
+        'url': 'https://sivideo.webservices.francetelevisions.fr/tools/getInfosOeuvre/v2/?idDiffusion=162311093&callback=_jsonp_loader_callback_request_0',
+        'md5': 'c2248a8de38c4e65ea8fae7b5df2d84f',
+        'info_dict': {
+            'id': '162311093',
+            'ext': 'mp4',
+            'title': '13h15, le dimanche... - Les mystères de Jésus',
+            'description': 'md5:75efe8d4c0a8205e5904498ffe1e1a42',
+            'timestamp': 1502623500,
+            'upload_date': '20170813',
+        },
+    }, {
+        # with catalog
+        'url': 'https://sivideo.webservices.francetelevisions.fr/tools/getInfosOeuvre/v2/?idDiffusion=NI_1004933&catalogue=Zouzous&callback=_jsonp_loader_callback_request_4',
+        'only_matching': True,
+    }, {
+        'url': 'http://videos.francetv.fr/video/NI_657393@Regions',
+        'only_matching': True,
+    }, {
+        'url': 'francetv:162311093',
+        'only_matching': True,
+    }, {
+        'url': 'francetv:NI_1004933@Zouzous',
+        'only_matching': True,
+    }, {
+        'url': 'francetv:NI_983319@Info-web',
+        'only_matching': True,
+    }, {
+        'url': 'francetv:NI_983319',
+        'only_matching': True,
+    }, {
+        'url': 'francetv:NI_657393@Regions',
+        'only_matching': True,
+    }, {
+        # france-3 live
+        'url': 'francetv:SIM_France3',
+        'only_matching': True,
+    }]
+
     def _extract_video(self, video_id, catalogue=None):
     def _extract_video(self, video_id, catalogue=None):
+        # Videos are identified by idDiffusion so catalogue part is optional.
+        # However when provided, some extra formats may be returned so we pass
+        # it if available.
         info = self._download_json(
             'https://sivideo.webservices.francetelevisions.fr/tools/getInfosOeuvre/v2/',
             video_id, 'Downloading video JSON', query={
         info = self._download_json(
             'https://sivideo.webservices.francetelevisions.fr/tools/getInfosOeuvre/v2/',
             video_id, 'Downloading video JSON', query={
@@ -27,7 +97,8 @@ class FranceTVBaseInfoExtractor(InfoExtractor):
 
         if info.get('status') == 'NOK':
             raise ExtractorError(
 
         if info.get('status') == 'NOK':
             raise ExtractorError(
-                '%s returned error: %s' % (self.IE_NAME, info['message']), expected=True)
+                '%s returned error: %s' % (self.IE_NAME, info['message']),
+                expected=True)
         allowed_countries = info['videos'][0].get('geoblocage')
         if allowed_countries:
             georestricted = True
         allowed_countries = info['videos'][0].get('geoblocage')
         if allowed_countries:
             georestricted = True
@@ -42,6 +113,21 @@ class FranceTVBaseInfoExtractor(InfoExtractor):
         else:
             georestricted = False
 
         else:
             georestricted = False
 
+        def sign(manifest_url, manifest_id):
+            for host in ('hdfauthftv-a.akamaihd.net', 'hdfauth.francetv.fr'):
+                signed_url = self._download_webpage(
+                    'https://%s/esi/TA' % host, video_id,
+                    'Downloading signed %s manifest URL' % manifest_id,
+                    fatal=False, query={
+                        'url': manifest_url,
+                    })
+                if (signed_url and isinstance(signed_url, compat_str) and
+                        re.search(r'^(?:https?:)?//', signed_url)):
+                    return signed_url
+            return manifest_url
+
+        is_live = None
+
         formats = []
         for video in info['videos']:
             if video['statut'] != 'ONLINE':
         formats = []
         for video in info['videos']:
             if video['statut'] != 'ONLINE':
@@ -49,6 +135,10 @@ class FranceTVBaseInfoExtractor(InfoExtractor):
             video_url = video['url']
             if not video_url:
                 continue
             video_url = video['url']
             if not video_url:
                 continue
+            if is_live is None:
+                is_live = (try_get(
+                    video, lambda x: x['plages_ouverture'][0]['direct'],
+                    bool) is True) or '/live.francetv.fr/' in video_url
             format_id = video['format']
             ext = determine_ext(video_url)
             if ext == 'f4m':
             format_id = video['format']
             ext = determine_ext(video_url)
             if ext == 'f4m':
@@ -56,17 +146,14 @@ class FranceTVBaseInfoExtractor(InfoExtractor):
                     # See https://github.com/rg3/youtube-dl/issues/3963
                     # m3u8 urls work fine
                     continue
                     # See https://github.com/rg3/youtube-dl/issues/3963
                     # m3u8 urls work fine
                     continue
-                f4m_url = self._download_webpage(
-                    'http://hdfauth.francetv.fr/esi/TA?url=%s' % video_url,
-                    video_id, 'Downloading f4m manifest token', fatal=False)
-                if f4m_url:
-                    formats.extend(self._extract_f4m_formats(
-                        f4m_url + '&hdcore=3.7.0&plugin=aasp-3.7.0.39.44',
-                        video_id, f4m_id=format_id, fatal=False))
+                formats.extend(self._extract_f4m_formats(
+                    sign(video_url, format_id) + '&hdcore=3.7.0&plugin=aasp-3.7.0.39.44',
+                    video_id, f4m_id=format_id, fatal=False))
             elif ext == 'm3u8':
                 formats.extend(self._extract_m3u8_formats(
             elif ext == 'm3u8':
                 formats.extend(self._extract_m3u8_formats(
-                    video_url, video_id, 'mp4', entry_protocol='m3u8_native',
-                    m3u8_id=format_id, fatal=False))
+                    sign(video_url, format_id), video_id, 'mp4',
+                    entry_protocol='m3u8_native', m3u8_id=format_id,
+                    fatal=False))
             elif video_url.startswith('rtmp'):
                 formats.append({
                     'url': video_url,
             elif video_url.startswith('rtmp'):
                 formats.append({
                     'url': video_url,
@@ -97,33 +184,48 @@ class FranceTVBaseInfoExtractor(InfoExtractor):
 
         return {
             'id': video_id,
 
         return {
             'id': video_id,
-            'title': title,
+            'title': self._live_title(title) if is_live else title,
             'description': clean_html(info['synopsis']),
             'thumbnail': compat_urlparse.urljoin('http://pluzz.francetv.fr', info['image']),
             'duration': int_or_none(info.get('real_duration')) or parse_duration(info['duree']),
             'timestamp': int_or_none(info['diffusion']['timestamp']),
             'description': clean_html(info['synopsis']),
             'thumbnail': compat_urlparse.urljoin('http://pluzz.francetv.fr', info['image']),
             'duration': int_or_none(info.get('real_duration')) or parse_duration(info['duree']),
             'timestamp': int_or_none(info['diffusion']['timestamp']),
+            'is_live': is_live,
             'formats': formats,
             'subtitles': subtitles,
         }
 
             'formats': formats,
             'subtitles': subtitles,
         }
 
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        catalog = mobj.group('catalog')
 
 
-class FranceTVIE(FranceTVBaseInfoExtractor):
+        if not video_id:
+            qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
+            video_id = qs.get('idDiffusion', [None])[0]
+            catalog = qs.get('catalogue', [None])[0]
+            if not video_id:
+                raise ExtractorError('Invalid URL', expected=True)
+
+        return self._extract_video(video_id, catalog)
+
+
+class FranceTVSiteIE(FranceTVBaseInfoExtractor):
     _VALID_URL = r'https?://(?:(?:www\.)?france\.tv|mobile\.france\.tv)/(?:[^/]+/)*(?P<id>[^/]+)\.html'
 
     _TESTS = [{
         'url': 'https://www.france.tv/france-2/13h15-le-dimanche/140921-les-mysteres-de-jesus.html',
         'info_dict': {
     _VALID_URL = r'https?://(?:(?:www\.)?france\.tv|mobile\.france\.tv)/(?:[^/]+/)*(?P<id>[^/]+)\.html'
 
     _TESTS = [{
         'url': 'https://www.france.tv/france-2/13h15-le-dimanche/140921-les-mysteres-de-jesus.html',
         'info_dict': {
-            'id': '157550144',
+            'id': '162311093',
             'ext': 'mp4',
             'title': '13h15, le dimanche... - Les mystères de Jésus',
             'description': 'md5:75efe8d4c0a8205e5904498ffe1e1a42',
             'ext': 'mp4',
             'title': '13h15, le dimanche... - Les mystères de Jésus',
             'description': 'md5:75efe8d4c0a8205e5904498ffe1e1a42',
-            'timestamp': 1494156300,
-            'upload_date': '20170507',
+            'timestamp': 1502623500,
+            'upload_date': '20170813',
         },
         'params': {
         },
         'params': {
-            # m3u8 downloads
             'skip_download': True,
         },
             'skip_download': True,
         },
+        'add_ie': [FranceTVIE.ie_key()],
     }, {
         # france3
         'url': 'https://www.france.tv/france-3/des-chiffres-et-des-lettres/139063-emission-du-mardi-9-mai-2017.html',
     }, {
         # france3
         'url': 'https://www.france.tv/france-3/des-chiffres-et-des-lettres/139063-emission-du-mardi-9-mai-2017.html',
@@ -156,6 +258,10 @@ class FranceTVIE(FranceTVBaseInfoExtractor):
     }, {
         'url': 'https://www.france.tv/142749-rouge-sang.html',
         'only_matching': True,
     }, {
         'url': 'https://www.france.tv/142749-rouge-sang.html',
         'only_matching': True,
+    }, {
+        # france-3 live
+        'url': 'https://www.france.tv/france-3/direct.html',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
     }]
 
     def _real_extract(self, url):
@@ -172,13 +278,14 @@ class FranceTVIE(FranceTVBaseInfoExtractor):
             video_id, catalogue = self._html_search_regex(
                 r'(?:href=|player\.setVideo\(\s*)"http://videos?\.francetv\.fr/video/([^@]+@[^"]+)"',
                 webpage, 'video ID').split('@')
             video_id, catalogue = self._html_search_regex(
                 r'(?:href=|player\.setVideo\(\s*)"http://videos?\.francetv\.fr/video/([^@]+@[^"]+)"',
                 webpage, 'video ID').split('@')
-        return self._extract_video(video_id, catalogue)
+
+        return self._make_url_result(video_id, catalogue)
 
 
 class FranceTVEmbedIE(FranceTVBaseInfoExtractor):
     _VALID_URL = r'https?://embed\.francetv\.fr/*\?.*?\bue=(?P<id>[^&]+)'
 
 
 
 class FranceTVEmbedIE(FranceTVBaseInfoExtractor):
     _VALID_URL = r'https?://embed\.francetv\.fr/*\?.*?\bue=(?P<id>[^&]+)'
 
-    _TEST = {
+    _TESTS = [{
         'url': 'http://embed.francetv.fr/?ue=7fd581a2ccf59d2fc5719c5c13cf6961',
         'info_dict': {
             'id': 'NI_983319',
         'url': 'http://embed.francetv.fr/?ue=7fd581a2ccf59d2fc5719c5c13cf6961',
         'info_dict': {
             'id': 'NI_983319',
@@ -188,7 +295,11 @@ class FranceTVEmbedIE(FranceTVBaseInfoExtractor):
             'timestamp': 1493981780,
             'duration': 16,
         },
             'timestamp': 1493981780,
             'duration': 16,
         },
-    }
+        'params': {
+            'skip_download': True,
+        },
+        'add_ie': [FranceTVIE.ie_key()],
+    }]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
@@ -197,12 +308,12 @@ class FranceTVEmbedIE(FranceTVBaseInfoExtractor):
             'http://api-embed.webservices.francetelevisions.fr/key/%s' % video_id,
             video_id)
 
             'http://api-embed.webservices.francetelevisions.fr/key/%s' % video_id,
             video_id)
 
-        return self._extract_video(video['video_id'], video.get('catalog'))
+        return self._make_url_result(video['video_id'], video.get('catalog'))
 
 
 class FranceTVInfoIE(FranceTVBaseInfoExtractor):
     IE_NAME = 'francetvinfo.fr'
 
 
 class FranceTVInfoIE(FranceTVBaseInfoExtractor):
     IE_NAME = 'francetvinfo.fr'
-    _VALID_URL = r'https?://(?:www|mobile|france3-regions)\.francetvinfo\.fr/(?:[^/]+/)*(?P<title>[^/?#&.]+)'
+    _VALID_URL = r'https?://(?:www|mobile|france3-regions)\.francetvinfo\.fr/(?:[^/]+/)*(?P<id>[^/?#&.]+)'
 
     _TESTS = [{
         'url': 'http://www.francetvinfo.fr/replay-jt/france-3/soir-3/jt-grand-soir-3-lundi-26-aout-2013_393427.html',
 
     _TESTS = [{
         'url': 'http://www.francetvinfo.fr/replay-jt/france-3/soir-3/jt-grand-soir-3-lundi-26-aout-2013_393427.html',
@@ -217,51 +328,18 @@ class FranceTVInfoIE(FranceTVBaseInfoExtractor):
             },
         },
         'params': {
             },
         },
         'params': {
-            # m3u8 downloads
             'skip_download': True,
         },
             'skip_download': True,
         },
+        'add_ie': [FranceTVIE.ie_key()],
     }, {
         'url': 'http://www.francetvinfo.fr/elections/europeennes/direct-europeennes-regardez-le-debat-entre-les-candidats-a-la-presidence-de-la-commission_600639.html',
     }, {
         'url': 'http://www.francetvinfo.fr/elections/europeennes/direct-europeennes-regardez-le-debat-entre-les-candidats-a-la-presidence-de-la-commission_600639.html',
-        'info_dict': {
-            'id': 'EV_20019',
-            'ext': 'mp4',
-            'title': 'Débat des candidats à la Commission européenne',
-            'description': 'Débat des candidats à la Commission européenne',
-        },
-        'params': {
-            'skip_download': 'HLS (reqires ffmpeg)'
-        },
-        'skip': 'Ce direct est terminé et sera disponible en rattrapage dans quelques minutes.',
+        'only_matching': True,
     }, {
         'url': 'http://www.francetvinfo.fr/economie/entreprises/les-entreprises-familiales-le-secret-de-la-reussite_933271.html',
     }, {
         'url': 'http://www.francetvinfo.fr/economie/entreprises/les-entreprises-familiales-le-secret-de-la-reussite_933271.html',
-        'md5': 'f485bda6e185e7d15dbc69b72bae993e',
-        'info_dict': {
-            'id': 'NI_173343',
-            'ext': 'mp4',
-            'title': 'Les entreprises familiales : le secret de la réussite',
-            'thumbnail': r're:^https?://.*\.jpe?g$',
-            'timestamp': 1433273139,
-            'upload_date': '20150602',
-        },
-        'params': {
-            # m3u8 downloads
-            'skip_download': True,
-        },
+        'only_matching': True,
     }, {
         'url': 'http://france3-regions.francetvinfo.fr/bretagne/cotes-d-armor/thalassa-echappee-breizh-ce-venredi-dans-les-cotes-d-armor-954961.html',
     }, {
         'url': 'http://france3-regions.francetvinfo.fr/bretagne/cotes-d-armor/thalassa-echappee-breizh-ce-venredi-dans-les-cotes-d-armor-954961.html',
-        'md5': 'f485bda6e185e7d15dbc69b72bae993e',
-        'info_dict': {
-            'id': 'NI_657393',
-            'ext': 'mp4',
-            'title': 'Olivier Monthus, réalisateur de "Bretagne, le choix de l’Armor"',
-            'description': 'md5:a3264114c9d29aeca11ced113c37b16c',
-            'thumbnail': r're:^https?://.*\.jpe?g$',
-            'timestamp': 1458300695,
-            'upload_date': '20160318',
-        },
-        'params': {
-            'skip_download': True,
-        },
+        'only_matching': True,
     }, {
         # Dailymotion embed
         'url': 'http://www.francetvinfo.fr/politique/notre-dame-des-landes/video-sur-france-inter-cecile-duflot-denonce-le-regard-meprisant-de-patrick-cohen_1520091.html',
     }, {
         # Dailymotion embed
         'url': 'http://www.francetvinfo.fr/politique/notre-dame-des-landes/video-sur-france-inter-cecile-duflot-denonce-le-regard-meprisant-de-patrick-cohen_1520091.html',
@@ -283,9 +361,9 @@ class FranceTVInfoIE(FranceTVBaseInfoExtractor):
     }]
 
     def _real_extract(self, url):
     }]
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        page_title = mobj.group('title')
-        webpage = self._download_webpage(url, page_title)
+        display_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, display_id)
 
         dailymotion_urls = DailymotionIE._extract_urls(webpage)
         if dailymotion_urls:
 
         dailymotion_urls = DailymotionIE._extract_urls(webpage)
         if dailymotion_urls:
@@ -297,12 +375,13 @@ class FranceTVInfoIE(FranceTVBaseInfoExtractor):
             (r'id-video=([^@]+@[^"]+)',
              r'<a[^>]+href="(?:https?:)?//videos\.francetv\.fr/video/([^@]+@[^"]+)"'),
             webpage, 'video id').split('@')
             (r'id-video=([^@]+@[^"]+)',
              r'<a[^>]+href="(?:https?:)?//videos\.francetv\.fr/video/([^@]+@[^"]+)"'),
             webpage, 'video id').split('@')
-        return self._extract_video(video_id, catalogue)
+
+        return self._make_url_result(video_id, catalogue)
 
 
 class GenerationWhatIE(InfoExtractor):
     IE_NAME = 'france2.fr:generation-what'
 
 
 class GenerationWhatIE(InfoExtractor):
     IE_NAME = 'france2.fr:generation-what'
-    _VALID_URL = r'https?://generation-what\.francetv\.fr/[^/]+/video/(?P<id>[^/?#]+)'
+    _VALID_URL = r'https?://generation-what\.francetv\.fr/[^/]+/video/(?P<id>[^/?#&]+)'
 
     _TESTS = [{
         'url': 'http://generation-what.francetv.fr/portrait/video/present-arms',
 
     _TESTS = [{
         'url': 'http://generation-what.francetv.fr/portrait/video/present-arms',
@@ -314,6 +393,10 @@ class GenerationWhatIE(InfoExtractor):
             'uploader_id': 'UCHH9p1eetWCgt4kXBYCb3_w',
             'upload_date': '20160411',
         },
             'uploader_id': 'UCHH9p1eetWCgt4kXBYCb3_w',
             'upload_date': '20160411',
         },
+        'params': {
+            'skip_download': True,
+        },
+        'add_ie': ['Youtube'],
     }, {
         'url': 'http://generation-what.francetv.fr/europe/video/present-arms',
         'only_matching': True,
     }, {
         'url': 'http://generation-what.francetv.fr/europe/video/present-arms',
         'only_matching': True,
@@ -321,42 +404,87 @@ class GenerationWhatIE(InfoExtractor):
 
     def _real_extract(self, url):
         display_id = self._match_id(url)
 
     def _real_extract(self, url):
         display_id = self._match_id(url)
+
         webpage = self._download_webpage(url, display_id)
         webpage = self._download_webpage(url, display_id)
+
         youtube_id = self._search_regex(
             r"window\.videoURL\s*=\s*'([0-9A-Za-z_-]{11})';",
             webpage, 'youtube id')
         youtube_id = self._search_regex(
             r"window\.videoURL\s*=\s*'([0-9A-Za-z_-]{11})';",
             webpage, 'youtube id')
-        return self.url_result(youtube_id, 'Youtube', youtube_id)
+
+        return self.url_result(youtube_id, ie='Youtube', video_id=youtube_id)
 
 
 class CultureboxIE(FranceTVBaseInfoExtractor):
 
 
 class CultureboxIE(FranceTVBaseInfoExtractor):
-    IE_NAME = 'culturebox.francetvinfo.fr'
-    _VALID_URL = r'https?://(?:m\.)?culturebox\.francetvinfo\.fr/(?P<name>.*?)(\?|$)'
+    _VALID_URL = r'https?://(?:m\.)?culturebox\.francetvinfo\.fr/(?:[^/]+/)*(?P<id>[^/?#&]+)'
 
 
-    _TEST = {
-        'url': 'http://culturebox.francetvinfo.fr/live/musique/musique-classique/le-livre-vermeil-de-montserrat-a-la-cathedrale-delne-214511',
-        'md5': '9b88dc156781c4dbebd4c3e066e0b1d6',
+    _TESTS = [{
+        'url': 'https://culturebox.francetvinfo.fr/opera-classique/musique-classique/c-est-baroque/concerts/cantates-bwv-4-106-et-131-de-bach-par-raphael-pichon-57-268689',
         'info_dict': {
         'info_dict': {
-            'id': 'EV_50111',
-            'ext': 'flv',
-            'title': "Le Livre Vermeil de Montserrat à la Cathédrale d'Elne",
-            'description': 'md5:f8a4ad202e8fe533e2c493cc12e739d9',
-            'upload_date': '20150320',
-            'timestamp': 1426892400,
-            'duration': 2760.9,
+            'id': 'EV_134885',
+            'ext': 'mp4',
+            'title': 'Cantates BWV 4, 106 et 131 de Bach par Raphaël Pichon 5/7',
+            'description': 'md5:19c44af004b88219f4daa50fa9a351d4',
+            'upload_date': '20180206',
+            'timestamp': 1517945220,
+            'duration': 5981,
         },
         },
-    }
+        'params': {
+            'skip_download': True,
+        },
+        'add_ie': [FranceTVIE.ie_key()],
+    }]
 
     def _real_extract(self, url):
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        name = mobj.group('name')
+        display_id = self._match_id(url)
 
 
-        webpage = self._download_webpage(url, name)
+        webpage = self._download_webpage(url, display_id)
 
         if ">Ce live n'est plus disponible en replay<" in webpage:
 
         if ">Ce live n'est plus disponible en replay<" in webpage:
-            raise ExtractorError('Video %s is not available' % name, expected=True)
+            raise ExtractorError(
+                'Video %s is not available' % display_id, expected=True)
 
         video_id, catalogue = self._search_regex(
             r'["\'>]https?://videos\.francetv\.fr/video/([^@]+@.+?)["\'<]',
             webpage, 'video id').split('@')
 
 
         video_id, catalogue = self._search_regex(
             r'["\'>]https?://videos\.francetv\.fr/video/([^@]+@.+?)["\'<]',
             webpage, 'video id').split('@')
 
-        return self._extract_video(video_id, catalogue)
+        return self._make_url_result(video_id, catalogue)
+
+
+class FranceTVJeunesseIE(FranceTVBaseInfoExtractor):
+    _VALID_URL = r'(?P<url>https?://(?:www\.)?(?:zouzous|ludo)\.fr/heros/(?P<id>[^/?#&]+))'
+
+    _TESTS = [{
+        'url': 'https://www.zouzous.fr/heros/simon',
+        'info_dict': {
+            'id': 'simon',
+        },
+        'playlist_count': 9,
+    }, {
+        'url': 'https://www.ludo.fr/heros/ninjago',
+        'info_dict': {
+            'id': 'ninjago',
+        },
+        'playlist_count': 10,
+    }, {
+        'url': 'https://www.zouzous.fr/heros/simon?abc',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        playlist_id = mobj.group('id')
+
+        playlist = self._download_json(
+            '%s/%s' % (mobj.group('url'), 'playlist'), playlist_id)
+
+        if not playlist.get('count'):
+            raise ExtractorError(
+                '%s is not available' % playlist_id, expected=True)
+
+        entries = []
+        for item in playlist['items']:
+            identity = item.get('identity')
+            if identity and isinstance(identity, compat_str):
+                entries.append(self._make_url_result(identity))
+
+        return self.playlist_result(entries, playlist_id)
index ce5c67fbbb7100a35174836aee4e51e20b96be11..faea6576fe6877293ee94cc882c74cbca93eabfb 100644 (file)
 # coding: utf-8
 from __future__ import unicode_literals
 
 # coding: utf-8
 from __future__ import unicode_literals
 
+import re
+
 from .common import InfoExtractor
 from .nexx import NexxIE
 from .common import InfoExtractor
 from .nexx import NexxIE
-from ..utils import extract_attributes
+from ..utils import int_or_none
+
+
+class FunkBaseIE(InfoExtractor):
+    def _make_url_result(self, video):
+        return {
+            '_type': 'url_transparent',
+            'url': 'nexx:741:%s' % video['sourceId'],
+            'ie_key': NexxIE.ie_key(),
+            'id': video['sourceId'],
+            'title': video.get('title'),
+            'description': video.get('description'),
+            'duration': int_or_none(video.get('duration')),
+            'season_number': int_or_none(video.get('seasonNr')),
+            'episode_number': int_or_none(video.get('episodeNr')),
+        }
+
+
+class FunkMixIE(FunkBaseIE):
+    _VALID_URL = r'https?://(?:www\.)?funk\.net/mix/(?P<id>[^/]+)/(?P<alias>[^/?#&]+)'
+    _TESTS = [{
+        'url': 'https://www.funk.net/mix/59d65d935f8b160001828b5b/die-realste-kifferdoku-aller-zeiten',
+        'md5': '8edf617c2f2b7c9847dfda313f199009',
+        'info_dict': {
+            'id': '123748',
+            'ext': 'mp4',
+            'title': '"Die realste Kifferdoku aller Zeiten"',
+            'description': 'md5:c97160f5bafa8d47ec8e2e461012aa9d',
+            'timestamp': 1490274721,
+            'upload_date': '20170323',
+        },
+    }]
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        mix_id = mobj.group('id')
+        alias = mobj.group('alias')
+
+        lists = self._download_json(
+            'https://www.funk.net/api/v3.1/curation/curatedLists/',
+            mix_id, headers={
+                'authorization': 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJjbGllbnROYW1lIjoiY3VyYXRpb24tdG9vbC12Mi4wIiwic2NvcGUiOiJzdGF0aWMtY29udGVudC1hcGksY3VyYXRpb24tc2VydmljZSxzZWFyY2gtYXBpIn0.SGCC1IXHLtZYoo8PvRKlU2gXH1su8YSu47sB3S4iXBI',
+                'Referer': url,
+            }, query={
+                'size': 100,
+            })['result']['lists']
+
+        metas = next(
+            l for l in lists
+            if mix_id in (l.get('entityId'), l.get('alias')))['videoMetas']
+        video = next(
+            meta['videoDataDelegate']
+            for meta in metas if meta.get('alias') == alias)
+
+        return self._make_url_result(video)
 
 
 
 
-class FunkIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?funk\.net/(?:mix|channel)/(?:[^/]+/)*(?P<id>[^?/#]+)'
+class FunkChannelIE(FunkBaseIE):
+    _VALID_URL = r'https?://(?:www\.)?funk\.net/channel/(?P<id>[^/]+)/(?P<alias>[^/?#&]+)'
     _TESTS = [{
     _TESTS = [{
-        'url': 'https://www.funk.net/mix/59d65d935f8b160001828b5b/0/59d517e741dca10001252574/',
-        'md5': '4d40974481fa3475f8bccfd20c5361f8',
+        'url': 'https://www.funk.net/channel/ba/die-lustigsten-instrumente-aus-dem-internet-teil-2',
         'info_dict': {
         'info_dict': {
-            'id': '716599',
+            'id': '1155821',
             'ext': 'mp4',
             'ext': 'mp4',
-            'title': 'Neue Rechte Welle',
-            'description': 'md5:a30a53f740ffb6bfd535314c2cc5fb69',
-            'timestamp': 1501337639,
-            'upload_date': '20170729',
+            'title': 'Die LUSTIGSTEN INSTRUMENTE aus dem Internet - Teil 2',
+            'description': 'md5:a691d0413ef4835588c5b03ded670c1f',
+            'timestamp': 1514507395,
+            'upload_date': '20171229',
         },
         'params': {
         },
         'params': {
-            'format': 'bestvideo',
             'skip_download': True,
         },
     }, {
             'skip_download': True,
         },
     }, {
-        'url': 'https://www.funk.net/channel/59d5149841dca100012511e3/0/59d52049999264000182e79d/',
+        'url': 'https://www.funk.net/channel/59d5149841dca100012511e3/mein-erster-job-lovemilla-folge-1/lovemilla/',
         'only_matching': True,
     }]
 
     def _real_extract(self, url):
         'only_matching': True,
     }]
 
     def _real_extract(self, url):
-        video_id = self._match_id(url)
+        mobj = re.match(self._VALID_URL, url)
+        channel_id = mobj.group('id')
+        alias = mobj.group('alias')
 
 
-        webpage = self._download_webpage(url, video_id)
+        results = self._download_json(
+            'https://www.funk.net/api/v3.0/content/videos/filter', channel_id,
+            headers={
+                'authorization': 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJjbGllbnROYW1lIjoiY3VyYXRpb24tdG9vbCIsInNjb3BlIjoic3RhdGljLWNvbnRlbnQtYXBpLGN1cmF0aW9uLWFwaSxzZWFyY2gtYXBpIn0.q4Y2xZG8PFHai24-4Pjx2gym9RmJejtmK6lMXP5wAgc',
+                'Referer': url,
+            }, query={
+                'channelId': channel_id,
+                'size': 100,
+            })['result']
 
 
-        domain_id = NexxIE._extract_domain_id(webpage) or '741'
-        nexx_id = extract_attributes(self._search_regex(
-            r'(<div[^>]id=["\']mediaplayer-funk[^>]+>)',
-            webpage, 'media player'))['data-id']
+        video = next(r for r in results if r.get('alias') == alias)
 
 
-        return self.url_result(
-            'nexx:%s:%s' % (domain_id, nexx_id), ie=NexxIE.ie_key(),
-            video_id=nexx_id)
+        return self._make_url_result(video)
index ede729b5262c286c347b544fe7493bea020b5afd..25e284d46f228151447b88c20811d4e37742a0d6 100644 (file)
@@ -5,9 +5,9 @@ from .ooyala import OoyalaIE
 
 
 class FusionIE(InfoExtractor):
 
 
 class FusionIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?fusion\.net/video/(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www\.)?fusion\.(?:net|tv)/video/(?P<id>\d+)'
     _TESTS = [{
     _TESTS = [{
-        'url': 'http://fusion.net/video/201781/u-s-and-panamanian-forces-work-together-to-stop-a-vessel-smuggling-drugs/',
+        'url': 'http://fusion.tv/video/201781/u-s-and-panamanian-forces-work-together-to-stop-a-vessel-smuggling-drugs/',
         'info_dict': {
             'id': 'ZpcWNoMTE6x6uVIIWYpHh0qQDjxBuq5P',
             'ext': 'mp4',
         'info_dict': {
             'id': 'ZpcWNoMTE6x6uVIIWYpHh0qQDjxBuq5P',
             'ext': 'mp4',
@@ -20,7 +20,7 @@ class FusionIE(InfoExtractor):
         },
         'add_ie': ['Ooyala'],
     }, {
         },
         'add_ie': ['Ooyala'],
     }, {
-        'url': 'http://fusion.net/video/201781',
+        'url': 'http://fusion.tv/video/201781',
         'only_matching': True,
     }]
 
         'only_matching': True,
     }]
 
index a66e309de6993210052d09c4107fe26a168f1b33..a2920a793ba45d3fef47eebba26bc3c19517b63c 100644 (file)
@@ -23,6 +23,11 @@ class GameInformerIE(InfoExtractor):
 
     def _real_extract(self, url):
         display_id = self._match_id(url)
 
     def _real_extract(self, url):
         display_id = self._match_id(url)
-        webpage = self._download_webpage(url, display_id)
-        brightcove_id = self._search_regex(r"getVideo\('[^']+video_id=(\d+)", webpage, 'brightcove id')
-        return self.url_result(self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 'BrightcoveNew', brightcove_id)
+        webpage = self._download_webpage(
+            url, display_id, headers=self.geo_verification_headers())
+        brightcove_id = self._search_regex(
+            [r'<[^>]+\bid=["\']bc_(\d+)', r"getVideo\('[^']+video_id=(\d+)"],
+            webpage, 'brightcove id')
+        return self.url_result(
+            self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 'BrightcoveNew',
+            brightcove_id)
index 1d9da8115832126671233101dbc3b51759e63a33..a98f3636ab4bc44ef2f5d8a604d80bf6d48b38b1 100644 (file)
@@ -102,6 +102,8 @@ from .channel9 import Channel9IE
 from .vshare import VShareIE
 from .mediasite import MediasiteIE
 from .springboardplatform import SpringboardPlatformIE
 from .vshare import VShareIE
 from .mediasite import MediasiteIE
 from .springboardplatform import SpringboardPlatformIE
+from .yapfiles import YapFilesIE
+from .vice import ViceIE
 
 
 class GenericIE(InfoExtractor):
 
 
 class GenericIE(InfoExtractor):
@@ -1954,6 +1956,34 @@ class GenericIE(InfoExtractor):
                 'skip_download': True,
             },
             'add_ie': [SpringboardPlatformIE.ie_key()],
                 'skip_download': True,
             },
             'add_ie': [SpringboardPlatformIE.ie_key()],
+        },
+        {
+            'url': 'https://www.youtube.com/shared?ci=1nEzmT-M4fU',
+            'info_dict': {
+                'id': 'uPDB5I9wfp8',
+                'ext': 'webm',
+                'title': 'Pocoyo: 90 minutos de episódios completos Português para crianças - PARTE 3',
+                'description': 'md5:d9e4d9346a2dfff4c7dc4c8cec0f546d',
+                'upload_date': '20160219',
+                'uploader': 'Pocoyo - Português (BR)',
+                'uploader_id': 'PocoyoBrazil',
+            },
+            'add_ie': [YoutubeIE.ie_key()],
+            'params': {
+                'skip_download': True,
+            },
+        },
+        {
+            'url': 'https://www.yapfiles.ru/show/1872528/690b05d3054d2dbe1e69523aa21bb3b1.mp4.html',
+            'info_dict': {
+                'id': 'vMDE4NzI1Mjgt690b',
+                'ext': 'mp4',
+                'title': 'Котята',
+            },
+            'add_ie': [YapFilesIE.ie_key()],
+            'params': {
+                'skip_download': True,
+            },
         }
         # {
         #     # TODO: find another test
         }
         # {
         #     # TODO: find another test
@@ -2280,7 +2310,10 @@ class GenericIE(InfoExtractor):
         # Look for Brightcove New Studio embeds
         bc_urls = BrightcoveNewIE._extract_urls(self, webpage)
         if bc_urls:
         # Look for Brightcove New Studio embeds
         bc_urls = BrightcoveNewIE._extract_urls(self, webpage)
         if bc_urls:
-            return self.playlist_from_matches(bc_urls, video_id, video_title, ie='BrightcoveNew')
+            return self.playlist_from_matches(
+                bc_urls, video_id, video_title,
+                getter=lambda x: smuggle_url(x, {'referrer': url}),
+                ie='BrightcoveNew')
 
         # Look for Nexx embeds
         nexx_urls = NexxIE._extract_urls(webpage)
 
         # Look for Nexx embeds
         nexx_urls = NexxIE._extract_urls(webpage)
@@ -2928,6 +2961,16 @@ class GenericIE(InfoExtractor):
                 springboardplatform_urls, video_id, video_title,
                 ie=SpringboardPlatformIE.ie_key())
 
                 springboardplatform_urls, video_id, video_title,
                 ie=SpringboardPlatformIE.ie_key())
 
+        yapfiles_urls = YapFilesIE._extract_urls(webpage)
+        if yapfiles_urls:
+            return self.playlist_from_matches(
+                yapfiles_urls, video_id, video_title, ie=YapFilesIE.ie_key())
+
+        vice_urls = ViceIE._extract_urls(webpage)
+        if vice_urls:
+            return self.playlist_from_matches(
+                vice_urls, video_id, video_title, ie=ViceIE.ie_key())
+
         def merge_dicts(dict1, dict2):
             merged = {}
             for k, v in dict1.items():
         def merge_dicts(dict1, dict2):
             merged = {}
             for k, v in dict1.items():
index 82e11a7d88735f2105d0dff70c4304ecbf274ff0..8f49f52efd5398abbc7b922b9e2a268b3f609e2a 100644 (file)
@@ -2,11 +2,13 @@
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
+from .kaltura import KalturaIE
 from .youtube import YoutubeIE
 from ..utils import (
     determine_ext,
     int_or_none,
     parse_iso8601,
 from .youtube import YoutubeIE
 from ..utils import (
     determine_ext,
     int_or_none,
     parse_iso8601,
+    smuggle_url,
     xpath_text,
 )
 
     xpath_text,
 )
 
@@ -42,6 +44,19 @@ class HeiseIE(InfoExtractor):
         'params': {
             'skip_download': True,
         },
         'params': {
             'skip_download': True,
         },
+    }, {
+        'url': 'https://www.heise.de/video/artikel/nachgehakt-Wie-sichert-das-c-t-Tool-Restric-tor-Windows-10-ab-3700244.html',
+        'md5': '4b58058b46625bdbd841fc2804df95fc',
+        'info_dict': {
+            'id': '1_ntrmio2s',
+            'timestamp': 1512470717,
+            'upload_date': '20171205',
+            'ext': 'mp4',
+            'title': 'ct10 nachgehakt hos restrictor',
+        },
+        'params': {
+            'skip_download': True,
+        },
     }, {
         'url': 'http://www.heise.de/ct/artikel/c-t-uplink-3-3-Owncloud-Tastaturen-Peilsender-Smartphone-2403911.html',
         'only_matching': True,
     }, {
         'url': 'http://www.heise.de/ct/artikel/c-t-uplink-3-3-Owncloud-Tastaturen-Peilsender-Smartphone-2403911.html',
         'only_matching': True,
@@ -67,9 +82,14 @@ class HeiseIE(InfoExtractor):
         if yt_urls:
             return self.playlist_from_matches(yt_urls, video_id, title, ie=YoutubeIE.ie_key())
 
         if yt_urls:
             return self.playlist_from_matches(yt_urls, video_id, title, ie=YoutubeIE.ie_key())
 
+        kaltura_url = KalturaIE._extract_url(webpage)
+        if kaltura_url:
+            return self.url_result(smuggle_url(kaltura_url, {'source_url': url}), KalturaIE.ie_key())
+
         container_id = self._search_regex(
             r'<div class="videoplayerjw"[^>]+data-container="([0-9]+)"',
             webpage, 'container ID')
         container_id = self._search_regex(
             r'<div class="videoplayerjw"[^>]+data-container="([0-9]+)"',
             webpage, 'container ID')
+
         sequenz_id = self._search_regex(
             r'<div class="videoplayerjw"[^>]+data-sequenz="([0-9]+)"',
             webpage, 'sequenz ID')
         sequenz_id = self._search_regex(
             r'<div class="videoplayerjw"[^>]+data-sequenz="([0-9]+)"',
             webpage, 'sequenz ID')
diff --git a/youtube_dl/extractor/hidive.py b/youtube_dl/extractor/hidive.py
new file mode 100644 (file)
index 0000000..eee5170
--- /dev/null
@@ -0,0 +1,96 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+    ExtractorError,
+    int_or_none,
+    urlencode_postdata,
+)
+
+
+class HiDiveIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?hidive\.com/stream/(?P<title>[^/]+)/(?P<key>[^/?#&]+)'
+    # Using X-Forwarded-For results in 403 HTTP error for HLS fragments,
+    # so disabling geo bypass completely
+    _GEO_BYPASS = False
+
+    _TESTS = [{
+        'url': 'https://www.hidive.com/stream/the-comic-artist-and-his-assistants/s01e001',
+        'info_dict': {
+            'id': 'the-comic-artist-and-his-assistants/s01e001',
+            'ext': 'mp4',
+            'title': 'the-comic-artist-and-his-assistants/s01e001',
+            'series': 'the-comic-artist-and-his-assistants',
+            'season_number': 1,
+            'episode_number': 1,
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }]
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        title, key = mobj.group('title', 'key')
+        video_id = '%s/%s' % (title, key)
+
+        settings = self._download_json(
+            'https://www.hidive.com/play/settings', video_id,
+            data=urlencode_postdata({
+                'Title': title,
+                'Key': key,
+            }))
+
+        restriction = settings.get('restrictionReason')
+        if restriction == 'RegionRestricted':
+            self.raise_geo_restricted()
+
+        if restriction and restriction != 'None':
+            raise ExtractorError(
+                '%s said: %s' % (self.IE_NAME, restriction), expected=True)
+
+        formats = []
+        subtitles = {}
+        for rendition_id, rendition in settings['renditions'].items():
+            bitrates = rendition.get('bitrates')
+            if not isinstance(bitrates, dict):
+                continue
+            m3u8_url = bitrates.get('hls')
+            if not isinstance(m3u8_url, compat_str):
+                continue
+            formats.extend(self._extract_m3u8_formats(
+                m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native',
+                m3u8_id='%s-hls' % rendition_id, fatal=False))
+            cc_files = rendition.get('ccFiles')
+            if not isinstance(cc_files, list):
+                continue
+            for cc_file in cc_files:
+                if not isinstance(cc_file, list) or len(cc_file) < 3:
+                    continue
+                cc_lang = cc_file[0]
+                cc_url = cc_file[2]
+                if not isinstance(cc_lang, compat_str) or not isinstance(
+                        cc_url, compat_str):
+                    continue
+                subtitles.setdefault(cc_lang, []).append({
+                    'url': cc_url,
+                })
+
+        season_number = int_or_none(self._search_regex(
+            r's(\d+)', key, 'season number', default=None))
+        episode_number = int_or_none(self._search_regex(
+            r'e(\d+)', key, 'episode number', default=None))
+
+        return {
+            'id': video_id,
+            'title': video_id,
+            'subtitles': subtitles,
+            'formats': formats,
+            'series': title,
+            'season_number': season_number,
+            'episode_number': episode_number,
+        }
index da5a5de4ad7e65b995a257303096b4bc58061b67..6373268c400744f9b51a3d65a41a20d1eb27e034 100644 (file)
@@ -49,7 +49,9 @@ class LA7IE(InfoExtractor):
         webpage = self._download_webpage(url, video_id)
 
         player_data = self._parse_json(
         webpage = self._download_webpage(url, video_id)
 
         player_data = self._parse_json(
-            self._search_regex(r'videoLa7\(({[^;]+})\);', webpage, 'player data'),
+            self._search_regex(
+                [r'(?s)videoParams\s*=\s*({.+?});', r'videoLa7\(({[^;]+})\);'],
+                webpage, 'player data'),
             video_id, transform_source=js_to_json)
 
         return {
             video_id, transform_source=js_to_json)
 
         return {
diff --git a/youtube_dl/extractor/line.py b/youtube_dl/extractor/line.py
new file mode 100644 (file)
index 0000000..7f5fa44
--- /dev/null
@@ -0,0 +1,90 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import js_to_json
+
+
+class LineTVIE(InfoExtractor):
+    _VALID_URL = r'https?://tv\.line\.me/v/(?P<id>\d+)_[^/]+-(?P<segment>ep\d+-\d+)'
+
+    _TESTS = [{
+        'url': 'https://tv.line.me/v/793123_goodbye-mrblack-ep1-1/list/69246',
+        'info_dict': {
+            'id': '793123_ep1-1',
+            'ext': 'mp4',
+            'title': 'Goodbye Mr.Black | EP.1-1',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'duration': 998.509,
+            'view_count': int,
+        },
+    }, {
+        'url': 'https://tv.line.me/v/2587507_%E6%B4%BE%E9%81%A3%E5%A5%B3%E9%86%ABx-ep1-02/list/185245',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        series_id, segment = re.match(self._VALID_URL, url).groups()
+        video_id = '%s_%s' % (series_id, segment)
+
+        webpage = self._download_webpage(url, video_id)
+
+        player_params = self._parse_json(self._search_regex(
+            r'naver\.WebPlayer\(({[^}]+})\)', webpage, 'player parameters'),
+            video_id, transform_source=js_to_json)
+
+        video_info = self._download_json(
+            'https://global-nvapis.line.me/linetv/rmcnmv/vod_play_videoInfo.json',
+            video_id, query={
+                'videoId': player_params['videoId'],
+                'key': player_params['key'],
+            })
+
+        stream = video_info['streams'][0]
+        extra_query = '?__gda__=' + stream['key']['value']
+        formats = self._extract_m3u8_formats(
+            stream['source'] + extra_query, video_id, ext='mp4',
+            entry_protocol='m3u8_native', m3u8_id='hls')
+
+        for a_format in formats:
+            a_format['url'] += extra_query
+
+        duration = None
+        for video in video_info.get('videos', {}).get('list', []):
+            encoding_option = video.get('encodingOption', {})
+            abr = video['bitrate']['audio']
+            vbr = video['bitrate']['video']
+            tbr = abr + vbr
+            formats.append({
+                'url': video['source'],
+                'format_id': 'http-%d' % int(tbr),
+                'height': encoding_option.get('height'),
+                'width': encoding_option.get('width'),
+                'abr': abr,
+                'vbr': vbr,
+                'filesize': video.get('size'),
+            })
+            if video.get('duration') and duration is None:
+                duration = video['duration']
+
+        self._sort_formats(formats)
+
+        if not formats[0].get('width'):
+            formats[0]['vcodec'] = 'none'
+
+        title = self._og_search_title(webpage)
+
+        # like_count requires an additional API request https://tv.line.me/api/likeit/getCount
+
+        return {
+            'id': video_id,
+            'title': title,
+            'formats': formats,
+            'extra_param_to_segment_url': extra_query[1:],
+            'duration': duration,
+            'thumbnails': [{'url': thumbnail['source']}
+                           for thumbnail in video_info.get('thumbnails', {}).get('list', [])],
+            'view_count': video_info.get('meta', {}).get('count'),
+        }
index 6b7c5e3e03dc5b2cb6012a6e03d23a71fe359fee..6b0e64b7f1032159262220dcf77c6ffaa358d014 100644 (file)
@@ -1,12 +1,17 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
 # coding: utf-8
 from __future__ import unicode_literals
 
+import itertools
+import json
 import re
 
 from .common import InfoExtractor
 import re
 
 from .common import InfoExtractor
+from ..compat import compat_urllib_parse_unquote
 from ..utils import (
     int_or_none,
 from ..utils import (
     int_or_none,
+    parse_duration,
     remove_end,
     remove_end,
+    try_get,
 )
 
 
 )
 
 
@@ -157,3 +162,153 @@ class MailRuIE(InfoExtractor):
             'view_count': view_count,
             'formats': formats,
         }
             'view_count': view_count,
             'formats': formats,
         }
+
+
+class MailRuMusicSearchBaseIE(InfoExtractor):
+    """Shared helpers for the Музыка@Mail.Ru music extractors below."""
+
+    def _search(self, query, url, audio_id, limit=100, offset=0):
+        # Fetch one page of results (`limit` items starting at `offset`)
+        # from the my.mail.ru AJAX music-search endpoint.
+        search = self._download_json(
+            'https://my.mail.ru/cgi-bin/my/ajax', audio_id,
+            'Downloading songs JSON page %d' % (offset // limit + 1),
+            headers={
+                # NOTE(review): presumably required to look like an
+                # in-page XHR request — confirm against the site.
+                'Referer': url,
+                'X-Requested-With': 'XMLHttpRequest',
+            }, query={
+                'xemail': '',
+                'ajax_call': '1',
+                'func_name': 'music.search',
+                'mna': '',
+                'mnb': '',
+                'arg_query': query,
+                'arg_extended': '1',
+                # Paging is passed twice: once inside a JSON blob and once
+                # as flat arg_limit/arg_offset parameters.
+                'arg_search_params': json.dumps({
+                    'music': {
+                        'limit': limit,
+                        'offset': offset,
+                    },
+                }),
+                'arg_limit': limit,
+                'arg_offset': offset,
+            })
+        # The response is a list; the payload is its first dict element.
+        return next(e for e in search if isinstance(e, dict))
+
+    @staticmethod
+    def _extract_track(t, fatal=True):
+        # Build an info dict for a single track record `t`.  With
+        # fatal=False, a missing URL/File field yields None instead of
+        # raising KeyError.
+        audio_url = t['URL'] if fatal else t.get('URL')
+        if not audio_url:
+            return
+
+        audio_id = t['File'] if fatal else t.get('File')
+        if not audio_id:
+            return
+
+        # Most fields may appear under either of two alternative keys.
+        thumbnail = t.get('AlbumCoverURL') or t.get('FiledAlbumCover')
+        uploader = t.get('OwnerName') or t.get('OwnerName_Text_HTML')
+        uploader_id = t.get('UploaderID')
+        duration = int_or_none(t.get('DurationInSeconds')) or parse_duration(
+            t.get('Duration') or t.get('DurationStr'))
+        view_count = int_or_none(t.get('PlayCount') or t.get('PlayCount_hr'))
+
+        track = t.get('Name') or t.get('Name_Text_HTML')
+        artist = t.get('Author') or t.get('Author_Text_HTML')
+
+        # Title falls back from "artist - track" to bare track to the id.
+        if track:
+            title = '%s - %s' % (artist, track) if artist else track
+        else:
+            title = audio_id
+
+        return {
+            # Attribute results to MailRuMusicIE even when they come from
+            # the search extractor.
+            'extractor_key': MailRuMusicIE.ie_key(),
+            'id': audio_id,
+            'title': title,
+            'thumbnail': thumbnail,
+            'uploader': uploader,
+            'uploader_id': uploader_id,
+            'duration': duration,
+            'view_count': view_count,
+            # Audio-only media.
+            'vcodec': 'none',
+            'abr': int_or_none(t.get('BitRate')),
+            'track': track,
+            'artist': artist,
+            'album': t.get('Album'),
+            'url': audio_url,
+        }
+
+
+class MailRuMusicIE(MailRuMusicSearchBaseIE):
+    """Extractor for a single Музыка@Mail.Ru song page."""
+
+    IE_NAME = 'mailru:music'
+    IE_DESC = 'Музыка@Mail.Ru'
+    _VALID_URL = r'https?://my\.mail\.ru/music/songs/[^/?#&]+-(?P<id>[\da-f]+)'
+    _TESTS = [{
+        'url': 'https://my.mail.ru/music/songs/%D0%BC8%D0%BB8%D1%82%D1%85-l-a-h-luciferian-aesthetics-of-herrschaft-single-2017-4e31f7125d0dfaef505d947642366893',
+        'md5': '0f8c22ef8c5d665b13ac709e63025610',
+        'info_dict': {
+            'id': '4e31f7125d0dfaef505d947642366893',
+            'ext': 'mp3',
+            'title': 'L.A.H. (Luciferian Aesthetics of Herrschaft) single, 2017 - М8Л8ТХ',
+            'uploader': 'Игорь Мудрый',
+            'uploader_id': '1459196328',
+            'duration': 280,
+            'view_count': int,
+            'vcodec': 'none',
+            'abr': 320,
+            'track': 'L.A.H. (Luciferian Aesthetics of Herrschaft) single, 2017',
+            'artist': 'М8Л8ТХ',
+        },
+    }]
+
+    def _real_extract(self, url):
+        audio_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, audio_id)
+
+        # No direct per-song API is used here: search for the page title
+        # and pick the result whose 'File' field matches the URL id.
+        title = self._og_search_title(webpage)
+        music_data = self._search(title, url, audio_id)['MusicData']
+        t = next(t for t in music_data if t.get('File') == audio_id)
+
+        info = self._extract_track(t)
+        # Prefer the page title over the one assembled by _extract_track.
+        info['title'] = title
+        return info
+
+
+class MailRuMusicSearchIE(MailRuMusicSearchBaseIE):
+    """Playlist extractor for Музыка@Mail.Ru search result pages."""
+
+    IE_NAME = 'mailru:music:search'
+    IE_DESC = 'Музыка@Mail.Ru'
+    _VALID_URL = r'https?://my\.mail\.ru/music/search/(?P<id>[^/?#&]+)'
+    _TESTS = [{
+        'url': 'https://my.mail.ru/music/search/black%20shadow',
+        'info_dict': {
+            'id': 'black shadow',
+        },
+        'playlist_mincount': 532,
+    }]
+
+    def _real_extract(self, url):
+        # The search query is the URL-encoded last path component.
+        query = compat_urllib_parse_unquote(self._match_id(url))
+
+        entries = []
+
+        LIMIT = 100
+        offset = 0
+
+        # Page through the API until it stops returning tracks or the
+        # reported total is exhausted.
+        for _ in itertools.count(1):
+            search = self._search(query, url, query, LIMIT, offset)
+
+            music_data = search.get('MusicData')
+            if not music_data or not isinstance(music_data, list):
+                break
+
+            for t in music_data:
+                # Non-fatal: skip malformed records instead of aborting.
+                track = self._extract_track(t, fatal=False)
+                if track:
+                    entries.append(track)
+
+            total = try_get(
+                search, lambda x: x['Results']['music']['Total'], int)
+
+            # NOTE(review): when Total is absent, termination relies solely
+            # on an eventually-empty MusicData page.
+            if total is not None:
+                if offset > total:
+                    break
+
+            offset += LIMIT
+
+        return self.playlist_result(entries, query)
index 621ae74a7930cbaefb1f5c867de27d70499fe5d6..75d286365573f050d83b2fe420efe2ea8776d6b8 100644 (file)
@@ -3,22 +3,31 @@ from __future__ import unicode_literals
 
 import re
 
 
 import re
 
+from .common import InfoExtractor
 from .vimple import SprutoBaseIE
 
 
 class MyviIE(SprutoBaseIE):
     _VALID_URL = r'''(?x)
 from .vimple import SprutoBaseIE
 
 
 class MyviIE(SprutoBaseIE):
     _VALID_URL = r'''(?x)
-                    https?://
-                        myvi\.(?:ru/player|tv)/
-                            (?:
+                        (?:
+                            https?://
+                                (?:www\.)?
+                                myvi\.
                                 (?:
                                 (?:
-                                    embed/html|
-                                    flash|
-                                    api/Video/Get
-                                )/|
-                                content/preloader\.swf\?.*\bid=
-                            )
-                            (?P<id>[\da-zA-Z_-]+)
+                                    (?:ru/player|tv)/
+                                    (?:
+                                        (?:
+                                            embed/html|
+                                            flash|
+                                            api/Video/Get
+                                        )/|
+                                        content/preloader\.swf\?.*\bid=
+                                    )|
+                                    ru/watch/
+                                )|
+                            myvi:
+                        )
+                        (?P<id>[\da-zA-Z_-]+)
                     '''
     _TESTS = [{
         'url': 'http://myvi.ru/player/embed/html/oOy4euHA6LVwNNAjhD9_Jq5Ha2Qf0rtVMVFMAZav8wObeRTZaCATzucDQIDph8hQU0',
                     '''
     _TESTS = [{
         'url': 'http://myvi.ru/player/embed/html/oOy4euHA6LVwNNAjhD9_Jq5Ha2Qf0rtVMVFMAZav8wObeRTZaCATzucDQIDph8hQU0',
@@ -42,6 +51,12 @@ class MyviIE(SprutoBaseIE):
     }, {
         'url': 'http://myvi.ru/player/flash/ocp2qZrHI-eZnHKQBK4cZV60hslH8LALnk0uBfKsB-Q4WnY26SeGoYPi8HWHxu0O30',
         'only_matching': True,
     }, {
         'url': 'http://myvi.ru/player/flash/ocp2qZrHI-eZnHKQBK4cZV60hslH8LALnk0uBfKsB-Q4WnY26SeGoYPi8HWHxu0O30',
         'only_matching': True,
+    }, {
+        'url': 'https://www.myvi.ru/watch/YwbqszQynUaHPn_s82sx0Q2',
+        'only_matching': True,
+    }, {
+        'url': 'myvi:YwbqszQynUaHPn_s82sx0Q2',
+        'only_matching': True,
     }]
 
     @classmethod
     }]
 
     @classmethod
@@ -58,3 +73,39 @@ class MyviIE(SprutoBaseIE):
             'http://myvi.ru/player/api/Video/Get/%s?sig' % video_id, video_id)['sprutoData']
 
         return self._extract_spruto(spruto, video_id)
             'http://myvi.ru/player/api/Video/Get/%s?sig' % video_id, video_id)['sprutoData']
 
         return self._extract_spruto(spruto, video_id)
+
+
+class MyviEmbedIE(InfoExtractor):
+    """Extractor for myvi.tv embed/watch pages; delegates to MyviIE."""
+
+    _VALID_URL = r'https?://(?:www\.)?myvi\.tv/(?:[^?]+\?.*?\bv=|embed/)(?P<id>[\da-z]+)'
+    _TESTS = [{
+        'url': 'https://www.myvi.tv/embed/ccdqic3wgkqwpb36x9sxg43t4r',
+        'info_dict': {
+            'id': 'b3ea0663-3234-469d-873e-7fecf36b31d1',
+            'ext': 'mp4',
+            'title': 'Твоя (original song).mp4',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'duration': 277,
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }, {
+        'url': 'https://www.myvi.tv/idmi6o?v=ccdqic3wgkqwpb36x9sxg43t4r#watch',
+        'only_matching': True,
+    }]
+
+    @classmethod
+    def suitable(cls, url):
+        # Let MyviIE take precedence for any URL it can handle itself.
+        return False if MyviIE.suitable(url) else super(MyviEmbedIE, cls).suitable(url)
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        # Always fetch the canonical embed page, whichever URL form matched.
+        webpage = self._download_webpage(
+            'https://www.myvi.tv/embed/%s' % video_id, video_id)
+
+        # The embed page passes the real myvi id to a CreatePlayer() call.
+        myvi_id = self._search_regex(
+            r'CreatePlayer\s*\(\s*["\'].*?\bv=([\da-zA-Z_]+)',
+            webpage, 'video id')
+
+        # Hand off via the myvi:<id> pseudo-URL scheme handled by MyviIE.
+        return self.url_result('myvi:%s' % myvi_id, ie=MyviIE.ie_key())
index 9e8d28f4848165ccdfda771800031e6c68359684..246f6795a131908bf62c2b8c1477a7bc327afce5 100644 (file)
@@ -68,7 +68,7 @@ class NationalGeographicVideoIE(InfoExtractor):
 
 class NationalGeographicIE(ThePlatformIE, AdobePassIE):
     IE_NAME = 'natgeo'
 
 class NationalGeographicIE(ThePlatformIE, AdobePassIE):
     IE_NAME = 'natgeo'
-    _VALID_URL = r'https?://channel\.nationalgeographic\.com/(?:wild/)?[^/]+/(?:videos|episodes)/(?P<id>[^/?]+)'
+    _VALID_URL = r'https?://channel\.nationalgeographic\.com/(?:(?:wild/)?[^/]+/)?(?:videos|episodes)/(?P<id>[^/?]+)'
 
     _TESTS = [
         {
 
     _TESTS = [
         {
@@ -102,6 +102,10 @@ class NationalGeographicIE(ThePlatformIE, AdobePassIE):
         {
             'url': 'http://channel.nationalgeographic.com/the-story-of-god-with-morgan-freeman/episodes/the-power-of-miracles/',
             'only_matching': True,
         {
             'url': 'http://channel.nationalgeographic.com/the-story-of-god-with-morgan-freeman/episodes/the-power-of-miracles/',
             'only_matching': True,
+        },
+        {
+            'url': 'http://channel.nationalgeographic.com/videos/treasures-rediscovered/',
+            'only_matching': True,
         }
     ]
 
         }
     ]
 
index 554dec36e62dc246ea314ac07f9cff6b3c1323fe..9dc8f9ebcbe7bd0fbde8374fb91d7727e7ebb648 100644 (file)
@@ -1,6 +1,7 @@
 from __future__ import unicode_literals
 
 import re
 from __future__ import unicode_literals
 
 import re
+import base64
 
 from .common import InfoExtractor
 from .theplatform import ThePlatformIE
 
 from .common import InfoExtractor
 from .theplatform import ThePlatformIE
@@ -358,6 +359,7 @@ class NBCNewsIE(ThePlatformIE):
 
 
 class NBCOlympicsIE(InfoExtractor):
 
 
 class NBCOlympicsIE(InfoExtractor):
+    IE_NAME = 'nbcolympics'
     _VALID_URL = r'https?://www\.nbcolympics\.com/video/(?P<id>[a-z-]+)'
 
     _TEST = {
     _VALID_URL = r'https?://www\.nbcolympics\.com/video/(?P<id>[a-z-]+)'
 
     _TEST = {
@@ -395,3 +397,54 @@ class NBCOlympicsIE(InfoExtractor):
             'ie_key': ThePlatformIE.ie_key(),
             'display_id': display_id,
         }
             'ie_key': ThePlatformIE.ie_key(),
             'display_id': display_id,
         }
+
+
+class NBCOlympicsStreamIE(AdobePassIE):
+    """Extractor for live streams on stream.nbcolympics.com."""
+
+    IE_NAME = 'nbcolympics:stream'
+    _VALID_URL = r'https?://stream\.nbcolympics\.com/(?P<id>[0-9a-z-]+)'
+    _TEST = {
+        'url': 'http://stream.nbcolympics.com/2018-winter-olympics-nbcsn-evening-feb-8',
+        'info_dict': {
+            'id': '203493',
+            'ext': 'mp4',
+            'title': 're:Curling, Alpine, Luge [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+        },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        },
+    }
+    # JSON endpoint template keyed by (kind, pid); used below with
+    # 'event_config' and 'live_sources'.
+    _DATA_URL_TEMPLATE = 'http://stream.nbcolympics.com/data/%s_%s.json'
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+        # Numeric program id embedded in the page's JavaScript.
+        pid = self._search_regex(r'pid\s*=\s*(\d+);', webpage, 'pid')
+        # The page builds the Adobe Pass resource string in JS by
+        # concatenating pid; replicate that substitution here.
+        resource = self._search_regex(
+            r"resource\s*=\s*'(.+)';", webpage,
+            'resource').replace("' + pid + '", pid)
+        event_config = self._download_json(
+            self._DATA_URL_TEMPLATE % ('event_config', pid),
+            pid)['eventConfig']
+        title = self._live_title(event_config['eventTitle'])
+        source_url = self._download_json(
+            self._DATA_URL_TEMPLATE % ('live_sources', pid),
+            pid)['videoSources'][0]['sourceUrl']
+        # TV-everywhere authentication (Adobe Pass / MVPD).
+        media_token = self._extract_mvpd_auth(
+            url, pid, event_config.get('requestorId', 'NBCOlympics'), resource)
+        # NOTE(review): the signing service's response body is fed to
+        # _extract_m3u8_formats — presumably it returns the signed
+        # manifest URL; confirm.  Token and resource are base64-encoded.
+        formats = self._extract_m3u8_formats(self._download_webpage(
+            'http://sp.auth.adobe.com/tvs/v1/sign', pid, query={
+                'cdn': 'akamai',
+                'mediaToken': base64.b64encode(media_token.encode()),
+                'resource': base64.b64encode(resource.encode()),
+                'url': source_url,
+            }), pid, 'mp4')
+        self._sort_formats(formats)
+
+        return {
+            'id': pid,
+            'display_id': display_id,
+            'title': title,
+            'formats': formats,
+            'is_live': True,
+        }
index 0e26f8399dd8ea8777c28d0bb61483e27f954965..82e7cf52216ac431a09d4503caaf7cdb787cd876 100644 (file)
@@ -87,19 +87,21 @@ class NewgroundsIE(InfoExtractor):
         self._check_formats(formats, media_id)
         self._sort_formats(formats)
 
         self._check_formats(formats, media_id)
         self._sort_formats(formats)
 
-        uploader = self._search_regex(
-            r'(?:Author|Writer)\s*<a[^>]+>([^<]+)', webpage, 'uploader',
+        uploader = self._html_search_regex(
+            (r'(?s)<h4[^>]*>(.+?)</h4>.*?<em>\s*Author\s*</em>',
+             r'(?:Author|Writer)\s*<a[^>]+>([^<]+)'), webpage, 'uploader',
             fatal=False)
 
             fatal=False)
 
-        timestamp = unified_timestamp(self._search_regex(
-            r'<dt>Uploaded</dt>\s*<dd>([^<]+)', webpage, 'timestamp',
+        timestamp = unified_timestamp(self._html_search_regex(
+            (r'<dt>\s*Uploaded\s*</dt>\s*<dd>([^<]+</dd>\s*<dd>[^<]+)',
+             r'<dt>\s*Uploaded\s*</dt>\s*<dd>([^<]+)'), webpage, 'timestamp',
             default=None))
         duration = parse_duration(self._search_regex(
             default=None))
         duration = parse_duration(self._search_regex(
-            r'<dd>Song\s*</dd><dd>.+?</dd><dd>([^<]+)', webpage, 'duration',
-            default=None))
+            r'(?s)<dd>\s*Song\s*</dd>\s*<dd>.+?</dd>\s*<dd>([^<]+)', webpage,
+            'duration', default=None))
 
         filesize_approx = parse_filesize(self._html_search_regex(
 
         filesize_approx = parse_filesize(self._html_search_regex(
-            r'<dd>Song\s*</dd><dd>(.+?)</dd>', webpage, 'filesize',
+            r'(?s)<dd>\s*Song\s*</dd>\s*<dd>(.+?)</dd>', webpage, 'filesize',
             default=None))
         if len(formats) == 1:
             formats[0]['filesize_approx'] = filesize_approx
             default=None))
         if len(formats) == 1:
             formats[0]['filesize_approx'] = filesize_approx
index 9203c04777db9d84ab7efad05476878e6687080f..c7029d29ebc0f4f9b28ee4474a1cfa04600d3650 100644 (file)
@@ -21,7 +21,8 @@ class NexxIE(InfoExtractor):
     _VALID_URL = r'''(?x)
                         (?:
                             https?://api\.nexx(?:\.cloud|cdn\.com)/v3/(?P<domain_id>\d+)/videos/byid/|
     _VALID_URL = r'''(?x)
                         (?:
                             https?://api\.nexx(?:\.cloud|cdn\.com)/v3/(?P<domain_id>\d+)/videos/byid/|
-                            nexx:(?P<domain_id_s>\d+):
+                            nexx:(?:(?P<domain_id_s>\d+):)?|
+                            https?://arc\.nexx\.cloud/api/video/
                         )
                         (?P<id>\d+)
                     '''
                         )
                         (?P<id>\d+)
                     '''
@@ -61,12 +62,33 @@ class NexxIE(InfoExtractor):
         'params': {
             'skip_download': True,
         },
         'params': {
             'skip_download': True,
         },
+    }, {
+        # does not work via arc
+        'url': 'nexx:741:1269984',
+        'md5': 'c714b5b238b2958dc8d5642addba6886',
+        'info_dict': {
+            'id': '1269984',
+            'ext': 'mp4',
+            'title': '1 TAG ohne KLO... wortwörtlich! 😑',
+            'alt_title': '1 TAG ohne KLO... wortwörtlich! 😑',
+            'description': 'md5:4604539793c49eda9443ab5c5b1d612f',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'duration': 607,
+            'timestamp': 1518614955,
+            'upload_date': '20180214',
+        },
     }, {
         'url': 'https://api.nexxcdn.com/v3/748/videos/byid/128907',
         'only_matching': True,
     }, {
         'url': 'nexx:748:128907',
         'only_matching': True,
     }, {
         'url': 'https://api.nexxcdn.com/v3/748/videos/byid/128907',
         'only_matching': True,
     }, {
         'url': 'nexx:748:128907',
         'only_matching': True,
+    }, {
+        'url': 'nexx:128907',
+        'only_matching': True,
+    }, {
+        'url': 'https://arc.nexx.cloud/api/video/128907.json',
+        'only_matching': True,
     }]
 
     @staticmethod
     }]
 
     @staticmethod
@@ -124,65 +146,77 @@ class NexxIE(InfoExtractor):
         domain_id = mobj.group('domain_id') or mobj.group('domain_id_s')
         video_id = mobj.group('id')
 
         domain_id = mobj.group('domain_id') or mobj.group('domain_id_s')
         video_id = mobj.group('id')
 
-        # Reverse engineered from JS code (see getDeviceID function)
-        device_id = '%d:%d:%d%d' % (
-            random.randint(1, 4), int(time.time()),
-            random.randint(1e4, 99999), random.randint(1, 9))
-
-        result = self._call_api(domain_id, 'session/init', video_id, data={
-            'nxp_devh': device_id,
-            'nxp_userh': '',
-            'precid': '0',
-            'playlicense': '0',
-            'screenx': '1920',
-            'screeny': '1080',
-            'playerversion': '6.0.00',
-            'gateway': 'html5',
-            'adGateway': '',
-            'explicitlanguage': 'en-US',
-            'addTextTemplates': '1',
-            'addDomainData': '1',
-            'addAdModel': '1',
-        }, headers={
-            'X-Request-Enable-Auth-Fallback': '1',
-        })
-
-        cid = result['general']['cid']
-
-        # As described in [1] X-Request-Token generation algorithm is
-        # as follows:
-        #   md5( operation + domain_id + domain_secret )
-        # where domain_secret is a static value that will be given by nexx.tv
-        # as per [1]. Here is how this "secret" is generated (reversed
-        # from _play.api.init function, search for clienttoken). So it's
-        # actually not static and not that much of a secret.
-        # 1. https://nexxtvstorage.blob.core.windows.net/files/201610/27.pdf
-        secret = result['device']['clienttoken'][int(device_id[0]):]
-        secret = secret[0:len(secret) - int(device_id[-1])]
-
-        op = 'byid'
-
-        # Reversed from JS code for _play.api.call function (search for
-        # X-Request-Token)
-        request_token = hashlib.md5(
-            ''.join((op, domain_id, secret)).encode('utf-8')).hexdigest()
-
-        video = self._call_api(
-            domain_id, 'videos/%s/%s' % (op, video_id), video_id, data={
-                'additionalfields': 'language,channel,actors,studio,licenseby,slug,subtitle,teaser,description',
-                'addInteractionOptions': '1',
-                'addStatusDetails': '1',
-                'addStreamDetails': '1',
-                'addCaptions': '1',
-                'addScenes': '1',
-                'addHotSpots': '1',
-                'addBumpers': '1',
-                'captionFormat': 'data',
+        video = None
+
+        response = self._download_json(
+            'https://arc.nexx.cloud/api/video/%s.json' % video_id,
+            video_id, fatal=False)
+        if response and isinstance(response, dict):
+            result = response.get('result')
+            if result and isinstance(result, dict):
+                video = result
+
+        # not all videos work via arc, e.g. nexx:741:1269984
+        if not video:
+            # Reverse engineered from JS code (see getDeviceID function)
+            device_id = '%d:%d:%d%d' % (
+                random.randint(1, 4), int(time.time()),
+                random.randint(1e4, 99999), random.randint(1, 9))
+
+            result = self._call_api(domain_id, 'session/init', video_id, data={
+                'nxp_devh': device_id,
+                'nxp_userh': '',
+                'precid': '0',
+                'playlicense': '0',
+                'screenx': '1920',
+                'screeny': '1080',
+                'playerversion': '6.0.00',
+                'gateway': 'html5',
+                'adGateway': '',
+                'explicitlanguage': 'en-US',
+                'addTextTemplates': '1',
+                'addDomainData': '1',
+                'addAdModel': '1',
             }, headers={
             }, headers={
-                'X-Request-CID': cid,
-                'X-Request-Token': request_token,
+                'X-Request-Enable-Auth-Fallback': '1',
             })
 
             })
 
+            cid = result['general']['cid']
+
+            # As described in [1] X-Request-Token generation algorithm is
+            # as follows:
+            #   md5( operation + domain_id + domain_secret )
+            # where domain_secret is a static value that will be given by nexx.tv
+            # as per [1]. Here is how this "secret" is generated (reversed
+            # from _play.api.init function, search for clienttoken). So it's
+            # actually not static and not that much of a secret.
+            # 1. https://nexxtvstorage.blob.core.windows.net/files/201610/27.pdf
+            secret = result['device']['clienttoken'][int(device_id[0]):]
+            secret = secret[0:len(secret) - int(device_id[-1])]
+
+            op = 'byid'
+
+            # Reversed from JS code for _play.api.call function (search for
+            # X-Request-Token)
+            request_token = hashlib.md5(
+                ''.join((op, domain_id, secret)).encode('utf-8')).hexdigest()
+
+            video = self._call_api(
+                domain_id, 'videos/%s/%s' % (op, video_id), video_id, data={
+                    'additionalfields': 'language,channel,actors,studio,licenseby,slug,subtitle,teaser,description',
+                    'addInteractionOptions': '1',
+                    'addStatusDetails': '1',
+                    'addStreamDetails': '1',
+                    'addCaptions': '1',
+                    'addScenes': '1',
+                    'addHotSpots': '1',
+                    'addBumpers': '1',
+                    'captionFormat': 'data',
+                }, headers={
+                    'X-Request-CID': cid,
+                    'X-Request-Token': request_token,
+                })
+
         general = video['general']
         title = general['title']
 
         general = video['general']
         title = general['title']
 
index 7edd68472b1fd6b5a9410e94aec5a6d0933be064..090f1acee4744740a45d9c1a0895c0ec5047a38a 100644 (file)
@@ -198,7 +198,7 @@ class NickNightIE(NickDeIE):
 
 class NickRuIE(MTVServicesInfoExtractor):
     IE_NAME = 'nickelodeonru'
 
 class NickRuIE(MTVServicesInfoExtractor):
     IE_NAME = 'nickelodeonru'
-    _VALID_URL = r'https?://(?:www\.)nickelodeon\.(?:ru|fr|es|pt|ro|hu)/[^/]+/(?:[^/]+/)*(?P<id>[^/?#&]+)'
+    _VALID_URL = r'https?://(?:www\.)nickelodeon\.(?:ru|fr|es|pt|ro|hu|com\.tr)/[^/]+/(?:[^/]+/)*(?P<id>[^/?#&]+)'
     _TESTS = [{
         'url': 'http://www.nickelodeon.ru/shows/henrydanger/videos/episodes/3-sezon-15-seriya-licenziya-na-polyot/pmomfb#playlist/7airc6',
         'only_matching': True,
     _TESTS = [{
         'url': 'http://www.nickelodeon.ru/shows/henrydanger/videos/episodes/3-sezon-15-seriya-licenziya-na-polyot/pmomfb#playlist/7airc6',
         'only_matching': True,
@@ -220,6 +220,9 @@ class NickRuIE(MTVServicesInfoExtractor):
     }, {
         'url': 'http://www.nickelodeon.hu/musorok/spongyabob-kockanadrag/videok/episodes/buborekfujas-az-elszakadt-nadrag/q57iob#playlist/k6te4y',
         'only_matching': True,
     }, {
         'url': 'http://www.nickelodeon.hu/musorok/spongyabob-kockanadrag/videok/episodes/buborekfujas-az-elszakadt-nadrag/q57iob#playlist/k6te4y',
         'only_matching': True,
+    }, {
+        'url': 'http://www.nickelodeon.com.tr/programlar/sunger-bob/videolar/kayip-yatak/mgqbjy',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
     }]
 
     def _real_extract(self, url):
index a06d38afde37a0f4ad3947776910e9c3b5a39286..dc6a27d3643d335a69a75d494f7673ecf5b43a7d 100644 (file)
@@ -13,7 +13,7 @@ class NineGagIE(InfoExtractor):
     _TESTS = [{
         'url': 'http://9gag.com/tv/p/Kk2X5/people-are-awesome-2013-is-absolutely-awesome',
         'info_dict': {
     _TESTS = [{
         'url': 'http://9gag.com/tv/p/Kk2X5/people-are-awesome-2013-is-absolutely-awesome',
         'info_dict': {
-            'id': 'Kk2X5',
+            'id': 'kXzwOKyGlSA',
             'ext': 'mp4',
             'description': 'This 3-minute video will make you smile and then make you feel untalented and insignificant. Anyway, you should share this awesomeness. (Thanks, Dino!)',
             'title': '\"People Are Awesome 2013\" Is Absolutely Awesome',
             'ext': 'mp4',
             'description': 'This 3-minute video will make you smile and then make you feel untalented and insignificant. Anyway, you should share this awesomeness. (Thanks, Dino!)',
             'title': '\"People Are Awesome 2013\" Is Absolutely Awesome',
index 9b5ad5a9f0c38427834fb9e942f018a7aa3c2cbf..febef097af5f3cb31b2e8c4f7ef1981a7285f88e 100644 (file)
@@ -43,7 +43,8 @@ class NJPWWorldIE(InfoExtractor):
         webpage, urlh = self._download_webpage_handle(
             'https://njpwworld.com/auth/login', None,
             note='Logging in', errnote='Unable to login',
         webpage, urlh = self._download_webpage_handle(
             'https://njpwworld.com/auth/login', None,
             note='Logging in', errnote='Unable to login',
-            data=urlencode_postdata({'login_id': username, 'pw': password}))
+            data=urlencode_postdata({'login_id': username, 'pw': password}),
+            headers={'Referer': 'https://njpwworld.com/auth'})
         # /auth/login will return 302 for successful logins
         if urlh.geturl() == 'https://njpwworld.com/auth/login':
             self.report_warning('unable to login')
         # /auth/login will return 302 for successful logins
         if urlh.geturl() == 'https://njpwworld.com/auth/login':
             self.report_warning('unable to login')
index b8fe244071d05e1daac7514b932be148802c21a7..ff215338744893a29b93202e6f1f72dd0186546f 100644 (file)
@@ -11,6 +11,7 @@ from ..utils import (
     determine_ext,
     ExtractorError,
     fix_xml_ampersands,
     determine_ext,
     ExtractorError,
     fix_xml_ampersands,
+    int_or_none,
     orderedSet,
     parse_duration,
     qualities,
     orderedSet,
     parse_duration,
     qualities,
@@ -38,7 +39,7 @@ class NPOIE(NPOBaseIE):
                                 npo\.nl/(?!(?:live|radio)/)(?:[^/]+/){2}|
                                 ntr\.nl/(?:[^/]+/){2,}|
                                 omroepwnl\.nl/video/fragment/[^/]+__|
                                 npo\.nl/(?!(?:live|radio)/)(?:[^/]+/){2}|
                                 ntr\.nl/(?:[^/]+/){2,}|
                                 omroepwnl\.nl/video/fragment/[^/]+__|
-                                (?:zapp|npo3)\.nl/(?:[^/]+/){2}
+                                (?:zapp|npo3)\.nl/(?:[^/]+/){2,}
                             )
                         )
                         (?P<id>[^/?#]+)
                             )
                         )
                         (?P<id>[^/?#]+)
@@ -156,6 +157,9 @@ class NPOIE(NPOBaseIE):
     }, {
         'url': 'http://www.npo.nl/radio-gaga/13-06-2017/BNN_101383373',
         'only_matching': True,
     }, {
         'url': 'http://www.npo.nl/radio-gaga/13-06-2017/BNN_101383373',
         'only_matching': True,
+    }, {
+        'url': 'https://www.zapp.nl/1803-skelterlab/instructie-video-s/740-instructievideo-s/POMS_AT_11736927',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
     }]
 
     def _real_extract(self, url):
@@ -170,6 +174,10 @@ class NPOIE(NPOBaseIE):
             transform_source=strip_jsonp,
         )
 
             transform_source=strip_jsonp,
         )
 
+        error = metadata.get('error')
+        if error:
+            raise ExtractorError(error, expected=True)
+
         # For some videos actual video id (prid) is different (e.g. for
         # http://www.omroepwnl.nl/video/fragment/vandaag-de-dag-verkiezingen__POMS_WNL_853698
         # video id is POMS_WNL_853698 but prid is POW_00996502)
         # For some videos actual video id (prid) is different (e.g. for
         # http://www.omroepwnl.nl/video/fragment/vandaag-de-dag-verkiezingen__POMS_WNL_853698
         # video id is POMS_WNL_853698 but prid is POW_00996502)
@@ -187,7 +195,15 @@ class NPOIE(NPOBaseIE):
         formats = []
         urls = set()
 
         formats = []
         urls = set()
 
-        quality = qualities(['adaptive', 'wmv_sb', 'h264_sb', 'wmv_bb', 'h264_bb', 'wvc1_std', 'h264_std'])
+        def is_legal_url(format_url):
+            return format_url and format_url not in urls and re.match(
+                r'^(?:https?:)?//', format_url)
+
+        QUALITY_LABELS = ('Laag', 'Normaal', 'Hoog')
+        QUALITY_FORMATS = ('adaptive', 'wmv_sb', 'h264_sb', 'wmv_bb', 'h264_bb', 'wvc1_std', 'h264_std')
+
+        quality_from_label = qualities(QUALITY_LABELS)
+        quality_from_format_id = qualities(QUALITY_FORMATS)
         items = self._download_json(
             'http://ida.omroep.nl/app.php/%s' % video_id, video_id,
             'Downloading formats JSON', query={
         items = self._download_json(
             'http://ida.omroep.nl/app.php/%s' % video_id, video_id,
             'Downloading formats JSON', query={
@@ -196,18 +212,34 @@ class NPOIE(NPOBaseIE):
             })['items'][0]
         for num, item in enumerate(items):
             item_url = item.get('url')
             })['items'][0]
         for num, item in enumerate(items):
             item_url = item.get('url')
-            if not item_url or item_url in urls:
+            if not is_legal_url(item_url):
                 continue
             urls.add(item_url)
             format_id = self._search_regex(
                 r'video/ida/([^/]+)', item_url, 'format id',
                 default=None)
 
                 continue
             urls.add(item_url)
             format_id = self._search_regex(
                 r'video/ida/([^/]+)', item_url, 'format id',
                 default=None)
 
+            item_label = item.get('label')
+
             def add_format_url(format_url):
             def add_format_url(format_url):
+                width = int_or_none(self._search_regex(
+                    r'(\d+)[xX]\d+', format_url, 'width', default=None))
+                height = int_or_none(self._search_regex(
+                    r'\d+[xX](\d+)', format_url, 'height', default=None))
+                if item_label in QUALITY_LABELS:
+                    quality = quality_from_label(item_label)
+                    f_id = item_label
+                elif item_label in QUALITY_FORMATS:
+                    quality = quality_from_format_id(format_id)
+                    f_id = format_id
+                else:
+                    quality, f_id = [None] * 2
                 formats.append({
                     'url': format_url,
                 formats.append({
                     'url': format_url,
-                    'format_id': format_id,
-                    'quality': quality(format_id),
+                    'format_id': f_id,
+                    'width': width,
+                    'height': height,
+                    'quality': quality,
                 })
 
             # Example: http://www.npo.nl/de-nieuwe-mens-deel-1/21-07-2010/WO_VPRO_043706
                 })
 
             # Example: http://www.npo.nl/de-nieuwe-mens-deel-1/21-07-2010/WO_VPRO_043706
@@ -219,7 +251,7 @@ class NPOIE(NPOBaseIE):
                 stream_info = self._download_json(
                     item_url + '&type=json', video_id,
                     'Downloading %s stream JSON'
                 stream_info = self._download_json(
                     item_url + '&type=json', video_id,
                     'Downloading %s stream JSON'
-                    % item.get('label') or item.get('format') or format_id or num)
+                    % item_label or item.get('format') or format_id or num)
             except ExtractorError as ee:
                 if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404:
                     error = (self._parse_json(
             except ExtractorError as ee:
                 if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404:
                     error = (self._parse_json(
@@ -251,7 +283,7 @@ class NPOIE(NPOBaseIE):
         if not is_live:
             for num, stream in enumerate(metadata.get('streams', [])):
                 stream_url = stream.get('url')
         if not is_live:
             for num, stream in enumerate(metadata.get('streams', [])):
                 stream_url = stream.get('url')
-                if not stream_url or stream_url in urls:
+                if not is_legal_url(stream_url):
                     continue
                 urls.add(stream_url)
                 # smooth streaming is not supported
                     continue
                 urls.add(stream_url)
                 # smooth streaming is not supported
index e5e08538c3dcde0797cd3805dad67d247f66b80f..8afe541ec3ee7be90571f4ce5cfc39436ae26007 100644 (file)
@@ -56,18 +56,16 @@ class PeriscopeIE(PeriscopeBaseIE):
     def _real_extract(self, url):
         token = self._match_id(url)
 
     def _real_extract(self, url):
         token = self._match_id(url)
 
-        broadcast_data = self._call_api(
-            'getBroadcastPublic', {'broadcast_id': token}, token)
-        broadcast = broadcast_data['broadcast']
-        status = broadcast['status']
+        stream = self._call_api(
+            'accessVideoPublic', {'broadcast_id': token}, token)
 
 
-        user = broadcast_data.get('user', {})
+        broadcast = stream['broadcast']
+        title = broadcast['status']
 
 
-        uploader = broadcast.get('user_display_name') or user.get('display_name')
-        uploader_id = (broadcast.get('username') or user.get('username') or
-                       broadcast.get('user_id') or user.get('id'))
+        uploader = broadcast.get('user_display_name') or broadcast.get('username')
+        uploader_id = (broadcast.get('user_id') or broadcast.get('username'))
 
 
-        title = '%s - %s' % (uploader, status) if uploader else status
+        title = '%s - %s' % (uploader, title) if uploader else title
         state = broadcast.get('state').lower()
         if state == 'running':
             title = self._live_title(title)
         state = broadcast.get('state').lower()
         if state == 'running':
             title = self._live_title(title)
@@ -77,9 +75,6 @@ class PeriscopeIE(PeriscopeBaseIE):
             'url': broadcast[image],
         } for image in ('image_url', 'image_url_small') if broadcast.get(image)]
 
             'url': broadcast[image],
         } for image in ('image_url', 'image_url_small') if broadcast.get(image)]
 
-        stream = self._call_api(
-            'getAccessPublic', {'broadcast_id': token}, token)
-
         video_urls = set()
         formats = []
         for format_id in ('replay', 'rtmp', 'hls', 'https_hls', 'lhls', 'lhlsweb'):
         video_urls = set()
         formats = []
         for format_id in ('replay', 'rtmp', 'hls', 'https_hls', 'lhls', 'lhlsweb'):
index e38c7618e4d29177721f21a36479b7cbd3d0cf28..e86c65396bc0326c80268b4a3130b6bb786dfa9f 100644 (file)
@@ -4,7 +4,9 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
 import re
 
 from .common import InfoExtractor
+from ..compat import compat_urlparse
 from ..utils import (
 from ..utils import (
+    determine_ext,
     ExtractorError,
     int_or_none,
     xpath_text,
     ExtractorError,
     int_or_none,
     xpath_text,
@@ -26,17 +28,15 @@ class PladformIE(InfoExtractor):
                         (?P<id>\d+)
                     '''
     _TESTS = [{
                         (?P<id>\d+)
                     '''
     _TESTS = [{
-        # http://muz-tv.ru/kinozal/view/7400/
-        'url': 'http://out.pladform.ru/player?pl=24822&videoid=100183293',
-        'md5': '61f37b575dd27f1bb2e1854777fe31f4',
+        'url': 'https://out.pladform.ru/player?pl=64471&videoid=3777899&vk_puid15=0&vk_puid34=0',
+        'md5': '53362fac3a27352da20fa2803cc5cd6f',
         'info_dict': {
         'info_dict': {
-            'id': '100183293',
+            'id': '3777899',
             'ext': 'mp4',
             'ext': 'mp4',
-            'title': 'ТайнÑ\8b Ð¿ÐµÑ\80евала Ð\94Ñ\8fÑ\82лова â\80¢ 1 Ñ\81еÑ\80иÑ\8f 2 Ñ\87аÑ\81Ñ\82Ñ\8c',
-            'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века',
+            'title': 'СТУÐ\94Ð\98Я Ð¡Ð\9eЮÐ\97 â\80¢ Ð¨Ð¾Ñ\83 Ð¡Ñ\82Ñ\83диÑ\8f Ð¡Ð¾Ñ\8eз, 24 Ð²Ñ\8bпÑ\83Ñ\81к (01.02.2018) Ð\9dÑ\83Ñ\80лан Ð¡Ð°Ð±Ñ\83Ñ\80ов Ð¸ Ð¡Ð»Ð°Ð²Ð° Ð\9aомиÑ\81Ñ\81аÑ\80енко',
+            'description': 'md5:05140e8bf1b7e2d46e7ba140be57fd95',
             'thumbnail': r're:^https?://.*\.jpg$',
             'thumbnail': r're:^https?://.*\.jpg$',
-            'duration': 694,
-            'age_limit': 0,
+            'duration': 3190,
         },
     }, {
         'url': 'http://static.pladform.ru/player.swf?pl=21469&videoid=100183293&vkcid=0',
         },
     }, {
         'url': 'http://static.pladform.ru/player.swf?pl=21469&videoid=100183293&vkcid=0',
@@ -56,22 +56,48 @@ class PladformIE(InfoExtractor):
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
+        qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
+        pl = qs.get('pl', ['1'])[0]
+
         video = self._download_xml(
         video = self._download_xml(
-            'http://out.pladform.ru/getVideo?pl=1&videoid=%s' % video_id,
-            video_id)
+            'http://out.pladform.ru/getVideo', video_id, query={
+                'pl': pl,
+                'videoid': video_id,
+            })
 
 
-        if video.tag == 'error':
+        def fail(text):
             raise ExtractorError(
             raise ExtractorError(
-                '%s returned error: %s' % (self.IE_NAME, video.text),
+                '%s returned error: %s' % (self.IE_NAME, text),
                 expected=True)
 
                 expected=True)
 
+        if video.tag == 'error':
+            fail(video.text)
+
         quality = qualities(('ld', 'sd', 'hd'))
 
         quality = qualities(('ld', 'sd', 'hd'))
 
-        formats = [{
-            'url': src.text,
-            'format_id': src.get('quality'),
-            'quality': quality(src.get('quality')),
-        } for src in video.findall('./src')]
+        formats = []
+        for src in video.findall('./src'):
+            if src is None:
+                continue
+            format_url = src.text
+            if not format_url:
+                continue
+            if src.get('type') == 'hls' or determine_ext(format_url) == 'm3u8':
+                formats.extend(self._extract_m3u8_formats(
+                    format_url, video_id, 'mp4', entry_protocol='m3u8_native',
+                    m3u8_id='hls', fatal=False))
+            else:
+                formats.append({
+                    'url': src.text,
+                    'format_id': src.get('quality'),
+                    'quality': quality(src.get('quality')),
+                })
+
+        if not formats:
+            error = xpath_text(video, './cap', 'error', default=None)
+            if error:
+                fail(error)
+
         self._sort_formats(formats)
 
         webpage = self._download_webpage(
         self._sort_formats(formats)
 
         webpage = self._download_webpage(
index 2d87e7e70896857f1cd45fb52052fd4f81ec9bc9..dd5f17f1192c3543636f6ff24624b0c9cc9a0bd6 100644 (file)
@@ -11,19 +11,34 @@ from ..utils import (
 
 
 class PokemonIE(InfoExtractor):
 
 
 class PokemonIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?pokemon\.com/[a-z]{2}(?:.*?play=(?P<id>[a-z0-9]{32})|/[^/]+/\d+_\d+-(?P<display_id>[^/?#]+))'
+    _VALID_URL = r'https?://(?:www\.)?pokemon\.com/[a-z]{2}(?:.*?play=(?P<id>[a-z0-9]{32})|/(?:[^/]+/)+(?P<display_id>[^/?#&]+))'
     _TESTS = [{
     _TESTS = [{
-        'url': 'http://www.pokemon.com/us/pokemon-episodes/19_01-from-a-to-z/?play=true',
-        'md5': '9fb209ae3a569aac25de0f5afc4ee08f',
+        'url': 'https://www.pokemon.com/us/pokemon-episodes/20_30-the-ol-raise-and-switch/',
+        'md5': '2fe8eaec69768b25ef898cda9c43062e',
         'info_dict': {
         'info_dict': {
-            'id': 'd0436c00c3ce4071ac6cee8130ac54a1',
+            'id': 'afe22e30f01c41f49d4f1d9eab5cd9a4',
             'ext': 'mp4',
             'ext': 'mp4',
-            'title': 'From A to Z!',
-            'description': 'Bonnie makes a new friend, Ash runs into an old friend, and a terrifying premonition begins to unfold!',
-            'timestamp': 1460478136,
-            'upload_date': '20160412',
+            'title': 'The Ol’ Raise and Switch!',
+            'description': 'md5:7db77f7107f98ba88401d3adc80ff7af',
+            'timestamp': 1511824728,
+            'upload_date': '20171127',
+        },
+        'add_id': ['LimelightMedia'],
+    }, {
+        # no data-video-title
+        'url': 'https://www.pokemon.com/us/pokemon-episodes/pokemon-movies/pokemon-the-rise-of-darkrai-2008',
+        'info_dict': {
+            'id': '99f3bae270bf4e5097274817239ce9c8',
+            'ext': 'mp4',
+            'title': 'Pokémon: The Rise of Darkrai',
+            'description': 'md5:ea8fbbf942e1e497d54b19025dd57d9d',
+            'timestamp': 1417778347,
+            'upload_date': '20141205',
+        },
+        'add_id': ['LimelightMedia'],
+        'params': {
+            'skip_download': True,
         },
         },
-        'add_id': ['LimelightMedia']
     }, {
         'url': 'http://www.pokemon.com/uk/pokemon-episodes/?play=2e8b5c761f1d4a9286165d7748c1ece2',
         'only_matching': True,
     }, {
         'url': 'http://www.pokemon.com/uk/pokemon-episodes/?play=2e8b5c761f1d4a9286165d7748c1ece2',
         'only_matching': True,
@@ -42,7 +57,9 @@ class PokemonIE(InfoExtractor):
             r'(<[^>]+data-video-id="%s"[^>]*>)' % (video_id if video_id else '[a-z0-9]{32}'),
             webpage, 'video data element'))
         video_id = video_data['data-video-id']
             r'(<[^>]+data-video-id="%s"[^>]*>)' % (video_id if video_id else '[a-z0-9]{32}'),
             webpage, 'video data element'))
         video_id = video_data['data-video-id']
-        title = video_data['data-video-title']
+        title = video_data.get('data-video-title') or self._html_search_meta(
+            'pkm-title', webpage, ' title', default=None) or self._search_regex(
+            r'<h1[^>]+\bclass=["\']us-title[^>]+>([^<]+)', webpage, 'title')
         return {
             '_type': 'url_transparent',
             'id': video_id,
         return {
             '_type': 'url_transparent',
             'id': video_id,
index 3428458afa987fb3eb8c3be061742cefce6e2734..9ce513aeb1968264b0150faecc3612b6739f4dc7 100644 (file)
@@ -115,12 +115,13 @@ class PornHubIE(InfoExtractor):
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
+        self._set_cookie('pornhub.com', 'age_verified', '1')
+
         def dl_webpage(platform):
         def dl_webpage(platform):
+            self._set_cookie('pornhub.com', 'platform', platform)
             return self._download_webpage(
                 'http://www.pornhub.com/view_video.php?viewkey=%s' % video_id,
             return self._download_webpage(
                 'http://www.pornhub.com/view_video.php?viewkey=%s' % video_id,
-                video_id, headers={
-                    'Cookie': 'age_verified=1; platform=%s' % platform,
-                })
+                video_id)
 
         webpage = dl_webpage('pc')
 
 
         webpage = dl_webpage('pc')
 
@@ -275,7 +276,7 @@ class PornHubPlaylistIE(PornHubPlaylistBaseIE):
 
 
 class PornHubUserVideosIE(PornHubPlaylistBaseIE):
 
 
 class PornHubUserVideosIE(PornHubPlaylistBaseIE):
-    _VALID_URL = r'https?://(?:www\.)?pornhub\.com/users/(?P<id>[^/]+)/videos'
+    _VALID_URL = r'https?://(?:www\.)?pornhub\.com/(?:user|channel)s/(?P<id>[^/]+)/videos'
     _TESTS = [{
         'url': 'http://www.pornhub.com/users/zoe_ph/videos/public',
         'info_dict': {
     _TESTS = [{
         'url': 'http://www.pornhub.com/users/zoe_ph/videos/public',
         'info_dict': {
@@ -285,6 +286,25 @@ class PornHubUserVideosIE(PornHubPlaylistBaseIE):
     }, {
         'url': 'http://www.pornhub.com/users/rushandlia/videos',
         'only_matching': True,
     }, {
         'url': 'http://www.pornhub.com/users/rushandlia/videos',
         'only_matching': True,
+    }, {
+        # default sorting as Top Rated Videos
+        'url': 'https://www.pornhub.com/channels/povd/videos',
+        'info_dict': {
+            'id': 'povd',
+        },
+        'playlist_mincount': 293,
+    }, {
+        # Top Rated Videos
+        'url': 'https://www.pornhub.com/channels/povd/videos?o=ra',
+        'only_matching': True,
+    }, {
+        # Most Recent Videos
+        'url': 'https://www.pornhub.com/channels/povd/videos?o=da',
+        'only_matching': True,
+    }, {
+        # Most Viewed Videos
+        'url': 'https://www.pornhub.com/channels/povd/videos?o=vi',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
     }]
 
     def _real_extract(self, url):
index 48757fd4f71de7ee0c2682f818f3976d72080741..7efff45662906b7a861ba6e22fc1a840f82635f6 100644 (file)
@@ -129,6 +129,7 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE):
                     https?://
                         (?:www\.)?
                         (?:
                     https?://
                         (?:www\.)?
                         (?:
+                            (?:beta\.)?
                             (?:
                                 prosieben(?:maxx)?|sixx|sat1(?:gold)?|kabeleins(?:doku)?|the-voice-of-germany|7tv|advopedia
                             )\.(?:de|at|ch)|
                             (?:
                                 prosieben(?:maxx)?|sixx|sat1(?:gold)?|kabeleins(?:doku)?|the-voice-of-germany|7tv|advopedia
                             )\.(?:de|at|ch)|
diff --git a/youtube_dl/extractor/raywenderlich.py b/youtube_dl/extractor/raywenderlich.py
new file mode 100644 (file)
index 0000000..640c3ee
--- /dev/null
@@ -0,0 +1,102 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from .vimeo import VimeoIE
+from ..utils import (
+    extract_attributes,
+    ExtractorError,
+    smuggle_url,
+    unsmuggle_url,
+    urljoin,
+)
+
+
+class RayWenderlichIE(InfoExtractor):
+    _VALID_URL = r'https?://videos\.raywenderlich\.com/courses/(?P<course_id>[^/]+)/lessons/(?P<id>\d+)'
+
+    _TESTS = [{
+        'url': 'https://videos.raywenderlich.com/courses/105-testing-in-ios/lessons/1',
+        'info_dict': {
+            'id': '248377018',
+            'ext': 'mp4',
+            'title': 'Testing In iOS Episode 1: Introduction',
+            'duration': 133,
+            'uploader': 'Ray Wenderlich',
+            'uploader_id': 'user3304672',
+        },
+        'params': {
+            'noplaylist': True,
+            'skip_download': True,
+        },
+        'add_ie': [VimeoIE.ie_key()],
+        'expected_warnings': ['HTTP Error 403: Forbidden'],
+    }, {
+        'url': 'https://videos.raywenderlich.com/courses/105-testing-in-ios/lessons/1',
+        'info_dict': {
+            'title': 'Testing in iOS',
+            'id': '105-testing-in-ios',
+        },
+        'params': {
+            'noplaylist': False,
+        },
+        'playlist_count': 29,
+    }]
+
+    def _real_extract(self, url):
+        url, smuggled_data = unsmuggle_url(url, {})
+
+        mobj = re.match(self._VALID_URL, url)
+        course_id, lesson_id = mobj.group('course_id', 'id')
+        video_id = '%s/%s' % (course_id, lesson_id)
+
+        webpage = self._download_webpage(url, video_id)
+
+        no_playlist = self._downloader.params.get('noplaylist')
+        if no_playlist or smuggled_data.get('force_video', False):
+            if no_playlist:
+                self.to_screen(
+                    'Downloading just video %s because of --no-playlist'
+                    % video_id)
+            if '>Subscribe to unlock' in webpage:
+                raise ExtractorError(
+                    'This content is only available for subscribers',
+                    expected=True)
+            vimeo_id = self._search_regex(
+                r'data-vimeo-id=["\'](\d+)', webpage, 'video id')
+            return self.url_result(
+                VimeoIE._smuggle_referrer(
+                    'https://player.vimeo.com/video/%s' % vimeo_id, url),
+                ie=VimeoIE.ie_key(), video_id=vimeo_id)
+
+        self.to_screen(
+            'Downloading playlist %s - add --no-playlist to just download video'
+            % course_id)
+
+        lesson_ids = set((lesson_id, ))
+        for lesson in re.findall(
+                r'(<a[^>]+\bclass=["\']lesson-link[^>]+>)', webpage):
+            attrs = extract_attributes(lesson)
+            if not attrs:
+                continue
+            lesson_url = attrs.get('href')
+            if not lesson_url:
+                continue
+            lesson_id = self._search_regex(
+                r'/lessons/(\d+)', lesson_url, 'lesson id', default=None)
+            if not lesson_id:
+                continue
+            lesson_ids.add(lesson_id)
+
+        entries = []
+        for lesson_id in sorted(lesson_ids):
+            entries.append(self.url_result(
+                smuggle_url(urljoin(url, lesson_id), {'force_video': True}),
+                ie=RayWenderlichIE.ie_key()))
+
+        title = self._search_regex(
+            r'class=["\']course-title[^>]+>([^<]+)', webpage, 'course title',
+            default=None)
+
+        return self.playlist_result(entries, course_id, title)
index 5d6cc3610c4311ea637e137c87ac216e16c8e719..243603676a21c15180c021cf7c711aeb6de3ca7a 100644 (file)
@@ -5,135 +5,93 @@ from .common import InfoExtractor
 from ..compat import compat_HTTPError
 from ..utils import (
     float_or_none,
 from ..compat import compat_HTTPError
 from ..utils import (
     float_or_none,
-    int_or_none,
-    try_get,
-    # unified_timestamp,
     ExtractorError,
 )
 
 
 class RedBullTVIE(InfoExtractor):
     ExtractorError,
 )
 
 
 class RedBullTVIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?redbull\.tv/(?:video|film|live)/(?:AP-\w+/segment/)?(?P<id>AP-\w+)'
+    _VALID_URL = r'https?://(?:www\.)?redbull\.tv/video/(?P<id>AP-\w+)'
     _TESTS = [{
         # film
     _TESTS = [{
         # film
-        'url': 'https://www.redbull.tv/video/AP-1Q756YYX51W11/abc-of-wrc',
+        'url': 'https://www.redbull.tv/video/AP-1Q6XCDTAN1W11',
         'md5': 'fb0445b98aa4394e504b413d98031d1f',
         'info_dict': {
         'md5': 'fb0445b98aa4394e504b413d98031d1f',
         'info_dict': {
-            'id': 'AP-1Q756YYX51W11',
+            'id': 'AP-1Q6XCDTAN1W11',
             'ext': 'mp4',
             'ext': 'mp4',
-            'title': 'ABC of...WRC',
+            'title': 'ABC of... WRC - ABC of... S1E6',
             'description': 'md5:5c7ed8f4015c8492ecf64b6ab31e7d31',
             'duration': 1582.04,
             'description': 'md5:5c7ed8f4015c8492ecf64b6ab31e7d31',
             'duration': 1582.04,
-            # 'timestamp': 1488405786,
-            # 'upload_date': '20170301',
         },
     }, {
         # episode
         },
     }, {
         # episode
-        'url': 'https://www.redbull.tv/video/AP-1PMT5JCWH1W11/grime?playlist=shows:shows-playall:web',
+        'url': 'https://www.redbull.tv/video/AP-1PMHKJFCW1W11',
         'info_dict': {
         'info_dict': {
-            'id': 'AP-1PMT5JCWH1W11',
+            'id': 'AP-1PMHKJFCW1W11',
             'ext': 'mp4',
             'ext': 'mp4',
-            'title': 'Grime - Hashtags S2 E4',
-            'description': 'md5:334b741c8c1ce65be057eab6773c1cf5',
+            'title': 'Grime - Hashtags S2E4',
+            'description': 'md5:b5f522b89b72e1e23216e5018810bb25',
             'duration': 904.6,
             'duration': 904.6,
-            # 'timestamp': 1487290093,
-            # 'upload_date': '20170217',
-            'series': 'Hashtags',
-            'season_number': 2,
-            'episode_number': 4,
         },
         'params': {
             'skip_download': True,
         },
         },
         'params': {
             'skip_download': True,
         },
-    }, {
-        # segment
-        'url': 'https://www.redbull.tv/live/AP-1R5DX49XS1W11/segment/AP-1QSAQJ6V52111/semi-finals',
-        'info_dict': {
-            'id': 'AP-1QSAQJ6V52111',
-            'ext': 'mp4',
-            'title': 'Semi Finals - Vans Park Series Pro Tour',
-            'description': 'md5:306a2783cdafa9e65e39aa62f514fd97',
-            'duration': 11791.991,
-        },
-        'params': {
-            'skip_download': True,
-        },
-    }, {
-        'url': 'https://www.redbull.tv/film/AP-1MSKKF5T92111/in-motion',
-        'only_matching': True,
     }]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
         session = self._download_json(
     }]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
         session = self._download_json(
-            'https://api-v2.redbull.tv/session', video_id,
+            'https://api.redbull.tv/v3/session', video_id,
             note='Downloading access token', query={
             note='Downloading access token', query={
-                'build': '4.370.0',
                 'category': 'personal_computer',
                 'category': 'personal_computer',
-                'os_version': '1.0',
                 'os_family': 'http',
             })
         if session.get('code') == 'error':
             raise ExtractorError('%s said: %s' % (
                 self.IE_NAME, session['message']))
                 'os_family': 'http',
             })
         if session.get('code') == 'error':
             raise ExtractorError('%s said: %s' % (
                 self.IE_NAME, session['message']))
-        auth = '%s %s' % (session.get('token_type', 'Bearer'), session['access_token'])
+        token = session['token']
 
         try:
 
         try:
-            info = self._download_json(
-                'https://api-v2.redbull.tv/content/%s' % video_id,
+            video = self._download_json(
+                'https://api.redbull.tv/v3/products/' + video_id,
                 video_id, note='Downloading video information',
                 video_id, note='Downloading video information',
-                headers={'Authorization': auth}
+                headers={'Authorization': token}
             )
         except ExtractorError as e:
             if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404:
                 error_message = self._parse_json(
             )
         except ExtractorError as e:
             if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404:
                 error_message = self._parse_json(
-                    e.cause.read().decode(), video_id)['message']
+                    e.cause.read().decode(), video_id)['error']
                 raise ExtractorError('%s said: %s' % (
                     self.IE_NAME, error_message), expected=True)
             raise
 
                 raise ExtractorError('%s said: %s' % (
                     self.IE_NAME, error_message), expected=True)
             raise
 
-        video = info['video_product']
-
-        title = info['title'].strip()
+        title = video['title'].strip()
 
         formats = self._extract_m3u8_formats(
 
         formats = self._extract_m3u8_formats(
-            video['url'], video_id, 'mp4', entry_protocol='m3u8_native',
-            m3u8_id='hls')
+            'https://dms.redbull.tv/v3/%s/%s/playlist.m3u8' % (video_id, token),
+            video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls')
         self._sort_formats(formats)
 
         subtitles = {}
         self._sort_formats(formats)
 
         subtitles = {}
-        for _, captions in (try_get(
-                video, lambda x: x['attachments']['captions'],
-                dict) or {}).items():
-            if not captions or not isinstance(captions, list):
-                continue
-            for caption in captions:
-                caption_url = caption.get('url')
-                if not caption_url:
-                    continue
-                ext = caption.get('format')
-                if ext == 'xml':
-                    ext = 'ttml'
-                subtitles.setdefault(caption.get('lang') or 'en', []).append({
-                    'url': caption_url,
-                    'ext': ext,
-                })
+        for resource in video.get('resources', []):
+            if resource.startswith('closed_caption_'):
+                splitted_resource = resource.split('_')
+                if splitted_resource[2]:
+                    subtitles.setdefault('en', []).append({
+                        'url': 'https://resources.redbull.tv/%s/%s' % (video_id, resource),
+                        'ext': splitted_resource[2],
+                    })
 
 
-        subheading = info.get('subheading')
+        subheading = video.get('subheading')
         if subheading:
             title += ' - %s' % subheading
 
         return {
             'id': video_id,
             'title': title,
         if subheading:
             title += ' - %s' % subheading
 
         return {
             'id': video_id,
             'title': title,
-            'description': info.get('long_description') or info.get(
+            'description': video.get('long_description') or video.get(
                 'short_description'),
             'duration': float_or_none(video.get('duration'), scale=1000),
                 'short_description'),
             'duration': float_or_none(video.get('duration'), scale=1000),
-            # 'timestamp': unified_timestamp(info.get('published')),
-            'series': info.get('show_title'),
-            'season_number': int_or_none(info.get('season_number')),
-            'episode_number': int_or_none(info.get('episode_number')),
             'formats': formats,
             'subtitles': subtitles,
         }
             'formats': formats,
             'subtitles': subtitles,
         }
index f36bc648c28b31623b50b531ee053cfdae354320..53b1c967e5cb98c300a81996bb483f204890b1ad 100644 (file)
@@ -15,7 +15,7 @@ class RedditIE(InfoExtractor):
     _TEST = {
         # from https://www.reddit.com/r/videos/comments/6rrwyj/that_small_heart_attack/
         'url': 'https://v.redd.it/zv89llsvexdz',
     _TEST = {
         # from https://www.reddit.com/r/videos/comments/6rrwyj/that_small_heart_attack/
         'url': 'https://v.redd.it/zv89llsvexdz',
-        'md5': '655d06ace653ea3b87bccfb1b27ec99d',
+        'md5': '0a070c53eba7ec4534d95a5a1259e253',
         'info_dict': {
             'id': 'zv89llsvexdz',
             'ext': 'mp4',
         'info_dict': {
             'id': 'zv89llsvexdz',
             'ext': 'mp4',
index f70a75256c638f4a3ce9cda3b9577176e49f3cca..879bcf81d8136ff4bb1f90a9312ff5c7812fdfb8 100644 (file)
@@ -16,12 +16,12 @@ class RedTubeIE(InfoExtractor):
     _VALID_URL = r'https?://(?:(?:www\.)?redtube\.com/|embed\.redtube\.com/\?.*?\bid=)(?P<id>[0-9]+)'
     _TESTS = [{
         'url': 'http://www.redtube.com/66418',
     _VALID_URL = r'https?://(?:(?:www\.)?redtube\.com/|embed\.redtube\.com/\?.*?\bid=)(?P<id>[0-9]+)'
     _TESTS = [{
         'url': 'http://www.redtube.com/66418',
-        'md5': '7b8c22b5e7098a3e1c09709df1126d2d',
+        'md5': 'fc08071233725f26b8f014dba9590005',
         'info_dict': {
             'id': '66418',
             'ext': 'mp4',
             'title': 'Sucked on a toilet',
         'info_dict': {
             'id': '66418',
             'ext': 'mp4',
             'title': 'Sucked on a toilet',
-            'upload_date': '20120831',
+            'upload_date': '20110811',
             'duration': 596,
             'view_count': int,
             'age_limit': 18,
             'duration': 596,
             'view_count': int,
             'age_limit': 18,
@@ -46,9 +46,10 @@ class RedTubeIE(InfoExtractor):
             raise ExtractorError('Video %s has been removed' % video_id, expected=True)
 
         title = self._html_search_regex(
             raise ExtractorError('Video %s has been removed' % video_id, expected=True)
 
         title = self._html_search_regex(
-            (r'<h1 class="videoTitle[^"]*">(?P<title>.+?)</h1>',
-             r'videoTitle\s*:\s*(["\'])(?P<title>)\1'),
-            webpage, 'title', group='title')
+            (r'<h(\d)[^>]+class="(?:video_title_text|videoTitle)[^"]*">(?P<title>(?:(?!\1).)+)</h\1>',
+             r'(?:videoTitle|title)\s*:\s*(["\'])(?P<title>(?:(?!\1).)+)\1',),
+            webpage, 'title', group='title',
+            default=None) or self._og_search_title(webpage)
 
         formats = []
         sources = self._parse_json(
 
         formats = []
         sources = self._parse_json(
@@ -87,12 +88,14 @@ class RedTubeIE(InfoExtractor):
 
         thumbnail = self._og_search_thumbnail(webpage)
         upload_date = unified_strdate(self._search_regex(
 
         thumbnail = self._og_search_thumbnail(webpage)
         upload_date = unified_strdate(self._search_regex(
-            r'<span[^>]+class="added-time"[^>]*>ADDED ([^<]+)<',
+            r'<span[^>]+>ADDED ([^<]+)<',
             webpage, 'upload date', fatal=False))
             webpage, 'upload date', fatal=False))
-        duration = int_or_none(self._search_regex(
-            r'videoDuration\s*:\s*(\d+)', webpage, 'duration', default=None))
+        duration = int_or_none(self._og_search_property(
+            'video:duration', webpage, default=None) or self._search_regex(
+                r'videoDuration\s*:\s*(\d+)', webpage, 'duration', default=None))
         view_count = str_to_int(self._search_regex(
         view_count = str_to_int(self._search_regex(
-            r'<span[^>]*>VIEWS</span></td>\s*<td>([\d,.]+)',
+            (r'<div[^>]*>Views</div>\s*<div[^>]*>\s*([\d,.]+)',
+             r'<span[^>]*>VIEWS</span>\s*</td>\s*<td>\s*([\d,.]+)'),
             webpage, 'view count', fatal=False))
 
         # No self-labeling, but they describe themselves as
             webpage, 'view count', fatal=False))
 
         # No self-labeling, but they describe themselves as
index bba25a233e3fef88ae8e06e504e4524e64c3bd1c..be36acc46f4ded4782c97dc0124b63a36137646f 100644 (file)
@@ -93,58 +93,11 @@ class RtlNlIE(InfoExtractor):
 
         meta = info.get('meta', {})
 
 
         meta = info.get('meta', {})
 
-        # m3u8 streams are encrypted and may not be handled properly by older ffmpeg/avconv.
-        # To workaround this previously adaptive -> flash trick was used to obtain
-        # unencrypted m3u8 streams (see https://github.com/rg3/youtube-dl/issues/4118)
-        # and bypass georestrictions as well.
-        # Currently, unencrypted m3u8 playlists are (intentionally?) invalid and therefore
-        # unusable albeit can be fixed by simple string replacement (see
-        # https://github.com/rg3/youtube-dl/pull/6337)
-        # Since recent ffmpeg and avconv handle encrypted streams just fine encrypted
-        # streams are used now.
         videopath = material['videopath']
         m3u8_url = meta.get('videohost', 'http://manifest.us.rtl.nl') + videopath
 
         formats = self._extract_m3u8_formats(
             m3u8_url, uuid, 'mp4', m3u8_id='hls', fatal=False)
         videopath = material['videopath']
         m3u8_url = meta.get('videohost', 'http://manifest.us.rtl.nl') + videopath
 
         formats = self._extract_m3u8_formats(
             m3u8_url, uuid, 'mp4', m3u8_id='hls', fatal=False)
-
-        video_urlpart = videopath.split('/adaptive/')[1][:-5]
-        PG_URL_TEMPLATE = 'http://pg.us.rtl.nl/rtlxl/network/%s/progressive/%s.mp4'
-
-        PG_FORMATS = (
-            ('a2t', 512, 288),
-            ('a3t', 704, 400),
-            ('nettv', 1280, 720),
-        )
-
-        def pg_format(format_id, width, height):
-            return {
-                'url': PG_URL_TEMPLATE % (format_id, video_urlpart),
-                'format_id': 'pg-%s' % format_id,
-                'protocol': 'http',
-                'width': width,
-                'height': height,
-            }
-
-        if not formats:
-            formats = [pg_format(*pg_tuple) for pg_tuple in PG_FORMATS]
-        else:
-            pg_formats = []
-            for format_id, width, height in PG_FORMATS:
-                try:
-                    # Find hls format with the same width and height corresponding
-                    # to progressive format and copy metadata from it.
-                    f = next(f for f in formats if f.get('height') == height)
-                    # hls formats may have invalid width
-                    f['width'] = width
-                    f_copy = f.copy()
-                    f_copy.update(pg_format(format_id, width, height))
-                    pg_formats.append(f_copy)
-                except StopIteration:
-                    # Missing hls format does mean that no progressive format with
-                    # such width and height exists either.
-                    pass
-            formats.extend(pg_formats)
         self._sort_formats(formats)
 
         thumbnails = []
         self._sort_formats(formats)
 
         thumbnails = []
index 6c09df25a07de17dfb29ead61bd1464e72a1c36a..9fa8688f838c902de682ed69e1caedc93ed5f4bc 100644 (file)
@@ -53,6 +53,12 @@ class RuutuIE(InfoExtractor):
                 'age_limit': 0,
             },
         },
                 'age_limit': 0,
             },
         },
+        # Episode where <SourceFile> is "NOT-USED", but has other
+        # downloadable sources available.
+        {
+            'url': 'http://www.ruutu.fi/video/3193728',
+            'only_matching': True,
+        },
     ]
 
     def _real_extract(self, url):
     ]
 
     def _real_extract(self, url):
@@ -72,7 +78,7 @@ class RuutuIE(InfoExtractor):
                     video_url = child.text
                     if (not video_url or video_url in processed_urls or
                             any(p in video_url for p in ('NOT_USED', 'NOT-USED'))):
                     video_url = child.text
                     if (not video_url or video_url in processed_urls or
                             any(p in video_url for p in ('NOT_USED', 'NOT-USED'))):
-                        return
+                        continue
                     processed_urls.append(video_url)
                     ext = determine_ext(video_url)
                     if ext == 'm3u8':
                     processed_urls.append(video_url)
                     ext = determine_ext(video_url)
                     if ext == 'm3u8':
index cf32d1e0c71eac256a1533aef42bc41de90fb9bb..6d4e3b76daba1b265fcd358122e7fe2e13695906 100644 (file)
@@ -159,7 +159,6 @@ class SeznamZpravyArticleIE(InfoExtractor):
         webpage = self._download_webpage(url, article_id)
 
         info = self._search_json_ld(webpage, article_id, default={})
         webpage = self._download_webpage(url, article_id)
 
         info = self._search_json_ld(webpage, article_id, default={})
-        print(info)
 
         title = info.get('title') or self._og_search_title(webpage, fatal=False)
         description = info.get('description') or self._og_search_description(webpage)
 
         title = info.get('title') or self._og_search_title(webpage, fatal=False)
         description = info.get('description') or self._og_search_description(webpage)
index 547be8f9555c6691a1d57ea4e3c4555b943ee53a..69951e38759945d34dd25ebf161464eeb36b0c13 100644 (file)
@@ -4,7 +4,11 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
 import re
 
 from .common import InfoExtractor
-from ..compat import compat_str
+from ..compat import (
+    compat_parse_qs,
+    compat_str,
+    compat_urllib_parse_urlparse,
+)
 from ..utils import (
     determine_ext,
     int_or_none,
 from ..utils import (
     determine_ext,
     int_or_none,
@@ -48,6 +52,7 @@ class SixPlayIE(InfoExtractor):
         urls = []
         quality_key = qualities(['lq', 'sd', 'hq', 'hd'])
         formats = []
         urls = []
         quality_key = qualities(['lq', 'sd', 'hq', 'hd'])
         formats = []
+        subtitles = {}
         for asset in clip_data['assets']:
             asset_url = asset.get('full_physical_path')
             protocol = asset.get('protocol')
         for asset in clip_data['assets']:
             asset_url = asset.get('full_physical_path')
             protocol = asset.get('protocol')
@@ -56,8 +61,11 @@ class SixPlayIE(InfoExtractor):
             urls.append(asset_url)
             container = asset.get('video_container')
             ext = determine_ext(asset_url)
             urls.append(asset_url)
             container = asset.get('video_container')
             ext = determine_ext(asset_url)
+            if protocol == 'http_subtitle' or ext == 'vtt':
+                subtitles.setdefault('fr', []).append({'url': asset_url})
+                continue
             if container == 'm3u8' or ext == 'm3u8':
             if container == 'm3u8' or ext == 'm3u8':
-                if protocol == 'usp':
+                if protocol == 'usp' and not compat_parse_qs(compat_urllib_parse_urlparse(asset_url).query).get('token', [None])[0]:
                     asset_url = re.sub(r'/([^/]+)\.ism/[^/]*\.m3u8', r'/\1.ism/\1.m3u8', asset_url)
                     formats.extend(self._extract_m3u8_formats(
                         asset_url, video_id, 'mp4', 'm3u8_native',
                     asset_url = re.sub(r'/([^/]+)\.ism/[^/]*\.m3u8', r'/\1.ism/\1.m3u8', asset_url)
                     formats.extend(self._extract_m3u8_formats(
                         asset_url, video_id, 'mp4', 'm3u8_native',
@@ -98,4 +106,5 @@ class SixPlayIE(InfoExtractor):
             'duration': int_or_none(clip_data.get('duration')),
             'series': get(lambda x: x['program']['title']),
             'formats': formats,
             'duration': int_or_none(clip_data.get('duration')),
             'series': get(lambda x: x['program']['title']),
             'formats': formats,
+            'subtitles': subtitles,
         }
         }
index c3078e285799ab9d159a072d67bf560ee03bd25b..58a8c0d4ddb2f282241af2afad37e9d4b8403085 100644 (file)
@@ -33,5 +33,8 @@ class SonyLIVIE(InfoExtractor):
     def _real_extract(self, url):
         brightcove_id = self._match_id(url)
         return self.url_result(
     def _real_extract(self, url):
         brightcove_id = self._match_id(url)
         return self.url_result(
-            smuggle_url(self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, {'geo_countries': ['IN']}),
+            smuggle_url(self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, {
+                'geo_countries': ['IN'],
+                'referrer': url,
+            }),
             'BrightcoveNew', brightcove_id)
             'BrightcoveNew', brightcove_id)
index 97ff422f04b7f68e2040a01607f974567e46e744..46332e5c238619c9b572e4c5701ed1169eae2d20 100644 (file)
@@ -157,8 +157,7 @@ class SoundcloudIE(InfoExtractor):
         },
     ]
 
         },
     ]
 
-    _CLIENT_ID = 'DQskPX1pntALRzMp4HSxya3Mc0AO66Ro'
-    _IPHONE_CLIENT_ID = '376f225bf427445fc4bfb6b99b72e0bf'
+    _CLIENT_ID = 'LvWovRaJZlWCHql0bISuum8Bd2KX79mb'
 
     @staticmethod
     def _extract_urls(webpage):
 
     @staticmethod
     def _extract_urls(webpage):
index e6c2dcfc438b758bf43080799817fb8f6a3e2075..67500b69c1b8b076f147c5da3afb3df8cc5bd6fd 100644 (file)
@@ -3,7 +3,12 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
 import re
 
 from .common import InfoExtractor
-from ..utils import ExtractorError
+from ..utils import (
+    ExtractorError,
+    parse_duration,
+    parse_resolution,
+    str_to_int,
+)
 
 
 class SpankBangIE(InfoExtractor):
 
 
 class SpankBangIE(InfoExtractor):
@@ -15,7 +20,7 @@ class SpankBangIE(InfoExtractor):
             'id': '3vvn',
             'ext': 'mp4',
             'title': 'fantasy solo',
             'id': '3vvn',
             'ext': 'mp4',
             'title': 'fantasy solo',
-            'description': 'Watch fantasy solo free HD porn video - 05 minutes -  Babe,Masturbation,Solo,Toy  - dillion harper masturbates on a bed free adult movies sexy clips.',
+            'description': 'dillion harper masturbates on a bed',
             'thumbnail': r're:^https?://.*\.jpg$',
             'uploader': 'silly2587',
             'age_limit': 18,
             'thumbnail': r're:^https?://.*\.jpg$',
             'uploader': 'silly2587',
             'age_limit': 18,
@@ -32,36 +37,49 @@ class SpankBangIE(InfoExtractor):
         # mobile page
         'url': 'http://m.spankbang.com/1o2de/video/can+t+remember+her+name',
         'only_matching': True,
         # mobile page
         'url': 'http://m.spankbang.com/1o2de/video/can+t+remember+her+name',
         'only_matching': True,
+    }, {
+        # 4k
+        'url': 'https://spankbang.com/1vwqx/video/jade+kush+solo+4k',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
     }]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
-        webpage = self._download_webpage(url, video_id)
+        webpage = self._download_webpage(url, video_id, headers={
+            'Cookie': 'country=US'
+        })
 
         if re.search(r'<[^>]+\bid=["\']video_removed', webpage):
             raise ExtractorError(
                 'Video %s is not available' % video_id, expected=True)
 
 
         if re.search(r'<[^>]+\bid=["\']video_removed', webpage):
             raise ExtractorError(
                 'Video %s is not available' % video_id, expected=True)
 
-        stream_key = self._html_search_regex(
-            r'''var\s+stream_key\s*=\s*['"](.+?)['"]''',
-            webpage, 'stream key')
-
-        formats = [{
-            'url': 'http://spankbang.com/_%s/%s/title/%sp__mp4' % (video_id, stream_key, height),
-            'ext': 'mp4',
-            'format_id': '%sp' % height,
-            'height': int(height),
-        } for height in re.findall(r'<(?:span|li|p)[^>]+[qb]_(\d+)p', webpage)]
-        self._check_formats(formats, video_id)
+        formats = []
+        for mobj in re.finditer(
+                r'stream_url_(?P<id>[^\s=]+)\s*=\s*(["\'])(?P<url>(?:(?!\2).)+)\2',
+                webpage):
+            format_id, format_url = mobj.group('id', 'url')
+            f = parse_resolution(format_id)
+            f.update({
+                'url': format_url,
+                'format_id': format_id,
+            })
+            formats.append(f)
         self._sort_formats(formats)
 
         title = self._html_search_regex(
             r'(?s)<h1[^>]*>(.+?)</h1>', webpage, 'title')
         self._sort_formats(formats)
 
         title = self._html_search_regex(
             r'(?s)<h1[^>]*>(.+?)</h1>', webpage, 'title')
-        description = self._og_search_description(webpage)
+        description = self._search_regex(
+            r'<div[^>]+\bclass=["\']bottom[^>]+>\s*<p>[^<]*</p>\s*<p>([^<]+)',
+            webpage, 'description', fatal=False)
         thumbnail = self._og_search_thumbnail(webpage)
         uploader = self._search_regex(
             r'class="user"[^>]*><img[^>]+>([^<]+)',
             webpage, 'uploader', default=None)
         thumbnail = self._og_search_thumbnail(webpage)
         uploader = self._search_regex(
             r'class="user"[^>]*><img[^>]+>([^<]+)',
             webpage, 'uploader', default=None)
+        duration = parse_duration(self._search_regex(
+            r'<div[^>]+\bclass=["\']right_side[^>]+>\s*<span>([^<]+)',
+            webpage, 'duration', fatal=False))
+        view_count = str_to_int(self._search_regex(
+            r'([\d,.]+)\s+plays', webpage, 'view count', fatal=False))
 
         age_limit = self._rta_search(webpage)
 
 
         age_limit = self._rta_search(webpage)
 
@@ -71,6 +89,8 @@ class SpankBangIE(InfoExtractor):
             'description': description,
             'thumbnail': thumbnail,
             'uploader': uploader,
             'description': description,
             'thumbnail': thumbnail,
             'uploader': uploader,
+            'duration': duration,
+            'view_count': view_count,
             'formats': formats,
             'age_limit': age_limit,
         }
             'formats': formats,
             'age_limit': age_limit,
         }
index a9e34c027504ff0f6585fa51cae4ba3a1ec5bfcd..fcaa5ac0b08fa3361267f0ac2fefd7dc52deed0a 100644 (file)
@@ -4,8 +4,10 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
 import re
 
 from .common import InfoExtractor
+from ..compat import compat_chr
 from ..utils import (
     determine_ext,
 from ..utils import (
     determine_ext,
+    ExtractorError,
     int_or_none,
     js_to_json,
 )
     int_or_none,
     js_to_json,
 )
@@ -32,12 +34,34 @@ class StreamangoIE(InfoExtractor):
         'params': {
             'skip_download': True,
         },
         'params': {
             'skip_download': True,
         },
+        'skip': 'gone',
     }, {
         'url': 'https://streamango.com/embed/clapasobsptpkdfe/20170315_150006_mp4',
         'only_matching': True,
     }]
 
     def _real_extract(self, url):
     }, {
         'url': 'https://streamango.com/embed/clapasobsptpkdfe/20170315_150006_mp4',
         'only_matching': True,
     }]
 
     def _real_extract(self, url):
+        def decrypt_src(encoded, val):
+            ALPHABET = '=/+9876543210zyxwvutsrqponmlkjihgfedcbaZYXWVUTSRQPONMLKJIHGFEDCBA'
+            encoded = re.sub(r'[^A-Za-z0-9+/=]', '', encoded)
+            decoded = ''
+            sm = [None] * 4
+            i = 0
+            str_len = len(encoded)
+            while i < str_len:
+                for j in range(4):
+                    sm[j % 4] = ALPHABET.index(encoded[i])
+                    i += 1
+                char_code = ((sm[0] << 0x2) | (sm[1] >> 0x4)) ^ val
+                decoded += compat_chr(char_code)
+                if sm[2] != 0x40:
+                    char_code = ((sm[1] & 0xf) << 0x4) | (sm[2] >> 0x2)
+                    decoded += compat_chr(char_code)
+                if sm[3] != 0x40:
+                    char_code = ((sm[2] & 0x3) << 0x6) | sm[3]
+                    decoded += compat_chr(char_code)
+            return decoded
+
         video_id = self._match_id(url)
 
         webpage = self._download_webpage(url, video_id)
         video_id = self._match_id(url)
 
         webpage = self._download_webpage(url, video_id)
@@ -46,13 +70,26 @@ class StreamangoIE(InfoExtractor):
 
         formats = []
         for format_ in re.findall(r'({[^}]*\bsrc\s*:\s*[^}]*})', webpage):
 
         formats = []
         for format_ in re.findall(r'({[^}]*\bsrc\s*:\s*[^}]*})', webpage):
+            mobj = re.search(r'(src\s*:\s*[^(]+\(([^)]*)\)[\s,]*)', format_)
+            if mobj is None:
+                continue
+
+            format_ = format_.replace(mobj.group(0), '')
+
             video = self._parse_json(
             video = self._parse_json(
-                format_, video_id, transform_source=js_to_json, fatal=False)
-            if not video:
+                format_, video_id, transform_source=js_to_json,
+                fatal=False) or {}
+
+            mobj = re.search(
+                r'([\'"])(?P<src>(?:(?!\1).)+)\1\s*,\s*(?P<val>\d+)',
+                mobj.group(1))
+            if mobj is None:
                 continue
                 continue
-            src = video.get('src')
+
+            src = decrypt_src(mobj.group('src'), int_or_none(mobj.group('val')))
             if not src:
                 continue
             if not src:
                 continue
+
             ext = determine_ext(src, default_ext=None)
             if video.get('type') == 'application/dash+xml' or ext == 'mpd':
                 formats.extend(self._extract_mpd_formats(
             ext = determine_ext(src, default_ext=None)
             if video.get('type') == 'application/dash+xml' or ext == 'mpd':
                 formats.extend(self._extract_mpd_formats(
@@ -65,6 +102,16 @@ class StreamangoIE(InfoExtractor):
                     'height': int_or_none(video.get('height')),
                     'tbr': int_or_none(video.get('bitrate')),
                 })
                     'height': int_or_none(video.get('height')),
                     'tbr': int_or_none(video.get('bitrate')),
                 })
+
+        if not formats:
+            error = self._search_regex(
+                r'<p[^>]+\bclass=["\']lead[^>]+>(.+?)</p>', webpage,
+                'error', default=None)
+            if not error and '>Sorry' in webpage:
+                error = 'Video %s is not available' % video_id
+            if error:
+                raise ExtractorError(error, expected=True)
+
         self._sort_formats(formats)
 
         return {
         self._sort_formats(formats)
 
         return {
index 5886e9c1bb7e0c4e9b192480ac2cfa48118ffe2a..a0353fe3ae30ab089d846f8360e89d21602c1446 100644 (file)
@@ -7,7 +7,7 @@ from .common import InfoExtractor
 
 
 class TeleBruxellesIE(InfoExtractor):
 
 
 class TeleBruxellesIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?(?:telebruxelles|bx1)\.be/(news|sport|dernier-jt|emission)/?(?P<id>[^/#?]+)'
+    _VALID_URL = r'https?://(?:www\.)?(?:telebruxelles|bx1)\.be/(?:[^/]+/)*(?P<id>[^/#?]+)'
     _TESTS = [{
         'url': 'http://bx1.be/news/que-risque-lauteur-dune-fausse-alerte-a-la-bombe/',
         'md5': 'a2a67a5b1c3e8c9d33109b902f474fd9',
     _TESTS = [{
         'url': 'http://bx1.be/news/que-risque-lauteur-dune-fausse-alerte-a-la-bombe/',
         'md5': 'a2a67a5b1c3e8c9d33109b902f474fd9',
@@ -31,6 +31,16 @@ class TeleBruxellesIE(InfoExtractor):
     }, {
         'url': 'http://bx1.be/emission/bxenf1-gastronomie/',
         'only_matching': True,
     }, {
         'url': 'http://bx1.be/emission/bxenf1-gastronomie/',
         'only_matching': True,
+    }, {
+        'url': 'https://bx1.be/berchem-sainte-agathe/personnel-carrefour-de-berchem-sainte-agathe-inquiet/',
+        'only_matching': True,
+    }, {
+        'url': 'https://bx1.be/dernier-jt/',
+        'only_matching': True,
+    }, {
+        # live stream
+        'url': 'https://bx1.be/lives/direct-tv/',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
     }]
 
     def _real_extract(self, url):
@@ -38,22 +48,29 @@ class TeleBruxellesIE(InfoExtractor):
         webpage = self._download_webpage(url, display_id)
 
         article_id = self._html_search_regex(
         webpage = self._download_webpage(url, display_id)
 
         article_id = self._html_search_regex(
-            r"<article id=\"post-(\d+)\"", webpage, 'article ID', default=None)
+            r'<article[^>]+\bid=["\']post-(\d+)', webpage, 'article ID', default=None)
         title = self._html_search_regex(
         title = self._html_search_regex(
-            r'<h1 class=\"entry-title\">(.*?)</h1>', webpage, 'title')
+            r'<h1[^>]*>(.+?)</h1>', webpage, 'title',
+            default=None) or self._og_search_title(webpage)
         description = self._og_search_description(webpage, default=None)
 
         rtmp_url = self._html_search_regex(
         description = self._og_search_description(webpage, default=None)
 
         rtmp_url = self._html_search_regex(
-            r'file\s*:\s*"(rtmp://[^/]+/vod/mp4:"\s*\+\s*"[^"]+"\s*\+\s*".mp4)"',
+            r'file["\']?\s*:\s*"(r(?:tm|mt)ps?://[^/]+/(?:vod/mp4:"\s*\+\s*"[^"]+"\s*\+\s*"\.mp4|stream/live))"',
             webpage, 'RTMP url')
             webpage, 'RTMP url')
+        # Yes, they have a typo in scheme name for live stream URLs (e.g.
+        # https://bx1.be/lives/direct-tv/)
+        rtmp_url = re.sub(r'^rmtp', 'rtmp', rtmp_url)
         rtmp_url = re.sub(r'"\s*\+\s*"', '', rtmp_url)
         formats = self._extract_wowza_formats(rtmp_url, article_id or display_id)
         self._sort_formats(formats)
 
         rtmp_url = re.sub(r'"\s*\+\s*"', '', rtmp_url)
         formats = self._extract_wowza_formats(rtmp_url, article_id or display_id)
         self._sort_formats(formats)
 
+        is_live = 'stream/live' in rtmp_url
+
         return {
             'id': article_id or display_id,
             'display_id': display_id,
         return {
             'id': article_id or display_id,
             'display_id': display_id,
-            'title': title,
+            'title': self._live_title(title) if is_live else title,
             'description': description,
             'formats': formats,
             'description': description,
             'formats': formats,
+            'is_live': is_live,
         }
         }
index fafaa826fe91e2eaddc2fe9b700c7e0367797807..6965c127b3351122e894608c8a41ac3e79aff58d 100644 (file)
@@ -10,19 +10,33 @@ from ..utils import (
 )
 
 
 )
 
 
-class TeleQuebecIE(InfoExtractor):
+class TeleQuebecBaseIE(InfoExtractor):
+    @staticmethod
+    def _limelight_result(media_id):
+        return {
+            '_type': 'url_transparent',
+            'url': smuggle_url(
+                'limelight:media:' + media_id, {'geo_countries': ['CA']}),
+            'ie_key': 'LimelightMedia',
+        }
+
+
+class TeleQuebecIE(TeleQuebecBaseIE):
     _VALID_URL = r'https?://zonevideo\.telequebec\.tv/media/(?P<id>\d+)'
     _TESTS = [{
     _VALID_URL = r'https?://zonevideo\.telequebec\.tv/media/(?P<id>\d+)'
     _TESTS = [{
-        'url': 'http://zonevideo.telequebec.tv/media/20984/le-couronnement-de-new-york/couronnement-de-new-york',
-        'md5': 'fe95a0957e5707b1b01f5013e725c90f',
+        # available till 01.01.2023
+        'url': 'http://zonevideo.telequebec.tv/media/37578/un-petit-choc-et-puis-repart/un-chef-a-la-cabane',
         'info_dict': {
         'info_dict': {
-            'id': '20984',
+            'id': '577116881b4b439084e6b1cf4ef8b1b3',
             'ext': 'mp4',
             'ext': 'mp4',
-            'title': 'Le couronnement de New York',
-            'description': 'md5:f5b3d27a689ec6c1486132b2d687d432',
-            'upload_date': '20170201',
-            'timestamp': 1485972222,
-        }
+            'title': 'Un petit choc et puis repart!',
+            'description': 'md5:b04a7e6b3f74e32d7b294cffe8658374',
+            'upload_date': '20180222',
+            'timestamp': 1519326631,
+        },
+        'params': {
+            'skip_download': True,
+        },
     }, {
         # no description
         'url': 'http://zonevideo.telequebec.tv/media/30261',
     }, {
         # no description
         'url': 'http://zonevideo.telequebec.tv/media/30261',
@@ -31,19 +45,107 @@ class TeleQuebecIE(InfoExtractor):
 
     def _real_extract(self, url):
         media_id = self._match_id(url)
 
     def _real_extract(self, url):
         media_id = self._match_id(url)
+
         media_data = self._download_json(
             'https://mnmedias.api.telequebec.tv/api/v2/media/' + media_id,
             media_id)['media']
         media_data = self._download_json(
             'https://mnmedias.api.telequebec.tv/api/v2/media/' + media_id,
             media_id)['media']
-        return {
-            '_type': 'url_transparent',
-            'id': media_id,
-            'url': smuggle_url(
-                'limelight:media:' + media_data['streamInfo']['sourceId'],
-                {'geo_countries': ['CA']}),
-            'title': media_data['title'],
+
+        info = self._limelight_result(media_data['streamInfo']['sourceId'])
+        info.update({
+            'title': media_data.get('title'),
             'description': try_get(
                 media_data, lambda x: x['descriptions'][0]['text'], compat_str),
             'duration': int_or_none(
                 media_data.get('durationInMilliseconds'), 1000),
             'description': try_get(
                 media_data, lambda x: x['descriptions'][0]['text'], compat_str),
             'duration': int_or_none(
                 media_data.get('durationInMilliseconds'), 1000),
-            'ie_key': 'LimelightMedia',
+        })
+        return info
+
+
+class TeleQuebecEmissionIE(TeleQuebecBaseIE):
+    _VALID_URL = r'''(?x)
+                    https?://
+                        (?:
+                            [^/]+\.telequebec\.tv/emissions/|
+                            (?:www\.)?telequebec\.tv/
+                        )
+                        (?P<id>[^?#&]+)
+                    '''
+    _TESTS = [{
+        'url': 'http://lindicemcsween.telequebec.tv/emissions/100430013/des-soins-esthetiques-a-377-d-interets-annuels-ca-vous-tente',
+        'info_dict': {
+            'id': '66648a6aef914fe3badda25e81a4d50a',
+            'ext': 'mp4',
+            'title': "Des soins esthétiques à 377 % d'intérêts annuels, ça vous tente?",
+            'description': 'md5:369e0d55d0083f1fc9b71ffb640ea014',
+            'upload_date': '20171024',
+            'timestamp': 1508862118,
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }, {
+        'url': 'http://bancpublic.telequebec.tv/emissions/emission-49/31986/jeunes-meres-sous-pression',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.telequebec.tv/masha-et-michka/epi059masha-et-michka-3-053-078',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.telequebec.tv/documentaire/bebes-sur-mesure/',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, display_id)
+
+        media_id = self._search_regex(
+            r'mediaUID\s*:\s*["\'][Ll]imelight_(?P<id>[a-z0-9]{32})', webpage,
+            'limelight id')
+
+        info = self._limelight_result(media_id)
+        info.update({
+            'title': self._og_search_title(webpage, default=None),
+            'description': self._og_search_description(webpage, default=None),
+        })
+        return info
+
+
+class TeleQuebecLiveIE(InfoExtractor):
+    _VALID_URL = r'https?://zonevideo\.telequebec\.tv/(?P<id>endirect)'
+    _TEST = {
+        'url': 'http://zonevideo.telequebec.tv/endirect/',
+        'info_dict': {
+            'id': 'endirect',
+            'ext': 'mp4',
+            'title': 're:^Télé-Québec - En direct [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+            'is_live': True,
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        m3u8_url = None
+        webpage = self._download_webpage(
+            'https://player.telequebec.tv/Tq_VideoPlayer.js', video_id,
+            fatal=False)
+        if webpage:
+            m3u8_url = self._search_regex(
+                r'm3U8Url\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
+                'm3u8 url', default=None, group='url')
+        if not m3u8_url:
+            m3u8_url = 'https://teleqmmd.mmdlive.lldns.net/teleqmmd/f386e3b206814e1f8c8c1c71c0f8e748/manifest.m3u8'
+        formats = self._extract_m3u8_formats(
+            m3u8_url, video_id, 'mp4', m3u8_id='hls')
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': self._live_title('Télé-Québec - En direct'),
+            'is_live': True,
+            'formats': formats,
         }
         }
diff --git a/youtube_dl/extractor/tennistv.py b/youtube_dl/extractor/tennistv.py
new file mode 100644 (file)
index 0000000..0c6f707
--- /dev/null
@@ -0,0 +1,112 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import json
+
+from .common import InfoExtractor
+
+from ..utils import (
+    ExtractorError,
+    unified_timestamp,
+)
+
+
class TennisTVIE(InfoExtractor):
    """Extractor for subscription videos on tennistv.com (ATP Tennis TV).

    Requires an account: credentials come from --username/--password or the
    'tennistv' .netrc machine; login happens in _real_initialize.
    """
    _VALID_URL = r'https?://(?:www\.)?tennistv\.com/videos/(?P<id>[-a-z0-9]+)'
    _TEST = {
        'url': 'https://www.tennistv.com/videos/indian-wells-2018-verdasco-fritz',
        'info_dict': {
            'id': 'indian-wells-2018-verdasco-fritz',
            'ext': 'mp4',
            'title': 'Fernando Verdasco v Taylor Fritz',
            'description': 're:^After his stunning victory.{174}$',
            'thumbnail': 'https://atp-prod.akamaized.net/api/images/v1/images/112831/landscape/1242/0',
            'timestamp': 1521017381,
            'upload_date': '20180314',
        },
        'params': {
            'skip_download': True,
        },
        'skip': 'Requires email and password of a subscribed account',
    }
    _NETRC_MACHINE = 'tennistv'

    def _login(self):
        """Log in and store the API session token on self._session_token.

        Raises ExtractorError when no credentials are configured or when the
        login endpoint reports an error.
        """
        username, password = self._get_login_info()
        if not username or not password:
            raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)

        login_form = {
            'Email': username,
            'Password': password,
        }
        login_json = json.dumps(login_form).encode('utf-8')
        headers = {
            'content-type': 'application/json',
            'Referer': 'https://www.tennistv.com/login',
            'Origin': 'https://www.tennistv.com',
        }

        login_result = self._download_json(
            'https://www.tennistv.com/api/users/v1/login', None,
            note='Logging in',
            errnote='Login failed (wrong password?)',
            headers=headers,
            data=login_json)

        if login_result['error']['errorCode']:
            raise ExtractorError('Login failed, %s said: %r' % (self.IE_NAME, login_result['error']['errorMessage']))

        # A non-subscribed account can still log in but may not be entitled
        # to play anything; warn rather than fail outright.
        if login_result['entitlement'] != 'SUBSCRIBED':
            self.report_warning('%s may not be subscribed to %s.' % (username, self.IE_NAME))

        self._session_token = login_result['sessionToken']

    def _real_initialize(self):
        self._login()

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # The page slug differs from the numeric id the entitlement API wants.
        internal_id = self._search_regex(r'video=([0-9]+)', webpage, 'internal video id')

        headers = {
            'Origin': 'https://www.tennistv.com',
            'authorization': 'ATP %s' % self._session_token,
            'content-type': 'application/json',
            'Referer': url,
        }
        check_data = {
            'videoID': internal_id,
            'VideoUrlType': 'HLSV3',
        }
        check_json = json.dumps(check_data).encode('utf-8')
        check_result = self._download_json(
            'https://www.tennistv.com/api/users/v1/entitlementchecknondiva',
            video_id, note='Checking video authorization', headers=headers, data=check_json)
        formats = self._extract_m3u8_formats(check_result['contentUrl'], video_id, ext='mp4')
        # Keep formats ordered by quality, consistent with the other extractors.
        self._sort_formats(formats)

        vdata_url = 'https://www.tennistv.com/api/channels/v1/de/none/video/%s' % video_id
        vdata = self._download_json(vdata_url, video_id)

        # Only the title is mandatory; tolerate missing optional metadata
        # instead of crashing with KeyError.
        video_info = vdata.get('video') or {}
        display_text = vdata.get('displayText') or {}

        title = video_info['title'] if 'title' in video_info else vdata['video']['title']
        timestamp = unified_timestamp(vdata.get('timestamp'))
        thumbnail = video_info.get('thumbnailUrl')
        description = display_text.get('description')

        series = vdata.get('tour')
        venue = display_text.get('venue')
        round_str = (vdata.get('seo') or {}).get('round')

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'formats': formats,
            'thumbnail': thumbnail,
            'timestamp': timestamp,
            'series': series,
            'season': venue,
            'episode': round_str,
        }
index 348d6ecdf19ecde7787cbb12331561c8bb2a0284..5e5efda0f0780fb98b7c37b788ad2734a837e90d 100644 (file)
@@ -132,7 +132,7 @@ class ToggleIE(InfoExtractor):
         formats = []
         for video_file in info.get('Files', []):
             video_url, vid_format = video_file.get('URL'), video_file.get('Format')
         formats = []
         for video_file in info.get('Files', []):
             video_url, vid_format = video_file.get('URL'), video_file.get('Format')
-            if not video_url or not vid_format:
+            if not video_url or video_url == 'NA' or not vid_format:
                 continue
             ext = determine_ext(video_url)
             vid_format = vid_format.replace(' ', '')
                 continue
             ext = determine_ext(video_url)
             vid_format = vid_format.replace(' ', '')
@@ -143,6 +143,18 @@ class ToggleIE(InfoExtractor):
                     note='Downloading %s m3u8 information' % vid_format,
                     errnote='Failed to download %s m3u8 information' % vid_format,
                     fatal=False))
                     note='Downloading %s m3u8 information' % vid_format,
                     errnote='Failed to download %s m3u8 information' % vid_format,
                     fatal=False))
+            elif ext == 'mpd':
+                formats.extend(self._extract_mpd_formats(
+                    video_url, video_id, mpd_id=vid_format,
+                    note='Downloading %s MPD manifest' % vid_format,
+                    errnote='Failed to download %s MPD manifest' % vid_format,
+                    fatal=False))
+            elif ext == 'ism':
+                formats.extend(self._extract_ism_formats(
+                    video_url, video_id, ism_id=vid_format,
+                    note='Downloading %s ISM manifest' % vid_format,
+                    errnote='Failed to download %s ISM manifest' % vid_format,
+                    fatal=False))
             elif ext in ('mp4', 'wvm'):
                 # wvm are drm-protected files
                 formats.append({
             elif ext in ('mp4', 'wvm'):
                 # wvm are drm-protected files
                 formats.append({
index e2169f2bce30a3bc42fcb422bd6978dda0175f3b..1bf47244440809f154d01b5664f1ffa085e4fb29 100644 (file)
@@ -7,6 +7,7 @@ from .common import InfoExtractor
 from ..compat import compat_str
 from ..utils import (
     ExtractorError,
 from ..compat import compat_str
 from ..utils import (
     ExtractorError,
+    int_or_none,
     parse_iso8601,
     parse_duration,
     update_url_query,
     parse_iso8601,
     parse_duration,
     update_url_query,
@@ -16,8 +17,9 @@ from ..utils import (
 class TVNowBaseIE(InfoExtractor):
     _VIDEO_FIELDS = (
         'id', 'title', 'free', 'geoblocked', 'articleLong', 'articleShort',
 class TVNowBaseIE(InfoExtractor):
     _VIDEO_FIELDS = (
         'id', 'title', 'free', 'geoblocked', 'articleLong', 'articleShort',
-        'broadcastStartDate', 'isDrm', 'duration', 'manifest.dashclear',
-        'format.defaultImage169Format', 'format.defaultImage169Logo')
+        'broadcastStartDate', 'isDrm', 'duration', 'season', 'episode',
+        'manifest.dashclear', 'format.title', 'format.defaultImage169Format',
+        'format.defaultImage169Logo')
 
     def _call_api(self, path, video_id, query):
         return self._download_json(
 
     def _call_api(self, path, video_id, query):
         return self._download_json(
@@ -66,6 +68,10 @@ class TVNowBaseIE(InfoExtractor):
             'thumbnail': thumbnail,
             'timestamp': timestamp,
             'duration': duration,
             'thumbnail': thumbnail,
             'timestamp': timestamp,
             'duration': duration,
+            'series': f.get('title'),
+            'season_number': int_or_none(info.get('season')),
+            'episode_number': int_or_none(info.get('episode')),
+            'episode': title,
             'formats': formats,
         }
 
             'formats': formats,
         }
 
@@ -74,18 +80,21 @@ class TVNowIE(TVNowBaseIE):
     _VALID_URL = r'https?://(?:www\.)?tvnow\.(?:de|at|ch)/(?:rtl(?:2|plus)?|nitro|superrtl|ntv|vox)/(?P<show_id>[^/]+)/(?:(?:list/[^/]+|jahr/\d{4}/\d{1,2})/)?(?P<id>[^/]+)/(?:player|preview)'
 
     _TESTS = [{
     _VALID_URL = r'https?://(?:www\.)?tvnow\.(?:de|at|ch)/(?:rtl(?:2|plus)?|nitro|superrtl|ntv|vox)/(?P<show_id>[^/]+)/(?:(?:list/[^/]+|jahr/\d{4}/\d{1,2})/)?(?P<id>[^/]+)/(?:player|preview)'
 
     _TESTS = [{
-        # rtl
-        'url': 'https://www.tvnow.de/rtl/alarm-fuer-cobra-11/freier-fall/player?return=/rtl',
+        'url': 'https://www.tvnow.de/rtl2/grip-das-motormagazin/der-neue-porsche-911-gt-3/player',
         'info_dict': {
         'info_dict': {
-            'id': '385314',
-            'display_id': 'alarm-fuer-cobra-11/freier-fall',
+            'id': '331082',
+            'display_id': 'grip-das-motormagazin/der-neue-porsche-911-gt-3',
             'ext': 'mp4',
             'ext': 'mp4',
-            'title': 'Freier Fall',
-            'description': 'md5:8c2d8f727261adf7e0dc18366124ca02',
+            'title': 'Der neue Porsche 911 GT 3',
+            'description': 'md5:6143220c661f9b0aae73b245e5d898bb',
             'thumbnail': r're:^https?://.*\.jpg$',
             'thumbnail': r're:^https?://.*\.jpg$',
-            'timestamp': 1512677700,
-            'upload_date': '20171207',
-            'duration': 2862.0,
+            'timestamp': 1495994400,
+            'upload_date': '20170528',
+            'duration': 5283,
+            'series': 'GRIP - Das Motormagazin',
+            'season_number': 14,
+            'episode_number': 405,
+            'episode': 'Der neue Porsche 911 GT 3',
         },
     }, {
         # rtl2
         },
     }, {
         # rtl2
index 195f5ce78d308126a1077cda11a4c00b437343fe..6d6c0a98fa64e9e2afc68ce2ad569f5a91d5c24b 100644 (file)
@@ -5,6 +5,7 @@ import re
 from .common import InfoExtractor
 from ..compat import (
     compat_HTTPError,
 from .common import InfoExtractor
 from ..compat import (
     compat_HTTPError,
+    compat_kwargs,
     compat_str,
     compat_urllib_request,
     compat_urlparse,
     compat_str,
     compat_urllib_request,
     compat_urlparse,
@@ -114,6 +115,11 @@ class UdemyIE(InfoExtractor):
                 error_str += ' - %s' % error_data.get('formErrors')
             raise ExtractorError(error_str, expected=True)
 
                 error_str += ' - %s' % error_data.get('formErrors')
             raise ExtractorError(error_str, expected=True)
 
+    def _download_webpage(self, *args, **kwargs):
+        kwargs.setdefault('headers', {})['User-Agent'] = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_5) AppleWebKit/603.2.4 (KHTML, like Gecko) Version/10.1.1 Safari/603.2.4'
+        return super(UdemyIE, self)._download_webpage(
+            *args, **compat_kwargs(kwargs))
+
     def _download_json(self, url_or_request, *args, **kwargs):
         headers = {
             'X-Udemy-Snail-Case': 'true',
     def _download_json(self, url_or_request, *args, **kwargs):
         headers = {
             'X-Udemy-Snail-Case': 'true',
index b20dddc5c4eb1307e02da56efc6bff35c603933c..071774a6f79e4d2c974b2fa2abe3e2ea13477d80 100644 (file)
@@ -12,7 +12,7 @@ from ..utils import (
 
 
 class VeohIE(InfoExtractor):
 
 
 class VeohIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?veoh\.com/(?:watch|iphone/#_Watch)/(?P<id>(?:v|e|yapi-)[\da-zA-Z]+)'
+    _VALID_URL = r'https?://(?:www\.)?veoh\.com/(?:watch|embed|iphone/#_Watch)/(?P<id>(?:v|e|yapi-)[\da-zA-Z]+)'
 
     _TESTS = [{
         'url': 'http://www.veoh.com/watch/v56314296nk7Zdmz3',
 
     _TESTS = [{
         'url': 'http://www.veoh.com/watch/v56314296nk7Zdmz3',
@@ -24,6 +24,9 @@ class VeohIE(InfoExtractor):
             'uploader': 'LUMOback',
             'description': 'At LUMOback, we believe straight backs are stronger.  The LUMOback Posture & Movement Sensor:  It gently vibrates when you slouch, inspiring improved posture and mobility.  Use the app to track your data and improve your posture over time. ',
         },
             'uploader': 'LUMOback',
             'description': 'At LUMOback, we believe straight backs are stronger.  The LUMOback Posture & Movement Sensor:  It gently vibrates when you slouch, inspiring improved posture and mobility.  Use the app to track your data and improve your posture over time. ',
         },
+    }, {
+        'url': 'http://www.veoh.com/embed/v56314296nk7Zdmz3',
+        'only_matching': True,
     }, {
         'url': 'http://www.veoh.com/watch/v27701988pbTc4wzN?h1=Chile+workers+cover+up+to+avoid+skin+damage',
         'md5': '4a6ff84b87d536a6a71e6aa6c0ad07fa',
     }, {
         'url': 'http://www.veoh.com/watch/v27701988pbTc4wzN?h1=Chile+workers+cover+up+to+avoid+skin+damage',
         'md5': '4a6ff84b87d536a6a71e6aa6c0ad07fa',
index bcc28693a4545f9260f5e7942bcac4942faac380..5382586176b321c4e4f877e8f7cef7a5c22d0663 100644 (file)
@@ -5,56 +5,169 @@ import re
 import time
 import hashlib
 import json
 import time
 import hashlib
 import json
+import random
 
 from .adobepass import AdobePassIE
 from .youtube import YoutubeIE
 from .common import InfoExtractor
 
 from .adobepass import AdobePassIE
 from .youtube import YoutubeIE
 from .common import InfoExtractor
-from ..compat import compat_HTTPError
+from ..compat import (
+    compat_HTTPError,
+    compat_str,
+)
 from ..utils import (
 from ..utils import (
+    ExtractorError,
     int_or_none,
     parse_age_limit,
     str_or_none,
     int_or_none,
     parse_age_limit,
     str_or_none,
-    parse_duration,
-    ExtractorError,
-    extract_attributes,
+    try_get,
 )
 
 
 )
 
 
-class ViceBaseIE(AdobePassIE):
-    def _extract_preplay_video(self, url, locale, webpage):
-        watch_hub_data = extract_attributes(self._search_regex(
-            r'(?s)(<watch-hub\s*.+?</watch-hub>)', webpage, 'watch hub'))
-        video_id = watch_hub_data['vms-id']
-        title = watch_hub_data['video-title']
+class ViceIE(AdobePassIE):
+    IE_NAME = 'vice'
+    _VALID_URL = r'https?://(?:(?:video|vms)\.vice|(?:www\.)?viceland)\.com/(?P<locale>[^/]+)/(?:video/[^/]+|embed)/(?P<id>[\da-f]+)'
+    _TESTS = [{
+        'url': 'https://video.vice.com/en_us/video/pet-cremator/58c69e38a55424f1227dc3f7',
+        'info_dict': {
+            'id': '5e647f0125e145c9aef2069412c0cbde',
+            'ext': 'mp4',
+            'title': '10 Questions You Always Wanted To Ask: Pet Cremator',
+            'description': 'md5:fe856caacf61fe0e74fab15ce2b07ca5',
+            'uploader': 'vice',
+            'uploader_id': '57a204088cb727dec794c67b',
+            'timestamp': 1489664942,
+            'upload_date': '20170316',
+            'age_limit': 14,
+        },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        },
+        'add_ie': ['UplynkPreplay'],
+    }, {
+        # geo restricted to US
+        'url': 'https://video.vice.com/en_us/video/the-signal-from-tolva/5816510690b70e6c5fd39a56',
+        'info_dict': {
+            'id': '930c0ad1f47141cc955087eecaddb0e2',
+            'ext': 'mp4',
+            'uploader': 'waypoint',
+            'title': 'The Signal From Tölva',
+            'description': 'md5:3927e3c79f9e8094606a2b3c5b5e55d5',
+            'uploader_id': '57f7d621e05ca860fa9ccaf9',
+            'timestamp': 1477941983,
+            'upload_date': '20161031',
+        },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        },
+        'add_ie': ['UplynkPreplay'],
+    }, {
+        'url': 'https://video.vice.com/alps/video/ulfs-wien-beruchtigste-grafitti-crew-part-1/581b12b60a0e1f4c0fb6ea2f',
+        'info_dict': {
+            'id': '581b12b60a0e1f4c0fb6ea2f',
+            'ext': 'mp4',
+            'title': 'ULFs - Wien berüchtigste Grafitti Crew - Part 1',
+            'description': '<p>Zwischen Hinterzimmer-Tattoos und U-Bahnschächten erzählen uns die Ulfs, wie es ist, "süchtig nach Sachbeschädigung" zu sein.</p>',
+            'uploader': 'VICE',
+            'uploader_id': '57a204088cb727dec794c67b',
+            'timestamp': 1485368119,
+            'upload_date': '20170125',
+            'age_limit': 14,
+        },
+        'params': {
+            # AES-encrypted m3u8
+            'skip_download': True,
+            'proxy': '127.0.0.1:8118',
+        },
+        'add_ie': ['UplynkPreplay'],
+    }, {
+        'url': 'https://video.vice.com/en_us/video/pizza-show-trailer/56d8c9a54d286ed92f7f30e4',
+        'only_matching': True,
+    }, {
+        'url': 'https://video.vice.com/en_us/embed/57f41d3556a0a80f54726060',
+        'only_matching': True,
+    }, {
+        'url': 'https://vms.vice.com/en_us/video/preplay/58c69e38a55424f1227dc3f7',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.viceland.com/en_us/video/thursday-march-1-2018/5a8f2d7ff1cdb332dd446ec1',
+        'only_matching': True,
+    }]
+    _PREPLAY_HOST = 'vms.vice'
+
+    @staticmethod
+    def _extract_urls(webpage):
+        return re.findall(
+            r'<iframe\b[^>]+\bsrc=["\']((?:https?:)?//video\.vice\.com/[^/]+/embed/[\da-f]+)',
+            webpage)
+
+    @staticmethod
+    def _extract_url(webpage):
+        urls = ViceIE._extract_urls(webpage)
+        return urls[0] if urls else None
+
+    def _real_extract(self, url):
+        locale, video_id = re.match(self._VALID_URL, url).groups()
+
+        webpage = self._download_webpage(
+            'https://video.vice.com/%s/embed/%s' % (locale, video_id),
+            video_id)
+
+        video = self._parse_json(
+            self._search_regex(
+                r'PREFETCH_DATA\s*=\s*({.+?})\s*;\s*\n', webpage,
+                'app state'), video_id)['video']
+        video_id = video.get('vms_id') or video.get('id') or video_id
+        title = video['title']
+        is_locked = video.get('locked')
+        rating = video.get('rating')
+        thumbnail = video.get('thumbnail_url')
+        duration = int_or_none(video.get('duration'))
+        series = try_get(
+            video, lambda x: x['episode']['season']['show']['title'],
+            compat_str)
+        episode_number = try_get(
+            video, lambda x: x['episode']['episode_number'])
+        season_number = try_get(
+            video, lambda x: x['episode']['season']['season_number'])
+        uploader = None
 
         query = {}
 
         query = {}
-        is_locked = watch_hub_data.get('video-locked') == '1'
         if is_locked:
             resource = self._get_mvpd_resource(
         if is_locked:
             resource = self._get_mvpd_resource(
-                'VICELAND', title, video_id,
-                watch_hub_data.get('video-rating'))
+                'VICELAND', title, video_id, rating)
             query['tvetoken'] = self._extract_mvpd_auth(
                 url, video_id, 'VICELAND', resource)
 
         # signature generation algorithm is reverse engineered from signatureGenerator in
         # webpack:///../shared/~/vice-player/dist/js/vice-player.js in
         # https://www.viceland.com/assets/common/js/web.vendor.bundle.js
             query['tvetoken'] = self._extract_mvpd_auth(
                 url, video_id, 'VICELAND', resource)
 
         # signature generation algorithm is reverse engineered from signatureGenerator in
         # webpack:///../shared/~/vice-player/dist/js/vice-player.js in
         # https://www.viceland.com/assets/common/js/web.vendor.bundle.js
-        exp = int(time.time()) + 14400
+        # new JS is located here https://vice-web-statics-cdn.vice.com/vice-player/player-embed.js
+        exp = int(time.time()) + 1440
+
         query.update({
             'exp': exp,
             'sign': hashlib.sha512(('%s:GET:%d' % (video_id, exp)).encode()).hexdigest(),
         query.update({
             'exp': exp,
             'sign': hashlib.sha512(('%s:GET:%d' % (video_id, exp)).encode()).hexdigest(),
+            '_ad_blocked': None,
+            '_ad_unit': '',
+            '_debug': '',
+            'platform': 'desktop',
+            'rn': random.randint(10000, 100000),
+            'fbprebidtoken': '',
         })
 
         try:
             host = 'www.viceland' if is_locked else self._PREPLAY_HOST
             preplay = self._download_json(
         })
 
         try:
             host = 'www.viceland' if is_locked else self._PREPLAY_HOST
             preplay = self._download_json(
-                'https://%s.com/%s/preplay/%s' % (host, locale, video_id),
+                'https://%s.com/%s/video/preplay/%s' % (host, locale, video_id),
                 video_id, query=query)
         except ExtractorError as e:
                 video_id, query=query)
         except ExtractorError as e:
-            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
+            if isinstance(e.cause, compat_HTTPError) and e.cause.code in (400, 401):
                 error = json.loads(e.cause.read().decode())
                 error = json.loads(e.cause.read().decode())
+                error_message = error.get('error_description') or error['details']
                 raise ExtractorError('%s said: %s' % (
                 raise ExtractorError('%s said: %s' % (
-                    self.IE_NAME, error['details']), expected=True)
+                    self.IE_NAME, error_message), expected=True)
             raise
 
         video_data = preplay['video']
             raise
 
         video_data = preplay['video']
@@ -76,92 +189,22 @@ class ViceBaseIE(AdobePassIE):
             'id': video_id,
             'title': title,
             'description': base.get('body') or base.get('display_body'),
             'id': video_id,
             'title': title,
             'description': base.get('body') or base.get('display_body'),
-            'thumbnail': watch_hub_data.get('cover-image') or watch_hub_data.get('thumbnail'),
-            'duration': int_or_none(video_data.get('video_duration')) or parse_duration(watch_hub_data.get('video-duration')),
+            'thumbnail': thumbnail,
+            'duration': int_or_none(video_data.get('video_duration')) or duration,
             'timestamp': int_or_none(video_data.get('created_at'), 1000),
             'age_limit': parse_age_limit(video_data.get('video_rating')),
             'timestamp': int_or_none(video_data.get('created_at'), 1000),
             'age_limit': parse_age_limit(video_data.get('video_rating')),
-            'series': video_data.get('show_title') or watch_hub_data.get('show-title'),
-            'episode_number': int_or_none(episode.get('episode_number') or watch_hub_data.get('episode')),
+            'series': video_data.get('show_title') or series,
+            'episode_number': int_or_none(episode.get('episode_number') or episode_number),
             'episode_id': str_or_none(episode.get('id') or video_data.get('episode_id')),
             'episode_id': str_or_none(episode.get('id') or video_data.get('episode_id')),
-            'season_number': int_or_none(watch_hub_data.get('season')),
+            'season_number': int_or_none(season_number),
             'season_id': str_or_none(episode.get('season_id')),
             'season_id': str_or_none(episode.get('season_id')),
-            'uploader': channel.get('base', {}).get('title') or watch_hub_data.get('channel-title'),
+            'uploader': channel.get('base', {}).get('title') or channel.get('name') or uploader,
             'uploader_id': str_or_none(channel.get('id')),
             'subtitles': subtitles,
             'ie_key': 'UplynkPreplay',
         }
 
 
             'uploader_id': str_or_none(channel.get('id')),
             'subtitles': subtitles,
             'ie_key': 'UplynkPreplay',
         }
 
 
-class ViceIE(ViceBaseIE):
-    IE_NAME = 'vice'
-    _VALID_URL = r'https?://(?:.+?\.)?vice\.com/(?:(?P<locale>[^/]+)/)?videos?/(?P<id>[^/?#&]+)'
-
-    _TESTS = [{
-        'url': 'https://news.vice.com/video/experimenting-on-animals-inside-the-monkey-lab',
-        'md5': '7d3ae2f9ba5f196cdd9f9efd43657ac2',
-        'info_dict': {
-            'id': 'N2bzkydjraWDGwnt8jAttCF6Y0PDv4Zj',
-            'ext': 'flv',
-            'title': 'Monkey Labs of Holland',
-            'description': 'md5:92b3c7dcbfe477f772dd4afa496c9149',
-        },
-        'add_ie': ['Ooyala'],
-    }, {
-        'url': 'https://video.vice.com/en_us/video/the-signal-from-tolva/5816510690b70e6c5fd39a56',
-        'info_dict': {
-            'id': '5816510690b70e6c5fd39a56',
-            'ext': 'mp4',
-            'uploader': 'Waypoint',
-            'title': 'The Signal From Tölva',
-            'description': 'md5:3927e3c79f9e8094606a2b3c5b5e55d5',
-            'uploader_id': '57f7d621e05ca860fa9ccaf9',
-            'timestamp': 1477941983,
-            'upload_date': '20161031',
-        },
-        'params': {
-            # m3u8 download
-            'skip_download': True,
-        },
-        'add_ie': ['UplynkPreplay'],
-    }, {
-        'url': 'https://video.vice.com/alps/video/ulfs-wien-beruchtigste-grafitti-crew-part-1/581b12b60a0e1f4c0fb6ea2f',
-        'info_dict': {
-            'id': '581b12b60a0e1f4c0fb6ea2f',
-            'ext': 'mp4',
-            'title': 'ULFs - Wien berüchtigste Grafitti Crew - Part 1',
-            'description': '<p>Zwischen Hinterzimmer-Tattoos und U-Bahnschächten erzählen uns die Ulfs, wie es ist, "süchtig nach Sachbeschädigung" zu sein.</p>',
-            'uploader': 'VICE',
-            'uploader_id': '57a204088cb727dec794c67b',
-            'timestamp': 1485368119,
-            'upload_date': '20170125',
-            'age_limit': 14,
-        },
-        'params': {
-            # AES-encrypted m3u8
-            'skip_download': True,
-        },
-        'add_ie': ['UplynkPreplay'],
-    }, {
-        'url': 'https://video.vice.com/en_us/video/pizza-show-trailer/56d8c9a54d286ed92f7f30e4',
-        'only_matching': True,
-    }]
-    _PREPLAY_HOST = 'video.vice'
-
-    def _real_extract(self, url):
-        locale, video_id = re.match(self._VALID_URL, url).groups()
-        webpage, urlh = self._download_webpage_handle(url, video_id)
-        embed_code = self._search_regex(
-            r'embedCode=([^&\'"]+)', webpage,
-            'ooyala embed code', default=None)
-        if embed_code:
-            return self.url_result('ooyala:%s' % embed_code, 'Ooyala')
-        youtube_id = self._search_regex(
-            r'data-youtube-id="([^"]+)"', webpage, 'youtube id', default=None)
-        if youtube_id:
-            return self.url_result(youtube_id, 'Youtube')
-        return self._extract_preplay_video(urlh.geturl(), locale, webpage)
-
-
 class ViceShowIE(InfoExtractor):
     IE_NAME = 'vice:show'
     _VALID_URL = r'https?://(?:.+?\.)?vice\.com/(?:[^/]+/)?show/(?P<id>[^/?#&]+)'
 class ViceShowIE(InfoExtractor):
     IE_NAME = 'vice:show'
     _VALID_URL = r'https?://(?:.+?\.)?vice\.com/(?:[^/]+/)?show/(?P<id>[^/?#&]+)'
@@ -203,14 +246,15 @@ class ViceArticleIE(InfoExtractor):
     _TESTS = [{
         'url': 'https://www.vice.com/en_us/article/on-set-with-the-woman-making-mormon-porn-in-utah',
         'info_dict': {
     _TESTS = [{
         'url': 'https://www.vice.com/en_us/article/on-set-with-the-woman-making-mormon-porn-in-utah',
         'info_dict': {
-            'id': '58dc0a3dee202d2a0ccfcbd8',
+            'id': '41eae2a47b174a1398357cec55f1f6fc',
             'ext': 'mp4',
             'title': 'Mormon War on Porn ',
             'ext': 'mp4',
             'title': 'Mormon War on Porn ',
-            'description': 'md5:ad396a2481e7f8afb5ed486878421090',
-            'uploader': 'VICE',
-            'uploader_id': '57a204088cb727dec794c693',
-            'timestamp': 1489160690,
-            'upload_date': '20170310',
+            'description': 'md5:6394a8398506581d0346b9ab89093fef',
+            'uploader': 'vice',
+            'uploader_id': '57a204088cb727dec794c67b',
+            'timestamp': 1491883129,
+            'upload_date': '20170411',
+            'age_limit': 17,
         },
         'params': {
             # AES-encrypted m3u8
         },
         'params': {
             # AES-encrypted m3u8
@@ -219,17 +263,35 @@ class ViceArticleIE(InfoExtractor):
         'add_ie': ['UplynkPreplay'],
     }, {
         'url': 'https://www.vice.com/en_us/article/how-to-hack-a-car',
         'add_ie': ['UplynkPreplay'],
     }, {
         'url': 'https://www.vice.com/en_us/article/how-to-hack-a-car',
-        'md5': 'a7ecf64ee4fa19b916c16f4b56184ae2',
+        'md5': '7fe8ebc4fa3323efafc127b82bd821d9',
         'info_dict': {
             'id': '3jstaBeXgAs',
             'ext': 'mp4',
             'title': 'How to Hack a Car: Phreaked Out (Episode 2)',
             'description': 'md5:ee95453f7ff495db8efe14ae8bf56f30',
         'info_dict': {
             'id': '3jstaBeXgAs',
             'ext': 'mp4',
             'title': 'How to Hack a Car: Phreaked Out (Episode 2)',
             'description': 'md5:ee95453f7ff495db8efe14ae8bf56f30',
-            'uploader_id': 'MotherboardTV',
             'uploader': 'Motherboard',
             'uploader': 'Motherboard',
+            'uploader_id': 'MotherboardTV',
             'upload_date': '20140529',
         },
         'add_ie': ['Youtube'],
             'upload_date': '20140529',
         },
         'add_ie': ['Youtube'],
+    }, {
+        'url': 'https://www.vice.com/en_us/article/znm9dx/karley-sciortino-slutever-reloaded',
+        'md5': 'a7ecf64ee4fa19b916c16f4b56184ae2',
+        'info_dict': {
+            'id': 'e2ed435eb67e43efb66e6ef9a6930a88',
+            'ext': 'mp4',
+            'title': "Making The World's First Male Sex Doll",
+            'description': 'md5:916078ef0e032d76343116208b6cc2c4',
+            'uploader': 'vice',
+            'uploader_id': '57a204088cb727dec794c67b',
+            'timestamp': 1476919911,
+            'upload_date': '20161019',
+            'age_limit': 17,
+        },
+        'params': {
+            'skip_download': True,
+        },
+        'add_ie': [ViceIE.ie_key()],
     }, {
         'url': 'https://www.vice.com/en_us/article/cowboy-capitalists-part-1',
         'only_matching': True,
     }, {
         'url': 'https://www.vice.com/en_us/article/cowboy-capitalists-part-1',
         'only_matching': True,
@@ -244,8 +306,8 @@ class ViceArticleIE(InfoExtractor):
         webpage = self._download_webpage(url, display_id)
 
         prefetch_data = self._parse_json(self._search_regex(
         webpage = self._download_webpage(url, display_id)
 
         prefetch_data = self._parse_json(self._search_regex(
-            r'window\.__PREFETCH_DATA\s*=\s*({.*});',
-            webpage, 'prefetch data'), display_id)
+            r'__APP_STATE\s*=\s*({.+?})(?:\s*\|\|\s*{}\s*)?;\s*\n',
+            webpage, 'app state'), display_id)['pageData']
         body = prefetch_data['body']
 
         def _url_res(video_url, ie_key):
         body = prefetch_data['body']
 
         def _url_res(video_url, ie_key):
@@ -256,6 +318,10 @@ class ViceArticleIE(InfoExtractor):
                 'ie_key': ie_key,
             }
 
                 'ie_key': ie_key,
             }
 
+        vice_url = ViceIE._extract_url(webpage)
+        if vice_url:
+            return _url_res(vice_url, ViceIE.ie_key())
+
         embed_code = self._search_regex(
             r'embedCode=([^&\'"]+)', body,
             'ooyala embed code', default=None)
         embed_code = self._search_regex(
             r'embedCode=([^&\'"]+)', body,
             'ooyala embed code', default=None)
diff --git a/youtube_dl/extractor/viceland.py b/youtube_dl/extractor/viceland.py
deleted file mode 100644 (file)
index bd60235..0000000
+++ /dev/null
@@ -1,38 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
-from .vice import ViceBaseIE
-
-
-class VicelandIE(ViceBaseIE):
-    _VALID_URL = r'https?://(?:www\.)?viceland\.com/(?P<locale>[^/]+)/video/[^/]+/(?P<id>[a-f0-9]+)'
-    _TEST = {
-        'url': 'https://www.viceland.com/en_us/video/trapped/588a70d0dba8a16007de7316',
-        'info_dict': {
-            'id': '588a70d0dba8a16007de7316',
-            'ext': 'mp4',
-            'title': 'TRAPPED (Series Trailer)',
-            'description': 'md5:7a8e95c2b6cd86461502a2845e581ccf',
-            'age_limit': 14,
-            'timestamp': 1485474122,
-            'upload_date': '20170126',
-            'uploader_id': '57a204098cb727dec794c6a3',
-            'uploader': 'Viceland',
-        },
-        'params': {
-            # m3u8 download
-            'skip_download': True,
-        },
-        'add_ie': ['UplynkPreplay'],
-        'skip': '404',
-    }
-    _PREPLAY_HOST = 'www.viceland'
-
-    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-        locale = mobj.group('locale')
-        webpage = self._download_webpage(url, video_id)
-        return self._extract_preplay_video(url, locale, webpage)
index 01da32f1cdd05505a6d53eff4f9bc6691aaae512..b48baf00be3b93f9dbddcc35713f2ff6604851d2 100644 (file)
@@ -49,8 +49,8 @@ class VidioIE(InfoExtractor):
             thumbnail = clip.get('image')
 
         m3u8_url = m3u8_url or self._search_regex(
             thumbnail = clip.get('image')
 
         m3u8_url = m3u8_url or self._search_regex(
-            r'data(?:-vjs)?-clip-hls-url=(["\'])(?P<url>(?!\1).+)\1',
-            webpage, 'hls url')
+            r'data(?:-vjs)?-clip-hls-url=(["\'])(?P<url>(?:(?!\1).)+)\1',
+            webpage, 'hls url', group='url')
         formats = self._extract_m3u8_formats(
             m3u8_url, display_id, 'mp4', entry_protocol='m3u8_native')
         self._sort_formats(formats)
         formats = self._extract_m3u8_formats(
             m3u8_url, display_id, 'mp4', entry_protocol='m3u8_native')
         self._sort_formats(formats)
diff --git a/youtube_dl/extractor/vidlii.py b/youtube_dl/extractor/vidlii.py
new file mode 100644 (file)
index 0000000..f477425
--- /dev/null
@@ -0,0 +1,125 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    float_or_none,
+    get_element_by_id,
+    int_or_none,
+    strip_or_none,
+    unified_strdate,
+    urljoin,
+)
+
+
+class VidLiiIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?vidlii\.com/(?:watch|embed)\?.*?\bv=(?P<id>[0-9A-Za-z_-]{11})'
+    _TESTS = [{
+        'url': 'https://www.vidlii.com/watch?v=tJluaH4BJ3v',
+        'md5': '9bf7d1e005dfa909b6efb0a1ff5175e2',
+        'info_dict': {
+            'id': 'tJluaH4BJ3v',
+            'ext': 'mp4',
+            'title': 'Vidlii is against me',
+            'description': 'md5:fa3f119287a2bfb922623b52b1856145',
+            'thumbnail': 're:https://.*.jpg',
+            'uploader': 'APPle5auc31995',
+            'uploader_url': 'https://www.vidlii.com/user/APPle5auc31995',
+            'upload_date': '20171107',
+            'duration': 212,
+            'view_count': int,
+            'comment_count': int,
+            'average_rating': float,
+            'categories': ['News & Politics'],
+            'tags': ['Vidlii', 'Jan', 'Videogames'],
+        }
+    }, {
+        'url': 'https://www.vidlii.com/embed?v=tJluaH4BJ3v&a=0',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(
+            'https://www.vidlii.com/watch?v=%s' % video_id, video_id)
+
+        video_url = self._search_regex(
+            r'src\s*:\s*(["\'])(?P<url>(?:https?://)?(?:(?!\1).)+)\1', webpage,
+            'video url', group='url')
+
+        title = self._search_regex(
+            (r'<h1>([^<]+)</h1>', r'<title>([^<]+) - VidLii<'), webpage,
+            'title')
+
+        description = self._html_search_meta(
+            ('description', 'twitter:description'), webpage,
+            default=None) or strip_or_none(
+            get_element_by_id('des_text', webpage))
+
+        thumbnail = self._html_search_meta(
+            'twitter:image', webpage, default=None)
+        if not thumbnail:
+            thumbnail_path = self._search_regex(
+                r'img\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
+                'thumbnail', fatal=False, group='url')
+            if thumbnail_path:
+                thumbnail = urljoin(url, thumbnail_path)
+
+        uploader = self._search_regex(
+            r'<div[^>]+class=["\']wt_person[^>]+>\s*<a[^>]+\bhref=["\']/user/[^>]+>([^<]+)',
+            webpage, 'uploader', fatal=False)
+        uploader_url = 'https://www.vidlii.com/user/%s' % uploader if uploader else None
+
+        upload_date = unified_strdate(self._html_search_meta(
+            'datePublished', webpage, default=None) or self._search_regex(
+            r'<date>([^<]+)', webpage, 'upload date', fatal=False))
+
+        duration = int_or_none(self._html_search_meta(
+            'video:duration', webpage, 'duration',
+            default=None) or self._search_regex(
+            r'duration\s*:\s*(\d+)', webpage, 'duration', fatal=False))
+
+        view_count = int_or_none(self._search_regex(
+            (r'<strong>(\d+)</strong> views',
+             r'Views\s*:\s*<strong>(\d+)</strong>'),
+            webpage, 'view count', fatal=False))
+
+        comment_count = int_or_none(self._search_regex(
+            (r'<span[^>]+id=["\']cmt_num[^>]+>(\d+)',
+             r'Comments\s*:\s*<strong>(\d+)'),
+            webpage, 'comment count', fatal=False))
+
+        average_rating = float_or_none(self._search_regex(
+            r'rating\s*:\s*([\d.]+)', webpage, 'average rating', fatal=False))
+
+        category = self._html_search_regex(
+            r'<div>Category\s*:\s*</div>\s*<div>\s*<a[^>]+>([^<]+)', webpage,
+            'category', fatal=False)
+        categories = [category] if category else None
+
+        tags = [
+            strip_or_none(tag)
+            for tag in re.findall(
+                r'<a[^>]+\bhref=["\']/results\?.*?q=[^>]*>([^<]+)',
+                webpage) if strip_or_none(tag)
+        ] or None
+
+        return {
+            'id': video_id,
+            'url': video_url,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'uploader': uploader,
+            'uploader_url': uploader_url,
+            'upload_date': upload_date,
+            'duration': duration,
+            'view_count': view_count,
+            'comment_count': comment_count,
+            'average_rating': average_rating,
+            'categories': categories,
+            'tags': tags,
+        }
index ac35d55a9505808144fed105d535d53779afb1cb..9026e778cfb108462c5fdfa5ca2a42d166dc37a7 100644 (file)
@@ -13,7 +13,7 @@ from ..utils import (
 
 
 class VidziIE(InfoExtractor):
 
 
 class VidziIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?vidzi\.(?:tv|cc)/(?:embed-)?(?P<id>[0-9a-zA-Z]+)'
+    _VALID_URL = r'https?://(?:www\.)?vidzi\.(?:tv|cc|si)/(?:embed-)?(?P<id>[0-9a-zA-Z]+)'
     _TESTS = [{
         'url': 'http://vidzi.tv/cghql9yq6emu.html',
         'md5': '4f16c71ca0c8c8635ab6932b5f3f1660',
     _TESTS = [{
         'url': 'http://vidzi.tv/cghql9yq6emu.html',
         'md5': '4f16c71ca0c8c8635ab6932b5f3f1660',
@@ -32,6 +32,9 @@ class VidziIE(InfoExtractor):
     }, {
         'url': 'http://vidzi.cc/cghql9yq6emu.html',
         'only_matching': True,
     }, {
         'url': 'http://vidzi.cc/cghql9yq6emu.html',
         'only_matching': True,
+    }, {
+        'url': 'https://vidzi.si/rph9gztxj1et.html',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
     }]
 
     def _real_extract(self, url):
index 6af70565781e391915d807f49639a859c8f1b9ff..08257147ef5abba46224190b9d8c32ab280182bf 100644 (file)
@@ -41,21 +41,30 @@ class VimeoBaseInfoExtractor(InfoExtractor):
             if self._LOGIN_REQUIRED:
                 raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
             return
             if self._LOGIN_REQUIRED:
                 raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
             return
-        self.report_login()
-        webpage = self._download_webpage(self._LOGIN_URL, None, False)
+        webpage = self._download_webpage(
+            self._LOGIN_URL, None, 'Downloading login page')
         token, vuid = self._extract_xsrft_and_vuid(webpage)
         token, vuid = self._extract_xsrft_and_vuid(webpage)
-        data = urlencode_postdata({
+        data = {
             'action': 'login',
             'email': username,
             'password': password,
             'service': 'vimeo',
             'token': token,
             'action': 'login',
             'email': username,
             'password': password,
             'service': 'vimeo',
             'token': token,
-        })
-        login_request = sanitized_Request(self._LOGIN_URL, data)
-        login_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
-        login_request.add_header('Referer', self._LOGIN_URL)
+        }
         self._set_vimeo_cookie('vuid', vuid)
         self._set_vimeo_cookie('vuid', vuid)
-        self._download_webpage(login_request, None, False, 'Wrong login info')
+        try:
+            self._download_webpage(
+                self._LOGIN_URL, None, 'Logging in',
+                data=urlencode_postdata(data), headers={
+                    'Content-Type': 'application/x-www-form-urlencoded',
+                    'Referer': self._LOGIN_URL,
+                })
+        except ExtractorError as e:
+            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 418:
+                raise ExtractorError(
+                    'Unable to log in: bad username or password',
+                    expected=True)
+            raise ExtractorError('Unable to log in')
 
     def _verify_video_password(self, url, video_id, webpage):
         password = self._downloader.params.get('videopassword')
 
     def _verify_video_password(self, url, video_id, webpage):
         password = self._downloader.params.get('videopassword')
@@ -218,7 +227,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
                 'id': '56015672',
                 'ext': 'mp4',
                 'title': "youtube-dl test video - \u2605 \" ' \u5e78 / \\ \u00e4 \u21ad \U0001d550",
                 'id': '56015672',
                 'ext': 'mp4',
                 'title': "youtube-dl test video - \u2605 \" ' \u5e78 / \\ \u00e4 \u21ad \U0001d550",
-                'description': 'md5:2d3305bad981a06ff79f027f19865021',
+                'description': 'md5:509a9ad5c9bf97c60faee9203aca4479',
                 'timestamp': 1355990239,
                 'upload_date': '20121220',
                 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/user7108434',
                 'timestamp': 1355990239,
                 'upload_date': '20121220',
                 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/user7108434',
index b8ea50362fc83f9bcdb480a9625d0dc55cd28b52..b50d4f170328728fbfc75b75e5a5ed6dcf281f84 100644 (file)
@@ -99,10 +99,10 @@ class VKIE(VKBaseIE):
     _TESTS = [
         {
             'url': 'http://vk.com/videos-77521?z=video-77521_162222515%2Fclub77521',
     _TESTS = [
         {
             'url': 'http://vk.com/videos-77521?z=video-77521_162222515%2Fclub77521',
-            'md5': '0deae91935c54e00003c2a00646315f0',
+            'md5': '7babad3b85ea2e91948005b1b8b0cb84',
             'info_dict': {
                 'id': '162222515',
             'info_dict': {
                 'id': '162222515',
-                'ext': 'flv',
+                'ext': 'mp4',
                 'title': 'ProtivoGunz - Хуёвая песня',
                 'uploader': 're:(?:Noize MC|Alexander Ilyashenko).*',
                 'duration': 195,
                 'title': 'ProtivoGunz - Хуёвая песня',
                 'uploader': 're:(?:Noize MC|Alexander Ilyashenko).*',
                 'duration': 195,
index 68652a22fc7453d01802334b3af021557cc1cedd..d1bc992fd95deee13d407380112a4548921290e2 100644 (file)
@@ -39,7 +39,7 @@ class XHamsterIE(InfoExtractor):
             'uploader': 'Ruseful2011',
             'duration': 893,
             'age_limit': 18,
             'uploader': 'Ruseful2011',
             'duration': 893,
             'age_limit': 18,
-            'categories': ['Fake Hub', 'Amateur', 'MILFs', 'POV', 'Boss', 'Office', 'Oral', 'Reality', 'Sexy'],
+            'categories': ['Fake Hub', 'Amateur', 'MILFs', 'POV', 'Beauti', 'Beauties', 'Beautiful', 'Boss', 'Office', 'Oral', 'Reality', 'Sexy', 'Taking'],
         },
     }, {
         'url': 'http://xhamster.com/movies/2221348/britney_spears_sexy_booty.html?hd',
         },
     }, {
         'url': 'http://xhamster.com/movies/2221348/britney_spears_sexy_booty.html?hd',
index e0a6255dc4df8f2a2bd56ffcf1363089a08e6aea..ac1ccc4043ead820283c4cba57a0bb7873de9872 100644 (file)
@@ -1,19 +1,29 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
 # coding: utf-8
 from __future__ import unicode_literals
 
+import re
+
 from .common import InfoExtractor
 from .common import InfoExtractor
-from ..compat import compat_urllib_parse_unquote
+from ..utils import (
+    determine_ext,
+    int_or_none,
+    NO_DEFAULT,
+    str_to_int,
+)
 
 
 class XNXXIE(InfoExtractor):
     _VALID_URL = r'https?://(?:video|www)\.xnxx\.com/video-?(?P<id>[0-9a-z]+)/'
     _TESTS = [{
         'url': 'http://www.xnxx.com/video-55awb78/skyrim_test_video',
 
 
 class XNXXIE(InfoExtractor):
     _VALID_URL = r'https?://(?:video|www)\.xnxx\.com/video-?(?P<id>[0-9a-z]+)/'
     _TESTS = [{
         'url': 'http://www.xnxx.com/video-55awb78/skyrim_test_video',
-        'md5': 'ef7ecee5af78f8b03dca2cf31341d3a0',
+        'md5': '7583e96c15c0f21e9da3453d9920fbba',
         'info_dict': {
             'id': '55awb78',
         'info_dict': {
             'id': '55awb78',
-            'ext': 'flv',
+            'ext': 'mp4',
             'title': 'Skyrim Test Video',
             'title': 'Skyrim Test Video',
+            'thumbnail': r're:^https?://.*\.jpg',
+            'duration': 469,
+            'view_count': int,
             'age_limit': 18,
         },
     }, {
             'age_limit': 18,
         },
     }, {
@@ -26,23 +36,49 @@ class XNXXIE(InfoExtractor):
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
+
         webpage = self._download_webpage(url, video_id)
 
         webpage = self._download_webpage(url, video_id)
 
-        video_url = self._search_regex(r'flv_url=(.*?)&amp;',
-                                       webpage, 'video URL')
-        video_url = compat_urllib_parse_unquote(video_url)
+        def get(meta, default=NO_DEFAULT, fatal=True):
+            return self._search_regex(
+                r'set%s\s*\(\s*(["\'])(?P<value>(?:(?!\1).)+)\1' % meta,
+                webpage, meta, default=default, fatal=fatal, group='value')
+
+        title = self._og_search_title(
+            webpage, default=None) or get('VideoTitle')
 
 
-        video_title = self._html_search_regex(r'<title>(.*?)\s+-\s+XNXX.COM',
-                                              webpage, 'title')
+        formats = []
+        for mobj in re.finditer(
+                r'setVideo(?:Url(?P<id>Low|High)|HLS)\s*\(\s*(?P<q>["\'])(?P<url>(?:https?:)?//.+?)(?P=q)', webpage):
+            format_url = mobj.group('url')
+            if determine_ext(format_url) == 'm3u8':
+                formats.extend(self._extract_m3u8_formats(
+                    format_url, video_id, 'mp4', entry_protocol='m3u8_native',
+                    preference=1, m3u8_id='hls', fatal=False))
+            else:
+                format_id = mobj.group('id')
+                if format_id:
+                    format_id = format_id.lower()
+                formats.append({
+                    'url': format_url,
+                    'format_id': format_id,
+                    'quality': -1 if format_id == 'low' else 0,
+                })
+        self._sort_formats(formats)
 
 
-        video_thumbnail = self._search_regex(r'url_bigthumb=(.*?)&amp;',
-                                             webpage, 'thumbnail', fatal=False)
+        thumbnail = self._og_search_thumbnail(webpage, default=None) or get(
+            'ThumbUrl', fatal=False) or get('ThumbUrl169', fatal=False)
+        duration = int_or_none(self._og_search_property('duration', webpage))
+        view_count = str_to_int(self._search_regex(
+            r'id=["\']nb-views-number[^>]+>([\d,.]+)', webpage, 'view count',
+            default=None))
 
         return {
             'id': video_id,
 
         return {
             'id': video_id,
-            'url': video_url,
-            'title': video_title,
-            'ext': 'flv',
-            'thumbnail': video_thumbnail,
+            'title': title,
+            'thumbnail': thumbnail,
+            'duration': duration,
+            'view_count': view_count,
             'age_limit': 18,
             'age_limit': 18,
+            'formats': formats,
         }
         }
diff --git a/youtube_dl/extractor/yapfiles.py b/youtube_dl/extractor/yapfiles.py
new file mode 100644 (file)
index 0000000..7fafbf5
--- /dev/null
@@ -0,0 +1,101 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+    ExtractorError,
+    int_or_none,
+    qualities,
+    unescapeHTML,
+)
+
+
+class YapFilesIE(InfoExtractor):
+    _YAPFILES_URL = r'//(?:(?:www|api)\.)?yapfiles\.ru/get_player/*\?.*?\bv=(?P<id>\w+)'
+    _VALID_URL = r'https?:%s' % _YAPFILES_URL
+    _TESTS = [{
+        # with hd
+        'url': 'http://www.yapfiles.ru/get_player/?v=vMDE1NjcyNDUt0413',
+        'md5': '2db19e2bfa2450568868548a1aa1956c',
+        'info_dict': {
+            'id': 'vMDE1NjcyNDUt0413',
+            'ext': 'mp4',
+            'title': 'Самый худший пароль WIFI',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'duration': 72,
+        },
+    }, {
+        # without hd
+        'url': 'https://api.yapfiles.ru/get_player/?uid=video_player_1872528&plroll=1&adv=1&v=vMDE4NzI1Mjgt690b',
+        'only_matching': True,
+    }]
+
+    @staticmethod
+    def _extract_urls(webpage):
+        return [unescapeHTML(mobj.group('url')) for mobj in re.finditer(
+            r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?%s.*?)\1'
+            % YapFilesIE._YAPFILES_URL, webpage)]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id, fatal=False)
+
+        player_url = None
+        query = {}
+        if webpage:
+            player_url = self._search_regex(
+                r'player\.init\s*\(\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
+                'player url', default=None, group='url')
+
+        if not player_url:
+            player_url = 'http://api.yapfiles.ru/load/%s/' % video_id
+            query = {
+                'md5': 'ded5f369be61b8ae5f88e2eeb2f3caff',
+                'type': 'json',
+                'ref': url,
+            }
+
+        player = self._download_json(
+            player_url, video_id, query=query)['player']
+
+        playlist_url = player['playlist']
+        title = player['title']
+        thumbnail = player.get('poster')
+
+        if title == 'Ролик удален' or 'deleted.jpg' in (thumbnail or ''):
+            raise ExtractorError(
+                'Video %s has been removed' % video_id, expected=True)
+
+        playlist = self._download_json(
+            playlist_url, video_id)['player']['main']
+
+        hd_height = int_or_none(player.get('hd'))
+
+        QUALITIES = ('sd', 'hd')
+        quality_key = qualities(QUALITIES)
+        formats = []
+        for format_id in QUALITIES:
+            is_hd = format_id == 'hd'
+            format_url = playlist.get(
+                'file%s' % ('_hd' if is_hd else ''))
+            if not format_url or not isinstance(format_url, compat_str):
+                continue
+            formats.append({
+                'url': format_url,
+                'format_id': format_id,
+                'quality': quality_key(format_id),
+                'height': hd_height if is_hd else None,
+            })
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'thumbnail': thumbnail,
+            'duration': int_or_none(player.get('length')),
+            'formats': formats,
+        }
index 43051512bc1013640bc99d98437b3cf2a7b258a3..617be8e96b6e305988b3ae4d0c5d3790074868e4 100644 (file)
@@ -1944,6 +1944,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                                     break
                             if codecs:
                                 dct.update(parse_codecs(codecs))
                                     break
                             if codecs:
                                 dct.update(parse_codecs(codecs))
+                if dct.get('acodec') == 'none' or dct.get('vcodec') == 'none':
+                    dct['downloader_options'] = {
+                        # Youtube throttles chunks >~10M
+                        'http_chunk_size': 10485760,
+                    }
                 formats.append(dct)
         elif video_info.get('hlsvp'):
             manifest_url = video_info['hlsvp'][0]
                 formats.append(dct)
         elif video_info.get('hlsvp'):
             manifest_url = video_info['hlsvp'][0]
@@ -2446,7 +2451,7 @@ class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
 
 class YoutubeUserIE(YoutubeChannelIE):
     IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
 
 class YoutubeUserIE(YoutubeChannelIE):
     IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
-    _VALID_URL = r'(?:(?:https?://(?:\w+\.)?youtube\.com/(?:(?P<user>user|c)/)?(?!(?:attribution_link|watch|results)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)'
+    _VALID_URL = r'(?:(?:https?://(?:\w+\.)?youtube\.com/(?:(?P<user>user|c)/)?(?!(?:attribution_link|watch|results|shared)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)'
     _TEMPLATE_URL = 'https://www.youtube.com/%s/%s/videos'
     IE_NAME = 'youtube:user'
 
     _TEMPLATE_URL = 'https://www.youtube.com/%s/%s/videos'
     IE_NAME = 'youtube:user'
 
@@ -2578,7 +2583,11 @@ class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
     }]
 
 
     }]
 
 
-class YoutubeSearchIE(SearchInfoExtractor, YoutubePlaylistIE):
+class YoutubeSearchBaseInfoExtractor(YoutubePlaylistBaseInfoExtractor):
+    _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})(?:[^"]*"[^>]+\btitle="(?P<title>[^"]+))?'
+
+
+class YoutubeSearchIE(SearchInfoExtractor, YoutubeSearchBaseInfoExtractor):
     IE_DESC = 'YouTube.com searches'
     # there doesn't appear to be a real limit, for example if you search for
     # 'python' you get more than 8.000.000 results
     IE_DESC = 'YouTube.com searches'
     # there doesn't appear to be a real limit, for example if you search for
     # 'python' you get more than 8.000.000 results
@@ -2612,8 +2621,7 @@ class YoutubeSearchIE(SearchInfoExtractor, YoutubePlaylistIE):
                 raise ExtractorError(
                     '[youtube] No video results', expected=True)
 
                 raise ExtractorError(
                     '[youtube] No video results', expected=True)
 
-            new_videos = self._ids_to_results(orderedSet(re.findall(
-                r'href="/watch\?v=(.{11})', html_content)))
+            new_videos = list(self._process_page(html_content))
             videos += new_videos
             if not new_videos or len(videos) > limit:
                 break
             videos += new_videos
             if not new_videos or len(videos) > limit:
                 break
@@ -2636,11 +2644,10 @@ class YoutubeSearchDateIE(YoutubeSearchIE):
     _EXTRA_QUERY_ARGS = {'search_sort': 'video_date_uploaded'}
 
 
     _EXTRA_QUERY_ARGS = {'search_sort': 'video_date_uploaded'}
 
 
-class YoutubeSearchURLIE(YoutubePlaylistBaseInfoExtractor):
+class YoutubeSearchURLIE(YoutubeSearchBaseInfoExtractor):
     IE_DESC = 'YouTube.com search URLs'
     IE_NAME = 'youtube:search_url'
     _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?P<query>[^&]+)(?:[&]|$)'
     IE_DESC = 'YouTube.com search URLs'
     IE_NAME = 'youtube:search_url'
     _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?P<query>[^&]+)(?:[&]|$)'
-    _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})(?:[^"]*"[^>]+\btitle="(?P<title>[^"]+))?'
     _TESTS = [{
         'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
         'playlist_mincount': 5,
     _TESTS = [{
         'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
         'playlist_mincount': 5,
index 523bb5c95cad19ca6c201774623456d916b1886c..bb9020c918b3659437d752c5a4109bc520e3ab88 100644 (file)
@@ -42,16 +42,19 @@ class ZDFIE(ZDFBaseIE):
     _QUALITIES = ('auto', 'low', 'med', 'high', 'veryhigh')
 
     _TESTS = [{
     _QUALITIES = ('auto', 'low', 'med', 'high', 'veryhigh')
 
     _TESTS = [{
-        'url': 'https://www.zdf.de/service-und-hilfe/die-neue-zdf-mediathek/zdfmediathek-trailer-100.html',
+        'url': 'https://www.zdf.de/dokumentation/terra-x/die-magie-der-farben-von-koenigspurpur-und-jeansblau-100.html',
         'info_dict': {
         'info_dict': {
-            'id': 'zdfmediathek-trailer-100',
+            'id': 'die-magie-der-farben-von-koenigspurpur-und-jeansblau-100',
             'ext': 'mp4',
             'ext': 'mp4',
-            'title': 'Die neue ZDFmediathek',
-            'description': 'md5:3003d36487fb9a5ea2d1ff60beb55e8d',
-            'duration': 30,
-            'timestamp': 1477627200,
-            'upload_date': '20161028',
-        }
+            'title': 'Die Magie der Farben (2/2)',
+            'description': 'md5:a89da10c928c6235401066b60a6d5c1a',
+            'duration': 2615,
+            'timestamp': 1465021200,
+            'upload_date': '20160604',
+        },
+    }, {
+        'url': 'https://www.zdf.de/service-und-hilfe/die-neue-zdf-mediathek/zdfmediathek-trailer-100.html',
+        'only_matching': True,
     }, {
         'url': 'https://www.zdf.de/filme/taunuskrimi/die-lebenden-und-die-toten-1---ein-taunuskrimi-100.html',
         'only_matching': True,
     }, {
         'url': 'https://www.zdf.de/filme/taunuskrimi/die-lebenden-und-die-toten-1---ein-taunuskrimi-100.html',
         'only_matching': True,
index 4c04550444308832862517d881539ebe1b417bfe..7d1bbc02102ec860ab417301f478309fe0c379e0 100644 (file)
@@ -478,6 +478,11 @@ def parseOpts(overrideArguments=None):
         '--no-resize-buffer',
         action='store_true', dest='noresizebuffer', default=False,
         help='Do not automatically adjust the buffer size. By default, the buffer size is automatically resized from an initial value of SIZE.')
         '--no-resize-buffer',
         action='store_true', dest='noresizebuffer', default=False,
         help='Do not automatically adjust the buffer size. By default, the buffer size is automatically resized from an initial value of SIZE.')
+    downloader.add_option(
+        '--http-chunk-size',
+        dest='http_chunk_size', metavar='SIZE', default=None,
+        help='Size of a chunk for chunk-based HTTP downloading (e.g. 10485760 or 10M) (default is disabled). '
+             'May be useful for bypassing bandwidth throttling imposed by a webserver (experimental)')
     downloader.add_option(
         '--test',
         action='store_true', dest='test', default=False,
     downloader.add_option(
         '--test',
         action='store_true', dest='test', default=False,
index e606a58de886533fb5239b9bb958fbff9606a4ee..56be914b8f1b6e98802163ae1013392079d93fb3 100644 (file)
@@ -31,7 +31,8 @@ class EmbedThumbnailPP(FFmpegPostProcessor):
         temp_filename = prepend_extension(filename, 'temp')
 
         if not info.get('thumbnails'):
         temp_filename = prepend_extension(filename, 'temp')
 
         if not info.get('thumbnails'):
-            raise EmbedThumbnailPPError('Thumbnail was not found. Nothing to do.')
+            self._downloader.to_screen('[embedthumbnail] There aren\'t any thumbnails to embed')
+            return [], info
 
         thumbnail_filename = info['thumbnails'][-1]['filename']
 
 
         thumbnail_filename = info['thumbnails'][-1]['filename']
 
index 2fe9cf585db817e1d86831c71ef28502b4a16ee5..027d12785da68055477b0bd7475cfd25e4678c6b 100644 (file)
@@ -82,7 +82,7 @@ def register_socks_protocols():
 compiled_regex_type = type(re.compile(''))
 
 std_headers = {
 compiled_regex_type = type(re.compile(''))
 
 std_headers = {
-    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20150101 Firefox/47.0 (Chrome)',
+    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:59.0) Gecko/20100101 Firefox/59.0 (Chrome)',
     'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
     'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
     'Accept-Encoding': 'gzip, deflate',
     'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
     'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
     'Accept-Encoding': 'gzip, deflate',
@@ -538,10 +538,22 @@ def sanitize_path(s):
     return os.path.join(*sanitized_path)
 
 
     return os.path.join(*sanitized_path)
 
 
-# Prepend protocol-less URLs with `http:` scheme in order to mitigate the number of
-# unwanted failures due to missing protocol
 def sanitize_url(url):
 def sanitize_url(url):
-    return 'http:%s' % url if url.startswith('//') else url
+    # Prepend protocol-less URLs with `http:` scheme in order to mitigate
+    # the number of unwanted failures due to missing protocol
+    if url.startswith('//'):
+        return 'http:%s' % url
+    # Fix some common typos seen so far
+    COMMON_TYPOS = (
+        # https://github.com/rg3/youtube-dl/issues/15649
+        (r'^httpss://', r'https://'),
+        # https://bx1.be/lives/direct-tv/
+        (r'^rmtp([es]?)://', r'rtmp\1://'),
+    )
+    for mistake, fixup in COMMON_TYPOS:
+        if re.match(mistake, url):
+            return re.sub(mistake, fixup, url)
+    return url
 
 
 def sanitized_Request(url, *args, **kwargs):
 
 
 def sanitized_Request(url, *args, **kwargs):
@@ -866,8 +878,8 @@ def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
     # expected HTTP responses to meet HTTP/1.0 or later (see also
     # https://github.com/rg3/youtube-dl/issues/6727)
     if sys.version_info < (3, 0):
     # expected HTTP responses to meet HTTP/1.0 or later (see also
     # https://github.com/rg3/youtube-dl/issues/6727)
     if sys.version_info < (3, 0):
-        kwargs[b'strict'] = True
-    hc = http_class(*args, **kwargs)
+        kwargs['strict'] = True
+    hc = http_class(*args, **compat_kwargs(kwargs))
     source_address = ydl_handler._params.get('source_address')
     if source_address is not None:
         sa = (source_address, 0)
     source_address = ydl_handler._params.get('source_address')
     if source_address is not None:
         sa = (source_address, 0)
@@ -1199,6 +1211,11 @@ def unified_timestamp(date_str, day_first=True):
     if m:
         date_str = date_str[:-len(m.group('tz'))]
 
     if m:
         date_str = date_str[:-len(m.group('tz'))]
 
+    # Python only supports microseconds, so remove nanoseconds
+    m = re.search(r'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', date_str)
+    if m:
+        date_str = m.group(1)
+
     for expression in date_formats(day_first):
         try:
             dt = datetime.datetime.strptime(date_str, expression) - timezone + datetime.timedelta(hours=pm_delta)
     for expression in date_formats(day_first):
         try:
             dt = datetime.datetime.strptime(date_str, expression) - timezone + datetime.timedelta(hours=pm_delta)
@@ -1677,6 +1694,28 @@ def parse_count(s):
     return lookup_unit_table(_UNIT_TABLE, s)
 
 
     return lookup_unit_table(_UNIT_TABLE, s)
 
 
+def parse_resolution(s):
+    if s is None:
+        return {}
+
+    mobj = re.search(r'\b(?P<w>\d+)\s*[xX×]\s*(?P<h>\d+)\b', s)
+    if mobj:
+        return {
+            'width': int(mobj.group('w')),
+            'height': int(mobj.group('h')),
+        }
+
+    mobj = re.search(r'\b(\d+)[pPiI]\b', s)
+    if mobj:
+        return {'height': int(mobj.group(1))}
+
+    mobj = re.search(r'\b([48])[kK]\b', s)
+    if mobj:
+        return {'height': int(mobj.group(1)) * 540}
+
+    return {}
+
+
 def month_by_name(name, lang='en'):
     """ Return the number of a month by (locale-independently) English name """
 
 def month_by_name(name, lang='en'):
     """ Return the number of a month by (locale-independently) English name """
 
index 8a2b57ffba2eb4763695ae4a2c1af0fa27ff6484..6ce11c39bc2016e5d29829f41e530064dd3717d9 100644 (file)
@@ -1,3 +1,3 @@
 from __future__ import unicode_literals
 
 from __future__ import unicode_literals
 
-__version__ = '2018.01.27'
+__version__ = '2018.03.14'