Raphaël G. Git Repositories - youtubedl/commitdiff
New upstream version 2018.09.10
author Rogério Brito <rbrito@ime.usp.br>
Fri, 14 Sep 2018 20:01:44 +0000 (17:01 -0300)
committer Rogério Brito <rbrito@ime.usp.br>
Fri, 14 Sep 2018 20:01:44 +0000 (17:01 -0300)
133 files changed:
AUTHORS
ChangeLog
README.md
README.txt
docs/supportedsites.md
test/test_utils.py
youtube-dl
youtube-dl.1
youtube-dl.fish
youtube_dl/YoutubeDL.py
youtube_dl/compat.py
youtube_dl/downloader/dash.py
youtube_dl/extractor/adultswim.py
youtube_dl/extractor/afreecatv.py
youtube_dl/extractor/amp.py
youtube_dl/extractor/animeondemand.py
youtube_dl/extractor/anvato.py
youtube_dl/extractor/aol.py
youtube_dl/extractor/apa.py
youtube_dl/extractor/aparat.py
youtube_dl/extractor/ard.py
youtube_dl/extractor/bandcamp.py
youtube_dl/extractor/bbc.py
youtube_dl/extractor/bitchute.py [new file with mode: 0644]
youtube_dl/extractor/breakcom.py
youtube_dl/extractor/brightcove.py
youtube_dl/extractor/cammodels.py
youtube_dl/extractor/canvas.py
youtube_dl/extractor/ccma.py
youtube_dl/extractor/ceskatelevize.py
youtube_dl/extractor/clyp.py
youtube_dl/extractor/common.py
youtube_dl/extractor/crackle.py
youtube_dl/extractor/crunchyroll.py
youtube_dl/extractor/cwtv.py
youtube_dl/extractor/dailymotion.py
youtube_dl/extractor/dctp.py
youtube_dl/extractor/discoverygo.py
youtube_dl/extractor/dplay.py
youtube_dl/extractor/dramafever.py
youtube_dl/extractor/dtube.py
youtube_dl/extractor/eagleplatform.py
youtube_dl/extractor/egghead.py
youtube_dl/extractor/eporner.py
youtube_dl/extractor/expressen.py
youtube_dl/extractor/extractors.py
youtube_dl/extractor/facebook.py
youtube_dl/extractor/firsttv.py
youtube_dl/extractor/foxnews.py
youtube_dl/extractor/francetv.py
youtube_dl/extractor/frontendmasters.py [new file with mode: 0644]
youtube_dl/extractor/funk.py
youtube_dl/extractor/generic.py
youtube_dl/extractor/go.py
youtube_dl/extractor/go90.py
youtube_dl/extractor/hidive.py
youtube_dl/extractor/imdb.py
youtube_dl/extractor/imgur.py
youtube_dl/extractor/instagram.py
youtube_dl/extractor/internazionale.py
youtube_dl/extractor/iprima.py
youtube_dl/extractor/itv.py
youtube_dl/extractor/iwara.py
youtube_dl/extractor/joj.py
youtube_dl/extractor/keezmovies.py
youtube_dl/extractor/kinopoisk.py [new file with mode: 0644]
youtube_dl/extractor/konserthusetplay.py
youtube_dl/extractor/lci.py
youtube_dl/extractor/lynda.py
youtube_dl/extractor/markiza.py
youtube_dl/extractor/mediaset.py
youtube_dl/extractor/mediasite.py
youtube_dl/extractor/mitele.py
youtube_dl/extractor/motherless.py
youtube_dl/extractor/nbc.py
youtube_dl/extractor/niconico.py
youtube_dl/extractor/nova.py
youtube_dl/extractor/npo.py
youtube_dl/extractor/nrk.py
youtube_dl/extractor/pbs.py
youtube_dl/extractor/peertube.py
youtube_dl/extractor/pluralsight.py
youtube_dl/extractor/porncom.py
youtube_dl/extractor/pornhub.py
youtube_dl/extractor/puhutv.py [new file with mode: 0644]
youtube_dl/extractor/radiojavan.py
youtube_dl/extractor/rai.py
youtube_dl/extractor/raywenderlich.py
youtube_dl/extractor/redbulltv.py
youtube_dl/extractor/redtube.py
youtube_dl/extractor/rentv.py
youtube_dl/extractor/rtbf.py
youtube_dl/extractor/rutube.py
youtube_dl/extractor/seznamzpravy.py
youtube_dl/extractor/sixplay.py
youtube_dl/extractor/slideslive.py
youtube_dl/extractor/slutload.py
youtube_dl/extractor/streamcloud.py
youtube_dl/extractor/svt.py
youtube_dl/extractor/ted.py
youtube_dl/extractor/tele5.py [new file with mode: 0644]
youtube_dl/extractor/telecinco.py
youtube_dl/extractor/theplatform.py
youtube_dl/extractor/turner.py
youtube_dl/extractor/tvnet.py
youtube_dl/extractor/tvnow.py
youtube_dl/extractor/tvplay.py
youtube_dl/extractor/twitch.py
youtube_dl/extractor/udemy.py
youtube_dl/extractor/vgtv.py
youtube_dl/extractor/vidme.py
youtube_dl/extractor/vidzi.py
youtube_dl/extractor/vimeo.py
youtube_dl/extractor/viqeo.py [new file with mode: 0644]
youtube_dl/extractor/viu.py
youtube_dl/extractor/vk.py
youtube_dl/extractor/vlive.py
youtube_dl/extractor/vrv.py
youtube_dl/extractor/watchbox.py
youtube_dl/extractor/webofstories.py
youtube_dl/extractor/xfileshare.py
youtube_dl/extractor/xhamster.py
youtube_dl/extractor/yapfiles.py
youtube_dl/extractor/youjizz.py
youtube_dl/extractor/youporn.py
youtube_dl/extractor/yourporn.py [new file with mode: 0644]
youtube_dl/extractor/youtube.py
youtube_dl/extractor/zattoo.py
youtube_dl/extractor/zdf.py
youtube_dl/options.py
youtube_dl/postprocessor/ffmpeg.py
youtube_dl/utils.py
youtube_dl/version.py

diff --git a/AUTHORS b/AUTHORS
index eaf96d79d87ad6c73a8ff46a2a0ab8e344fcf419..b507cb8dfab6bf8768f795e2468cb7ea7a499e1b 100644 (file)
--- a/AUTHORS
+++ b/AUTHORS
@@ -239,3 +239,10 @@ Martin Weinelt
 Surya Oktafendri
 TingPing
 Alexandre Macabies
+Bastian de Groot
+Niklas Haas
+András Veres-Szentkirályi
+Enes Solak
+Nathan Rossi
+Thomas van der Berg
+Luca Cherubin
diff --git a/ChangeLog b/ChangeLog
index fe50870977efbb353ef0c9f56530320de0d9ca45..d184f69eee1d8818e499c60d64605c4157e56067 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,199 @@
+version 2018.09.10
+
+Core
++ [utils] Properly recognize AV1 codec (#17506)
+
+Extractors
++ [iprima] Add support for prima.iprima.cz (#17514)
++ [tele5] Add support for tele5.de (#7805, #7922, #17331, #17414)
+* [nbc] Fix extraction of percent encoded URLs (#17374)
+
+
+version 2018.09.08
+
+Extractors
+* [youtube] Fix extraction (#17457, #17464)
++ [pornhub:uservideos] Add support for new URLs (#17388)
+* [iprima] Confirm adult check (#17437)
+* [slideslive] Make check for video service name case-insensitive (#17429)
+* [radiojavan] Fix extraction (#17151)
+* [generic] Skip unsuccessful jwplayer extraction (#16735)
+
+
+version 2018.09.01
+
+Core
+* [utils] Skip remote IP addresses non matching to source address' IP version
+  when creating a connection (#13422, #17362)
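
For context, this fix boils down to filtering getaddrinfo() results by the source address family before connecting, so a resolved address of the wrong IP version no longer aborts a source-bound download. A rough sketch of the idea (not the actual utils.py code; the helper name and error handling are illustrative):

    import socket

    def create_connection_matching(address, source_address):
        # Sketch: resolve only addresses of the same family as the
        # source address (IPv4 vs IPv6) and try those in order.
        af = socket.AF_INET6 if ':' in source_address[0] else socket.AF_INET
        host, port = address
        err = None
        for family, socktype, proto, _, sa in socket.getaddrinfo(
                host, port, af, socket.SOCK_STREAM):
            sock = socket.socket(family, socktype, proto)
            try:
                sock.bind(source_address)
                sock.connect(sa)
                return sock
            except OSError as exc:
                err = exc
                sock.close()
        raise err or OSError('no suitable address found')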
+
+Extractors
++ [ard] Add support for one.ard.de (#17397)
+* [niconico] Fix extraction on python3 (#17393, #17407)
+* [ard] Extract f4m formats
+* [crunchyroll] Parse vilos media data (#17343)
++ [ard] Add support for Beta ARD Mediathek
++ [bandcamp] Extract more metadata (#13197)
+* [internazionale] Fix extraction of non-available-abroad videos (#17386)
+
+
+version 2018.08.28
+
+Extractors
++ [youtube:playlist] Add support for music album playlists (OLAK5uy_ prefix)
+  (#17361)
+* [bitchute] Fix extraction by pass custom User-Agent (#17360)
+* [webofstories:playlist] Fix extraction (#16914)
++ [tvplayhome] Add support for new tvplay URLs (#17344)
++ [generic] Allow relative src for videojs embeds (#17324)
++ [xfileshare] Add support for vidto.se (#17317)
++ [vidzi] Add support for vidzi.nu (#17316)
++ [nova:embed] Add support for media.cms.nova.cz (#17282)
+
+
+version 2018.08.22
+
+Core
+* [utils] Use pure browser header for User-Agent (#17236)
+
+Extractors
++ [kinopoisk] Add support for kinopoisk.ru (#17283)
++ [yourporn] Add support for yourporn.sexy (#17298)
++ [go] Add support for disneynow.go.com (#16299, #17264)
++ [6play] Add support for play.rtl.hr (#17249)
+* [anvato] Fallback to generic API key for access-key-to-API-key lookup
+  (#16788, #17254)
+* [lci] Fix extraction (#17274)
+* [bbccouk] Extend id URL regular expression (#17270)
+* [cwtv] Fix extraction (#17256)
+* [nova] Fix extraction (#17241)
++ [generic] Add support for expressen embeds
+* [raywenderlich] Adapt to site redesign (#17225)
++ [redbulltv] Add support redbull.com tv URLs (#17218)
++ [bitchute] Add support for bitchute.com (#14052)
++ [clyp] Add support for token protected media (#17184)
+* [imdb] Fix extension extraction (#17167)
+
+
+version 2018.08.04
+
+Extractors
+* [funk:channel] Improve byChannelAlias extraction (#17142)
+* [twitch] Fix authentication (#17024, #17126)
+* [twitch:vod] Improve URL regular expression (#17135)
+* [watchbox] Fix extraction (#17107)
+* [pbs] Fix extraction (#17109)
+* [theplatform] Relax URL regular expression (#16181, #17097)
++ [viqeo] Add support for viqeo.tv (#17066)
+
+
+version 2018.07.29
+
+Extractors
+* [crunchyroll:playlist] Restrict URL regular expression (#17069, #17076)
++ [pornhub] Add support for subtitles (#16924, #17088)
+* [ceskatelevize] Use https for API call (#16997, #16999)
+* [dailymotion:playlist] Fix extraction (#16894)
+* [ted] Improve extraction
+* [ted] Fix extraction for videos without nativeDownloads (#16756, #17085)
+* [telecinco] Fix extraction (#17080)
+* [mitele] Reduce number of requests
+* [rai] Return non HTTP relinker URL intact (#17055)
+* [vk] Fix extraction for inline only videos (#16923)
+* [streamcloud] Fix extraction (#17054)
+* [facebook] Fix tahoe player extraction with authentication (#16655)
++ [puhutv] Add support for puhutv.com (#12712, #16010, #16269)
+
+
+version 2018.07.21
+
+Core
++ [utils] Introduce url_or_none
+* [utils] Allow JSONP without function name (#17028)
++ [extractor/common] Extract DASH and MSS formats from SMIL manifests
+
+Extractors
++ [bbc] Add support for BBC Radio Play pages (#17022)
+* [iwara] Fix download URLs (#17026)
+* [vrtnu] Relax title extraction and extract JSON-LD (#17018)
++ [viu] Pass Referer and Origin headers and area id (#16992)
++ [vimeo] Add another config regular expression (#17013)
++ [facebook] Extract view count (#16942)
+* [dailymotion] Improve description extraction (#16984)
+* [slutload] Fix and improve extraction (#17001)
+* [mediaset] Fix extraction (#16977)
++ [theplatform] Add support for theplatform TLD customization (#16977)
+* [imgur] Relax URL regular expression (#16987)
+* [pornhub] Improve extraction and extract all formats (#12166, #15891, #16262,
+  #16959)
+
+
+version 2018.07.10
+
+Core
+* [utils] Share JSON-LD regular expression
+* [downloader/dash] Improve error handling (#16927)
+
+Extractors
++ [nrktv] Add support for new season and serie URL schema
++ [nrktv] Add support for new episode URL schema (#16909)
++ [frontendmasters] Add support for frontendmasters.com (#3661, #16328)
+* [funk] Fix extraction (#16918)
+* [watchbox] Fix extraction (#16904)
+* [dplayit] Sort formats
+* [dplayit] Fix extraction (#16901)
+* [youtube] Improve login error handling (#13822)
+
+
+version 2018.07.04
+
+Core
+* [extractor/common] Properly escape % in MPD templates (#16867)
+* [extractor/common] Use source URL as Referer for HTML5 entries (16849)
+* Prefer ffmpeg over avconv by default (#8622)
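
The % escaping fix deserves a line of context: a literal percent sign in an MPD segment template collides with the Python %-substitution used for $Number$-style placeholders, so it must be doubled first. A toy illustration (not the extractor code itself):

    # Double literal '%' before turning $Number$ into a %-format field.
    template = 'seg-$Number$-100%.m4s'
    safe = template.replace('%', '%%').replace('$Number$', '%(Number)d')
    assert safe % {'Number': 5} == 'seg-5-100%.m4s'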
+
+Extractors
+* [pluralsight] Switch to graphql (#16889, #16895, #16896, #16899)
+* [lynda] Simplify login and improve error capturing (#16891)
++ [go90] Add support for embed URLs (#16873)
+* [go90] Detect geo restriction error and pass geo verification headers
+  (#16874)
+* [vlive] Fix live streams extraction (#16871)
+* [npo] Fix typo (#16872)
++ [mediaset] Add support for new videos and extract all formats (#16568)
+* [dctptv] Restore extraction based on REST API (#16850)
+* [svt] Improve extraction and add support for pages (#16802)
+* [porncom] Fix extraction (#16808)
+
+
+version 2018.06.25
+
+Extractors
+* [joj] Relax URL regular expression (#16771)
+* [brightcove] Workaround sonyliv DRM protected videos (#16807)
+* [motherless] Fix extraction (#16786)
+* [itv] Make SOAP request non fatal and extract metadata from webpage (#16780)
+- [foxnews:insider] Remove extractor (#15810)
++ [foxnews] Add support for iframe embeds (#15810, #16711)
+
+
+version 2018.06.19
+
+Core
++ [extractor/common] Introduce expected_status in _download_* methods
+  for convenient accept of HTTP requests failed with non 2xx status codes
++ [compat] Introduce compat_integer_types
+
+Extractors
+* [peertube] Improve generic support (#16733)
++ [6play] Use geo verification headers
+* [rtbf] Fix extraction for python 3.2
+* [vgtv] Improve HLS formats extraction
++ [vgtv] Add support for www.aftonbladet.se/tv URLs
+* [bbccouk] Use expected_status
+* [markiza] Expect 500 HTTP status code
+* [tvnow] Try all clear manifest URLs (#15361)
+
+
 version 2018.06.18
 
 Core
diff --git a/README.md b/README.md
index 499a0c2067ca39a201a8379a7ce1c2bf7cdb755e..dd068a462bebd3fb8a6982181b364c967ceff083 100644 (file)
--- a/README.md
+++ b/README.md
@@ -17,7 +17,7 @@ youtube-dl - download videos from youtube.com or other video platforms
 
 # INSTALLATION
 
-To install it right away for all UNIX users (Linux, OS X, etc.), type:
+To install it right away for all UNIX users (Linux, macOS, etc.), type:
 
     sudo curl -L https://yt-dl.org/downloads/latest/youtube-dl -o /usr/local/bin/youtube-dl
     sudo chmod a+rx /usr/local/bin/youtube-dl
@@ -35,7 +35,7 @@ You can also use pip:
     
 This command will update youtube-dl if you have already installed it. See the [pypi page](https://pypi.python.org/pypi/youtube_dl) for more information.
 
-OS X users can install youtube-dl with [Homebrew](https://brew.sh/):
+macOS users can install youtube-dl with [Homebrew](https://brew.sh/):
 
     brew install youtube-dl
 
@@ -427,9 +427,9 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
                                      default; fix file if we can, warn
                                      otherwise)
     --prefer-avconv                  Prefer avconv over ffmpeg for running the
-                                     postprocessors (default)
-    --prefer-ffmpeg                  Prefer ffmpeg over avconv for running the
                                      postprocessors
+    --prefer-ffmpeg                  Prefer ffmpeg over avconv for running the
+                                     postprocessors (default)
     --ffmpeg-location PATH           Location of the ffmpeg/avconv binary;
                                      either the path to the binary or its
                                      containing directory.
@@ -442,7 +442,7 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
 
 # CONFIGURATION
 
-You can configure youtube-dl by placing any supported command line option to a configuration file. On Linux and OS X, the system wide configuration file is located at `/etc/youtube-dl.conf` and the user wide configuration file at `~/.config/youtube-dl/config`. On Windows, the user wide configuration file locations are `%APPDATA%\youtube-dl\config.txt` or `C:\Users\<user name>\youtube-dl.conf`. Note that by default configuration file may not exist so you may need to create it yourself.
+You can configure youtube-dl by placing any supported command line option to a configuration file. On Linux and macOS, the system wide configuration file is located at `/etc/youtube-dl.conf` and the user wide configuration file at `~/.config/youtube-dl/config`. On Windows, the user wide configuration file locations are `%APPDATA%\youtube-dl\config.txt` or `C:\Users\<user name>\youtube-dl.conf`. Note that by default configuration file may not exist so you may need to create it yourself.
 
 For example, with the following configuration file youtube-dl will always extract the audio, not copy the mtime, use a proxy and save all videos under `Movies` directory in your home directory:
 ```
@@ -870,7 +870,7 @@ Either prepend `https://www.youtube.com/watch?v=` or separate the ID from the op
 
 Use the `--cookies` option, for example `--cookies /path/to/cookies/file.txt`.
 
-In order to extract cookies from browser use any conforming browser extension for exporting cookies. For example, [cookies.txt](https://chrome.google.com/webstore/detail/cookiestxt/njabckikapfpffapmjgojcnbfjonfjfg) (for Chrome) or [Export Cookies](https://addons.mozilla.org/en-US/firefox/addon/export-cookies/) (for Firefox).
+In order to extract cookies from browser use any conforming browser extension for exporting cookies. For example, [cookies.txt](https://chrome.google.com/webstore/detail/cookiestxt/njabckikapfpffapmjgojcnbfjonfjfg) (for Chrome) or [cookies.txt](https://addons.mozilla.org/en-US/firefox/addon/cookies-txt/) (for Firefox).
 
 Note that the cookies file must be in Mozilla/Netscape format and the first line of the cookies file must be either `# HTTP Cookie File` or `# Netscape HTTP Cookie File`. Make sure you have correct [newline format](https://en.wikipedia.org/wiki/Newline) in the cookies file and convert newlines if necessary to correspond with your OS, namely `CRLF` (`\r\n`) for Windows and `LF` (`\n`) for Unix and Unix-like systems (Linux, macOS, etc.). `HTTP Error 400: Bad Request` when using `--cookies` is a good sign of invalid newline format.
 
diff --git a/README.txt b/README.txt
index fec09ab4af79fa02e7dc6f9a725644e705f9b384..a0f20fdb246486f7d7f8e39322c0f4536a8e5c81 100644 (file)
--- a/README.txt
+++ b/README.txt
@@ -20,7 +20,7 @@ youtube-dl - download videos from youtube.com or other video platforms
 INSTALLATION
 
 
-To install it right away for all UNIX users (Linux, OS X, etc.), type:
+To install it right away for all UNIX users (Linux, macOS, etc.), type:
 
     sudo curl -L https://yt-dl.org/downloads/latest/youtube-dl -o /usr/local/bin/youtube-dl
     sudo chmod a+rx /usr/local/bin/youtube-dl
@@ -41,7 +41,7 @@ You can also use pip:
 This command will update youtube-dl if you have already installed it.
 See the pypi page for more information.
 
-OS X users can install youtube-dl with Homebrew:
+macOS users can install youtube-dl with Homebrew:
 
     brew install youtube-dl
 
@@ -474,9 +474,9 @@ Post-processing Options:
                                      default; fix file if we can, warn
                                      otherwise)
     --prefer-avconv                  Prefer avconv over ffmpeg for running the
-                                     postprocessors (default)
-    --prefer-ffmpeg                  Prefer ffmpeg over avconv for running the
                                      postprocessors
+    --prefer-ffmpeg                  Prefer ffmpeg over avconv for running the
+                                     postprocessors (default)
     --ffmpeg-location PATH           Location of the ffmpeg/avconv binary;
                                      either the path to the binary or its
                                      containing directory.
@@ -493,7 +493,7 @@ CONFIGURATION
 
 
 You can configure youtube-dl by placing any supported command line
-option to a configuration file. On Linux and OS X, the system wide
+option to a configuration file. On Linux and macOS, the system wide
 configuration file is located at /etc/youtube-dl.conf and the user wide
 configuration file at ~/.config/youtube-dl/config. On Windows, the user
 wide configuration file locations are %APPDATA%\youtube-dl\config.txt or
@@ -1147,7 +1147,7 @@ Use the --cookies option, for example
 
 In order to extract cookies from browser use any conforming browser
 extension for exporting cookies. For example, cookies.txt (for Chrome)
-or Export Cookies (for Firefox).
+or cookies.txt (for Firefox).
 
 Note that the cookies file must be in Mozilla/Netscape format and the
 first line of the cookies file must be either # HTTP Cookie File or
diff --git a/docs/supportedsites.md b/docs/supportedsites.md
index 432a7ba934125014ed4c73ca3b26eec13619bcce..9b86017519043125ffc1851399ac6d0585ab415c 100644 (file)
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -56,6 +56,7 @@
  - **archive.org**: archive.org videos
  - **ARD**
  - **ARD:mediathek**
+ - **ARDBetaMediathek**
  - **Arkena**
  - **arte.tv**
  - **arte.tv:+7**
  - **BiliBili**
  - **BioBioChileTV**
  - **BIQLE**
+ - **BitChute**
+ - **BitChuteChannel**
  - **BleacherReport**
  - **BleacherReportCMS**
  - **blinkx**
  - **Crackle**
  - **Criterion**
  - **CrooksAndLiars**
- - **Crunchyroll**
+ - **crunchyroll**
  - **crunchyroll:playlist**
  - **CSNNE**
  - **CSpan**: C-SPAN
  - **Foxgay**
  - **foxnews**: Fox News and Fox Business Video
  - **foxnews:article**
- - **foxnews:insider**
  - **FoxSports**
  - **france2.fr:generation-what**
  - **FranceCulture**
  - **Freesound**
  - **freespeech.org**
  - **FreshLive**
+ - **FrontendMasters**
+ - **FrontendMastersCourse**
+ - **FrontendMastersLesson**
  - **Funimation**
  - **FunkChannel**
  - **FunkMix**
  - **Ketnet**
  - **KhanAcademy**
  - **KickStarter**
+ - **KinoPoisk**
  - **KonserthusetPlay**
  - **kontrtube**: KontrTube.ru - Труба зовёт
  - **KrasView**: Красвью
  - **Normalboots**
  - **NosVideo**
  - **Nova**: TN.cz, Prásk.tv, Nova.cz, Novaplus.cz, FANDA.tv, Krásná.cz and Doma.cz
+ - **NovaEmbed**
  - **nowness**
  - **nowness:playlist**
  - **nowness:series**
  - **NRKSkole**: NRK Skole
  - **NRKTV**: NRK TV and NRK Radio
  - **NRKTVDirekte**: NRK TV Direkte and NRK Radio Direkte
+ - **NRKTVEpisode**
  - **NRKTVEpisodes**
+ - **NRKTVSeason**
  - **NRKTVSeries**
  - **ntv.ru**
  - **Nuvid**
  - **PrimeShareTV**
  - **PromptFile**
  - **prosiebensat1**: ProSiebenSat.1 Digital
+ - **puhutv**
+ - **puhutv:serie**
  - **Puls4**
  - **Pyvideo**
  - **qqmusic**: QQ音乐
  - **RaiPlayLive**
  - **RaiPlayPlaylist**
  - **RayWenderlich**
+ - **RayWenderlichCourse**
  - **RBMARadio**
  - **RDS**: RDS.ca
  - **RedBullTV**
  - **StretchInternet**
  - **SunPorno**
  - **SVT**
+ - **SVTPage**
  - **SVTPlay**: SVT Play and Öppet arkiv
  - **SVTSeries**
  - **SWRMediathek**
  - **techtv.mit.edu**
  - **ted**
  - **Tele13**
+ - **Tele5**
  - **TeleBruxelles**
  - **Telecinco**: telecinco.es, cuatro.com and mediaset.es
  - **Telegraaf**
  - **tvp:embed**: Telewizja Polska
  - **tvp:series**
  - **TVPlayer**
+ - **TVPlayHome**
  - **Tweakers**
  - **twitch:chapter**
  - **twitch:clips**
  - **Vimple**: Vimple - one-click video hosting
  - **Vine**
  - **vine:user**
+ - **Viqeo**
  - **Viu**
  - **viu:ott**
  - **viu:playlist**
  - **YouNowLive**
  - **YouNowMoment**
  - **YouPorn**
+ - **YourPorn**
  - **YourUpload**
  - **youtube**: YouTube.com
  - **youtube:channel**: YouTube.com channels
diff --git a/test/test_utils.py b/test/test_utils.py
index e63af01668ceb84cc2d8452490bc94eb48791f40..9e28e008f5548f28598e38a64ad18165eecd463a 100644 (file)
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -78,6 +78,7 @@ from youtube_dl.utils import (
     uppercase_escape,
     lowercase_escape,
     url_basename,
+    url_or_none,
     base_url,
     urljoin,
     urlencode_postdata,
@@ -507,6 +508,16 @@ class TestUtil(unittest.TestCase):
         self.assertEqual(urljoin('http://foo.de/', ['foobar']), None)
         self.assertEqual(urljoin('http://foo.de/a/b/c.txt', '.././../d.txt'), 'http://foo.de/d.txt')
 
+    def test_url_or_none(self):
+        self.assertEqual(url_or_none(None), None)
+        self.assertEqual(url_or_none(''), None)
+        self.assertEqual(url_or_none('foo'), None)
+        self.assertEqual(url_or_none('http://foo.de'), 'http://foo.de')
+        self.assertEqual(url_or_none('https://foo.de'), 'https://foo.de')
+        self.assertEqual(url_or_none('http$://foo.de'), None)
+        self.assertEqual(url_or_none('http://foo.de'), 'http://foo.de')
+        self.assertEqual(url_or_none('//foo.de'), '//foo.de')
+
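
The assertions above pin down the new helper's contract: absolute URLs with a scheme and protocol-relative //host URLs pass through, everything else becomes None. A minimal implementation consistent with these tests (a sketch, not necessarily the exact utils.py source):

    import re

    def url_or_none(url):
        # Accept 'scheme://...' and protocol-relative '//...' strings;
        # map empty values and anything else to None.
        if not url or not isinstance(url, str):
            return None
        url = url.strip()
        return url if re.match(r'^(?:[a-zA-Z][\da-z.+-]*:)?//', url) else None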
     def test_parse_age_limit(self):
         self.assertEqual(parse_age_limit(None), None)
         self.assertEqual(parse_age_limit(False), None)
@@ -717,6 +728,10 @@ class TestUtil(unittest.TestCase):
         d = json.loads(stripped)
         self.assertEqual(d, {'status': 'success'})
 
+        stripped = strip_jsonp('({"status": "success"});')
+        d = json.loads(stripped)
+        self.assertEqual(d, {'status': 'success'})
+
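
The added case covers JSONP with an anonymous wrapper, i.e. ({...}); with no callback name. A simplified unwrapper that handles both the named and anonymous forms (a sketch, not the utils.py regex verbatim):

    import re

    def strip_jsonp(code):
        # Strip an optional callback name, the wrapping parentheses and
        # a trailing semicolon, keeping only the JSON payload.
        return re.sub(
            r'(?s)^(?:window\.)?[a-zA-Z0-9_.$]*\s*\(\s*(?P<data>.*)\)\s*;?\s*$',
            r'\g<data>', code)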
     def test_uppercase_escape(self):
         self.assertEqual(uppercase_escape('aä'), 'aä')
         self.assertEqual(uppercase_escape('\\U0001d550'), '𝕐')
@@ -770,6 +785,10 @@ class TestUtil(unittest.TestCase):
             'vcodec': 'h264',
             'acodec': 'aac',
         })
+        self.assertEqual(parse_codecs('av01.0.05M.08'), {
+            'vcodec': 'av01.0.05M.08',
+            'acodec': 'none',
+        })
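
Recognizing AV1 here means the av01 codec family in an RFC 6381 codecs string is classified as video (with acodec 'none' for a video-only stream) instead of falling through unrecognized. A deliberately simplified classifier in the same spirit (illustrative only, not parse_codecs itself):

    def split_codecs(codecs_str):
        # Classify each comma-separated codec by its leading family tag.
        vcodec = acodec = None
        for part in (p.strip() for p in codecs_str.split(',') if p.strip()):
            family = part.split('.')[0]
            if family in ('avc1', 'avc3', 'h264', 'hev1', 'hvc1',
                          'vp8', 'vp9', 'av01'):
                vcodec = vcodec or part
            elif family in ('mp4a', 'aac', 'mp3', 'ac-3', 'ec-3',
                            'opus', 'vorbis'):
                acodec = acodec or part
        return {'vcodec': vcodec or 'none', 'acodec': acodec or 'none'}

    # split_codecs('av01.0.05M.08') -> {'vcodec': 'av01.0.05M.08', 'acodec': 'none'}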
 
     def test_escape_rfc3986(self):
         reserved = "!*'();:@&=+$,/?#[]"
diff --git a/youtube-dl b/youtube-dl
index 32154d701f5e82e28c2f8d711565536ce7c4da07..8879627e81e648ba6e37e920db3fba323e2ea16d 100755 (executable)
Binary files a/youtube-dl and b/youtube-dl differ
diff --git a/youtube-dl.1 b/youtube-dl.1
index 89a3fb67efc6c544642483d21c240104df290591..61ee72ff416faf3d55e97160e65a1f1bd9c55af0 100644 (file)
--- a/youtube-dl.1
+++ b/youtube-dl.1
@@ -890,12 +890,12 @@ One of never (do nothing), warn (only emit a warning), detect_or_warn
 .RE
 .TP
 .B \-\-prefer\-avconv
-Prefer avconv over ffmpeg for running the postprocessors (default)
+Prefer avconv over ffmpeg for running the postprocessors
 .RS
 .RE
 .TP
 .B \-\-prefer\-ffmpeg
-Prefer ffmpeg over avconv for running the postprocessors
+Prefer ffmpeg over avconv for running the postprocessors (default)
 .RS
 .RE
 .TP
@@ -921,7 +921,7 @@ srt|ass|vtt|lrc)
 .PP
 You can configure youtube\-dl by placing any supported command line
 option to a configuration file.
-On Linux and OS X, the system wide configuration file is located at
+On Linux and macOS, the system wide configuration file is located at
 \f[C]/etc/youtube\-dl.conf\f[] and the user wide configuration file at
 \f[C]~/.config/youtube\-dl/config\f[].
 On Windows, the user wide configuration file locations are
@@ -1753,8 +1753,8 @@ In order to extract cookies from browser use any conforming browser
 extension for exporting cookies.
 For example,
 cookies.txt (https://chrome.google.com/webstore/detail/cookiestxt/njabckikapfpffapmjgojcnbfjonfjfg)
-(for Chrome) or Export
-Cookies (https://addons.mozilla.org/en-US/firefox/addon/export-cookies/)
+(for Chrome) or
+cookies.txt (https://addons.mozilla.org/en-US/firefox/addon/cookies-txt/)
 (for Firefox).
 .PP
 Note that the cookies file must be in Mozilla/Netscape format and the
diff --git a/youtube-dl.fish b/youtube-dl.fish
index 05ece67d88008030740752c0e57c089708e3979e..c9164180d3646727e28d8434af0a465ba0e2b8c8 100644 (file)
--- a/youtube-dl.fish
+++ b/youtube-dl.fish
@@ -157,8 +157,8 @@ complete --command youtube-dl --long-option add-metadata --description 'Write me
 complete --command youtube-dl --long-option metadata-from-title --description 'Parse additional metadata like song title / artist from the video title. The format syntax is the same as --output. Regular expression with named capture groups may also be used. The parsed parameters replace existing values. Example: --metadata-from-title "%(artist)s - %(title)s" matches a title like "Coldplay - Paradise". Example (regex): --metadata-from-title "(?P<artist>.+?) - (?P<title>.+)"'
 complete --command youtube-dl --long-option xattrs --description 'Write metadata to the video file'"'"'s xattrs (using dublin core and xdg standards)'
 complete --command youtube-dl --long-option fixup --description 'Automatically correct known faults of the file. One of never (do nothing), warn (only emit a warning), detect_or_warn (the default; fix file if we can, warn otherwise)'
-complete --command youtube-dl --long-option prefer-avconv --description 'Prefer avconv over ffmpeg for running the postprocessors (default)'
-complete --command youtube-dl --long-option prefer-ffmpeg --description 'Prefer ffmpeg over avconv for running the postprocessors'
+complete --command youtube-dl --long-option prefer-avconv --description 'Prefer avconv over ffmpeg for running the postprocessors'
+complete --command youtube-dl --long-option prefer-ffmpeg --description 'Prefer ffmpeg over avconv for running the postprocessors (default)'
 complete --command youtube-dl --long-option ffmpeg-location --description 'Location of the ffmpeg/avconv binary; either the path to the binary or its containing directory.'
 complete --command youtube-dl --long-option exec --description 'Execute a command on the file after downloading, similar to find'"'"'s -exec syntax. Example: --exec '"'"'adb push {} /sdcard/Music/ && rm {}'"'"''
 complete --command youtube-dl --long-option convert-subs --description 'Convert the subtitles to other format (currently supported: srt|ass|vtt|lrc)'
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index 2a405c5cac386d67b11dc0489dccf4ee68951c53..38ba43a977c1627f4d1f7a181c093477108ab959 100755 (executable)
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -305,8 +305,8 @@ class YoutubeDL(object):
     http_chunk_size.
 
     The following options are used by the post processors:
-    prefer_ffmpeg:     If True, use ffmpeg instead of avconv if both are available,
-                       otherwise prefer avconv.
+    prefer_ffmpeg:     If False, use avconv instead of ffmpeg if both are available,
+                       otherwise prefer ffmpeg.
     postprocessor_args: A list of additional command-line arguments for the
                         postprocessor.
 
diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py
index 4a611f183408ce224404c2fce360ee4d50792dac..7b770340ff4316a1d6710fc4f37cb912d62bfb9a 100644 (file)
--- a/youtube_dl/compat.py
+++ b/youtube_dl/compat.py
@@ -2787,6 +2787,12 @@ except NameError:  # Python 3
     compat_numeric_types = (int, float, complex)
 
 
+try:
+    compat_integer_types = (int, long)
+except NameError:  # Python 3
+    compat_integer_types = (int, )
+
+
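
The point of this tuple is that an "is this an integer?" check can be spelled one way on both Python 2 (int, long) and Python 3 (int,). The expected_status machinery from the 2018.06.19 changelog entry is the intended consumer, roughly along these lines (a sketch using the compat_integer_types defined above, not common.py verbatim):

    def status_is_expected(status, expected_status):
        # expected_status may be a single integer, a list/tuple of
        # integers, or a callable taking the status code.
        if expected_status is None:
            return False
        if isinstance(expected_status, compat_integer_types):
            return status == expected_status
        if callable(expected_status):
            return expected_status(status)
        return status in expected_status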
 if sys.version_info < (2, 7):
     def compat_socket_create_connection(address, timeout, source_address=None):
         host, port = address
@@ -2974,6 +2980,7 @@ __all__ = [
     'compat_http_client',
     'compat_http_server',
     'compat_input',
+    'compat_integer_types',
     'compat_itertools_count',
     'compat_kwargs',
     'compat_numeric_types',
diff --git a/youtube_dl/downloader/dash.py b/youtube_dl/downloader/dash.py
index 576ece6db369254bf491cac972dc845ef1ac2653..eaa7adf7c4f2fc39d84984642a1ef17c014d089f 100644 (file)
--- a/youtube_dl/downloader/dash.py
+++ b/youtube_dl/downloader/dash.py
@@ -2,7 +2,10 @@ from __future__ import unicode_literals
 
 from .fragment import FragmentFD
 from ..compat import compat_urllib_error
-from ..utils import urljoin
+from ..utils import (
+    DownloadError,
+    urljoin,
+)
 
 
 class DashSegmentsFD(FragmentFD):
@@ -57,6 +60,14 @@ class DashSegmentsFD(FragmentFD):
                     count += 1
                     if count <= fragment_retries:
                         self.report_retry_fragment(err, frag_index, count, fragment_retries)
+                except DownloadError:
+                    # Don't retry fragment if error occurred during HTTP downloading
+                    # itself since it has own retry settings
+                    if not fatal:
+                        self.report_skip_fragment(frag_index)
+                        break
+                    raise
+
             if count > fragment_retries:
                 if not fatal:
                     self.report_skip_fragment(frag_index)
diff --git a/youtube_dl/extractor/adultswim.py b/youtube_dl/extractor/adultswim.py
index acc4ce38dca31a4ec8401d253044eb7e4fb91b3e..88c96a95060738833f1cc7c85a251f1a26f4962e 100644 (file)
--- a/youtube_dl/extractor/adultswim.py
+++ b/youtube_dl/extractor/adultswim.py
@@ -7,6 +7,7 @@ from .turner import TurnerBaseIE
 from ..utils import (
     int_or_none,
     strip_or_none,
+    url_or_none,
 )
 
 
@@ -98,7 +99,7 @@ class AdultSwimIE(TurnerBaseIE):
             if not video_id:
                 entries = []
                 for episode in video_data.get('archiveEpisodes', []):
-                    episode_url = episode.get('url')
+                    episode_url = url_or_none(episode.get('url'))
                     if not episode_url:
                         continue
                     entries.append(self.url_result(
diff --git a/youtube_dl/extractor/afreecatv.py b/youtube_dl/extractor/afreecatv.py
index 4b3d9713654ee7a0b939c8f4d39e97ab79c91071..6275e5209e23401c406f6df2206b13e978d0c60e 100644 (file)
--- a/youtube_dl/extractor/afreecatv.py
+++ b/youtube_dl/extractor/afreecatv.py
@@ -9,6 +9,7 @@ from ..utils import (
     determine_ext,
     ExtractorError,
     int_or_none,
+    url_or_none,
     urlencode_postdata,
     xpath_text,
 )
@@ -304,7 +305,7 @@ class AfreecaTVIE(InfoExtractor):
             file_elements = video_element.findall(compat_xpath('./file'))
             one = len(file_elements) == 1
             for file_num, file_element in enumerate(file_elements, start=1):
-                file_url = file_element.text
+                file_url = url_or_none(file_element.text)
                 if not file_url:
                     continue
                 key = file_element.get('key', '')
diff --git a/youtube_dl/extractor/amp.py b/youtube_dl/extractor/amp.py
index fde1a8ff74d8d6bcc85eb2520947e96d4205b176..7ff098cfa0c9789122dab4787f877a1a9bf45352 100644 (file)
--- a/youtube_dl/extractor/amp.py
+++ b/youtube_dl/extractor/amp.py
@@ -3,11 +3,12 @@ from __future__ import unicode_literals
 
 from .common import InfoExtractor
 from ..utils import (
-    int_or_none,
-    parse_iso8601,
-    mimetype2ext,
     determine_ext,
     ExtractorError,
+    int_or_none,
+    mimetype2ext,
+    parse_iso8601,
+    url_or_none,
 )
 
 
@@ -35,7 +36,7 @@ class AMPIE(InfoExtractor):
                 media_thumbnail = [media_thumbnail]
             for thumbnail_data in media_thumbnail:
                 thumbnail = thumbnail_data.get('@attributes', {})
-                thumbnail_url = thumbnail.get('url')
+                thumbnail_url = url_or_none(thumbnail.get('url'))
                 if not thumbnail_url:
                     continue
                 thumbnails.append({
@@ -51,7 +52,7 @@ class AMPIE(InfoExtractor):
                 media_subtitle = [media_subtitle]
             for subtitle_data in media_subtitle:
                 subtitle = subtitle_data.get('@attributes', {})
-                subtitle_href = subtitle.get('href')
+                subtitle_href = url_or_none(subtitle.get('href'))
                 if not subtitle_href:
                     continue
                 subtitles.setdefault(subtitle.get('lang') or 'en', []).append({
@@ -65,7 +66,7 @@ class AMPIE(InfoExtractor):
             media_content = [media_content]
         for media_data in media_content:
             media = media_data.get('@attributes', {})
-            media_url = media.get('url')
+            media_url = url_or_none(media.get('url'))
             if not media_url:
                 continue
             ext = mimetype2ext(media.get('type')) or determine_ext(media_url)
@@ -79,7 +80,7 @@ class AMPIE(InfoExtractor):
             else:
                 formats.append({
                     'format_id': media_data.get('media-category', {}).get('@attributes', {}).get('label'),
-                    'url': media['url'],
+                    'url': media_url,
                     'tbr': int_or_none(media.get('bitrate')),
                     'filesize': int_or_none(media.get('fileSize')),
                     'ext': ext,
diff --git a/youtube_dl/extractor/animeondemand.py b/youtube_dl/extractor/animeondemand.py
index 1fe5d5e56e698728cf17d15b35624c4fe1882798..00ce684d1c9811ef3ba885194c23fd012d935b6b 100644 (file)
--- a/youtube_dl/extractor/animeondemand.py
+++ b/youtube_dl/extractor/animeondemand.py
@@ -8,6 +8,7 @@ from ..utils import (
     determine_ext,
     extract_attributes,
     ExtractorError,
+    url_or_none,
     urlencode_postdata,
     urljoin,
 )
@@ -165,7 +166,7 @@ class AnimeOnDemandIE(InfoExtractor):
                         }, fatal=False)
                     if not playlist:
                         continue
-                    stream_url = playlist.get('streamurl')
+                    stream_url = url_or_none(playlist.get('streamurl'))
                     if stream_url:
                         rtmp = re.search(
                             r'^(?P<url>rtmpe?://(?P<host>[^/]+)/(?P<app>.+/))(?P<playpath>mp[34]:.+)',
diff --git a/youtube_dl/extractor/anvato.py b/youtube_dl/extractor/anvato.py
index f6a78eb5d4cb1288d3bc2b864303343b24603422..84e841035afb16a44d7225456845c1162d712fa3 100644 (file)
--- a/youtube_dl/extractor/anvato.py
+++ b/youtube_dl/extractor/anvato.py
@@ -134,9 +134,33 @@ class AnvatoIE(InfoExtractor):
         'telemundo': 'anvato_mcp_telemundo_web_prod_c5278d51ad46fda4b6ca3d0ea44a7846a054f582'
     }
 
+    _API_KEY = '3hwbSuqqT690uxjNYBktSQpa5ZrpYYR0Iofx7NcJHyA'
+
     _ANVP_RE = r'<script[^>]+\bdata-anvp\s*=\s*(["\'])(?P<anvp>(?:(?!\1).)+)\1'
     _AUTH_KEY = b'\x31\xc2\x42\x84\x9e\x73\xa0\xce'
 
+    _TESTS = [{
+        # from https://www.boston25news.com/news/watch-humpback-whale-breaches-right-next-to-fishing-boat-near-nh/817484874
+        'url': 'anvato:8v9BEynrwx8EFLYpgfOWcG1qJqyXKlRM:4465496',
+        'info_dict': {
+            'id': '4465496',
+            'ext': 'mp4',
+            'title': 'VIDEO: Humpback whale breaches right next to NH boat',
+            'description': 'VIDEO: Humpback whale breaches right next to NH boat. Footage courtesy: Zach Fahey.',
+            'duration': 22,
+            'timestamp': 1534855680,
+            'upload_date': '20180821',
+            'uploader': 'ANV',
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }, {
+        # from https://sanfrancisco.cbslocal.com/2016/06/17/source-oakland-cop-on-leave-for-having-girlfriend-help-with-police-reports/
+        'url': 'anvato:DVzl9QRzox3ZZsP9bNu5Li3X7obQOnqP:3417601',
+        'only_matching': True,
+    }]
+
     def __init__(self, *args, **kwargs):
         super(AnvatoIE, self).__init__(*args, **kwargs)
         self.__server_time = None
@@ -169,7 +193,8 @@ class AnvatoIE(InfoExtractor):
             'api': {
                 'anvrid': anvrid,
                 'anvstk': md5_text('%s|%s|%d|%s' % (
-                    access_key, anvrid, server_time, self._ANVACK_TABLE[access_key])),
+                    access_key, anvrid, server_time,
+                    self._ANVACK_TABLE.get(access_key, self._API_KEY))),
                 'anvts': server_time,
             },
         }
@@ -284,5 +309,6 @@ class AnvatoIE(InfoExtractor):
         mobj = re.match(self._VALID_URL, url)
         access_key, video_id = mobj.group('access_key_or_mcp', 'id')
         if access_key not in self._ANVACK_TABLE:
-            access_key = self._MCP_TO_ACCESS_KEY_TABLE[access_key]
+            access_key = self._MCP_TO_ACCESS_KEY_TABLE.get(
+                access_key) or access_key
         return self._get_anvato_videos(access_key, video_id)
diff --git a/youtube_dl/extractor/aol.py b/youtube_dl/extractor/aol.py
index b50f454ee0ca661aa3cf93ab05121bf8857eeca8..cb92791931de1ab1e6adf049ccd354fbd65b0cd4 100644 (file)
--- a/youtube_dl/extractor/aol.py
+++ b/youtube_dl/extractor/aol.py
@@ -7,6 +7,7 @@ from .common import InfoExtractor
 from ..utils import (
     ExtractorError,
     int_or_none,
+    url_or_none,
 )
 
 
@@ -77,7 +78,7 @@ class AolIE(InfoExtractor):
             formats.extend(self._extract_m3u8_formats(
                 m3u8_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
         for rendition in video_data.get('renditions', []):
-            video_url = rendition.get('url')
+            video_url = url_or_none(rendition.get('url'))
             if not video_url:
                 continue
             ext = rendition.get('format')
diff --git a/youtube_dl/extractor/apa.py b/youtube_dl/extractor/apa.py
index a30a935aa85233b46be88bfad6807b1534108003..98ccdaa4a976922285d85a69cb37f85b5faa5edf 100644 (file)
--- a/youtube_dl/extractor/apa.py
+++ b/youtube_dl/extractor/apa.py
@@ -4,10 +4,10 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..compat import compat_str
 from ..utils import (
     determine_ext,
     js_to_json,
+    url_or_none,
 )
 
 
@@ -68,8 +68,8 @@ class APAIE(InfoExtractor):
         for source in sources:
             if not isinstance(source, dict):
                 continue
-            source_url = source.get('file')
-            if not source_url or not isinstance(source_url, compat_str):
+            source_url = url_or_none(source.get('file'))
+            if not source_url:
                 continue
             ext = determine_ext(source_url)
             if ext == 'm3u8':
diff --git a/youtube_dl/extractor/aparat.py b/youtube_dl/extractor/aparat.py
index e394cb66143e2e72f029d629fe4dd6b4b5e7cf8e..6eb8bbb6e989d0310a0b447ef1928ba081567a6f 100644 (file)
--- a/youtube_dl/extractor/aparat.py
+++ b/youtube_dl/extractor/aparat.py
@@ -5,6 +5,7 @@ from .common import InfoExtractor
 from ..utils import (
     int_or_none,
     mimetype2ext,
+    url_or_none,
 )
 
 
@@ -43,7 +44,7 @@ class AparatIE(InfoExtractor):
 
         formats = []
         for item in file_list[0]:
-            file_url = item.get('file')
+            file_url = url_or_none(item.get('file'))
             if not file_url:
                 continue
             ext = mimetype2ext(item.get('type'))
diff --git a/youtube_dl/extractor/ard.py b/youtube_dl/extractor/ard.py
index 86951d975dd1a11926c630b59f7c608921e077de..6bf8f61eb03c0994c87651af51ecb58cd3065060 100644 (file)
--- a/youtube_dl/extractor/ard.py
+++ b/youtube_dl/extractor/ard.py
@@ -5,7 +5,6 @@ import re
 
 from .common import InfoExtractor
 from .generic import GenericIE
-from ..compat import compat_str
 from ..utils import (
     determine_ext,
     ExtractorError,
@@ -15,13 +14,14 @@ from ..utils import (
     unified_strdate,
     xpath_text,
     update_url_query,
+    url_or_none,
 )
 from ..compat import compat_etree_fromstring
 
 
 class ARDMediathekIE(InfoExtractor):
     IE_NAME = 'ARD:mediathek'
-    _VALID_URL = r'^https?://(?:(?:www\.)?ardmediathek\.de|mediathek\.(?:daserste|rbb-online)\.de)/(?:.*/)(?P<video_id>[0-9]+|[^0-9][^/\?]+)[^/\?]*(?:\?.*)?'
+    _VALID_URL = r'^https?://(?:(?:www\.)?ardmediathek\.de|mediathek\.(?:daserste|rbb-online)\.de|one\.ard\.de)/(?:.*/)(?P<video_id>[0-9]+|[^0-9][^/\?]+)[^/\?]*(?:\?.*)?'
 
     _TESTS = [{
         # available till 26.07.2022
@@ -37,6 +37,9 @@ class ARDMediathekIE(InfoExtractor):
             # m3u8 download
             'skip_download': True,
         }
+    }, {
+        'url': 'https://one.ard.de/tv/Mord-mit-Aussicht/Mord-mit-Aussicht-6-39-T%C3%B6dliche-Nach/ONE/Video?bcastId=46384294&documentId=55586872',
+        'only_matching': True,
     }, {
         # audio
         'url': 'http://www.ardmediathek.de/tv/WDR-H%C3%B6rspiel-Speicher/Tod-eines-Fu%C3%9Fballers/WDR-3/Audio-Podcast?documentId=28488308&bcastId=23074086',
@@ -100,7 +103,7 @@ class ARDMediathekIE(InfoExtractor):
                 quality = stream.get('_quality')
                 server = stream.get('_server')
                 for stream_url in stream_urls:
-                    if not isinstance(stream_url, compat_str) or '//' not in stream_url:
+                    if not url_or_none(stream_url):
                         continue
                     ext = determine_ext(stream_url)
                     if quality != 'auto' and ext in ('f4m', 'm3u8'):
@@ -282,3 +285,76 @@ class ARDIE(InfoExtractor):
             'upload_date': upload_date,
             'thumbnail': thumbnail,
         }
+
+
+class ARDBetaMediathekIE(InfoExtractor):
+    _VALID_URL = r'https://beta\.ardmediathek\.de/[a-z]+/player/(?P<video_id>[a-zA-Z0-9]+)/(?P<display_id>[^/?#]+)'
+    _TESTS = [{
+        'url': 'https://beta.ardmediathek.de/ard/player/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE/die-robuste-roswita',
+        'md5': '2d02d996156ea3c397cfc5036b5d7f8f',
+        'info_dict': {
+            'display_id': 'die-robuste-roswita',
+            'id': 'Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE',
+            'title': 'Tatort: Die robuste Roswita',
+            'description': r're:^Der Mord.*trüber ist als die Ilm.',
+            'duration': 5316,
+            'thumbnail': 'https://img.ardmediathek.de/standard/00/55/43/59/34/-1774185891/16x9/960?mandant=ard',
+            'upload_date': '20180826',
+            'ext': 'mp4',
+        },
+    }]
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('video_id')
+        display_id = mobj.group('display_id')
+
+        webpage = self._download_webpage(url, display_id)
+        data_json = self._search_regex(r'window\.__APOLLO_STATE__\s*=\s*(\{.*);\n', webpage, 'json')
+        data = self._parse_json(data_json, display_id)
+
+        res = {
+            'id': video_id,
+            'display_id': display_id,
+        }
+        formats = []
+        for widget in data.values():
+            if widget.get('_geoblocked'):
+                raise ExtractorError('This video is not available due to geoblocking', expected=True)
+
+            if '_duration' in widget:
+                res['duration'] = widget['_duration']
+            if 'clipTitle' in widget:
+                res['title'] = widget['clipTitle']
+            if '_previewImage' in widget:
+                res['thumbnail'] = widget['_previewImage']
+            if 'broadcastedOn' in widget:
+                res['upload_date'] = unified_strdate(widget['broadcastedOn'])
+            if 'synopsis' in widget:
+                res['description'] = widget['synopsis']
+            if '_subtitleUrl' in widget:
+                res['subtitles'] = {'de': [{
+                    'ext': 'ttml',
+                    'url': widget['_subtitleUrl'],
+                }]}
+            if '_quality' in widget:
+                format_url = widget['_stream']['json'][0]
+
+                if format_url.endswith('.f4m'):
+                    formats.extend(self._extract_f4m_formats(
+                        format_url + '?hdcore=3.11.0',
+                        video_id, f4m_id='hds', fatal=False))
+                elif format_url.endswith('m3u8'):
+                    formats.extend(self._extract_m3u8_formats(
+                        format_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
+                else:
+                    formats.append({
+                        'format_id': 'http-' + widget['_quality'],
+                        'url': format_url,
+                        'preference': 10,  # Plain HTTP, that's nice
+                    })
+
+        self._sort_formats(formats)
+        res['formats'] = formats
+
+        return res
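The new beta-Mediathek extractor reads the Apollo GraphQL cache that the page embeds instead of calling a JSON API. A self-contained sketch of that scraping step; the page content below is synthetic, only the regex and the widget field names mirror the extractor:

import json
import re

webpage = 'window.__APOLLO_STATE__ = {"w1": {"clipTitle": "Tatort", "_duration": 5316}};\n'

data_json = re.search(r'window\.__APOLLO_STATE__\s*=\s*(\{.*);\n', webpage).group(1)
data = json.loads(data_json)

res = {}
for widget in data.values():
    if 'clipTitle' in widget:
        res['title'] = widget['clipTitle']
    if '_duration' in widget:
        res['duration'] = widget['_duration']

assert res == {'title': 'Tatort', 'duration': 5316}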
index be41bd5a22477fce2aca4a043799574e148fdc57..f14b407dc82cf8f945f581ab059bd6218866eb57 100644 (file)
@@ -1,6 +1,5 @@
 from __future__ import unicode_literals
 
-import json
 import random
 import re
 import time
@@ -16,14 +15,18 @@ from ..utils import (
     int_or_none,
     KNOWN_EXTENSIONS,
     parse_filesize,
+    str_or_none,
+    try_get,
     unescapeHTML,
     update_url_query,
     unified_strdate,
+    unified_timestamp,
+    url_or_none,
 )
 
 
 class BandcampIE(InfoExtractor):
-    _VALID_URL = r'https?://.*?\.bandcamp\.com/track/(?P<title>[^/?#&]+)'
+    _VALID_URL = r'https?://[^/]+\.bandcamp\.com/track/(?P<title>[^/?#&]+)'
     _TESTS = [{
         'url': 'http://youtube-dl.bandcamp.com/track/youtube-dl-test-song',
         'md5': 'c557841d5e50261777a6585648adf439',
@@ -35,13 +38,44 @@ class BandcampIE(InfoExtractor):
         },
         '_skip': 'There is a limit of 200 free downloads / month for the test song'
     }, {
+        # free download
         'url': 'http://benprunty.bandcamp.com/track/lanius-battle',
-        'md5': '0369ace6b939f0927e62c67a1a8d9fa7',
+        'md5': '853e35bf34aa1d6fe2615ae612564b36',
         'info_dict': {
             'id': '2650410135',
             'ext': 'aiff',
             'title': 'Ben Prunty - Lanius (Battle)',
+            'thumbnail': r're:^https?://.*\.jpg$',
             'uploader': 'Ben Prunty',
+            'timestamp': 1396508491,
+            'upload_date': '20140403',
+            'release_date': '20140403',
+            'duration': 260.877,
+            'track': 'Lanius (Battle)',
+            'track_number': 1,
+            'track_id': '2650410135',
+            'artist': 'Ben Prunty',
+            'album': 'FTL: Advanced Edition Soundtrack',
+        },
+    }, {
+        # no free download, mp3 128
+        'url': 'https://relapsealumni.bandcamp.com/track/hail-to-fire',
+        'md5': 'fec12ff55e804bb7f7ebeb77a800c8b7',
+        'info_dict': {
+            'id': '2584466013',
+            'ext': 'mp3',
+            'title': 'Mastodon - Hail to Fire',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'uploader': 'Mastodon',
+            'timestamp': 1322005399,
+            'upload_date': '20111122',
+            'release_date': '20040207',
+            'duration': 120.79,
+            'track': 'Hail to Fire',
+            'track_number': 5,
+            'track_id': '2584466013',
+            'artist': 'Mastodon',
+            'album': 'Call of the Mastodon',
         },
     }]
 
         },
     }]
 
@@ -50,19 +84,23 @@ class BandcampIE(InfoExtractor):
         title = mobj.group('title')
         webpage = self._download_webpage(url, title)
         thumbnail = self._html_search_meta('og:image', webpage, default=None)
-        m_download = re.search(r'freeDownloadPage: "(.*?)"', webpage)
-        if not m_download:
-            m_trackinfo = re.search(r'trackinfo: (.+),\s*?\n', webpage)
-            if m_trackinfo:
-                json_code = m_trackinfo.group(1)
-                data = json.loads(json_code)[0]
-                track_id = compat_str(data['id'])
-
-                if not data.get('file'):
-                    raise ExtractorError('Not streamable', video_id=track_id, expected=True)
-
-                formats = []
-                for format_id, format_url in data['file'].items():
+
+        track_id = None
+        track = None
+        track_number = None
+        duration = None
+
+        formats = []
+        track_info = self._parse_json(
+            self._search_regex(
+                r'trackinfo\s*:\s*\[\s*({.+?})\s*\]\s*,\s*?\n',
+                webpage, 'track info', default='{}'), title)
+        if track_info:
+            file_ = track_info.get('file')
+            if isinstance(file_, dict):
+                for format_id, format_url in file_.items():
+                    if not url_or_none(format_url):
+                        continue
                     ext, abr_str = format_id.split('-', 1)
                     formats.append({
                         'format_id': format_id,
@@ -72,85 +110,110 @@ class BandcampIE(InfoExtractor):
                         'acodec': ext,
                         'abr': int_or_none(abr_str),
                     })
+            track = track_info.get('title')
+            track_id = str_or_none(track_info.get('track_id') or track_info.get('id'))
+            track_number = int_or_none(track_info.get('track_num'))
+            duration = float_or_none(track_info.get('duration'))
+
+        def extract(key):
+            return self._search_regex(
+                r'\b%s\s*["\']?\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1' % key,
+                webpage, key, default=None, group='value')
+
+        artist = extract('artist')
+        album = extract('album_title')
+        timestamp = unified_timestamp(
+            extract('publish_date') or extract('album_publish_date'))
+        release_date = unified_strdate(extract('album_release_date'))
+
+        download_link = self._search_regex(
+            r'freeDownloadPage\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
+            'download link', default=None, group='url')
+        if download_link:
+            track_id = self._search_regex(
+                r'(?ms)var TralbumData = .*?[{,]\s*id: (?P<id>\d+),?$',
+                webpage, 'track id')
+
+            download_webpage = self._download_webpage(
+                download_link, track_id, 'Downloading free downloads page')
+
+            blob = self._parse_json(
+                self._search_regex(
+                    r'data-blob=(["\'])(?P<blob>{.+?})\1', download_webpage,
+                    'blob', group='blob'),
+                track_id, transform_source=unescapeHTML)
+
+            info = try_get(
+                blob, (lambda x: x['digital_items'][0],
+                       lambda x: x['download_items'][0]), dict)
+            if info:
+                downloads = info.get('downloads')
+                if isinstance(downloads, dict):
+                    if not track:
+                        track = info.get('title')
+                    if not artist:
+                        artist = info.get('artist')
+                    if not thumbnail:
+                        thumbnail = info.get('thumb_url')
+
+                    download_formats = {}
+                    download_formats_list = blob.get('download_formats')
+                    if isinstance(download_formats_list, list):
+                        for f in blob['download_formats']:
+                            name, ext = f.get('name'), f.get('file_extension')
+                            if all(isinstance(x, compat_str) for x in (name, ext)):
+                                download_formats[name] = ext.strip('.')
+
+                    for format_id, f in downloads.items():
+                        format_url = f.get('url')
+                        if not format_url:
+                            continue
+                        # Stat URL generation algorithm is reverse engineered from
+                        # download_*_bundle_*.js
+                        stat_url = update_url_query(
+                            format_url.replace('/download/', '/statdownload/'), {
+                                '.rand': int(time.time() * 1000 * random.random()),
+                            })
+                        format_id = f.get('encoding_name') or format_id
+                        stat = self._download_json(
+                            stat_url, track_id, 'Downloading %s JSON' % format_id,
+                            transform_source=lambda s: s[s.index('{'):s.rindex('}') + 1],
+                            fatal=False)
+                        if not stat:
+                            continue
+                        retry_url = url_or_none(stat.get('retry_url'))
+                        if not retry_url:
+                            continue
+                        formats.append({
+                            'url': self._proto_relative_url(retry_url, 'http:'),
+                            'ext': download_formats.get(format_id),
+                            'format_id': format_id,
+                            'format_note': f.get('description'),
+                            'filesize': parse_filesize(f.get('size_mb')),
+                            'vcodec': 'none',
+                        })
 
 
-
-                return {
-                    'id': track_id,
-                    'title': data['title'],
-                    'thumbnail': thumbnail,
-                    'formats': formats,
-                    'duration': float_or_none(data.get('duration')),
-                }
-            else:
-                raise ExtractorError('No free songs found')
-
-        download_link = m_download.group(1)
-        video_id = self._search_regex(
-            r'(?ms)var TralbumData = .*?[{,]\s*id: (?P<id>\d+),?$',
-            webpage, 'video id')
-
-        download_webpage = self._download_webpage(
-            download_link, video_id, 'Downloading free downloads page')
-
-        blob = self._parse_json(
-            self._search_regex(
-                r'data-blob=(["\'])(?P<blob>{.+?})\1', download_webpage,
-                'blob', group='blob'),
-            video_id, transform_source=unescapeHTML)
-
-        info = blob['digital_items'][0]
-
-        downloads = info['downloads']
-        track = info['title']
+        self._sort_formats(formats)
 
-        artist = info.get('artist')
         title = '%s - %s' % (artist, track) if artist else track
 
-        download_formats = {}
-        for f in blob['download_formats']:
-            name, ext = f.get('name'), f.get('file_extension')
-            if all(isinstance(x, compat_str) for x in (name, ext)):
-                download_formats[name] = ext.strip('.')
-
-        formats = []
-        for format_id, f in downloads.items():
-            format_url = f.get('url')
-            if not format_url:
-                continue
-            # Stat URL generation algorithm is reverse engineered from
-            # download_*_bundle_*.js
-            stat_url = update_url_query(
-                format_url.replace('/download/', '/statdownload/'), {
-                    '.rand': int(time.time() * 1000 * random.random()),
-                })
-            format_id = f.get('encoding_name') or format_id
-            stat = self._download_json(
-                stat_url, video_id, 'Downloading %s JSON' % format_id,
-                transform_source=lambda s: s[s.index('{'):s.rindex('}') + 1],
-                fatal=False)
-            if not stat:
-                continue
-            retry_url = stat.get('retry_url')
-            if not isinstance(retry_url, compat_str):
-                continue
-            formats.append({
-                'url': self._proto_relative_url(retry_url, 'http:'),
-                'ext': download_formats.get(format_id),
-                'format_id': format_id,
-                'format_note': f.get('description'),
-                'filesize': parse_filesize(f.get('size_mb')),
-                'vcodec': 'none',
-            })
-        self._sort_formats(formats)
+        if not duration:
+            duration = float_or_none(self._html_search_meta(
+                'duration', webpage, default=None))
 
         return {
-            'id': video_id,
+            'id': track_id,
             'title': title,
-            'thumbnail': info.get('thumb_url') or thumbnail,
-            'uploader': info.get('artist'),
-            'artist': artist,
+            'thumbnail': thumbnail,
+            'uploader': artist,
+            'timestamp': timestamp,
+            'release_date': release_date,
+            'duration': duration,
             'track': track,
+            'track_number': track_number,
+            'track_id': track_id,
+            'artist': artist,
+            'album': album,
             'formats': formats,
         }
 
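The free-download branch above derives a stat URL from each download URL and unwraps JSON that arrives embedded in a JavaScript assignment, which is what the transform_source slice does. A sketch of both steps; the URL and response body are made up and the real response shape may differ:

import json
import random
import time

format_url = 'https://popplers5.bandcamp.com/download/track?enc=flac&id=1'
# Swap /download/ for /statdownload/ and add the cache-busting .rand
# parameter, mirroring the update_url_query call above.
stat_url = format_url.replace('/download/', '/statdownload/') + \
    '&.rand=%d' % int(time.time() * 1000 * random.random())

# Hypothetical response: JSON wrapped in a JavaScript assignment; the
# s[s.index('{'):s.rindex('}') + 1] slice recovers the bare object.
body = 'var _stat = {"result": "ok", "retry_url": "https://example.com/retry"};'
stat = json.loads(body[body.index('{'):body.rindex('}') + 1])
assert stat['retry_url'] == 'https://example.com/retry'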
@@ -306,7 +369,7 @@ class BandcampWeeklyIE(InfoExtractor):
 
         formats = []
         for format_id, format_url in show['audio_stream'].items():
-            if not isinstance(format_url, compat_str):
+            if not url_or_none(format_url):
                 continue
             for known_ext in KNOWN_EXTENSIONS:
                 if known_ext in format_id:
index 30a63a24e12296fca713cea3b6304c784ae4af15..abcfa301d9abf4e76a01d42cac71a01e06e2f1eb 100644 (file)
@@ -21,7 +21,6 @@ from ..utils import (
     urljoin,
 )
 from ..compat import (
-    compat_etree_fromstring,
     compat_HTTPError,
     compat_urlparse,
 )
@@ -30,7 +29,7 @@ from ..compat import (
 class BBCCoUkIE(InfoExtractor):
     IE_NAME = 'bbc.co.uk'
     IE_DESC = 'BBC iPlayer'
-    _ID_REGEX = r'[pbw][\da-z]{7}'
+    _ID_REGEX = r'(?:[pbm][\da-z]{7}|w[\da-z]{7,14})'
     _VALID_URL = r'''(?x)
                     https?://
                         (?:www\.)?bbc\.co\.uk/
@@ -237,6 +236,12 @@ class BBCCoUkIE(InfoExtractor):
         }, {
             'url': 'http://www.bbc.co.uk/programmes/w3csv1y9',
             'only_matching': True,
+        }, {
+            'url': 'https://www.bbc.co.uk/programmes/m00005xn',
+            'only_matching': True,
+        }, {
+            'url': 'https://www.bbc.co.uk/programmes/w172w4dww1jqt5s',
+            'only_matching': True,
         }]
 
     _USP_RE = r'/([^/]+?)\.ism(?:\.hlsv2\.ism)?/[^/]+\.m3u8'
@@ -334,14 +339,9 @@ class BBCCoUkIE(InfoExtractor):
         self._raise_extractor_error(last_exception)
 
     def _download_media_selector_url(self, url, programme_id=None):
-        try:
-            media_selection = self._download_xml(
-                url, programme_id, 'Downloading media selection XML')
-        except ExtractorError as ee:
-            if isinstance(ee.cause, compat_HTTPError) and ee.cause.code in (403, 404):
-                media_selection = compat_etree_fromstring(ee.cause.read().decode('utf-8'))
-            else:
-                raise
+        media_selection = self._download_xml(
+            url, programme_id, 'Downloading media selection XML',
+            expected_status=(403, 404))
         return self._process_media_selector(media_selection, programme_id)
 
     def _process_media_selector(self, media_selection, programme_id):
@@ -784,6 +784,17 @@ class BBCIE(BBCCoUkIE):
         'params': {
             'skip_download': True,
         }
+    }, {
+        # window.__PRELOADED_STATE__
+        'url': 'https://www.bbc.co.uk/radio/play/b0b9z4yl',
+        'info_dict': {
+            'id': 'b0b9z4vz',
+            'ext': 'mp4',
+            'title': 'Prom 6: An American in Paris and Turangalila',
+            'description': 'md5:51cf7d6f5c8553f197e58203bc78dff8',
+            'uploader': 'Radio 3',
+            'uploader_id': 'bbc_radio_three',
+        },
     }]
 
     @classmethod
@@ -1006,6 +1017,36 @@ class BBCIE(BBCCoUkIE):
                     'subtitles': subtitles,
                 }
 
+        preload_state = self._parse_json(self._search_regex(
+            r'window\.__PRELOADED_STATE__\s*=\s*({.+?});', webpage,
+            'preload state', default='{}'), playlist_id, fatal=False)
+        if preload_state:
+            current_programme = preload_state.get('programmes', {}).get('current') or {}
+            programme_id = current_programme.get('id')
+            if current_programme and programme_id and current_programme.get('type') == 'playable_item':
+                title = current_programme.get('titles', {}).get('tertiary') or playlist_title
+                formats, subtitles = self._download_media_selector(programme_id)
+                self._sort_formats(formats)
+                synopses = current_programme.get('synopses') or {}
+                network = current_programme.get('network') or {}
+                duration = int_or_none(
+                    current_programme.get('duration', {}).get('value'))
+                thumbnail = None
+                image_url = current_programme.get('image_url')
+                if image_url:
+                    thumbnail = image_url.replace('{recipe}', '1920x1920')
+                return {
+                    'id': programme_id,
+                    'title': title,
+                    'description': dict_get(synopses, ('long', 'medium', 'short')),
+                    'thumbnail': thumbnail,
+                    'duration': duration,
+                    'uploader': network.get('short_title'),
+                    'uploader_id': network.get('id'),
+                    'formats': formats,
+                    'subtitles': subtitles,
+                }
+
         bbc3_config = self._parse_json(
             self._search_regex(
                 r'(?s)bbcthreeConfig\s*=\s*({.+?})\s*;\s*<', webpage,
diff --git a/youtube_dl/extractor/bitchute.py b/youtube_dl/extractor/bitchute.py
new file mode 100644 (file)
index 0000000..446a1ab
--- /dev/null
@@ -0,0 +1,120 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import itertools
+import re
+
+from .common import InfoExtractor
+from ..utils import urlencode_postdata
+
+
+class BitChuteIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?bitchute\.com/(?:video|embed|torrent/[^/]+)/(?P<id>[^/?#&]+)'
+    _TESTS = [{
+        'url': 'https://www.bitchute.com/video/szoMrox2JEI/',
+        'md5': '66c4a70e6bfc40dcb6be3eb1d74939eb',
+        'info_dict': {
+            'id': 'szoMrox2JEI',
+            'ext': 'mp4',
+            'title': 'Fuck bitches get money',
+            'description': 'md5:3f21f6fb5b1d17c3dee9cf6b5fe60b3a',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'uploader': 'Victoria X Rave',
+        },
+    }, {
+        'url': 'https://www.bitchute.com/embed/lbb5G1hjPhw/',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.bitchute.com/torrent/Zee5BE49045h/szoMrox2JEI.webtorrent',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(
+            'https://www.bitchute.com/video/%s' % video_id, video_id, headers={
+                'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.57 Safari/537.36',
+            })
+
+        title = self._search_regex(
+            (r'<[^>]+\bid=["\']video-title[^>]+>([^<]+)', r'<title>([^<]+)'),
+            webpage, 'title', default=None) or self._html_search_meta(
+            'description', webpage, 'title',
+            default=None) or self._og_search_description(webpage)
+
+        formats = [
+            {'url': mobj.group('url')}
+            for mobj in re.finditer(
+                r'addWebSeed\s*\(\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage)]
+        self._sort_formats(formats)
+
+        description = self._html_search_regex(
+            r'(?s)<div\b[^>]+\bclass=["\']full hidden[^>]+>(.+?)</div>',
+            webpage, 'description', fatal=False)
+        thumbnail = self._og_search_thumbnail(
+            webpage, default=None) or self._html_search_meta(
+            'twitter:image:src', webpage, 'thumbnail')
+        uploader = self._html_search_regex(
+            r'(?s)<p\b[^>]+\bclass=["\']video-author[^>]+>(.+?)</p>', webpage,
+            'uploader', fatal=False)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'uploader': uploader,
+            'formats': formats,
+        }
+
+
+class BitChuteChannelIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?bitchute\.com/channel/(?P<id>[^/?#&]+)'
+    _TEST = {
+        'url': 'https://www.bitchute.com/channel/victoriaxrave/',
+        'playlist_mincount': 185,
+        'info_dict': {
+            'id': 'victoriaxrave',
+        },
+    }
+
+    _TOKEN = 'zyG6tQcGPE5swyAEFLqKUwMuMMuF6IO2DZ6ZDQjGfsL0e4dcTLwqkTTul05Jdve7'
+
+    def _entries(self, channel_id):
+        channel_url = 'https://www.bitchute.com/channel/%s/' % channel_id
+        offset = 0
+        for page_num in itertools.count(1):
+            data = self._download_json(
+                '%sextend/' % channel_url, channel_id,
+                'Downloading channel page %d' % page_num,
+                data=urlencode_postdata({
+                    'csrfmiddlewaretoken': self._TOKEN,
+                    'name': '',
+                    'offset': offset,
+                }), headers={
+                    'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
+                    'Referer': channel_url,
+                    'X-Requested-With': 'XMLHttpRequest',
+                    'Cookie': 'csrftoken=%s' % self._TOKEN,
+                })
+            if data.get('success') is False:
+                break
+            html = data.get('html')
+            if not html:
+                break
+            video_ids = re.findall(
+                r'class=["\']channel-videos-image-container[^>]+>\s*<a\b[^>]+\bhref=["\']/video/([^"\'/]+)',
+                html)
+            if not video_ids:
+                break
+            offset += len(video_ids)
+            for video_id in video_ids:
+                yield self.url_result(
+                    'https://www.bitchute.com/video/%s' % video_id,
+                    ie=BitChuteIE.ie_key(), video_id=video_id)
+
+    def _real_extract(self, url):
+        channel_id = self._match_id(url)
+        return self.playlist_result(
+            self._entries(channel_id), playlist_id=channel_id)
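The channel pager sends the same static token both as the csrftoken cookie and as the csrfmiddlewaretoken form field, a double-submit pattern that satisfies Django's CSRF check without first loading the page. A rough standalone equivalent of a single page fetch (Python 3 urllib; the endpoint's behaviour is assumed from the extractor code above):

import json
from urllib.parse import urlencode
from urllib.request import Request, urlopen

TOKEN = 'zyG6tQcGPE5swyAEFLqKUwMuMMuF6IO2DZ6ZDQjGfsL0e4dcTLwqkTTul05Jdve7'
channel_url = 'https://www.bitchute.com/channel/victoriaxrave/'

req = Request(channel_url + 'extend/', data=urlencode({
    'csrfmiddlewaretoken': TOKEN,
    'name': '',
    'offset': 0,
}).encode(), headers={
    'Referer': channel_url,                # headers used by the extractor above
    'X-Requested-With': 'XMLHttpRequest',
    'Cookie': 'csrftoken=%s' % TOKEN,      # double-submit cookie
})
page = json.loads(urlopen(req).read().decode())
print(page.get('success'), len(page.get('html') or ''))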
index 70d16767f19966fe625f8496f334fde1a15929f8..68c7cf2bba49c1cb4374ae00f6262214c1890551 100644 (file)
@@ -4,8 +4,10 @@ import re
 
 from .common import InfoExtractor
 from .youtube import YoutubeIE
-from ..compat import compat_str
-from ..utils import int_or_none
+from ..utils import (
+    int_or_none,
+    url_or_none,
+)
 
 
 class BreakIE(InfoExtractor):
@@ -55,8 +57,8 @@ class BreakIE(InfoExtractor):
 
         formats = []
         for video in content:
-            video_url = video.get('url')
-            if not video_url or not isinstance(video_url, compat_str):
+            video_url = url_or_none(video.get('url'))
+            if not video_url:
                 continue
             bitrate = int_or_none(self._search_regex(
                 r'(\d+)_kbps', video_url, 'tbr', default=None))
index ab62e54d63c2335d0002701c078105e00ccc6186..14f9a14edf417d996fa1cbe3c80bcd06f2ee9ef0 100644 (file)
@@ -572,7 +572,8 @@ class BrightcoveNewIE(AdobePassIE):
             container = source.get('container')
             ext = mimetype2ext(source.get('type'))
             src = source.get('src')
-            if ext == 'ism' or container == 'WVM':
+            # https://support.brightcove.com/playback-api-video-fields-reference#key_systems_object
+            if ext == 'ism' or container == 'WVM' or source.get('key_systems'):
                 continue
             elif ext == 'm3u8' or container == 'M2TS':
                 if not src:
@@ -629,6 +630,14 @@ class BrightcoveNewIE(AdobePassIE):
                         'format_id': build_format_id('rtmp'),
                     })
                 formats.append(f)
+        if not formats:
+            # for sonyliv.com DRM protected videos
+            s3_source_url = json_data.get('custom_fields', {}).get('s3sourceurl')
+            if s3_source_url:
+                formats.append({
+                    'url': s3_source_url,
+                    'format_id': 'source',
+                })
 
         errors = json_data.get('errors')
         if not formats and errors:
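Per the linked Playback API reference, DRM-protected renditions carry a key_systems object on their source entry; the new condition skips them the same way ism and WVM sources are skipped. A toy illustration with hypothetical source entries:

sources = [
    {'src': 'https://example.com/clear/master.m3u8',
     'type': 'application/x-mpegURL'},
    {'src': 'https://example.com/drm/manifest.mpd',
     'type': 'application/dash+xml',
     'key_systems': {'com.widevine.alpha': {'license_url': 'https://example.com/lic'}}},
]
playable = [s for s in sources if not s.get('key_systems')]
assert [s['src'] for s in playable] == ['https://example.com/clear/master.m3u8']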
index ee0165dbaa20c3b3d4ecca53fbb748cb3f823bf8..79350817f5599fba1b560215076aea2878984835 100644 (file)
@@ -2,10 +2,10 @@
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
-from ..compat import compat_str
 from ..utils import (
     ExtractorError,
     int_or_none,
+    url_or_none,
 )
 
 
@@ -56,8 +56,8 @@ class CamModelsIE(InfoExtractor):
             for media in encodings:
                 if not isinstance(media, dict):
                     continue
-                media_url = media.get('location')
-                if not media_url or not isinstance(media_url, compat_str):
+                media_url = url_or_none(media.get('location'))
+                if not media_url:
                     continue
 
                 format_id_list = [format_id]
index 8ac62c1a680ccb0ab8a8c038bf0c7270c510d3e6..174fd9e2b4b00b384b1737a1df058050e4603de9 100644 (file)
@@ -11,6 +11,7 @@ from ..utils import (
     strip_or_none,
     float_or_none,
     int_or_none,
+    merge_dicts,
     parse_iso8601,
 )
 
@@ -248,9 +249,13 @@ class VrtNUIE(GigyaBaseIE):
 
         webpage, urlh = self._download_webpage_handle(url, display_id)
 
-        title = self._html_search_regex(
+        info = self._search_json_ld(webpage, display_id, default={})
+
+        # title is optional here since it may be extracted by the
+        # extractor that this one delegates to via url_transparent
+        title = strip_or_none(self._html_search_regex(
             r'(?ms)<h1 class="content__heading">(.+?)</h1>',
-            webpage, 'title').strip()
+            webpage, 'title', default=None))
 
         description = self._html_search_regex(
             r'(?ms)<div class="content__description">(.+?)</div>',
@@ -295,7 +300,7 @@ class VrtNUIE(GigyaBaseIE):
         # the first one
         video_id = list(video.values())[0].get('videoid')
 
-        return {
+        return merge_dicts(info, {
             '_type': 'url_transparent',
             'url': 'https://mediazone.vrt.be/api/v1/vrtvideo/assets/%s' % video_id,
             'ie_key': CanvasIE.ie_key(),
@@ -307,4 +312,4 @@ class VrtNUIE(GigyaBaseIE):
             'season_number': season_number,
             'episode_number': episode_number,
             'release_date': release_date,
-        }
+        })
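merge_dicts gives precedence to the first dict's non-empty values, so the JSON-LD fields win where both are present and the url_transparent result only fills the gaps (anything still missing is supplied by the delegated extractor). A simplified reconstruction of the helper for illustration; the actual implementation lives in youtube_dl/utils.py and may differ in detail:

def merge_dicts(*dicts):
    # Earlier dicts win; None values are ignored and empty strings may
    # be overwritten by a later non-empty value.
    merged = {}
    for d in dicts:
        for k, v in d.items():
            if v is None:
                continue
            if k not in merged or (merged[k] == '' and v != ''):
                merged[k] = v
    return merged

info = {'title': 'From JSON-LD', 'timestamp': 1536192000}
assert merge_dicts(info, {'title': None, 'id': 'abc'}) == {
    'title': 'From JSON-LD', 'timestamp': 1536192000, 'id': 'abc'}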
index 07f5206c124e42368809ceb89149c3b5a9d5bdf7..544647f92b7594e50be813b9666a7d381a7722fe 100644 (file)
@@ -4,13 +4,13 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..compat import compat_str
 from ..utils import (
     clean_html,
     int_or_none,
     parse_duration,
     parse_iso8601,
     parse_resolution,
+    url_or_none,
 )
 
 
@@ -53,8 +53,8 @@ class CCMAIE(InfoExtractor):
         media_url = media['media']['url']
         if isinstance(media_url, list):
             for format_ in media_url:
-                format_url = format_.get('file')
-                if not format_url or not isinstance(format_url, compat_str):
+                format_url = url_or_none(format_.get('file'))
+                if not format_url:
                     continue
                 label = format_.get('label')
                 f = parse_resolution(label)
index 6bad908595e825ac75b80330a0718d029578e72c..46380430f9d35409b1f15aea9dbe366b153c594d 100644 (file)
@@ -108,7 +108,7 @@ class CeskaTelevizeIE(InfoExtractor):
 
         for user_agent in (None, USER_AGENTS['Safari']):
             req = sanitized_Request(
-                'http://www.ceskatelevize.cz/ivysilani/ajax/get-client-playlist',
+                'https://www.ceskatelevize.cz/ivysilani/ajax/get-client-playlist',
                 data=urlencode_postdata(data))
 
             req.add_header('Content-type', 'application/x-www-form-urlencoded')
index 57e6437997153200999e7a6eb6b9b9e450f6b13f..06d04de139444e0e48d46cc2132eee9f1f94919d 100644 (file)
@@ -1,15 +1,19 @@
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
+from ..compat import (
+    compat_parse_qs,
+    compat_urllib_parse_urlparse,
+)
 from ..utils import (
     float_or_none,
-    parse_iso8601,
+    unified_timestamp,
 )
 
 
 class ClypIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?clyp\.it/(?P<id>[a-z0-9]+)'
-    _TEST = {
+    _TESTS = [{
         'url': 'https://clyp.it/ojz2wfah',
         'md5': '1d4961036c41247ecfdcc439c0cddcbb',
         'info_dict': {
@@ -21,13 +25,34 @@ class ClypIE(InfoExtractor):
             'timestamp': 1443515251,
             'upload_date': '20150929',
         },
-    }
+    }, {
+        'url': 'https://clyp.it/b04p1odi?token=b0078e077e15835845c528a44417719d',
+        'info_dict': {
+            'id': 'b04p1odi',
+            'ext': 'mp3',
+            'title': 'GJ! (Reward Edit)',
+            'description': 'Metal Resistance (THE ONE edition)',
+            'duration': 177.789,
+            'timestamp': 1528241278,
+            'upload_date': '20180605',
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }]
 
     def _real_extract(self, url):
         audio_id = self._match_id(url)
 
+        qs = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
+        token = qs.get('token', [None])[0]
+
+        query = {}
+        if token:
+            query['token'] = token
+
         metadata = self._download_json(
-            'https://api.clyp.it/%s' % audio_id, audio_id)
+            'https://api.clyp.it/%s' % audio_id, audio_id, query=query)
 
         formats = []
         for secure in ('', 'Secure'):
@@ -45,7 +70,7 @@ class ClypIE(InfoExtractor):
         title = metadata['Title']
         description = metadata.get('Description')
         duration = float_or_none(metadata.get('Duration'))
-        timestamp = parse_iso8601(metadata.get('DateCreated'))
+        timestamp = unified_timestamp(metadata.get('DateCreated'))
 
         return {
             'id': audio_id,
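compat_parse_qs maps every query parameter to a list of values, hence the `.get('token', [None])[0]` idiom above. A quick illustration using the Python 3 stdlib equivalents of the compat shims:

from urllib.parse import parse_qs, urlparse

url = 'https://clyp.it/b04p1odi?token=b0078e077e15835845c528a44417719d'
qs = parse_qs(urlparse(url).query)
# qs == {'token': ['b0078e077e15835845c528a44417719d']}; a missing key
# falls back to [None], so indexing [0] is always safe.
token = qs.get('token', [None])[0]
assert token == 'b0078e077e15835845c528a44417719d'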
index a2548dba364b338e8c1fd37e8f3d9acaa0aa6e67..b8bbaf81a22aab883b1740f25edd91b78473916c 100644 (file)
@@ -19,6 +19,7 @@ from ..compat import (
     compat_cookies,
     compat_etree_fromstring,
     compat_getpass,
+    compat_integer_types,
     compat_http_client,
     compat_os_name,
     compat_str,
@@ -51,6 +52,7 @@ from ..utils import (
     GeoUtils,
     int_or_none,
     js_to_json,
+    JSON_LD_RE,
     mimetype2ext,
     orderedSet,
     parse_codecs,
@@ -548,8 +550,26 @@ class InfoExtractor(object):
     def IE_NAME(self):
         return compat_str(type(self).__name__[:-2])
 
-    def _request_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, data=None, headers={}, query={}):
-        """ Returns the response handle """
+    @staticmethod
+    def __can_accept_status_code(err, expected_status):
+        assert isinstance(err, compat_urllib_error.HTTPError)
+        if expected_status is None:
+            return False
+        if isinstance(expected_status, compat_integer_types):
+            return err.code == expected_status
+        elif isinstance(expected_status, (list, tuple)):
+            return err.code in expected_status
+        elif callable(expected_status):
+            return expected_status(err.code) is True
+        else:
+            assert False
+
+    def _request_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, data=None, headers={}, query={}, expected_status=None):
+        """
+        Return the response handle.
+
+        See _download_webpage docstring for arguments specification.
+        """
         if note is None:
             self.report_download_webpage(video_id)
         elif note is not False:
@@ -578,6 +598,10 @@ class InfoExtractor(object):
         try:
             return self._downloader.urlopen(url_or_request)
         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
+            if isinstance(err, compat_urllib_error.HTTPError):
+                if self.__can_accept_status_code(err, expected_status):
+                    return err.fp
+
             if errnote is False:
                 return False
             if errnote is None:
@@ -590,13 +614,17 @@ class InfoExtractor(object):
                 self._downloader.report_warning(errmsg)
                 return False
 
-    def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None, fatal=True, encoding=None, data=None, headers={}, query={}):
-        """ Returns a tuple (page content as string, URL handle) """
+    def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None, fatal=True, encoding=None, data=None, headers={}, query={}, expected_status=None):
+        """
+        Return a tuple (page content as string, URL handle).
+
+        See _download_webpage docstring for arguments specification.
+        """
         # Strip hashes from the URL (#1038)
         if isinstance(url_or_request, (compat_str, str)):
             url_or_request = url_or_request.partition('#')[0]
 
-        urlh = self._request_webpage(url_or_request, video_id, note, errnote, fatal, data=data, headers=headers, query=query)
+        urlh = self._request_webpage(url_or_request, video_id, note, errnote, fatal, data=data, headers=headers, query=query, expected_status=expected_status)
         if urlh is False:
             assert not fatal
             return False
@@ -685,13 +713,52 @@ class InfoExtractor(object):
 
         return content
 
-    def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, tries=1, timeout=5, encoding=None, data=None, headers={}, query={}):
-        """ Returns the data of the page as a string """
+    def _download_webpage(
+            self, url_or_request, video_id, note=None, errnote=None,
+            fatal=True, tries=1, timeout=5, encoding=None, data=None,
+            headers={}, query={}, expected_status=None):
+        """
+        Return the data of the page as a string.
+
+        Arguments:
+        url_or_request -- plain text URL as a string or
+            a compat_urllib_request.Request object
+        video_id -- Video/playlist/item identifier (string)
+
+        Keyword arguments:
+        note -- note printed before downloading (string)
+        errnote -- note printed in case of an error (string)
+        fatal -- flag denoting whether error should be considered fatal,
+            i.e. whether it should cause ExtractorError to be raised,
+            otherwise a warning will be reported and extraction continued
+        tries -- number of tries
+        timeout -- sleep interval between tries
+        encoding -- encoding for a page content decoding, guessed automatically
+            when not explicitly specified
+        data -- POST data (bytes)
+        headers -- HTTP headers (dict)
+        query -- URL query (dict)
+        expected_status -- allows accepting failed HTTP requests (non-2xx
+            status code) by explicitly specifying a set of accepted status
+            codes. Can be any of the following entities:
+                - an integer type specifying an exact failed status code to
+                  accept
+                - a list or a tuple of integer types specifying a list of
+                  failed status codes to accept
+                - a callable accepting an actual failed status code and
+                  returning True if it should be accepted
+            Note that this argument does not affect success status codes (2xx)
+            which are always accepted.
+        """
+
         success = False
         try_count = 0
         while success is False:
             try:
-                res = self._download_webpage_handle(url_or_request, video_id, note, errnote, fatal, encoding=encoding, data=data, headers=headers, query=query)
+                res = self._download_webpage_handle(
+                    url_or_request, video_id, note, errnote, fatal,
+                    encoding=encoding, data=data, headers=headers, query=query,
+                    expected_status=expected_status)
                 success = True
             except compat_http_client.IncompleteRead as e:
                 try_count += 1
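The status-code dispatch added in __can_accept_status_code accepts an exact code, a list/tuple of codes, or a predicate, which is what lets the BBC hunk earlier in this commit replace its try/except with expected_status=(403, 404). A standalone sketch of the same dispatch (plain int here, where upstream uses compat_integer_types for Python 2):

def can_accept_status_code(code, expected_status):
    # Mirrors the logic of InfoExtractor.__can_accept_status_code above.
    if expected_status is None:
        return False
    if isinstance(expected_status, int):
        return code == expected_status
    if isinstance(expected_status, (list, tuple)):
        return code in expected_status
    if callable(expected_status):
        return expected_status(code) is True
    raise AssertionError('unsupported expected_status type')

assert can_accept_status_code(404, (403, 404))
assert can_accept_status_code(418, lambda c: 400 <= c < 500)
assert not can_accept_status_code(500, 404)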
@@ -707,11 +774,17 @@ class InfoExtractor(object):
     def _download_xml_handle(
             self, url_or_request, video_id, note='Downloading XML',
             errnote='Unable to download XML', transform_source=None,
-            fatal=True, encoding=None, data=None, headers={}, query={}):
-        """Return a tuple (xml as an xml.etree.ElementTree.Element, URL handle)"""
+            fatal=True, encoding=None, data=None, headers={}, query={},
+            expected_status=None):
+        """
+        Return a tuple (xml as an xml.etree.ElementTree.Element, URL handle).
+
+        See _download_webpage docstring for arguments specification.
+        """
         res = self._download_webpage_handle(
             url_or_request, video_id, note, errnote, fatal=fatal,
-            encoding=encoding, data=data, headers=headers, query=query)
+            encoding=encoding, data=data, headers=headers, query=query,
+            expected_status=expected_status)
         if res is False:
             return res
         xml_string, urlh = res
@@ -719,15 +792,21 @@ class InfoExtractor(object):
             xml_string, video_id, transform_source=transform_source,
             fatal=fatal), urlh
 
-    def _download_xml(self, url_or_request, video_id,
-                      note='Downloading XML', errnote='Unable to download XML',
-                      transform_source=None, fatal=True, encoding=None,
-                      data=None, headers={}, query={}):
-        """Return the xml as an xml.etree.ElementTree.Element"""
+    def _download_xml(
+            self, url_or_request, video_id,
+            note='Downloading XML', errnote='Unable to download XML',
+            transform_source=None, fatal=True, encoding=None,
+            data=None, headers={}, query={}, expected_status=None):
+        """
+        Return the xml as an xml.etree.ElementTree.Element.
+
+        See _download_webpage docstring for arguments specification.
+        """
         res = self._download_xml_handle(
             url_or_request, video_id, note=note, errnote=errnote,
             transform_source=transform_source, fatal=fatal, encoding=encoding,
-            data=data, headers=headers, query=query)
+            data=data, headers=headers, query=query,
+            expected_status=expected_status)
         return res if res is False else res[0]
 
     def _parse_xml(self, xml_string, video_id, transform_source=None, fatal=True):
@@ -745,11 +824,17 @@ class InfoExtractor(object):
     def _download_json_handle(
             self, url_or_request, video_id, note='Downloading JSON metadata',
             errnote='Unable to download JSON metadata', transform_source=None,
-            fatal=True, encoding=None, data=None, headers={}, query={}):
-        """Return a tuple (JSON object, URL handle)"""
+            fatal=True, encoding=None, data=None, headers={}, query={},
+            expected_status=None):
+        """
+        Return a tuple (JSON object, URL handle).
+
+        See _download_webpage docstring for arguments specification.
+        """
         res = self._download_webpage_handle(
             url_or_request, video_id, note, errnote, fatal=fatal,
-            encoding=encoding, data=data, headers=headers, query=query)
+            encoding=encoding, data=data, headers=headers, query=query,
+            expected_status=expected_status)
         if res is False:
             return res
         json_string, urlh = res
@@ -760,11 +845,18 @@ class InfoExtractor(object):
     def _download_json(
             self, url_or_request, video_id, note='Downloading JSON metadata',
             errnote='Unable to download JSON metadata', transform_source=None,
-            fatal=True, encoding=None, data=None, headers={}, query={}):
+            fatal=True, encoding=None, data=None, headers={}, query={},
+            expected_status=None):
+        """
+        Return the JSON object as a dict.
+
+        See _download_webpage docstring for arguments specification.
+        """
         res = self._download_json_handle(
             url_or_request, video_id, note=note, errnote=errnote,
             transform_source=transform_source, fatal=fatal, encoding=encoding,
-            data=data, headers=headers, query=query)
+            data=data, headers=headers, query=query,
+            expected_status=expected_status)
         return res if res is False else res[0]
 
     def _parse_json(self, json_string, video_id, transform_source=None, fatal=True):
@@ -1058,8 +1150,7 @@ class InfoExtractor(object):
 
     def _search_json_ld(self, html, video_id, expected_type=None, **kwargs):
         json_ld = self._search_regex(
-            r'(?s)<script[^>]+type=(["\'])application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>',
-            html, 'JSON-LD', group='json_ld', **kwargs)
+            JSON_LD_RE, html, 'JSON-LD', group='json_ld', **kwargs)
         default = kwargs.get('default', NO_DEFAULT)
         if not json_ld:
             return default if default is not NO_DEFAULT else {}
@@ -1768,9 +1859,7 @@ class InfoExtractor(object):
                         'height': height,
                     })
                 formats.extend(m3u8_formats)
-                continue
-
-            if src_ext == 'f4m':
+            elif src_ext == 'f4m':
                 f4m_url = src_url
                 if not f4m_params:
                     f4m_params = {
@@ -1780,9 +1869,13 @@ class InfoExtractor(object):
                 f4m_url += '&' if '?' in f4m_url else '?'
                 f4m_url += compat_urllib_parse_urlencode(f4m_params)
                 formats.extend(self._extract_f4m_formats(f4m_url, video_id, f4m_id='hds', fatal=False))
-                continue
-
-            if src_url.startswith('http') and self._is_valid_url(src, video_id):
+            elif src_ext == 'mpd':
+                formats.extend(self._extract_mpd_formats(
+                    src_url, video_id, mpd_id='dash', fatal=False))
+            elif re.search(r'\.ism/[Mm]anifest', src_url):
+                formats.extend(self._extract_ism_formats(
+                    src_url, video_id, ism_id='mss', fatal=False))
+            elif src_url.startswith('http') and self._is_valid_url(src, video_id):
                 http_count += 1
                 formats.append({
                     'url': src_url,
@@ -1793,7 +1886,6 @@ class InfoExtractor(object):
                     'width': width,
                     'height': height,
                 })
-                continue
 
         return formats
 
@@ -2015,7 +2107,21 @@ class InfoExtractor(object):
                         representation_ms_info = extract_multisegment_info(representation, adaption_set_ms_info)
 
                         def prepare_template(template_name, identifiers):
-                            t = representation_ms_info[template_name]
+                            tmpl = representation_ms_info[template_name]
+                            # First off, % characters outside $...$ templates
+                            # must be escaped by doubling for proper processing
+                            # by % operator string formatting used further (see
+                            # https://github.com/rg3/youtube-dl/issues/16867).
+                            t = ''
+                            in_template = False
+                            for c in tmpl:
+                                t += c
+                                if c == '$':
+                                    in_template = not in_template
+                                elif c == '%' and not in_template:
+                                    t += c
+                            # Next, $...$ templates are translated to their
+                            # %(...) counterparts to be used with % operator
                             t = t.replace('$RepresentationID$', representation_id)
                             t = re.sub(r'\$(%s)\$' % '|'.join(identifiers), r'%(\1)d', t)
                             t = re.sub(r'\$(%s)%%([^$]+)\$' % '|'.join(identifiers), r'%(\1)\2', t)
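A worked example of what prepare_template does to a SegmentTemplate string: the literal % is doubled because it sits outside the $...$ templates, while the %05d inside $Number%05d$ is preserved and translated into a %-operator placeholder (identifier and values invented for illustration):

import re

tmpl = '$RepresentationID$/seg-$Number%05d$-50%.m4s'

# Pass 1: double % characters outside $...$ templates.
t, in_template = '', False
for c in tmpl:
    t += c
    if c == '$':
        in_template = not in_template
    elif c == '%' and not in_template:
        t += c

# Pass 2: translate $...$ templates into %-operator placeholders.
t = t.replace('$RepresentationID$', 'video=2400000')
t = re.sub(r'\$(Number)\$', r'%(\1)d', t)
t = re.sub(r'\$(Number)%([^$]+)\$', r'%(\1)\2', t)

assert t == 'video=2400000/seg-%(Number)05d-50%%.m4s'
assert t % {'Number': 7} == 'video=2400000/seg-00007-50%.m4s'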
@@ -2346,6 +2452,8 @@ class InfoExtractor(object):
                         media_info['subtitles'].setdefault(lang, []).append({
                             'url': absolute_url(src),
                         })
+            for f in media_info['formats']:
+                f.setdefault('http_headers', {})['Referer'] = base_url
             if media_info['formats'] or media_info['subtitles']:
                 entries.append(media_info)
         return entries
index f4a61645502a3b064a62bcd807dc2fcb5835254b..8dd9d66872eab57fca0abf82d50db004d7df425a 100644 (file)
@@ -4,16 +4,14 @@ from __future__ import unicode_literals, division
 import re
 
 from .common import InfoExtractor
-from ..compat import (
-    compat_str,
-    compat_HTTPError,
-)
+from ..compat import compat_HTTPError
 from ..utils import (
     determine_ext,
     float_or_none,
     int_or_none,
     parse_age_limit,
     parse_duration,
+    url_or_none,
     ExtractorError
 )
 
@@ -86,8 +84,8 @@ class CrackleIE(InfoExtractor):
             for e in media['MediaURLs']:
                 if e.get('UseDRM') is True:
                     continue
-                format_url = e.get('Path')
-                if not format_url or not isinstance(format_url, compat_str):
+                format_url = url_or_none(e.get('Path'))
+                if not format_url:
                     continue
                 ext = determine_ext(format_url)
                 if ext == 'm3u8':
@@ -124,8 +122,8 @@ class CrackleIE(InfoExtractor):
                 for cc_file in cc_files:
                     if not isinstance(cc_file, dict):
                         continue
-                    cc_url = cc_file.get('Path')
-                    if not cc_url or not isinstance(cc_url, compat_str):
+                    cc_url = url_or_none(cc_file.get('Path'))
+                    if not cc_url:
                         continue
                     lang = cc_file.get('Locale') or 'en'
                     subtitles.setdefault(lang, []).append({'url': cc_url})
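This hunk is one instance of a refactoring that recurs throughout this release (dctp, discoverygo, dramafever, eagleplatform, egghead and eporner below all get the same treatment): ad-hoc `not x or not isinstance(x, compat_str)` checks are collapsed into the new utils.url_or_none helper, which validates and returns the URL in one step. Conceptually (a simplified sketch; the canonical definition lives in youtube_dl/utils.py):

    import re

    def url_or_none(url):
        # Return url if it looks like an absolute or protocol-relative
        # URL string, otherwise None.
        if not url or not isinstance(url, str):
            return None
        url = url.strip()
        return url if re.match(r'^(?:[a-zA-Z][\da-zA-Z.+-]*:)?//', url) else None

With that, url_or_none(e.get('Path')) followed by a falsy check subsumes both the presence test and the type test.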
index 311da515df118e23df584641c6533897d96af1b8..ba8b9fa7eff3105fb657cf322fdacab17da632eb 100644 (file)
@@ -8,6 +8,7 @@ import zlib
 from hashlib import sha1
 from math import pow, sqrt, floor
 from .common import InfoExtractor
+from .vrv import VRVIE
 from ..compat import (
     compat_b64decode,
     compat_etree_fromstring,
@@ -18,6 +19,8 @@ from ..compat import (
 from ..utils import (
     ExtractorError,
     bytes_to_intlist,
+    extract_attributes,
+    float_or_none,
     intlist_to_bytes,
     int_or_none,
     lowercase_escape,
@@ -26,7 +29,6 @@ from ..utils import (
     unified_strdate,
     urlencode_postdata,
     xpath_text,
-    extract_attributes,
 )
 from ..aes import (
     aes_cbc_decrypt,
@@ -139,7 +141,8 @@ class CrunchyrollBaseIE(InfoExtractor):
             parsed_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
 
 
-class CrunchyrollIE(CrunchyrollBaseIE):
+class CrunchyrollIE(CrunchyrollBaseIE, VRVIE):
+    IE_NAME = 'crunchyroll'
     _VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.(?:com|fr)/(?:media(?:-|/\?id=)|[^/]*/[^/?&]*?)(?P<video_id>[0-9]+))(?:[/?&]|$)'
     _TESTS = [{
         'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513',
@@ -148,7 +151,7 @@ class CrunchyrollIE(CrunchyrollBaseIE):
             'ext': 'mp4',
             'title': 'Wanna be the Strongest in the World Episode 1 – An Idol-Wrestler is Born!',
             'description': 'md5:2d17137920c64f2f49981a7797d275ef',
-            'thumbnail': 'http://img1.ak.crunchyroll.com/i/spire1-tmb/20c6b5e10f1a47b10516877d3c039cae1380951166_full.jpg',
+            'thumbnail': r're:^https?://.*\.jpg$',
             'uploader': 'Yomiuri Telecasting Corporation (YTV)',
             'upload_date': '20131013',
             'url': 're:(?!.*&amp)',
@@ -221,7 +224,7 @@ class CrunchyrollIE(CrunchyrollBaseIE):
         'info_dict': {
             'id': '535080',
             'ext': 'mp4',
-            'title': '11eyes Episode 1 – Piros éjszaka - Red Night',
+            'title': '11eyes Episode 1 – Red Night ~ Piros éjszaka',
             'description': 'Kakeru and Yuka are thrown into an alternate nightmarish world they call "Red Night".',
             'uploader': 'Marvelous AQL Inc.',
             'upload_date': '20091021',
@@ -262,6 +265,9 @@ class CrunchyrollIE(CrunchyrollBaseIE):
             # Just test metadata extraction
             'skip_download': True,
         },
+    }, {
+        'url': 'http://www.crunchyroll.com/media-723735',
+        'only_matching': True,
     }]
 
     _FORMAT_IDS = {
@@ -434,13 +440,18 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
         if 'To view this, please log in to verify you are 18 or older.' in webpage:
             self.raise_login_required()
 
+        media = self._parse_json(self._search_regex(
+            r'vilos\.config\.media\s*=\s*({.+?});',
+            webpage, 'vilos media', default='{}'), video_id)
+        media_metadata = media.get('metadata') or {}
+
         video_title = self._html_search_regex(
             r'(?s)<h1[^>]*>((?:(?!<h1).)*?<span[^>]+itemprop=["\']title["\'][^>]*>(?:(?!<h1).)+?)</h1>',
             webpage, 'video_title')
         video_title = re.sub(r' {2,}', ' ', video_title)
-        video_description = self._parse_json(self._html_search_regex(
+        video_description = (self._parse_json(self._html_search_regex(
             r'<script[^>]*>\s*.+?\[media_id=%s\].+?({.+?"description"\s*:.+?})\);' % video_id,
-            webpage, 'description', default='{}'), video_id).get('description')
+            webpage, 'description', default='{}'), video_id) or media_metadata).get('description')
         if video_description:
             video_description = lowercase_escape(video_description.replace(r'\r\n', '\n'))
         video_upload_date = self._html_search_regex(
@@ -453,91 +464,99 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
             [r'<a[^>]+href="/publisher/[^"]+"[^>]*>([^<]+)</a>', r'<div>\s*Publisher:\s*<span>\s*(.+?)\s*</span>\s*</div>'],
             webpage, 'video_uploader', fatal=False)
 
-        available_fmts = []
-        for a, fmt in re.findall(r'(<a[^>]+token=["\']showmedia\.([0-9]{3,4})p["\'][^>]+>)', webpage):
-            attrs = extract_attributes(a)
-            href = attrs.get('href')
-            if href and '/freetrial' in href:
-                continue
-            available_fmts.append(fmt)
-        if not available_fmts:
-            for p in (r'token=["\']showmedia\.([0-9]{3,4})p"', r'showmedia\.([0-9]{3,4})p'):
-                available_fmts = re.findall(p, webpage)
-                if available_fmts:
-                    break
-        video_encode_ids = []
         formats = []
-        for fmt in available_fmts:
-            stream_quality, stream_format = self._FORMAT_IDS[fmt]
-            video_format = fmt + 'p'
-            stream_infos = []
-            streamdata = self._call_rpc_api(
-                'VideoPlayer_GetStandardConfig', video_id,
-                'Downloading media info for %s' % video_format, data={
-                    'media_id': video_id,
-                    'video_format': stream_format,
-                    'video_quality': stream_quality,
-                    'current_page': url,
-                })
-            if streamdata is not None:
-                stream_info = streamdata.find('./{default}preload/stream_info')
+        for stream in media.get('streams', []):
+            formats.extend(self._extract_vrv_formats(
+                stream.get('url'), video_id, stream.get('format'),
+                stream.get('audio_lang'), stream.get('hardsub_lang')))
+        if not formats:
+            available_fmts = []
+            for a, fmt in re.findall(r'(<a[^>]+token=["\']showmedia\.([0-9]{3,4})p["\'][^>]+>)', webpage):
+                attrs = extract_attributes(a)
+                href = attrs.get('href')
+                if href and '/freetrial' in href:
+                    continue
+                available_fmts.append(fmt)
+            if not available_fmts:
+                for p in (r'token=["\']showmedia\.([0-9]{3,4})p"', r'showmedia\.([0-9]{3,4})p'):
+                    available_fmts = re.findall(p, webpage)
+                    if available_fmts:
+                        break
+            if not available_fmts:
+                available_fmts = self._FORMAT_IDS.keys()
+            video_encode_ids = []
+
+            for fmt in available_fmts:
+                stream_quality, stream_format = self._FORMAT_IDS[fmt]
+                video_format = fmt + 'p'
+                stream_infos = []
+                streamdata = self._call_rpc_api(
+                    'VideoPlayer_GetStandardConfig', video_id,
+                    'Downloading media info for %s' % video_format, data={
+                        'media_id': video_id,
+                        'video_format': stream_format,
+                        'video_quality': stream_quality,
+                        'current_page': url,
+                    })
+                if streamdata is not None:
+                    stream_info = streamdata.find('./{default}preload/stream_info')
+                    if stream_info is not None:
+                        stream_infos.append(stream_info)
+                stream_info = self._call_rpc_api(
+                    'VideoEncode_GetStreamInfo', video_id,
+                    'Downloading stream info for %s' % video_format, data={
+                        'media_id': video_id,
+                        'video_format': stream_format,
+                        'video_encode_quality': stream_quality,
+                    })
                 if stream_info is not None:
                     stream_infos.append(stream_info)
-            stream_info = self._call_rpc_api(
-                'VideoEncode_GetStreamInfo', video_id,
-                'Downloading stream info for %s' % video_format, data={
-                    'media_id': video_id,
-                    'video_format': stream_format,
-                    'video_encode_quality': stream_quality,
-                })
-            if stream_info is not None:
-                stream_infos.append(stream_info)
-            for stream_info in stream_infos:
-                video_encode_id = xpath_text(stream_info, './video_encode_id')
-                if video_encode_id in video_encode_ids:
-                    continue
-                video_encode_ids.append(video_encode_id)
-
-                video_file = xpath_text(stream_info, './file')
-                if not video_file:
-                    continue
-                if video_file.startswith('http'):
-                    formats.extend(self._extract_m3u8_formats(
-                        video_file, video_id, 'mp4', entry_protocol='m3u8_native',
-                        m3u8_id='hls', fatal=False))
-                    continue
+                for stream_info in stream_infos:
+                    video_encode_id = xpath_text(stream_info, './video_encode_id')
+                    if video_encode_id in video_encode_ids:
+                        continue
+                    video_encode_ids.append(video_encode_id)
 
-                video_url = xpath_text(stream_info, './host')
-                if not video_url:
-                    continue
-                metadata = stream_info.find('./metadata')
-                format_info = {
-                    'format': video_format,
-                    'height': int_or_none(xpath_text(metadata, './height')),
-                    'width': int_or_none(xpath_text(metadata, './width')),
-                }
-
-                if '.fplive.net/' in video_url:
-                    video_url = re.sub(r'^rtmpe?://', 'http://', video_url.strip())
-                    parsed_video_url = compat_urlparse.urlparse(video_url)
-                    direct_video_url = compat_urlparse.urlunparse(parsed_video_url._replace(
-                        netloc='v.lvlt.crcdn.net',
-                        path='%s/%s' % (remove_end(parsed_video_url.path, '/'), video_file.split(':')[-1])))
-                    if self._is_valid_url(direct_video_url, video_id, video_format):
-                        format_info.update({
-                            'format_id': 'http-' + video_format,
-                            'url': direct_video_url,
-                        })
-                        formats.append(format_info)
+                    video_file = xpath_text(stream_info, './file')
+                    if not video_file:
+                        continue
+                    if video_file.startswith('http'):
+                        formats.extend(self._extract_m3u8_formats(
+                            video_file, video_id, 'mp4', entry_protocol='m3u8_native',
+                            m3u8_id='hls', fatal=False))
                         continue
 
-                format_info.update({
-                    'format_id': 'rtmp-' + video_format,
-                    'url': video_url,
-                    'play_path': video_file,
-                    'ext': 'flv',
-                })
-                formats.append(format_info)
+                    video_url = xpath_text(stream_info, './host')
+                    if not video_url:
+                        continue
+                    metadata = stream_info.find('./metadata')
+                    format_info = {
+                        'format': video_format,
+                        'height': int_or_none(xpath_text(metadata, './height')),
+                        'width': int_or_none(xpath_text(metadata, './width')),
+                    }
+
+                    if '.fplive.net/' in video_url:
+                        video_url = re.sub(r'^rtmpe?://', 'http://', video_url.strip())
+                        parsed_video_url = compat_urlparse.urlparse(video_url)
+                        direct_video_url = compat_urlparse.urlunparse(parsed_video_url._replace(
+                            netloc='v.lvlt.crcdn.net',
+                            path='%s/%s' % (remove_end(parsed_video_url.path, '/'), video_file.split(':')[-1])))
+                        if self._is_valid_url(direct_video_url, video_id, video_format):
+                            format_info.update({
+                                'format_id': 'http-' + video_format,
+                                'url': direct_video_url,
+                            })
+                            formats.append(format_info)
+                            continue
+
+                    format_info.update({
+                        'format_id': 'rtmp-' + video_format,
+                        'url': video_url,
+                        'play_path': video_file,
+                        'ext': 'flv',
+                    })
+                    formats.append(format_info)
         self._sort_formats(formats, ('height', 'width', 'tbr', 'fps'))
 
         metadata = self._call_rpc_api(
@@ -546,7 +565,17 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
                 'media_id': video_id,
             })
 
-        subtitles = self.extract_subtitles(video_id, webpage)
+        subtitles = {}
+        for subtitle in media.get('subtitles', []):
+            subtitle_url = subtitle.get('url')
+            if not subtitle_url:
+                continue
+            subtitles.setdefault(subtitle.get('language', 'enUS'), []).append({
+                'url': subtitle_url,
+                'ext': subtitle.get('format', 'ass'),
+            })
+        if not subtitles:
+            subtitles = self.extract_subtitles(video_id, webpage)
 
         # webpage provides more accurate data than series_title from XML
         series = self._html_search_regex(
@@ -554,8 +583,8 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
             webpage, 'series', fatal=False)
         season = xpath_text(metadata, 'series_title')
 
-        episode = xpath_text(metadata, 'episode_title')
-        episode_number = int_or_none(xpath_text(metadata, 'episode_number'))
+        episode = xpath_text(metadata, 'episode_title') or media_metadata.get('title')
+        episode_number = int_or_none(xpath_text(metadata, 'episode_number') or media_metadata.get('episode_number'))
 
         season_number = int_or_none(self._search_regex(
             r'(?s)<h\d[^>]+id=["\']showmedia_about_episode_num[^>]+>.+?</h\d>\s*<h4>\s*Season (\d+)',
@@ -565,7 +594,8 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
             'id': video_id,
             'title': video_title,
             'description': video_description,
-            'thumbnail': xpath_text(metadata, 'episode_image_url'),
+            'duration': float_or_none(media_metadata.get('duration'), 1000),
+            'thumbnail': xpath_text(metadata, 'episode_image_url') or media_metadata.get('thumbnail', {}).get('url'),
             'uploader': video_uploader,
             'upload_date': video_upload_date,
             'series': series,
@@ -580,7 +610,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
 
 class CrunchyrollShowPlaylistIE(CrunchyrollBaseIE):
     IE_NAME = 'crunchyroll:playlist'
-    _VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.com/(?!(?:news|anime-news|library|forum|launchcalendar|lineup|store|comics|freetrial|login))(?P<id>[\w\-]+))/?(?:\?|$)'
+    _VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.com/(?!(?:news|anime-news|library|forum|launchcalendar|lineup|store|comics|freetrial|login|media-\d+))(?P<id>[\w\-]+))/?(?:\?|$)'
 
     _TESTS = [{
         'url': 'http://www.crunchyroll.com/a-bridge-to-the-starry-skies-hoshizora-e-kakaru-hashi',
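The substance of the Crunchyroll change: stream data now comes primarily from the vilos.config.media JSON embedded in the watch page (handed to VRVIE._extract_vrv_formats via the new mixin), and the legacy showmedia/RPC path is kept only as a fallback. Extracting that blob reduces to a few lines; a standalone sketch (parse_vilos_media is a hypothetical name):

    import json
    import re

    def parse_vilos_media(webpage):
        # The player embeds its config as 'vilos.config.media = {...};'
        mobj = re.search(r'vilos\.config\.media\s*=\s*({.+?});', webpage)
        return json.loads(mobj.group(1)) if mobj else {}

Its 'streams', 'subtitles' and 'metadata' keys drive the format list, the subtitle map and the new duration/thumbnail/episode fields respectively.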
index f4cf0f1c5458af0b39f41957a9e90333bb586516..224a1fb5d4abcd49d3285a1aed7724d5de6a17e9 100644 (file)
@@ -4,7 +4,10 @@ from __future__ import unicode_literals
 from .common import InfoExtractor
 from ..utils import (
     int_or_none,
+    parse_age_limit,
     parse_iso8601,
+    smuggle_url,
+    str_or_none,
 )
 
 
@@ -40,10 +43,15 @@ class CWTVIE(InfoExtractor):
             'duration': 1263,
             'series': 'Whose Line Is It Anyway?',
             'season_number': 11,
-            'season': '11',
             'episode_number': 20,
             'upload_date': '20151006',
             'timestamp': 1444107300,
+            'age_limit': 14,
+            'uploader': 'CWTV',
+        },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
         },
     }, {
         'url': 'http://cwtv.com/thecw/chroniclesofcisco/?play=8adebe35-f447-465f-ab52-e863506ff6d6',
@@ -58,60 +66,28 @@ class CWTVIE(InfoExtractor):
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
-        video_data = None
-        formats = []
-        for partner in (154, 213):
-            vdata = self._download_json(
-                'http://metaframe.digitalsmiths.tv/v2/CWtv/assets/%s/partner/%d?format=json' % (video_id, partner), video_id, fatal=False)
-            if not vdata:
-                continue
-            video_data = vdata
-            for quality, quality_data in vdata.get('videos', {}).items():
-                quality_url = quality_data.get('uri')
-                if not quality_url:
-                    continue
-                if quality == 'variantplaylist':
-                    formats.extend(self._extract_m3u8_formats(
-                        quality_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
-                else:
-                    tbr = int_or_none(quality_data.get('bitrate'))
-                    format_id = 'http' + ('-%d' % tbr if tbr else '')
-                    if self._is_valid_url(quality_url, video_id, format_id):
-                        formats.append({
-                            'format_id': format_id,
-                            'url': quality_url,
-                            'tbr': tbr,
-                        })
-        video_metadata = video_data['assetFields']
-        ism_url = video_metadata.get('smoothStreamingUrl')
-        if ism_url:
-            formats.extend(self._extract_ism_formats(
-                ism_url, video_id, ism_id='mss', fatal=False))
-        self._sort_formats(formats)
-
-        thumbnails = [{
-            'url': image['uri'],
-            'width': image.get('width'),
-            'height': image.get('height'),
-        } for image_id, image in video_data['images'].items() if image.get('uri')] if video_data.get('images') else None
+        video_data = self._download_json(
+            'http://images.cwtv.com/feed/mobileapp/video-meta/apiversion_8/guid_' + video_id,
+            video_id)['video']
+        title = video_data['title']
+        mpx_url = video_data.get('mpx_url') or 'http://link.theplatform.com/s/cwtv/media/guid/2703454149/%s?formats=M3U' % video_id
 
-        subtitles = {
-            'en': [{
-                'url': video_metadata['UnicornCcUrl'],
-            }],
-        } if video_metadata.get('UnicornCcUrl') else None
+        season = str_or_none(video_data.get('season'))
+        episode = str_or_none(video_data.get('episode'))
+        if episode and season:
+            episode = episode.lstrip(season)
 
         return {
+            '_type': 'url_transparent',
             'id': video_id,
             'id': video_id,
-            'title': video_metadata['title'],
-            'description': video_metadata.get('description'),
-            'duration': int_or_none(video_metadata.get('duration')),
-            'series': video_metadata.get('seriesName'),
-            'season_number': int_or_none(video_metadata.get('seasonNumber')),
-            'season': video_metadata.get('seasonName'),
-            'episode_number': int_or_none(video_metadata.get('episodeNumber')),
-            'timestamp': parse_iso8601(video_data.get('startTime')),
-            'thumbnails': thumbnails,
-            'formats': formats,
-            'subtitles': subtitles,
+            'title': title,
+            'url': smuggle_url(mpx_url, {'force_smil_url': True}),
+            'description': video_data.get('description_long'),
+            'duration': int_or_none(video_data.get('duration_secs')),
+            'series': video_data.get('series_name'),
+            'season_number': int_or_none(season),
+            'episode_number': int_or_none(episode),
+            'timestamp': parse_iso8601(video_data.get('start_time')),
+            'age_limit': parse_age_limit(video_data.get('rating')),
+            'ie_key': 'ThePlatform',
         }
         }
index 9a74906cb625f590bd5d1bc8862fc5c652fac52c..040f0bd02e94992bb1e4248d425f564ce7c81c75 100644 (file)
@@ -2,6 +2,7 @@
 from __future__ import unicode_literals
 
 import base64
 from __future__ import unicode_literals
 
 import base64
+import functools
 import hashlib
 import itertools
 import json
 import hashlib
 import itertools
 import json
@@ -16,11 +17,13 @@ from ..utils import (
     error_to_compat_str,
     ExtractorError,
     int_or_none,
     error_to_compat_str,
     ExtractorError,
     int_or_none,
+    mimetype2ext,
+    OnDemandPagedList,
     parse_iso8601,
     sanitized_Request,
     str_to_int,
     unescapeHTML,
     parse_iso8601,
     sanitized_Request,
     str_to_int,
     unescapeHTML,
-    mimetype2ext,
+    urlencode_postdata,
 )
 
 
 )
 
 
@@ -144,7 +147,8 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
 
         age_limit = self._rta_search(webpage)
 
 
         age_limit = self._rta_search(webpage)
 
-        description = self._og_search_description(webpage) or self._html_search_meta(
+        description = self._og_search_description(
+            webpage, default=None) or self._html_search_meta(
             'description', webpage, 'description')
 
         view_count_str = self._search_regex(
@@ -342,58 +346,73 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
 
 class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
     IE_NAME = 'dailymotion:playlist'
-    _VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/playlist/(?P<id>[^/?#&]+)'
-    _MORE_PAGES_INDICATOR = r'(?s)<div class="pages[^"]*">.*?<a\s+class="[^"]*?icon-arrow_right[^"]*?"'
-    _PAGE_TEMPLATE = 'https://www.dailymotion.com/playlist/%s/%s'
+    _VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/playlist/(?P<id>x[0-9a-z]+)'
     _TESTS = [{
         'url': 'http://www.dailymotion.com/playlist/xv4bw_nqtv_sport/1#video=xl8v3q',
         'info_dict': {
             'title': 'SPORT',
-            'id': 'xv4bw_nqtv_sport',
+            'id': 'xv4bw',
         },
         'playlist_mincount': 20,
     }]
-
-    def _extract_entries(self, id):
-        video_ids = set()
-        processed_urls = set()
-        for pagenum in itertools.count(1):
-            page_url = self._PAGE_TEMPLATE % (id, pagenum)
-            webpage, urlh = self._download_webpage_handle_no_ff(
-                page_url, id, 'Downloading page %s' % pagenum)
-            if urlh.geturl() in processed_urls:
-                self.report_warning('Stopped at duplicated page %s, which is the same as %s' % (
-                    page_url, urlh.geturl()), id)
-                break
-
-            processed_urls.add(urlh.geturl())
-
-            for video_id in re.findall(r'data-xid="(.+?)"', webpage):
-                if video_id not in video_ids:
-                    yield self.url_result(
-                        'http://www.dailymotion.com/video/%s' % video_id,
-                        DailymotionIE.ie_key(), video_id)
-                    video_ids.add(video_id)
-
-            if re.search(self._MORE_PAGES_INDICATOR, webpage) is None:
-                break
+    _PAGE_SIZE = 100
+
+    def _fetch_page(self, playlist_id, authorization, page):
+        page += 1
+        videos = self._download_json(
+            'https://graphql.api.dailymotion.com',
+            playlist_id, 'Downloading page %d' % page,
+            data=json.dumps({
+                'query': '''{
+  collection(xid: "%s") {
+    videos(first: %d, page: %d) {
+      pageInfo {
+        hasNextPage
+        nextPage
+      }
+      edges {
+        node {
+          xid
+          url
+        }
+      }
+    }
+  }
+}''' % (playlist_id, self._PAGE_SIZE, page)
+            }).encode(), headers={
+                'Authorization': authorization,
+                'Origin': 'https://www.dailymotion.com',
+            })['data']['collection']['videos']
+        for edge in videos['edges']:
+            node = edge['node']
+            yield self.url_result(
+                node['url'], DailymotionIE.ie_key(), node['xid'])
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        playlist_id = mobj.group('id')
+        playlist_id = self._match_id(url)
         webpage = self._download_webpage(url, playlist_id)
-
-        return {
-            '_type': 'playlist',
-            'id': playlist_id,
-            'title': self._og_search_title(webpage),
-            'entries': self._extract_entries(playlist_id),
-        }
-
-
-class DailymotionUserIE(DailymotionPlaylistIE):
+        api = self._parse_json(self._search_regex(
+            r'__PLAYER_CONFIG__\s*=\s*({.+?});',
+            webpage, 'player config'), playlist_id)['context']['api']
+        auth = self._download_json(
+            api.get('auth_url', 'https://graphql.api.dailymotion.com/oauth/token'),
+            playlist_id, data=urlencode_postdata({
+                'client_id': api.get('client_id', 'f1a362d288c1b98099c7'),
+                'client_secret': api.get('client_secret', 'eea605b96e01c796ff369935357eca920c5da4c5'),
+                'grant_type': 'client_credentials',
+            }))
+        authorization = '%s %s' % (auth.get('token_type', 'Bearer'), auth['access_token'])
+        entries = OnDemandPagedList(functools.partial(
+            self._fetch_page, playlist_id, authorization), self._PAGE_SIZE)
+        return self.playlist_result(
+            entries, playlist_id,
+            self._og_search_title(webpage))
+
+
+class DailymotionUserIE(DailymotionBaseInfoExtractor):
     IE_NAME = 'dailymotion:user'
     _VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/(?!(?:embed|swf|#|video|playlist)/)(?:(?:old/)?user/)?(?P<user>[^/]+)'
+    _MORE_PAGES_INDICATOR = r'(?s)<div class="pages[^"]*">.*?<a\s+class="[^"]*?icon-arrow_right[^"]*?"'
     _PAGE_TEMPLATE = 'http://www.dailymotion.com/user/%s/%s'
     _TESTS = [{
         'url': 'https://www.dailymotion.com/user/nqtv',
@@ -415,6 +434,30 @@ class DailymotionUserIE(DailymotionPlaylistIE):
         'skip': 'Takes too long time',
     }]
 
+    def _extract_entries(self, id):
+        video_ids = set()
+        processed_urls = set()
+        for pagenum in itertools.count(1):
+            page_url = self._PAGE_TEMPLATE % (id, pagenum)
+            webpage, urlh = self._download_webpage_handle_no_ff(
+                page_url, id, 'Downloading page %s' % pagenum)
+            if urlh.geturl() in processed_urls:
+                self.report_warning('Stopped at duplicated page %s, which is the same as %s' % (
+                    page_url, urlh.geturl()), id)
+                break
+
+            processed_urls.add(urlh.geturl())
+
+            for video_id in re.findall(r'data-xid="(.+?)"', webpage):
+                if video_id not in video_ids:
+                    yield self.url_result(
+                        'http://www.dailymotion.com/video/%s' % video_id,
+                        DailymotionIE.ie_key(), video_id)
+                    video_ids.add(video_id)
+
+            if re.search(self._MORE_PAGES_INDICATOR, webpage) is None:
+                break
+
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         user = mobj.group('user')
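The playlist extractor now pages through Dailymotion's GraphQL API lazily: OnDemandPagedList wraps a zero-based page-fetching callable and only requests the pages a run actually consumes (which matters with options like --playlist-items). The shape of that API, in a minimal sketch matching the call above:

    import functools
    from youtube_dl.utils import OnDemandPagedList

    PAGE_SIZE = 100

    def fetch_page(playlist_id, page):
        # page is zero-based; return an iterable of this page's entries.
        first = page * PAGE_SIZE
        return ['%s-entry-%d' % (playlist_id, first + i)
                for i in range(PAGE_SIZE)]

    entries = OnDemandPagedList(
        functools.partial(fetch_page, 'xv4bw'), PAGE_SIZE)
    # entries behaves like a lazy sequence; entries.getslice(0, 5)
    # touches only the first page.

Note also that DailymotionUserIE stops inheriting from DailymotionPlaylistIE and takes the old HTML-scraping _extract_entries with it, since only user pages still use that path.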
index 3a6d0560e478cb08445f07d84578d28b3e4bc514..769a219dffe36ec73dab5d1e0fc18ab9bfc61993 100644 (file)
@@ -5,13 +5,16 @@ from .common import InfoExtractor
 from ..compat import compat_str
 from ..utils import (
     float_or_none,
-    unified_strdate,
+    int_or_none,
+    unified_timestamp,
+    url_or_none,
 )
 
 
 class DctpTvIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?dctp\.tv/(?:#/)?filme/(?P<id>[^/?#&]+)'
-    _TEST = {
+    _TESTS = [{
+        # 4x3
         'url': 'http://www.dctp.tv/filme/videoinstallation-fuer-eine-kaufhausfassade/',
         'info_dict': {
             'id': '95eaa4f33dad413aa17b4ee613cccc6c',
@@ -19,37 +22,55 @@ class DctpTvIE(InfoExtractor):
             'ext': 'flv',
             'title': 'Videoinstallation für eine Kaufhausfassade',
             'description': 'Kurzfilm',
-            'upload_date': '20110407',
             'thumbnail': r're:^https?://.*\.jpg$',
             'duration': 71.24,
+            'timestamp': 1302172322,
+            'upload_date': '20110407',
         },
         'params': {
             # rtmp download
             'skip_download': True,
         },
-    }
+    }, {
+        # 16x9
+        'url': 'http://www.dctp.tv/filme/sind-youtuber-die-besseren-lehrer/',
+        'only_matching': True,
+    }]
+
+    _BASE_URL = 'http://dctp-ivms2-restapi.s3.amazonaws.com'
 
     def _real_extract(self, url):
         display_id = self._match_id(url)
 
 
     def _real_extract(self, url):
         display_id = self._match_id(url)
 
-        webpage = self._download_webpage(url, display_id)
+        version = self._download_json(
+            '%s/version.json' % self._BASE_URL, display_id,
+            'Downloading version JSON')
+
+        restapi_base = '%s/%s/restapi' % (
+            self._BASE_URL, version['version_name'])
 
 
-            'DC.identifier', webpage, 'video id',
-            default=None) or self._search_regex(
-            r'id=["\']uuid[^>]+>([^<]+)<', webpage, 'video id')
+        info = self._download_json(
+            '%s/slugs/%s.json' % (restapi_base, display_id), display_id,
+            'Downloading video info JSON')
 
 
-        title = self._og_search_title(webpage)
+        media = self._download_json(
+            '%s/media/%s.json' % (restapi_base, compat_str(info['object_id'])),
+            display_id, 'Downloading media JSON')
+
+        uuid = media['uuid']
+        title = media['title']
+        ratio = '16x9' if media.get('is_wide') else '4x3'
+        play_path = 'mp4:%s_dctp_0500_%s.m4v' % (uuid, ratio)
 
         servers = self._download_json(
             'http://www.dctp.tv/streaming_servers/', display_id,
-            note='Downloading server list', fatal=False)
+            note='Downloading server list JSON', fatal=False)
 
         if servers:
             endpoint = next(
                 server['endpoint']
                 for server in servers
-                if isinstance(server.get('endpoint'), compat_str) and
+                if url_or_none(server.get('endpoint')) and
                 'cloudfront' in server['endpoint'])
         else:
             endpoint = 'rtmpe://s2pqqn4u96e4j8.cloudfront.net/cfx/st/'
@@ -60,27 +81,35 @@ class DctpTvIE(InfoExtractor):
         formats = [{
             'url': endpoint,
             'app': app,
-            'play_path': 'mp4:%s_dctp_0500_4x3.m4v' % video_id,
+            'play_path': play_path,
             'page_url': url,
             'page_url': url,
-            'player_url': 'http://svm-prod-dctptv-static.s3.amazonaws.com/dctptv-relaunch2012-109.swf',
+            'player_url': 'http://svm-prod-dctptv-static.s3.amazonaws.com/dctptv-relaunch2012-110.swf',
             'ext': 'flv',
         }]
 
-        description = self._html_search_meta('DC.description', webpage)
-        upload_date = unified_strdate(
-            self._html_search_meta('DC.date.created', webpage))
-        thumbnail = self._og_search_thumbnail(webpage)
-        duration = float_or_none(self._search_regex(
-            r'id=["\']duration_in_ms[^+]>(\d+)', webpage, 'duration',
-            default=None), scale=1000)
+        thumbnails = []
+        images = media.get('images')
+        if isinstance(images, list):
+            for image in images:
+                if not isinstance(image, dict):
+                    continue
+                image_url = url_or_none(image.get('url'))
+                if not image_url:
+                    continue
+                thumbnails.append({
+                    'url': image_url,
+                    'width': int_or_none(image.get('width')),
+                    'height': int_or_none(image.get('height')),
+                })
 
         return {
-            'id': video_id,
+            'id': uuid,
+            'display_id': display_id,
             'title': title,
+            'alt_title': media.get('subtitle'),
+            'description': media.get('description') or media.get('teaser'),
+            'timestamp': unified_timestamp(media.get('created')),
+            'duration': float_or_none(media.get('duration_in_ms'), scale=1000),
+            'thumbnails': thumbnails,
             'formats': formats,
             'formats': formats,
-            'display_id': display_id,
-            'description': description,
-            'upload_date': upload_date,
-            'thumbnail': thumbnail,
-            'duration': duration,
         }
         }
index 3368c4c075505df8a4ff03198d66a33a76f5b2f6..9e7b14a7d594e5fbb145aab0521bc4f8250305a6 100644 (file)
@@ -3,7 +3,6 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
 import re
 
 from .common import InfoExtractor
-from ..compat import compat_str
 from ..utils import (
     determine_ext,
     extract_attributes,
 from ..utils import (
     determine_ext,
     extract_attributes,
@@ -12,6 +11,7 @@ from ..utils import (
     parse_age_limit,
     remove_end,
     unescapeHTML,
     parse_age_limit,
     remove_end,
     unescapeHTML,
+    url_or_none,
 )
 
 
 )
 
 
@@ -69,9 +69,8 @@ class DiscoveryGoBaseIE(InfoExtractor):
         captions = stream.get('captions')
         if isinstance(captions, list):
             for caption in captions:
         captions = stream.get('captions')
         if isinstance(captions, list):
             for caption in captions:
-                subtitle_url = caption.get('fileUrl')
-                if (not subtitle_url or not isinstance(subtitle_url, compat_str) or
-                        not subtitle_url.startswith('http')):
+                subtitle_url = url_or_none(caption.get('fileUrl'))
+                if not subtitle_url or not subtitle_url.startswith('http'):
                     continue
                 lang = caption.get('fileLang', 'en')
                 ext = determine_ext(subtitle_url)
index fe47f6dcef72dcac44aa811ca3cc112d93f066cf..ebf59512c6bdf89df0efeb22c8cf047156dd4a7d 100644 (file)
@@ -21,6 +21,7 @@ from ..utils import (
     unified_strdate,
     unified_timestamp,
     update_url_query,
+    urljoin,
     USER_AGENTS,
 )
 
@@ -310,9 +311,11 @@ class DPlayItIE(InfoExtractor):
 
         if not info:
             info_url = self._search_regex(
-                r'url\s*[:=]\s*["\']((?:https?:)?//[^/]+/playback/videoPlaybackInfo/\d+)',
-                webpage, 'info url')
+                (r'playback_json_url\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1',
+                 r'url\s*[:=]\s*["\'](?P<url>(?:https?:)?//[^/]+/playback/videoPlaybackInfo/\d+)'),
+                webpage, 'info url', group='url')
 
 
+            info_url = urljoin(url, info_url)
             video_id = info_url.rpartition('/')[-1]
 
             try:
@@ -322,6 +325,8 @@ class DPlayItIE(InfoExtractor):
                             'dplayit_token').value,
                         'Referer': url,
                     })
+                if isinstance(info, compat_str):
+                    info = self._parse_json(info, display_id)
             except ExtractorError as e:
                 if isinstance(e.cause, compat_HTTPError) and e.cause.code in (400, 403):
                     info = self._parse_json(e.cause.read().decode('utf-8'), display_id)
@@ -337,6 +342,7 @@ class DPlayItIE(InfoExtractor):
         formats = self._extract_m3u8_formats(
             hls_url, display_id, ext='mp4', entry_protocol='m3u8_native',
             m3u8_id='hls')
+        self._sort_formats(formats)
 
         series = self._html_search_regex(
             r'(?s)<h1[^>]+class=["\'].*?\bshow_title\b.*?["\'][^>]*>(.+?)</h1>',
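Besides accepting the newer playback_json_url page layout, the hunk resolves the matched URL against the page URL, since the info link can now be path-relative. The standard library's urljoin illustrates the effect for that case (utils.urljoin behaves analogously here, with extra type checks; absolute and protocol-relative URLs pass through):

    from urllib.parse import urljoin

    base = 'https://it.dplay.com/nove/show/episode/'
    print(urljoin(base, '/playback/videoPlaybackInfo/123'))
    # https://it.dplay.com/playback/videoPlaybackInfo/123

The new isinstance branch and the _sort_formats call harden the same code path: the playback info may now arrive as a JSON string, and the format list was previously emitted unsorted.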
index ab32ba4ff3eee337e8dfc2c7fa11c755e2b027c4..db1de699fafc5885da2c48f72f5e102825449703 100644 (file)
@@ -7,7 +7,6 @@ import json
 from .common import InfoExtractor
 from ..compat import (
     compat_HTTPError,
-    compat_str,
     compat_urlparse,
 )
 from ..utils import (
@@ -17,6 +16,7 @@ from ..utils import (
     parse_age_limit,
     parse_duration,
     unified_timestamp,
+    url_or_none,
 )
 
 
@@ -139,8 +139,8 @@ class DramaFeverIE(DramaFeverBaseIE):
         for sub in subs:
             if not isinstance(sub, dict):
                 continue
-            sub_url = sub.get('url')
-            if not sub_url or not isinstance(sub_url, compat_str):
+            sub_url = url_or_none(sub.get('url'))
+            if not sub_url:
                 continue
             subtitles.setdefault(
                 sub.get('code') or sub.get('language') or 'en', []).append({
@@ -163,8 +163,8 @@ class DramaFeverIE(DramaFeverBaseIE):
             for format_id, format_dict in download_assets.items():
                 if not isinstance(format_dict, dict):
                     continue
-                format_url = format_dict.get('url')
-                if not format_url or not isinstance(format_url, compat_str):
+                format_url = url_or_none(format_dict.get('url'))
+                if not format_url:
                     continue
                 formats.append({
                     'url': format_url,
index 4ca97f8606e8958d60483dc72a6de545cadf165f..5887887e15ef9065515abb1472e06511e55e32c9 100644 (file)
@@ -59,7 +59,7 @@ class DTubeIE(InfoExtractor):
             try:
                 self.to_screen('%s: Checking %s video format URL' % (video_id, format_id))
                 self._downloader._opener.open(video_url, timeout=5).close()
-            except timeout as e:
+            except timeout:
                 self.to_screen(
                     '%s: %s URL is invalid, skipping' % (video_id, format_id))
                 continue
index 42789278e3a04753bcbce06ad62316c458708dc1..36fef07b72be4db2183f2ed5fdee63981781a688 100644 (file)
@@ -4,14 +4,12 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..compat import (
-    compat_HTTPError,
-    compat_str,
-)
+from ..compat import compat_HTTPError
 from ..utils import (
     ExtractorError,
     int_or_none,
     unsmuggle_url,
+    url_or_none,
 )
 
 
@@ -177,7 +175,7 @@ class EaglePlatformIE(InfoExtractor):
             video_id, 'Downloading mp4 JSON', fatal=False)
         if mp4_data:
             for format_id, format_url in mp4_data.get('data', {}).items():
-                if not isinstance(format_url, compat_str):
+                if not url_or_none(format_url):
                     continue
                 height = int_or_none(format_id)
                 if height is not None and m3u8_formats_dict.get(height):
index edabaafe689a3d4ffae1b626dc1a55aa068d05aa..df11dc206d29b7938db320d34a0a1a57753ab602 100644 (file)
@@ -8,6 +8,7 @@ from ..utils import (
     int_or_none,
     try_get,
     unified_timestamp,
+    url_or_none,
 )
 
 
@@ -34,8 +35,8 @@ class EggheadCourseIE(InfoExtractor):
 
         entries = []
         for lesson in lessons:
-            lesson_url = lesson.get('http_url')
-            if not lesson_url or not isinstance(lesson_url, compat_str):
+            lesson_url = url_or_none(lesson.get('http_url'))
+            if not lesson_url:
                 continue
             lesson_id = lesson.get('id')
             if lesson_id:
@@ -95,7 +96,8 @@ class EggheadLessonIE(InfoExtractor):
 
         formats = []
         for _, format_url in lesson['media_urls'].items():
-            if not format_url or not isinstance(format_url, compat_str):
+            format_url = url_or_none(format_url)
+            if not format_url:
                 continue
             ext = determine_ext(format_url)
             if ext == 'm3u8':
index 81f2e2ee1c4ac599d8844965fb9271e745293268..6d03d7095822d079d780cc8fb571adb06b7314d3 100644 (file)
@@ -11,6 +11,7 @@ from ..utils import (
     int_or_none,
     parse_duration,
     str_to_int,
+    url_or_none,
 )
 
 
@@ -82,8 +83,8 @@ class EpornerIE(InfoExtractor):
             for format_id, format_dict in formats_dict.items():
                 if not isinstance(format_dict, dict):
                     continue
-                src = format_dict.get('src')
-                if not isinstance(src, compat_str) or not src.startswith('http'):
+                src = url_or_none(format_dict.get('src'))
+                if not src or not src.startswith('http'):
                     continue
                 if kind == 'hls':
                     formats.extend(self._extract_m3u8_formats(
index f61178012feb4db017e3c96f18ec4959ac3f4f42..9345714726bb7121b39eabf7fe86c1e669a4b39c 100644 (file)
@@ -1,6 +1,8 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
+import re
+
 from .common import InfoExtractor
 from ..utils import (
     determine_ext,
@@ -11,7 +13,13 @@ from ..utils import (
 
 
 class ExpressenIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?expressen\.se/tv/(?:[^/]+/)*(?P<id>[^/?#&]+)'
+    _VALID_URL = r'''(?x)
+                    https?://
+                        (?:www\.)?expressen\.se/
+                        (?:(?:tvspelare/video|videoplayer/embed)/)?
+                        tv/(?:[^/]+/)*
+                        (?P<id>[^/?#&]+)
+                    '''
     _TESTS = [{
         'url': 'https://www.expressen.se/tv/ledare/ledarsnack/ledarsnack-om-arbetslosheten-bland-kvinnor-i-speciellt-utsatta-omraden/',
         'md5': '2fbbe3ca14392a6b1b36941858d33a45',
@@ -28,8 +36,21 @@ class ExpressenIE(InfoExtractor):
     }, {
         'url': 'https://www.expressen.se/tv/kultur/kulturdebatt-med-expressens-karin-olsson/',
         'only_matching': True,
+    }, {
+        'url': 'https://www.expressen.se/tvspelare/video/tv/ditv/ekonomistudion/experterna-har-ar-fragorna-som-avgor-valet/?embed=true&external=true&autoplay=true&startVolume=0&partnerId=di',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.expressen.se/videoplayer/embed/tv/ditv/ekonomistudion/experterna-har-ar-fragorna-som-avgor-valet/?embed=true&external=true&autoplay=true&startVolume=0&partnerId=di',
+        'only_matching': True,
     }]
 
+    @staticmethod
+    def _extract_urls(webpage):
+        return [
+            mobj.group('url') for mobj in re.finditer(
+                r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:www\.)?expressen\.se/(?:tvspelare/video|videoplayer/embed)/tv/.+?)\1',
+                webpage)]
+
     def _real_extract(self, url):
         display_id = self._match_id(url)
 
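The widened _VALID_URL and the new _extract_urls hook let the generic extractor pick embedded Expressen players out of third-party pages. The iframe pattern in action, as a quick self-check:

    import re

    EXPRESSEN_IFRAME_RE = (
        r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:www\.)?expressen\.se'
        r'/(?:tvspelare/video|videoplayer/embed)/tv/.+?)\1')

    sample = ('<iframe src="https://www.expressen.se/tvspelare/video/tv/ditv/'
              'ekonomistudion/experterna-har-ar-fragorna-som-avgor-valet/'
              '?embed=true"></iframe>')
    print([m.group('url') for m in re.finditer(EXPRESSEN_IFRAME_RE, sample)])
    # one match: the src URL, with the surrounding quotes stripped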
index 3b3964c0112737c0ea4c62165ffc02b0633de198..7dc56972498325b428a7c134115dc78ac5f81556 100644 (file)
@@ -54,6 +54,7 @@ from .appletrailers import (
 from .archiveorg import ArchiveOrgIE
 from .arkena import ArkenaIE
 from .ard import (
+    ARDBetaMediathekIE,
     ARDIE,
     ARDMediathekIE,
 )
@@ -118,6 +119,10 @@ from .bilibili import (
     BiliBiliBangumiIE,
 )
 from .biobiochiletv import BioBioChileTVIE
+from .bitchute import (
+    BitChuteIE,
+    BitChuteChannelIE,
+)
 from .biqle import BIQLEIE
 from .bleacherreport import (
     BleacherReportIE,
@@ -373,7 +378,6 @@ from .foxgay import FoxgayIE
 from .foxnews import (
     FoxNewsIE,
     FoxNewsArticleIE,
-    FoxNewsInsiderIE,
 )
 from .foxsports import FoxSportsIE
 from .franceculture import FranceCultureIE
@@ -391,6 +395,11 @@ from .francetv import (
 from .freesound import FreesoundIE
 from .freespeech import FreespeechIE
 from .freshlive import FreshLiveIE
+from .frontendmasters import (
+    FrontendMastersIE,
+    FrontendMastersLessonIE,
+    FrontendMastersCourseIE
+)
 from .funimation import FunimationIE
 from .funk import (
     FunkMixIE,
@@ -512,6 +521,7 @@ from .keezmovies import KeezMoviesIE
 from .ketnet import KetnetIE
 from .khanacademy import KhanAcademyIE
 from .kickstarter import KickStarterIE
+from .kinopoisk import KinoPoiskIE
 from .keek import KeekIE
 from .konserthusetplay import KonserthusetPlayIE
 from .kontrtube import KontrTubeIE
@@ -732,7 +742,10 @@ from .nonktube import NonkTubeIE
 from .noovo import NoovoIE
 from .normalboots import NormalbootsIE
 from .nosvideo import NosVideoIE
-from .nova import NovaIE
+from .nova import (
+    NovaEmbedIE,
+    NovaIE,
+)
 from .novamov import (
     AuroraVidIE,
     CloudTimeIE,
@@ -764,7 +777,9 @@ from .nrk import (
     NRKSkoleIE,
     NRKTVIE,
     NRKTVDirekteIE,
+    NRKTVEpisodeIE,
     NRKTVEpisodesIE,
+    NRKTVSeasonIE,
     NRKTVSeriesIE,
 )
 from .ntvde import NTVDeIE
@@ -854,6 +869,10 @@ from .pornhub import (
 from .pornotube import PornotubeIE
 from .pornovoisines import PornoVoisinesIE
 from .pornoxo import PornoXOIE
+from .puhutv import (
+    PuhuTVIE,
+    PuhuTVSerieIE,
+)
 from .presstv import PressTVIE
 from .primesharetv import PrimeShareTVIE
 from .promptfile import PromptFileIE
@@ -885,7 +904,10 @@ from .rai import (
     RaiPlayPlaylistIE,
     RaiIE,
 )
-from .raywenderlich import RayWenderlichIE
+from .raywenderlich import (
+    RayWenderlichIE,
+    RayWenderlichCourseIE,
+)
 from .rbmaradio import RBMARadioIE
 from .rds import RDSIE
 from .redbulltv import RedBullTVIE
@@ -1041,6 +1063,7 @@ from .stretchinternet import StretchInternetIE
 from .sunporno import SunPornoIE
 from .svt import (
     SVTIE,
+    SVTPageIE,
     SVTPlayIE,
     SVTSeriesIE,
 )
@@ -1063,6 +1086,7 @@ from .teachingchannel import TeachingChannelIE
 from .teamcoco import TeamcocoIE
 from .techtalks import TechTalksIE
 from .ted import TEDIE
+from .tele5 import Tele5IE
 from .tele13 import Tele13IE
 from .telebruxelles import TeleBruxellesIE
 from .telecinco import TelecincoIE
@@ -1159,6 +1183,7 @@ from .tvp import (
 from .tvplay import (
     TVPlayIE,
     ViafreeIE,
+    TVPlayHomeIE,
 )
 from .tvplayer import TVPlayerIE
 from .tweakers import TweakersIE
@@ -1280,6 +1305,7 @@ from .viki import (
     VikiIE,
     VikiChannelIE,
 )
+from .viqeo import ViqeoIE
 from .viu import (
     ViuIE,
     ViuPlaylistIE,
@@ -1405,6 +1431,7 @@ from .younow import (
     YouNowMomentIE,
 )
 from .youporn import YouPornIE
+from .yourporn import YourPornIE
 from .yourupload import YourUploadIE
 from .youtube import (
     YoutubeIE,
diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py
index 8a9ed96c264ae37c5de2af472eca4c0ff1f4fa86..97cfe0fc38ed53f32fd868fd8faa6276a3362dfe 100644 (file)
@@ -20,6 +20,7 @@ from ..utils import (
     int_or_none,
     js_to_json,
     limit_length,
+    parse_count,
     sanitized_Request,
     try_get,
     urlencode_postdata,
@@ -75,7 +76,7 @@ class FacebookIE(InfoExtractor):
         'info_dict': {
             'id': '274175099429670',
             'ext': 'mp4',
-            'title': 'Asif Nawab Butt posted a video to his Timeline.',
+            'title': 're:^Asif Nawab Butt posted a video',
             'uploader': 'Asif Nawab Butt',
             'upload_date': '20140506',
             'timestamp': 1399398998,
@@ -133,7 +134,7 @@ class FacebookIE(InfoExtractor):
     }, {
         # have 1080P, but only up to 720p in swf params
         'url': 'https://www.facebook.com/cnn/videos/10155529876156509/',
-        'md5': '0d9813160b146b3bc8744e006027fcc6',
+        'md5': '9571fae53d4165bbbadb17a94651dcdc',
         'info_dict': {
             'id': '10155529876156509',
             'ext': 'mp4',
@@ -142,6 +143,7 @@ class FacebookIE(InfoExtractor):
             'upload_date': '20161030',
             'uploader': 'CNN',
             'thumbnail': r're:^https?://.*',
+            'view_count': int,
         },
     }, {
         # bigPipe.onPageletArrive ... onPageletArrive pagelet_group_mall
@@ -149,7 +151,7 @@ class FacebookIE(InfoExtractor):
         'info_dict': {
             'id': '1417995061575415',
             'ext': 'mp4',
-            'title': 'md5:a7b86ca673f51800cd54687b7f4012fe',
+            'title': 'md5:1db063d6a8c13faa8da727817339c857',
             'timestamp': 1486648217,
             'upload_date': '20170209',
             'uploader': 'Yaroslav Korpan',
@@ -176,7 +178,7 @@ class FacebookIE(InfoExtractor):
         'info_dict': {
             'id': '1396382447100162',
             'ext': 'mp4',
-            'title': 'md5:e2d2700afdf84e121f5d0f999bad13a3',
+            'title': 'md5:19a428bbde91364e3de815383b54a235',
             'timestamp': 1486035494,
             'upload_date': '20170202',
             'uploader': 'Elisabeth Ahtn',
@@ -353,7 +355,6 @@ class FacebookIE(InfoExtractor):
             tahoe_data = self._download_webpage(
                 self._VIDEO_PAGE_TAHOE_TEMPLATE % video_id, video_id,
                 data=urlencode_postdata({
-                    '__user': 0,
                     '__a': 1,
                     '__pc': self._search_regex(
                         r'pkg_cohort["\']\s*:\s*["\'](.+?)["\']', webpage,
@@ -361,6 +362,9 @@ class FacebookIE(InfoExtractor):
                     '__rev': self._search_regex(
                         r'client_revision["\']\s*:\s*(\d+),', webpage,
                         'client revision', default='3944515'),
+                    'fb_dtsg': self._search_regex(
+                        r'"DTSGInitialData"\s*,\s*\[\]\s*,\s*{\s*"token"\s*:\s*"([^"]+)"',
+                        webpage, 'dtsg token', default=''),
                 }),
                 headers={
                     'Content-Type': 'application/x-www-form-urlencoded',
@@ -426,6 +430,10 @@ class FacebookIE(InfoExtractor):
             'timestamp', default=None))
         thumbnail = self._og_search_thumbnail(webpage)
 
+        view_count = parse_count(self._search_regex(
+            r'\bviewCount\s*:\s*["\']([\d,.]+)', webpage, 'view count',
+            default=None))
+
         info_dict = {
             'id': video_id,
             'title': video_title,
@@ -433,6 +441,7 @@ class FacebookIE(InfoExtractor):
             'uploader': uploader,
             'timestamp': timestamp,
             'thumbnail': thumbnail,
+            'view_count': view_count,
         }
 
         return webpage, info_dict
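Facebook reports view counts as formatted strings, so the hunk above routes the scraped `viewCount` value through `parse_count` from `youtube_dl.utils`, which normalizes human-readable counts into integers and degrades to `None` when nothing was captured. Illustrative behaviour (values invented):

    from youtube_dl.utils import parse_count

    parse_count('1,234')  # -> 1234, thousands separators are stripped
    parse_count('1.2K')   # -> 1200, unit suffixes are expanded
    parse_count(None)     # -> None, safe when the regex matched nothing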
diff --git a/youtube_dl/extractor/firsttv.py b/youtube_dl/extractor/firsttv.py
index 4803a22c81d16c2540f33cfb97bdc5a1d7f7aa57..28617d83c7d2afe398184b758b60380446314fc2 100644 (file)
@@ -10,6 +10,7 @@ from ..utils import (
     int_or_none,
     qualities,
     unified_strdate,
+    url_or_none,
 )
 
 
@@ -88,8 +89,8 @@ class FirstTVIE(InfoExtractor):
             formats = []
             path = None
             for f in item.get('mbr', []):
-                src = f.get('src')
-                if not src or not isinstance(src, compat_str):
+                src = url_or_none(f.get('src'))
+                if not src:
                     continue
                 tbr = int_or_none(self._search_regex(
                     r'_(\d{3,})\.mp4', src, 'tbr', default=None))
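The firsttv.py hunk above is one instance of a sweep that recurs throughout this commit (hidive.py, imdb.py, instagram.py and francetv.py below): ad-hoc `isinstance(..., compat_str)` checks are replaced by `url_or_none` from `youtube_dl.utils`, which validates both the type and the URL shape in a single call. Roughly:

    from youtube_dl.utils import url_or_none

    url_or_none('https://example.com/v.mp4')  # returned unchanged
    url_or_none('//cdn.example.com/v.mp4')    # protocol-relative URLs pass too
    url_or_none('not a url')                  # -> None
    url_or_none(None)                         # -> None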
diff --git a/youtube_dl/extractor/foxnews.py b/youtube_dl/extractor/foxnews.py
index dc0662f74ce5a84d59aa94333ee14d56a592cda2..63613cb85d36f78c7eea9f5f03adb061bed3ac4c 100644 (file)
@@ -58,6 +58,14 @@ class FoxNewsIE(AMPIE):
         },
     ]
 
+    @staticmethod
+    def _extract_urls(webpage):
+        return [
+            mobj.group('url')
+            for mobj in re.finditer(
+                r'<(?:amp-)?iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//video\.foxnews\.com/v/video-embed\.html?.*?\bvideo_id=\d+.*?)\1',
+                webpage)]
+
     def _real_extract(self, url):
         host, video_id = re.match(self._VALID_URL, url).groups()
 
     def _real_extract(self, url):
         host, video_id = re.match(self._VALID_URL, url).groups()
 
@@ -68,73 +76,52 @@ class FoxNewsIE(AMPIE):
 
 
 class FoxNewsArticleIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?foxnews\.com/(?!v)([^/]+/)+(?P<id>[a-z-]+)'
+    _VALID_URL = r'https?://(?:www\.)?(?:insider\.)?foxnews\.com/(?!v)([^/]+/)+(?P<id>[a-z-]+)'
     IE_NAME = 'foxnews:article'
 
-    _TEST = {
+    _TESTS = [{
+        # data-video-id
         'url': 'http://www.foxnews.com/politics/2016/09/08/buzz-about-bud-clinton-camp-denies-claims-wore-earpiece-at-forum.html',
         'url': 'http://www.foxnews.com/politics/2016/09/08/buzz-about-bud-clinton-camp-denies-claims-wore-earpiece-at-forum.html',
-        'md5': '62aa5a781b308fdee212ebb6f33ae7ef',
+        'md5': '83d44e1aff1433e7a29a7b537d1700b5',
         'info_dict': {
             'id': '5116295019001',
             'ext': 'mp4',
             'title': 'Trump and Clinton asked to defend positions on Iraq War',
             'description': 'Veterans react on \'The Kelly File\'',
         'info_dict': {
             'id': '5116295019001',
             'ext': 'mp4',
             'title': 'Trump and Clinton asked to defend positions on Iraq War',
             'description': 'Veterans react on \'The Kelly File\'',
-            'timestamp': 1473299755,
+            'timestamp': 1473301045,
             'upload_date': '20160908',
         },
-    }
-
-    def _real_extract(self, url):
-        display_id = self._match_id(url)
-        webpage = self._download_webpage(url, display_id)
-
-        video_id = self._html_search_regex(
-            r'data-video-id=([\'"])(?P<id>[^\'"]+)\1',
-            webpage, 'video ID', group='id')
-        return self.url_result(
-            'http://video.foxnews.com/v/' + video_id,
-            FoxNewsIE.ie_key())
-
-
-class FoxNewsInsiderIE(InfoExtractor):
-    _VALID_URL = r'https?://insider\.foxnews\.com/([^/]+/)+(?P<id>[a-z-]+)'
-    IE_NAME = 'foxnews:insider'
-
-    _TEST = {
-        'url': 'http://insider.foxnews.com/2016/08/25/univ-wisconsin-student-group-pushing-silence-certain-words',
-        'md5': 'a10c755e582d28120c62749b4feb4c0c',
+    }, {
+        # iframe embed
+        'url': 'http://www.foxnews.com/us/2018/03/09/parkland-survivor-kyle-kashuv-on-meeting-trump-his-app-to-prevent-another-school-shooting.amp.html?__twitter_impression=true',
         'info_dict': {
-            'id': '5099377331001',
-            'display_id': 'univ-wisconsin-student-group-pushing-silence-certain-words',
-            'ext': 'mp4',
-            'title': 'Student Group: Saying \'Politically Correct,\' \'Trash\' and \'Lame\' Is Offensive',
-            'description': 'Is campus censorship getting out of control?',
-            'timestamp': 1472168725,
-            'upload_date': '20160825',
+            'id': '5748266721001',
+            'ext': 'flv',
+            'title': 'Kyle Kashuv has a positive message for the Trump White House',
+            'description': 'Marjory Stoneman Douglas student disagrees with classmates.',
             'thumbnail': r're:^https?://.*\.jpg$',
+            'duration': 229,
+            'timestamp': 1520594670,
+            'upload_date': '20180309',
         },
         'params': {
-            # m3u8 download
             'skip_download': True,
         },
-        'add_ie': [FoxNewsIE.ie_key()],
-    }
+    }, {
+        'url': 'http://insider.foxnews.com/2016/08/25/univ-wisconsin-student-group-pushing-silence-certain-words',
+        'only_matching': True,
+    }]
 
     def _real_extract(self, url):
         display_id = self._match_id(url)
-
         webpage = self._download_webpage(url, display_id)
 
-        embed_url = self._html_search_meta('embedUrl', webpage, 'embed URL')
-
-        title = self._og_search_title(webpage)
-        description = self._og_search_description(webpage)
+        video_id = self._html_search_regex(
+            r'data-video-id=([\'"])(?P<id>[^\'"]+)\1',
+            webpage, 'video ID', group='id', default=None)
+        if video_id:
+            return self.url_result(
+                'http://video.foxnews.com/v/' + video_id, FoxNewsIE.ie_key())
 
-        return {
-            '_type': 'url_transparent',
-            'ie_key': FoxNewsIE.ie_key(),
-            'url': embed_url,
-            'display_id': display_id,
-            'title': title,
-            'description': description,
-        }
+        return self.url_result(
+            FoxNewsIE._extract_urls(webpage)[0], FoxNewsIE.ie_key())
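With FoxNewsInsiderIE folded into FoxNewsArticleIE, article pages are now resolved by a two-step cascade: use an explicit `data-video-id` attribute when the page carries one, otherwise delegate to the first player iframe that `FoxNewsIE._extract_urls` discovers. The control flow, reduced to plain functions (both helpers are stand-ins for extractor methods):

    import re

    def find_video_id(webpage):
        # Stand-in for self._html_search_regex(..., default=None).
        m = re.search(r'data-video-id=([\'"])(?P<id>[^\'"]+)\1', webpage)
        return m.group('id') if m else None

    def resolve_article(webpage, extract_embed_urls):
        video_id = find_video_id(webpage)
        if video_id:
            return 'http://video.foxnews.com/v/' + video_id
        # No explicit id: fall back to the first embedded player URL.
        return extract_embed_urls(webpage)[0]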
diff --git a/youtube_dl/extractor/francetv.py b/youtube_dl/extractor/francetv.py
index 6fc6b0da076bef777f667e2125b2203c0036f4fa..2ffe83a78c49b8e6b7436642febb72f66567068d 100644 (file)
@@ -16,6 +16,7 @@ from ..utils import (
     int_or_none,
     parse_duration,
     try_get,
+    url_or_none,
 )
 from .dailymotion import DailymotionIE
 
@@ -115,14 +116,13 @@ class FranceTVIE(InfoExtractor):
 
         def sign(manifest_url, manifest_id):
             for host in ('hdfauthftv-a.akamaihd.net', 'hdfauth.francetv.fr'):
-                signed_url = self._download_webpage(
+                signed_url = url_or_none(self._download_webpage(
                     'https://%s/esi/TA' % host, video_id,
                     'Downloading signed %s manifest URL' % manifest_id,
                     fatal=False, query={
                         'url': manifest_url,
-                    })
-                if (signed_url and isinstance(signed_url, compat_str) and
-                        re.search(r'^(?:https?:)?//', signed_url)):
+                    }))
+                if signed_url:
                     return signed_url
             return manifest_url
 
diff --git a/youtube_dl/extractor/frontendmasters.py b/youtube_dl/extractor/frontendmasters.py
new file mode 100644 (file)
index 0000000..cb57ba0
--- /dev/null
+++ b/youtube_dl/extractor/frontendmasters.py
@@ -0,0 +1,263 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+    compat_str,
+    compat_urlparse,
+)
+from ..utils import (
+    ExtractorError,
+    parse_duration,
+    url_or_none,
+    urlencode_postdata,
+)
+
+
+class FrontendMastersBaseIE(InfoExtractor):
+    _API_BASE = 'https://api.frontendmasters.com/v1/kabuki'
+    _LOGIN_URL = 'https://frontendmasters.com/login/'
+
+    _NETRC_MACHINE = 'frontendmasters'
+
+    _QUALITIES = {
+        'low': {'width': 480, 'height': 360},
+        'mid': {'width': 1280, 'height': 720},
+        'high': {'width': 1920, 'height': 1080}
+    }
+
+    def _real_initialize(self):
+        self._login()
+
+    def _login(self):
+        (username, password) = self._get_login_info()
+        if username is None:
+            return
+
+        login_page = self._download_webpage(
+            self._LOGIN_URL, None, 'Downloading login page')
+
+        login_form = self._hidden_inputs(login_page)
+
+        login_form.update({
+            'username': username,
+            'password': password
+        })
+
+        post_url = self._search_regex(
+            r'<form[^>]+action=(["\'])(?P<url>.+?)\1', login_page,
+            'post_url', default=self._LOGIN_URL, group='url')
+
+        if not post_url.startswith('http'):
+            post_url = compat_urlparse.urljoin(self._LOGIN_URL, post_url)
+
+        response = self._download_webpage(
+            post_url, None, 'Logging in', data=urlencode_postdata(login_form),
+            headers={'Content-Type': 'application/x-www-form-urlencoded'})
+
+        # Successful login
+        if any(p in response for p in (
+                'wp-login.php?action=logout', '>Logout')):
+            return
+
+        error = self._html_search_regex(
+            r'class=(["\'])(?:(?!\1).)*\bMessageAlert\b(?:(?!\1).)*\1[^>]*>(?P<error>[^<]+)<',
+            response, 'error message', default=None, group='error')
+        if error:
+            raise ExtractorError('Unable to login: %s' % error, expected=True)
+        raise ExtractorError('Unable to log in')
+
+
+class FrontendMastersPageBaseIE(FrontendMastersBaseIE):
+    def _download_course(self, course_name, url):
+        return self._download_json(
+            '%s/courses/%s' % (self._API_BASE, course_name), course_name,
+            'Downloading course JSON', headers={'Referer': url})
+
+    @staticmethod
+    def _extract_chapters(course):
+        chapters = []
+        lesson_elements = course.get('lessonElements')
+        if isinstance(lesson_elements, list):
+            chapters = [url_or_none(e) for e in lesson_elements if url_or_none(e)]
+        return chapters
+
+    @staticmethod
+    def _extract_lesson(chapters, lesson_id, lesson):
+        title = lesson.get('title') or lesson_id
+        display_id = lesson.get('slug')
+        description = lesson.get('description')
+        thumbnail = lesson.get('thumbnail')
+
+        chapter_number = None
+        index = lesson.get('index')
+        element_index = lesson.get('elementIndex')
+        if (isinstance(index, int) and isinstance(element_index, int) and
+                index < element_index):
+            chapter_number = element_index - index
+        chapter = (chapters[chapter_number - 1]
+                   if chapter_number and chapter_number <= len(chapters) else None)
+
+        duration = None
+        timestamp = lesson.get('timestamp')
+        if isinstance(timestamp, compat_str):
+            mobj = re.search(
+                r'(?P<start>\d{1,2}:\d{1,2}:\d{1,2})\s*-(?P<end>\s*\d{1,2}:\d{1,2}:\d{1,2})',
+                timestamp)
+            if mobj:
+                duration = parse_duration(mobj.group('end')) - parse_duration(
+                    mobj.group('start'))
+
+        return {
+            '_type': 'url_transparent',
+            'url': 'frontendmasters:%s' % lesson_id,
+            'ie_key': FrontendMastersIE.ie_key(),
+            'id': lesson_id,
+            'display_id': display_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'duration': duration,
+            'chapter': chapter,
+            'chapter_number': chapter_number,
+        }
+
+
+class FrontendMastersIE(FrontendMastersBaseIE):
+    _VALID_URL = r'(?:frontendmasters:|https?://api\.frontendmasters\.com/v\d+/kabuki/video/)(?P<id>[^/]+)'
+    _TESTS = [{
+        'url': 'https://api.frontendmasters.com/v1/kabuki/video/a2qogef6ba',
+        'md5': '7f161159710d6b7016a4f4af6fcb05e2',
+        'info_dict': {
+            'id': 'a2qogef6ba',
+            'ext': 'mp4',
+            'title': 'a2qogef6ba',
+        },
+        'skip': 'Requires FrontendMasters account credentials',
+    }, {
+        'url': 'frontendmasters:a2qogef6ba',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        lesson_id = self._match_id(url)
+
+        source_url = '%s/video/%s/source' % (self._API_BASE, lesson_id)
+
+        formats = []
+        for ext in ('webm', 'mp4'):
+            for quality in ('low', 'mid', 'high'):
+                resolution = self._QUALITIES[quality].copy()
+                format_id = '%s-%s' % (ext, quality)
+                format_url = self._download_json(
+                    source_url, lesson_id,
+                    'Downloading %s source JSON' % format_id, query={
+                        'f': ext,
+                        'r': resolution['height'],
+                    }, headers={
+                        'Referer': url,
+                    }, fatal=False)['url']
+
+                if not format_url:
+                    continue
+
+                f = resolution.copy()
+                f.update({
+                    'url': format_url,
+                    'ext': ext,
+                    'format_id': format_id,
+                })
+                formats.append(f)
+        self._sort_formats(formats)
+
+        subtitles = {
+            'en': [{
+                'url': '%s/transcripts/%s.vtt' % (self._API_BASE, lesson_id),
+            }]
+        }
+
+        return {
+            'id': lesson_id,
+            'title': lesson_id,
+            'formats': formats,
+            'subtitles': subtitles
+        }
+
+
+class FrontendMastersLessonIE(FrontendMastersPageBaseIE):
+    _VALID_URL = r'https?://(?:www\.)?frontendmasters\.com/courses/(?P<course_name>[^/]+)/(?P<lesson_name>[^/]+)'
+    _TEST = {
+        'url': 'https://frontendmasters.com/courses/web-development/tools',
+        'info_dict': {
+            'id': 'a2qogef6ba',
+            'display_id': 'tools',
+            'ext': 'mp4',
+            'title': 'Tools',
+            'description': 'md5:82c1ea6472e88ed5acd1829fe992e4f7',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'chapter': 'Introduction',
+            'chapter_number': 1,
+        },
+        'params': {
+            'skip_download': True,
+        },
+        'skip': 'Requires FrontendMasters account credentials',
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        course_name, lesson_name = mobj.group('course_name', 'lesson_name')
+
+        course = self._download_course(course_name, url)
+
+        lesson_id, lesson = next(
+            (video_id, data)
+            for video_id, data in course['lessonData'].items()
+            if data.get('slug') == lesson_name)
+
+        chapters = self._extract_chapters(course)
+        return self._extract_lesson(chapters, lesson_id, lesson)
+
+
+class FrontendMastersCourseIE(FrontendMastersPageBaseIE):
+    _VALID_URL = r'https?://(?:www\.)?frontendmasters\.com/courses/(?P<id>[^/]+)'
+    _TEST = {
+        'url': 'https://frontendmasters.com/courses/web-development/',
+        'info_dict': {
+            'id': 'web-development',
+            'title': 'Introduction to Web Development',
+            'description': 'md5:9317e6e842098bf725d62360e52d49a6',
+        },
+        'playlist_count': 81,
+        'skip': 'Requires FrontendMasters account credentials',
+    }
+
+    @classmethod
+    def suitable(cls, url):
+        return False if FrontendMastersLessonIE.suitable(url) else super(
+            FrontendMastersBaseIE, cls).suitable(url)
+
+    def _real_extract(self, url):
+        course_name = self._match_id(url)
+
+        course = self._download_course(course_name, url)
+
+        chapters = self._extract_chapters(course)
+
+        lessons = sorted(
+            course['lessonData'].values(), key=lambda data: data['index'])
+
+        entries = []
+        for lesson in lessons:
+            lesson_name = lesson.get('slug')
+            if not lesson_name:
+                continue
+            lesson_id = lesson.get('hash') or lesson.get('statsId')
+            entries.append(self._extract_lesson(chapters, lesson_id, lesson))
+
+        title = course.get('title')
+        description = course.get('description')
+
+        return self.playlist_result(entries, course_name, title, description)
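FrontendMastersIE assembles its format list by crossing the two container types with the three `_QUALITIES` entries and querying the source endpoint once per combination, skipping pairs the API declines to serve. The shape of that loop as a self-contained sketch, where `fetch(ext, height)` stands in for the per-combination `_download_json` call:

    QUALITIES = {
        'low': {'width': 480, 'height': 360},
        'mid': {'width': 1280, 'height': 720},
        'high': {'width': 1920, 'height': 1080},
    }

    def build_formats(fetch):
        formats = []
        for ext in ('webm', 'mp4'):
            for quality in ('low', 'mid', 'high'):
                resolution = QUALITIES[quality].copy()
                format_url = fetch(ext, resolution['height'])
                if not format_url:
                    continue  # this container/quality pair is unavailable
                f = resolution.copy()
                f.update({
                    'url': format_url,
                    'ext': ext,
                    'format_id': '%s-%s' % (ext, quality),
                })
                formats.append(f)
        return formats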
diff --git a/youtube_dl/extractor/funk.py b/youtube_dl/extractor/funk.py
index 0ff058619bc05fa6b3d6c0680be56bff957ca802..7e1af95e0fcd86b72115ef60b4b6dabe52f94d77 100644 (file)
@@ -1,10 +1,12 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
+import itertools
 import re
 
 from .common import InfoExtractor
 from .nexx import NexxIE
+from ..compat import compat_str
 from ..utils import (
     int_or_none,
     try_get,
@@ -12,6 +14,19 @@ from ..utils import (
 
 
 class FunkBaseIE(InfoExtractor):
+    _HEADERS = {
+        'Accept': '*/*',
+        'Accept-Language': 'en-US,en;q=0.9,ru;q=0.8',
+        'authorization': 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJjbGllbnROYW1lIjoid2ViYXBwLXYzMSIsInNjb3BlIjoic3RhdGljLWNvbnRlbnQtYXBpLGN1cmF0aW9uLWFwaSxuZXh4LWNvbnRlbnQtYXBpLXYzMSx3ZWJhcHAtYXBpIn0.mbuG9wS9Yf5q6PqgR4fiaRFIagiHk9JhwoKES7ksVX4',
+    }
+    _AUTH = 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJjbGllbnROYW1lIjoid2ViYXBwLXYzMSIsInNjb3BlIjoic3RhdGljLWNvbnRlbnQtYXBpLGN1cmF0aW9uLWFwaSxuZXh4LWNvbnRlbnQtYXBpLXYzMSx3ZWJhcHAtYXBpIn0.mbuG9wS9Yf5q6PqgR4fiaRFIagiHk9JhwoKES7ksVX4'
+
+    @staticmethod
+    def _make_headers(referer):
+        headers = FunkBaseIE._HEADERS.copy()
+        headers['Referer'] = referer
+        return headers
+
     def _make_url_result(self, video):
         return {
             '_type': 'url_transparent',
@@ -48,19 +63,19 @@ class FunkMixIE(FunkBaseIE):
 
         lists = self._download_json(
             'https://www.funk.net/api/v3.1/curation/curatedLists/',
-            mix_id, headers={
-                'authorization': 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJjbGllbnROYW1lIjoiY3VyYXRpb24tdG9vbC12Mi4wIiwic2NvcGUiOiJzdGF0aWMtY29udGVudC1hcGksY3VyYXRpb24tc2VydmljZSxzZWFyY2gtYXBpIn0.SGCC1IXHLtZYoo8PvRKlU2gXH1su8YSu47sB3S4iXBI',
-                'Referer': url,
-            }, query={
+            mix_id, headers=self._make_headers(url), query={
                 'size': 100,
                 'size': 100,
-            })['result']['lists']
+            })['_embedded']['curatedListList']
 
         metas = next(
             l for l in lists
             if mix_id in (l.get('entityId'), l.get('alias')))['videoMetas']
         video = next(
             meta['videoDataDelegate']
 
         metas = next(
             l for l in lists
             if mix_id in (l.get('entityId'), l.get('alias')))['videoMetas']
         video = next(
             meta['videoDataDelegate']
-            for meta in metas if meta.get('alias') == alias)
+            for meta in metas
+            if try_get(
+                meta, lambda x: x['videoDataDelegate']['alias'],
+                compat_str) == alias)
 
         return self._make_url_result(video)
 
@@ -104,25 +119,53 @@ class FunkChannelIE(FunkBaseIE):
         channel_id = mobj.group('id')
         alias = mobj.group('alias')
 
-        headers = {
-            'authorization': 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJjbGllbnROYW1lIjoiY3VyYXRpb24tdG9vbCIsInNjb3BlIjoic3RhdGljLWNvbnRlbnQtYXBpLGN1cmF0aW9uLWFwaSxzZWFyY2gtYXBpIn0.q4Y2xZG8PFHai24-4Pjx2gym9RmJejtmK6lMXP5wAgc',
-            'Referer': url,
-        }
+        headers = self._make_headers(url)
 
         video = None
 
 
         video = None
 
-        by_id_list = self._download_json(
-            'https://www.funk.net/api/v3.0/content/videos/byIdList', channel_id,
-            headers=headers, query={
-                'ids': alias,
-            }, fatal=False)
-        if by_id_list:
-            video = try_get(by_id_list, lambda x: x['result'][0], dict)
+        # Id-based channels are currently broken on their side: webplayer
+        # tries to process them via byChannelAlias endpoint and fails
+        # predictably.
+        for page_num in itertools.count():
+            by_channel_alias = self._download_json(
+                'https://www.funk.net/api/v3.1/webapp/videos/byChannelAlias/%s'
+                % channel_id,
+                'Downloading byChannelAlias JSON page %d' % (page_num + 1),
+                headers=headers, query={
+                    'filterFsk': 'false',
+                    'sort': 'creationDate,desc',
+                    'size': 100,
+                    'page': page_num,
+                }, fatal=False)
+            if not by_channel_alias:
+                break
+            video_list = try_get(
+                by_channel_alias, lambda x: x['_embedded']['videoList'], list)
+            if not video_list:
+                break
+            try:
+                video = next(r for r in video_list if r.get('alias') == alias)
+                break
+            except StopIteration:
+                pass
+            if not try_get(
+                    by_channel_alias, lambda x: x['_links']['next']):
+                break
+
+        if not video:
+            by_id_list = self._download_json(
+                'https://www.funk.net/api/v3.0/content/videos/byIdList',
+                channel_id, 'Downloading byIdList JSON', headers=headers,
+                query={
+                    'ids': alias,
+                }, fatal=False)
+            if by_id_list:
+                video = try_get(by_id_list, lambda x: x['result'][0], dict)
 
         if not video:
             results = self._download_json(
 
         if not video:
             results = self._download_json(
-                'https://www.funk.net/api/v3.0/content/videos/filter', channel_id,
-                headers=headers, query={
+                'https://www.funk.net/api/v3.0/content/videos/filter',
+                channel_id, 'Downloading filter JSON', headers=headers, query={
                     'channelId': channel_id,
                     'size': 100,
                 })['result']
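The FunkChannelIE rework replaces the single byIdList lookup with a paginated walk over the byChannelAlias endpoint: advance through pages with `itertools.count()` and stop once the alias is found, a page comes back empty, or the payload advertises no `_links.next` cursor. The traversal pattern in isolation, with `get_page(page_num)` standing in for the JSON request:

    import itertools

    def find_by_alias(get_page, alias):
        for page_num in itertools.count():
            page = get_page(page_num)
            if not page:
                break
            video_list = (page.get('_embedded') or {}).get('videoList') or []
            if not video_list:
                break
            for video in video_list:
                if video.get('alias') == alias:
                    return video
            if not (page.get('_links') or {}).get('next'):
                break  # no further pages advertised
        return None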
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index dad951b751853f900a26e6cd2e7bce5ca964b749..76ef01332feef1619a6bde5e915893cf0196fe57 100644 (file)
@@ -111,6 +111,9 @@ from .cloudflarestream import CloudflareStreamIE
 from .peertube import PeerTubeIE
 from .indavideo import IndavideoEmbedIE
 from .apa import APAIE
+from .foxnews import FoxNewsIE
+from .viqeo import ViqeoIE
+from .expressen import ExpressenIE
 
 
 class GenericIE(InfoExtractor):
@@ -1394,17 +1397,6 @@ class GenericIE(InfoExtractor):
                 'skip_download': True,
             },
         },
-        # SVT embed
-        {
-            'url': 'http://www.svt.se/sport/ishockey/jagr-tacklar-giroux-under-intervjun',
-            'info_dict': {
-                'id': '2900353',
-                'ext': 'flv',
-                'title': 'Här trycker Jagr till Giroux (under SVT-intervjun)',
-                'duration': 27,
-                'age_limit': 0,
-            },
-        },
         # Crooks and Liars embed
         {
             'url': 'http://crooksandliars.com/2015/04/fox-friends-says-protecting-atheists',
@@ -2069,6 +2061,30 @@ class GenericIE(InfoExtractor):
             },
             'skip': 'TODO: fix nested playlists processing in tests',
         },
+        {
+            # Viqeo embeds
+            'url': 'https://viqeo.tv/',
+            'info_dict': {
+                'id': 'viqeo',
+                'title': 'All-new video platform',
+            },
+            'playlist_count': 6,
+        },
+        {
+            # videojs embed
+            'url': 'https://video.sibnet.ru/shell.php?videoid=3422904',
+            'info_dict': {
+                'id': 'shell',
+                'ext': 'mp4',
+                'title': 'Доставщик пиццы спросил разрешения сыграть на фортепиано',
+                'description': 'md5:89209cdc587dab1e4a090453dbaa2cb1',
+                'thumbnail': r're:^https?://.*\.jpg$',
+            },
+            'params': {
+                'skip_download': True,
+            },
+            'expected_warnings': ['Failed to download MPD manifest'],
+        },
         # {
         #     # TODO: find another test
         #     # http://schema.org/VideoObject
@@ -3076,7 +3092,7 @@ class GenericIE(InfoExtractor):
             return self.playlist_from_matches(
                 cloudflarestream_urls, video_id, video_title, ie=CloudflareStreamIE.ie_key())
 
-        peertube_urls = PeerTubeIE._extract_urls(webpage)
+        peertube_urls = PeerTubeIE._extract_urls(webpage, url)
         if peertube_urls:
             return self.playlist_from_matches(
                 peertube_urls, video_id, video_title, ie=PeerTubeIE.ie_key())
@@ -3091,13 +3107,28 @@ class GenericIE(InfoExtractor):
             return self.playlist_from_matches(
                 apa_urls, video_id, video_title, ie=APAIE.ie_key())
 
-        sharevideos_urls = [mobj.group('url') for mobj in re.finditer(
+        foxnews_urls = FoxNewsIE._extract_urls(webpage)
+        if foxnews_urls:
+            return self.playlist_from_matches(
+                foxnews_urls, video_id, video_title, ie=FoxNewsIE.ie_key())
+
+        sharevideos_urls = [sharevideos_mobj.group('url') for sharevideos_mobj in re.finditer(
             r'<iframe[^>]+?\bsrc\s*=\s*(["\'])(?P<url>(?:https?:)?//embed\.share-videos\.se/auto/embed/\d+\?.*?\buid=\d+.*?)\1',
             webpage)]
         if sharevideos_urls:
             return self.playlist_from_matches(
                 sharevideos_urls, video_id, video_title)
 
+        viqeo_urls = ViqeoIE._extract_urls(webpage)
+        if viqeo_urls:
+            return self.playlist_from_matches(
+                viqeo_urls, video_id, video_title, ie=ViqeoIE.ie_key())
+
+        expressen_urls = ExpressenIE._extract_urls(webpage)
+        if expressen_urls:
+            return self.playlist_from_matches(
+                expressen_urls, video_id, video_title, ie=ExpressenIE.ie_key())
+
         # Look for HTML5 media
         entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls')
         if entries:
@@ -3119,9 +3150,13 @@ class GenericIE(InfoExtractor):
         jwplayer_data = self._find_jwplayer_data(
             webpage, video_id, transform_source=js_to_json)
         if jwplayer_data:
-            info = self._parse_jwplayer_data(
-                jwplayer_data, video_id, require_title=False, base_url=url)
-            return merge_dicts(info, info_dict)
+            try:
+                info = self._parse_jwplayer_data(
+                    jwplayer_data, video_id, require_title=False, base_url=url)
+                return merge_dicts(info, info_dict)
+            except ExtractorError:
+                # See https://github.com/rg3/youtube-dl/pull/16735
+                pass
 
         # Video.js embed
         mobj = re.search(
diff --git a/youtube_dl/extractor/go.py b/youtube_dl/extractor/go.py
index e781405f2f7d55aa51f8cc50cc99df22ee5dff40..ec9dd6e3ab3979d0970e2caf5ad73c38e22d6630 100644 (file)
@@ -36,7 +36,8 @@ class GoIE(AdobePassIE):
             'requestor_id': 'DisneyXD',
         }
     }
-    _VALID_URL = r'https?://(?:(?P<sub_domain>%s)\.)?go\.com/(?:(?:[^/]+/)*(?P<id>vdka\w+)|(?:[^/]+/)*(?P<display_id>[^/?#]+))' % '|'.join(_SITE_INFO.keys())
+    _VALID_URL = r'https?://(?:(?P<sub_domain>%s)\.)?go\.com/(?:(?:[^/]+/)*(?P<id>vdka\w+)|(?:[^/]+/)*(?P<display_id>[^/?#]+))'\
+                 % '|'.join(list(_SITE_INFO.keys()) + ['disneynow'])
     _TESTS = [{
         'url': 'http://abc.go.com/shows/designated-survivor/video/most-recent/VDKA3807643',
         'info_dict': {
@@ -62,6 +63,14 @@ class GoIE(AdobePassIE):
     }, {
         'url': 'http://abc.go.com/shows/world-news-tonight/episode-guide/2017-02/17-021717-intense-stand-off-between-man-with-rifle-and-police-in-oakland',
         'only_matching': True,
+    }, {
+        # brand 004
+        'url': 'http://disneynow.go.com/shows/big-hero-6-the-series/season-01/episode-10-mr-sparkles-loses-his-sparkle/vdka4637915',
+        'only_matching': True,
+    }, {
+        # brand 008
+        'url': 'http://disneynow.go.com/shows/minnies-bow-toons/video/happy-campers/vdka4872013',
+        'only_matching': True,
     }]
 
     def _extract_videos(self, brand, video_id='-1', show_id='-1'):
@@ -72,14 +81,23 @@ class GoIE(AdobePassIE):
 
     def _real_extract(self, url):
         sub_domain, video_id, display_id = re.match(self._VALID_URL, url).groups()
-        site_info = self._SITE_INFO[sub_domain]
-        brand = site_info['brand']
-        if not video_id:
-            webpage = self._download_webpage(url, display_id)
+        site_info = self._SITE_INFO.get(sub_domain, {})
+        brand = site_info.get('brand')
+        if not video_id or not site_info:
+            webpage = self._download_webpage(url, display_id or video_id)
             video_id = self._search_regex(
                 # There may be inner quotes, e.g. data-video-id="'VDKA3609139'"
                 # from http://freeform.go.com/shows/shadowhunters/episodes/season-2/1-this-guilty-blood
-                r'data-video-id=["\']*(VDKA\w+)', webpage, 'video id', default=None)
+                r'data-video-id=["\']*(VDKA\w+)', webpage, 'video id',
+                default=None)
+            if not site_info:
+                brand = self._search_regex(
+                    (r'data-brand=\s*["\']\s*(\d+)',
+                     r'data-page-brand=\s*["\']\s*(\d+)'), webpage, 'brand',
+                    default='004')
+                site_info = next(
+                    si for _, si in self._SITE_INFO.items()
+                    if si.get('brand') == brand)
             if not video_id:
                 # show extraction works for Disney, DisneyJunior and DisneyXD
                 # ABC and Freeform has different layout
diff --git a/youtube_dl/extractor/go90.py b/youtube_dl/extractor/go90.py
index 35dde42d07143a3805c5f1cfd551001eb2a23de3..c3ea717bcd17633fe25b2f16ed8abf1bacf7e3ab 100644 (file)
@@ -4,6 +4,7 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
+from ..compat import compat_HTTPError
 from ..utils import (
     determine_ext,
     ExtractorError,
@@ -14,8 +15,8 @@ from ..utils import (
 
 
 class Go90IE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?go90\.com/videos/(?P<id>[0-9a-zA-Z]+)'
-    _TEST = {
+    _VALID_URL = r'https?://(?:www\.)?go90\.com/(?:videos|embed)/(?P<id>[0-9a-zA-Z]+)'
+    _TESTS = [{
         'url': 'https://www.go90.com/videos/84BUqjLpf9D',
         'md5': 'efa7670dbbbf21a7b07b360652b24a32',
         'info_dict': {
@@ -27,15 +28,31 @@ class Go90IE(InfoExtractor):
             'upload_date': '20170411',
             'age_limit': 14,
         }
-    }
+    }, {
+        'url': 'https://www.go90.com/embed/261MflWkD3N',
+        'only_matching': True,
+    }]
+    _GEO_BYPASS = False
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
-        video_data = self._download_json(
-            'https://www.go90.com/api/view/items/' + video_id,
-            video_id, headers={
+
+        try:
+            headers = self.geo_verification_headers()
+            headers.update({
                 'Content-Type': 'application/json; charset=utf-8',
-            }, data=b'{"client":"web","device_type":"pc"}')
+            })
+            video_data = self._download_json(
+                'https://www.go90.com/api/view/items/' + video_id, video_id,
+                headers=headers, data=b'{"client":"web","device_type":"pc"}')
+        except ExtractorError as e:
+            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
+                message = self._parse_json(e.cause.read().decode(), None)['error']['message']
+                if 'region unavailable' in message:
+                    self.raise_geo_restricted(countries=['US'])
+                raise ExtractorError(message, expected=True)
+            raise
+
         if video_data.get('requires_drm'):
             raise ExtractorError('This video is DRM protected.', expected=True)
         main_video_asset = video_data['main_video_asset']
diff --git a/youtube_dl/extractor/hidive.py b/youtube_dl/extractor/hidive.py
index 39fabe8a55958374f03700e8a653ea86d707dcc9..f26f80265678576217994746d9286482c5184a3d 100644 (file)
@@ -8,6 +8,7 @@ from ..compat import compat_str
 from ..utils import (
     ExtractorError,
     int_or_none,
+    url_or_none,
     urlencode_postdata,
 )
 
@@ -80,8 +81,8 @@ class HiDiveIE(InfoExtractor):
             bitrates = rendition.get('bitrates')
             if not isinstance(bitrates, dict):
                 continue
-            m3u8_url = bitrates.get('hls')
-            if not isinstance(m3u8_url, compat_str):
+            m3u8_url = url_or_none(bitrates.get('hls'))
+            if not m3u8_url:
                 continue
             formats.extend(self._extract_m3u8_formats(
                 m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native',
@@ -93,9 +94,8 @@ class HiDiveIE(InfoExtractor):
                 if not isinstance(cc_file, list) or len(cc_file) < 3:
                     continue
                 cc_lang = cc_file[0]
-                cc_url = cc_file[2]
-                if not isinstance(cc_lang, compat_str) or not isinstance(
-                        cc_url, compat_str):
+                cc_url = url_or_none(cc_file[2])
+                if not isinstance(cc_lang, compat_str) or not cc_url:
                     continue
                 subtitles.setdefault(cc_lang, []).append({
                     'url': cc_url,
diff --git a/youtube_dl/extractor/imdb.py b/youtube_dl/extractor/imdb.py
index 4bafa54a21e5abbc294a3686b6b974a9bb6d4eb3..436759da5480da347a578aba6cb388cb01e97448 100644 (file)
@@ -3,12 +3,12 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..compat import compat_str
 from ..utils import (
     determine_ext,
     mimetype2ext,
     parse_duration,
     qualities,
+    url_or_none,
 )
 
 
@@ -61,10 +61,11 @@ class ImdbIE(InfoExtractor):
         for encoding in video_metadata.get('encodings', []):
             if not encoding or not isinstance(encoding, dict):
                 continue
-            video_url = encoding.get('videoUrl')
-            if not video_url or not isinstance(video_url, compat_str):
+            video_url = url_or_none(encoding.get('videoUrl'))
+            if not video_url:
                 continue
-            ext = determine_ext(video_url, mimetype2ext(encoding.get('mimeType')))
+            ext = mimetype2ext(encoding.get(
+                'mimeType')) or determine_ext(video_url)
             if ext == 'm3u8':
                 formats.extend(self._extract_m3u8_formats(
                     video_url, video_id, 'mp4', entry_protocol='m3u8_native',
diff --git a/youtube_dl/extractor/imgur.py b/youtube_dl/extractor/imgur.py
index 2901960a51f8faa28975a7c5bef7b965062573ba..ecc958a1717d35ad37cc68c3b44147f3d0db6161 100644 (file)
@@ -12,7 +12,7 @@ from ..utils import (
 
 
 class ImgurIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:i\.)?imgur\.com/(?:(?:gallery|(?:topic|r)/[^/]+)/)?(?P<id>[a-zA-Z0-9]{6,})(?:[/?#&]+|\.[a-z]+)?$'
+    _VALID_URL = r'https?://(?:i\.)?imgur\.com/(?:(?:gallery|(?:topic|r)/[^/]+)/)?(?P<id>[a-zA-Z0-9]{6,})(?:[/?#&]+|\.[a-z0-9]+)?$'
 
     _TESTS = [{
         'url': 'https://i.imgur.com/A61SaA1.gifv',
@@ -43,6 +43,9 @@ class ImgurIE(InfoExtractor):
     }, {
         'url': 'http://imgur.com/r/aww/VQcQPhM',
         'only_matching': True,
+    }, {
+        'url': 'https://i.imgur.com/crGpqCV.mp4',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
diff --git a/youtube_dl/extractor/instagram.py b/youtube_dl/extractor/instagram.py
index 0c13f54ee04fe4df7169203b27e222cd2771b4be..7e0e838f05a5e4a527cd9cf89e84f884ac1b2498 100644 (file)
@@ -17,6 +17,7 @@ from ..utils import (
     lowercase_escape,
     std_headers,
     try_get,
+    url_or_none,
 )
 
 
@@ -170,7 +171,7 @@ class InstagramIE(InfoExtractor):
                             node = try_get(edge, lambda x: x['node'], dict)
                             if not node:
                                 continue
-                            node_video_url = try_get(node, lambda x: x['video_url'], compat_str)
+                            node_video_url = url_or_none(node.get('video_url'))
                             if not node_video_url:
                                 continue
                             entries.append({
diff --git a/youtube_dl/extractor/internazionale.py b/youtube_dl/extractor/internazionale.py
index 10ba1f6cfd73e2d060da6f1715264e9cb6cd9f23..676e8e269ce5ac3cf402e5e306b0ba2b1c61e28c 100644 (file)
@@ -7,7 +7,7 @@ from ..utils import unified_timestamp
 
 class InternazionaleIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?internazionale\.it/video/(?:[^/]+/)*(?P<id>[^/?#&]+)'
-    _TEST = {
+    _TESTS = [{
         'url': 'https://www.internazionale.it/video/2015/02/19/richard-linklater-racconta-una-scena-di-boyhood',
         'md5': '3e39d32b66882c1218e305acbf8348ca',
         'info_dict': {
@@ -23,7 +23,23 @@ class InternazionaleIE(InfoExtractor):
         'params': {
             'format': 'bestvideo',
         },
-    }
+    }, {
+        'url': 'https://www.internazionale.it/video/2018/08/29/telefono-stare-con-noi-stessi',
+        'md5': '9db8663704cab73eb972d1cee0082c79',
+        'info_dict': {
+            'id': '761344',
+            'display_id': 'telefono-stare-con-noi-stessi',
+            'ext': 'mp4',
+            'title': 'Usiamo il telefono per evitare di stare con noi stessi',
+            'description': 'md5:75ccfb0d6bcefc6e7428c68b4aa1fe44',
+            'timestamp': 1535528954,
+            'upload_date': '20180829',
+            'thumbnail': r're:^https?://.*\.jpg$',
+        },
+        'params': {
+            'format': 'bestvideo',
+        },
+    }]
 
     def _real_extract(self, url):
         display_id = self._match_id(url)
@@ -40,8 +56,13 @@ class InternazionaleIE(InfoExtractor):
             DATA_RE % 'job-id', webpage, 'video id', group='value')
         video_path = self._search_regex(
             DATA_RE % 'video-path', webpage, 'video path', group='value')
+        video_available_abroad = self._search_regex(
+            DATA_RE % 'video-available_abroad', webpage,
+            'video available abroad', default='1', group='value')
+        video_available_abroad = video_available_abroad == '1'
 
-        video_base = 'https://video.internazionale.it/%s/%s.' % (video_path, video_id)
+        video_base = 'https://video%s.internazionale.it/%s/%s.' % \
+            ('' if video_available_abroad else '-ita', video_path, video_id)
 
         formats = self._extract_m3u8_formats(
             video_base + 'm3u8', display_id, 'mp4',
diff --git a/youtube_dl/extractor/iprima.py b/youtube_dl/extractor/iprima.py
index a29e6a5badd2ef0403a2b8b4afe1c1ef2f926d4c..1d58d6e850724f226d66f5822777010f8a8b8d38 100644 (file)
@@ -12,7 +12,7 @@ from ..utils import (
 
 
 class IPrimaIE(InfoExtractor):
-    _VALID_URL = r'https?://play\.iprima\.cz/(?:.+/)?(?P<id>[^?#]+)'
+    _VALID_URL = r'https?://(?:play|prima)\.iprima\.cz/(?:.+/)?(?P<id>[^?#]+)'
     _GEO_BYPASS = False
 
     _TESTS = [{
@@ -33,14 +33,27 @@ class IPrimaIE(InfoExtractor):
         # geo restricted
         'url': 'http://play.iprima.cz/closer-nove-pripady/closer-nove-pripady-iv-1',
         'only_matching': True,
+    }, {
+        # iframe api.play-backend.iprima.cz
+        'url': 'https://prima.iprima.cz/my-little-pony/mapa-znameni-2-2',
+        'only_matching': True,
+    }, {
+        # iframe prima.iprima.cz
+        'url': 'https://prima.iprima.cz/porady/jak-se-stavi-sen/rodina-rathousova-praha',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
+        self._set_cookie('play.iprima.cz', 'ott_adult_confirmed', '1')
+
         webpage = self._download_webpage(url, video_id)
 
-        video_id = self._search_regex(r'data-product="([^"]+)">', webpage, 'real id')
+        video_id = self._search_regex(
+            (r'<iframe[^>]+\bsrc=["\'](?:https?:)?//(?:api\.play-backend\.iprima\.cz/prehravac/embedded|prima\.iprima\.cz/[^/]+/[^/]+)\?.*?\bid=(p\d+)',
+             r'data-product="([^"]+)">'),
+            webpage, 'real id')
 
         playerpage = self._download_webpage(
             'http://play.iprima.cz/prehravac/init',
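Two details in this hunk: _set_cookie pre-confirms the adult-content prompt before the page is fetched, and _search_regex accepts a tuple of patterns that are tried in order, so the new iframe pattern takes precedence over the legacy data-product one. A rough standalone equivalent of that fallback behaviour (the embedded id is made up):

    import re

    # Each pattern is tried in order; the first match wins, mirroring
    # youtube-dl's _search_regex when given a tuple of patterns.
    def first_match(patterns, haystack):
        for pattern in patterns:
            mobj = re.search(pattern, haystack)
            if mobj:
                return mobj.group(1)
        return None

    page = '<iframe src="//api.play-backend.iprima.cz/prehravac/embedded?id=p135702">'
    print(first_match(
        (r'//api\.play-backend\.iprima\.cz/prehravac/embedded\?.*?\bid=(p\d+)',
         r'data-product="([^"]+)">'),
        page))  # p135702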
diff --git a/youtube_dl/extractor/itv.py b/youtube_dl/extractor/itv.py
index 6a4f8a50569b5bebb0a46481843609fcd205d108..de65b6bb45d8980f9dafce9f60c3e19539dc58d5 100644
--- a/youtube_dl/extractor/itv.py
+++ b/youtube_dl/extractor/itv.py
@@ -13,15 +13,17 @@ from ..compat import (
     compat_etree_register_namespace,
 )
 from ..utils import (
+    determine_ext,
+    ExtractorError,
     extract_attributes,
-    xpath_with_ns,
-    xpath_element,
-    xpath_text,
     int_or_none,
+    merge_dicts,
     parse_duration,
     smuggle_url,
-    ExtractorError,
-    determine_ext,
+    url_or_none,
+    xpath_with_ns,
+    xpath_element,
+    xpath_text,
 )
 
 
@@ -129,64 +131,65 @@ class ITVIE(InfoExtractor):
 
         resp_env = self._download_xml(
             params['data-playlist-url'], video_id,
-            headers=headers, data=etree.tostring(req_env))
-        playlist = xpath_element(resp_env, './/Playlist')
-        if playlist is None:
-            fault_code = xpath_text(resp_env, './/faultcode')
-            fault_string = xpath_text(resp_env, './/faultstring')
-            if fault_code == 'InvalidGeoRegion':
-                self.raise_geo_restricted(
-                    msg=fault_string, countries=self._GEO_COUNTRIES)
-            elif fault_code not in (
-                    'InvalidEntity', 'InvalidVodcrid', 'ContentUnavailable'):
-                raise ExtractorError(
-                    '%s said: %s' % (self.IE_NAME, fault_string), expected=True)
-            info.update({
-                'title': self._og_search_title(webpage),
-                'episode_title': params.get('data-video-episode'),
-                'series': params.get('data-video-title'),
-            })
-        else:
-            title = xpath_text(playlist, 'EpisodeTitle', default=None)
-            info.update({
-                'title': title,
-                'episode_title': title,
-                'episode_number': int_or_none(xpath_text(playlist, 'EpisodeNumber')),
-                'series': xpath_text(playlist, 'ProgrammeTitle'),
-                'duration': parse_duration(xpath_text(playlist, 'Duration')),
-            })
-            video_element = xpath_element(playlist, 'VideoEntries/Video', fatal=True)
-            media_files = xpath_element(video_element, 'MediaFiles', fatal=True)
-            rtmp_url = media_files.attrib['base']
+            headers=headers, data=etree.tostring(req_env), fatal=False)
+        if resp_env:
+            playlist = xpath_element(resp_env, './/Playlist')
+            if playlist is None:
+                fault_code = xpath_text(resp_env, './/faultcode')
+                fault_string = xpath_text(resp_env, './/faultstring')
+                if fault_code == 'InvalidGeoRegion':
+                    self.raise_geo_restricted(
+                        msg=fault_string, countries=self._GEO_COUNTRIES)
+                elif fault_code not in (
+                        'InvalidEntity', 'InvalidVodcrid', 'ContentUnavailable'):
+                    raise ExtractorError(
+                        '%s said: %s' % (self.IE_NAME, fault_string), expected=True)
+                info.update({
+                    'title': self._og_search_title(webpage),
+                    'episode_title': params.get('data-video-episode'),
+                    'series': params.get('data-video-title'),
+                })
+            else:
+                title = xpath_text(playlist, 'EpisodeTitle', default=None)
+                info.update({
+                    'title': title,
+                    'episode_title': title,
+                    'episode_number': int_or_none(xpath_text(playlist, 'EpisodeNumber')),
+                    'series': xpath_text(playlist, 'ProgrammeTitle'),
+                    'duration': parse_duration(xpath_text(playlist, 'Duration')),
+                })
+                video_element = xpath_element(playlist, 'VideoEntries/Video', fatal=True)
+                media_files = xpath_element(video_element, 'MediaFiles', fatal=True)
+                rtmp_url = media_files.attrib['base']
 
-            for media_file in media_files.findall('MediaFile'):
-                play_path = xpath_text(media_file, 'URL')
-                if not play_path:
-                    continue
-                tbr = int_or_none(media_file.get('bitrate'), 1000)
-                f = {
-                    'format_id': 'rtmp' + ('-%d' % tbr if tbr else ''),
-                    'play_path': play_path,
-                    # Providing this swfVfy allows to avoid truncated downloads
-                    'player_url': 'http://www.itv.com/mercury/Mercury_VideoPlayer.swf',
-                    'page_url': url,
-                    'tbr': tbr,
-                    'ext': 'flv',
-                }
-                app = self._search_regex(
-                    'rtmpe?://[^/]+/(.+)$', rtmp_url, 'app', default=None)
-                if app:
-                    f.update({
-                        'url': rtmp_url.split('?', 1)[0],
-                        'app': app,
-                    })
-                else:
-                    f['url'] = rtmp_url
-                formats.append(f)
+                for media_file in media_files.findall('MediaFile'):
+                    play_path = xpath_text(media_file, 'URL')
+                    if not play_path:
+                        continue
+                    tbr = int_or_none(media_file.get('bitrate'), 1000)
+                    f = {
+                        'format_id': 'rtmp' + ('-%d' % tbr if tbr else ''),
+                        'play_path': play_path,
+                        # Providing this swfVfy allows to avoid truncated downloads
+                        'player_url': 'http://www.itv.com/mercury/Mercury_VideoPlayer.swf',
+                        'page_url': url,
+                        'tbr': tbr,
+                        'ext': 'flv',
+                    }
+                    app = self._search_regex(
+                        'rtmpe?://[^/]+/(.+)$', rtmp_url, 'app', default=None)
+                    if app:
+                        f.update({
+                            'url': rtmp_url.split('?', 1)[0],
+                            'app': app,
+                        })
+                    else:
+                        f['url'] = rtmp_url
+                    formats.append(f)
 
-            for caption_url in video_element.findall('ClosedCaptioningURIs/URL'):
-                if caption_url.text:
-                    extract_subtitle(caption_url.text)
+                for caption_url in video_element.findall('ClosedCaptioningURIs/URL'):
+                    if caption_url.text:
+                        extract_subtitle(caption_url.text)
 
         ios_playlist_url = params.get('data-video-playlist') or params.get('data-video-id')
         hmac = params.get('data-video-hmac')
@@ -248,8 +251,8 @@ class ITVIE(InfoExtractor):
                     for sub in subs:
                         if not isinstance(sub, dict):
                             continue
-                        href = sub.get('Href')
-                        if isinstance(href, compat_str):
+                        href = url_or_none(sub.get('Href'))
+                        if href:
                             extract_subtitle(href)
                 if not info.get('duration'):
                     info['duration'] = parse_duration(video_data.get('Duration'))
@@ -261,7 +264,17 @@ class ITVIE(InfoExtractor):
             'formats': formats,
             'subtitles': subtitles,
         })
-        return info
+
+        webpage_info = self._search_json_ld(webpage, video_id, default={})
+        if not webpage_info.get('title'):
+            webpage_info['title'] = self._html_search_regex(
+                r'(?s)<h\d+[^>]+\bclass=["\'][^>]*episode-title["\'][^>]*>([^<]+)<',
+                webpage, 'title', default=None) or self._og_search_title(
+                webpage, default=None) or self._html_search_meta(
+                'twitter:title', webpage, 'title',
+                default=None) or webpage_info['episode']
+
+        return merge_dicts(info, webpage_info)
 
 
 class ITVBTCCIE(InfoExtractor):
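The new merge_dicts(info, webpage_info) return layers the playlist metadata over whatever JSON-LD and OpenGraph data the webpage provides: in youtube-dl's merge_dicts, earlier dictionaries win and None values are skipped, so the webpage only fills the gaps. A quick illustration with made-up field values:

    from youtube_dl.utils import merge_dicts

    info = {'id': '2a2936a0053', 'title': None, 'duration': 1320}
    webpage_info = {'title': 'Episode 8', 'description': 'From the webpage'}

    # None values are skipped and earlier dicts take precedence, so the
    # JSON-LD title plugs the hole left by the playlist response.
    print(merge_dicts(info, webpage_info))
    # {'id': '2a2936a0053', 'duration': 1320, 'title': 'Episode 8',
    #  'description': 'From the webpage'}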
diff --git a/youtube_dl/extractor/iwara.py b/youtube_dl/extractor/iwara.py
index a7514fc80b3dc64636a9f53b7abc8d7672cb0546..907d5fc8bb23434559714d45be7c089d70a5d8a8 100644
--- a/youtube_dl/extractor/iwara.py
+++ b/youtube_dl/extractor/iwara.py
@@ -7,6 +7,7 @@ from ..utils import (
     int_or_none,
     mimetype2ext,
     remove_end,
+    url_or_none,
 )
 
 
@@ -73,11 +74,14 @@ class IwaraIE(InfoExtractor):
 
         formats = []
         for a_format in video_data:
+            format_uri = url_or_none(a_format.get('uri'))
+            if not format_uri:
+                continue
             format_id = a_format.get('resolution')
             height = int_or_none(self._search_regex(
                 r'(\d+)p', format_id, 'height', default=None))
             formats.append({
-                'url': a_format['uri'],
+                'url': self._proto_relative_url(format_uri, 'https:'),
                 'format_id': format_id,
                 'ext': mimetype2ext(a_format.get('mime')) or 'mp4',
                 'height': height,
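url_or_none, adopted throughout this release, validates that a value is a string shaped like a URL (some scheme:// or protocol-relative //) and returns None otherwise, which is what lets the loop above skip junk entries with a single check; _proto_relative_url then supplies a default scheme. For example (hosts are illustrative):

    from youtube_dl.utils import url_or_none

    # Keeps URL-shaped strings, drops everything else.
    for candidate in ('//example.com/file.mp4', 'https://example.com/v.mp4',
                      'not a url', None, 42):
        print(repr(url_or_none(candidate)))
    # '//example.com/file.mp4', 'https://example.com/v.mp4', None, None, None

    # _proto_relative_url simply prefixes the scheme when one is missing:
    uri = url_or_none('//example.com/file.mp4')
    print('https:' + uri if uri.startswith('//') else uri)
    # https://example.com/file.mp4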
diff --git a/youtube_dl/extractor/joj.py b/youtube_dl/extractor/joj.py
index a764023e9f82a07bc96f6eaab293daf224a1c91a..d9f8dbfd240db90fac3ddcfd23dd76d095cef52a 100644
--- a/youtube_dl/extractor/joj.py
+++ b/youtube_dl/extractor/joj.py
@@ -18,7 +18,7 @@ class JojIE(InfoExtractor):
                         joj:|
                         https?://media\.joj\.sk/embed/
                     )
-                    (?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})
+                    (?P<id>[^/?#^]+)
                 '''
     _TESTS = [{
         'url': 'https://media.joj.sk/embed/a388ec4c-6019-4a4a-9312-b1bee194e932',
@@ -29,16 +29,24 @@ class JojIE(InfoExtractor):
             'thumbnail': r're:^https?://.*\.jpg$',
             'duration': 3118,
         }
+    }, {
+        'url': 'https://media.joj.sk/embed/9i1cxv',
+        'only_matching': True,
     }, {
         'url': 'joj:a388ec4c-6019-4a4a-9312-b1bee194e932',
         'only_matching': True,
+    }, {
+        'url': 'joj:9i1cxv',
+        'only_matching': True,
     }]

     @staticmethod
     def _extract_urls(webpage):
-        return re.findall(
-            r'<iframe\b[^>]+\bsrc=["\'](?P<url>(?:https?:)?//media\.joj\.sk/embed/[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})',
-            webpage)
+        return [
+            mobj.group('url')
+            for mobj in re.finditer(
+                r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//media\.joj\.sk/embed/(?:(?!\1).)+)\1',
+                webpage)]

     def _real_extract(self, url):
         video_id = self._match_id(url)
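The rewritten _extract_urls captures the opening quote and uses the tempered pattern (?:(?!\1).)+ so the embed id may contain anything up to the matching quote, whichever quote style the page used; re.findall had to go because with more than one capturing group it returns tuples rather than the URL alone. Standalone:

    import re

    EMBED_RE = r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//media\.joj\.sk/embed/(?:(?!\1).)+)\1'
    webpage = '''
    <iframe src="https://media.joj.sk/embed/a388ec4c-6019-4a4a-9312-b1bee194e932"></iframe>
    <iframe src='//media.joj.sk/embed/9i1cxv?autoplay=1'></iframe>
    '''

    # \1 pins the closing quote to whichever quote opened the attribute.
    print([m.group('url') for m in re.finditer(EMBED_RE, webpage)])
    # ['https://media.joj.sk/embed/a388ec4c-6019-4a4a-9312-b1bee194e932',
    #  '//media.joj.sk/embed/9i1cxv?autoplay=1']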
diff --git a/youtube_dl/extractor/keezmovies.py b/youtube_dl/extractor/keezmovies.py
index d4e6f7ac17be4f19e4fdf3bff891fde0b087d006..c3eb74c1742bedc23ae125384b9ba39965f6cd95 100644
--- a/youtube_dl/extractor/keezmovies.py
+++ b/youtube_dl/extractor/keezmovies.py
@@ -4,16 +4,14 @@ import re
 
 from .common import InfoExtractor
 from ..aes import aes_decrypt_text
-from ..compat import (
-    compat_str,
-    compat_urllib_parse_unquote,
-)
+from ..compat import compat_urllib_parse_unquote
 from ..utils import (
     determine_ext,
     ExtractorError,
     int_or_none,
     str_to_int,
     strip_or_none,
+    url_or_none,
 )
 
 
@@ -55,7 +53,8 @@ class KeezMoviesIE(InfoExtractor):
         encrypted = False
 
         def extract_format(format_url, height=None):
-            if not isinstance(format_url, compat_str) or not format_url.startswith(('http', '//')):
+            format_url = url_or_none(format_url)
+            if not format_url or not format_url.startswith(('http', '//')):
                 return
             if format_url in format_urls:
                 return
diff --git a/youtube_dl/extractor/kinopoisk.py b/youtube_dl/extractor/kinopoisk.py
new file mode 100644
index 0000000..9e8d01f
--- /dev/null
+++ b/youtube_dl/extractor/kinopoisk.py
@@ -0,0 +1,70 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    dict_get,
+    int_or_none,
+)
+
+
+class KinoPoiskIE(InfoExtractor):
+    _GEO_COUNTRIES = ['RU']
+    _VALID_URL = r'https?://(?:www\.)?kinopoisk\.ru/film/(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'https://www.kinopoisk.ru/film/81041/watch/',
+        'md5': '4f71c80baea10dfa54a837a46111d326',
+        'info_dict': {
+            'id': '81041',
+            'ext': 'mp4',
+            'title': 'Алеша попович и тугарин змей',
+            'description': 'md5:43787e673d68b805d0aa1df5a5aea701',
+            'thumbnail': r're:^https?://.*',
+            'duration': 4533,
+            'age_limit': 12,
+        },
+        'params': {
+            'format': 'bestvideo',
+        },
+    }, {
+        'url': 'https://www.kinopoisk.ru/film/81041',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(
+            'https://ott-widget.kinopoisk.ru/v1/kp/', video_id,
+            query={'kpId': video_id})
+
+        data = self._parse_json(
+            self._search_regex(
+                r'(?s)<script[^>]+\btype=["\']application/json[^>]+>(.+?)<',
+                webpage, 'data'),
+            video_id)['models']
+
+        film = data['filmStatus']
+        title = film.get('title') or film['originalTitle']
+
+        formats = self._extract_m3u8_formats(
+            data['playlistEntity']['uri'], video_id, 'mp4',
+            entry_protocol='m3u8_native', m3u8_id='hls')
+        self._sort_formats(formats)
+
+        description = dict_get(
+            film, ('descriptscription', 'description',
+                   'shortDescriptscription', 'shortDescription'))
+        thumbnail = film.get('coverUrl') or film.get('posterUrl')
+        duration = int_or_none(film.get('duration'))
+        age_limit = int_or_none(film.get('restrictionAge'))
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'duration': duration,
+            'age_limit': age_limit,
+            'formats': formats,
+        }
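dict_get, used for the description above, returns the first usable value among several candidate keys, which lets the new extractor tolerate the differently spelled description fields the widget JSON has been seen with. Roughly (the sample dict is invented):

    from youtube_dl.utils import dict_get

    film = {'descriptscription': None, 'shortDescription': 'A short blurb.'}

    # Keys are tried in order; None (and, by default, other falsy values)
    # are skipped, so the first key holding real data wins.
    print(dict_get(film, ('descriptscription', 'description',
                          'shortDescriptscription', 'shortDescription')))
    # A short blurb.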
diff --git a/youtube_dl/extractor/konserthusetplay.py b/youtube_dl/extractor/konserthusetplay.py
index c11cbcf4757238642639cb6fac454ce98bb4a5c5..dd42bb2f27b756e3d7050fdf68429e2607db6fc2 100644
--- a/youtube_dl/extractor/konserthusetplay.py
+++ b/youtube_dl/extractor/konserthusetplay.py
@@ -2,11 +2,11 @@
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
-from ..compat import compat_str
 from ..utils import (
     determine_ext,
     float_or_none,
     int_or_none,
+    url_or_none,
 )
 
 
@@ -109,7 +109,8 @@ class KonserthusetPlayIE(InfoExtractor):
         captions = source.get('captionsAvailableLanguages')
         if isinstance(captions, dict):
             for lang, subtitle_url in captions.items():
-                if lang != 'none' and isinstance(subtitle_url, compat_str):
+                subtitle_url = url_or_none(subtitle_url)
+                if lang != 'none' and subtitle_url:
                     subtitles.setdefault(lang, []).append({'url': subtitle_url})
 
         return {
diff --git a/youtube_dl/extractor/lci.py b/youtube_dl/extractor/lci.py
index af34829e783b6df9c7c9d10691f245fe386b9b30..920872f5c428ad61a0b16b0fc384ecd022389432 100644
--- a/youtube_dl/extractor/lci.py
+++ b/youtube_dl/extractor/lci.py
@@ -20,5 +20,7 @@ class LCIIE(InfoExtractor):
     def _real_extract(self, url):
         video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
-        wat_id = self._search_regex(r'data-watid=[\'"](\d+)', webpage, 'wat id')
+        wat_id = self._search_regex(
+            (r'data-watid=[\'"](\d+)', r'idwat["\']?\s*:\s*["\']?(\d+)'),
+            webpage, 'wat id')
         return self.url_result('wat:' + wat_id, 'Wat', wat_id)
diff --git a/youtube_dl/extractor/lynda.py b/youtube_dl/extractor/lynda.py
index f5c7abc13eb431605355c080ade55da197fc04e3..4ba61cd8a1ccc187eaa539114796a35d083737a2 100644
--- a/youtube_dl/extractor/lynda.py
+++ b/youtube_dl/extractor/lynda.py
@@ -4,7 +4,6 @@ import re
 
 from .common import InfoExtractor
 from ..compat import (
-    compat_HTTPError,
     compat_str,
     compat_urlparse,
 )
@@ -44,21 +43,15 @@ class LyndaBaseIE(InfoExtractor):
         form_data = self._hidden_inputs(form_html)
         form_data.update(extra_form_data)
 
-        try:
-            response = self._download_json(
-                action_url, None, note,
-                data=urlencode_postdata(form_data),
-                headers={
-                    'Referer': referrer_url,
-                    'X-Requested-With': 'XMLHttpRequest',
-                })
-        except ExtractorError as e:
-            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 500:
-                response = self._parse_json(e.cause.read().decode('utf-8'), None)
-                self._check_error(response, ('email', 'password'))
-            raise
-
-        self._check_error(response, 'ErrorMessage')
+        response = self._download_json(
+            action_url, None, note,
+            data=urlencode_postdata(form_data),
+            headers={
+                'Referer': referrer_url,
+                'X-Requested-With': 'XMLHttpRequest',
+            }, expected_status=(418, 500, ))
+
+        self._check_error(response, ('email', 'password', 'ErrorMessage'))
 
         return response, action_url
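The login no longer special-cases HTTP 500 in a try/except: _download_json's expected_status parameter treats the listed status codes as non-fatal and hands back the parsed error body, after which _check_error scans it for all three failure fields at once. The underlying idea, sketched with the Python 3 standard library:

    import json
    from urllib.error import HTTPError
    from urllib.request import urlopen

    # What expected_status=(418, 500) buys: an "expected" error status
    # still yields a parsed body instead of aborting the extraction.
    def download_json(url, expected_status=()):
        try:
            resp = urlopen(url)
        except HTTPError as e:
            if e.code not in expected_status:
                raise
            resp = e  # HTTPError is file-like; its body is still JSON
        return json.loads(resp.read().decode('utf-8'))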
 
diff --git a/youtube_dl/extractor/markiza.py b/youtube_dl/extractor/markiza.py
index e6bfab1141da2638fd20e26bda0cf2d7bd2bbd1e..def960a0c5eea1a41fce7202c7a9012300498ab4 100644
--- a/youtube_dl/extractor/markiza.py
+++ b/youtube_dl/extractor/markiza.py
@@ -110,7 +110,11 @@ class MarkizaPageIE(InfoExtractor):
     def _real_extract(self, url):
         playlist_id = self._match_id(url)
 
-        webpage = self._download_webpage(url, playlist_id)
+        webpage = self._download_webpage(
+            # Downloading for some hosts (e.g. dajto, doma) fails with 500
+            # although everything seems to be OK, so considering 500
+            # status code to be expected.
+            url, playlist_id, expected_status=500)
 
         entries = [
             self.url_result('http://videoarchiv.markiza.sk/video/%s' % video_id)
diff --git a/youtube_dl/extractor/mediaset.py b/youtube_dl/extractor/mediaset.py
index 9760eafd5685225bda09218d309acffacd7cd647..57f97409da5f15e1575fe0fb317078dbfb3c33b6 100644
--- a/youtube_dl/extractor/mediaset.py
+++ b/youtube_dl/extractor/mediaset.py
@@ -3,59 +3,75 @@ from __future__ import unicode_literals
 
 import re
 
-from .common import InfoExtractor
-from ..compat import compat_str
+from .theplatform import ThePlatformBaseIE
 from ..utils import (
-    determine_ext,
-    parse_duration,
-    try_get,
-    unified_strdate,
+    ExtractorError,
+    int_or_none,
+    update_url_query,
 )
 
 
-class MediasetIE(InfoExtractor):
+class MediasetIE(ThePlatformBaseIE):
+    _TP_TLD = 'eu'
     _VALID_URL = r'''(?x)
                     (?:
                         mediaset:|
                         https?://
-                            (?:www\.)?video\.mediaset\.it/
+                            (?:(?:www|static3)\.)?mediasetplay\.mediaset\.it/
                             (?:
                                 (?:video|on-demand)/(?:[^/]+/)+[^/]+_|
-                                player/playerIFrame(?:Twitter)?\.shtml\?.*?\bid=
+                                player/index\.html\?.*?\bprogramGuid=
                             )
-                    )(?P<id>[0-9]+)
+                    )(?P<id>[0-9A-Z]{16})
                     '''
     _TESTS = [{
         # full episode
-        'url': 'http://www.video.mediaset.it/video/hello_goodbye/full/quarta-puntata_661824.html',
+        'url': 'https://www.mediasetplay.mediaset.it/video/hellogoodbye/quarta-puntata_FAFU000000661824',
         'md5': '9b75534d42c44ecef7bf1ffeacb7f85d',
         'info_dict': {
-            'id': '661824',
+            'id': 'FAFU000000661824',
             'ext': 'mp4',
             'title': 'Quarta puntata',
-            'description': 'md5:7183696d6df570e3412a5ef74b27c5e2',
+            'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
             'thumbnail': r're:^https?://.*\.jpg$',
-            'duration': 1414,
-            'creator': 'mediaset',
+            'duration': 1414.26,
             'upload_date': '20161107',
             'series': 'Hello Goodbye',
-            'categories': ['reality'],
+            'timestamp': 1478532900,
+            'uploader': 'Rete 4',
+            'uploader_id': 'R4',
         },
-        'expected_warnings': ['is not a supported codec'],
+    }, {
+        'url': 'https://www.mediasetplay.mediaset.it/video/matrix/puntata-del-25-maggio_F309013801000501',
+        'md5': '288532f0ad18307705b01e581304cd7b',
+        'info_dict': {
+            'id': 'F309013801000501',
+            'ext': 'mp4',
+            'title': 'Puntata del 25 maggio',
+            'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'duration': 6565.007,
+            'upload_date': '20180526',
+            'series': 'Matrix',
+            'timestamp': 1527326245,
+            'uploader': 'Canale 5',
+            'uploader_id': 'C5',
+        },
+        'expected_warnings': ['HTTP Error 403: Forbidden'],
     }, {
         # clip
-        'url': 'http://www.video.mediaset.it/video/gogglebox/clip/un-grande-classico-della-commedia-sexy_661680.html',
+        'url': 'https://www.mediasetplay.mediaset.it/video/gogglebox/un-grande-classico-della-commedia-sexy_FAFU000000661680',
         'only_matching': True,
     }, {
         # iframe simple
-        'url': 'http://www.video.mediaset.it/player/playerIFrame.shtml?id=665924&autoplay=true',
+        'url': 'https://static3.mediasetplay.mediaset.it/player/index.html?appKey=5ad3966b1de1c4000d5cec48&programGuid=FAFU000000665924&id=665924',
         'only_matching': True,
     }, {
         # iframe twitter (from http://www.wittytv.it/se-prima-mi-fidavo-zero/)
-        'url': 'https://www.video.mediaset.it/player/playerIFrameTwitter.shtml?id=665104&playrelated=false&autoplay=false&related=true&hidesocial=true',
+        'url': 'https://static3.mediasetplay.mediaset.it/player/index.html?appKey=5ad3966b1de1c4000d5cec48&programGuid=FAFU000000665104&id=665104',
         'only_matching': True,
     }, {
-        'url': 'mediaset:661824',
+        'url': 'mediaset:FAFU000000665924',
         'only_matching': True,
     }]
 
@@ -68,51 +84,54 @@ class MediasetIE(InfoExtractor):
                 webpage)]
 
     def _real_extract(self, url):
-        video_id = self._match_id(url)
-
-        video_list = self._download_json(
-            'http://cdnsel01.mediaset.net/GetCdn.aspx',
-            video_id, 'Downloading video CDN JSON', query={
-                'streamid': video_id,
-                'format': 'json',
-            })['videoList']
+        guid = self._match_id(url)
+        tp_path = 'PR1GhC/media/guid/2702976343/' + guid
+        info = self._extract_theplatform_metadata(tp_path, guid)
 
         formats = []
-        for format_url in video_list:
-            if '.ism' in format_url:
-                formats.extend(self._extract_ism_formats(
-                    format_url, video_id, ism_id='mss', fatal=False))
-            else:
-                formats.append({
-                    'url': format_url,
-                    'format_id': determine_ext(format_url),
-                })
+        subtitles = {}
+        first_e = None
+        for asset_type in ('SD', 'HD'):
+            for f in ('MPEG4', 'MPEG-DASH', 'M3U', 'ISM'):
+                try:
+                    tp_formats, tp_subtitles = self._extract_theplatform_smil(
+                        update_url_query('http://link.theplatform.%s/s/%s' % (self._TP_TLD, tp_path), {
+                            'mbr': 'true',
+                            'formats': f,
+                            'assetTypes': asset_type,
+                        }), guid, 'Downloading %s %s SMIL data' % (f, asset_type))
+                except ExtractorError as e:
+                    if not first_e:
+                        first_e = e
+                    break
+                for tp_f in tp_formats:
+                    tp_f['quality'] = 1 if asset_type == 'HD' else 0
+                formats.extend(tp_formats)
+                subtitles = self._merge_subtitles(subtitles, tp_subtitles)
+        if first_e and not formats:
+            raise first_e
         self._sort_formats(formats)
 
-        mediainfo = self._download_json(
-            'http://plr.video.mediaset.it/html/metainfo.sjson',
-            video_id, 'Downloading video info JSON', query={
-                'id': video_id,
-            })['video']
-
-        title = mediainfo['title']
-
-        creator = try_get(
-            mediainfo, lambda x: x['brand-info']['publisher'], compat_str)
-        category = try_get(
-            mediainfo, lambda x: x['brand-info']['category'], compat_str)
-        categories = [category] if category else None
+        fields = []
+        for templ, repls in (('tvSeason%sNumber', ('', 'Episode')), ('mediasetprogram$%s', ('brandTitle', 'numberOfViews', 'publishInfo'))):
+            fields.extend(templ % repl for repl in repls)
+        feed_data = self._download_json(
+            'https://feed.entertainment.tv.theplatform.eu/f/PR1GhC/mediaset-prod-all-programs/guid/-/' + guid,
+            guid, fatal=False, query={'fields': ','.join(fields)})
+        if feed_data:
+            publish_info = feed_data.get('mediasetprogram$publishInfo') or {}
+            info.update({
+                'episode_number': int_or_none(feed_data.get('tvSeasonEpisodeNumber')),
+                'season_number': int_or_none(feed_data.get('tvSeasonNumber')),
+                'series': feed_data.get('mediasetprogram$brandTitle'),
+                'uploader': publish_info.get('description'),
+                'uploader_id': publish_info.get('channel'),
+                'view_count': int_or_none(feed_data.get('mediasetprogram$numberOfViews')),
+            })
 
-        return {
-            'id': video_id,
-            'title': title,
-            'description': mediainfo.get('short-description'),
-            'thumbnail': mediainfo.get('thumbnail'),
-            'duration': parse_duration(mediainfo.get('duration')),
-            'creator': creator,
-            'upload_date': unified_strdate(mediainfo.get('production-date')),
-            'webpage_url': mediainfo.get('url'),
-            'series': mediainfo.get('brand-value'),
-            'categories': categories,
+        info.update({
+            'id': guid,
             'formats': formats,
-        }
+            'subtitles': subtitles,
+        })
+        return info
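The format loop above probes every assetTypes/formats combination against ThePlatform and keeps the first failure in reserve, raising it only if no combination produced formats, so a partial failure (e.g. the HD variant returning 403, as the second test expects) is tolerated. The pattern in isolation, slightly simplified:

    # fetch_formats stands in for _extract_theplatform_smil here.
    def collect_formats(variants, fetch_formats):
        formats, first_exc = [], None
        for variant in variants:
            try:
                formats.extend(fetch_formats(variant))
            except Exception as exc:
                if first_exc is None:
                    first_exc = exc  # remember the first failure
        if first_exc and not formats:
            raise first_exc  # fatal only if *every* variant failed
        return formats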
diff --git a/youtube_dl/extractor/mediasite.py b/youtube_dl/extractor/mediasite.py
index 0e2645c55072a3401e95d6308d9296ef13280694..84876b883811cc8ad2607de7caed0f621f35f660 100644
--- a/youtube_dl/extractor/mediasite.py
+++ b/youtube_dl/extractor/mediasite.py
@@ -15,6 +15,7 @@ from ..utils import (
     mimetype2ext,
     unescapeHTML,
     unsmuggle_url,
+    url_or_none,
     urljoin,
 )
 
@@ -156,8 +157,8 @@ class MediasiteIE(InfoExtractor):
 
             stream_formats = []
             for unum, VideoUrl in enumerate(video_urls):
-                video_url = VideoUrl.get('Location')
-                if not video_url or not isinstance(video_url, compat_str):
+                video_url = url_or_none(VideoUrl.get('Location'))
+                if not video_url:
                     continue
                 # XXX: if Stream.get('CanChangeScheme', False), switch scheme to HTTP/HTTPS
 
diff --git a/youtube_dl/extractor/mitele.py b/youtube_dl/extractor/mitele.py
index 42759eae87cecd940c7b8edd8b9aabbb578e9039..40f214a873a7dcbb5087726aa270b74a5dab3e76 100644
--- a/youtube_dl/extractor/mitele.py
+++ b/youtube_dl/extractor/mitele.py
@@ -1,84 +1,14 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
-import json
-import uuid
-
 from .common import InfoExtractor
-from .ooyala import OoyalaIE
-from ..compat import (
-    compat_str,
-    compat_urlparse,
-)
 from ..utils import (
     int_or_none,
-    extract_attributes,
-    determine_ext,
     smuggle_url,
     parse_duration,
 )
 
 
-class MiTeleBaseIE(InfoExtractor):
-    def _get_player_info(self, url, webpage):
-        player_data = extract_attributes(self._search_regex(
-            r'(?s)(<ms-video-player.+?</ms-video-player>)',
-            webpage, 'ms video player'))
-        video_id = player_data['data-media-id']
-        if player_data.get('data-cms-id') == 'ooyala':
-            return self.url_result(
-                'ooyala:%s' % video_id, ie=OoyalaIE.ie_key(), video_id=video_id)
-        config_url = compat_urlparse.urljoin(url, player_data['data-config'])
-        config = self._download_json(
-            config_url, video_id, 'Downloading config JSON')
-        mmc_url = config['services']['mmc']
-
-        duration = None
-        formats = []
-        for m_url in (mmc_url, mmc_url.replace('/flash.json', '/html5.json')):
-            mmc = self._download_json(
-                m_url, video_id, 'Downloading mmc JSON')
-            if not duration:
-                duration = int_or_none(mmc.get('duration'))
-            for location in mmc['locations']:
-                gat = self._proto_relative_url(location.get('gat'), 'http:')
-                gcp = location.get('gcp')
-                ogn = location.get('ogn')
-                if None in (gat, gcp, ogn):
-                    continue
-                token_data = {
-                    'gcp': gcp,
-                    'ogn': ogn,
-                    'sta': 0,
-                }
-                media = self._download_json(
-                    gat, video_id, data=json.dumps(token_data).encode('utf-8'),
-                    headers={
-                        'Content-Type': 'application/json;charset=utf-8',
-                        'Referer': url,
-                    })
-                stream = media.get('stream') or media.get('file')
-                if not stream:
-                    continue
-                ext = determine_ext(stream)
-                if ext == 'f4m':
-                    formats.extend(self._extract_f4m_formats(
-                        stream + '&hdcore=3.2.0&plugin=aasp-3.2.0.77.18',
-                        video_id, f4m_id='hds', fatal=False))
-                elif ext == 'm3u8':
-                    formats.extend(self._extract_m3u8_formats(
-                        stream, video_id, 'mp4', 'm3u8_native',
-                        m3u8_id='hls', fatal=False))
-        self._sort_formats(formats)
-
-        return {
-            'id': video_id,
-            'formats': formats,
-            'thumbnail': player_data.get('data-poster') or config.get('poster', {}).get('imageUrl'),
-            'duration': duration,
-        }
-
-
 class MiTeleIE(InfoExtractor):
     IE_DESC = 'mitele.es'
     _VALID_URL = r'https?://(?:www\.)?mitele\.es/(?:[^/]+/)+(?P<id>[^/]+)/player'
@@ -86,7 +16,7 @@ class MiTeleIE(InfoExtractor):
     _TESTS = [{
         'url': 'http://www.mitele.es/programas-tv/diario-de/57b0dfb9c715da65618b4afa/player',
         'info_dict': {
-            'id': '57b0dfb9c715da65618b4afa',
+            'id': 'FhYW1iNTE6J6H7NkQRIEzfne6t2quqPg',
             'ext': 'mp4',
             'title': 'Tor, la web invisible',
             'description': 'md5:3b6fce7eaa41b2d97358726378d9369f',
@@ -104,7 +34,7 @@ class MiTeleIE(InfoExtractor):
         # no explicit title
         'url': 'http://www.mitele.es/programas-tv/cuarto-milenio/57b0de3dc915da14058b4876/player',
         'info_dict': {
-            'id': '57b0de3dc915da14058b4876',
+            'id': 'oyNG1iNTE6TAPP-JmCjbwfwJqqMMX3Vq',
             'ext': 'mp4',
             'title': 'Cuarto Milenio Temporada 6 Programa 226',
             'description': 'md5:5ff132013f0cd968ffbf1f5f3538a65f',
@@ -128,40 +58,21 @@ class MiTeleIE(InfoExtractor):
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
-        webpage = self._download_webpage(url, video_id)
-
-        gigya_url = self._search_regex(
-            r'<gigya-api>[^>]*</gigya-api>[^>]*<script\s+src="([^"]*)">[^>]*</script>',
-            webpage, 'gigya', default=None)
-        gigya_sc = self._download_webpage(
-            compat_urlparse.urljoin('http://www.mitele.es/', gigya_url),
-            video_id, 'Downloading gigya script')
-
-        # Get a appKey/uuid for getting the session key
-        appKey = self._search_regex(
-            r'constant\s*\(\s*["\']_appGridApplicationKey["\']\s*,\s*["\']([0-9a-f]+)',
-            gigya_sc, 'appKey')
-
-        session_json = self._download_json(
-            'https://appgrid-api.cloud.accedo.tv/session',
-            video_id, 'Downloading session keys', query={
-                'appKey': appKey,
-                'uuid': compat_str(uuid.uuid4()),
-            })
 
         paths = self._download_json(
-            'https://appgrid-api.cloud.accedo.tv/metadata/general_configuration,%20web_configuration',
-            video_id, 'Downloading paths JSON',
-            query={'sessionKey': compat_str(session_json['sessionKey'])})
+            'https://www.mitele.es/amd/agp/web/metadata/general_configuration',
+            video_id, 'Downloading paths JSON')
 
         ooyala_s = paths['general_configuration']['api_configuration']['ooyala_search']
+        base_url = ooyala_s.get('base_url', 'cdn-search-mediaset.carbyne.ps.ooyala.com')
+        full_path = ooyala_s.get('full_path', '/search/v1/full/providers/')
         source = self._download_json(
-            'http://%s%s%s/docs/%s' % (
-                ooyala_s['base_url'], ooyala_s['full_path'],
-                ooyala_s['provider_id'], video_id),
+            '%s://%s%s%s/docs/%s' % (
+                ooyala_s.get('protocol', 'https'), base_url, full_path,
+                ooyala_s.get('provider_id', '104951'), video_id),
             video_id, 'Downloading data JSON', query={
                 'include_titles': 'Series,Season',
-                'product_name': 'test',
+                'product_name': ooyala_s.get('product_name', 'test'),
                 'format': 'full',
             })['hits']['hits'][0]['_source']
 
diff --git a/youtube_dl/extractor/motherless.py b/youtube_dl/extractor/motherless.py
index e24396e791cfddf88e53a34639e563b54795694e..d4bd273b61e756ef71db83bf264752fb2c0182af 100644
--- a/youtube_dl/extractor/motherless.py
+++ b/youtube_dl/extractor/motherless.py
@@ -77,8 +77,11 @@ class MotherlessIE(InfoExtractor):
 
         title = self._html_search_regex(
             r'id="view-upload-title">\s+([^<]+)<', webpage, 'title')
-        video_url = self._html_search_regex(
-            r'setup\(\{\s+"file".+: "([^"]+)",', webpage, 'video URL')
+        video_url = (self._html_search_regex(
+            (r'setup\(\{\s*["\']file["\']\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1',
+             r'fileurl\s*=\s*(["\'])(?P<url>(?:(?!\1).)+)\1'),
+            webpage, 'video URL', default=None, group='url') or
+            'http://cdn4.videos.motherlessmedia.com/videos/%s.mp4?fs=opencloud' % video_id)
         age_limit = self._rta_search(webpage)
         view_count = str_to_int(self._html_search_regex(
             r'<strong>Views</strong>\s+([^<]+)<',
@@ -120,7 +123,7 @@ class MotherlessIE(InfoExtractor):
 
 
 class MotherlessGroupIE(InfoExtractor):
-    _VALID_URL = 'https?://(?:www\.)?motherless\.com/gv?/(?P<id>[a-z0-9_]+)'
+    _VALID_URL = r'https?://(?:www\.)?motherless\.com/gv?/(?P<id>[a-z0-9_]+)'
     _TESTS = [{
         'url': 'http://motherless.com/g/movie_scenes',
         'info_dict': {
@@ -164,9 +167,9 @@ class MotherlessGroupIE(InfoExtractor):
         if not entries:
             entries = [
                 self.url_result(
-                    compat_urlparse.urljoin(base, '/' + video_id),
-                    ie=MotherlessIE.ie_key(), video_id=video_id)
-                for video_id in orderedSet(re.findall(
+                    compat_urlparse.urljoin(base, '/' + entry_id),
+                    ie=MotherlessIE.ie_key(), video_id=entry_id)
+                for entry_id in orderedSet(re.findall(
                     r'data-codename=["\']([A-Z0-9]+)', webpage))]
         return entries
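The video URL lookup now passes default=None, so a failed match falls through (via or) to a predictable CDN URL built from the video id instead of raising. The same idiom in isolation, keeping only the first of the two patterns used above:

    import re

    def extract_video_url(webpage, video_id):
        mobj = re.search(
            r'setup\(\{\s*["\']file["\']\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1',
            webpage)
        # Fall back to the known CDN layout when the player config is
        # missing or obfuscated (hostname as hard-coded above).
        return mobj.group('url') if mobj else (
            'http://cdn4.videos.motherlessmedia.com/videos/%s.mp4?fs=opencloud'
            % video_id)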
 
diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py
index c843f8649791727ca17e6a1ecbcb7bfb80c78205..765c46fd228f1b90e37902e94e8a5e45a3bacb98 100644
--- a/youtube_dl/extractor/nbc.py
+++ b/youtube_dl/extractor/nbc.py
@@ -7,6 +7,7 @@ import re
 from .common import InfoExtractor
 from .theplatform import ThePlatformIE
 from .adobepass import AdobePassIE
+from ..compat import compat_urllib_parse_unquote
 from ..utils import (
     find_xpath_attr,
     smuggle_url,
@@ -75,11 +76,16 @@ class NBCIE(AdobePassIE):
             'url': 'https://www.nbc.com/classic-tv/charles-in-charge/video/charles-in-charge-pilot/n3310',
             'only_matching': True,
         },
+        {
+            # Percent escaped url
+            'url': 'https://www.nbc.com/up-all-night/video/day-after-valentine%27s-day/n2189',
+            'only_matching': True,
+        }
     ]
 
     def _real_extract(self, url):
         permalink, video_id = re.match(self._VALID_URL, url).groups()
-        permalink = 'http' + permalink
+        permalink = 'http' + compat_urllib_parse_unquote(permalink)
         response = self._download_json(
             'https://api.nbc.com/v3/videos', video_id, query={
                 'filter[permalink]': permalink,
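Permalinks can arrive percent-escaped (%27 for the apostrophe in the new test), but the api.nbc.com permalink filter expects the literal characters, hence the unquote. For instance:

    from youtube_dl.compat import compat_urllib_parse_unquote

    print(compat_urllib_parse_unquote('day-after-valentine%27s-day'))
    # day-after-valentine's-day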
diff --git a/youtube_dl/extractor/niconico.py b/youtube_dl/extractor/niconico.py
index dbe871f1657e8ace3802704d14dd29f49efadec9..76b412ff1ae7d2d0866f3357706db365a9c0e613 100644
--- a/youtube_dl/extractor/niconico.py
+++ b/youtube_dl/extractor/niconico.py
@@ -252,7 +252,7 @@ class NiconicoIE(InfoExtractor):
                     },
                     'timing_constraint': 'unlimited'
                 }
-            }))
+            }).encode())
 
         resolution = video_quality.get('resolution', {})
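The one-character fix matters on Python 3: urllib refuses a str POST body, so the JSON session payload has to be encoded to bytes before being passed as data. The same requirement with the standard library directly (endpoint and payload are illustrative):

    import json
    from urllib.request import Request

    payload = {'session': {'timing_constraint': 'unlimited'}}

    # data must be bytes on Python 3; a bare json.dumps() str makes
    # urlopen raise "POST data should be bytes".
    req = Request('https://api.dmc.nico/api/sessions?_format=json',
                  data=json.dumps(payload).encode(),
                  headers={'Content-Type': 'application/json'})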
 
diff --git a/youtube_dl/extractor/nova.py b/youtube_dl/extractor/nova.py
index 06cb8cb3f5a867104a8bd67abd9ce60bbc1df25f..80186ec50cef0e580e1e07104006f8e3f89fa799 100644
--- a/youtube_dl/extractor/nova.py
+++ b/youtube_dl/extractor/nova.py
@@ -6,28 +6,90 @@ import re
 from .common import InfoExtractor
 from ..utils import (
     clean_html,
+    int_or_none,
+    js_to_json,
+    qualities,
     unified_strdate,
+    url_or_none,
 )
 
 
+class NovaEmbedIE(InfoExtractor):
+    _VALID_URL = r'https?://media\.cms\.nova\.cz/embed/(?P<id>[^/?#&]+)'
+    _TEST = {
+        'url': 'https://media.cms.nova.cz/embed/8o0n0r?autoplay=1',
+        'md5': 'b3834f6de5401baabf31ed57456463f7',
+        'info_dict': {
+            'id': '8o0n0r',
+            'ext': 'mp4',
+            'title': '2180. díl',
+            'thumbnail': r're:^https?://.*\.jpg',
+            'duration': 2578,
+        },
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+
+        bitrates = self._parse_json(
+            self._search_regex(
+                r'(?s)bitrates\s*=\s*({.+?})\s*;', webpage, 'formats'),
+            video_id, transform_source=js_to_json)
+
+        QUALITIES = ('lq', 'mq', 'hq', 'hd')
+        quality_key = qualities(QUALITIES)
+
+        formats = []
+        for format_id, format_list in bitrates.items():
+            if not isinstance(format_list, list):
+                continue
+            for format_url in format_list:
+                format_url = url_or_none(format_url)
+                if not format_url:
+                    continue
+                f = {
+                    'url': format_url,
+                }
+                f_id = format_id
+                for quality in QUALITIES:
+                    if '%s.mp4' % quality in format_url:
+                        f_id += '-%s' % quality
+                        f.update({
+                            'quality': quality_key(quality),
+                            'format_note': quality.upper(),
+                        })
+                        break
+                f['format_id'] = f_id
+                formats.append(f)
+        self._sort_formats(formats)
+
+        title = self._og_search_title(
+            webpage, default=None) or self._search_regex(
+            (r'<value>(?P<value>[^<]+)',
+             r'videoTitle\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1'), webpage,
+            'title', group='value')
+        thumbnail = self._og_search_thumbnail(
+            webpage, default=None) or self._search_regex(
+            r'poster\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1', webpage,
+            'thumbnail', fatal=False, group='value')
+        duration = int_or_none(self._search_regex(
+            r'videoDuration\s*:\s*(\d+)', webpage, 'duration', fatal=False))
+
+        return {
+            'id': video_id,
+            'title': title,
+            'thumbnail': thumbnail,
+            'duration': duration,
+            'formats': formats,
+        }
+
+
 class NovaIE(InfoExtractor):
     IE_DESC = 'TN.cz, Prásk.tv, Nova.cz, Novaplus.cz, FANDA.tv, Krásná.cz and Doma.cz'
     _VALID_URL = r'https?://(?:[^.]+\.)?(?P<site>tv(?:noviny)?|tn|novaplus|vymena|fanda|krasna|doma|prask)\.nova\.cz/(?:[^/]+/)+(?P<id>[^/]+?)(?:\.html|/|$)'
     _TESTS = [{
-        'url': 'http://tvnoviny.nova.cz/clanek/novinky/co-na-sebe-sportaci-praskli-vime-jestli-pujde-hrdlicka-na-materskou.html?utm_source=tvnoviny&utm_medium=cpfooter&utm_campaign=novaplus',
-        'info_dict': {
-            'id': '1608920',
-            'display_id': 'co-na-sebe-sportaci-praskli-vime-jestli-pujde-hrdlicka-na-materskou',
-            'ext': 'flv',
-            'title': 'Duel: Michal Hrdlička a Petr Suchoň',
-            'description': 'md5:d0cc509858eee1b1374111c588c6f5d5',
-            'thumbnail': r're:^https?://.*\.(?:jpg)',
-        },
-        'params': {
-            # rtmp download
-            'skip_download': True,
-        }
-    }, {
         'url': 'http://tn.nova.cz/clanek/tajemstvi-ukryte-v-podzemi-specialni-nemocnice-v-prazske-krci.html#player_13260',
         'md5': '1dd7b9d5ea27bc361f110cd855a19bd3',
         'info_dict': {
@@ -38,33 +100,6 @@ class NovaIE(InfoExtractor):
             'description': 'md5:f0a42dd239c26f61c28f19e62d20ef53',
             'thumbnail': r're:^https?://.*\.(?:jpg)',
         }
-    }, {
-        'url': 'http://novaplus.nova.cz/porad/policie-modrava/video/5591-policie-modrava-15-dil-blondynka-na-hrbitove',
-        'info_dict': {
-            'id': '1756825',
-            'display_id': '5591-policie-modrava-15-dil-blondynka-na-hrbitove',
-            'ext': 'flv',
-            'title': 'Policie Modrava - 15. díl - Blondýnka na hřbitově',
-            'description': 'md5:dc24e50be5908df83348e50d1431295e',  # Make sure this description is clean of html tags
-            'thumbnail': r're:^https?://.*\.(?:jpg)',
-        },
-        'params': {
-            # rtmp download
-            'skip_download': True,
-        }
-    }, {
-        'url': 'http://novaplus.nova.cz/porad/televizni-noviny/video/5585-televizni-noviny-30-5-2015/',
-        'info_dict': {
-            'id': '1756858',
-            'ext': 'flv',
-            'title': 'Televizní noviny - 30. 5. 2015',
-            'thumbnail': r're:^https?://.*\.(?:jpg)',
-            'upload_date': '20150530',
-        },
-        'params': {
-            # rtmp download
-            'skip_download': True,
-        }
     }, {
         'url': 'http://fanda.nova.cz/clanek/fun-and-games/krvavy-epos-zaklinac-3-divoky-hon-vychazi-vyhrajte-ho-pro-sebe.html',
         'info_dict': {
@@ -79,6 +114,20 @@ class NovaIE(InfoExtractor):
             # rtmp download
             'skip_download': True,
         }
+    }, {
+        # media.cms.nova.cz embed
+        'url': 'https://novaplus.nova.cz/porad/ulice/epizoda/18760-2180-dil',
+        'info_dict': {
+            'id': '8o0n0r',
+            'ext': 'mp4',
+            'title': '2180. díl',
+            'thumbnail': r're:^https?://.*\.jpg',
+            'duration': 2578,
+        },
+        'params': {
+            'skip_download': True,
+        },
+        'add_ie': [NovaEmbedIE.ie_key()],
     }, {
         'url': 'http://sport.tn.nova.cz/clanek/sport/hokej/nhl/zivot-jde-dal-hodnotil-po-vyrazeni-z-playoff-jiri-sekac.html',
         'only_matching': True,
@@ -103,6 +152,15 @@ class NovaIE(InfoExtractor):
 
         webpage = self._download_webpage(url, display_id)
 
+        # novaplus
+        embed_id = self._search_regex(
+            r'<iframe[^>]+\bsrc=["\'](?:https?:)?//media\.cms\.nova\.cz/embed/([^/?#&]+)',
+            webpage, 'embed url', default=None)
+        if embed_id:
+            return self.url_result(
+                'https://media.cms.nova.cz/embed/%s' % embed_id,
+                ie=NovaEmbedIE.ie_key(), video_id=embed_id)
+
         video_id = self._search_regex(
             [r"(?:media|video_id)\s*:\s*'(\d+)'",
              r'media=(\d+)',
@@ -111,8 +169,21 @@ class NovaIE(InfoExtractor):
             webpage, 'video id')
 
         config_url = self._search_regex(
-            r'src="(http://tn\.nova\.cz/bin/player/videojs/config\.php\?[^"]+)"',
+            r'src="(https?://(?:tn|api)\.nova\.cz/bin/player/videojs/config\.php\?[^"]+)"',
             webpage, 'config url', default=None)
+        config_params = {}
+
+        if not config_url:
+            player = self._parse_json(
+                self._search_regex(
+                    r'(?s)Player\s*\(.+?\s*,\s*({.+?\bmedia\b["\']?\s*:\s*["\']?\d+.+?})\s*\)', webpage,
+                    'player', default='{}'),
+                video_id, transform_source=js_to_json, fatal=False)
+            if player:
+                config_url = url_or_none(player.get('configUrl'))
+                params = player.get('configParams')
+                if isinstance(params, dict):
+                    config_params = params
 
         if not config_url:
             DEFAULT_SITE_ID = '23000'
@@ -127,14 +198,20 @@ class NovaIE(InfoExtractor):
             }
 
             site_id = self._search_regex(
-                r'site=(\d+)', webpage, 'site id', default=None) or SITES.get(site, DEFAULT_SITE_ID)
+                r'site=(\d+)', webpage, 'site id', default=None) or SITES.get(
+                site, DEFAULT_SITE_ID)
 
-            config_url = ('http://tn.nova.cz/bin/player/videojs/config.php?site=%s&media=%s&jsVar=vjsconfig'
-                          % (site_id, video_id))
+            config_url = 'https://api.nova.cz/bin/player/videojs/config.php'
+            config_params = {
+                'site': site_id,
+                'media': video_id,
+                'quality': 3,
+                'version': 1,
+            }
 
         config = self._download_json(
             config_url, display_id,
-            'Downloading config JSON',
+            'Downloading config JSON', query=config_params,
             transform_source=lambda s: s[s.index('{'):s.rindex('}') + 1])
 
         mediafile = config['mediafile']
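qualities() (see the new NovaEmbedIE above) turns an ordered tuple of quality labels into a ranking function, so lq/mq/hq/hd variants get comparable scores for _sort_formats:

    from youtube_dl.utils import qualities

    quality_key = qualities(('lq', 'mq', 'hq', 'hd'))

    # A label maps to its index in the tuple (higher is better);
    # unknown labels rank below everything at -1.
    print([quality_key(q) for q in ('lq', 'hq', 'hd', 'uhd')])
    # [0, 2, 3, -1]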
diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py
index cb8319f0db1ce96f32b2890108d9ca3d2adde0c5..c2cb85a734f256330b9cbb9bfd77910a6f962c45 100644
--- a/youtube_dl/extractor/npo.py
+++ b/youtube_dl/extractor/npo.py
@@ -282,7 +282,7 @@ class NPOIE(NPOBaseIE):
                 video_url = stream_info.get('url')
             if not video_url or video_url in urls:
                 continue
-            urls.add(item_url)
+            urls.add(video_url)
             if determine_ext(video_url) == 'm3u8':
                 formats.extend(self._extract_m3u8_formats(
                     video_url, video_id, ext='mp4',
diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py
index 7157e2390909dec9667ac0d563dbe88627249d34..a231735fb79aa18140e3f4e5bdd80fb416b8020c 100644
--- a/youtube_dl/extractor/nrk.py
+++ b/youtube_dl/extractor/nrk.py
@@ -4,12 +4,18 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..compat import compat_urllib_parse_unquote
+from ..compat import (
+    compat_str,
+    compat_urllib_parse_unquote,
+)
 from ..utils import (
     ExtractorError,
     int_or_none,
+    JSON_LD_RE,
+    NO_DEFAULT,
     parse_age_limit,
     parse_duration,
+    try_get,
 )
 
 
@@ -359,6 +365,182 @@ class NRKTVIE(NRKBaseIE):
     }]
 
 
+class NRKTVEpisodeIE(InfoExtractor):
+    _VALID_URL = r'https?://tv\.nrk\.no/serie/(?P<id>[^/]+/sesong/\d+/episode/\d+)'
+    _TEST = {
+        'url': 'https://tv.nrk.no/serie/backstage/sesong/1/episode/8',
+        'info_dict': {
+            'id': 'MSUI14000816AA',
+            'ext': 'mp4',
+            'title': 'Backstage 8:30',
+            'description': 'md5:de6ca5d5a2d56849e4021f2bf2850df4',
+            'duration': 1320,
+            'series': 'Backstage',
+            'season_number': 1,
+            'episode_number': 8,
+            'episode': '8:30',
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, display_id)
+
+        nrk_id = self._parse_json(
+            self._search_regex(JSON_LD_RE, webpage, 'JSON-LD', group='json_ld'),
+            display_id)['@id']
+
+        assert re.match(NRKTVIE._EPISODE_RE, nrk_id)
+        return self.url_result(
+            'nrk:%s' % nrk_id, ie=NRKIE.ie_key(), video_id=nrk_id)
+
+
+class NRKTVSerieBaseIE(InfoExtractor):
+    def _extract_series(self, webpage, display_id, fatal=True):
+        config = self._parse_json(
+            self._search_regex(
+                r'({.+?})\s*,\s*"[^"]+"\s*\)\s*</script>', webpage, 'config',
+                default='{}' if not fatal else NO_DEFAULT),
+            display_id, fatal=False)
+        if not config:
+            return
+        return try_get(config, lambda x: x['series'], dict)
+
+    def _extract_episodes(self, season):
+        entries = []
+        if not isinstance(season, dict):
+            return entries
+        episodes = season.get('episodes')
+        if not isinstance(episodes, list):
+            return entries
+        for episode in episodes:
+            nrk_id = episode.get('prfId')
+            if not nrk_id or not isinstance(nrk_id, compat_str):
+                continue
+            entries.append(self.url_result(
+                'nrk:%s' % nrk_id, ie=NRKIE.ie_key(), video_id=nrk_id))
+        return entries
+
+
+class NRKTVSeasonIE(NRKTVSerieBaseIE):
+    _VALID_URL = r'https?://tv\.nrk\.no/serie/[^/]+/sesong/(?P<id>\d+)'
+    _TEST = {
+        'url': 'https://tv.nrk.no/serie/backstage/sesong/1',
+        'info_dict': {
+            'id': '1',
+            'title': 'Sesong 1',
+        },
+        'playlist_mincount': 30,
+    }
+
+    @classmethod
+    def suitable(cls, url):
+        return (False if NRKTVIE.suitable(url) or NRKTVEpisodeIE.suitable(url)
+                else super(NRKTVSeasonIE, cls).suitable(url))
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, display_id)
+
+        series = self._extract_series(webpage, display_id)
+
+        season = next(
+            s for s in series['seasons']
+            if int(display_id) == s.get('seasonNumber'))
+
+        title = try_get(season, lambda x: x['titles']['title'], compat_str)
+        return self.playlist_result(
+            self._extract_episodes(season), display_id, title)
+
+
+class NRKTVSeriesIE(NRKTVSerieBaseIE):
+    _VALID_URL = r'https?://(?:tv|radio)\.nrk(?:super)?\.no/serie/(?P<id>[^/]+)'
+    _ITEM_RE = r'(?:data-season=["\']|id=["\']season-)(?P<id>\d+)'
+    _TESTS = [{
+        # new layout
+        'url': 'https://tv.nrk.no/serie/backstage',
+        'info_dict': {
+            'id': 'backstage',
+            'title': 'Backstage',
+            'description': 'md5:c3ec3a35736fca0f9e1207b5511143d3',
+        },
+        'playlist_mincount': 60,
+    }, {
+        # old layout
+        'url': 'https://tv.nrk.no/serie/groenn-glede',
+        'info_dict': {
+            'id': 'groenn-glede',
+            'title': 'Grønn glede',
+            'description': 'md5:7576e92ae7f65da6993cf90ee29e4608',
+        },
+        'playlist_mincount': 9,
+    }, {
+        'url': 'http://tv.nrksuper.no/serie/labyrint',
+        'info_dict': {
+            'id': 'labyrint',
+            'title': 'Labyrint',
+            'description': 'md5:58afd450974c89e27d5a19212eee7115',
+        },
+        'playlist_mincount': 3,
+    }, {
+        'url': 'https://tv.nrk.no/serie/broedrene-dal-og-spektralsteinene',
+        'only_matching': True,
+    }, {
+        'url': 'https://tv.nrk.no/serie/saving-the-human-race',
+        'only_matching': True,
+    }, {
+        'url': 'https://tv.nrk.no/serie/postmann-pat',
+        'only_matching': True,
+    }]
+
+    @classmethod
+    def suitable(cls, url):
+        return (
+            False if any(ie.suitable(url)
+                         for ie in (NRKTVIE, NRKTVEpisodeIE, NRKTVSeasonIE))
+            else super(NRKTVSeriesIE, cls).suitable(url))
+
+    def _real_extract(self, url):
+        series_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, series_id)
+
+        # New layout (e.g. https://tv.nrk.no/serie/backstage)
+        series = self._extract_series(webpage, series_id, fatal=False)
+        if series:
+            title = try_get(series, lambda x: x['titles']['title'], compat_str)
+            description = try_get(
+                series, lambda x: x['titles']['subtitle'], compat_str)
+            entries = []
+            for season in series['seasons']:
+                entries.extend(self._extract_episodes(season))
+            return self.playlist_result(entries, series_id, title, description)
+
+        # Old layout (e.g. https://tv.nrk.no/serie/groenn-glede)
+        entries = [
+            self.url_result(
+                'https://tv.nrk.no/program/Episodes/{series}/{season}'.format(
+                    series=series_id, season=season_id))
+            for season_id in re.findall(self._ITEM_RE, webpage)
+        ]
+
+        title = self._html_search_meta(
+            'seriestitle', webpage,
+            'title', default=None) or self._og_search_title(
+            webpage, fatal=False)
+
+        description = self._html_search_meta(
+            'series_description', webpage,
+            'description', default=None) or self._og_search_description(webpage)
+
+        return self.playlist_result(entries, series_id, title, description)
+
+
 class NRKTVDirekteIE(NRKTVIE):
     IE_DESC = 'NRK TV Direkte and NRK Radio Direkte'
     _VALID_URL = r'https?://(?:tv|radio)\.nrk\.no/direkte/(?P<id>[^/?#&]+)'
@@ -438,64 +620,6 @@ class NRKTVEpisodesIE(NRKPlaylistBaseIE):
             r'<h1>([^<]+)</h1>', webpage, 'title', fatal=False)
 
 
-class NRKTVSeriesIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:tv|radio)\.nrk(?:super)?\.no/serie/(?P<id>[^/]+)'
-    _ITEM_RE = r'(?:data-season=["\']|id=["\']season-)(?P<id>\d+)'
-    _TESTS = [{
-        'url': 'https://tv.nrk.no/serie/groenn-glede',
-        'info_dict': {
-            'id': 'groenn-glede',
-            'title': 'Grønn glede',
-            'description': 'md5:7576e92ae7f65da6993cf90ee29e4608',
-        },
-        'playlist_mincount': 9,
-    }, {
-        'url': 'http://tv.nrksuper.no/serie/labyrint',
-        'info_dict': {
-            'id': 'labyrint',
-            'title': 'Labyrint',
-            'description': 'md5:58afd450974c89e27d5a19212eee7115',
-        },
-        'playlist_mincount': 3,
-    }, {
-        'url': 'https://tv.nrk.no/serie/broedrene-dal-og-spektralsteinene',
-        'only_matching': True,
-    }, {
-        'url': 'https://tv.nrk.no/serie/saving-the-human-race',
-        'only_matching': True,
-    }, {
-        'url': 'https://tv.nrk.no/serie/postmann-pat',
-        'only_matching': True,
-    }]
-
-    @classmethod
-    def suitable(cls, url):
-        return False if NRKTVIE.suitable(url) else super(NRKTVSeriesIE, cls).suitable(url)
-
-    def _real_extract(self, url):
-        series_id = self._match_id(url)
-
-        webpage = self._download_webpage(url, series_id)
-
-        entries = [
-            self.url_result(
-                'https://tv.nrk.no/program/Episodes/{series}/{season}'.format(
-                    series=series_id, season=season_id))
-            for season_id in re.findall(self._ITEM_RE, webpage)
-        ]
-
-        title = self._html_search_meta(
-            'seriestitle', webpage,
-            'title', default=None) or self._og_search_title(
-            webpage, fatal=False)
-
-        description = self._html_search_meta(
-            'series_description', webpage,
-            'description', default=None) or self._og_search_description(webpage)
-
-        return self.playlist_result(entries, series_id, title, description)
-
-
 class NRKSkoleIE(InfoExtractor):
     IE_DESC = 'NRK Skole'
     _VALID_URL = r'https?://(?:www\.)?nrk\.no/skole/?\?.*\bmediaId=(?P<id>\d+)'
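Note on the NRK hunks above: the new classes overlap by construction (a series URL is a prefix of its season URLs, which are prefixes of episode URLs), so the broader extractors' suitable() methods defer to the more specific ones. A self-contained model of that dispatch order, with the patterns trimmed down:

    import re

    EPISODE_RE = r'https?://tv\.nrk\.no/serie/[^/]+/sesong/\d+/episode/\d+'
    SEASON_RE = r'https?://tv\.nrk\.no/serie/[^/]+/sesong/\d+'
    SERIES_RE = r'https?://(?:tv|radio)\.nrk(?:super)?\.no/serie/[^/]+'

    def classify(url):
        # Most specific pattern wins, mirroring the suitable() chaining.
        for name, pattern in (('episode', EPISODE_RE),
                              ('season', SEASON_RE),
                              ('series', SERIES_RE)):
            if re.match(pattern, url):
                return name

    assert classify('https://tv.nrk.no/serie/backstage/sesong/1/episode/8') == 'episode'
    assert classify('https://tv.nrk.no/serie/backstage/sesong/1') == 'season'
    assert classify('https://tv.nrk.no/serie/backstage') == 'series'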
index 52ab2f158e28f3fe5d7553beb7a0c108c2608641..80340f595c11df34e58899d481edc887f55a8837 100644 (file)
@@ -15,6 +15,7 @@ from ..utils import (
     strip_jsonp,
     strip_or_none,
     unified_strdate,
+    url_or_none,
     US_RATINGS,
 )
 
@@ -557,6 +558,13 @@ class PBSIE(InfoExtractor):
                 if redirect_url and redirect_url not in redirect_urls:
                     redirects.append(redirect)
                     redirect_urls.add(redirect_url)
+            encodings = info.get('encodings')
+            if isinstance(encodings, list):
+                for encoding in encodings:
+                    encoding_url = url_or_none(encoding)
+                    if encoding_url and encoding_url not in redirect_urls:
+                        redirects.append({'url': encoding_url})
+                        redirect_urls.add(encoding_url)
 
         chapters = []
         # Player pages may also serve different qualities
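Note on url_or_none: this PBS hunk is one of several in the commit (PeerTube, RedTube and REN TV below do the same) that replace manual isinstance/truthiness checks with the url_or_none helper. Roughly, it returns the value only when it is a non-empty string that looks like a URL; a sketch of that contract, with Python 3 str standing in for compat_str (not the exact library source):

    import re

    def url_or_none(url):
        if not url or not isinstance(url, str):
            return None
        url = url.strip()
        return url if re.match(r'^(?:[a-zA-Z][\da-zA-Z.+-]*:)?//', url) else None

    assert url_or_none('https://example.com/v.mp4') == 'https://example.com/v.mp4'
    assert url_or_none('//cdn.example.com/v.mp4') == '//cdn.example.com/v.mp4'
    assert url_or_none('not a url') is None
    assert url_or_none(None) is None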
index a481b3151bb30667804c0be70c0360394200470a..e03c3d1d3d61ec2fd981776fba2775464b9658d1 100644 (file)
@@ -10,6 +10,7 @@ from ..utils import (
     parse_resolution,
     try_get,
     unified_timestamp,
+    url_or_none,
     urljoin,
 )
 
@@ -116,12 +117,14 @@ class PeerTubeIE(InfoExtractor):
                             videos\.tcit\.fr|
                             peertube\.cpy\.re
                         )'''
+    _UUID_RE = r'[\da-fA-F]{8}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{12}'
     _VALID_URL = r'''(?x)
-                    https?://
-                        %s
-                        /(?:videos/(?:watch|embed)|api/v\d/videos)/
-                        (?P<id>[^/?\#&]+)
-                    ''' % _INSTANCES_RE
+                    (?:
+                        peertube:(?P<host>[^:]+):|
+                        https?://(?P<host_2>%s)/(?:videos/(?:watch|embed)|api/v\d/videos)/
+                    )
+                    (?P<id>%s)
+                    ''' % (_INSTANCES_RE, _UUID_RE)
     _TESTS = [{
         'url': 'https://peertube.moe/videos/watch/2790feb0-8120-4e63-9af3-c943c69f5e6c',
         'md5': '80f24ff364cc9d333529506a263e7feb',
@@ -157,21 +160,40 @@ class PeerTubeIE(InfoExtractor):
     }, {
         'url': 'https://tube.openalgeria.org/api/v1/videos/c1875674-97d0-4c94-a058-3f7e64c962e8',
         'only_matching': True,
+    }, {
+        'url': 'peertube:video.blender.org:b37a5b9f-e6b5-415c-b700-04a5cd6ec205',
+        'only_matching': True,
     }]
 
     @staticmethod
-    def _extract_urls(webpage):
-        return [
-            mobj.group('url')
-            for mobj in re.finditer(
-                r'''(?x)<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//%s/videos/embed/[^/?\#&]+)\1'''
-                % PeerTubeIE._INSTANCES_RE, webpage)]
+    def _extract_peertube_url(webpage, source_url):
+        mobj = re.match(
+            r'https?://(?P<host>[^/]+)/videos/watch/(?P<id>%s)'
+            % PeerTubeIE._UUID_RE, source_url)
+        if mobj and any(p in webpage for p in (
+                '<title>PeerTube<',
+                'There will be other non JS-based clients to access PeerTube',
+                '>We are sorry but it seems that PeerTube is not compatible with your web browser.<')):
+            return 'peertube:%s:%s' % mobj.group('host', 'id')
+
+    @staticmethod
+    def _extract_urls(webpage, source_url):
+        entries = re.findall(
+            r'''(?x)<iframe[^>]+\bsrc=["\'](?P<url>(?:https?:)?//%s/videos/embed/%s)'''
+            % (PeerTubeIE._INSTANCES_RE, PeerTubeIE._UUID_RE), webpage)
+        if not entries:
+            peertube_url = PeerTubeIE._extract_peertube_url(webpage, source_url)
+            if peertube_url:
+                entries = [peertube_url]
+        return entries
 
     def _real_extract(self, url):
-        video_id = self._match_id(url)
+        mobj = re.match(self._VALID_URL, url)
+        host = mobj.group('host') or mobj.group('host_2')
+        video_id = mobj.group('id')
 
         video = self._download_json(
-            urljoin(url, '/api/v1/videos/%s' % video_id), video_id)
+            'https://%s/api/v1/videos/%s' % (host, video_id), video_id)
 
         title = video['name']
 
@@ -179,8 +201,8 @@ class PeerTubeIE(InfoExtractor):
         for file_ in video['files']:
             if not isinstance(file_, dict):
                 continue
-            file_url = file_.get('fileUrl')
-            if not file_url or not isinstance(file_url, compat_str):
+            file_url = url_or_none(file_.get('fileUrl'))
+            if not file_url:
                 continue
             file_size = int_or_none(file_.get('size'))
             format_id = try_get(
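Note on the PeerTube rewrite above: the internal peertube:<host>:<uuid> URL form lets embeds discovered on arbitrary pages be routed back through the extractor with their host preserved, and both _VALID_URL alternatives resolve to the same (host, id) pair via the host/host_2 fallback. A sketch with the long instance list cut down to a single host:

    import re

    UUID_RE = r'[\da-fA-F]{8}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{12}'
    VALID_URL = r'''(?x)
                    (?:
                        peertube:(?P<host>[^:]+):|
                        https?://(?P<host_2>peertube\.moe)/(?:videos/(?:watch|embed)|api/v\d/videos)/
                    )
                    (?P<id>%s)
                    ''' % UUID_RE

    for url in ('peertube:peertube.moe:2790feb0-8120-4e63-9af3-c943c69f5e6c',
                'https://peertube.moe/videos/watch/2790feb0-8120-4e63-9af3-c943c69f5e6c'):
        mobj = re.match(VALID_URL, url)
        host = mobj.group('host') or mobj.group('host_2')  # same fallback as the diff
        assert (host, mobj.group('id')) == (
            'peertube.moe', '2790feb0-8120-4e63-9af3-c943c69f5e6c')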
index a207ca9cb93a3c839bf9304267d6a5b10d426b5e..1257841e4bbcffbae999cd402f9a6c7982a3fb30 100644 (file)
@@ -27,6 +27,60 @@ from ..utils import (
 class PluralsightBaseIE(InfoExtractor):
     _API_BASE = 'https://app.pluralsight.com'
 
+    _GRAPHQL_EP = '%s/player/api/graphql' % _API_BASE
+    _GRAPHQL_HEADERS = {
+        'Content-Type': 'application/json;charset=UTF-8',
+    }
+    _GRAPHQL_COURSE_TMPL = '''
+query BootstrapPlayer {
+  rpc {
+    bootstrapPlayer {
+      profile {
+        firstName
+        lastName
+        email
+        username
+        userHandle
+        authed
+        isAuthed
+        plan
+      }
+      course(courseId: "%s") {
+        name
+        title
+        courseHasCaptions
+        translationLanguages {
+          code
+          name
+        }
+        supportsWideScreenVideoFormats
+        timestamp
+        modules {
+          name
+          title
+          duration
+          formattedDuration
+          author
+          authorized
+          clips {
+            authorized
+            clipId
+            duration
+            formattedDuration
+            id
+            index
+            moduleIndex
+            moduleTitle
+            name
+            title
+            watched
+          }
+        }
+      }
+    }
+  }
+}'''
+
     def _download_course(self, course_id, url, display_id):
         try:
             return self._download_course_rpc(course_id, url, display_id)
@@ -39,20 +93,14 @@ class PluralsightBaseIE(InfoExtractor):
 
     def _download_course_rpc(self, course_id, url, display_id):
         response = self._download_json(
-            '%s/player/functions/rpc' % self._API_BASE, display_id,
-            'Downloading course JSON',
-            data=json.dumps({
-                'fn': 'bootstrapPlayer',
-                'payload': {
-                    'courseId': course_id,
-                },
-            }).encode('utf-8'),
-            headers={
-                'Content-Type': 'application/json;charset=utf-8',
-                'Referer': url,
-            })
-
-        course = try_get(response, lambda x: x['payload']['course'], dict)
+            self._GRAPHQL_EP, display_id, data=json.dumps({
+                'query': self._GRAPHQL_COURSE_TMPL % course_id,
+                'variables': {}
+            }).encode('utf-8'), headers=self._GRAPHQL_HEADERS)
+
+        course = try_get(
+            response, lambda x: x['data']['rpc']['bootstrapPlayer']['course'],
+            dict)
         if course:
             return course
 
@@ -90,6 +138,28 @@ class PluralsightIE(PluralsightBaseIE):
         'only_matching': True,
     }]
 
+    GRAPHQL_VIEWCLIP_TMPL = '''
+query viewClip {
+  viewClip(input: {
+    author: "%(author)s",
+    clipIndex: %(clipIndex)d,
+    courseName: "%(courseName)s",
+    includeCaptions: %(includeCaptions)s,
+    locale: "%(locale)s",
+    mediaType: "%(mediaType)s",
+    moduleName: "%(moduleName)s",
+    quality: "%(quality)s"
+  }) {
+    urls {
+      url
+      cdn
+      rank
+      source
+    },
+    status
+  }
+}'''
+
     def _real_initialize(self):
         self._login()
 
@@ -277,7 +347,7 @@ class PluralsightIE(PluralsightBaseIE):
                 f = QUALITIES[quality].copy()
                 clip_post = {
                     'author': author,
-                    'includeCaptions': False,
+                    'includeCaptions': 'false',
                     'clipIndex': int(clip_idx),
                     'courseName': course_name,
                     'locale': 'en',
@@ -286,11 +356,23 @@ class PluralsightIE(PluralsightBaseIE):
                     'quality': '%dx%d' % (f['width'], f['height']),
                 }
                 format_id = '%s-%s' % (ext, quality)
-                viewclip = self._download_json(
-                    '%s/video/clips/viewclip' % self._API_BASE, display_id,
-                    'Downloading %s viewclip JSON' % format_id, fatal=False,
-                    data=json.dumps(clip_post).encode('utf-8'),
-                    headers={'Content-Type': 'application/json;charset=utf-8'})
+
+                try:
+                    viewclip = self._download_json(
+                        self._GRAPHQL_EP, display_id,
+                        'Downloading %s viewclip graphql' % format_id,
+                        data=json.dumps({
+                            'query': self.GRAPHQL_VIEWCLIP_TMPL % clip_post,
+                            'variables': {}
+                        }).encode('utf-8'),
+                        headers=self._GRAPHQL_HEADERS)['data']['viewClip']
+                except ExtractorError:
+                    # Still works but most likely will go soon
+                    viewclip = self._download_json(
+                        '%s/video/clips/viewclip' % self._API_BASE, display_id,
+                        'Downloading %s viewclip JSON' % format_id, fatal=False,
+                        data=json.dumps(clip_post).encode('utf-8'),
+                        headers={'Content-Type': 'application/json;charset=utf-8'})
 
                 # Pluralsight tracks multiple sequential calls to ViewClip API and start
                 # to return 429 HTTP errors after some time (see
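Note on the Pluralsight hunks above: the bespoke /player/functions/rpc call is replaced by a GraphQL endpoint, so the request body becomes the standard GraphQL envelope ({'query': ..., 'variables': {}}) and the course moves under data.rpc.bootstrapPlayer.course, with the old viewclip endpoint kept as a fallback. A standard-library sketch of such a POST, assuming the endpoint from the diff; the course id is a placeholder and the authentication cookies a real request needs are omitted:

    import json
    try:
        from urllib.request import Request, urlopen  # Python 3
    except ImportError:
        from urllib2 import Request, urlopen  # Python 2

    GRAPHQL_EP = 'https://app.pluralsight.com/player/api/graphql'
    query = '''query BootstrapPlayer {
      rpc { bootstrapPlayer { course(courseId: "%s") { name title } } }
    }'''

    req = Request(
        GRAPHQL_EP,
        data=json.dumps({'query': query % 'placeholder-course-id',
                         'variables': {}}).encode('utf-8'),
        headers={'Content-Type': 'application/json;charset=UTF-8'})
    response = json.loads(urlopen(req).read().decode('utf-8'))
    course = ((response.get('data') or {}).get('rpc') or {}).get(
        'bootstrapPlayer', {}).get('course')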
index 60ade06da37d2bd2a56e717f9761f525cb4b7436..5726cab3ae6763d7466d013898f71925966bed6d 100644 (file)
@@ -43,7 +43,8 @@ class PornComIE(InfoExtractor):
 
         config = self._parse_json(
             self._search_regex(
-                r'=\s*({.+?})\s*,\s*[\da-zA-Z_]+\s*=',
+                (r'=\s*({.+?})\s*;\s*v1ar\b',
+                 r'=\s*({.+?})\s*,\s*[\da-zA-Z_]+\s*='),
                 webpage, 'config', default='{}'),
             display_id, transform_source=js_to_json, fatal=False)
 
@@ -69,7 +70,7 @@ class PornComIE(InfoExtractor):
                 'height': int(height),
                 'filesize_approx': parse_filesize(filesize),
             } for format_url, height, filesize in re.findall(
-                r'<a[^>]+href="(/download/[^"]+)">MPEG4 (\d+)p<span[^>]*>(\d+\s+[a-zA-Z]+)<',
+                r'<a[^>]+href="(/download/[^"]+)">[^<]*?(\d+)p<span[^>]*>(\d+\s*[a-zA-Z]+)<',
                 webpage)]
             thumbnail = None
             duration = None
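Note on the porn.com fix above: _search_regex accepts a tuple of patterns and returns the first match, so the new ;v1ar layout (the site's obfuscated spelling of var) is tried before the older comma-separated one. A first-match-wins sketch of that behaviour:

    import re

    def search_regex(patterns, text, default=None):
        # Try each pattern in order and return the first capture,
        # like InfoExtractor._search_regex does for tuples.
        if isinstance(patterns, str):
            patterns = (patterns,)
        for pattern in patterns:
            mobj = re.search(pattern, text)
            if mobj:
                return mobj.group(1)
        return default

    page = 'config = {"file": "v.mp4"} ;v1ar player = init();'
    print(search_regex((r'=\s*({.+?})\s*;\s*v1ar\b',
                        r'=\s*({.+?})\s*,\s*[\da-zA-Z_]+\s*='), page, default='{}'))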
index 23e24d216c04773841944b26d1c524e260b6d5ee..6782848d90c289d02aeef76bb2d0925351d64121 100644 (file)
@@ -4,28 +4,22 @@ from __future__ import unicode_literals
 import functools
 import itertools
 import operator
-# import os
 import re
 
 from .common import InfoExtractor
 from ..compat import (
     compat_HTTPError,
-    # compat_urllib_parse_unquote,
-    # compat_urllib_parse_unquote_plus,
-    # compat_urllib_parse_urlparse,
+    compat_str,
 )
 from ..utils import (
     ExtractorError,
     int_or_none,
     js_to_json,
     orderedSet,
-    # sanitized_Request,
     remove_quotes,
     str_to_int,
+    url_or_none,
 )
-# from ..aes import (
-#     aes_decrypt_text
-# )
 
 
 class PornHubIE(InfoExtractor):
@@ -62,7 +56,7 @@ class PornHubIE(InfoExtractor):
             'id': '1331683002',
             'ext': 'mp4',
             'title': '重庆婷婷女王足交',
-            'uploader': 'cj397186295',
+            'uploader': 'Unknown',
             'duration': 1753,
             'view_count': int,
             'like_count': int,
@@ -75,6 +69,31 @@ class PornHubIE(InfoExtractor):
         'params': {
             'skip_download': True,
         },
+    }, {
+        # subtitles
+        'url': 'https://www.pornhub.com/view_video.php?viewkey=ph5af5fef7c2aa7',
+        'info_dict': {
+            'id': 'ph5af5fef7c2aa7',
+            'ext': 'mp4',
+            'title': 'BFFS - Cute Teen Girls Share Cock On the Floor',
+            'uploader': 'BFFs',
+            'duration': 622,
+            'view_count': int,
+            'like_count': int,
+            'dislike_count': int,
+            'comment_count': int,
+            'age_limit': 18,
+            'tags': list,
+            'categories': list,
+            'subtitles': {
+                'en': [{
+                    "ext": 'srt'
+                }]
+            },
+        },
+        'params': {
+            'skip_download': True,
+        },
     }, {
         'url': 'http://www.pornhub.com/view_video.php?viewkey=ph557bbb6676d2d',
         'only_matching': True,
@@ -121,7 +140,7 @@ class PornHubIE(InfoExtractor):
             self._set_cookie('pornhub.com', 'platform', platform)
             return self._download_webpage(
                 'http://www.pornhub.com/view_video.php?viewkey=%s' % video_id,
-                video_id)
+                video_id, 'Downloading %s webpage' % platform)
 
         webpage = dl_webpage('pc')
 
@@ -134,60 +153,108 @@ class PornHubIE(InfoExtractor):
                 'PornHub said: %s' % error_msg,
                 expected=True, video_id=video_id)
 
-        tv_webpage = dl_webpage('tv')
-
-        assignments = self._search_regex(
-            r'(var.+?mediastring.+?)</script>', tv_webpage,
-            'encoded url').split(';')
-
-        js_vars = {}
-
-        def parse_js_value(inp):
-            inp = re.sub(r'/\*(?:(?!\*/).)*?\*/', '', inp)
-            if '+' in inp:
-                inps = inp.split('+')
-                return functools.reduce(
-                    operator.concat, map(parse_js_value, inps))
-            inp = inp.strip()
-            if inp in js_vars:
-                return js_vars[inp]
-            return remove_quotes(inp)
-
-        for assn in assignments:
-            assn = assn.strip()
-            if not assn:
-                continue
-            assn = re.sub(r'var\s+', '', assn)
-            vname, value = assn.split('=', 1)
-            js_vars[vname] = parse_js_value(value)
-
-        video_url = js_vars['mediastring']
-
-        title = self._search_regex(
-            r'<h1>([^>]+)</h1>', tv_webpage, 'title', default=None)
-
         # video_title from flashvars contains whitespace instead of non-ASCII (see
         # http://www.pornhub.com/view_video.php?viewkey=1331683002), not relying
         # on that anymore.
-        title = title or self._html_search_meta(
+        title = self._html_search_meta(
             'twitter:title', webpage, default=None) or self._search_regex(
             (r'<h1[^>]+class=["\']title["\'][^>]*>(?P<title>[^<]+)',
              r'<div[^>]+data-video-title=(["\'])(?P<title>.+?)\1',
              r'shareTitle\s*=\s*(["\'])(?P<title>.+?)\1'),
             webpage, 'title', group='title')
 
+        video_urls = []
+        video_urls_set = set()
+        subtitles = {}
+
         flashvars = self._parse_json(
             self._search_regex(
                 r'var\s+flashvars_\d+\s*=\s*({.+?});', webpage, 'flashvars', default='{}'),
             video_id)
         if flashvars:
+            subtitle_url = url_or_none(flashvars.get('closedCaptionsFile'))
+            if subtitle_url:
+                subtitles.setdefault('en', []).append({
+                    'url': subtitle_url,
+                    'ext': 'srt',
+                })
             thumbnail = flashvars.get('image_url')
             duration = int_or_none(flashvars.get('video_duration'))
+            media_definitions = flashvars.get('mediaDefinitions')
+            if isinstance(media_definitions, list):
+                for definition in media_definitions:
+                    if not isinstance(definition, dict):
+                        continue
+                    video_url = definition.get('videoUrl')
+                    if not video_url or not isinstance(video_url, compat_str):
+                        continue
+                    if video_url in video_urls_set:
+                        continue
+                    video_urls_set.add(video_url)
+                    video_urls.append(
+                        (video_url, int_or_none(definition.get('quality'))))
         else:
-            title, thumbnail, duration = [None] * 3
+            thumbnail, duration = [None] * 2
+
+        if not video_urls:
+            tv_webpage = dl_webpage('tv')
+
+            assignments = self._search_regex(
+                r'(var.+?mediastring.+?)</script>', tv_webpage,
+                'encoded url').split(';')
+
+            js_vars = {}
+
+            def parse_js_value(inp):
+                inp = re.sub(r'/\*(?:(?!\*/).)*?\*/', '', inp)
+                if '+' in inp:
+                    inps = inp.split('+')
+                    return functools.reduce(
+                        operator.concat, map(parse_js_value, inps))
+                inp = inp.strip()
+                if inp in js_vars:
+                    return js_vars[inp]
+                return remove_quotes(inp)
+
+            for assn in assignments:
+                assn = assn.strip()
+                if not assn:
+                    continue
+                assn = re.sub(r'var\s+', '', assn)
+                vname, value = assn.split('=', 1)
+                js_vars[vname] = parse_js_value(value)
+
+            video_url = js_vars['mediastring']
+            if video_url not in video_urls_set:
+                video_urls.append((video_url, None))
+                video_urls_set.add(video_url)
+
+        for mobj in re.finditer(
+                r'<a[^>]+\bclass=["\']downloadBtn\b[^>]+\bhref=(["\'])(?P<url>(?:(?!\1).)+)\1',
+                webpage):
+            video_url = mobj.group('url')
+            if video_url not in video_urls_set:
+                video_urls.append((video_url, None))
+                video_urls_set.add(video_url)
+
+        formats = []
+        for video_url, height in video_urls:
+            tbr = None
+            mobj = re.search(r'(?P<height>\d+)[pP]?_(?P<tbr>\d+)[kK]', video_url)
+            if mobj:
+                if not height:
+                    height = int(mobj.group('height'))
+                tbr = int(mobj.group('tbr'))
+            formats.append({
+                'url': video_url,
+                'format_id': '%dp' % height if height else None,
+                'height': height,
+                'tbr': tbr,
+            })
+        self._sort_formats(formats)
 
         video_uploader = self._html_search_regex(
-            r'(?s)From:&nbsp;.+?<(?:a\b[^>]+\bhref=["\']/(?:user|channel)s/|span\b[^>]+\bclass=["\']username)[^>]+>(.+?)<',
+            r'(?s)From:&nbsp;.+?<(?:a\b[^>]+\bhref=["\']/(?:(?:user|channel)s|model|pornstar)/|span\b[^>]+\bclass=["\']username)[^>]+>(.+?)<',
             webpage, 'uploader', fatal=False)
 
         view_count = self._extract_count(
@@ -210,7 +277,6 @@ class PornHubIE(InfoExtractor):
 
         return {
             'id': video_id,
-            'url': video_url,
             'uploader': video_uploader,
             'title': title,
             'thumbnail': thumbnail,
@@ -219,10 +285,11 @@ class PornHubIE(InfoExtractor):
             'like_count': like_count,
             'dislike_count': dislike_count,
             'comment_count': comment_count,
-            'formats': formats,
+            'formats': formats,
             'age_limit': 18,
             'tags': tags,
             'categories': categories,
+            'subtitles': subtitles,
         }
 
 
@@ -279,7 +346,7 @@ class PornHubPlaylistIE(PornHubPlaylistBaseIE):
 
 
 class PornHubUserVideosIE(PornHubPlaylistBaseIE):
-    _VALID_URL = r'https?://(?:[^/]+\.)?pornhub\.com/(?:user|channel)s/(?P<id>[^/]+)/videos'
+    _VALID_URL = r'https?://(?:[^/]+\.)?pornhub\.com/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/]+)/videos'
     _TESTS = [{
         'url': 'http://www.pornhub.com/users/zoe_ph/videos/public',
         'info_dict': {
@@ -311,6 +378,12 @@ class PornHubUserVideosIE(PornHubPlaylistBaseIE):
     }, {
         'url': 'http://www.pornhub.com/users/zoe_ph/videos/public',
         'only_matching': True,
+    }, {
+        'url': 'https://www.pornhub.com/model/jayndrea/videos/upload',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos/upload',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
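Note on the PornHub format loop above: when mediaDefinitions does not state a quality, height and total bitrate are recovered from URL tokens like 720P_4000K. A sketch of that parsing step; the sample URLs are invented, only the token convention matters:

    import re

    for video_url, height in (
            ('https://cdn.example.com/videos/720P_4000K_123.mp4', None),
            ('https://cdn.example.com/videos/plain.mp4', 480)):
        tbr = None
        mobj = re.search(r'(?P<height>\d+)[pP]?_(?P<tbr>\d+)[kK]', video_url)
        if mobj:
            if not height:
                height = int(mobj.group('height'))
            tbr = int(mobj.group('tbr'))
        print({'url': video_url,
               'format_id': '%dp' % height if height else None,
               'height': height,
               'tbr': tbr})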
diff --git a/youtube_dl/extractor/puhutv.py b/youtube_dl/extractor/puhutv.py
new file mode 100644 (file)
index 0000000..5465e8a
--- /dev/null
+++ b/youtube_dl/extractor/puhutv.py
@@ -0,0 +1,247 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import (
+    compat_HTTPError,
+    compat_str,
+)
+from ..utils import (
+    ExtractorError,
+    int_or_none,
+    float_or_none,
+    parse_resolution,
+    str_or_none,
+    try_get,
+    unified_timestamp,
+    url_or_none,
+    urljoin,
+)
+
+
+class PuhuTVIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?puhutv\.com/(?P<id>[^/?#&]+)-izle'
+    IE_NAME = 'puhutv'
+    _TESTS = [{
+        # film
+        'url': 'https://puhutv.com/sut-kardesler-izle',
+        'md5': 'fbd8f2d8e7681f8bcd51b592475a6ae7',
+        'info_dict': {
+            'id': '5085',
+            'display_id': 'sut-kardesler',
+            'ext': 'mp4',
+            'title': 'Süt Kardeşler',
+            'description': 'md5:405fd024df916ca16731114eb18e511a',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'duration': 4832.44,
+            'creator': 'Arzu Film',
+            'timestamp': 1469778212,
+            'upload_date': '20160729',
+            'release_year': 1976,
+            'view_count': int,
+            'tags': ['Aile', 'Komedi', 'Klasikler'],
+        },
+    }, {
+        # episode, geo restricted, bypassable with --geo-verification-proxy
+        'url': 'https://puhutv.com/jet-sosyete-1-bolum-izle',
+        'only_matching': True,
+    }, {
+        # 4k, with subtitles
+        'url': 'https://puhutv.com/dip-1-bolum-izle',
+        'only_matching': True,
+    }]
+    _SUBTITLE_LANGS = {
+        'English': 'en',
+        'Deutsch': 'de',
+        'عربى': 'ar'
+    }
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+
+        info = self._download_json(
+            urljoin(url, '/api/slug/%s-izle' % display_id),
+            display_id)['data']
+
+        video_id = compat_str(info['id'])
+        title = info.get('name') or info['title']['name']
+        if info.get('display_name'):
+            title = '%s %s' % (title, info.get('display_name'))
+
+        try:
+            videos = self._download_json(
+                'https://puhutv.com/api/assets/%s/videos' % video_id,
+                display_id, 'Downloading video JSON',
+                headers=self.geo_verification_headers())
+        except ExtractorError as e:
+            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
+                self.raise_geo_restricted()
+            raise
+
+        formats = []
+        for video in videos['data']['videos']:
+            media_url = url_or_none(video.get('url'))
+            if not media_url:
+                continue
+            playlist = video.get('is_playlist')
+            if video.get('stream_type') == 'hls' and playlist is True:
+                formats.extend(self._extract_m3u8_formats(
+                    media_url, video_id, 'mp4', entry_protocol='m3u8_native',
+                    m3u8_id='hls', fatal=False))
+                continue
+            quality = int_or_none(video.get('quality'))
+            f = {
+                'url': media_url,
+                'ext': 'mp4',
+                'height': quality
+            }
+            video_format = video.get('video_format')
+            if video_format == 'hls' and playlist is False:
+                format_id = 'hls'
+                f['protocol'] = 'm3u8_native'
+            elif video_format == 'mp4':
+                format_id = 'http'
+
+            else:
+                continue
+            if quality:
+                format_id += '-%sp' % quality
+            f['format_id'] = format_id
+            formats.append(f)
+        self._sort_formats(formats)
+
+        description = try_get(
+            info, lambda x: x['title']['description'],
+            compat_str) or info.get('description')
+        timestamp = unified_timestamp(info.get('created_at'))
+        creator = try_get(
+            info, lambda x: x['title']['producer']['name'], compat_str)
+
+        duration = float_or_none(
+            try_get(info, lambda x: x['content']['duration_in_ms'], int),
+            scale=1000)
+        view_count = try_get(info, lambda x: x['content']['watch_count'], int)
+
+        images = try_get(
+            info, lambda x: x['content']['images']['wide'], dict) or {}
+        thumbnails = []
+        for image_id, image_url in images.items():
+            if not isinstance(image_url, compat_str):
+                continue
+            if not image_url.startswith(('http', '//')):
+                image_url = 'https://%s' % image_url
+            t = parse_resolution(image_id)
+            t.update({
+                'id': image_id,
+                'url': image_url
+            })
+            thumbnails.append(t)
+
+        release_year = try_get(info, lambda x: x['title']['released_at'], int)
+
+        season_number = int_or_none(info.get('season_number'))
+        season_id = str_or_none(info.get('season_id'))
+        episode_number = int_or_none(info.get('episode_number'))
+
+        tags = []
+        for genre in try_get(info, lambda x: x['title']['genres'], list) or []:
+            if not isinstance(genre, dict):
+                continue
+            genre_name = genre.get('name')
+            if genre_name and isinstance(genre_name, compat_str):
+                tags.append(genre_name)
+
+        subtitles = {}
+        for subtitle in try_get(
+                info, lambda x: x['content']['subtitles'], list) or []:
+            if not isinstance(subtitle, dict):
+                continue
+            lang = subtitle.get('language')
+            sub_url = url_or_none(subtitle.get('url'))
+            if not lang or not isinstance(lang, compat_str) or not sub_url:
+                continue
+            subtitles[self._SUBTITLE_LANGS.get(lang, lang)] = [{
+                'url': sub_url
+            }]
+
+        return {
+            'id': video_id,
+            'display_id': display_id,
+            'title': title,
+            'description': description,
+            'season_id': season_id,
+            'season_number': season_number,
+            'episode_number': episode_number,
+            'release_year': release_year,
+            'timestamp': timestamp,
+            'creator': creator,
+            'view_count': view_count,
+            'duration': duration,
+            'tags': tags,
+            'subtitles': subtitles,
+            'thumbnails': thumbnails,
+            'formats': formats
+        }
+
+
+class PuhuTVSerieIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?puhutv\.com/(?P<id>[^/?#&]+)-detay'
+    IE_NAME = 'puhutv:serie'
+    _TESTS = [{
+        'url': 'https://puhutv.com/deniz-yildizi-detay',
+        'info_dict': {
+            'title': 'Deniz Yıldızı',
+            'id': 'deniz-yildizi',
+        },
+        'playlist_mincount': 205,
+    }, {
+        # a film detail page which is using same url with serie page
+        'url': 'https://puhutv.com/kaybedenler-kulubu-detay',
+        'only_matching': True,
+    }]
+
+    def _extract_entries(self, seasons):
+        for season in seasons:
+            season_id = season.get('id')
+            if not season_id:
+                continue
+            page = 1
+            has_more = True
+            while has_more is True:
+                season = self._download_json(
+                    'https://galadriel.puhutv.com/seasons/%s' % season_id,
+                    season_id, 'Downloading page %s' % page, query={
+                        'page': page,
+                        'per': 40,
+                    })
+                episodes = season.get('episodes')
+                if isinstance(episodes, list):
+                    for ep in episodes:
+                        slug_path = str_or_none(ep.get('slugPath'))
+                        if not slug_path:
+                            continue
+                        video_id = str_or_none(int_or_none(ep.get('id')))
+                        yield self.url_result(
+                            'https://puhutv.com/%s' % slug_path,
+                            ie=PuhuTVIE.ie_key(), video_id=video_id,
+                            video_title=ep.get('name') or ep.get('eventLabel'))
+                page += 1
+                has_more = season.get('hasMore')
+
+    def _real_extract(self, url):
+        playlist_id = self._match_id(url)
+
+        info = self._download_json(
+            urljoin(url, '/api/slug/%s-detay' % playlist_id),
+            playlist_id)['data']
+
+        seasons = info.get('seasons')
+        if seasons:
+            return self.playlist_result(
+                self._extract_entries(seasons), playlist_id, info.get('name'))
+
+        # For films, these are using same url with series
+        video_id = info.get('slug') or info['assets'][0]['slug']
+        return self.url_result(
+            'https://puhutv.com/%s-izle' % video_id,
+            PuhuTVIE.ie_key(), video_id)
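Note on the new PuhuTV serie extractor: it pages through each season until the API stops reporting hasMore. The control flow, with the network call stubbed out so it runs standalone (fetch_page stands in for the JSON request to https://galadriel.puhutv.com/seasons/<id>?page=N&per=40, and the slugs are invented):

    def iter_episode_urls(fetch_page):
        # fetch_page(page) -> {'episodes': [...], 'hasMore': bool}
        page = 1
        has_more = True
        while has_more is True:
            season = fetch_page(page)
            for ep in season.get('episodes') or []:
                slug_path = ep.get('slugPath')
                if slug_path:
                    yield 'https://puhutv.com/%s' % slug_path
            page += 1
            has_more = season.get('hasMore')

    pages = {
        1: {'episodes': [{'slugPath': 'example-1-bolum-izle'}], 'hasMore': True},
        2: {'episodes': [{'slugPath': 'example-2-bolum-izle'}], 'hasMore': False},
    }
    print(list(iter_episode_urls(lambda page: pages[page])))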
index a53ad97a56ef9000ea5ed65fbf0e24276b03f6f3..3f74f0c01ffd570dcf996d9c004070d80d50731e 100644 (file)
@@ -4,8 +4,11 @@ import re
 
 from .common import InfoExtractor
 from ..utils import (
-    unified_strdate,
+    parse_resolution,
     str_to_int,
+    unified_strdate,
+    urlencode_postdata,
+    urljoin,
 )
 
 
@@ -29,13 +32,26 @@ class RadioJavanIE(InfoExtractor):
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
+        download_host = self._download_json(
+            'https://www.radiojavan.com/videos/video_host', video_id,
+            data=urlencode_postdata({'id': video_id}),
+            headers={
+                'Content-Type': 'application/x-www-form-urlencoded',
+                'Referer': url,
+            }).get('host', 'https://host1.rjmusicmedia.com')
+
         webpage = self._download_webpage(url, video_id)
 
-        formats = [{
-            'url': 'https://media.rdjavan.com/media/music_video/%s' % video_path,
-            'format_id': '%sp' % height,
-            'height': int(height),
-        } for height, video_path in re.findall(r"RJ\.video(\d+)p\s*=\s*'/?([^']+)'", webpage)]
+        formats = []
+        for format_id, _, video_path in re.findall(
+                r'RJ\.video(?P<format_id>\d+[pPkK])\s*=\s*(["\'])(?P<url>(?:(?!\2).)+)\2',
+                webpage):
+            f = parse_resolution(format_id)
+            f.update({
+                'url': urljoin(download_host, video_path),
+                'format_id': format_id,
+            })
+            formats.append(f)
         self._sort_formats(formats)
 
         title = self._og_search_title(webpage)
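Note on the Radio Javan change above: the media host is now obtained via a POST to videos/video_host, and the relative RJ.video... paths from the page are joined against it. The join itself is a plain urljoin (youtube-dl's helper of the same name adds URL validation on top of the stdlib call); values below are illustrative, not real API responses:

    try:
        from urllib.parse import urljoin  # Python 3
    except ImportError:
        from urlparse import urljoin  # Python 2

    download_host = 'https://host1.rjmusicmedia.com'  # the .get('host', ...) fallback above
    video_path = 'media/music_video/example-720p.mp4'  # as scraped from RJ.video720p = '...'

    assert (urljoin(download_host, video_path)
            == 'https://host1.rjmusicmedia.com/media/music_video/example-720p.mp4')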
index d22311031f42d5a4d35c66e9182db2993ddda8ce..f916b26195ed18106655ed84f2be01a032af84c4 100644 (file)
@@ -32,6 +32,9 @@ class RaiBaseIE(InfoExtractor):
     _GEO_BYPASS = False
 
     def _extract_relinker_info(self, relinker_url, video_id):
+        if not re.match(r'https?://', relinker_url):
+            return {'formats': [{'url': relinker_url}]}
+
         formats = []
         geoprotection = None
         is_live = None
@@ -369,6 +372,10 @@ class RaiIE(RaiBaseIE):
         'params': {
             'skip_download': True,
         },
+    }, {
+        # Direct MMS URL
+        'url': 'http://www.rai.it/dl/RaiTV/programmi/media/ContentItem-b63a4089-ac28-48cf-bca5-9f5b5bc46df5.html',
+        'only_matching': True,
     }]
 
     def _extract_from_content_id(self, content_id, url):
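Note on the Rai guard above: relinker values that are already direct media URLs (the new test is a direct MMS link) skip relinker resolution and are passed through as a single format. A compact sketch of the dispatch; resolve_relinker is a hypothetical stand-in for the HTTP handling that follows in the real method:

    import re

    def extract_relinker_info(relinker_url):
        if not re.match(r'https?://', relinker_url):
            # e.g. an mms:// stream: nothing to resolve, pass it through
            return {'formats': [{'url': relinker_url}]}
        return {'formats': resolve_relinker(relinker_url)}

    def resolve_relinker(url):
        return []  # placeholder for the real relinker round-trip

    print(extract_relinker_info('mms://example.rai.it/stream'))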
index 640c3ee23f06c1dca48627130a7c56d998b05fac..5411ece218351733bbdebea7980a446997eaae06 100644 (file)
@@ -4,24 +4,37 @@ import re
 
 from .common import InfoExtractor
 from .vimeo import VimeoIE
+from ..compat import compat_str
 from ..utils import (
-    extract_attributes,
     ExtractorError,
-    smuggle_url,
-    unsmuggle_url,
+    int_or_none,
+    merge_dicts,
+    try_get,
+    unescapeHTML,
+    unified_timestamp,
     urljoin,
 )
 
 
 class RayWenderlichIE(InfoExtractor):
-    _VALID_URL = r'https?://videos\.raywenderlich\.com/courses/(?P<course_id>[^/]+)/lessons/(?P<id>\d+)'
+    _VALID_URL = r'''(?x)
+                    https?://
+                        (?:
+                            videos\.raywenderlich\.com/courses|
+                            (?:www\.)?raywenderlich\.com
+                        )/
+                        (?P<course_id>[^/]+)/lessons/(?P<id>\d+)
+                    '''
 
     _TESTS = [{
-        'url': 'https://videos.raywenderlich.com/courses/105-testing-in-ios/lessons/1',
+        'url': 'https://www.raywenderlich.com/3530-testing-in-ios/lessons/1',
         'info_dict': {
             'id': '248377018',
             'ext': 'mp4',
-            'title': 'Testing In iOS Episode 1: Introduction',
+            'title': 'Introduction',
+            'description': 'md5:804d031b3efa9fcb49777d512d74f722',
+            'timestamp': 1513906277,
+            'upload_date': '20171222',
             'duration': 133,
             'uploader': 'Ray Wenderlich',
             'uploader_id': 'user3304672',
@@ -34,69 +47,133 @@ class RayWenderlichIE(InfoExtractor):
         'expected_warnings': ['HTTP Error 403: Forbidden'],
     }, {
         'url': 'https://videos.raywenderlich.com/courses/105-testing-in-ios/lessons/1',
+        'only_matching': True,
+    }]
+
+    @staticmethod
+    def _extract_video_id(data, lesson_id):
+        if not data:
+            return
+        groups = try_get(data, lambda x: x['groups'], list) or []
+        if not groups:
+            return
+        for group in groups:
+            if not isinstance(group, dict):
+                continue
+            contents = try_get(data, lambda x: x['contents'], list) or []
+            for content in contents:
+                if not isinstance(content, dict):
+                    continue
+                ordinal = int_or_none(content.get('ordinal'))
+                if ordinal != lesson_id:
+                    continue
+                video_id = content.get('identifier')
+                if video_id:
+                    return compat_str(video_id)
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        course_id, lesson_id = mobj.group('course_id', 'id')
+        display_id = '%s/%s' % (course_id, lesson_id)
+
+        webpage = self._download_webpage(url, display_id)
+
+        thumbnail = self._og_search_thumbnail(
+            webpage, default=None) or self._html_search_meta(
+            'twitter:image', webpage, 'thumbnail')
+
+        if '>Subscribe to unlock' in webpage:
+            raise ExtractorError(
+                'This content is only available for subscribers',
+                expected=True)
+
+        info = {
+            'thumbnail': thumbnail,
+        }
+
+        vimeo_id = self._search_regex(
+            r'data-vimeo-id=["\'](\d+)', webpage, 'vimeo id', default=None)
+
+        if not vimeo_id:
+            data = self._parse_json(
+                self._search_regex(
+                    r'data-collection=(["\'])(?P<data>{.+?})\1', webpage,
+                    'data collection', default='{}', group='data'),
+                display_id, transform_source=unescapeHTML, fatal=False)
+            video_id = self._extract_video_id(
+                data, lesson_id) or self._search_regex(
+                r'/videos/(\d+)/', thumbnail, 'video id')
+            headers = {
+                'Referer': url,
+                'X-Requested-With': 'XMLHttpRequest',
+            }
+            csrf_token = self._html_search_meta(
+                'csrf-token', webpage, 'csrf token', default=None)
+            if csrf_token:
+                headers['X-CSRF-Token'] = csrf_token
+            video = self._download_json(
+                'https://videos.raywenderlich.com/api/v1/videos/%s.json'
+                % video_id, display_id, headers=headers)['video']
+            vimeo_id = video['clips'][0]['provider_id']
+            info.update({
+                '_type': 'url_transparent',
+                'title': video.get('name'),
+                'description': video.get('description') or video.get(
+                    'meta_description'),
+                'duration': int_or_none(video.get('duration')),
+                'timestamp': unified_timestamp(video.get('created_at')),
+            })
+
+        return merge_dicts(info, self.url_result(
+            VimeoIE._smuggle_referrer(
+                'https://player.vimeo.com/video/%s' % vimeo_id, url),
+            ie=VimeoIE.ie_key(), video_id=vimeo_id))
+
+
+class RayWenderlichCourseIE(InfoExtractor):
+    _VALID_URL = r'''(?x)
+                    https?://
+                        (?:
+                            videos\.raywenderlich\.com/courses|
+                            (?:www\.)?raywenderlich\.com
+                        )/
+                        (?P<id>[^/]+)
+                    '''
+
+    _TEST = {
+        'url': 'https://www.raywenderlich.com/3530-testing-in-ios',
         'info_dict': {
             'title': 'Testing in iOS',
-            'id': '105-testing-in-ios',
+            'id': '3530-testing-in-ios',
         },
         'params': {
             'noplaylist': False,
         },
         'playlist_count': 29,
-    }]
+    }
+
+    @classmethod
+    def suitable(cls, url):
+        return False if RayWenderlichIE.suitable(url) else super(
+            RayWenderlichCourseIE, cls).suitable(url)
 
     def _real_extract(self, url):
-        url, smuggled_data = unsmuggle_url(url, {})
+        course_id = self._match_id(url)
 
-        mobj = re.match(self._VALID_URL, url)
-        course_id, lesson_id = mobj.group('course_id', 'id')
-        video_id = '%s/%s' % (course_id, lesson_id)
-
-        webpage = self._download_webpage(url, video_id)
-
-        no_playlist = self._downloader.params.get('noplaylist')
-        if no_playlist or smuggled_data.get('force_video', False):
-            if no_playlist:
-                self.to_screen(
-                    'Downloading just video %s because of --no-playlist'
-                    % video_id)
-            if '>Subscribe to unlock' in webpage:
-                raise ExtractorError(
-                    'This content is only available for subscribers',
-                    expected=True)
-            vimeo_id = self._search_regex(
-                r'data-vimeo-id=["\'](\d+)', webpage, 'video id')
-            return self.url_result(
-                VimeoIE._smuggle_referrer(
-                    'https://player.vimeo.com/video/%s' % vimeo_id, url),
-                ie=VimeoIE.ie_key(), video_id=vimeo_id)
-
-        self.to_screen(
-            'Downloading playlist %s - add --no-playlist to just download video'
-            % course_id)
-
-        lesson_ids = set((lesson_id, ))
-        for lesson in re.findall(
-                r'(<a[^>]+\bclass=["\']lesson-link[^>]+>)', webpage):
-            attrs = extract_attributes(lesson)
-            if not attrs:
-                continue
-            lesson_url = attrs.get('href')
-            if not lesson_url:
-                continue
-            lesson_id = self._search_regex(
-                r'/lessons/(\d+)', lesson_url, 'lesson id', default=None)
-            if not lesson_id:
-                continue
-            lesson_ids.add(lesson_id)
+        webpage = self._download_webpage(url, course_id)
 
         entries = []
-        for lesson_id in sorted(lesson_ids):
+        lesson_urls = set()
+        for lesson_url in re.findall(
+                r'<a[^>]+\bhref=["\'](/%s/lessons/\d+)' % course_id, webpage):
+            if lesson_url in lesson_urls:
+                continue
+            lesson_urls.add(lesson_url)
             entries.append(self.url_result(
             entries.append(self.url_result(
-                smuggle_url(urljoin(url, lesson_id), {'force_video': True}),
-                ie=RayWenderlichIE.ie_key()))
+                urljoin(url, lesson_url), ie=RayWenderlichIE.ie_key()))
 
-        title = self._search_regex(
-            r'class=["\']course-title[^>]+>([^<]+)', webpage, 'course title',
-            default=None)
+        title = self._og_search_title(
+            webpage, default=None) or self._html_search_meta(
+            'twitter:title', webpage, 'title', default=None)
 
         return self.playlist_result(entries, course_id, title)
index 243603676a21c15180c021cf7c711aeb6de3ca7a..7e8d58f38190d857310cd9ecea2074024f38257c 100644 (file)
@@ -10,7 +10,7 @@ from ..utils import (
 
 
 class RedBullTVIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?redbull\.tv/video/(?P<id>AP-\w+)'
+    _VALID_URL = r'https?://(?:www\.)?redbull(?:\.tv|\.com/(?:[^/]+/)?tv)/video/(?P<id>AP-\w+)'
     _TESTS = [{
         # film
         'url': 'https://www.redbull.tv/video/AP-1Q6XCDTAN1W11',
@@ -35,6 +35,9 @@ class RedBullTVIE(InfoExtractor):
         'params': {
             'skip_download': True,
         },
+    }, {
+        'url': 'https://www.redbull.com/int-en/tv/video/AP-1UWHCAR9S1W11/rob-meets-sam-gaze?playlist=playlists::3f81040a-2f31-4832-8e2e-545b1d39d173',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
index 879bcf81d8136ff4bb1f90a9312ff5c7812fdfb8..10311a81a8ae26e283d381fba88c44752844d04f 100644 (file)
@@ -3,12 +3,12 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..compat import compat_str
 from ..utils import (
     ExtractorError,
     int_or_none,
     str_to_int,
     unified_strdate,
+    url_or_none,
 )
 
 
@@ -71,8 +71,8 @@ class RedTubeIE(InfoExtractor):
             video_id, fatal=False)
         if medias and isinstance(medias, list):
             for media in medias:
-                format_url = media.get('videoUrl')
-                if not format_url or not isinstance(format_url, compat_str):
+                format_url = url_or_none(media.get('videoUrl'))
+                if not format_url:
                     continue
                 format_id = media.get('quality')
                 formats.append({
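This hunk, like several others in the commit (rentv, rutube, turner, tvnet, tvplay, ted), swaps the manual `isinstance(..., compat_str)` check for `url_or_none` from youtube_dl/utils.py, which validates the type and the URL shape in one call and returns None on anything else. Roughly, and simplified for Python 3 (the real helper also accepts Python 2 string types):

    import re

    def url_or_none(url):
        # Return url only if it is a non-empty string that looks like an
        # absolute or protocol-relative URL; otherwise return None.
        if not url or not isinstance(url, str):
            return None
        url = url.strip()
        return url if re.match(r'^(?:[a-zA-Z][\da-zA-Z.+-]*:)?//', url) else None

    assert url_or_none('https://cdn.example.com/v.mp4') == 'https://cdn.example.com/v.mp4'
    assert url_or_none('//cdn.example.com/v.mp4') == '//cdn.example.com/v.mp4'
    assert url_or_none({'videoUrl': None}) is None
    assert url_or_none('not a url') is None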
index 8bcf87126b18dd82f4c08bc7a9c586b0f99f112e..7c8909d95377b38f5e9bbc3a204337f1f8b540a6 100644 (file)
@@ -6,6 +6,7 @@ from ..compat import compat_str
 from ..utils import (
     determine_ext,
     int_or_none,
+    url_or_none,
 )
 
 
@@ -37,8 +38,8 @@ class RENTVIE(InfoExtractor):
         title = config['title']
         formats = []
         for video in config['src']:
-            src = video.get('src')
-            if not src or not isinstance(src, compat_str):
+            src = url_or_none(video.get('src'))
+            if not src:
                 continue
             ext = determine_ext(src)
             if ext == 'm3u8':
index acff9766acb275dde443a250e5c8bd1dddbccfda..3b0f3080b1a236e9db62eed75dd126d581d15291 100644 (file)
@@ -99,7 +99,7 @@ class RTBFIE(InfoExtractor):
         http_url = data.get('url')
         if formats and http_url and re.search(height_re, http_url):
             http_url = fix_url(http_url)
-            for m3u8_f in formats.copy():
+            for m3u8_f in formats[:]:
                 height = m3u8_f.get('height')
                 if not height:
                     continue
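The one-character RTBF change is a Python 2 compatibility fix: `list.copy()` exists only on Python 3, while `formats[:]` produces the same shallow copy on both. A snapshot is needed at all because the loop body appends derived HTTP formats to `formats` while walking it. A tiny illustration:

    formats = [{'height': 480}, {'height': 720}]

    # Iterate over a shallow copy so the appends below do not
    # extend the sequence being iterated.
    for m3u8_f in formats[:]:
        formats.append({'height': m3u8_f['height'], 'protocol': 'http'})

    assert [f['height'] for f in formats] == [480, 720, 480, 720]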
index 89d89b65a8108215a98957e20ae3045c0c205471..261bcbb83ece85610a709701155ab4e244400f10 100644 (file)
@@ -16,6 +16,7 @@ from ..utils import (
     int_or_none,
     try_get,
     unified_timestamp,
+    url_or_none,
 )
 
 
@@ -176,8 +177,8 @@ class RutubePlaylistBaseIE(RutubeBaseIE):
                 break
 
             for result in results:
-                video_url = result.get('video_url')
-                if not video_url or not isinstance(video_url, compat_str):
+                video_url = url_or_none(result.get('video_url'))
+                if not video_url:
                     continue
                 entry = self._extract_video(result, require_title=False)
                 entry.update({
index 6d4e3b76daba1b265fcd358122e7fe2e13695906..7a1c7e38bec915a31713a98a2cbee234faf0edb1 100644 (file)
@@ -164,6 +164,6 @@ class SeznamZpravyArticleIE(InfoExtractor):
         description = info.get('description') or self._og_search_description(webpage)
 
         return self.playlist_result([
-            self.url_result(url, ie=SeznamZpravyIE.ie_key())
-            for url in SeznamZpravyIE._extract_urls(webpage)],
+            self.url_result(entry_url, ie=SeznamZpravyIE.ie_key())
+            for entry_url in SeznamZpravyIE._extract_urls(webpage)],
             article_id, title, description)
index 1f8469a90876673a21cbd3d436fde3081c3e015c..207ab44771aa76be0ee8581e85cecac3993f86a6 100644 (file)
@@ -19,7 +19,7 @@ from ..utils import (
 
 class SixPlayIE(InfoExtractor):
     IE_NAME = '6play'
-    _VALID_URL = r'(?:6play:|https?://(?:www\.)?(?P<domain>6play\.fr|rtlplay.be)/.+?-c_)(?P<id>[0-9]+)'
+    _VALID_URL = r'(?:6play:|https?://(?:www\.)?(?P<domain>6play\.fr|rtlplay\.be|play\.rtl\.hr)/.+?-c_)(?P<id>[0-9]+)'
     _TESTS = [{
         'url': 'https://www.6play.fr/minute-par-minute-p_9533/le-but-qui-a-marque-lhistoire-du-football-francais-c_12041051',
         'md5': '31fcd112637baa0c2ab92c4fcd8baf27',
@@ -32,6 +32,9 @@ class SixPlayIE(InfoExtractor):
     }, {
         'url': 'https://www.rtlplay.be/rtl-info-13h-p_8551/les-titres-du-rtlinfo-13h-c_12045869',
         'only_matching': True,
+    }, {
+        'url': 'https://play.rtl.hr/pj-masks-p_9455/epizoda-34-sezona-1-catboyevo-cudo-na-dva-kotaca-c_11984989',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
@@ -39,6 +42,7 @@ class SixPlayIE(InfoExtractor):
         service, consumer_name = {
             '6play.fr': ('6play', 'm6web'),
             'rtlplay.be': ('rtlbe_rtl_play', 'rtlbe'),
+            'play.rtl.hr': ('rtlhr_rtl_play', 'rtlhr'),
         }.get(domain, ('6play', 'm6web'))
 
         data = self._download_json(
@@ -71,7 +75,9 @@ class SixPlayIE(InfoExtractor):
             if container == 'm3u8' or ext == 'm3u8':
                 if protocol == 'usp':
                     if compat_parse_qs(compat_urllib_parse_urlparse(asset_url).query).get('token', [None])[0]:
-                        urlh = self._request_webpage(asset_url, video_id, fatal=False)
+                        urlh = self._request_webpage(
+                            asset_url, video_id, fatal=False,
+                            headers=self.geo_verification_headers())
                         if not urlh:
                             continue
                         asset_url = urlh.geturl()
index 104576033db9652f80746783a19bc61812a80904..ed84322c5131b49a1acac4deedb4d049832d7547 100644 (file)
@@ -8,6 +8,7 @@ from ..utils import ExtractorError
 class SlidesLiveIE(InfoExtractor):
     _VALID_URL = r'https?://slideslive\.com/(?P<id>[0-9]+)'
     _TESTS = [{
+        # video_service_name = YOUTUBE
         'url': 'https://slideslive.com/38902413/gcc-ia16-backend',
         'md5': 'b29fcd6c6952d0c79c5079b0e7a07e6f',
         'info_dict': {
@@ -19,14 +20,18 @@ class SlidesLiveIE(InfoExtractor):
             'uploader_id': 'UC62SdArr41t_-_fX40QCLRw',
             'upload_date': '20170925',
         }
+    }, {
+        # video_service_name = youtube
+        'url': 'https://slideslive.com/38903721/magic-a-scientific-resurrection-of-an-esoteric-legend',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
         video_data = self._download_json(
             url, video_id, headers={'Accept': 'application/json'})
-        service_name = video_data['video_service_name']
-        if service_name == 'YOUTUBE':
+        service_name = video_data['video_service_name'].lower()
+        if service_name == 'youtube':
             yt_video_id = video_data['video_service_id']
             return self.url_result(yt_video_id, 'Youtube', video_id=yt_video_id)
         else:
index 6fc2ff60d2a923f763fc834127eb62b667c542d5..661f9e59d0310a6c6def25e20de33a9cbc65e09f 100644 (file)
@@ -1,12 +1,10 @@
 from __future__ import unicode_literals
 
-import re
-
 from .common import InfoExtractor
 
 
 class SlutloadIE(InfoExtractor):
-    _VALID_URL = r'^https?://(?:\w+\.)?slutload\.com/video/[^/]+/(?P<id>[^/]+)/?$'
+    _VALID_URL = r'https?://(?:\w+\.)?slutload\.com/(?:video/[^/]+|embed_player|watch)/(?P<id>[^/]+)'
     _TESTS = [{
         'url': 'http://www.slutload.com/video/virginie-baisee-en-cam/TD73btpBqSxc/',
         'md5': '868309628ba00fd488cf516a113fd717',
@@ -16,33 +14,52 @@ class SlutloadIE(InfoExtractor):
             'title': 'virginie baisee en cam',
             'age_limit': 18,
             'thumbnail': r're:https?://.*?\.jpg'
-        }
+        },
     }, {
         # mobile site
         'url': 'http://mobile.slutload.com/video/masturbation-solo/fviFLmc6kzJ/',
         'only_matching': True,
+    }, {
+        'url': 'http://www.slutload.com/embed_player/TD73btpBqSxc/',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.slutload.com/watch/TD73btpBqSxc/Virginie-Baisee-En-Cam.html',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
-        desktop_url = re.sub(r'^(https?://)mobile\.', r'\1', url)
-        webpage = self._download_webpage(desktop_url, video_id)
+        embed_page = self._download_webpage(
+            'http://www.slutload.com/embed_player/%s' % video_id, video_id,
+            'Downloading embed page', fatal=False)
 
-        video_title = self._html_search_regex(r'<h1><strong>([^<]+)</strong>',
-                                              webpage, 'title').strip()
+        if embed_page:
+            def extract(what):
+                return self._html_search_regex(
+                    r'data-video-%s=(["\'])(?P<url>(?:(?!\1).)+)\1' % what,
+                    embed_page, 'video %s' % what, default=None, group='url')
 
-        video_url = self._html_search_regex(
-            r'(?s)<div id="vidPlayer"\s+data-url="([^"]+)"',
-            webpage, 'video URL')
-        thumbnail = self._html_search_regex(
-            r'(?s)<div id="vidPlayer"\s+.*?previewer-file="([^"]+)"',
-            webpage, 'thumbnail', fatal=False)
+            video_url = extract('url')
+            if video_url:
+                title = self._html_search_regex(
+                    r'<title>([^<]+)', embed_page, 'title', default=video_id)
+                return {
+                    'id': video_id,
+                    'url': video_url,
+                    'title': title,
+                    'thumbnail': extract('preview'),
+                    'age_limit': 18
+                }
 
-        return {
+        webpage = self._download_webpage(
+            'http://www.slutload.com/video/_/%s/' % video_id, video_id)
+        title = self._html_search_regex(
+            r'<h1><strong>([^<]+)</strong>', webpage, 'title').strip()
+        info = self._parse_html5_media_entries(url, webpage, video_id)[0]
+        info.update({
             'id': video_id,
-            'url': video_url,
-            'title': video_title,
-            'thumbnail': thumbnail,
-            'age_limit': 18
-        }
+            'title': title,
+            'age_limit': 18,
+        })
+        return info
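The `extract` helper in the new embed-page path relies on a regex idiom used throughout youtube-dl: capture the opening quote, then consume characters guarded by a negative lookahead on the backreference, so the attribute value may safely contain the other quote character. Standalone (the HTML is made up):

    import re

    def extract_data_attr(html, what):
        m = re.search(
            r'data-video-%s=(["\'])(?P<url>(?:(?!\1).)+)\1' % what, html)
        return m.group('url') if m else None

    html = '<div data-video-url=\'https://cdn.example.com/a"b.mp4\'></div>'
    # A single-quoted value may contain a double quote, and vice versa.
    assert extract_data_attr(html, 'url') == 'https://cdn.example.com/a"b.mp4'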
index 6a6bb90c493a92fc2e644e2a550547460d899ca4..4a410611deafbfdb216d21ed668cffe6b31ef31f 100644 (file)
@@ -72,4 +72,7 @@ class StreamcloudIE(InfoExtractor):
             'title': title,
             'url': video_url,
             'thumbnail': thumbnail,
+            'http_headers': {
+                'Referer': url,
+            },
         }
index f71eab8b25014501aa6d123e70fba4506c095cea..0901c3163e6cab4723d451b30df8574e359ba899 100644 (file)
@@ -12,6 +12,8 @@ from ..utils import (
     determine_ext,
     dict_get,
     int_or_none,
+    orderedSet,
+    strip_or_none,
     try_get,
     urljoin,
     compat_str,
@@ -137,7 +139,12 @@ class SVTPlayBaseIE(SVTBaseIE):
 
 class SVTPlayIE(SVTPlayBaseIE):
     IE_DESC = 'SVT Play and Öppet arkiv'
-    _VALID_URL = r'https?://(?:www\.)?(?:svtplay|oppetarkiv)\.se/(?:video|klipp|kanaler)/(?P<id>[^/?#&]+)'
+    _VALID_URL = r'''(?x)
+                    (?:
+                        svt:(?P<svt_id>[^/?#&]+)|
+                        https?://(?:www\.)?(?:svtplay|oppetarkiv)\.se/(?:video|klipp|kanaler)/(?P<id>[^/?#&]+)
+                    )
+                    '''
     _TESTS = [{
         'url': 'http://www.svtplay.se/video/5996901/flygplan-till-haile-selassie/flygplan-till-haile-selassie-2',
         'md5': '2b6704fe4a28801e1a098bbf3c5ac611',
@@ -164,10 +171,40 @@ class SVTPlayIE(SVTPlayBaseIE):
     }, {
         'url': 'https://www.svtplay.se/kanaler/svt1',
         'only_matching': True,
+    }, {
+        'url': 'svt:1376446-003A',
+        'only_matching': True,
+    }, {
+        'url': 'svt:14278044',
+        'only_matching': True,
     }]
 
+    def _adjust_title(self, info):
+        if info['is_live']:
+            info['title'] = self._live_title(info['title'])
+
+    def _extract_by_video_id(self, video_id, webpage=None):
+        data = self._download_json(
+            'https://api.svt.se/videoplayer-api/video/%s' % video_id,
+            video_id, headers=self.geo_verification_headers())
+        info_dict = self._extract_video(data, video_id)
+        if not info_dict.get('title'):
+            title = dict_get(info_dict, ('episode', 'series'))
+            if not title and webpage:
+                title = re.sub(
+                    r'\s*\|\s*.+?$', '', self._og_search_title(webpage))
+            if not title:
+                title = video_id
+            info_dict['title'] = title
+        self._adjust_title(info_dict)
+        return info_dict
+
     def _real_extract(self, url):
-        video_id = self._match_id(url)
+        mobj = re.match(self._VALID_URL, url)
+        video_id, svt_id = mobj.group('id', 'svt_id')
+
+        if svt_id:
+            return self._extract_by_video_id(svt_id)
 
         webpage = self._download_webpage(url, video_id)
 
@@ -179,10 +216,6 @@ class SVTPlayIE(SVTPlayBaseIE):
 
         thumbnail = self._og_search_thumbnail(webpage)
 
-        def adjust_title(info):
-            if info['is_live']:
-                info['title'] = self._live_title(info['title'])
-
         if data:
             video_info = try_get(
                 data, lambda x: x['context']['dispatcher']['stores']['VideoTitlePageStore']['data']['video'],
@@ -193,24 +226,14 @@ class SVTPlayIE(SVTPlayBaseIE):
                     'title': data['context']['dispatcher']['stores']['MetaStore']['title'],
                     'thumbnail': thumbnail,
                 })
-                adjust_title(info_dict)
+                self._adjust_title(info_dict)
                 return info_dict
 
-        video_id = self._search_regex(
+        svt_id = self._search_regex(
             r'<video[^>]+data-video-id=["\']([\da-zA-Z-]+)',
-            webpage, 'video id', default=None)
+            webpage, 'video id')
 
-        if video_id:
-            data = self._download_json(
-                'https://api.svt.se/videoplayer-api/video/%s' % video_id,
-                video_id, headers=self.geo_verification_headers())
-            info_dict = self._extract_video(data, video_id)
-            if not info_dict.get('title'):
-                info_dict['title'] = re.sub(
-                    r'\s*\|\s*.+?$', '',
-                    info_dict.get('episode') or self._og_search_title(webpage))
-            adjust_title(info_dict)
-            return info_dict
+        return self._extract_by_video_id(svt_id, webpage)
 
 
 class SVTSeriesIE(SVTPlayBaseIE):
@@ -292,3 +315,57 @@ class SVTSeriesIE(SVTPlayBaseIE):
 
         return self.playlist_result(
             entries, series_id, title, metadata.get('description'))
+
+
+class SVTPageIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?svt\.se/(?:[^/]+/)*(?P<id>[^/?&#]+)'
+    _TESTS = [{
+        'url': 'https://www.svt.se/sport/oseedat/guide-sommartraningen-du-kan-gora-var-och-nar-du-vill',
+        'info_dict': {
+            'id': 'guide-sommartraningen-du-kan-gora-var-och-nar-du-vill',
+            'title': 'GUIDE: Sommarträning du kan göra var och när du vill',
+        },
+        'playlist_count': 7,
+    }, {
+        'url': 'https://www.svt.se/nyheter/inrikes/ebba-busch-thor-kd-har-delvis-ratt-om-no-go-zoner',
+        'info_dict': {
+            'id': 'ebba-busch-thor-kd-har-delvis-ratt-om-no-go-zoner',
+            'title': 'Ebba Busch Thor har bara delvis rätt om ”no-go-zoner”',
+        },
+        'playlist_count': 1,
+    }, {
+        # only programTitle
+        'url': 'http://www.svt.se/sport/ishockey/jagr-tacklar-giroux-under-intervjun',
+        'info_dict': {
+            'id': '2900353',
+            'ext': 'mp4',
+            'title': 'Stjärnorna skojar till det - under SVT-intervjun',
+            'duration': 27,
+            'age_limit': 0,
+        },
+    }, {
+        'url': 'https://www.svt.se/nyheter/lokalt/vast/svt-testar-tar-nagon-upp-skrapet-1',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.svt.se/vader/manadskronikor/maj2018',
+        'only_matching': True,
+    }]
+
+    @classmethod
+    def suitable(cls, url):
+        return False if SVTIE.suitable(url) else super(SVTPageIE, cls).suitable(url)
+
+    def _real_extract(self, url):
+        playlist_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, playlist_id)
+
+        entries = [
+            self.url_result(
+                'svt:%s' % video_id, ie=SVTPlayIE.ie_key(), video_id=video_id)
+            for video_id in orderedSet(re.findall(
+                r'data-video-id=["\'](\d+)', webpage))]
+
+        title = strip_or_none(self._og_search_title(webpage, default=None))
+
+        return self.playlist_result(entries, playlist_id, title)
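The new SVTPageIE hands every `data-video-id` it scrapes to SVTPlayIE through the `svt:<id>` pseudo-URL scheme added to that extractor's `_VALID_URL`; `orderedSet` deduplicates the ids while keeping page order, so each video is queued once. A rough sketch of that step (`ordered_set` here mirrors `orderedSet` from youtube_dl/utils.py):

    import re

    def ordered_set(iterable):
        # First occurrence wins; order is preserved.
        result = []
        for item in iterable:
            if item not in result:
                result.append(item)
        return result

    webpage = '''
    <span data-video-id="2900353"></span>
    <span data-video-id="2900353"></span>
    <span data-video-id="1376446"></span>
    '''
    entries = ['svt:%s' % video_id for video_id in ordered_set(
        re.findall(r'data-video-id=["\'](\d+)', webpage))]
    assert entries == ['svt:2900353', 'svt:1376446']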
index 06a27fd0428b469abb64e6428faf263fc080cc33..212ac80abb52ec98c887b9652234fdb50bb81717 100644 (file)
@@ -7,8 +7,10 @@ from .common import InfoExtractor
 
 from ..compat import compat_str
 from ..utils import (
+    float_or_none,
     int_or_none,
     try_get,
+    url_or_none,
 )
 
 
@@ -30,7 +32,7 @@ class TEDIE(InfoExtractor):
         '''
     _TESTS = [{
         'url': 'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html',
-        'md5': '0de43ac406aa3e4ea74b66c9c7789b13',
+        'md5': 'b0ce2b05ca215042124fbc9e3886493a',
         'info_dict': {
             'id': '102',
             'ext': 'mp4',
@@ -42,24 +44,30 @@ class TEDIE(InfoExtractor):
             'uploader': 'Dan Dennett',
             'width': 853,
             'duration': 1308,
-        }
+            'view_count': int,
+            'comment_count': int,
+            'tags': list,
+        },
+        'params': {
+            'skip_download': True,
+        },
     }, {
-        'url': 'http://www.ted.com/watch/ted-institute/ted-bcg/vishal-sikka-the-beauty-and-power-of-algorithms',
-        'md5': 'b899ac15e345fb39534d913f7606082b',
+        # missing HTTP bitrates
+        'url': 'https://www.ted.com/talks/vishal_sikka_the_beauty_and_power_of_algorithms',
         'info_dict': {
         'info_dict': {
-            'id': 'tSVI8ta_P4w',
+            'id': '6069',
             'ext': 'mp4',
             'ext': 'mp4',
-            'title': 'Vishal Sikka: The beauty and power of algorithms',
+            'title': 'The beauty and power of algorithms',
             'thumbnail': r're:^https?://.+\.jpg',
             'thumbnail': r're:^https?://.+\.jpg',
-            'description': 'md5:6261fdfe3e02f4f579cbbfc00aff73f4',
-            'upload_date': '20140122',
-            'uploader_id': 'TEDInstitute',
-            'uploader': 'TED Institute',
+            'description': 'md5:734e352710fb00d840ab87ae31aaf688',
+            'uploader': 'Vishal Sikka',
+        },
+        'params': {
+            'skip_download': True,
         },
-        'add_ie': ['Youtube'],
     }, {
         'url': 'http://www.ted.com/talks/gabby_giffords_and_mark_kelly_be_passionate_be_courageous_be_your_best',
-        'md5': '71b3ab2f4233012dce09d515c9c39ce2',
+        'md5': 'e6b9617c01a7970ceac8bb2c92c346c0',
         'info_dict': {
             'id': '1972',
             'ext': 'mp4',
@@ -68,6 +76,9 @@ class TEDIE(InfoExtractor):
             'description': 'md5:5174aed4d0f16021b704120360f72b92',
             'duration': 1128,
         },
+        'params': {
+            'skip_download': True,
+        },
     }, {
         'url': 'http://www.ted.com/playlists/who_are_the_hackers',
         'info_dict': {
@@ -92,17 +103,17 @@ class TEDIE(InfoExtractor):
             'skip_download': True,
         },
     }, {
-        # YouTube video
-        'url': 'http://www.ted.com/talks/jeffrey_kluger_the_sibling_bond',
-        'add_ie': ['Youtube'],
+        # no nativeDownloads
+        'url': 'https://www.ted.com/talks/tom_thum_the_orchestra_in_my_mouth',
         'info_dict': {
-            'id': 'aFBIPO-P7LM',
+            'id': '1792',
             'ext': 'mp4',
             'ext': 'mp4',
-            'title': 'The hidden power of siblings: Jeff Kluger at TEDxAsheville',
-            'description': 'md5:3d7a4f50d95ca5dd67104e2a20f43fe1',
-            'uploader': 'TEDx Talks',
-            'uploader_id': 'TEDxTalks',
-            'upload_date': '20111216',
+            'title': 'The orchestra in my mouth',
+            'description': 'md5:5d1d78650e2f8dfcbb8ebee2951ac29a',
+            'uploader': 'Tom Thum',
+            'view_count': int,
+            'comment_count': int,
+            'tags': list,
         },
         'params': {
             'skip_download': True,
@@ -161,27 +172,16 @@ class TEDIE(InfoExtractor):
 
         info = self._extract_info(webpage)
 
-        talk_info = try_get(
-            info, lambda x: x['__INITIAL_DATA__']['talks'][0],
-            dict) or info['talks'][0]
+        data = try_get(info, lambda x: x['__INITIAL_DATA__'], dict) or info
+        talk_info = data['talks'][0]
 
         title = talk_info['title'].strip()
 
-        external = talk_info.get('external')
-        if external:
-            service = external['service']
-            self.to_screen('Found video from %s' % service)
-            ext_url = None
-            if service.lower() == 'youtube':
-                ext_url = external.get('code')
-            return {
-                '_type': 'url',
-                'url': ext_url or external['uri'],
-            }
-
         native_downloads = try_get(
-            talk_info, lambda x: x['downloads']['nativeDownloads'],
-            dict) or talk_info['nativeDownloads']
+            talk_info,
+            (lambda x: x['downloads']['nativeDownloads'],
+             lambda x: x['nativeDownloads']),
+            dict) or {}
 
         formats = [{
             'url': format_url,
@@ -196,10 +196,24 @@ class TEDIE(InfoExtractor):
 
         player_talk = talk_info['player_talks'][0]
 
+        external = player_talk.get('external')
+        if isinstance(external, dict):
+            service = external.get('service')
+            if isinstance(service, compat_str):
+                ext_url = None
+                if service.lower() == 'youtube':
+                    ext_url = external.get('code')
+                return {
+                    '_type': 'url',
+                    'url': ext_url or external['uri'],
+                }
+
         resources_ = player_talk.get('resources') or talk_info.get('resources')
 
         http_url = None
         for format_id, resources in resources_.items():
+            if not isinstance(resources, dict):
+                continue
             if format_id == 'h264':
                 for resource in resources:
                     h264_url = resource.get('file')
@@ -228,8 +242,12 @@ class TEDIE(InfoExtractor):
                         'tbr': int_or_none(resource.get('bitrate')),
                     })
             elif format_id == 'hls':
+                stream_url = url_or_none(resources.get('stream'))
+                if not stream_url:
+                    continue
                 formats.extend(self._extract_m3u8_formats(
-                    resources.get('stream'), video_name, 'mp4', m3u8_id=format_id, fatal=False))
+                    stream_url, video_name, 'mp4', m3u8_id=format_id,
+                    fatal=False))
 
         m3u8_formats = list(filter(
             lambda f: f.get('protocol') == 'm3u8' and f.get('vcodec') != 'none',
@@ -239,9 +257,13 @@ class TEDIE(InfoExtractor):
                 bitrate = self._search_regex(r'(\d+k)', m3u8_format['url'], 'bitrate', default=None)
                 if not bitrate:
                     continue
+                bitrate_url = re.sub(r'\d+k', bitrate, http_url)
+                if not self._is_valid_url(
+                        bitrate_url, video_name, '%s bitrate' % bitrate):
+                    continue
                 f = m3u8_format.copy()
                 f.update({
-                    'url': re.sub(r'\d+k', bitrate, http_url),
+                    'url': bitrate_url,
                     'format_id': m3u8_format['format_id'].replace('hls', 'http'),
                     'protocol': 'http',
                 })
@@ -267,7 +289,11 @@ class TEDIE(InfoExtractor):
             'description': self._og_search_description(webpage),
             'subtitles': self._get_subtitles(video_id, talk_info),
             'formats': formats,
-            'duration': talk_info.get('duration'),
+            'duration': float_or_none(talk_info.get('duration')),
+            'view_count': int_or_none(data.get('viewed_count')),
+            'comment_count': int_or_none(
+                try_get(data, lambda x: x['comments']['count'])),
+            'tags': try_get(talk_info, lambda x: x['tags'], list),
         }
 
     def _get_subtitles(self, video_id, talk_info):
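The reworked TED extractor guesses progressive HTTP URLs by splicing each HLS variant's bitrate into the single known `http_url`, and the new `_is_valid_url` check exists precisely because some of those guesses 404 (the '# missing HTTP bitrates' test above). A sketch of the substitution step, with invented URLs:

    import re

    http_url = 'https://download.example.org/talks/1972-320k.mp4'
    hls_urls = [
        'https://hls.example.org/talks/1972/600k/index.m3u8',
        'https://hls.example.org/talks/1972/1200k/index.m3u8',
    ]

    candidates = []
    for m3u8_url in hls_urls:
        bitrate = re.search(r'(\d+k)', m3u8_url).group(1)
        # Swap the bitrate into the known HTTP template; the extractor
        # then HEAD-checks the result before adding it as a format.
        candidates.append(re.sub(r'\d+k', bitrate, http_url))

    assert candidates == [
        'https://download.example.org/talks/1972-600k.mp4',
        'https://download.example.org/talks/1972-1200k.mp4',
    ]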
diff --git a/youtube_dl/extractor/tele5.py b/youtube_dl/extractor/tele5.py
new file mode 100644 (file)
index 0000000..25573e4
--- /dev/null
@@ -0,0 +1,44 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from .nexx import NexxIE
+from ..compat import compat_urlparse
+
+
+class Tele5IE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?tele5\.de/(?:mediathek|tv)/(?P<id>[^?#&]+)'
+    _TESTS = [{
+        'url': 'https://www.tele5.de/mediathek/filme-online/videos?vid=1549416',
+        'info_dict': {
+            'id': '1549416',
+            'ext': 'mp4',
+            'upload_date': '20180814',
+            'timestamp': 1534290623,
+            'title': 'Pandorum',
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }, {
+        'url': 'https://www.tele5.de/tv/kalkofes-mattscheibe/video-clips/politik-und-gesellschaft?ve_id=1551191',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.tele5.de/tv/dark-matter/videos',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
+        video_id = (qs.get('vid') or qs.get('ve_id') or [None])[0]
+
+        if not video_id:
+            display_id = self._match_id(url)
+            webpage = self._download_webpage(url, display_id)
+            video_id = self._html_search_regex(
+                r'id\s*=\s*["\']video-player["\'][^>]+data-id\s*=\s*["\'](\d+)',
+                webpage, 'video id')
+
+        return self.url_result(
+            'https://api.nexx.cloud/v3/759/videos/byid/%s' % video_id,
+            ie=NexxIE.ie_key(), video_id=video_id)
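The new Tele5 extractor reads the video id straight from the `vid` or `ve_id` query parameter when present and only falls back to scraping the page. The lookup, sketched standalone with the Python 3 spelling of the stdlib call that `compat_urlparse` wraps:

    from urllib.parse import parse_qs, urlparse

    def tele5_video_id(url):
        qs = parse_qs(urlparse(url).query)
        # parse_qs maps each key to a list of values; take the first hit.
        return (qs.get('vid') or qs.get('ve_id') or [None])[0]

    assert tele5_video_id(
        'https://www.tele5.de/mediathek/filme-online/videos?vid=1549416') == '1549416'
    assert tele5_video_id('https://www.tele5.de/tv/dark-matter/videos') is None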
index fdcc7d5731b4833065912dd89e0132f385c817f3..d37e1b0557cf3ba241a25e7e56d28c8dc679b1d0 100644 (file)
@@ -1,26 +1,43 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
-from .mitele import MiTeleBaseIE
+import json
+import re
 
+from .common import InfoExtractor
+from .ooyala import OoyalaIE
+from ..utils import (
+    clean_html,
+    determine_ext,
+    int_or_none,
+    str_or_none,
+    urljoin,
+)
 
 
-class TelecincoIE(MiTeleBaseIE):
+
+class TelecincoIE(InfoExtractor):
     IE_DESC = 'telecinco.es, cuatro.com and mediaset.es'
     _VALID_URL = r'https?://(?:www\.)?(?:telecinco\.es|cuatro\.com|mediaset\.es)/(?:[^/]+/)+(?P<id>.+?)\.html'
 
     _TESTS = [{
         'url': 'http://www.telecinco.es/robinfood/temporada-01/t01xp14/Bacalao-cocochas-pil-pil_0_1876350223.html',
     IE_DESC = 'telecinco.es, cuatro.com and mediaset.es'
     _VALID_URL = r'https?://(?:www\.)?(?:telecinco\.es|cuatro\.com|mediaset\.es)/(?:[^/]+/)+(?P<id>.+?)\.html'
 
     _TESTS = [{
         'url': 'http://www.telecinco.es/robinfood/temporada-01/t01xp14/Bacalao-cocochas-pil-pil_0_1876350223.html',
-        'md5': '8d7b2d5f699ee2709d992a63d5cd1712',
         'info_dict': {
         'info_dict': {
-            'id': 'JEA5ijCnF6p5W08A1rNKn7',
-            'ext': 'mp4',
+            'id': '1876350223',
             'title': 'Bacalao con kokotxas al pil-pil',
             'description': 'md5:1382dacd32dd4592d478cbdca458e5bb',
             'title': 'Bacalao con kokotxas al pil-pil',
             'description': 'md5:1382dacd32dd4592d478cbdca458e5bb',
-            'duration': 662,
         },
         },
+        'playlist': [{
+            'md5': 'adb28c37238b675dad0f042292f209a7',
+            'info_dict': {
+                'id': 'JEA5ijCnF6p5W08A1rNKn7',
+                'ext': 'mp4',
+                'title': 'Con Martín Berasategui, hacer un bacalao al pil-pil es fácil y divertido',
+                'duration': 662,
+            },
+        }]
     }, {
         'url': 'http://www.cuatro.com/deportes/futbol/barcelona/Leo_Messi-Champions-Roma_2_2052780128.html',
-        'md5': '284393e5387b3b947b77c613ef04749a',
+        'md5': '9468140ebc300fbb8b9d65dc6e5c4b43',
         'info_dict': {
             'id': 'jn24Od1zGLG4XUZcnUnZB6',
             'ext': 'mp4',
@@ -30,7 +47,7 @@ class TelecincoIE(MiTeleBaseIE):
         },
     }, {
         'url': 'http://www.mediaset.es/12meses/campanas/doylacara/conlatratanohaytrato/Ayudame-dar-cara-trata-trato_2_1986630220.html',
-        'md5': '749afab6ea5a136a8806855166ae46a2',
+        'md5': 'ae2dc6b7b50b2392076a51c0f70e01f6',
         'info_dict': {
             'id': 'aywerkD2Sv1vGNqq9b85Q2',
             'ext': 'mp4',
@@ -50,17 +67,90 @@ class TelecincoIE(MiTeleBaseIE):
         'only_matching': True,
     }]
 
+    def _parse_content(self, content, url):
+        video_id = content['dataMediaId']
+        if content.get('dataCmsId') == 'ooyala':
+            return self.url_result(
+                'ooyala:%s' % video_id, OoyalaIE.ie_key(), video_id)
+        config_url = urljoin(url, content['dataConfig'])
+        config = self._download_json(
+            config_url, video_id, 'Downloading config JSON')
+        title = config['info']['title']
+
+        def mmc_url(mmc_type):
+            return re.sub(
+                r'/(?:flash|html5)\.json', '/%s.json' % mmc_type,
+                config['services']['mmc'])
+
+        duration = None
+        formats = []
+        for mmc_type in ('flash', 'html5'):
+            mmc = self._download_json(
+                mmc_url(mmc_type), video_id,
+                'Downloading %s mmc JSON' % mmc_type, fatal=False)
+            if not mmc:
+                continue
+            if not duration:
+                duration = int_or_none(mmc.get('duration'))
+            for location in mmc['locations']:
+                gat = self._proto_relative_url(location.get('gat'), 'http:')
+                gcp = location.get('gcp')
+                ogn = location.get('ogn')
+                if None in (gat, gcp, ogn):
+                    continue
+                token_data = {
+                    'gcp': gcp,
+                    'ogn': ogn,
+                    'sta': 0,
+                }
+                media = self._download_json(
+                    gat, video_id, data=json.dumps(token_data).encode('utf-8'),
+                    headers={
+                        'Content-Type': 'application/json;charset=utf-8',
+                        'Referer': url,
+                    }, fatal=False) or {}
+                stream = media.get('stream') or media.get('file')
+                if not stream:
+                    continue
+                ext = determine_ext(stream)
+                if ext == 'f4m':
+                    formats.extend(self._extract_f4m_formats(
+                        stream + '&hdcore=3.2.0&plugin=aasp-3.2.0.77.18',
+                        video_id, f4m_id='hds', fatal=False))
+                elif ext == 'm3u8':
+                    formats.extend(self._extract_m3u8_formats(
+                        stream, video_id, 'mp4', 'm3u8_native',
+                        m3u8_id='hls', fatal=False))
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'formats': formats,
+            'thumbnail': content.get('dataPoster') or config.get('poster', {}).get('imageUrl'),
+            'duration': duration,
+        }
+
     def _real_extract(self, url):
         display_id = self._match_id(url)
         webpage = self._download_webpage(url, display_id)
-        title = self._html_search_meta(
-            ['og:title', 'twitter:title'], webpage, 'title')
-        info = self._get_player_info(url, webpage)
+        article = self._parse_json(self._search_regex(
+            r'window\.\$REACTBASE_STATE\.article\s*=\s*({.+})',
+            webpage, 'article'), display_id)['article']
+        title = article.get('title')
+        description = clean_html(article.get('leadParagraph'))
+        if article.get('editorialType') != 'VID':
+            entries = []
+            for p in article.get('body', []):
+                content = p.get('content')
+                if p.get('type') != 'video' or not content:
+                    continue
+                entries.append(self._parse_content(content, url))
+            return self.playlist_result(
+                entries, str_or_none(article.get('id')), title, description)
+        content = article['opening']['content']
+        info = self._parse_content(content, url)
         info.update({
-            'display_id': display_id,
-            'title': title,
-            'description': self._html_search_meta(
-                ['og:description', 'twitter:description'],
-                webpage, 'title', fatal=False),
+            'description': description,
         })
         return info
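Inside the new `_parse_content`, `mmc_url` derives the flash and html5 flavours of the MMC metadata URL from whichever one the player config carries, and the extractor then tries both. The rewrite step, shown with a made-up config URL:

    import re

    mmc = 'https://cc.example.es/mmc/1876350223/flash.json'

    def mmc_url(mmc_type):
        # Rewrite the trailing /flash.json or /html5.json segment.
        return re.sub(r'/(?:flash|html5)\.json', '/%s.json' % mmc_type, mmc)

    assert mmc_url('html5') == 'https://cc.example.es/mmc/1876350223/html5.json'
    assert mmc_url('flash') == 'https://cc.example.es/mmc/1876350223/flash.json'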
index b1a985ff6c12368347d98d95beed6a042e70093c..ffef5bf06bc0de32f3249474e960e52bf603ff05 100644 (file)
@@ -32,13 +32,15 @@ _x = lambda p: xpath_with_ns(p, {'smil': default_ns})
 
 
 class ThePlatformBaseIE(OnceIE):
+    _TP_TLD = 'com'
+
     def _extract_theplatform_smil(self, smil_url, video_id, note='Downloading SMIL data'):
         meta = self._download_xml(
             smil_url, video_id, note=note, query={'format': 'SMIL'},
             headers=self.geo_verification_headers())
         error_element = find_xpath_attr(meta, _x('.//smil:ref'), 'src')
         if error_element is not None and error_element.attrib['src'].startswith(
-                'http://link.theplatform.com/s/errorFiles/Unavailable.'):
+                'http://link.theplatform.%s/s/errorFiles/Unavailable.' % self._TP_TLD):
             raise ExtractorError(error_element.attrib['abstract'], expected=True)
 
         smil_formats = self._parse_smil_formats(
@@ -66,7 +68,7 @@ class ThePlatformBaseIE(OnceIE):
         return formats, subtitles
 
     def _download_theplatform_metadata(self, path, video_id):
-        info_url = 'http://link.theplatform.com/s/%s?format=preview' % path
+        info_url = 'http://link.theplatform.%s/s/%s?format=preview' % (self._TP_TLD, path)
         return self._download_json(info_url, video_id)
 
     def _parse_theplatform_metadata(self, info):
@@ -308,7 +310,7 @@ class ThePlatformIE(ThePlatformBaseIE, AdobePassIE):
 
 class ThePlatformFeedIE(ThePlatformBaseIE):
     _URL_TEMPLATE = '%s//feed.theplatform.com/f/%s/%s?form=json&%s'
-    _VALID_URL = r'https?://feed\.theplatform\.com/f/(?P<provider_id>[^/]+)/(?P<feed_id>[^?/]+)\?(?:[^&]+&)*(?P<filter>by(?:Gui|I)d=(?P<id>[\w-]+))'
+    _VALID_URL = r'https?://feed\.theplatform\.com/f/(?P<provider_id>[^/]+)/(?P<feed_id>[^?/]+)\?(?:[^&]+&)*(?P<filter>by(?:Gui|I)d=(?P<id>[^&]+))'
     _TESTS = [{
         # From http://player.theplatform.com/p/7wvmTC/MSNBCEmbeddedOffSite?guid=n_hardball_5biden_140207
         'url': 'http://feed.theplatform.com/f/7wvmTC/msnbc_video-p-test?form=json&pretty=true&range=-40&byGuid=n_hardball_5biden_140207',
@@ -325,6 +327,9 @@ class ThePlatformFeedIE(ThePlatformBaseIE):
             'categories': ['MSNBC/Issues/Democrats', 'MSNBC/Issues/Elections/Election 2016'],
             'uploader': 'NBCU-NEWS',
         },
+    }, {
+        'url': 'http://feed.theplatform.com/f/2E2eJC/nnd_NBCNews?byGuid=nn_netcast_180306.Copy.01',
+        'only_matching': True,
     }]
 
     def _extract_feed_info(self, provider_id, feed_id, filter_query, video_id, custom_fields=None, asset_types_query={}, account_id=None):
index 2b7b0d6e1b046d7184b194b5688af5295e3b8436..4a6cbfbb88d41dbb9d65a98eb4975e49a5ab0809 100644 (file)
@@ -15,6 +15,7 @@ from ..utils import (
     update_url_query,
     ExtractorError,
     strip_or_none,
+    url_or_none,
 )
 
 
@@ -154,8 +155,8 @@ class TurnerBaseIE(AdobePassIE):
         subtitles = {}
         for source in video_data.findall('closedCaptions/source'):
             for track in source.findall('track'):
-                track_url = track.get('url')
-                if not isinstance(track_url, compat_str) or track_url.endswith('/big'):
+                track_url = url_or_none(track.get('url'))
+                if not track_url or track_url.endswith('/big'):
                     continue
                 lang = track.get('lang') or track.get('label') or 'en'
                 subtitles.setdefault(lang, []).append({
index 2b2630b916dc94cea142986169cc615479d30d7e..4222ff9ee239fd2b55cc7771892ace9f3c59d43f 100644 (file)
@@ -4,10 +4,10 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..compat import compat_str
 from ..utils import (
     int_or_none,
     unescapeHTML,
+    url_or_none,
 )
 
 
@@ -106,9 +106,8 @@ class TVNetIE(InfoExtractor):
         for stream in self._download_json(data_file, video_id):
             if not isinstance(stream, dict):
                 continue
-            stream_url = stream.get('url')
-            if (stream_url in stream_urls or not stream_url or
-                    not isinstance(stream_url, compat_str)):
+            stream_url = url_or_none(stream.get('url'))
+            if stream_url in stream_urls or not stream_url:
                 continue
             stream_urls.add(stream_url)
             formats.extend(self._extract_m3u8_formats(
index 808571ece0fce5c9698c6c351b51f1ffcd717171..60937616f2c57e55cf093cbfe5f1d55987c75b55 100644 (file)
@@ -19,8 +19,8 @@ class TVNowBaseIE(InfoExtractor):
     _VIDEO_FIELDS = (
         'id', 'title', 'free', 'geoblocked', 'articleLong', 'articleShort',
         'broadcastStartDate', 'isDrm', 'duration', 'season', 'episode',
-        'manifest.dashclear', 'format.title', 'format.defaultImage169Format',
-        'format.defaultImage169Logo')
+        'manifest.dashclear', 'manifest.hlsclear', 'manifest.smoothclear',
+        'format.title', 'format.defaultImage169Format', 'format.defaultImage169Logo')
 
     def _call_api(self, path, video_id, query):
         return self._download_json(
@@ -31,27 +31,42 @@ class TVNowBaseIE(InfoExtractor):
         video_id = compat_str(info['id'])
         title = info['title']
 
-        mpd_url = info['manifest']['dashclear']
-        if not mpd_url:
+        paths = []
+        for manifest_url in (info.get('manifest') or {}).values():
+            if not manifest_url:
+                continue
+            manifest_url = update_url_query(manifest_url, {'filter': ''})
+            path = self._search_regex(r'https?://[^/]+/(.+?)\.ism/', manifest_url, 'path')
+            if path in paths:
+                continue
+            paths.append(path)
+
+            def url_repl(proto, suffix):
+                return re.sub(
+                    r'(?:hls|dash|hss)([.-])', proto + r'\1', re.sub(
+                        r'\.ism/(?:[^.]*\.(?:m3u8|mpd)|[Mm]anifest)',
+                        '.ism/' + suffix, manifest_url))
+
+            formats = self._extract_mpd_formats(
+                url_repl('dash', '.mpd'), video_id,
+                mpd_id='dash', fatal=False)
+            formats.extend(self._extract_ism_formats(
+                url_repl('hss', 'Manifest'),
+                video_id, ism_id='mss', fatal=False))
+            formats.extend(self._extract_m3u8_formats(
+                url_repl('hls', '.m3u8'), video_id, 'mp4',
+                'm3u8_native', m3u8_id='hls', fatal=False))
+            if formats:
+                break
+        else:
             if info.get('isDrm'):
                 raise ExtractorError(
                     'Video %s is DRM protected' % video_id, expected=True)
             if info.get('geoblocked'):
-                raise ExtractorError(
-                    'Video %s is not available from your location due to geo restriction' % video_id,
-                    expected=True)
+                raise self.raise_geo_restricted()
             if not info.get('free', True):
                 raise ExtractorError(
                     'Video %s is not available for free' % video_id, expected=True)
-
-        mpd_url = update_url_query(mpd_url, {'filter': ''})
-        formats = self._extract_mpd_formats(mpd_url, video_id, mpd_id='dash', fatal=False)
-        formats.extend(self._extract_ism_formats(
-            mpd_url.replace('dash.', 'hss.').replace('/.mpd', '/Manifest'),
-            video_id, ism_id='mss', fatal=False))
-        formats.extend(self._extract_m3u8_formats(
-            mpd_url.replace('dash.', 'hls.').replace('/.mpd', '/.m3u8'),
-            video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
         self._sort_formats(formats)
 
         description = info.get('articleLong') or info.get('articleShort')
@@ -88,7 +103,7 @@ class TVNowBaseIE(InfoExtractor):
 class TVNowIE(TVNowBaseIE):
     _VALID_URL = r'''(?x)
                     https?://
-                        (?:www\.)?tvnow\.(?:de|at|ch)/[^/]+/
+                        (?:www\.)?tvnow\.(?:de|at|ch)/(?P<station>[^/]+)/
                         (?P<show_id>[^/]+)/
                         (?!(?:list|jahr)(?:/|$))(?P<id>[^/?\#&]+)
                     '''
@@ -140,11 +155,13 @@ class TVNowIE(TVNowBaseIE):
     }]
 
     def _real_extract(self, url):
-        display_id = '%s/%s' % re.match(self._VALID_URL, url).groups()
+        mobj = re.match(self._VALID_URL, url)
+        display_id = '%s/%s' % mobj.group(2, 3)
 
         info = self._call_api(
             'movies/' + display_id, display_id, query={
                 'fields': ','.join(self._VIDEO_FIELDS),
+                'station': mobj.group(1),
             })
 
         return self._extract_video(info, display_id)
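The TVNow rewrite above derives DASH, Smooth Streaming and HLS manifests from whichever clear manifest the API exposes: `url_repl` swaps the protocol token (`hls`/`dash`/`hss`) embedded in the URL and replaces the `.ism/...` suffix. Sketched under an assumed URL shape (real TVNow URLs may differ):

    import re

    manifest_url = 'https://vod-dash.example.net/v/movie.ism/movie.mpd?filter='

    def url_repl(proto, suffix):
        # Swap the protocol token and the manifest suffix in one pass.
        return re.sub(
            r'(?:hls|dash|hss)([.-])', proto + r'\1', re.sub(
                r'\.ism/(?:[^.]*\.(?:m3u8|mpd)|[Mm]anifest)',
                '.ism/' + suffix, manifest_url))

    assert url_repl('hls', '.m3u8') == (
        'https://vod-hls.example.net/v/movie.ism/.m3u8?filter=')
    assert url_repl('hss', 'Manifest') == (
        'https://vod-hss.example.net/v/movie.ism/Manifest?filter=')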
index e09b5f804d897954f4488344d27beaa8a7a2eea6..8f1ff3b761d8bd6e21ee4f122e1ed62fbabd2d78 100644 (file)
@@ -19,6 +19,7 @@ from ..utils import (
     try_get,
     unsmuggle_url,
     update_url_query,
+    url_or_none,
 )
 
 
@@ -31,12 +32,12 @@ class TVPlayIE(InfoExtractor):
                         https?://
                             (?:www\.)?
                             (?:
-                                tvplay(?:\.skaties)?\.lv/parraides|
-                                (?:tv3play|play\.tv3)\.lt/programos|
+                                tvplay(?:\.skaties)?\.lv(?:/parraides)?|
+                                (?:tv3play|play\.tv3)\.lt(?:/programos)?|
                                 tv3play(?:\.tv3)?\.ee/sisu|
                                 (?:tv(?:3|6|8|10)play|viafree)\.se/program|
                                 (?:(?:tv3play|viasat4play|tv6play|viafree)\.no|(?:tv3play|viafree)\.dk)/programmer|
-                                play\.novatv\.bg/programi
+                                play\.nova(?:tv)?\.bg/programi
                             )
                             /(?:[^/]+/)+
                         )
@@ -202,10 +203,18 @@ class TVPlayIE(InfoExtractor):
                 'skip_download': True,
             },
         },
+        {
+            'url': 'https://play.nova.bg/programi/zdravei-bulgariya/764300?autostart=true',
+            'only_matching': True,
+        },
         {
             'url': 'http://tvplay.skaties.lv/parraides/vinas-melo-labak/418113?autostart=true',
             'only_matching': True,
         },
+        {
+            'url': 'https://tvplay.skaties.lv/vinas-melo-labak/418113/?autostart=true',
+            'only_matching': True,
+        },
         {
             # views is null
             'url': 'http://tvplay.skaties.lv/parraides/tv3-zinas/760183',
@@ -255,7 +264,8 @@ class TVPlayIE(InfoExtractor):
         quality = qualities(['hls', 'medium', 'high'])
         formats = []
         for format_id, video_url in streams.get('streams', {}).items():
-            if not video_url or not isinstance(video_url, compat_str):
+            video_url = url_or_none(video_url)
+            if not video_url:
                 continue
             ext = determine_ext(video_url)
             if ext == 'f4m':
@@ -286,6 +296,7 @@ class TVPlayIE(InfoExtractor):
                         'url': m.group('url'),
                         'app': m.group('app'),
                         'play_path': m.group('playpath'),
+                        'preference': -1,
                     })
                 else:
                     fmt.update({
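
Tagging the RTMP variant with 'preference': -1 demotes it below the HTTP(S) formats for the same stream. A minimal illustration of the effect, assuming youtube-dl's convention that formats are sorted worst-to-best and the last entry wins; the format dicts are hypothetical:

    formats = [
        {'format_id': 'rtmp-high', 'preference': -1, 'height': 720},
        {'format_id': 'hls-high', 'height': 720},
    ]
    formats.sort(key=lambda f: (f.get('preference') or 0, f.get('height') or 0))
    print([f['format_id'] for f in formats])  # ['rtmp-high', 'hls-high']
    # 'hls-high' sorts last, i.e. best, so the RTMP URL becomes a fallback only.
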
@@ -445,3 +456,102 @@ class ViafreeIE(InfoExtractor):
                     'skip_rtmp': True,
                 }),
             ie=TVPlayIE.ie_key(), video_id=video_id)
+
+
+class TVPlayHomeIE(InfoExtractor):
+    _VALID_URL = r'https?://tvplay\.(?:tv3\.lt|skaties\.lv|tv3\.ee)/[^/]+/[^/?#&]+-(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'https://tvplay.tv3.lt/aferistai-n-7/aferistai-10047125/',
+        'info_dict': {
+            'id': '366367',
+            'ext': 'mp4',
+            'title': 'Aferistai',
+            'description': 'Aferistai. Kalėdinė pasaka.',
+            'series': 'Aferistai [N-7]',
+            'season': '1 sezonas',
+            'season_number': 1,
+            'duration': 464,
+            'timestamp': 1394209658,
+            'upload_date': '20140307',
+            'age_limit': 18,
+        },
+        'params': {
+            'skip_download': True,
+        },
+        'add_ie': [TVPlayIE.ie_key()],
+    }, {
+        'url': 'https://tvplay.skaties.lv/vinas-melo-labak/vinas-melo-labak-10280317/',
+        'only_matching': True,
+    }, {
+        'url': 'https://tvplay.tv3.ee/cool-d-ga-mehhikosse/cool-d-ga-mehhikosse-10044354/',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+
+        video_id = self._search_regex(
+            r'data-asset-id\s*=\s*["\'](\d{5,7})\b', webpage, 'video id',
+            default=None)
+
+        if video_id:
+            return self.url_result(
+                'mtg:%s' % video_id, ie=TVPlayIE.ie_key(), video_id=video_id)
+
+        m3u8_url = self._search_regex(
+            r'data-file\s*=\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
+            'm3u8 url', group='url')
+
+        formats = self._extract_m3u8_formats(
+            m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native',
+            m3u8_id='hls')
+        self._sort_formats(formats)
+
+        title = self._search_regex(
+            r'data-title\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1', webpage,
+            'title', default=None, group='value') or self._html_search_meta(
+            'title', webpage, default=None) or self._og_search_title(
+            webpage)
+
+        description = self._html_search_meta(
+            'description', webpage,
+            default=None) or self._og_search_description(webpage)
+
+        thumbnail = self._search_regex(
+            r'data-image\s*=\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
+            'thumbnail', default=None, group='url') or self._html_search_meta(
+            'thumbnail', webpage, default=None) or self._og_search_thumbnail(
+            webpage)
+
+        duration = int_or_none(self._search_regex(
+            r'data-duration\s*=\s*["\'](\d+)', webpage, 'duration',
+            fatal=False))
+
+        season = self._search_regex(
+            (r'data-series-title\s*=\s*(["\'])[^/]+/(?P<value>(?:(?!\1).)+)\1',
+             r'\bseason\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1'), webpage,
+            'season', default=None, group='value')
+        season_number = int_or_none(self._search_regex(
+            r'(\d+)(?:[.\s]+sezona|\s+HOOAEG)', season or '', 'season number',
+            default=None))
+        episode = self._search_regex(
+            r'(["\'])(?P<value>(?:(?!\1).)+)\1', webpage, 'episode',
+            default=None, group='value')
+        episode_number = int_or_none(self._search_regex(
+            r'(?:S[eē]rija|Osa)\s+(\d+)', episode or '', 'episode number',
+            default=None))
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'duration': duration,
+            'season': season,
+            'season_number': season_number,
+            'episode': episode,
+            'episode_number': episode_number,
+            'formats': formats,
+        }
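
The new TVPlayHomeIE is a thin front end: when the page carries a numeric data-asset-id it hands the video off to the existing MTG extractor through an mtg: URL, and only otherwise does it scrape the m3u8 address and the data-* metadata itself. A reduced sketch of that dispatch, assuming webpage is the downloaded HTML:

    import re

    def resolve_tvplay_home(webpage):
        # Prefer the asset id (delegated via 'mtg:<id>'), else the HLS URL.
        asset = re.search(r'data-asset-id\s*=\s*["\'](\d{5,7})\b', webpage)
        if asset:
            return 'url_result', 'mtg:%s' % asset.group(1)
        m3u8 = re.search(
            r'data-file\s*=\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage)
        return 'm3u8', m3u8.group('url')

    print(resolve_tvplay_home('<div data-asset-id="366367">'))
    print(resolve_tvplay_home('<div data-file="https://example.com/x.m3u8">'))
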
index e01f11331007d072e6e78272d97fedce599ede4a..b39972b1efbfd46aab00a2f05e1a82605b8772b8 100644 (file)
@@ -4,10 +4,10 @@ from __future__ import unicode_literals
 import itertools
 import re
 import random
+import json
 
 from .common import InfoExtractor
 from ..compat import (
-    compat_HTTPError,
     compat_kwargs,
     compat_parse_qs,
     compat_str,
@@ -26,7 +26,7 @@ from ..utils import (
     try_get,
     unified_timestamp,
     update_url_query,
-    urlencode_postdata,
+    url_or_none,
     urljoin,
 )
 
@@ -36,8 +36,9 @@ class TwitchBaseIE(InfoExtractor):
 
     _API_BASE = 'https://api.twitch.tv'
     _USHER_BASE = 'https://usher.ttvnw.net'
-    _LOGIN_URL = 'https://www.twitch.tv/login'
-    _CLIENT_ID = 'jzkbprff40iqj646a697cyrvl0zt2m6'
+    _LOGIN_FORM_URL = 'https://www.twitch.tv/login'
+    _LOGIN_POST_URL = 'https://passport.twitch.tv/login'
+    _CLIENT_ID = 'kimne78kx3ncx6brgo4mv6wki5h1ko'
     _NETRC_MACHINE = 'twitch'
 
     def _handle_error(self, response):
@@ -76,22 +77,21 @@ class TwitchBaseIE(InfoExtractor):
             page_url = urlh.geturl()
             post_url = self._search_regex(
                 r'<form[^>]+action=(["\'])(?P<url>.+?)\1', page,
-                'post url', default=page_url, group='url')
+                'post url', default=self._LOGIN_POST_URL, group='url')
             post_url = urljoin(page_url, post_url)
 
-            headers = {'Referer': page_url}
+            headers = {
+                'Referer': page_url,
+                'Origin': page_url,
+                'Content-Type': 'text/plain;charset=UTF-8',
+            }
 
-            try:
-                response = self._download_json(
-                    post_url, None, note,
-                    data=urlencode_postdata(form),
-                    headers=headers)
-            except ExtractorError as e:
-                if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
-                    response = self._parse_json(
-                        e.cause.read().decode('utf-8'), None)
-                    fail(response.get('message') or response['errors'][0])
-                raise
+            response = self._download_json(
+                post_url, None, note, data=json.dumps(form).encode(),
+                headers=headers, expected_status=400)
+            error = response.get('error_description') or response.get('error_code')
+            if error:
+                fail(error)
 
             if 'Authenticated successfully' in response.get('message', ''):
                 return None, None
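
The login flow now posts a JSON body to passport.twitch.tv instead of form-encoding against www.twitch.tv, and rather than trapping HTTP 400 in an exception handler it whitelists that status via expected_status and reads error_description from the response body. A sketch of the request shape only (Python 3 urllib is used for brevity, the endpoint, header set and public client id come from the + lines above, and the credentials are placeholders):

    import json
    from urllib.request import Request

    form = {
        'username': 'user',
        'password': 'pass',
        'client_id': 'kimne78kx3ncx6brgo4mv6wki5h1ko',
    }
    req = Request(
        'https://passport.twitch.tv/login',
        data=json.dumps(form).encode(),
        headers={
            'Referer': 'https://www.twitch.tv/login',
            'Origin': 'https://www.twitch.tv/login',
            'Content-Type': 'text/plain;charset=UTF-8',
        })
    print(req.get_method(), req.full_url)  # POST https://passport.twitch.tv/login
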
@@ -104,7 +104,7 @@ class TwitchBaseIE(InfoExtractor):
                 headers=headers)
 
         login_page, handle = self._download_webpage_handle(
-            self._LOGIN_URL, None, 'Downloading login page')
+            self._LOGIN_FORM_URL, None, 'Downloading login page')
 
         # Some TOR nodes and public proxies are blocked completely
         if 'blacklist_message' in login_page:
@@ -114,6 +114,7 @@ class TwitchBaseIE(InfoExtractor):
             login_page, handle, 'Logging in', {
                 'username': username,
                 'password': password,
+                'client_id': self._CLIENT_ID,
             })
 
         # Successful login
@@ -239,7 +240,7 @@ class TwitchVodIE(TwitchItemBaseIE):
     _VALID_URL = r'''(?x)
                     https?://
                         (?:
-                            (?:(?:www|go|m)\.)?twitch\.tv/(?:[^/]+/v|videos)/|
+                            (?:(?:www|go|m)\.)?twitch\.tv/(?:[^/]+/v(?:ideo)?|videos)/|
                             player\.twitch\.tv/\?.*?\bvideo=v
                         )
                         (?P<id>\d+)
@@ -295,6 +296,9 @@ class TwitchVodIE(TwitchItemBaseIE):
     }, {
         'url': 'https://m.twitch.tv/beagsandjam/v/247478721',
         'only_matching': True,
+    }, {
+        'url': 'https://www.twitch.tv/northernlion/video/291940395',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
@@ -663,8 +667,8 @@ class TwitchClipsIE(TwitchBaseIE):
         for option in status['quality_options']:
             if not isinstance(option, dict):
                 continue
-            source = option.get('source')
-            if not source or not isinstance(source, compat_str):
+            source = url_or_none(option.get('source'))
+            if not source:
                 continue
             formats.append({
                 'url': source,
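
This hunk is one instance of a refactoring that recurs through the release: the two-line isinstance(..., compat_str) guards are collapsed into the new utils.url_or_none helper, which returns the value only when it is a string that looks like an absolute or protocol-relative URL. A minimal re-implementation matching that documented behaviour (plain str stands in for compat_str):

    import re

    def url_or_none(url):
        # Reject None/empty and non-string values, then require an
        # absolute or protocol-relative URL ('scheme://...' or '//...').
        if not url or not isinstance(url, str):
            return None
        url = url.strip()
        return url if re.match(r'^(?:[a-zA-Z][\da-zA-Z.+-]*:)?//', url) else None

    assert url_or_none('https://example.com/v.mp4') == 'https://example.com/v.mp4'
    assert url_or_none('//cdn.example.com/v.mp4') == '//cdn.example.com/v.mp4'
    assert url_or_none('not a url') is None
    assert url_or_none(None) is None

The same substitution appears below in udemy.py, vidme.py, vk.py, xhamster.py, yapfiles.py, youjizz.py, youporn.py and zattoo.py.
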
index a7196997ec111cc4e98331d70cfb5df46682fa49..79c45f80e0f827d07a2ef362597b6e7fbab122e7 100644 (file)
@@ -20,6 +20,7 @@ from ..utils import (
     sanitized_Request,
     try_get,
     unescapeHTML,
+    url_or_none,
     urlencode_postdata,
 )
 
@@ -265,8 +266,8 @@ class UdemyIE(InfoExtractor):
             if not isinstance(source_list, list):
                 return
             for source in source_list:
-                video_url = source.get('file') or source.get('src')
-                if not video_url or not isinstance(video_url, compat_str):
+                video_url = url_or_none(source.get('file') or source.get('src'))
+                if not video_url:
                     continue
                 if source.get('type') == 'application/x-mpegURL' or determine_ext(video_url) == 'm3u8':
                     formats.extend(self._extract_m3u8_formats(
@@ -293,8 +294,8 @@ class UdemyIE(InfoExtractor):
                     continue
                 if track.get('kind') != 'captions':
                     continue
-                src = track.get('src')
-                if not src or not isinstance(src, compat_str):
+                src = url_or_none(track.get('src'))
+                if not src:
                     continue
                 lang = track.get('language') or track.get(
                     'srclang') or track.get('label')
@@ -314,8 +315,8 @@ class UdemyIE(InfoExtractor):
             for cc in captions:
                 if not isinstance(cc, dict):
                     continue
-                cc_url = cc.get('url')
-                if not cc_url or not isinstance(cc_url, compat_str):
+                cc_url = url_or_none(cc.get('url'))
+                if not cc_url:
                     continue
                 lang = try_get(cc, lambda x: x['locale']['locale'], compat_str)
                 sub_dict = (automatic_captions if cc.get('source') == 'auto'
index c21a09c01f03035b092a09ab314bf73430cd7d5a..fe7a26b620deeff53a5443beea96e109f7685dc8 100644 (file)
@@ -24,6 +24,7 @@ class VGTVIE(XstreamIE):
         'aftenposten.no/webtv': 'aptv',
         'ap.vgtv.no/webtv': 'aptv',
         'tv.aftonbladet.se/abtv': 'abtv',
+        'www.aftonbladet.se/tv': 'abtv',
     }
 
     _APP_NAME_TO_VENDOR = {
@@ -44,7 +45,7 @@ class VGTVIE(XstreamIE):
                     (?:
                         (?:\#!/)?(?:video|live)/|
                         embed?.*id=|
-                        articles/
+                        a(?:rticles)?/
                     )|
                     (?P<appname>
                         %s
@@ -143,6 +144,10 @@ class VGTVIE(XstreamIE):
             'url': 'http://tv.aftonbladet.se/abtv/articles/36015',
             'only_matching': True,
         },
+        {
+            'url': 'https://www.aftonbladet.se/tv/a/36015',
+            'only_matching': True,
+        },
         {
             'url': 'abtv:140026',
             'only_matching': True,
@@ -178,13 +183,15 @@ class VGTVIE(XstreamIE):
 
         streams = data['streamUrls']
         stream_type = data.get('streamType')
-
+        is_live = stream_type == 'live'
         formats = []
 
         hls_url = streams.get('hls')
         if hls_url:
             formats.extend(self._extract_m3u8_formats(
-                hls_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
+                hls_url, video_id, 'mp4',
+                entry_protocol='m3u8' if is_live else 'm3u8_native',
+                m3u8_id='hls', fatal=False))
 
         hds_url = streams.get('hds')
         if hds_url:
@@ -229,13 +236,13 @@ class VGTVIE(XstreamIE):
 
         info.update({
             'id': video_id,
-            'title': self._live_title(data['title']) if stream_type == 'live' else data['title'],
+            'title': self._live_title(data['title']) if is_live else data['title'],
             'description': data['description'],
             'thumbnail': data['images']['main'] + '?t[]=900x506q80',
             'timestamp': data['published'],
             'duration': float_or_none(data['duration'], 1000),
             'view_count': data['displays'],
-            'is_live': True if stream_type == 'live' else False,
+            'is_live': is_live,
         })
         return info
 
index 59adb2377e06c95bb8681432a0411723b7879945..174e69cd6b1e8ce1e5bf829781140ea756bdc6bf 100644 (file)
@@ -3,15 +3,13 @@ from __future__ import unicode_literals
 import itertools
 
 from .common import InfoExtractor
-from ..compat import (
-    compat_HTTPError,
-    compat_str,
-)
+from ..compat import compat_HTTPError
 from ..utils import (
     ExtractorError,
     int_or_none,
     float_or_none,
     parse_iso8601,
+    url_or_none,
 )
 
 
@@ -166,8 +164,8 @@ class VidmeIE(InfoExtractor):
 
         formats = []
         for f in video.get('formats', []):
-            format_url = f.get('uri')
-            if not format_url or not isinstance(format_url, compat_str):
+            format_url = url_or_none(f.get('uri'))
+            if not format_url:
                 continue
             format_type = f.get('type')
             if format_type == 'dash':
index d70283479ae18c5c01b0c5caba75ad0e3392698f..42ea4952c381b497b6f01abcb44212339563e0a3 100644 (file)
@@ -13,7 +13,7 @@ from ..utils import (
 
 
 class VidziIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?vidzi\.(?:tv|cc|si)/(?:embed-)?(?P<id>[0-9a-zA-Z]+)'
+    _VALID_URL = r'https?://(?:www\.)?vidzi\.(?:tv|cc|si|nu)/(?:embed-)?(?P<id>[0-9a-zA-Z]+)'
     _TESTS = [{
         'url': 'http://vidzi.tv/cghql9yq6emu.html',
         'md5': '4f16c71ca0c8c8635ab6932b5f3f1660',
@@ -35,6 +35,9 @@ class VidziIE(InfoExtractor):
     }, {
         'url': 'https://vidzi.si/rph9gztxj1et.html',
         'only_matching': True,
+    }, {
+        'url': 'http://vidzi.nu/cghql9yq6emu.html',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
index 3baa2d075543fe951db281e081da8527aeced13c..e49b233f2b98ebf5cf559e9e80d8a06a0687381f 100644 (file)
@@ -539,9 +539,10 @@ class VimeoIE(VimeoBaseInfoExtractor):
                 # We try to find out to which variable is assigned the config dic
                 m_variable_name = re.search(r'(\w)\.video\.id', webpage)
                 if m_variable_name is not None:
-                    config_re = r'%s=({[^}].+?});' % re.escape(m_variable_name.group(1))
+                    config_re = [r'%s=({[^}].+?});' % re.escape(m_variable_name.group(1))]
                 else:
                     config_re = [r' = {config:({.+?}),assets:', r'(?:[abc])=({.+?});']
+                config_re.append(r'\bvar\s+r\s*=\s*({.+?})\s*;')
                 config = self._search_regex(config_re, webpage, 'info section',
                                             flags=re.DOTALL)
                 config = json.loads(config)
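
Normalizing config_re to a list in both branches is what allows the extra var r = {...}; pattern to be appended unconditionally; _search_regex already accepts a list of patterns and returns the first hit. A toy equivalent of that lookup order, on a synthetic haystack:

    import re

    def search_first(patterns, haystack):
        # First pattern that matches wins, as with _search_regex.
        for p in patterns:
            m = re.search(p, haystack, re.DOTALL)
            if m:
                return m.group(1)

    config_re = [r' = {config:({.+?}),assets:', r'(?:[abc])=({.+?});']
    config_re.append(r'\bvar\s+r\s*=\s*({.+?})\s*;')
    print(search_first(config_re, 'var r = {"video": {"id": 1}} ;'))
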
diff --git a/youtube_dl/extractor/viqeo.py b/youtube_dl/extractor/viqeo.py
new file mode 100644 (file)
index 0000000..be7dfa8
--- /dev/null
@@ -0,0 +1,99 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    int_or_none,
+    str_or_none,
+    url_or_none,
+)
+
+
+class ViqeoIE(InfoExtractor):
+    _VALID_URL = r'''(?x)
+                        (?:
+                            viqeo:|
+                            https?://cdn\.viqeo\.tv/embed/*\?.*?\bvid=|
+                            https?://api\.viqeo\.tv/v\d+/data/startup?.*?\bvideo(?:%5B%5D|\[\])=
+                        )
+                        (?P<id>[\da-f]+)
+                    '''
+    _TESTS = [{
+        'url': 'https://cdn.viqeo.tv/embed/?vid=cde96f09d25f39bee837',
+        'md5': 'a169dd1a6426b350dca4296226f21e76',
+        'info_dict': {
+            'id': 'cde96f09d25f39bee837',
+            'ext': 'mp4',
+            'title': 'cde96f09d25f39bee837',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'duration': 76,
+        },
+    }, {
+        'url': 'viqeo:cde96f09d25f39bee837',
+        'only_matching': True,
+    }, {
+        'url': 'https://api.viqeo.tv/v1/data/startup?video%5B%5D=71bbec412ade45c3216c&profile=112',
+        'only_matching': True,
+    }]
+
+    @staticmethod
+    def _extract_urls(webpage):
+        return [
+            mobj.group('url')
+            for mobj in re.finditer(
+                r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//cdn\.viqeo\.tv/embed/*\?.*?\bvid=[\da-f]+.*?)\1',
+                webpage)]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(
+            'https://cdn.viqeo.tv/embed/?vid=%s' % video_id, video_id)
+
+        data = self._parse_json(
+            self._search_regex(
+                r'SLOT_DATA\s*=\s*({.+?})\s*;', webpage, 'slot data'),
+            video_id)
+
+        formats = []
+        thumbnails = []
+        for media_file in data['mediaFiles']:
+            if not isinstance(media_file, dict):
+                continue
+            media_url = url_or_none(media_file.get('url'))
+            if not media_url or not media_url.startswith(('http', '//')):
+                continue
+            media_type = str_or_none(media_file.get('type'))
+            if not media_type:
+                continue
+            media_kind = media_type.split('/')[0].lower()
+            f = {
+                'url': media_url,
+                'width': int_or_none(media_file.get('width')),
+                'height': int_or_none(media_file.get('height')),
+            }
+            format_id = str_or_none(media_file.get('quality'))
+            if media_kind == 'image':
+                f['id'] = format_id
+                thumbnails.append(f)
+            elif media_kind in ('video', 'audio'):
+                is_audio = media_kind == 'audio'
+                f.update({
+                    'format_id': 'audio' if is_audio else format_id,
+                    'fps': int_or_none(media_file.get('fps')),
+                    'vcodec': 'none' if is_audio else None,
+                })
+                formats.append(f)
+        self._sort_formats(formats)
+
+        duration = int_or_none(data.get('duration'))
+
+        return {
+            'id': video_id,
+            'title': video_id,
+            'duration': duration,
+            'thumbnails': thumbnails,
+            'formats': formats,
+        }
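
The new Viqeo extractor routes each SLOT_DATA mediaFiles entry on the major part of its MIME type: image/* entries become thumbnails, video/* and audio/* entries become formats, with audio-only entries marked vcodec 'none'. A reduced sketch of that dispatch on made-up data:

    def split_media(media_files):
        # Route entries by MIME kind, as the extractor above does.
        formats, thumbnails = [], []
        for mf in media_files:
            kind = mf.get('type', '').split('/')[0].lower()
            entry = {'url': mf['url']}
            if kind == 'image':
                thumbnails.append(entry)
            elif kind in ('video', 'audio'):
                entry['vcodec'] = 'none' if kind == 'audio' else None
                formats.append(entry)
        return formats, thumbnails

    fmts, thumbs = split_media([
        {'type': 'video/mp4', 'url': 'https://cdn.example.com/v.mp4'},
        {'type': 'image/jpeg', 'url': 'https://cdn.example.com/t.jpg'},
    ])
    print(len(fmts), len(thumbs))  # 1 1
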
index 5cf93591cd0d863bb1f62968a0c25ea86e4cc744..3bd37525b6ffcc53d8cb35bed9ab8974847893cc 100644 (file)
@@ -195,16 +195,29 @@ class ViuOTTIE(InfoExtractor):
         'skip': 'Geo-restricted to Hong Kong',
     }]
 
+    _AREA_ID = {
+        'HK': 1,
+        'SG': 2,
+        'TH': 4,
+        'PH': 5,
+    }
+
     def _real_extract(self, url):
         country_code, video_id = re.match(self._VALID_URL, url).groups()
 
+        query = {
+            'r': 'vod/ajax-detail',
+            'platform_flag_label': 'web',
+            'product_id': video_id,
+        }
+
+        area_id = self._AREA_ID.get(country_code.upper())
+        if area_id:
+            query['area_id'] = area_id
+
         product_data = self._download_json(
             'http://www.viu.com/ott/%s/index.php' % country_code, video_id,
-            'Downloading video info', query={
-                'r': 'vod/ajax-detail',
-                'platform_flag_label': 'web',
-                'product_id': video_id,
-            })['data']
+            'Downloading video info', query=query)['data']
 
         video_data = product_data.get('current_product')
         if not video_data:
@@ -214,6 +227,9 @@ class ViuOTTIE(InfoExtractor):
             'https://d1k2us671qcoau.cloudfront.net/distribute_web_%s.php' % country_code,
             video_id, 'Downloading stream info', query={
                 'ccs_product_id': video_data['ccs_product_id'],
+            }, headers={
+                'Referer': url,
+                'Origin': re.search(r'https?://[^/]+', url).group(0),
             })['data']['stream']
 
         stream_sizes = stream_data.get('size', {})
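
Two independent fixes land here: the country code from the URL is mapped to the numeric area_id the Viu API expects, and the stream-info request now carries Referer/Origin headers, with Origin derived as the scheme-plus-host prefix of the page URL. Both derivations in isolation (the product URL is hypothetical):

    import re

    AREA_ID = {'HK': 1, 'SG': 2, 'TH': 4, 'PH': 5}  # mapping from the + lines

    url = 'https://www.viu.com/ott/sg/en-us/vod/12345/'
    country_code = 'sg'

    query = {'r': 'vod/ajax-detail', 'platform_flag_label': 'web'}
    area_id = AREA_ID.get(country_code.upper())
    if area_id:
        query['area_id'] = area_id

    origin = re.search(r'https?://[^/]+', url).group(0)
    print(query['area_id'], origin)  # 2 https://www.viu.com
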
index 29002b35fc08469c0d39a73cdec2c812869c1c21..ef8b9bcb7b610c25925ed2725a8bb43d8f3a6d1c 100644 (file)
@@ -17,9 +17,11 @@ from ..utils import (
     int_or_none,
     orderedSet,
     remove_start,
+    str_or_none,
     str_to_int,
     unescapeHTML,
     unified_timestamp,
+    url_or_none,
     urlencode_postdata,
 )
 from .dailymotion import DailymotionIE
@@ -105,10 +107,10 @@ class VKIE(VKBaseIE):
                 'ext': 'mp4',
                 'title': 'ProtivoGunz - Хуёвая песня',
                 'uploader': 're:(?:Noize MC|Alexander Ilyashenko).*',
+                'uploader_id': '-77521',
                 'duration': 195,
-                'timestamp': 1329060660,
+                'timestamp': 1329049880,
                 'upload_date': '20120212',
-                'view_count': int,
             },
         },
         {
@@ -117,12 +119,12 @@ class VKIE(VKBaseIE):
             'info_dict': {
                 'id': '165548505',
                 'ext': 'mp4',
-                'uploader': 'Tom Cruise',
                 'title': 'No name',
+                'uploader': 'Tom Cruise',
+                'uploader_id': '205387401',
                 'duration': 9,
-                'timestamp': 1374374880,
-                'upload_date': '20130721',
-                'view_count': int,
+                'timestamp': 1374364108,
+                'upload_date': '20130720',
             }
         },
         {
@@ -206,10 +208,10 @@ class VKIE(VKBaseIE):
                 'id': 'V3K4mi0SYkc',
                 'ext': 'webm',
                 'title': "DSWD Awards 'Children's Joy Foundation, Inc.' Certificate of Registration and License to Operate",
-                'description': 'md5:d9903938abdc74c738af77f527ca0596',
-                'duration': 178,
+                'description': 'md5:bf9c26cfa4acdfb146362682edd3827a',
+                'duration': 179,
                 'upload_date': '20130116',
-                'uploader': "Children's Joy Foundation",
+                'uploader': "Children's Joy Foundation Inc.",
                 'uploader_id': 'thecjf',
                 'view_count': int,
             },
@@ -221,6 +223,7 @@ class VKIE(VKBaseIE):
                 'id': 'k3lz2cmXyRuJQSjGHUv',
                 'ext': 'mp4',
                 'title': 'md5:d52606645c20b0ddbb21655adaa4f56f',
+                # TODO: fix test by fixing dailymotion description extraction
                 'description': 'md5:c651358f03c56f1150b555c26d90a0fd',
                 'uploader': 'AniLibria.Tv',
                 'upload_date': '20160914',
@@ -240,9 +243,12 @@ class VKIE(VKBaseIE):
                 'ext': 'mp4',
                 'title': 'S-Dance, репетиции к The way show',
                 'uploader': 'THE WAY SHOW | 17 апреля',
-                'timestamp': 1454870100,
+                'uploader_id': '-110305615',
+                'timestamp': 1454859345,
                 'upload_date': '20160207',
-                'view_count': int,
+            },
+            'params': {
+                'skip_download': True,
             },
         },
         {
@@ -295,7 +301,7 @@ class VKIE(VKBaseIE):
         video_id = mobj.group('videoid')
 
         if video_id:
-            info_url = 'https://vk.com/al_video.php?act=show&al=1&module=video&video=%s' % video_id
+            info_url = 'https://vk.com/al_video.php?act=show_inline&al=1&video=' + video_id
             # Some videos (removed?) can only be downloaded with list id specified
             list_id = mobj.group('list_id')
             if list_id:
@@ -345,6 +351,9 @@ class VKIE(VKBaseIE):
 
             r'<!>This video is no longer available, because its author has been blocked.':
             'Video %s is no longer available, because its author has been blocked.',
+
+            r'<!>This video is no longer available, because it has been deleted.':
+            'Video %s is no longer available, because it has been deleted.',
         }
 
         for error_re, error_msg in ERRORS.items():
@@ -393,7 +402,8 @@ class VKIE(VKBaseIE):
         if not data:
             data = self._parse_json(
                 self._search_regex(
-                    r'<!json>\s*({.+?})\s*<!>', info_page, 'json', default='{}'),
+                    [r'<!json>\s*({.+?})\s*<!>', r'<!json>\s*({.+})'],
+                    info_page, 'json', default='{}'),
                 video_id)
             if data:
                 data = data['player']['params'][0]
@@ -415,7 +425,7 @@ class VKIE(VKBaseIE):
 
         timestamp = unified_timestamp(self._html_search_regex(
             r'class=["\']mv_info_date[^>]+>([^<]+)(?:<|from)', info_page,
-            'upload date', fatal=False))
+            'upload date', default=None)) or int_or_none(data.get('date'))
 
         view_count = str_to_int(self._search_regex(
             r'class=["\']mv_views_count[^>]+>\s*([\d,.]+)',
@@ -423,7 +433,8 @@ class VKIE(VKBaseIE):
 
         formats = []
         for format_id, format_url in data.items():
-            if not isinstance(format_url, compat_str) or not format_url.startswith(('http', '//', 'rtmp')):
+            format_url = url_or_none(format_url)
+            if not format_url or not format_url.startswith(('http', '//', 'rtmp')):
                 continue
             if (format_id.startswith(('url', 'cache')) or
                     format_id in ('extra_data', 'live_mp4', 'postlive_mp4')):
@@ -452,9 +463,12 @@ class VKIE(VKBaseIE):
             'title': title,
             'thumbnail': data.get('jpg'),
             'uploader': data.get('md_author'),
+            'uploader_id': str_or_none(data.get('author_id')),
             'duration': data.get('duration'),
             'timestamp': timestamp,
             'view_count': view_count,
+            'like_count': int_or_none(data.get('liked')),
+            'dislike_count': int_or_none(data.get('nolikes')),
             'is_live': is_live,
         }
 
index 64d0224e6f92779c68e2170564c2b1ad459269ed..0b5165fd03d8837a2954f864350a642b47176aff 100644 (file)
@@ -57,7 +57,7 @@ class VLiveIE(InfoExtractor):
         video_id = self._match_id(url)
 
         webpage = self._download_webpage(
-            'http://www.vlive.tv/video/%s' % video_id, video_id)
+            'https://www.vlive.tv/video/%s' % video_id, video_id)
 
         VIDEO_PARAMS_RE = r'\bvlive\.video\.init\(([^)]+)'
         VIDEO_PARAMS_FIELD = 'video params'
@@ -108,11 +108,11 @@ class VLiveIE(InfoExtractor):
 
     def _live(self, video_id, webpage):
         init_page = self._download_webpage(
-            'http://www.vlive.tv/video/init/view',
+            'https://www.vlive.tv/video/init/view',
             video_id, note='Downloading live webpage',
             data=urlencode_postdata({'videoSeq': video_id}),
             headers={
-                'Referer': 'http://www.vlive.tv/video/%s' % video_id,
+                'Referer': 'https://www.vlive.tv/video/%s' % video_id,
                 'Content-Type': 'application/x-www-form-urlencoded'
             })
 
index 64b13f0ed00f6f58e6eea6d88bacb04f7a3f757d..921e9e172496975a2b7ff78d7f08871b6c27f8f7 100644 (file)
@@ -72,7 +72,7 @@ class VRVBaseIE(InfoExtractor):
 class VRVIE(VRVBaseIE):
     IE_NAME = 'vrv'
     _VALID_URL = r'https?://(?:www\.)?vrv\.co/watch/(?P<id>[A-Z0-9]+)'
-    _TEST = {
+    _TESTS = [{
         'url': 'https://vrv.co/watch/GR9PNZ396/Hidden-America-with-Jonah-Ray:BOSTON-WHERE-THE-PAST-IS-THE-PRESENT',
         'info_dict': {
             'id': 'GR9PNZ396',
@@ -85,7 +85,28 @@ class VRVIE(VRVBaseIE):
             # m3u8 download
             'skip_download': True,
         },
-    }
+    }]
+
+    def _extract_vrv_formats(self, url, video_id, stream_format, audio_lang, hardsub_lang):
+        if not url or stream_format not in ('hls', 'dash'):
+            return []
+        stream_id = hardsub_lang or audio_lang
+        format_id = '%s-%s' % (stream_format, stream_id)
+        if stream_format == 'hls':
+            adaptive_formats = self._extract_m3u8_formats(
+                url, video_id, 'mp4', m3u8_id=format_id,
+                note='Downloading %s m3u8 information' % stream_id,
+                fatal=False)
+        elif stream_format == 'dash':
+            adaptive_formats = self._extract_mpd_formats(
+                url, video_id, mpd_id=format_id,
+                note='Downloading %s MPD information' % stream_id,
+                fatal=False)
+        if audio_lang:
+            for f in adaptive_formats:
+                if f.get('acodec') != 'none':
+                    f['language'] = audio_lang
+        return adaptive_formats
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
@@ -115,26 +136,9 @@ class VRVIE(VRVBaseIE):
         for stream_type, streams in streams_json.get('streams', {}).items():
             if stream_type in ('adaptive_hls', 'adaptive_dash'):
                 for stream in streams.values():
-                    stream_url = stream.get('url')
-                    if not stream_url:
-                        continue
-                    stream_id = stream.get('hardsub_locale') or audio_locale
-                    format_id = '%s-%s' % (stream_type.split('_')[1], stream_id)
-                    if stream_type == 'adaptive_hls':
-                        adaptive_formats = self._extract_m3u8_formats(
-                            stream_url, video_id, 'mp4', m3u8_id=format_id,
-                            note='Downloading %s m3u8 information' % stream_id,
-                            fatal=False)
-                    else:
-                        adaptive_formats = self._extract_mpd_formats(
-                            stream_url, video_id, mpd_id=format_id,
-                            note='Downloading %s MPD information' % stream_id,
-                            fatal=False)
-                    if audio_locale:
-                        for f in adaptive_formats:
-                            if f.get('acodec') != 'none':
-                                f['language'] = audio_locale
-                    formats.extend(adaptive_formats)
+                    formats.extend(self._extract_vrv_formats(
+                        stream.get('url'), video_id, stream_type.split('_')[1],
+                        audio_locale, stream.get('hardsub_locale')))
         self._sort_formats(formats)
 
         subtitles = {}
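
Factoring the HLS/DASH handling out into _extract_vrv_formats shrinks the per-stream loop above to a single call and makes the logic reusable by other extractors built on the same backend. The helper keys its format ids on the hardsub locale, falling back to the audio locale; that construction in isolation:

    def vrv_format_id(stream_format, audio_lang, hardsub_lang):
        # Mirrors the id construction in _extract_vrv_formats above.
        return '%s-%s' % (stream_format, hardsub_lang or audio_lang)

    print(vrv_format_id('hls', 'ja-JP', None))      # hls-ja-JP
    print(vrv_format_id('dash', 'ja-JP', 'en-US'))  # dash-en-US
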
index be0bcba15380041ca1698c1687497bffc524b4ef..5a4e46e73a28e71a6ee20f30be9e3af6336c8d29 100644 (file)
@@ -10,6 +10,7 @@ from ..utils import (
     js_to_json,
     strip_or_none,
     try_get,
+    unescapeHTML,
     unified_timestamp,
 )
 
@@ -67,11 +68,20 @@ class WatchBoxIE(InfoExtractor):
 
         webpage = self._download_webpage(url, video_id)
 
-        source = self._parse_json(
+        player_config = self._parse_json(
             self._search_regex(
-                r'(?s)source["\']?\s*:\s*({.+?})\s*[,}]', webpage, 'source',
-                default='{}'),
-            video_id, transform_source=js_to_json, fatal=False) or {}
+                r'data-player-conf=(["\'])(?P<data>{.+?})\1', webpage,
+                'player config', default='{}', group='data'),
+            video_id, transform_source=unescapeHTML, fatal=False)
+
+        if not player_config:
+            player_config = self._parse_json(
+                self._search_regex(
+                    r'playerConf\s*=\s*({.+?})\s*;', webpage, 'player config',
+                    default='{}'),
+                video_id, transform_source=js_to_json, fatal=False) or {}
+
+        source = player_config.get('source') or {}
 
         video_id = compat_str(source.get('videoId') or video_id)
 
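
WatchBox moved its player settings from an inline source: {...} literal to an HTML-escaped JSON blob in a data-player-conf attribute; the extractor now tries that attribute first and keeps a playerConf = {...}; script variable as the fallback. A toy version of the two-stage lookup (Python 3 html.unescape standing in for unescapeHTML, and the page snippet is synthetic):

    import json
    import re
    from html import unescape

    def player_config(webpage):
        m = re.search(r'data-player-conf=(["\'])(?P<data>{.+?})\1', webpage)
        if m:
            return json.loads(unescape(m.group('data')))
        m = re.search(r'playerConf\s*=\s*({.+?})\s*;', webpage)
        return json.loads(m.group(1)) if m else {}

    page = '<div data-player-conf="{&quot;source&quot;: {&quot;videoId&quot;: 341}}">'
    print(player_config(page)['source']['videoId'])  # 341
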
index 1eb1f67024acfca7948d7450f2bca849069190cc..f2b8d19b439d4279e89b872e41b2eeefd421333e 100644 (file)
@@ -4,7 +4,10 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..utils import int_or_none
+from ..utils import (
+    int_or_none,
+    orderedSet,
+)
 
 
 class WebOfStoriesIE(InfoExtractor):
@@ -133,8 +136,10 @@ class WebOfStoriesPlaylistIE(InfoExtractor):
         webpage = self._download_webpage(url, playlist_id)
 
         entries = [
-            self.url_result('http://www.webofstories.com/play/%s' % video_number, 'WebOfStories')
-            for video_number in set(re.findall(r'href="/playAll/%s\?sId=(\d+)"' % playlist_id, webpage))
+            self.url_result(
+                'http://www.webofstories.com/play/%s' % video_id,
+                'WebOfStories', video_id=video_id)
+            for video_id in orderedSet(re.findall(r'\bid=["\']td_(\d+)', webpage))
         ]
 
         title = self._search_regex(
index bc3239f68864b0e5aff7fc4dd3459e8656453ee5..b38c7a7b3cd5f87a33f814444fd70af3219afb93 100644 (file)
@@ -23,7 +23,7 @@ class XFileShareIE(InfoExtractor):
         (r'powerwatch\.pw', 'PowerWatch'),
         (r'rapidvideo\.ws', 'Rapidvideo.ws'),
         (r'thevideobee\.to', 'TheVideoBee'),
-        (r'vidto\.me', 'Vidto'),
+        (r'vidto\.(?:me|se)', 'Vidto'),
         (r'streamin\.to', 'Streamin.To'),
         (r'xvidstage\.com', 'XVIDSTAGE'),
         (r'vidabc\.com', 'Vid ABC'),
@@ -115,7 +115,10 @@ class XFileShareIE(InfoExtractor):
         'only_matching': True,
     }, {
         'url': 'http://www.fastvideo.me/k8604r8nk8sn/FAST_FURIOUS_8_-_Trailer_italiano_ufficiale.mp4.html',
-        'only_matching': True
+        'only_matching': True,
+    }, {
+        'url': 'http://vidto.se/1tx1pf6t12cg.html',
+        'only_matching': True,
     }]
 
     @staticmethod
index d1bc992fd95deee13d407380112a4548921290e2..68a48034ead8036eb28f3815a6588ac4a2253fbb 100644 (file)
@@ -13,6 +13,7 @@ from ..utils import (
     parse_duration,
     try_get,
     unified_strdate,
+    url_or_none,
 )
 
 
@@ -137,7 +138,8 @@ class XHamsterIE(InfoExtractor):
                     else:
                         format_url = format_item
                         filesize = None
-                    if not isinstance(format_url, compat_str):
+                    format_url = url_or_none(format_url)
+                    if not format_url:
                         continue
                     formats.append({
                         'format_id': '%s-%s' % (format_id, quality),
@@ -198,7 +200,8 @@ class XHamsterIE(InfoExtractor):
                 default='{}'),
             video_id, fatal=False)
         for format_id, format_url in sources.items():
-            if not isinstance(format_url, compat_str):
+            format_url = url_or_none(format_url)
+            if not format_url:
                 continue
             if format_url in format_urls:
                 continue
index 7fafbf5969a26f8ce9f911cdd14714a7cb33bc0e..cfb368de94eca516af432f52ecf3e31744b9f984 100644 (file)
@@ -4,12 +4,12 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..compat import compat_str
 from ..utils import (
     ExtractorError,
     int_or_none,
     qualities,
     unescapeHTML,
+    url_or_none,
 )
 
 
@@ -80,9 +80,9 @@ class YapFilesIE(InfoExtractor):
         formats = []
         for format_id in QUALITIES:
             is_hd = format_id == 'hd'
-            format_url = playlist.get(
-                'file%s' % ('_hd' if is_hd else ''))
-            if not format_url or not isinstance(format_url, compat_str):
+            format_url = url_or_none(playlist.get(
+                'file%s' % ('_hd' if is_hd else '')))
+            if not format_url:
                 continue
             formats.append({
                 'url': format_url,
index f33fabe194daceb9ac6ffaf838536e7c9d53cd34..dff69fcb7aca250373fc0e70b2f8278ed2661755 100644 (file)
@@ -3,11 +3,11 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..compat import compat_str
 from ..utils import (
     determine_ext,
     int_or_none,
     parse_duration,
+    url_or_none,
 )
 
 
@@ -50,8 +50,8 @@ class YouJizzIE(InfoExtractor):
         for encoding in encodings:
             if not isinstance(encoding, dict):
                 continue
-            format_url = encoding.get('filename')
-            if not isinstance(format_url, compat_str):
+            format_url = url_or_none(encoding.get('filename'))
+            if not format_url:
                 continue
             if determine_ext(format_url) == 'm3u8':
                 formats.extend(self._extract_m3u8_formats(
index 547adefeba00ffb58a7e4dbd4205fc935c7ff2be..ea0bce784c5fbe91d904428670377d3e92414453 100644 (file)
@@ -3,13 +3,13 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..compat import compat_str
 from ..utils import (
     int_or_none,
     sanitized_Request,
     str_to_int,
     unescapeHTML,
     unified_strdate,
+    url_or_none,
 )
 from ..aes import aes_decrypt_text
 
@@ -88,8 +88,8 @@ class YouPornIE(InfoExtractor):
             for definition in definitions:
                 if not isinstance(definition, dict):
                     continue
-                video_url = definition.get('videoUrl')
-                if isinstance(video_url, compat_str) and video_url:
+                video_url = url_or_none(definition.get('videoUrl'))
+                if video_url:
                     links.append(video_url)
 
         # Fallback #1, this also contains extra low quality 180p format
diff --git a/youtube_dl/extractor/yourporn.py b/youtube_dl/extractor/yourporn.py
new file mode 100644 (file)
index 0000000..6602f7c
--- /dev/null
@@ -0,0 +1,41 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import urljoin
+
+
+class YourPornIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?yourporn\.sexy/post/(?P<id>[^/?#&.]+)'
+    _TEST = {
+        'url': 'https://yourporn.sexy/post/57ffcb2e1179b.html',
+        'md5': '6f8682b6464033d87acaa7a8ff0c092e',
+        'info_dict': {
+            'id': '57ffcb2e1179b',
+            'ext': 'mp4',
+            'title': 'md5:c9f43630bd968267672651ba905a7d35',
+            'thumbnail': r're:^https?://.*\.jpg$',
+        },
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+
+        video_url = urljoin(url, self._parse_json(
+            self._search_regex(
+                r'data-vnfo=(["\'])(?P<data>{.+?})\1', webpage, 'data info',
+                group='data'),
+            video_id)[video_id])
+
+        title = (self._search_regex(
+            r'<[^>]+\bclass=["\']PostEditTA[^>]+>([^<]+)', webpage, 'title',
+            default=None) or self._og_search_description(webpage)).strip()
+        thumbnail = self._og_search_thumbnail(webpage)
+
+        return {
+            'id': video_id,
+            'url': video_url,
+            'title': title,
+            'thumbnail': thumbnail,
+        }
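
The new extractor reads a JSON object out of the data-vnfo attribute, indexes it by the page's own video id, and resolves the possibly relative file path against the page URL with urljoin. The same steps on a synthetic page:

    import json
    import re
    from urllib.parse import urljoin

    page_url = 'https://yourporn.sexy/post/57ffcb2e1179b.html'
    video_id = '57ffcb2e1179b'
    webpage = '<video data-vnfo=\'{"57ffcb2e1179b": "/cdn/v.mp4"}\'>'

    data = json.loads(re.search(
        r'data-vnfo=(["\'])(?P<data>{.+?})\1', webpage).group('data'))
    print(urljoin(page_url, data[video_id]))  # https://yourporn.sexy/cdn/v.mp4
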
index 89c8b7f8d9f534134ef8e405c6ba97d7c232f515..27047425db6a121c0b5e39e11f597e82db26301a 100644 (file)
@@ -64,7 +64,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
     # If True it will raise an error if no login info is provided
     _LOGIN_REQUIRED = False
 
-    _PLAYLIST_ID_RE = r'(?:PL|LL|EC|UU|FL|RD|UL|TL)[0-9A-Za-z-_]{10,}'
+    _PLAYLIST_ID_RE = r'(?:PL|LL|EC|UU|FL|RD|UL|TL|OLAK5uy_)[0-9A-Za-z-_]{10,}'
 
     def _set_language(self):
         self._set_cookie(
@@ -178,13 +178,13 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
             warn('Unable to extract result entry')
             return False
 
-        tfa = try_get(res, lambda x: x[0][0], list)
-        if tfa:
-            tfa_str = try_get(tfa, lambda x: x[2], compat_str)
-            if tfa_str == 'TWO_STEP_VERIFICATION':
+        login_challenge = try_get(res, lambda x: x[0][0], list)
+        if login_challenge:
+            challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
+            if challenge_str == 'TWO_STEP_VERIFICATION':
                 # SEND_SUCCESS - TFA code has been successfully sent to phone
                 # QUOTA_EXCEEDED - reached the limit of TFA codes
-                status = try_get(tfa, lambda x: x[5], compat_str)
+                status = try_get(login_challenge, lambda x: x[5], compat_str)
                 if status == 'QUOTA_EXCEEDED':
                     warn('Exceeded the limit of TFA codes, try later')
                     return False
@@ -228,6 +228,17 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
 
                 check_cookie_url = try_get(
                     tfa_results, lambda x: x[0][-1][2], compat_str)
+            else:
+                CHALLENGES = {
+                    'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
+                    'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
+                    'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
+                }
+                challenge = CHALLENGES.get(
+                    challenge_str,
+                    '%s returned error %s.' % (self.IE_NAME, challenge_str))
+                warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
+                return False
         else:
             check_cookie_url = try_get(res, lambda x: x[2], compat_str)
 
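
The new `else` branch above turns unrecognized login challenges into an actionable warning instead of a silent failure: known challenge types map to Google's own wording, and anything else falls back to a generic error string. A minimal sketch of that lookup-with-fallback (hypothetical `challenge_message` helper; `ie_name` stands in for `self.IE_NAME`):

    CHALLENGES = {
        'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
        'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
        'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
    }

    def challenge_message(ie_name, challenge_str):
        # Unknown challenge types fall back to a generic error string.
        return CHALLENGES.get(
            challenge_str, '%s returned error %s.' % (ie_name, challenge_str))

    print(challenge_message('youtube', 'CAPTCHA'))  # -> youtube returned error CAPTCHA.
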
@@ -1167,7 +1178,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
     def _parse_sig_js(self, jscode):
         funcname = self._search_regex(
             (r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
-             r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\('),
+             r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
+             r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*c\s*&&\s*d\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
+             r'\bc\s*&&\s*d\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\('),
             jscode, 'Initial JS player signature function name', group='sig')
 
         jsi = JSInterpreter(jscode)
@@ -2112,7 +2125,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
                             youtu\.be/[0-9A-Za-z_-]{11}\?.*?\blist=
                         )
                         (
-                            (?:PL|LL|EC|UU|FL|RD|UL|TL)?[0-9A-Za-z-_]{10,}
+                            (?:PL|LL|EC|UU|FL|RD|UL|TL|OLAK5uy_)?[0-9A-Za-z-_]{10,}
                             # Top tracks, they can also include dots
                             |(?:MC)[\w\.]*
                         )
@@ -2250,6 +2263,10 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
     }, {
         'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
         'only_matching': True,
+    }, {
+        # music album playlist
+        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
+        'only_matching': True,
     }]
 
     def _real_initialize(self):
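
`OLAK5uy_` is the prefix YouTube gives auto-generated music album playlists, so it is added both to `_PLAYLIST_ID_RE` and to the playlist URL pattern, with a matching `only_matching` test. A quick sanity check of the updated ID regex (the second ID below is made up):

    import re

    _PLAYLIST_ID_RE = r'(?:PL|LL|EC|UU|FL|RD|UL|TL|OLAK5uy_)[0-9A-Za-z-_]{10,}'

    # Album playlist ID from the new test case above.
    print(bool(re.match(_PLAYLIST_ID_RE, 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM')))  # True
    # An ordinary playlist ID still matches.
    print(bool(re.match(_PLAYLIST_ID_RE, 'PL0123456789abcdef')))  # True
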
index b5a3a071681f127c73050f7368f57a9d97124278..fb167c1985527ada9d06e5f482e20c88ba0a4559 100644 (file)
@@ -13,6 +13,7 @@ from ..utils import (
     ExtractorError,
     int_or_none,
     try_get,
+    url_or_none,
     urlencode_postdata,
 )
 
@@ -150,8 +151,8 @@ class ZattooBaseIE(InfoExtractor):
             for watch in watch_urls:
                 if not isinstance(watch, dict):
                     continue
-                watch_url = watch.get('url')
-                if not watch_url or not isinstance(watch_url, compat_str):
+                watch_url = url_or_none(watch.get('url'))
+                if not watch_url:
                     continue
                 format_id_list = [stream_type]
                 maxrate = watch.get('maxrate')
index bb9020c918b3659437d752c5a4109bc520e3ab88..afa3f6c47f17a52f40903751cad5ff4293411715 100644 (file)
@@ -15,6 +15,7 @@ from ..utils import (
     try_get,
     unified_timestamp,
     update_url_query,
+    url_or_none,
     urljoin,
 )
 
@@ -67,8 +68,8 @@ class ZDFIE(ZDFBaseIE):
     def _extract_subtitles(src):
         subtitles = {}
         for caption in try_get(src, lambda x: x['captions'], list) or []:
-            subtitle_url = caption.get('uri')
-            if subtitle_url and isinstance(subtitle_url, compat_str):
+            subtitle_url = url_or_none(caption.get('uri'))
+            if subtitle_url:
                 lang = caption.get('language', 'deu')
                 subtitles.setdefault(lang, []).append({
                     'url': subtitle_url,
@@ -76,8 +77,8 @@ class ZDFIE(ZDFBaseIE):
         return subtitles
 
     def _extract_format(self, video_id, formats, format_urls, meta):
-        format_url = meta.get('url')
-        if not format_url or not isinstance(format_url, compat_str):
+        format_url = url_or_none(meta.get('url'))
+        if not format_url:
             return
         if format_url in format_urls:
             return
@@ -152,7 +153,8 @@ class ZDFIE(ZDFBaseIE):
             content, lambda x: x['teaserImageRef']['layouts'], dict)
         if layouts:
             for layout_key, layout_url in layouts.items():
-                if not isinstance(layout_url, compat_str):
+                layout_url = url_or_none(layout_url)
+                if not layout_url:
                     continue
                 thumbnail = {
                     'url': layout_url,
index e83d546a082af8e24f2f01438605aabad41857af..e7d8e891030585ff9ffa50c61505a75873c86e3c 100644 (file)
@@ -841,11 +841,11 @@ def parseOpts(overrideArguments=None):
     postproc.add_option(
         '--prefer-avconv',
         action='store_false', dest='prefer_ffmpeg',
-        help='Prefer avconv over ffmpeg for running the postprocessors (default)')
+        help='Prefer avconv over ffmpeg for running the postprocessors')
     postproc.add_option(
         '--prefer-ffmpeg',
         action='store_true', dest='prefer_ffmpeg',
-        help='Prefer ffmpeg over avconv for running the postprocessors')
+        help='Prefer ffmpeg over avconv for running the postprocessors (default)')
     postproc.add_option(
         '--ffmpeg-location', '--avconv-location', metavar='PATH',
         dest='ffmpeg_location',
index 3ea1afcf31cb49e0992a79b46c5985bb33b742a3..757b496a1c723176c04e60fb43962b0270b6cf8e 100644 (file)
@@ -77,7 +77,7 @@ class FFmpegPostProcessor(PostProcessor):
 
     def _determine_executables(self):
         programs = ['avprobe', 'avconv', 'ffmpeg', 'ffprobe']
-        prefer_ffmpeg = False
+        prefer_ffmpeg = True
 
         self.basename = None
         self.probe_basename = None
@@ -85,7 +85,7 @@ class FFmpegPostProcessor(PostProcessor):
         self._paths = None
         self._versions = None
         if self._downloader:
-            prefer_ffmpeg = self._downloader.params.get('prefer_ffmpeg', False)
+            prefer_ffmpeg = self._downloader.params.get('prefer_ffmpeg', True)
             location = self._downloader.params.get('ffmpeg_location')
             if location is not None:
                 if not os.path.exists(location):
@@ -117,19 +117,19 @@ class FFmpegPostProcessor(PostProcessor):
                 (p, get_exe_version(p, args=['-version'])) for p in programs)
             self._paths = dict((p, p) for p in programs)
 
-        if prefer_ffmpeg:
-            prefs = ('ffmpeg', 'avconv')
-        else:
+        if prefer_ffmpeg is False:
             prefs = ('avconv', 'ffmpeg')
+        else:
+            prefs = ('ffmpeg', 'avconv')
         for p in prefs:
             if self._versions[p]:
                 self.basename = p
                 break
 
-        if prefer_ffmpeg:
-            prefs = ('ffprobe', 'avprobe')
-        else:
+        if prefer_ffmpeg is False:
             prefs = ('avprobe', 'ffprobe')
+        else:
+            prefs = ('ffprobe', 'avprobe')
         for p in prefs:
             if self._versions[p]:
                 self.probe_basename = p
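
Together with the options.py hunk above, this flips the default postprocessor backend from avconv to ffmpeg: only an explicit `prefer_ffmpeg is False` (i.e. `--prefer-avconv`) now puts the av* tools first. The selection order in isolation, as a sketch:

    def pick_backends(prefer_ffmpeg=True):
        # Mirrors the updated _determine_executables() ordering: anything
        # other than an explicit False prefers the ffmpeg family.
        if prefer_ffmpeg is False:
            return ('avconv', 'ffmpeg'), ('avprobe', 'ffprobe')
        return ('ffmpeg', 'avconv'), ('ffprobe', 'avprobe')

    print(pick_backends())       # (('ffmpeg', 'avconv'), ('ffprobe', 'avprobe'))
    print(pick_backends(False))  # (('avconv', 'ffmpeg'), ('avprobe', 'ffprobe'))
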
index 6a3199fb992b72e70b6588d3999e9c6ec0a87890..e84d35d4dee2077faf89bbecb0083ca01e6273c4 100644 (file)
@@ -49,7 +49,6 @@ from .compat import (
     compat_os_name,
     compat_parse_qs,
     compat_shlex_quote,
-    compat_socket_create_connection,
     compat_str,
     compat_struct_pack,
     compat_struct_unpack,
@@ -82,7 +81,7 @@ def register_socks_protocols():
 compiled_regex_type = type(re.compile(''))
 
 std_headers = {
-    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:59.0) Gecko/20100101 Firefox/59.0 (Chrome)',
+    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:59.0) Gecko/20100101 Firefox/59.0',
     'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
     'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
     'Accept-Encoding': 'gzip, deflate',
@@ -184,6 +183,7 @@ DATE_FORMATS_MONTH_FIRST.extend([
 ])
 
 PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
+JSON_LD_RE = r'(?is)<script[^>]+type=(["\'])application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'
 
 
 def preferredencoding():
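
`JSON_LD_RE` hoists the `<script type="application/ld+json">` pattern into utils so extractors can share it. A quick demonstration (HTML snippet made up for this example):

    import json
    import re

    JSON_LD_RE = r'(?is)<script[^>]+type=(["\'])application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'

    webpage = '<script type="application/ld+json">{"@type": "VideoObject", "name": "demo"}</script>'
    m = re.search(JSON_LD_RE, webpage)
    print(json.loads(m.group('json_ld'))['name'])  # -> demo
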
@@ -881,13 +881,51 @@ def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
         kwargs['strict'] = True
     hc = http_class(*args, **compat_kwargs(kwargs))
     source_address = ydl_handler._params.get('source_address')
+
     if source_address is not None:
+        # This is to work around _create_connection() from socket, which tries all
+        # address data from getaddrinfo(), including IPv6. This filters the
+        # getaddrinfo() results down to the address family of source_address.
+        # Based on CPython's socket.create_connection():
+        # https://github.com/python/cpython/blob/master/Lib/socket.py#L691
+        def _create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
+            host, port = address
+            err = None
+            addrs = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
+            af = socket.AF_INET if '.' in source_address[0] else socket.AF_INET6
+            ip_addrs = [addr for addr in addrs if addr[0] == af]
+            if addrs and not ip_addrs:
+                ip_version = 'v4' if af == socket.AF_INET else 'v6'
+                raise socket.error(
+                    "No remote IP%s addresses available for connect, can't use '%s' as source address"
+                    % (ip_version, source_address[0]))
+            for res in ip_addrs:
+                af, socktype, proto, canonname, sa = res
+                sock = None
+                try:
+                    sock = socket.socket(af, socktype, proto)
+                    if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
+                        sock.settimeout(timeout)
+                    sock.bind(source_address)
+                    sock.connect(sa)
+                    err = None  # Explicitly break reference cycle
+                    return sock
+                except socket.error as _:
+                    err = _
+                    if sock is not None:
+                        sock.close()
+            if err is not None:
+                raise err
+            else:
+                raise socket.error('getaddrinfo returns an empty list')
+        if hasattr(hc, '_create_connection'):
+            hc._create_connection = _create_connection
         sa = (source_address, 0)
         if hasattr(hc, 'source_address'):  # Python 2.7+
             hc.source_address = sa
         else:  # Python 2.6
             def _hc_connect(self, *args, **kwargs):
-                sock = compat_socket_create_connection(
+                sock = _create_connection(
                     (self.host, self.port), self.timeout, sa)
                 if is_https:
                     self.sock = ssl.wrap_socket(
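
The key step in the `_create_connection` override added above is pre-filtering the `getaddrinfo()` result to the address family implied by `source_address`, so an IPv4 bind address is never paired with an IPv6 remote (and vice versa). That family check in isolation (address tuples are illustrative, using documentation IPs):

    import socket

    def matching_family(source_ip, addrs):
        # Same heuristic as above: a dotted source address means IPv4,
        # anything else is treated as IPv6.
        af = socket.AF_INET if '.' in source_ip else socket.AF_INET6
        return [addr for addr in addrs if addr[0] == af]

    # getaddrinfo()-shaped tuples: (family, type, proto, canonname, sockaddr).
    addrs = [
        (socket.AF_INET, socket.SOCK_STREAM, 6, '', ('203.0.113.10', 80)),
        (socket.AF_INET6, socket.SOCK_STREAM, 6, '', ('2001:db8::10', 80, 0, 0)),
    ]
    print(matching_family('192.0.2.1', addrs))  # keeps only the AF_INET entry
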
@@ -1865,6 +1903,13 @@ def strip_or_none(v):
     return None if v is None else v.strip()
 
 
+def url_or_none(url):
+    if not url or not isinstance(url, compat_str):
+        return None
+    url = url.strip()
+    return url if re.match(r'^(?:[a-zA-Z][\da-zA-Z.+-]*:)?//', url) else None
+
+
 def parse_duration(s):
     if not isinstance(s, compat_basestring):
         return None
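
`url_or_none()` is the helper the zattoo.py and zdf.py hunks above switch to: it folds the `None` check, the `compat_str` type check and a scheme sanity check into one call. Expected behaviour, per the regex in the definition (assuming it is in scope):

    print(url_or_none('https://example.com/v.mp4'))  # 'https://example.com/v.mp4'
    print(url_or_none('//cdn.example.com/v.m3u8'))   # '//cdn.example.com/v.m3u8' (protocol-relative)
    print(url_or_none('not a url'))                  # None
    print(url_or_none(None))                         # None
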
@@ -2281,7 +2326,7 @@ def parse_age_limit(s):
 def strip_jsonp(code):
     return re.sub(
         r'''(?sx)^
-            (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]+)
+            (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
             (?:\s*&&\s*(?P=func_name))?
             \s*\(\s*(?P<callback_data>.*)\);?
             \s*?(?://[^\n]*)*$''',
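
Relaxing the `func_name` group from `+` to `*` lets `strip_jsonp()` also unwrap responses whose callback name is empty. Made-up payloads (assuming the function above is in scope):

    print(strip_jsonp('callback({"status": "ok"});'))  # {"status": "ok"}
    print(strip_jsonp('({"status": "ok"});'))          # {"status": "ok"}  (empty func name)
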
@@ -2432,7 +2477,7 @@ def parse_codecs(codecs_str):
     vcodec, acodec = None, None
     for full_codec in splited_codecs:
         codec = full_codec.split('.')[0]
-        if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1'):
+        if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1', 'av01'):
             if not vcodec:
                 vcodec = full_codec
         elif codec in ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'):
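
`av01` is the codecs-string tag used for AV1 streams in DASH manifests; without this entry `parse_codecs()` would leave `vcodec` unset for them. Expected result for a typical (made-up) codecs attribute, assuming the function above is in scope:

    print(parse_codecs('av01.0.05M.08,mp4a.40.2'))
    # -> {'vcodec': 'av01.0.05M.08', 'acodec': 'mp4a.40.2'}
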
@@ -3561,7 +3606,7 @@ class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
             setattr(self, '%s_open' % type,
                     lambda r, proxy='__noproxy__', type=type, meth=self.proxy_open:
                         meth(r, proxy, type))
-        return compat_urllib_request.ProxyHandler.__init__(self, proxies)
+        compat_urllib_request.ProxyHandler.__init__(self, proxies)
 
     def proxy_open(self, req, proxy, type):
         req_proxy = req.headers.get('Ytdl-request-proxy')
index 49fef60ea7460780706f0b40d610c36c99505da1..b078c4993ca68661dd6b93c669c945ff704d7f7b 100644 (file)
@@ -1,3 +1,3 @@
 from __future__ import unicode_literals
 
-__version__ = '2018.06.18'
+__version__ = '2018.09.10'