Raphaël G. Git Repositories - youtubedl/commitdiff
New upstream version 2017.12.31
author Rogério Brito <rbrito@ime.usp.br>
Thu, 4 Jan 2018 05:48:45 +0000 (03:48 -0200)
committer Rogério Brito <rbrito@ime.usp.br>
Thu, 4 Jan 2018 05:48:45 +0000 (03:48 -0200)
134 files changed:
AUTHORS [new file with mode: 0644]
ChangeLog
MANIFEST.in
Makefile
README.md
README.txt
devscripts/run_tests.sh
docs/supportedsites.md
setup.cfg [new file with mode: 0644]
setup.py
test/test_InfoExtractor.py
test/test_YoutubeDL.py
test/test_utils.py
test/testdata/f4m/custom_base_url.f4m [new file with mode: 0644]
test/testdata/m3u8/pluzz_francetv_11507.m3u8 [new file with mode: 0644]
test/testdata/m3u8/teamcoco_11995.m3u8 [new file with mode: 0644]
test/testdata/m3u8/toggle_mobile_12211.m3u8 [new file with mode: 0644]
test/testdata/m3u8/twitch_vod.m3u8 [new file with mode: 0644]
test/testdata/m3u8/vidio.m3u8 [new file with mode: 0644]
test/testdata/mpd/float_duration.mpd [new file with mode: 0644]
test/testdata/mpd/urls_only.mpd [new file with mode: 0644]
youtube-dl
youtube-dl.1
youtube_dl/YoutubeDL.py
youtube_dl/downloader/fragment.py
youtube_dl/downloader/hls.py
youtube_dl/downloader/http.py
youtube_dl/extractor/abc.py
youtube_dl/extractor/afreecatv.py
youtube_dl/extractor/animeondemand.py
youtube_dl/extractor/ard.py
youtube_dl/extractor/atresplayer.py
youtube_dl/extractor/aws.py [new file with mode: 0644]
youtube_dl/extractor/bambuser.py
youtube_dl/extractor/bbc.py
youtube_dl/extractor/br.py
youtube_dl/extractor/brightcove.py
youtube_dl/extractor/byutv.py
youtube_dl/extractor/cartoonnetwork.py
youtube_dl/extractor/cbslocal.py
youtube_dl/extractor/ccma.py
youtube_dl/extractor/collegerama.py [deleted file]
youtube_dl/extractor/common.py
youtube_dl/extractor/crunchyroll.py
youtube_dl/extractor/cspan.py
youtube_dl/extractor/dailymotion.py
youtube_dl/extractor/daisuki.py
youtube_dl/extractor/discovery.py
youtube_dl/extractor/discoverygo.py
youtube_dl/extractor/disney.py
youtube_dl/extractor/dramafever.py
youtube_dl/extractor/drtuber.py
youtube_dl/extractor/ellentube.py [new file with mode: 0644]
youtube_dl/extractor/ellentv.py [deleted file]
youtube_dl/extractor/espn.py
youtube_dl/extractor/extractors.py
youtube_dl/extractor/faz.py
youtube_dl/extractor/fczenit.py
youtube_dl/extractor/filmweb.py [new file with mode: 0644]
youtube_dl/extractor/firstpost.py [deleted file]
youtube_dl/extractor/fktv.py [deleted file]
youtube_dl/extractor/fox.py
youtube_dl/extractor/fox9.py
youtube_dl/extractor/francetv.py
youtube_dl/extractor/freespeech.py
youtube_dl/extractor/funimation.py
youtube_dl/extractor/gamersyde.py [deleted file]
youtube_dl/extractor/gamespot.py
youtube_dl/extractor/generic.py
youtube_dl/extractor/instagram.py
youtube_dl/extractor/internazionale.py [new file with mode: 0644]
youtube_dl/extractor/itv.py
youtube_dl/extractor/jwplatform.py
youtube_dl/extractor/kaltura.py
youtube_dl/extractor/livestream.py
youtube_dl/extractor/mailru.py
youtube_dl/extractor/massengeschmacktv.py [new file with mode: 0644]
youtube_dl/extractor/mediasite.py [new file with mode: 0644]
youtube_dl/extractor/mnet.py
youtube_dl/extractor/mtv.py
youtube_dl/extractor/nexx.py
youtube_dl/extractor/nick.py
youtube_dl/extractor/noco.py
youtube_dl/extractor/nowtv.py [deleted file]
youtube_dl/extractor/odnoklassniki.py
youtube_dl/extractor/once.py
youtube_dl/extractor/openload.py
youtube_dl/extractor/orf.py
youtube_dl/extractor/pandatv.py
youtube_dl/extractor/patreon.py
youtube_dl/extractor/pbs.py
youtube_dl/extractor/performgroup.py [new file with mode: 0644]
youtube_dl/extractor/playtvak.py
youtube_dl/extractor/pluralsight.py
youtube_dl/extractor/porncom.py
youtube_dl/extractor/rai.py
youtube_dl/extractor/roosterteeth.py
youtube_dl/extractor/rozhlas.py
youtube_dl/extractor/safari.py
youtube_dl/extractor/sandia.py [deleted file]
youtube_dl/extractor/scrippsnetworks.py
youtube_dl/extractor/sevenplus.py [new file with mode: 0644]
youtube_dl/extractor/shahid.py
youtube_dl/extractor/slutload.py
youtube_dl/extractor/sonyliv.py
youtube_dl/extractor/spankbang.py
youtube_dl/extractor/stretchinternet.py [new file with mode: 0644]
youtube_dl/extractor/tbs.py
youtube_dl/extractor/tnaflix.py
youtube_dl/extractor/toutv.py
youtube_dl/extractor/turner.py
youtube_dl/extractor/tva.py
youtube_dl/extractor/tvnow.py [new file with mode: 0644]
youtube_dl/extractor/twentythreevideo.py [new file with mode: 0644]
youtube_dl/extractor/twitch.py
youtube_dl/extractor/twitter.py
youtube_dl/extractor/udemy.py
youtube_dl/extractor/ufctv.py [new file with mode: 0644]
youtube_dl/extractor/umg.py [new file with mode: 0644]
youtube_dl/extractor/vidzi.py
youtube_dl/extractor/viki.py
youtube_dl/extractor/vimeo.py
youtube_dl/extractor/vk.py
youtube_dl/extractor/voot.py
youtube_dl/extractor/vshare.py
youtube_dl/extractor/vvvvid.py
youtube_dl/extractor/wsj.py
youtube_dl/extractor/xhamster.py
youtube_dl/extractor/xiami.py
youtube_dl/extractor/youku.py
youtube_dl/extractor/youtube.py
youtube_dl/postprocessor/xattrpp.py
youtube_dl/utils.py
youtube_dl/version.py

diff --git a/AUTHORS b/AUTHORS
new file mode 100644 (file)
index 0000000..7e01224
--- /dev/null
+++ b/AUTHORS
@@ -0,0 +1,233 @@
+Ricardo Garcia Gonzalez
+Danny Colligan
+Benjamin Johnson
+Vasyl' Vavrychuk
+Witold Baryluk
+Paweł Paprota
+Gergely Imreh
+Rogério Brito
+Philipp Hagemeister
+Sören Schulze
+Kevin Ngo
+Ori Avtalion
+shizeeg
+Filippo Valsorda
+Christian Albrecht
+Dave Vasilevsky
+Jaime Marquínez Ferrándiz
+Jeff Crouse
+Osama Khalid
+Michael Walter
+M. Yasoob Ullah Khalid
+Julien Fraichard
+Johny Mo Swag
+Axel Noack
+Albert Kim
+Pierre Rudloff
+Huarong Huo
+Ismael Mejía
+Steffan Donal
+Andras Elso
+Jelle van der Waa
+Marcin Cieślak
+Anton Larionov
+Takuya Tsuchida
+Sergey M.
+Michael Orlitzky
+Chris Gahan
+Saimadhav Heblikar
+Mike Col
+Oleg Prutz
+pulpe
+Andreas Schmitz
+Michael Kaiser
+Niklas Laxström
+David Triendl
+Anthony Weems
+David Wagner
+Juan C. Olivares
+Mattias Harrysson
+phaer
+Sainyam Kapoor
+Nicolas Évrard
+Jason Normore
+Hoje Lee
+Adam Thalhammer
+Georg Jähnig
+Ralf Haring
+Koki Takahashi
+Ariset Llerena
+Adam Malcontenti-Wilson
+Tobias Bell
+Naglis Jonaitis
+Charles Chen
+Hassaan Ali
+Dobrosław Żybort
+David Fabijan
+Sebastian Haas
+Alexander Kirk
+Erik Johnson
+Keith Beckman
+Ole Ernst
+Aaron McDaniel (mcd1992)
+Magnus Kolstad
+Hari Padmanaban
+Carlos Ramos
+5moufl
+lenaten
+Dennis Scheiba
+Damon Timm
+winwon
+Xavier Beynon
+Gabriel Schubiner
+xantares
+Jan Matějka
+Mauroy Sébastien
+William Sewell
+Dao Hoang Son
+Oskar Jauch
+Matthew Rayfield
+t0mm0
+Tithen-Firion
+Zack Fernandes
+cryptonaut
+Adrian Kretz
+Mathias Rav
+Petr Kutalek
+Will Glynn
+Max Reimann
+Cédric Luthi
+Thijs Vermeir
+Joel Leclerc
+Christopher Krooss
+Ondřej Caletka
+Dinesh S
+Johan K. Jensen
+Yen Chi Hsuan
+Enam Mijbah Noor
+David Luhmer
+Shaya Goldberg
+Paul Hartmann
+Frans de Jonge
+Robin de Rooij
+Ryan Schmidt
+Leslie P. Polzer
+Duncan Keall
+Alexander Mamay
+Devin J. Pohly
+Eduardo Ferro Aldama
+Jeff Buchbinder
+Amish Bhadeshia
+Joram Schrijver
+Will W.
+Mohammad Teimori Pabandi
+Roman Le Négrate
+Matthias Küch
+Julian Richen
+Ping O.
+Mister Hat
+Peter Ding
+jackyzy823
+George Brighton
+Remita Amine
+Aurélio A. Heckert
+Bernhard Minks
+sceext
+Zach Bruggeman
+Tjark Saul
+slangangular
+Behrouz Abbasi
+ngld
+nyuszika7h
+Shaun Walbridge
+Lee Jenkins
+Anssi Hannula
+Lukáš Lalinský
+Qijiang Fan
+Rémy Léone
+Marco Ferragina
+reiv
+Muratcan Simsek
+Evan Lu
+flatgreen
+Brian Foley
+Vignesh Venkat
+Tom Gijselinck
+Founder Fang
+Andrew Alexeyew
+Saso Bezlaj
+Erwin de Haan
+Jens Wille
+Robin Houtevelts
+Patrick Griffis
+Aidan Rowe
+mutantmonkey
+Ben Congdon
+Kacper Michajłow
+José Joaquín Atria
+Viťas Strádal
+Kagami Hiiragi
+Philip Huppert
+blahgeek
+Kevin Deldycke
+inondle
+Tomáš Čech
+Déstin Reed
+Roman Tsiupa
+Artur Krysiak
+Jakub Adam Wieczorek
+Aleksandar Topuzović
+Nehal Patel
+Rob van Bekkum
+Petr Zvoníček
+Pratyush Singh
+Aleksander Nitecki
+Sebastian Blunt
+Matěj Cepl
+Xie Yanbo
+Philip Xu
+John Hawkinson
+Rich Leeper
+Zhong Jianxin
+Thor77
+Mattias Wadman
+Arjan Verwer
+Costy Petrisor
+Logan B
+Alex Seiler
+Vijay Singh
+Paul Hartmann
+Stephen Chen
+Fabian Stahl
+Bagira
+Odd Stråbø
+Philip Herzog
+Thomas Christlieb
+Marek Rusinowski
+Tobias Gruetzmacher
+Olivier Bilodeau
+Lars Vierbergen
+Juanjo Benages
+Xiao Di Guan
+Thomas Winant
+Daniel Twardowski
+Jeremie Jarosh
+Gerard Rovira
+Marvin Ewald
+Frédéric Bournival
+Timendum
+gritstub
+Adam Voss
+Mike Fährmann
+Jan Kundrát
+Giuseppe Fabiano
+Örn Guðjónsson
+Parmjit Virk
+Genki Sky
+Ľuboš Katrinec
+Corey Nicholson
+Ashutosh Chaudhary
+John Dong
+Tatsuyuki Ishi
+Daniel Weber
+Kay Bouché
diff --git a/ChangeLog b/ChangeLog
index 8af3682745463d71f6bbf97057f840a3b01e00f0..bfffb1f5f410861037389853167563a43b41df1b 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,198 @@
+version 2017.12.31
+
+Core
++ [extractor/common] Add container meta field for formats extracted
+  in _parse_mpd_formats (#13616)
++ [downloader/hls] Use HTTP headers for key request
+* [common] Use AACL as the default fourcc when AudioTag is 255
+* [extractor/common] Fix extraction of DASH formats with the same
+  representation id (#15111)
+
+Extractors
++ [slutload] Add support for mobile URLs (#14806)
+* [abc:iview] Bypass geo restriction
+* [abc:iview] Fix extraction (#14711, #14782, #14838, #14917, #14963, #14985,
+  #15035, #15057, #15061, #15071, #15095, #15106)
+* [openload] Fix extraction (#15118)
+- [sandia] Remove extractor
+- [collegerama] Remove extractor
++ [mediasite] Add support for sites based on Mediasite Video Platform (#5428,
+  #11185, #14343)
++ [ufctv] Add support for ufc.tv (#14520)
+* [pluralsight] Fix missing first line of subtitles (#11118)
+* [openload] Fallback on f-page extraction (#14665, #14879)
+* [vimeo] Improve password protected videos extraction (#15114)
+* [aws] Fix canonical/signed headers generation on python 2 (#15102)
+
+
+version 2017.12.28
+
+Extractors
++ [internazionale] Add support for internazionale.it (#14973)
+* [playtvak] Relax video regular expression and make description optional
+  (#15037)
++ [filmweb] Add support for filmweb.no (#8773, #10368)
++ [23video] Add support for 23video.com
++ [espn] Add support for fivethirtyeight.com (#6864)
++ [umg:de] Add support for universal-music.de (#11582, #11584)
++ [espn] Add support for espnfc and extract more formats (#8053)
+* [youku] Update ccode (#14880)
++ [openload] Add support for oload.stream (#15070)
+* [youku] Fix list extraction (#15065)
+
+
+version 2017.12.23
+
+Core
+* [extractor/common] Move X-Forwarded-For setup code into _request_webpage
++ [YoutubeDL] Add support for playlist_uploader and playlist_uploader_id in
+  output template (#11427, #15018)
++ [extractor/common] Introduce uploader, uploader_id and uploader_url
+  meta fields for playlists (#11427, #15018)
+* [downloader/fragment] Encode filename of fragment being removed (#15020)
++ [utils] Add another date format pattern (#14999)
+
+Extractors
++ [kaltura] Add another embed pattern for entry_id
++ [7plus] Add support for 7plus.com.au (#15043)
+* [animeondemand] Relax login error regular expression
++ [shahid] Add support for show pages (#7401)
++ [youtube] Extract uploader, uploader_id and uploader_url for playlists
+  (#11427, #15018)
+* [afreecatv] Improve format extraction (#15019)
++ [cspan] Add support for audio only pages and catch page errors (#14995)
++ [mailru] Add support for embed URLs (#14904)
+* [crunchyroll] Future-proof XML element checks (#15013)
+* [cbslocal] Fix timestamp extraction (#14999, #15000)
+* [discoverygo] Correct TTML subtitle extension
+* [vk] Make view count optional (#14979)
+* [disney] Skip Apple FairPlay formats (#14982)
+* [voot] Fix format extraction (#14758)
+
+
+version 2017.12.14
+
+Core
+* [postprocessor/xattr] Clarify NO_SPACE message (#14970)
+* [downloader/http] Return actual download result from real_download (#14971)
+
+Extractors
++ [itv] Extract more subtitles and duration
+* [itv] Improve extraction (#14944)
++ [byutv] Add support for geo restricted videos
+* [byutv] Fix extraction (#14966, #14967)
++ [bbccouk] Fix extraction for 320k HLS streams
++ [toutv] Add support for special video URLs (#14179)
+* [discovery] Fix free videos extraction (#14157, #14954)
+* [tvnow] Fix extraction (#7831)
++ [nickelodeon:br] Add support for nickelodeon brazil websites (#14893)
+* [nick] Improve extraction (#14876)
+* [tbs] Fix extraction (#13658)
+
+
+version 2017.12.10
+
+Core
++ [utils] Add sami mimetype to mimetype2ext
+
+Extractors
+* [culturebox] Improve video id extraction (#14947)
+* [twitter] Improve extraction (#14197)
++ [udemy] Extract more HLS formats
+* [udemy] Improve course id extraction (#14938)
++ [stretchinternet] Add support for portal.stretchinternet.com (#14576)
+* [ellentube] Fix extraction (#14407, #14570)
++ [raiplay:playlist] Add support for playlists (#14563)
+* [sonyliv] Bypass geo restriction
+* [sonyliv] Extract higher quality formats (#14922)
+* [fox] Extract subtitles
++ [fox] Add support for Adobe Pass authentication (#14205, #14489)
+- [dailymotion:cloud] Remove extractor (#6794)
+* [xhamster] Fix thumbnail extraction (#14780)
++ [xhamster] Add support for mobile URLs (#14780)
+* [generic] Don't pass video id as mpd id while extracting DASH (#14902)
+* [ard] Skip invalid stream URLs (#14906)
+* [porncom] Fix metadata extraction (#14911)
+* [pluralsight] Detect agreement request (#14913)
+* [toutv] Fix login (#14614)
+
+
+version 2017.12.02
+
+Core
++ [downloader/fragment] Commit part file after each fragment
++ [extractor/common] Add durations for DASH fragments with bare SegmentURLs
++ [extractor/common] Add support for DASH manifests with SegmentLists with
+  bare SegmentURLs (#14844)
++ [utils] Add hvc1 codec code to parse_codecs
+
+Extractors
+* [xhamster] Fix extraction (#14884)
+* [youku] Update ccode (#14872)
+* [mnet] Fix format extraction (#14883)
++ [xiami] Add Referer header to API request
+* [mtv] Correct scc extension in extracted subtitles (#13730)
+* [vvvvid] Fix extraction for kenc videos (#13406)
++ [br] Add support for BR Mediathek videos (#14560, #14788)
++ [daisuki] Add support for motto.daisuki.com (#14681)
+* [odnoklassniki] Fix API metadata request (#14862)
+* [itv] Fix HLS formats extraction
++ [pbs] Add another media id regular expression
+
+
+version 2017.11.26
+
+Core
+* [extractor/common] Use final URL when dumping request (#14769)
+
+Extractors
+* [fczenit] Fix extraction
+- [firstpost] Remove extractor
+* [freespeech] Fix extraction
+* [nexx] Extract more formats
++ [openload] Add support for openload.link (#14763)
+* [empflix] Relax URL regular expression
+* [empflix] Fix extraction
+* [tnaflix] Don't modify download URLs (#14811)
+- [gamersyde] Remove extractor
+* [francetv:generationwhat] Fix extraction
++ [massengeschmacktv] Add support for Massengeschmack TV
+* [fox9] Fix extraction
+* [faz] Fix extraction and add support for Perform Group embeds (#14714)
++ [performgroup] Add support for performgroup.com
++ [jwplatform] Add support for iframes (#14828)
+* [culturebox] Fix extraction (#14827)
+* [youku] Fix extraction; update ccode (#14815)
+* [livestream] Make SMIL extraction non fatal (#14792)
++ [drtuber] Add support for mobile URLs (#14772)
++ [spankbang] Add support for mobile URLs (#14771)
+* [instagram] Fix description, timestamp and counters extraction (#14755)
+
+
+version 2017.11.15
+
+Core
+* [common] Skip Apple FairPlay m3u8 manifests (#14741)
+* [YoutubeDL] Fix playlist range optimization for --playlist-items (#14740)
+
+Extractors
+* [vshare] Capture and output error message
+* [vshare] Fix extraction (#14473)
+* [crunchyroll] Extract old RTMP formats
+* [tva] Fix extraction (#14736)
+* [gamespot] Lower preference of HTTP formats (#14652)
+* [instagram:user] Fix extraction (#14699)
+* [ccma] Fix typo (#14730)
+- Remove sensitive data from logging in messages
+* [instagram:user] Fix extraction (#14699)
++ [gamespot] Add support for article URLs (#14652)
+* [gamespot] Skip Brightcove Once HTTP formats (#14652)
+* [cartoonnetwork] Update tokenizer_src (#14666)
++ [wsj] Recognize another URL pattern (#14704)
+* [pandatv] Update API URL and sign format URLs (#14693)
+* [crunchyroll] Use old login method (#11572)
+
+
 version 2017.11.06
 
 Core
@@ -25,8 +220,8 @@ Extractors
 + [fxnetworks] Extract series metadata (#14603)
 + [younow] Add support for younow.com (#9255, #9432, #12436)
 * [dctptv] Fix extraction (#14599)
-* [youtube] Restrict embed regex (#14600)
-* [vimeo] Restrict iframe embed regex (#14600)
+* [youtube] Restrict embed regular expression (#14600)
+* [vimeo] Restrict iframe embed regular expression (#14600)
 * [soundgasm] Improve extraction (#14588)
 - [myvideo] Remove extractor (#8557)
 + [nbc] Add support for classic-tv videos (#14575)
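
As a hedged illustration of the "container" meta field added for formats extracted in _parse_mpd_formats (2017.12.31, #13616), an embedding script could inspect the field roughly as follows; this sketch is not part of the patch and the URL is only a placeholder.

import youtube_dl

# Illustrative embedding sketch: print the container reported for each extracted format.
# 'container' is only present for formats where the extractor sets it.
with youtube_dl.YoutubeDL({'quiet': True}) as ydl:
    info = ydl.extract_info('https://example.com/some-dash-page', download=False)
    for f in info.get('formats', []):
        print(f.get('format_id'), f.get('ext'), f.get('container'))
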
diff --git a/MANIFEST.in b/MANIFEST.in
index 5743f605a2ab4e93e76416732f6e42b252e87150..4e43e99f394dfc8506447ea4e8328467f5f6a8f5 100644 (file)
@@ -1,7 +1,9 @@
 include README.md
-include test/*.py
-include test/*.json
+include LICENSE
+include AUTHORS
+include ChangeLog
 include youtube-dl.bash-completion
 include youtube-dl.fish
 include youtube-dl.1
 recursive-include docs Makefile conf.py *.rst
+recursive-include test *
diff --git a/Makefile b/Makefile
index c74eea7922e9308d70b5c88a3d5c068e4e1f7826..fe247810fead99df5f519c9e8b8b66dd373a50f1 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -36,8 +36,17 @@ test:
 
 ot: offlinetest
 
+# Keep this list in sync with devscripts/run_tests.sh
 offlinetest: codetest
-       $(PYTHON) -m nose --verbose test --exclude test_download.py --exclude test_age_restriction.py --exclude test_subtitles.py --exclude test_write_annotations.py --exclude test_youtube_lists.py --exclude test_iqiyi_sdk_interpreter.py --exclude test_socks.py
+       $(PYTHON) -m nose --verbose test \
+               --exclude test_age_restriction.py \
+               --exclude test_download.py \
+               --exclude test_iqiyi_sdk_interpreter.py \
+               --exclude test_socks.py \
+               --exclude test_subtitles.py \
+               --exclude test_write_annotations.py \
+               --exclude test_youtube_lists.py \
+               --exclude test_youtube_signature.py
 
 tar: youtube-dl.tar.gz
 
@@ -101,7 +110,7 @@ _EXTRACTOR_FILES = $(shell find youtube_dl/extractor -iname '*.py' -and -not -in
 youtube_dl/extractor/lazy_extractors.py: devscripts/make_lazy_extractors.py devscripts/lazy_load_template.py $(_EXTRACTOR_FILES)
        $(PYTHON) devscripts/make_lazy_extractors.py $@
 
-youtube-dl.tar.gz: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish ChangeLog
+youtube-dl.tar.gz: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish ChangeLog AUTHORS
        @tar -czf youtube-dl.tar.gz --transform "s|^|youtube-dl/|" --owner 0 --group 0 \
                --exclude '*.DS_Store' \
                --exclude '*.kate-swp' \
@@ -110,11 +119,10 @@ youtube-dl.tar.gz: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-
                --exclude '*~' \
                --exclude '__pycache__' \
                --exclude '.git' \
-               --exclude 'testdata' \
                --exclude 'docs/_build' \
                -- \
                bin devscripts test youtube_dl docs \
-               ChangeLog LICENSE README.md README.txt \
+               ChangeLog AUTHORS LICENSE README.md README.txt \
                Makefile MANIFEST.in youtube-dl.1 youtube-dl.bash-completion \
-               youtube-dl.zsh youtube-dl.fish setup.py \
+               youtube-dl.zsh youtube-dl.fish setup.py setup.cfg \
                youtube-dl
diff --git a/README.md b/README.md
index ea321d5362060ec68642508e35d31ec449d3ab6f..47b0640abfd179a13909835e2e05af589bb60e6d 100644 (file)
--- a/README.md
+++ b/README.md
@@ -511,6 +511,9 @@ The basic usage is not to set any template arguments when downloading a single f
 - `average_rating` (numeric): Average rating given by users, the scale used depends on the webpage
  - `comment_count` (numeric): Number of comments on the video
  - `age_limit` (numeric): Age restriction for the video (years)
+ - `is_live` (boolean): Whether this video is a live stream or a fixed-length video
+ - `start_time` (numeric): Time in seconds where the reproduction should start, as specified in the URL
+ - `end_time` (numeric): Time in seconds where the reproduction should end, as specified in the URL
  - `format` (string): A human-readable description of the format 
  - `format_id` (string): Format code specified by `--format`
  - `format_note` (string): Additional info about the format
@@ -536,6 +539,8 @@ The basic usage is not to set any template arguments when downloading a single f
  - `playlist_index` (numeric): Index of the video in the playlist padded with leading zeros according to the total length of the playlist
  - `playlist_id` (string): Playlist identifier
  - `playlist_title` (string): Playlist title
+ - `playlist_uploader` (string): Full name of the playlist uploader
+ - `playlist_uploader_id` (string): Nickname or id of the playlist uploader
 
 Available for the video that belongs to some logical chapter or section:
 
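
The playlist_uploader and playlist_uploader_id fields documented above can also be used from the embedding API; the snippet below is only a sketch equivalent to passing the same template via -o, and the playlist URL is a placeholder.

import youtube_dl

ydl_opts = {
    # Output template using the playlist uploader fields introduced in 2017.12.23
    'outtmpl': '%(playlist_uploader)s [%(playlist_uploader_id)s]/%(playlist_index)s - %(title)s.%(ext)s',
}
with youtube_dl.YoutubeDL(ydl_opts) as ydl:
    ydl.download(['https://www.youtube.com/playlist?list=PLACEHOLDER'])
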
diff --git a/README.txt b/README.txt
index 4b7adfd68c28eb9df629d0904334a29670c663d0..0a748ea7bab0075b8abd3984fc48685ed8d7d253 100644 (file)
@@ -596,6 +596,12 @@ with sequence type are:
     used depends on the webpage
 -   comment_count (numeric): Number of comments on the video
 -   age_limit (numeric): Age restriction for the video (years)
+-   is_live (boolean): Whether this video is a live stream or a
+    fixed-length video
+-   start_time (numeric): Time in seconds where the reproduction should
+    start, as specified in the URL
+-   end_time (numeric): Time in seconds where the reproduction should
+    end, as specified in the URL
 -   format (string): A human-readable description of the format
 -   format_id (string): Format code specified by --format
 -   format_note (string): Additional info about the format
@@ -625,6 +631,9 @@ with sequence type are:
     with leading zeros according to the total length of the playlist
 -   playlist_id (string): Playlist identifier
 -   playlist_title (string): Playlist title
+-   playlist_uploader (string): Full name of the playlist uploader
+-   playlist_uploader_id (string): Nickname or id of the playlist
+    uploader
 
 Available for the video that belongs to some logical chapter or section:
 
diff --git a/devscripts/run_tests.sh b/devscripts/run_tests.sh
index 6ba26720dcc57929f2bad9550c5a256a3fc09953..dd37a80f5c8cdc37eda95293706fba5d4939a603 100755 (executable)
@@ -1,6 +1,7 @@
 #!/bin/bash
 
-DOWNLOAD_TESTS="age_restriction|download|subtitles|write_annotations|iqiyi_sdk_interpreter|youtube_lists"
+# Keep this list in sync with the `offlinetest` target in Makefile
+DOWNLOAD_TESTS="age_restriction|download|iqiyi_sdk_interpreter|socks|subtitles|write_annotations|youtube_lists|youtube_signature"
 
 test_set=""
 multiprocess_args=""
diff --git a/docs/supportedsites.md b/docs/supportedsites.md
index 6009df571204fdcd58bc1a47c9c66f10e8f45330..75bd5c922ac59e873d5ca998797b4ad3eadef8df 100644 (file)
@@ -3,6 +3,7 @@
  - **1up.com**
  - **20min**
  - **220.ro**
+ - **23video**
  - **24video**
  - **3qsdn**: 3Q SDN
  - **3sat**
@@ -10,6 +11,7 @@
  - **56.com**
  - **5min**
  - **6play**
+ - **7plus**
  - **8tracks**
  - **91porn**
  - **9c9media**
  - **BokeCC**
  - **BostonGlobe**
  - **Bpb**: Bundeszentrale für politische Bildung
- - **BR**: Bayerischer Rundfunk Mediathek
+ - **BR**: Bayerischer Rundfunk
  - **BravoTV**
  - **Break**
  - **brightcove:legacy**
  - **brightcove:new**
+ - **BRMediathek**: Bayerischer Rundfunk Mediathek
  - **bt:article**: Bergens Tidende Articles
  - **bt:vestlendingen**: Bergens Tidende - Vestlendingen
  - **BuzzFeed**
  - **BYUtv**
- - **BYUtvEvent**
  - **Camdemy**
  - **CamdemyFolder**
  - **CamWithHer**
  - **CNN**
  - **CNNArticle**
  - **CNNBlogs**
- - **CollegeRama**
  - **ComCarCoff**
  - **ComedyCentral**
  - **ComedyCentralFullEpisodes**
  - **dailymotion**
  - **dailymotion:playlist**
  - **dailymotion:user**
- - **DailymotionCloud**
- - **Daisuki**
- - **DaisukiPlaylist**
+ - **DaisukiMotto**
+ - **DaisukiMottoPlaylist**
  - **daum.net**
  - **daum.net:clip**
  - **daum.net:playlist**
  - **eHow**
  - **Einthusan**
  - **eitb.tv**
- - **EllenTV**
- - **EllenTV:clips**
+ - **EllenTube**
+ - **EllenTubePlaylist**
+ - **EllenTubeVideo**
  - **ElPais**: El País
  - **Embedly**
  - **EMPFlix**
  - **fc2**
  - **fc2:embed**
  - **Fczenit**
- - **fernsehkritik.tv**
  - **filmon**
  - **filmon:channel**
- - **Firstpost**
+ - **Filmweb**
+ - **FiveThirtyEight**
  - **FiveTV**
  - **Flickr**
  - **Flipagram**
  - **foxnews:article**
  - **foxnews:insider**
  - **FoxSports**
- - **france2.fr:generation-quoi**
+ - **france2.fr:generation-what**
  - **FranceCulture**
  - **FranceInter**
  - **FranceTV**
  - **GameInformer**
  - **GameOne**
  - **gameone:playlist**
- - **Gamersyde**
  - **GameSpot**
  - **GameStar**
  - **Gaskrank**
  - **InfoQ**
  - **Instagram**
  - **instagram:user**: Instagram user profile
+ - **Internazionale**
  - **InternetVideoArchive**
  - **IPrima**
  - **iqiyi**: 爱奇艺
  - **mangomolo:live**
  - **mangomolo:video**
  - **ManyVids**
+ - **massengeschmack.tv**
  - **MatchTV**
  - **MDR**: MDR.DE and KiKA
  - **media.ccc.de**
  - **Medialaan**
  - **Mediaset**
+ - **Mediasite**
  - **Medici**
  - **megaphone.fm**: megaphone.fm embedded players
  - **Meipai**: 美拍
  - **nhl.com:videocenter:category**: NHL videocenter category
  - **nick.com**
  - **nick.de**
+ - **nickelodeon:br**
  - **nickelodeonru**
  - **nicknight**
  - **niconico**: ニコニコ動画
  - **nowness**
  - **nowness:playlist**
  - **nowness:series**
- - **NowTV** (Currently broken)
- - **NowTVList**
  - **nowvideo**: NowVideo
  - **Noz**
  - **npo**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl
  - **pcmag**
  - **PearVideo**
  - **People**
+ - **PerformGroup**
  - **periscope**: Periscope
  - **periscope:user**: Periscope user videos
  - **PhilharmonieDeParis**: Philharmonie de Paris
  - **Rai**
  - **RaiPlay**
  - **RaiPlayLive**
+ - **RaiPlayPlaylist**
  - **RBMARadio**
  - **RDS**: RDS.ca
  - **RedBullTV**
  - **safari**: safaribooksonline.com online video
  - **safari:api**
  - **safari:course**: safaribooksonline.com online courses
- - **Sandia**: Sandia National Laboratories
  - **Sapo**: SAPO Vídeos
  - **savefrom.net**
  - **SBS**: sbs.com.au
  - **Servus**
  - **Sexu**
  - **Shahid**
+ - **ShahidShow**
  - **Shared**: shared.sx
  - **ShowRoomLive**
  - **Sina**
  - **streamcloud.eu**
  - **StreamCZ**
  - **StreetVoice**
+ - **StretchInternet**
  - **SunPorno**
  - **SVT**
  - **SVTPlay**: SVT Play and Öppet arkiv
  - **tagesschau:player**
  - **Tass**
  - **TastyTrade**
- - **TBS** (Currently broken)
+ - **TBS**
  - **TDSLifeway**
  - **teachertube**: teachertube.com videos
  - **teachertube:user:collection**: teachertube.com user and collection videos
  - **tvland.com**
  - **TVN24**
  - **TVNoe**
+ - **TVNow**
+ - **TVNowList**
  - **tvp**: Telewizja Polska
  - **tvp:embed**: Telewizja Polska
  - **tvp:series**
  - **udemy**
  - **udemy:course**
  - **UDNEmbed**: 聯合影音
+ - **UFCTV**
  - **UKTVPlay**
+ - **umg:de**: Universal Music Deutschland
  - **Unistra**
  - **Unity**
  - **uol.com.br**
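
To check which of the extractors listed above would claim a particular URL, one purely illustrative option is to walk the installed extractor list; nothing here is part of the patch and the URL is a placeholder.

from youtube_dl.extractor import gen_extractors

# Illustrative only: print the names of non-generic extractors whose URL pattern matches.
url = 'https://example.com/placeholder'
for ie in gen_extractors():
    if ie.suitable(url) and ie.IE_NAME != 'generic':
        print(ie.IE_NAME)
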
diff --git a/setup.cfg b/setup.cfg
new file mode 100644 (file)
index 0000000..2dc06ff
--- /dev/null
+++ b/setup.cfg
@@ -0,0 +1,6 @@
+[wheel]
+universal = True
+
+[flake8]
+exclude = youtube_dl/extractor/__init__.py,devscripts/buildserver.py,devscripts/lazy_load_template.py,devscripts/make_issue_template.py,setup.py,build,.git
+ignore = E402,E501,E731
diff --git a/setup.py b/setup.py
index 67d6633ed6f8301f6fdd31a079a6e4dd1ce0b6b3..7dbb5805f8e124dd3f4634255d9090940b2880e4 100644 (file)
--- a/setup.py
+++ b/setup.py
@@ -109,6 +109,7 @@ setup(
     author_email='ytdl@yt-dl.org',
     maintainer='Sergey M.',
     maintainer_email='dstftw@gmail.com',
+    license='Unlicense',
     packages=[
         'youtube_dl',
         'youtube_dl.extractor', 'youtube_dl.downloader',
diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py
index 686c63efac7ff4b94ca6012ab7affdade8b7c008..7b31d5198b561f7e3725a15055439ca5f35b791b 100644 (file)
@@ -493,9 +493,20 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
         _TEST_CASES = [
             (
                 # https://github.com/rg3/youtube-dl/issues/13919
+                # Also tests duplicate representation ids, see
+                # https://github.com/rg3/youtube-dl/issues/15111
                 'float_duration',
                 'http://unknown/manifest.mpd',
                 [{
+                    'manifest_url': 'http://unknown/manifest.mpd',
+                    'ext': 'm4a',
+                    'format_id': '318597',
+                    'format_note': 'DASH audio',
+                    'protocol': 'http_dash_segments',
+                    'acodec': 'mp4a.40.2',
+                    'vcodec': 'none',
+                    'tbr': 61.587,
+                }, {
                     'manifest_url': 'http://unknown/manifest.mpd',
                     'ext': 'mp4',
                     'format_id': '318597',
@@ -562,7 +573,89 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
                     'width': 1920,
                     'height': 1080,
                 }]
-            ),
+            ), (
+                # https://github.com/rg3/youtube-dl/pull/14844
+                'urls_only',
+                'http://unknown/manifest.mpd',
+                [{
+                    'manifest_url': 'http://unknown/manifest.mpd',
+                    'ext': 'mp4',
+                    'format_id': 'h264_aac_144p_m4s',
+                    'format_note': 'DASH video',
+                    'protocol': 'http_dash_segments',
+                    'acodec': 'mp4a.40.2',
+                    'vcodec': 'avc3.42c01e',
+                    'tbr': 200,
+                    'width': 256,
+                    'height': 144,
+                }, {
+                    'manifest_url': 'http://unknown/manifest.mpd',
+                    'ext': 'mp4',
+                    'format_id': 'h264_aac_240p_m4s',
+                    'format_note': 'DASH video',
+                    'protocol': 'http_dash_segments',
+                    'acodec': 'mp4a.40.2',
+                    'vcodec': 'avc3.42c01e',
+                    'tbr': 400,
+                    'width': 424,
+                    'height': 240,
+                }, {
+                    'manifest_url': 'http://unknown/manifest.mpd',
+                    'ext': 'mp4',
+                    'format_id': 'h264_aac_360p_m4s',
+                    'format_note': 'DASH video',
+                    'protocol': 'http_dash_segments',
+                    'acodec': 'mp4a.40.2',
+                    'vcodec': 'avc3.42c01e',
+                    'tbr': 800,
+                    'width': 640,
+                    'height': 360,
+                }, {
+                    'manifest_url': 'http://unknown/manifest.mpd',
+                    'ext': 'mp4',
+                    'format_id': 'h264_aac_480p_m4s',
+                    'format_note': 'DASH video',
+                    'protocol': 'http_dash_segments',
+                    'acodec': 'mp4a.40.2',
+                    'vcodec': 'avc3.42c01e',
+                    'tbr': 1200,
+                    'width': 856,
+                    'height': 480,
+                }, {
+                    'manifest_url': 'http://unknown/manifest.mpd',
+                    'ext': 'mp4',
+                    'format_id': 'h264_aac_576p_m4s',
+                    'format_note': 'DASH video',
+                    'protocol': 'http_dash_segments',
+                    'acodec': 'mp4a.40.2',
+                    'vcodec': 'avc3.42c01e',
+                    'tbr': 1600,
+                    'width': 1024,
+                    'height': 576,
+                }, {
+                    'manifest_url': 'http://unknown/manifest.mpd',
+                    'ext': 'mp4',
+                    'format_id': 'h264_aac_720p_m4s',
+                    'format_note': 'DASH video',
+                    'protocol': 'http_dash_segments',
+                    'acodec': 'mp4a.40.2',
+                    'vcodec': 'avc3.42c01e',
+                    'tbr': 2400,
+                    'width': 1280,
+                    'height': 720,
+                }, {
+                    'manifest_url': 'http://unknown/manifest.mpd',
+                    'ext': 'mp4',
+                    'format_id': 'h264_aac_1080p_m4s',
+                    'format_note': 'DASH video',
+                    'protocol': 'http_dash_segments',
+                    'acodec': 'mp4a.40.2',
+                    'vcodec': 'avc3.42c01e',
+                    'tbr': 4400,
+                    'width': 1920,
+                    'height': 1080,
+                }]
+            )
         ]
 
         for mpd_file, mpd_url, expected_formats in _TEST_CASES:
@@ -601,5 +694,6 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
                 self.ie._sort_formats(formats)
                 expect_value(self, formats, expected_formats, None)
 
+
 if __name__ == '__main__':
     unittest.main()
diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py
index 4af92fbd4b8ac0cff84b290bc737f173cc6b34ca..f0f5a84708d12ef28650b45d0494520dd4f71da1 100644 (file)
@@ -466,11 +466,11 @@ class TestFormatSelection(unittest.TestCase):
         ydl = YDL({'simulate': True})
         self.assertEqual(ydl._default_format_spec({}), 'bestvideo+bestaudio/best')
 
-        ydl = YDL({'is_live': True})
-        self.assertEqual(ydl._default_format_spec({}), 'best/bestvideo+bestaudio')
+        ydl = YDL({})
+        self.assertEqual(ydl._default_format_spec({'is_live': True}), 'best/bestvideo+bestaudio')
 
-        ydl = YDL({'simulate': True, 'is_live': True})
-        self.assertEqual(ydl._default_format_spec({}), 'bestvideo+bestaudio/best')
+        ydl = YDL({'simulate': True})
+        self.assertEqual(ydl._default_format_spec({'is_live': True}), 'bestvideo+bestaudio/best')
 
         ydl = YDL({'outtmpl': '-'})
         self.assertEqual(ydl._default_format_spec({}), 'best/bestvideo+bestaudio')
diff --git a/test/test_utils.py b/test/test_utils.py
index cc13f795c338d816b442d9d338262eb9323f4f64..0857c0fc0cef84b7e09a09ef77dc815d4a508cef 100644 (file)
@@ -343,6 +343,7 @@ class TestUtil(unittest.TestCase):
         self.assertEqual(unified_timestamp('Feb 7, 2016 at 6:35 pm'), 1454870100)
         self.assertEqual(unified_timestamp('2017-03-30T17:52:41Q'), 1490896361)
         self.assertEqual(unified_timestamp('Sep 11, 2013 | 5:49 AM'), 1378878540)
+        self.assertEqual(unified_timestamp('December 15, 2017 at 7:49 am'), 1513324140)
 
     def test_determine_ext(self):
         self.assertEqual(determine_ext('http://example.com/foo/bar.mp4/?download'), 'mp4')
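
The assertion added above corresponds to the "[utils] Add another date format pattern (#14999)" ChangeLog entry; as a quick illustrative check (not part of the patch):

from youtube_dl.utils import unified_timestamp

# The newly recognised pattern; unified_timestamp returns a Unix timestamp.
print(unified_timestamp('December 15, 2017 at 7:49 am'))  # -> 1513324140
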
diff --git a/test/testdata/f4m/custom_base_url.f4m b/test/testdata/f4m/custom_base_url.f4m
new file mode 100644 (file)
index 0000000..74e1539
--- /dev/null
@@ -0,0 +1,10 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<manifest xmlns="http://ns.adobe.com/f4m/1.0">
+    <streamType>recorded</streamType>
+    <baseURL>http://vod.livestream.com/events/0000000000673980/</baseURL>
+    <duration>269.293</duration>
+    <bootstrapInfo profile="named" id="bootstrap_1">AAAAm2Fic3QAAAAAAAAAAQAAAAPoAAAAAAAEG+0AAAAAAAAAAAAAAAAAAQAAABlhc3J0AAAAAAAAAAABAAAAAQAAAC4BAAAAVmFmcnQAAAAAAAAD6AAAAAAEAAAAAQAAAAAAAAAAAAAXcAAAAC0AAAAAAAQHQAAAE5UAAAAuAAAAAAAEGtUAAAEYAAAAAAAAAAAAAAAAAAAAAAA=</bootstrapInfo>
+    <media url="b90f532f-b0f6-4f4e-8289-706d490b2fd8_2292" bootstrapInfoId="bootstrap_1" bitrate="2148" width="1280" height="720" videoCodec="avc1.4d401f" audioCodec="mp4a.40.2">
+        <metadata>AgAKb25NZXRhRGF0YQgAAAAIAAhkdXJhdGlvbgBAcNSwIMSbpgAFd2lkdGgAQJQAAAAAAAAABmhlaWdodABAhoAAAAAAAAAJZnJhbWVyYXRlAEA4/7DoLwW3AA12aWRlb2RhdGFyYXRlAECe1DLgjcobAAx2aWRlb2NvZGVjaWQAQBwAAAAAAAAADWF1ZGlvZGF0YXJhdGUAQGSimlvaPKQADGF1ZGlvY29kZWNpZABAJAAAAAAAAAAACQ==</metadata>
+    </media>
+</manifest>
diff --git a/test/testdata/m3u8/pluzz_francetv_11507.m3u8 b/test/testdata/m3u8/pluzz_francetv_11507.m3u8
new file mode 100644 (file)
index 0000000..0809f5a
--- /dev/null
@@ -0,0 +1,14 @@
+#EXTM3U
+    \r#EXT-X-VERSION:5
+    \r#EXT-X-MEDIA:TYPE=SUBTITLES,GROUP-ID="subs",NAME="Francais",DEFAULT=NO,FORCED=NO,URI="http://replayftv-pmd.francetv.fr/subtitles/2017/16/156589847-1492488987.m3u8",LANGUAGE="fra"
+    \r#EXT-X-MEDIA:TYPE=AUDIO,GROUP-ID="aac",LANGUAGE="fra",NAME="Francais",DEFAULT=YES, AUTOSELECT=YES
+#EXT-X-STREAM-INF:SUBTITLES="subs",AUDIO="aac",PROGRAM-ID=1,BANDWIDTH=180000,RESOLUTION=256x144,CODECS="avc1.66.30, mp4a.40.2"
+http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/index_0_av.m3u8?null=0
+#EXT-X-STREAM-INF:SUBTITLES="subs",AUDIO="aac",PROGRAM-ID=1,BANDWIDTH=303000,RESOLUTION=320x180,CODECS="avc1.66.30, mp4a.40.2"
+http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/index_1_av.m3u8?null=0
+#EXT-X-STREAM-INF:SUBTITLES="subs",AUDIO="aac",PROGRAM-ID=1,BANDWIDTH=575000,RESOLUTION=512x288,CODECS="avc1.66.30, mp4a.40.2"
+http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/index_2_av.m3u8?null=0
+#EXT-X-STREAM-INF:SUBTITLES="subs",AUDIO="aac",PROGRAM-ID=1,BANDWIDTH=831000,RESOLUTION=704x396,CODECS="avc1.77.30, mp4a.40.2"
+http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/index_3_av.m3u8?null=0
+#EXT-X-STREAM-INF:SUBTITLES="subs",AUDIO="aac",PROGRAM-ID=1,BANDWIDTH=1467000,RESOLUTION=1024x576,CODECS="avc1.77.30, mp4a.40.2"
+http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/index_4_av.m3u8?null=0
diff --git a/test/testdata/m3u8/teamcoco_11995.m3u8 b/test/testdata/m3u8/teamcoco_11995.m3u8
new file mode 100644 (file)
index 0000000..a6e4216
--- /dev/null
@@ -0,0 +1,16 @@
+#EXTM3U
+#EXT-X-MEDIA:TYPE=AUDIO,GROUP-ID="audio-0",NAME="Default",AUTOSELECT=YES,DEFAULT=YES,URI="hls/CONAN_020217_Highlight_show-audio-160k_v4.m3u8"
+#EXT-X-MEDIA:TYPE=AUDIO,GROUP-ID="audio-1",NAME="Default",AUTOSELECT=YES,DEFAULT=YES,URI="hls/CONAN_020217_Highlight_show-audio-64k_v4.m3u8"
+#EXT-X-I-FRAME-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=37862000,CODECS="avc1.4d001f",URI="hls/CONAN_020217_Highlight_show-2m_iframe.m3u8"
+#EXT-X-I-FRAME-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=18750000,CODECS="avc1.4d001e",URI="hls/CONAN_020217_Highlight_show-1m_iframe.m3u8"
+#EXT-X-I-FRAME-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=6535000,CODECS="avc1.42001e",URI="hls/CONAN_020217_Highlight_show-400k_iframe.m3u8"
+#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=2374000,RESOLUTION=1024x576,CODECS="avc1.4d001f,mp4a.40.2",AUDIO="audio-0"
+hls/CONAN_020217_Highlight_show-2m_v4.m3u8
+#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=1205000,RESOLUTION=640x360,CODECS="avc1.4d001e,mp4a.40.2",AUDIO="audio-0"
+hls/CONAN_020217_Highlight_show-1m_v4.m3u8
+#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=522000,RESOLUTION=400x224,CODECS="avc1.42001e,mp4a.40.2",AUDIO="audio-0"
+hls/CONAN_020217_Highlight_show-400k_v4.m3u8
+#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=413000,RESOLUTION=400x224,CODECS="avc1.42001e,mp4a.40.5",AUDIO="audio-1"
+hls/CONAN_020217_Highlight_show-400k_v4.m3u8
+#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=71000,CODECS="mp4a.40.5",AUDIO="audio-1"
+hls/CONAN_020217_Highlight_show-audio-64k_v4.m3u8
diff --git a/test/testdata/m3u8/toggle_mobile_12211.m3u8 b/test/testdata/m3u8/toggle_mobile_12211.m3u8
new file mode 100644 (file)
index 0000000..69604e6
--- /dev/null
@@ -0,0 +1,13 @@
+#EXTM3U
+#EXT-X-VERSION:4
+#EXT-X-MEDIA:TYPE=AUDIO,GROUP-ID="audio",LANGUAGE="eng",NAME="English",URI="http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/2/pv/1/flavorId/0_sa2ntrdg/name/a.mp4/index.m3u8"
+#EXT-X-MEDIA:TYPE=AUDIO,GROUP-ID="audio",LANGUAGE="und",NAME="Undefined",URI="http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/2/pv/1/flavorId/0_r7y0nitg/name/a.mp4/index.m3u8"
+
+#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=155648,RESOLUTION=320x180,AUDIO="audio"
+http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/2/pv/1/flavorId/0_qlk9hlzr/name/a.mp4/index.m3u8
+#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=502784,RESOLUTION=480x270,AUDIO="audio"
+http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/2/pv/1/flavorId/0_oefackmi/name/a.mp4/index.m3u8
+#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=827392,RESOLUTION=640x360,AUDIO="audio"
+http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/12/pv/1/flavorId/0_vyg9pj7k/name/a.mp4/index.m3u8
+#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=1396736,RESOLUTION=854x480,AUDIO="audio"
+http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/12/pv/1/flavorId/0_50n4psvx/name/a.mp4/index.m3u8
diff --git a/test/testdata/m3u8/twitch_vod.m3u8 b/test/testdata/m3u8/twitch_vod.m3u8
new file mode 100644 (file)
index 0000000..7617277
--- /dev/null
@@ -0,0 +1,20 @@
+#EXTM3U
+#EXT-X-TWITCH-INFO:ORIGIN="s3",CLUSTER="edgecast_vod",REGION="EU",MANIFEST-CLUSTER="edgecast_vod",USER-IP="109.171.17.81"
+#EXT-X-MEDIA:TYPE=VIDEO,GROUP-ID="chunked",NAME="Source",AUTOSELECT=YES,DEFAULT=YES
+#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=3214134,CODECS="avc1.100.31,mp4a.40.2",RESOLUTION="1280x720",VIDEO="chunked"
+https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/chunked/index-muted-HM49I092CC.m3u8
+#EXT-X-MEDIA:TYPE=VIDEO,GROUP-ID="high",NAME="High",AUTOSELECT=YES,DEFAULT=YES
+#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=1603789,CODECS="avc1.42C01F,mp4a.40.2",RESOLUTION="1280x720",VIDEO="high"
+https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/high/index-muted-HM49I092CC.m3u8
+#EXT-X-MEDIA:TYPE=VIDEO,GROUP-ID="medium",NAME="Medium",AUTOSELECT=YES,DEFAULT=YES
+#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=893387,CODECS="avc1.42C01E,mp4a.40.2",RESOLUTION="852x480",VIDEO="medium"
+https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/medium/index-muted-HM49I092CC.m3u8
+#EXT-X-MEDIA:TYPE=VIDEO,GROUP-ID="low",NAME="Low",AUTOSELECT=YES,DEFAULT=YES
+#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=628347,CODECS="avc1.42C01E,mp4a.40.2",RESOLUTION="640x360",VIDEO="low"
+https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/low/index-muted-HM49I092CC.m3u8
+#EXT-X-MEDIA:TYPE=VIDEO,GROUP-ID="mobile",NAME="Mobile",AUTOSELECT=YES,DEFAULT=YES
+#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=280474,CODECS="avc1.42C00D,mp4a.40.2",RESOLUTION="400x226",VIDEO="mobile"
+https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/mobile/index-muted-HM49I092CC.m3u8
+#EXT-X-MEDIA:TYPE=VIDEO,GROUP-ID="audio_only",NAME="Audio Only",AUTOSELECT=NO,DEFAULT=NO
+#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=182725,CODECS="mp4a.40.2",VIDEO="audio_only"
+https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/audio_only/index-muted-HM49I092CC.m3u8
diff --git a/test/testdata/m3u8/vidio.m3u8 b/test/testdata/m3u8/vidio.m3u8
new file mode 100644 (file)
index 0000000..89c2444
--- /dev/null
@@ -0,0 +1,10 @@
+#EXTM3U
+
+#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=300000,RESOLUTION=480x270,NAME="270p 3G"
+https://cdn1-a.production.vidio.static6.com/uploads/165683/dj_ambred-4383-b300.mp4.m3u8
+
+#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=600000,RESOLUTION=640x360,NAME="360p SD"
+https://cdn1-a.production.vidio.static6.com/uploads/165683/dj_ambred-4383-b600.mp4.m3u8
+
+#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=1200000,RESOLUTION=1280x720,NAME="720p HD"
+https://cdn1-a.production.vidio.static6.com/uploads/165683/dj_ambred-4383-b1200.mp4.m3u8
diff --git a/test/testdata/mpd/float_duration.mpd b/test/testdata/mpd/float_duration.mpd
new file mode 100644 (file)
index 0000000..8dc1d2d
--- /dev/null
@@ -0,0 +1,18 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<MPD xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="urn:mpeg:dash:schema:mpd:2011" type="static" minBufferTime="PT2S" profiles="urn:mpeg:dash:profile:isoff-on-demand:2011" mediaPresentationDuration="PT6014S">
+       <Period bitstreamSwitching="true">
+               <AdaptationSet mimeType="audio/mp4" codecs="mp4a.40.2" startWithSAP="1" segmentAlignment="true">
+                       <SegmentTemplate timescale="1000000" presentationTimeOffset="0" initialization="ai_$RepresentationID$.mp4d" media="a_$RepresentationID$_$Number$.mp4d" duration="2000000.0" startNumber="0"></SegmentTemplate>
+                       <Representation id="318597" bandwidth="61587"></Representation>
+               </AdaptationSet>
+               <AdaptationSet mimeType="video/mp4" startWithSAP="1" segmentAlignment="true">
+                       <SegmentTemplate timescale="1000000" presentationTimeOffset="0" initialization="vi_$RepresentationID$.mp4d" media="v_$RepresentationID$_$Number$.mp4d" duration="2000000.0" startNumber="0"></SegmentTemplate>
+                       <Representation id="318597" codecs="avc1.42001f" width="340" height="192" bandwidth="318597"></Representation>
+                       <Representation id="638590" codecs="avc1.42001f" width="512" height="288" bandwidth="638590"></Representation>
+                       <Representation id="1022565" codecs="avc1.4d001f" width="688" height="384" bandwidth="1022565"></Representation>
+                       <Representation id="2046506" codecs="avc1.4d001f" width="1024" height="576" bandwidth="2046506"></Representation>
+                       <Representation id="3998017" codecs="avc1.640029" width="1280" height="720" bandwidth="3998017"></Representation>
+                       <Representation id="5997485" codecs="avc1.640032" width="1920" height="1080" bandwidth="5997485"></Representation>
+               </AdaptationSet>
+       </Period>
+</MPD>
\ No newline at end of file
diff --git a/test/testdata/mpd/urls_only.mpd b/test/testdata/mpd/urls_only.mpd
new file mode 100644 (file)
index 0000000..2b9d595
--- /dev/null
@@ -0,0 +1,218 @@
+<?xml version="1.0" ?>
+<MPD maxSegmentDuration="PT0H0M10.000S" mediaPresentationDuration="PT0H4M1.728S" minBufferTime="PT1.500S" profiles="urn:mpeg:dash:profile:isoff-main:2011" type="static" xmlns="urn:mpeg:dash:schema:mpd:2011">
+  <Period duration="PT0H4M1.728S">
+    <AdaptationSet bitstreamSwitching="true" lang="und" maxHeight="1080" maxWidth="1920" par="16:9" segmentAlignment="true">
+      <ContentComponent contentType="video" id="1"/>
+      <Representation audioSamplingRate="44100" bandwidth="200000" codecs="avc3.42c01e,mp4a.40.2" frameRate="25" height="144" id="h264_aac_144p_m4s" mimeType="video/mp4" sar="1:1" startWithSAP="1" width="256">
+        <SegmentList duration="10000" timescale="1000">
+          <Initialization sourceURL="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/init/432f65a0.mp4"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/0/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/1/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/2/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/3/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/4/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/5/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/6/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/7/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/8/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/9/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/10/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/11/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/12/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/13/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/14/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/15/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/16/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/17/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/18/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/19/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/20/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/21/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/22/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/23/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/24/432f65a0.m4s"/>
+        </SegmentList>
+      </Representation>
+      <Representation audioSamplingRate="44100" bandwidth="400000" codecs="avc3.42c01e,mp4a.40.2" frameRate="25" height="240" id="h264_aac_240p_m4s" mimeType="video/mp4" sar="160:159" startWithSAP="1" width="424">
+        <SegmentList duration="10000" timescale="1000">
+          <Initialization sourceURL="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/init/432f65a0.mp4"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/0/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/1/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/2/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/3/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/4/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/5/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/6/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/7/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/8/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/9/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/10/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/11/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/12/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/13/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/14/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/15/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/16/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/17/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/18/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/19/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/20/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/21/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/22/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/23/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/24/432f65a0.m4s"/>
+        </SegmentList>
+      </Representation>
+      <Representation audioSamplingRate="44100" bandwidth="800000" codecs="avc3.42c01e,mp4a.40.2" frameRate="25" height="360" id="h264_aac_360p_m4s" mimeType="video/mp4" sar="1:1" startWithSAP="1" width="640">
+        <SegmentList duration="10000" timescale="1000">
+          <Initialization sourceURL="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/init/432f65a0.mp4"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/0/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/1/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/2/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/3/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/4/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/5/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/6/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/7/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/8/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/9/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/10/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/11/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/12/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/13/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/14/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/15/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/16/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/17/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/18/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/19/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/20/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/21/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/22/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/23/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/24/432f65a0.m4s"/>
+        </SegmentList>
+      </Representation>
+      <Representation audioSamplingRate="44100" bandwidth="1200000" codecs="avc3.42c01e,mp4a.40.2" frameRate="25" height="480" id="h264_aac_480p_m4s" mimeType="video/mp4" sar="320:321" startWithSAP="1" width="856">
+        <SegmentList duration="10000" timescale="1000">
+          <Initialization sourceURL="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/init/432f65a0.mp4"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/0/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/1/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/2/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/3/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/4/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/5/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/6/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/7/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/8/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/9/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/10/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/11/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/12/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/13/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/14/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/15/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/16/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/17/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/18/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/19/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/20/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/21/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/22/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/23/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/24/432f65a0.m4s"/>
+        </SegmentList>
+      </Representation>
+      <Representation audioSamplingRate="44100" bandwidth="1600000" codecs="avc3.42c01e,mp4a.40.2" frameRate="25" height="576" id="h264_aac_576p_m4s" mimeType="video/mp4" sar="1:1" startWithSAP="1" width="1024">
+        <SegmentList duration="10000" timescale="1000">
+          <Initialization sourceURL="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/init/432f65a0.mp4"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/0/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/1/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/2/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/3/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/4/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/5/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/6/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/7/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/8/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/9/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/10/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/11/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/12/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/13/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/14/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/15/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/16/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/17/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/18/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/19/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/20/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/21/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/22/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/23/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/24/432f65a0.m4s"/>
+        </SegmentList>
+      </Representation>
+      <Representation audioSamplingRate="44100" bandwidth="2400000" codecs="avc3.42c01e,mp4a.40.2" frameRate="25" height="720" id="h264_aac_720p_m4s" mimeType="video/mp4" sar="1:1" startWithSAP="1" width="1280">
+        <SegmentList duration="10000" timescale="1000">
+          <Initialization sourceURL="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/init/432f65a0.mp4"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/0/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/1/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/2/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/3/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/4/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/5/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/6/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/7/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/8/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/9/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/10/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/11/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/12/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/13/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/14/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/15/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/16/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/17/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/18/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/19/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/20/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/21/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/22/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/23/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/24/432f65a0.m4s"/>
+        </SegmentList>
+      </Representation>
+      <Representation audioSamplingRate="44100" bandwidth="4400000" codecs="avc3.42c01e,mp4a.40.2" frameRate="25" height="1080" id="h264_aac_1080p_m4s" mimeType="video/mp4" sar="1:1" startWithSAP="1" width="1920">
+        <SegmentList duration="10000" timescale="1000">
+          <Initialization sourceURL="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/init/432f65a0.mp4"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/0/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/1/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/2/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/3/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/4/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/5/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/6/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/7/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/8/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/9/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/10/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/11/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/12/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/13/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/14/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/15/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/16/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/17/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/18/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/19/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/20/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/21/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/22/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/23/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/24/432f65a0.m4s"/>
+        </SegmentList>
+      </Representation>
+    </AdaptationSet>
+  </Period>
+</MPD>
diff --git a/youtube-dl b/youtube-dl
index 3b69288626280e3c15249625c3a0b8972223a9e6..d00c30eba1c9fbb46745744ed936827f4a71d556 100755 (executable)
Binary files a/youtube-dl and b/youtube-dl differ
diff --git a/youtube-dl.1 b/youtube-dl.1
index 9ab22b088eb9f20093113f24114b551de9843ebd..410fce138b3daa4ec54743527a7d1518a84840f3 100644 (file)
@@ -1066,6 +1066,15 @@ scale used depends on the webpage
 .IP \[bu] 2
 \f[C]age_limit\f[] (numeric): Age restriction for the video (years)
 .IP \[bu] 2
+\f[C]is_live\f[] (boolean): Whether this video is a live stream or a
+fixed\-length video
+.IP \[bu] 2
+\f[C]start_time\f[] (numeric): Time in seconds where the reproduction
+should start, as specified in the URL
+.IP \[bu] 2
+\f[C]end_time\f[] (numeric): Time in seconds where the reproduction
+should end, as specified in the URL
+.IP \[bu] 2
 \f[C]format\f[] (string): A human\-readable description of the format
 .IP \[bu] 2
 \f[C]format_id\f[] (string): Format code specified by
@@ -1120,6 +1129,11 @@ padded with leading zeros according to the total length of the playlist
 \f[C]playlist_id\f[] (string): Playlist identifier
 .IP \[bu] 2
 \f[C]playlist_title\f[] (string): Playlist title
+.IP \[bu] 2
+\f[C]playlist_uploader\f[] (string): Full name of the playlist uploader
+.IP \[bu] 2
+\f[C]playlist_uploader_id\f[] (string): Nickname or id of the playlist
+uploader
 .PP
 Available for the video that belongs to some logical chapter or section:
 .IP \[bu] 2
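As a side illustration (not part of the man-page diff), the playlist_uploader and playlist_uploader_id fields documented above can be used in the output template like any other field. A minimal embedding sketch, assuming the standard documented YoutubeDL API and a placeholder playlist URL:

    import youtube_dl

    ydl_opts = {
        # the new playlist_uploader fields behave like the other template fields
        'outtmpl': '%(playlist_uploader)s/%(playlist_title)s/%(playlist_index)s - %(title)s.%(ext)s',
    }
    with youtube_dl.YoutubeDL(ydl_opts) as ydl:
        ydl.download(['https://www.youtube.com/playlist?list=PL...'])  # placeholder URL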
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index 342d6b47c03cc817994662eeed5d9245f45609de..ace80f14b8bdac8dca7c3d87e08179ead17c5c02 100755 (executable)
@@ -948,7 +948,8 @@ class YoutubeDL(object):
                 report_download(n_entries)
             else:  # iterable
                 if playlistitems:
-                    entries = make_playlistitems_entries(list(ie_entries))
+                    entries = make_playlistitems_entries(list(itertools.islice(
+                        ie_entries, 0, max(playlistitems))))
                 else:
                     entries = list(itertools.islice(
                         ie_entries, playliststart, playlistend))
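A note on the playlistitems change above (illustration only, not patch code): bounding the islice at max(playlistitems) consumes a lazy entry iterable only as far as the highest requested playlist item instead of materialising the whole playlist.

    import itertools

    def take_for_playlistitems(ie_entries, playlistitems):
        # Consume the (possibly lazy) iterable only up to the largest requested
        # playlist item; the actual per-item selection happens afterwards
        # (make_playlistitems_entries in the hunk above).
        return list(itertools.islice(ie_entries, 0, max(playlistitems)))

    # e.g. --playlist-items 2,5 pulls only the first five entries from the generator
    entries = take_for_playlistitems(iter(range(100)), [2, 5])  # -> [0, 1, 2, 3, 4]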
@@ -974,6 +975,8 @@ class YoutubeDL(object):
                     'playlist': playlist,
                     'playlist_id': ie_result.get('id'),
                     'playlist_title': ie_result.get('title'),
+                    'playlist_uploader': ie_result.get('uploader'),
+                    'playlist_uploader_id': ie_result.get('uploader_id'),
                     'playlist_index': i + playliststart,
                     'extractor': ie_result['extractor'],
                     'webpage_url': ie_result['webpage_url'],
diff --git a/youtube_dl/downloader/fragment.py b/youtube_dl/downloader/fragment.py
index 93002e45a93278e9f527a08391b8b5b6bd7df218..ea5e3a4b5df9f957328557b6f822ef5494cfc9a6 100644 (file)
@@ -107,11 +107,12 @@ class FragmentFD(FileDownloader):
     def _append_fragment(self, ctx, frag_content):
         try:
             ctx['dest_stream'].write(frag_content)
+            ctx['dest_stream'].flush()
         finally:
             if self.__do_ytdl_file(ctx):
                 self._write_ytdl_file(ctx)
             if not self.params.get('keep_fragments', False):
-                os.remove(ctx['fragment_filename_sanitized'])
+                os.remove(encodeFilename(ctx['fragment_filename_sanitized']))
             del ctx['fragment_filename_sanitized']
 
     def _prepare_frag_download(self, ctx):
diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py
index 1a6e226c89c809acec5a011b943043e7314d2cfd..4dc3ab46aa534132862ef0c29c34c871996ea2e4 100644 (file)
@@ -163,7 +163,8 @@ class HlsFD(FragmentFD):
                         return False
                     if decrypt_info['METHOD'] == 'AES-128':
                         iv = decrypt_info.get('IV') or compat_struct_pack('>8xq', media_sequence)
-                        decrypt_info['KEY'] = decrypt_info.get('KEY') or self.ydl.urlopen(decrypt_info['URI']).read()
+                        decrypt_info['KEY'] = decrypt_info.get('KEY') or self.ydl.urlopen(
+                            self._prepare_url(info_dict, decrypt_info['URI'])).read()
                         frag_content = AES.new(
                             decrypt_info['KEY'], AES.MODE_CBC, iv).decrypt(frag_content)
                     self._append_fragment(ctx, frag_content)
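For background on the decryption path touched above (a standalone sketch, not patch code): when an AES-128 playlist carries no explicit IV, the IV defaults to the segment's media sequence number packed into 16 bytes, and each fragment is decrypted with AES in CBC mode. The AES name below follows the pycrypto-style import referenced in the hunk.

    import struct
    from Crypto.Cipher import AES  # pycrypto, matching the AES reference above

    def decrypt_fragment(frag_content, key, media_sequence, iv=None):
        # Default IV: 8 zero bytes followed by the big-endian media sequence number
        iv = iv or struct.pack('>8xq', media_sequence)
        return AES.new(key, AES.MODE_CBC, iv).decrypt(frag_content)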
diff --git a/youtube_dl/downloader/http.py b/youtube_dl/downloader/http.py
index 8a6638cc2d47c4afa08104d77e0877f92749ac8a..3ff26ff7086afbf9d1245a144406f891e9ff393c 100644 (file)
@@ -284,8 +284,7 @@ class HttpFD(FileDownloader):
         while count <= retries:
             try:
                 establish_connection()
-                download()
-                return True
+                return download()
             except RetryDownload as e:
                 count += 1
                 if count <= retries:
diff --git a/youtube_dl/extractor/abc.py b/youtube_dl/extractor/abc.py
index 60f753b95c6e89158c3d292bd62e8bee2cd74746..87017ed397bfd75690b0e4cab3821f549d6132cd 100644 (file)
@@ -1,6 +1,9 @@
 from __future__ import unicode_literals
 
+import hashlib
+import hmac
 import re
+import time
 
 from .common import InfoExtractor
 from ..compat import compat_str
@@ -10,6 +13,7 @@ from ..utils import (
     int_or_none,
     parse_iso8601,
     try_get,
+    update_url_query,
 )
 
 
@@ -101,21 +105,24 @@ class ABCIE(InfoExtractor):
 class ABCIViewIE(InfoExtractor):
     IE_NAME = 'abc.net.au:iview'
     _VALID_URL = r'https?://iview\.abc\.net\.au/programs/[^/]+/(?P<id>[^/?#]+)'
+    _GEO_COUNTRIES = ['AU']
 
     # ABC iview programs are normally available for 14 days only.
     _TESTS = [{
-        'url': 'http://iview.abc.net.au/programs/diaries-of-a-broken-mind/ZX9735A001S00',
+        'url': 'http://iview.abc.net.au/programs/call-the-midwife/ZW0898A003S00',
         'md5': 'cde42d728b3b7c2b32b1b94b4a548afc',
         'info_dict': {
-            'id': 'ZX9735A001S00',
+            'id': 'ZW0898A003S00',
             'ext': 'mp4',
-            'title': 'Diaries Of A Broken Mind',
-            'description': 'md5:7de3903874b7a1be279fe6b68718fc9e',
-            'upload_date': '20161010',
-            'uploader_id': 'abc2',
-            'timestamp': 1476064920,
+            'title': 'Series 5 Ep 3',
+            'description': 'md5:e0ef7d4f92055b86c4f33611f180ed79',
+            'upload_date': '20171228',
+            'uploader_id': 'abc1',
+            'timestamp': 1514499187,
+        },
+        'params': {
+            'skip_download': True,
         },
-        'skip': 'Video gone',
     }]
 
     def _real_extract(self, url):
@@ -126,20 +133,30 @@ class ABCIViewIE(InfoExtractor):
         title = video_params.get('title') or video_params['seriesTitle']
         stream = next(s for s in video_params['playlist'] if s.get('type') == 'program')
 
-        format_urls = [
-            try_get(stream, lambda x: x['hds-unmetered'], compat_str)]
-
-        # May have higher quality video
-        sd_url = try_get(
-            stream, lambda x: x['streams']['hds']['sd'], compat_str)
-        if sd_url:
-            format_urls.append(sd_url.replace('metered', 'um'))
-
-        formats = []
-        for format_url in format_urls:
-            if format_url:
-                formats.extend(
-                    self._extract_akamai_formats(format_url, video_id))
+        house_number = video_params.get('episodeHouseNumber')
+        path = '/auth/hls/sign?ts={0}&hn={1}&d=android-mobile'.format(
+            int(time.time()), house_number)
+        sig = hmac.new(
+            'android.content.res.Resources'.encode('utf-8'),
+            path.encode('utf-8'), hashlib.sha256).hexdigest()
+        token = self._download_webpage(
+            'http://iview.abc.net.au{0}&sig={1}'.format(path, sig), video_id)
+
+        def tokenize_url(url, token):
+            return update_url_query(url, {
+                'hdnea': token,
+            })
+
+        for sd in ('sd', 'sd-low'):
+            sd_url = try_get(
+                stream, lambda x: x['streams']['hls'][sd], compat_str)
+            if not sd_url:
+                continue
+            formats = self._extract_m3u8_formats(
+                tokenize_url(sd_url, token), video_id, 'mp4',
+                entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)
+            if formats:
+                break
         self._sort_formats(formats)
 
         subtitles = {}
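Taken on its own (an illustrative sketch, not additional patch code), the token request introduced above boils down to an HMAC-SHA256 of the signing path with a fixed key. The house number below reuses the test video id as an example value; the real value comes from episodeHouseNumber.

    import hashlib
    import hmac
    import time

    house_number = 'ZW0898A003S00'  # example value; real extraction uses episodeHouseNumber
    path = '/auth/hls/sign?ts={0}&hn={1}&d=android-mobile'.format(
        int(time.time()), house_number)
    sig = hmac.new(
        'android.content.res.Resources'.encode('utf-8'),  # static key from the extractor
        path.encode('utf-8'), hashlib.sha256).hexdigest()
    token_url = 'http://iview.abc.net.au{0}&sig={1}'.format(path, sig)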
diff --git a/youtube_dl/extractor/afreecatv.py b/youtube_dl/extractor/afreecatv.py
index e6513c7a4d86caabe282fe874826ef5219826085..513dd81df5cd8c044eaba800facdf76cca3cc509 100644 (file)
@@ -228,10 +228,19 @@ class AfreecaTVIE(InfoExtractor):
                     r'^(\d{8})_', key, 'upload date', default=None)
                 file_duration = int_or_none(file_element.get('duration'))
                 format_id = key if key else '%s_%s' % (video_id, file_num)
-                formats = self._extract_m3u8_formats(
-                    file_url, video_id, 'mp4', entry_protocol='m3u8_native',
-                    m3u8_id='hls',
-                    note='Downloading part %d m3u8 information' % file_num)
+                if determine_ext(file_url) == 'm3u8':
+                    formats = self._extract_m3u8_formats(
+                        file_url, video_id, 'mp4', entry_protocol='m3u8_native',
+                        m3u8_id='hls',
+                        note='Downloading part %d m3u8 information' % file_num)
+                else:
+                    formats = [{
+                        'url': file_url,
+                        'format_id': 'http',
+                    }]
+                if not formats:
+                    continue
+                self._sort_formats(formats)
                 file_info = common_entry.copy()
                 file_info.update({
                     'id': format_id,
diff --git a/youtube_dl/extractor/animeondemand.py b/youtube_dl/extractor/animeondemand.py
index 69d36331157010cf5411316096b1a940be5422c1..e4fa72f466c188a318978f750b9af0344d53a980 100644 (file)
@@ -78,15 +78,15 @@ class AnimeOnDemandIE(InfoExtractor):
             post_url = urljoin(self._LOGIN_URL, post_url)
 
         response = self._download_webpage(
-            post_url, None, 'Logging in as %s' % username,
+            post_url, None, 'Logging in',
             data=urlencode_postdata(login_form), headers={
                 'Referer': self._LOGIN_URL,
             })
 
         if all(p not in response for p in ('>Logout<', 'href="/users/sign_out"')):
             error = self._search_regex(
-                r'<p class="alert alert-danger">(.+?)</p>',
-                response, 'error', default=None)
+                r'<p[^>]+\bclass=(["\'])(?:(?!\1).)*\balert\b(?:(?!\1).)*\1[^>]*>(?P<error>.+?)</p>',
+                response, 'error', default=None, group='error')
             if error:
                 raise ExtractorError('Unable to login: %s' % error, expected=True)
             raise ExtractorError('Unable to log in')
diff --git a/youtube_dl/extractor/ard.py b/youtube_dl/extractor/ard.py
index 915f8862e3769c3f186209435dcc34029dead932..ef73d5a933f2ae08bfff028f0bd58afc0433ce54 100644 (file)
@@ -5,6 +5,7 @@ import re
 
 from .common import InfoExtractor
 from .generic import GenericIE
+from ..compat import compat_str
 from ..utils import (
     determine_ext,
     ExtractorError,
@@ -126,6 +127,8 @@ class ARDMediathekIE(InfoExtractor):
                 quality = stream.get('_quality')
                 server = stream.get('_server')
                 for stream_url in stream_urls:
+                    if not isinstance(stream_url, compat_str) or '//' not in stream_url:
+                        continue
                     ext = determine_ext(stream_url)
                     if quality != 'auto' and ext in ('f4m', 'm3u8'):
                         continue
@@ -146,13 +149,11 @@ class ARDMediathekIE(InfoExtractor):
                                 'play_path': stream_url,
                                 'format_id': 'a%s-rtmp-%s' % (num, quality),
                             }
-                        elif stream_url.startswith('http'):
+                        else:
                             f = {
                                 'url': stream_url,
                                 'format_id': 'a%s-%s-%s' % (num, ext, quality)
                             }
-                        else:
-                            continue
                         m = re.search(r'_(?P<width>\d+)x(?P<height>\d+)\.mp4$', stream_url)
                         if m:
                             f.update({
diff --git a/youtube_dl/extractor/atresplayer.py b/youtube_dl/extractor/atresplayer.py
index 01fa308ff1f150214fe4d996eb0bbc922387b320..1a31ebe0820d67df4b768e8102a134b34cfff0c5 100644 (file)
@@ -87,7 +87,7 @@ class AtresPlayerIE(InfoExtractor):
             self._LOGIN_URL, urlencode_postdata(login_form))
         request.add_header('Content-Type', 'application/x-www-form-urlencoded')
         response = self._download_webpage(
-            request, None, 'Logging in as %s' % username)
+            request, None, 'Logging in')
 
         error = self._html_search_regex(
             r'(?s)<ul[^>]+class="[^"]*\blist_error\b[^"]*">(.+?)</ul>',
diff --git a/youtube_dl/extractor/aws.py b/youtube_dl/extractor/aws.py
new file mode 100644 (file)
index 0000000..dccfeaf
--- /dev/null
+++ b/youtube_dl/extractor/aws.py
@@ -0,0 +1,78 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import datetime
+import hashlib
+import hmac
+
+from .common import InfoExtractor
+from ..compat import compat_urllib_parse_urlencode
+
+
+class AWSIE(InfoExtractor):
+    _AWS_ALGORITHM = 'AWS4-HMAC-SHA256'
+    _AWS_REGION = 'us-east-1'
+
+    def _aws_execute_api(self, aws_dict, video_id, query=None):
+        query = query or {}
+        amz_date = datetime.datetime.utcnow().strftime('%Y%m%dT%H%M%SZ')
+        date = amz_date[:8]
+        headers = {
+            'Accept': 'application/json',
+            'Host': self._AWS_PROXY_HOST,
+            'X-Amz-Date': amz_date,
+            'X-Api-Key': self._AWS_API_KEY
+        }
+        session_token = aws_dict.get('session_token')
+        if session_token:
+            headers['X-Amz-Security-Token'] = session_token
+
+        def aws_hash(s):
+            return hashlib.sha256(s.encode('utf-8')).hexdigest()
+
+        # Task 1: http://docs.aws.amazon.com/general/latest/gr/sigv4-create-canonical-request.html
+        canonical_querystring = compat_urllib_parse_urlencode(query)
+        canonical_headers = ''
+        for header_name, header_value in sorted(headers.items()):
+            canonical_headers += '%s:%s\n' % (header_name.lower(), header_value)
+        signed_headers = ';'.join([header.lower() for header in sorted(headers.keys())])
+        canonical_request = '\n'.join([
+            'GET',
+            aws_dict['uri'],
+            canonical_querystring,
+            canonical_headers,
+            signed_headers,
+            aws_hash('')
+        ])
+
+        # Task 2: http://docs.aws.amazon.com/general/latest/gr/sigv4-create-string-to-sign.html
+        credential_scope_list = [date, self._AWS_REGION, 'execute-api', 'aws4_request']
+        credential_scope = '/'.join(credential_scope_list)
+        string_to_sign = '\n'.join([self._AWS_ALGORITHM, amz_date, credential_scope, aws_hash(canonical_request)])
+
+        # Task 3: http://docs.aws.amazon.com/general/latest/gr/sigv4-calculate-signature.html
+        def aws_hmac(key, msg):
+            return hmac.new(key, msg.encode('utf-8'), hashlib.sha256)
+
+        def aws_hmac_digest(key, msg):
+            return aws_hmac(key, msg).digest()
+
+        def aws_hmac_hexdigest(key, msg):
+            return aws_hmac(key, msg).hexdigest()
+
+        k_signing = ('AWS4' + aws_dict['secret_key']).encode('utf-8')
+        for value in credential_scope_list:
+            k_signing = aws_hmac_digest(k_signing, value)
+
+        signature = aws_hmac_hexdigest(k_signing, string_to_sign)
+
+        # Task 4: http://docs.aws.amazon.com/general/latest/gr/sigv4-add-signature-to-request.html
+        headers['Authorization'] = ', '.join([
+            '%s Credential=%s/%s' % (self._AWS_ALGORITHM, aws_dict['access_key'], credential_scope),
+            'SignedHeaders=%s' % signed_headers,
+            'Signature=%s' % signature,
+        ])
+
+        return self._download_json(
+            'https://%s%s%s' % (self._AWS_PROXY_HOST, aws_dict['uri'], '?' + canonical_querystring if canonical_querystring else ''),
+            video_id, headers=headers)
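For orientation (a hypothetical sketch, not part of the new file), a concrete extractor is expected to supply the proxy host and API key as class attributes and hand _aws_execute_api a dict with the request URI and credentials. All names and values below are assumptions for illustration only.

    from .aws import AWSIE  # as it would appear inside youtube_dl/extractor/

    class SomeAWSBackedIE(AWSIE):  # hypothetical subclass, not in the patch
        _AWS_PROXY_HOST = 'api.example.com'    # assumed value
        _AWS_API_KEY = 'xxxxxxxxxxxxxxxx'      # assumed value

        def _real_extract(self, url):
            video_id = self._match_id(url)
            video_data = self._aws_execute_api({
                'uri': '/prod/video/%s' % video_id,  # assumed API path
                'access_key': 'AKIA...',             # credentials obtained elsewhere
                'secret_key': '...',
            }, video_id)
            # ... build formats and metadata from video_data ...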
diff --git a/youtube_dl/extractor/bambuser.py b/youtube_dl/extractor/bambuser.py
index 0eb1930c2d24bb01acd57292d4fe7e1b9a330bff..633c57553d9a9e769f726a664e08061217763ced 100644 (file)
@@ -59,7 +59,7 @@ class BambuserIE(InfoExtractor):
             self._LOGIN_URL, urlencode_postdata(login_form))
         request.add_header('Referer', self._LOGIN_URL)
         response = self._download_webpage(
-            request, None, 'Logging in as %s' % username)
+            request, None, 'Logging in')
 
         login_error = self._html_search_regex(
             r'(?s)<div class="messages error">(.+?)</div>',
diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py
index 5525f7c9b998c57271aba072bb698f6c03b36777..8b20c03d6e424b95e42b1bea1ac3fb91e24bea11 100644 (file)
@@ -386,7 +386,7 @@ class BBCCoUkIE(InfoExtractor):
                             m3u8_id=format_id, fatal=False))
                         if re.search(self._USP_RE, href):
                             usp_formats = self._extract_m3u8_formats(
-                                re.sub(self._USP_RE, r'/\1\.ism/\1\.m3u8', href),
+                                re.sub(self._USP_RE, r'/\1.ism/\1.m3u8', href),
                                 programme_id, ext='mp4', entry_protocol='m3u8_native',
                                 m3u8_id=format_id, fatal=False)
                             for f in usp_formats:
diff --git a/youtube_dl/extractor/br.py b/youtube_dl/extractor/br.py
index 2c32b6ae2a833715ab9f524e7d0470d5110ab108..9bde7f2d82896177612a81f74e499ad4e79985b1 100644 (file)
@@ -1,20 +1,23 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
+import json
 import re
 
 from .common import InfoExtractor
 from ..utils import (
+    determine_ext,
     ExtractorError,
     int_or_none,
     parse_duration,
+    parse_iso8601,
     xpath_element,
     xpath_text,
 )
 
 
 class BRIE(InfoExtractor):
-    IE_DESC = 'Bayerischer Rundfunk Mediathek'
+    IE_DESC = 'Bayerischer Rundfunk'
     _VALID_URL = r'(?P<base_url>https?://(?:www\.)?br(?:-klassik)?\.de)/(?:[a-z0-9\-_]+/)+(?P<id>[a-z0-9\-_]+)\.html'
 
     _TESTS = [
@@ -123,10 +126,10 @@ class BRIE(InfoExtractor):
         for asset in assets.findall('asset'):
             format_url = xpath_text(asset, ['downloadUrl', 'url'])
             asset_type = asset.get('type')
-            if asset_type == 'HDS':
+            if asset_type.startswith('HDS'):
                 formats.extend(self._extract_f4m_formats(
                     format_url + '?hdcore=3.2.0', media_id, f4m_id='hds', fatal=False))
-            elif asset_type == 'HLS':
+            elif asset_type.startswith('HLS'):
                 formats.extend(self._extract_m3u8_formats(
                     format_url, media_id, 'mp4', 'm3u8_native', m3u8_id='hds', fatal=False))
             else:
@@ -169,3 +172,140 @@ class BRIE(InfoExtractor):
         } for variant in variants.findall('variant') if xpath_text(variant, 'url')]
         thumbnails.sort(key=lambda x: x['width'] * x['height'], reverse=True)
         return thumbnails
+
+
+class BRMediathekIE(InfoExtractor):
+    IE_DESC = 'Bayerischer Rundfunk Mediathek'
+    _VALID_URL = r'https?://(?:www\.)?br\.de/mediathek/video/[^/?&#]*?-(?P<id>av:[0-9a-f]{24})'
+
+    _TESTS = [{
+        'url': 'https://www.br.de/mediathek/video/gesundheit-die-sendung-vom-28112017-av:5a1e6a6e8fce6d001871cc8e',
+        'md5': 'fdc3d485835966d1622587d08ba632ec',
+        'info_dict': {
+            'id': 'av:5a1e6a6e8fce6d001871cc8e',
+            'ext': 'mp4',
+            'title': 'Die Sendung vom 28.11.2017',
+            'description': 'md5:6000cdca5912ab2277e5b7339f201ccc',
+            'timestamp': 1511942766,
+            'upload_date': '20171129',
+        }
+    }]
+
+    def _real_extract(self, url):
+        clip_id = self._match_id(url)
+
+        clip = self._download_json(
+            'https://proxy-base.master.mango.express/graphql',
+            clip_id, data=json.dumps({
+                "query": """{
+  viewer {
+    clip(id: "%s") {
+      title
+      description
+      duration
+      createdAt
+      ageRestriction
+      videoFiles {
+        edges {
+          node {
+            publicLocation
+            fileSize
+            videoProfile {
+              width
+              height
+              bitrate
+              encoding
+            }
+          }
+        }
+      }
+      captionFiles {
+        edges {
+          node {
+            publicLocation
+          }
+        }
+      }
+      teaserImages {
+        edges {
+          node {
+            imageFiles {
+              edges {
+                node {
+                  publicLocation
+                  width
+                  height
+                }
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+}""" % clip_id}).encode(), headers={
+                'Content-Type': 'application/json',
+            })['data']['viewer']['clip']
+        title = clip['title']
+
+        formats = []
+        for edge in clip.get('videoFiles', {}).get('edges', []):
+            node = edge.get('node', {})
+            n_url = node.get('publicLocation')
+            if not n_url:
+                continue
+            ext = determine_ext(n_url)
+            if ext == 'm3u8':
+                formats.extend(self._extract_m3u8_formats(
+                    n_url, clip_id, 'mp4', 'm3u8_native',
+                    m3u8_id='hls', fatal=False))
+            else:
+                video_profile = node.get('videoProfile', {})
+                tbr = int_or_none(video_profile.get('bitrate'))
+                format_id = 'http'
+                if tbr:
+                    format_id += '-%d' % tbr
+                formats.append({
+                    'format_id': format_id,
+                    'url': n_url,
+                    'width': int_or_none(video_profile.get('width')),
+                    'height': int_or_none(video_profile.get('height')),
+                    'tbr': tbr,
+                    'filesize': int_or_none(node.get('fileSize')),
+                })
+        self._sort_formats(formats)
+
+        subtitles = {}
+        for edge in clip.get('captionFiles', {}).get('edges', []):
+            node = edge.get('node', {})
+            n_url = node.get('publicLocation')
+            if not n_url:
+                continue
+            subtitles.setdefault('de', []).append({
+                'url': n_url,
+            })
+
+        thumbnails = []
+        for edge in clip.get('teaserImages', {}).get('edges', []):
+            for image_edge in edge.get('node', {}).get('imageFiles', {}).get('edges', []):
+                node = image_edge.get('node', {})
+                n_url = node.get('publicLocation')
+                if not n_url:
+                    continue
+                thumbnails.append({
+                    'url': n_url,
+                    'width': int_or_none(node.get('width')),
+                    'height': int_or_none(node.get('height')),
+                })
+
+        return {
+            'id': clip_id,
+            'title': title,
+            'description': clip.get('description'),
+            'duration': int_or_none(clip.get('duration')),
+            'timestamp': parse_iso8601(clip.get('createdAt')),
+            'age_limit': int_or_none(clip.get('ageRestriction')),
+            'formats': formats,
+            'subtitles': subtitles,
+            'thumbnails': thumbnails,
+        }
diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py
index 0ed59bcbc44ecaf413d7003c4f6f605b1201ca10..f04505011c39fbfe315763ce8182daf6c2b9f663 100644 (file)
@@ -464,7 +464,7 @@ class BrightcoveNewIE(AdobePassIE):
             'timestamp': 1441391203,
             'upload_date': '20150904',
             'uploader_id': '929656772001',
-            'formats': 'mincount:22',
+            'formats': 'mincount:20',
         },
     }, {
         # with rtmp streams
@@ -478,7 +478,7 @@ class BrightcoveNewIE(AdobePassIE):
             'timestamp': 1433556729,
             'upload_date': '20150606',
             'uploader_id': '4036320279001',
-            'formats': 'mincount:41',
+            'formats': 'mincount:39',
         },
         'params': {
             # m3u8 download
@@ -564,59 +564,7 @@ class BrightcoveNewIE(AdobePassIE):
 
         return entries
 
-    def _real_extract(self, url):
-        url, smuggled_data = unsmuggle_url(url, {})
-        self._initialize_geo_bypass(smuggled_data.get('geo_countries'))
-
-        account_id, player_id, embed, video_id = re.match(self._VALID_URL, url).groups()
-
-        webpage = self._download_webpage(
-            'http://players.brightcove.net/%s/%s_%s/index.min.js'
-            % (account_id, player_id, embed), video_id)
-
-        policy_key = None
-
-        catalog = self._search_regex(
-            r'catalog\(({.+?})\);', webpage, 'catalog', default=None)
-        if catalog:
-            catalog = self._parse_json(
-                js_to_json(catalog), video_id, fatal=False)
-            if catalog:
-                policy_key = catalog.get('policyKey')
-
-        if not policy_key:
-            policy_key = self._search_regex(
-                r'policyKey\s*:\s*(["\'])(?P<pk>.+?)\1',
-                webpage, 'policy key', group='pk')
-
-        api_url = 'https://edge.api.brightcove.com/playback/v1/accounts/%s/videos/%s' % (account_id, video_id)
-        try:
-            json_data = self._download_json(api_url, video_id, headers={
-                'Accept': 'application/json;pk=%s' % policy_key
-            })
-        except ExtractorError as e:
-            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
-                json_data = self._parse_json(e.cause.read().decode(), video_id)[0]
-                message = json_data.get('message') or json_data['error_code']
-                if json_data.get('error_subcode') == 'CLIENT_GEO':
-                    self.raise_geo_restricted(msg=message)
-                raise ExtractorError(message, expected=True)
-            raise
-
-        errors = json_data.get('errors')
-        if errors and errors[0].get('error_subcode') == 'TVE_AUTH':
-            custom_fields = json_data['custom_fields']
-            tve_token = self._extract_mvpd_auth(
-                smuggled_data['source_url'], video_id,
-                custom_fields['bcadobepassrequestorid'],
-                custom_fields['bcadobepassresourceid'])
-            json_data = self._download_json(
-                api_url, video_id, headers={
-                    'Accept': 'application/json;pk=%s' % policy_key
-                }, query={
-                    'tveToken': tve_token,
-                })
-
+    def _parse_brightcove_metadata(self, json_data, video_id):
         title = json_data['name'].strip()
 
         formats = []
@@ -682,6 +630,7 @@ class BrightcoveNewIE(AdobePassIE):
                     })
                 formats.append(f)
 
+        errors = json_data.get('errors')
         if not formats and errors:
             error = errors[0]
             raise ExtractorError(
@@ -708,9 +657,64 @@ class BrightcoveNewIE(AdobePassIE):
             'thumbnail': json_data.get('thumbnail') or json_data.get('poster'),
             'duration': duration,
             'timestamp': parse_iso8601(json_data.get('published_at')),
-            'uploader_id': account_id,
+            'uploader_id': json_data.get('account_id'),
             'formats': formats,
             'subtitles': subtitles,
             'tags': json_data.get('tags', []),
             'is_live': is_live,
         }
+
+    def _real_extract(self, url):
+        url, smuggled_data = unsmuggle_url(url, {})
+        self._initialize_geo_bypass(smuggled_data.get('geo_countries'))
+
+        account_id, player_id, embed, video_id = re.match(self._VALID_URL, url).groups()
+
+        webpage = self._download_webpage(
+            'http://players.brightcove.net/%s/%s_%s/index.min.js'
+            % (account_id, player_id, embed), video_id)
+
+        policy_key = None
+
+        catalog = self._search_regex(
+            r'catalog\(({.+?})\);', webpage, 'catalog', default=None)
+        if catalog:
+            catalog = self._parse_json(
+                js_to_json(catalog), video_id, fatal=False)
+            if catalog:
+                policy_key = catalog.get('policyKey')
+
+        if not policy_key:
+            policy_key = self._search_regex(
+                r'policyKey\s*:\s*(["\'])(?P<pk>.+?)\1',
+                webpage, 'policy key', group='pk')
+
+        api_url = 'https://edge.api.brightcove.com/playback/v1/accounts/%s/videos/%s' % (account_id, video_id)
+        try:
+            json_data = self._download_json(api_url, video_id, headers={
+                'Accept': 'application/json;pk=%s' % policy_key
+            })
+        except ExtractorError as e:
+            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
+                json_data = self._parse_json(e.cause.read().decode(), video_id)[0]
+                message = json_data.get('message') or json_data['error_code']
+                if json_data.get('error_subcode') == 'CLIENT_GEO':
+                    self.raise_geo_restricted(msg=message)
+                raise ExtractorError(message, expected=True)
+            raise
+
+        errors = json_data.get('errors')
+        if errors and errors[0].get('error_subcode') == 'TVE_AUTH':
+            custom_fields = json_data['custom_fields']
+            tve_token = self._extract_mvpd_auth(
+                smuggled_data['source_url'], video_id,
+                custom_fields['bcadobepassrequestorid'],
+                custom_fields['bcadobepassresourceid'])
+            json_data = self._download_json(
+                api_url, video_id, headers={
+                    'Accept': 'application/json;pk=%s' % policy_key
+                }, query={
+                    'tveToken': tve_token,
+                })
+
+        return self._parse_brightcove_metadata(json_data, video_id)
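As a standalone illustration (not patch code), the Playback API request that _real_extract still issues after this refactoring is a plain GET with the policy key carried in the Accept header; the ids and key below are placeholders.

    import json
    from urllib.request import Request, urlopen  # Python 3 stdlib, for brevity

    account_id, video_id = '929656772001', 'ref:some-video'  # placeholder ids
    policy_key = 'BCpk-...'                                   # scraped from the player JS
    req = Request(
        'https://edge.api.brightcove.com/playback/v1/accounts/%s/videos/%s'
        % (account_id, video_id),
        headers={'Accept': 'application/json;pk=%s' % policy_key})
    json_data = json.loads(urlopen(req).read().decode())
    # json_data is what _parse_brightcove_metadata consumes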
diff --git a/youtube_dl/extractor/byutv.py b/youtube_dl/extractor/byutv.py
index 8ef089653d80e75bcf9eb89b81a9c82f08e7971d..4bf4efe1f3c662dbec1414c8004c904b12412551 100644 (file)
@@ -3,20 +3,19 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..utils import ExtractorError
 
 
 class BYUtvIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?byutv\.org/watch/(?!event/)(?P<id>[0-9a-f-]+)(?:/(?P<display_id>[^/?#&]+))?'
+    _VALID_URL = r'https?://(?:www\.)?byutv\.org/(?:watch|player)/(?!event/)(?P<id>[0-9a-f-]+)(?:/(?P<display_id>[^/?#&]+))?'
     _TESTS = [{
         'url': 'http://www.byutv.org/watch/6587b9a3-89d2-42a6-a7f7-fd2f81840a7d/studio-c-season-5-episode-5',
         'info_dict': {
-            'id': '6587b9a3-89d2-42a6-a7f7-fd2f81840a7d',
+            'id': 'ZvanRocTpW-G5_yZFeltTAMv6jxOU9KH',
             'display_id': 'studio-c-season-5-episode-5',
             'ext': 'mp4',
             'title': 'Season 5 Episode 5',
-            'description': 'md5:e07269172baff037f8e8bf9956bc9747',
-            'thumbnail': r're:^https?://.*\.jpg$',
+            'description': 'md5:1d31dc18ef4f075b28f6a65937d22c65',
+            'thumbnail': r're:^https?://.*',
             'duration': 1486.486,
         },
         'params': {
@@ -26,6 +25,9 @@ class BYUtvIE(InfoExtractor):
     }, {
         'url': 'http://www.byutv.org/watch/6587b9a3-89d2-42a6-a7f7-fd2f81840a7d',
         'only_matching': True,
+    }, {
+        'url': 'https://www.byutv.org/player/27741493-dc83-40b0-8420-e7ae38a2ae98/byu-football-toledo-vs-byu-93016?listid=4fe0fee5-0d3c-4a29-b725-e4948627f472&listindex=0&q=toledo',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
@@ -33,16 +35,16 @@ class BYUtvIE(InfoExtractor):
         video_id = mobj.group('id')
         display_id = mobj.group('display_id') or video_id
 
-        webpage = self._download_webpage(url, display_id)
-        episode_code = self._search_regex(
-            r'(?s)episode:(.*?\}),\s*\n', webpage, 'episode information')
-
-        ep = self._parse_json(
-            episode_code, display_id, transform_source=lambda s:
-            re.sub(r'(\n\s+)([a-zA-Z]+):\s+\'(.*?)\'', r'\1"\2": "\3"', s))
-
-        if ep['providerType'] != 'Ooyala':
-            raise ExtractorError('Unsupported provider %s' % ep['provider'])
+        ep = self._download_json(
+            'https://api.byutv.org/api3/catalog/getvideosforcontent', video_id,
+            query={
+                'contentid': video_id,
+                'channel': 'byutv',
+                'x-byutv-context': 'web$US',
+            }, headers={
+                'x-byutv-context': 'web$US',
+                'x-byutv-platformkey': 'xsaaw9c7y5',
+            })['ooyalaVOD']
 
         return {
             '_type': 'url_transparent',
@@ -50,44 +52,7 @@ class BYUtvIE(InfoExtractor):
             'url': 'ooyala:%s' % ep['providerId'],
             'id': video_id,
             'display_id': display_id,
-            'title': ep['title'],
+            'title': ep.get('title'),
             'description': ep.get('description'),
             'thumbnail': ep.get('imageThumbnail'),
         }
-
-
-class BYUtvEventIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?byutv\.org/watch/event/(?P<id>[0-9a-f-]+)'
-    _TEST = {
-        'url': 'http://www.byutv.org/watch/event/29941b9b-8bf6-48d2-aebf-7a87add9e34b',
-        'info_dict': {
-            'id': '29941b9b-8bf6-48d2-aebf-7a87add9e34b',
-            'ext': 'mp4',
-            'title': 'Toledo vs. BYU (9/30/16)',
-        },
-        'params': {
-            'skip_download': True,
-        },
-        'add_ie': ['Ooyala'],
-    }
-
-    def _real_extract(self, url):
-        video_id = self._match_id(url)
-
-        webpage = self._download_webpage(url, video_id)
-
-        ooyala_id = self._search_regex(
-            r'providerId\s*:\s*(["\'])(?P<id>(?:(?!\1).)+)\1',
-            webpage, 'ooyala id', group='id')
-
-        title = self._search_regex(
-            r'class=["\']description["\'][^>]*>\s*<h1>([^<]+)</h1>', webpage,
-            'title').strip()
-
-        return {
-            '_type': 'url_transparent',
-            'ie_key': 'Ooyala',
-            'url': 'ooyala:%s' % ooyala_id,
-            'id': video_id,
-            'title': title,
-        }
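Standing alone (illustration only, not patch code), the new catalog call above resolves a BYUtv content id to its Ooyala id; a minimal sketch using the id from the test case:

    import json
    from urllib.parse import urlencode
    from urllib.request import Request, urlopen  # Python 3 stdlib, for brevity

    video_id = '6587b9a3-89d2-42a6-a7f7-fd2f81840a7d'  # id from the test case above
    query = urlencode({
        'contentid': video_id,
        'channel': 'byutv',
        'x-byutv-context': 'web$US',
    })
    req = Request(
        'https://api.byutv.org/api3/catalog/getvideosforcontent?' + query,
        headers={
            'x-byutv-context': 'web$US',
            'x-byutv-platformkey': 'xsaaw9c7y5',
        })
    ep = json.loads(urlopen(req).read().decode())['ooyalaVOD']
    ooyala_url = 'ooyala:%s' % ep['providerId']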
diff --git a/youtube_dl/extractor/cartoonnetwork.py b/youtube_dl/extractor/cartoonnetwork.py
index 086ec90c9bc8a606968e5f56d2bada8602ed1f35..6aeebd7b34544b273b9b7a50bfd604ca42a953e5 100644 (file)
@@ -31,7 +31,7 @@ class CartoonNetworkIE(TurnerBaseIE):
             'http://www.cartoonnetwork.com/video-seo-svc/episodeservices/getCvpPlaylist?networkName=CN2&' + query, video_id, {
                 'secure': {
                     'media_src': 'http://androidhls-secure.cdn.turner.com/toon/big',
-                    'tokenizer_src': 'http://www.cartoonnetwork.com/cntv/mvpd/processors/services/token_ipadAdobe.do',
+                    'tokenizer_src': 'https://token.vgtf.net/token/token_mobile',
                 },
             }, {
                 'url': url,
diff --git a/youtube_dl/extractor/cbslocal.py b/youtube_dl/extractor/cbslocal.py
index 7d78e3aaee7637fdeed114c1b2b0adac44780804..90852a9ef9b7a6707ebcd1ade6cccb9ff9bbde5a 100644 (file)
@@ -91,12 +91,10 @@ class CBSLocalIE(AnvatoIE):
 
         info_dict = self._extract_anvato_videos(webpage, display_id)
 
-        time_str = self._html_search_regex(
-            r'class="entry-date">([^<]+)<', webpage, 'released date', default=None)
-        if time_str:
-            timestamp = unified_timestamp(time_str)
-        else:
-            timestamp = parse_iso8601(self._html_search_meta('uploadDate', webpage))
+        timestamp = unified_timestamp(self._html_search_regex(
+            r'class="(?:entry|post)-date"[^>]*>([^<]+)', webpage,
+            'released date', default=None)) or parse_iso8601(
+            self._html_search_meta('uploadDate', webpage))
 
         info_dict.update({
             'display_id': display_id,
diff --git a/youtube_dl/extractor/ccma.py b/youtube_dl/extractor/ccma.py
index 39938c9acbae573b56346093fae599b669a09f7b..bec0a825a0df28e953aeee58c73ea5cbc6c0461b 100644 (file)
@@ -93,7 +93,7 @@ class CCMAIE(InfoExtractor):
             'description': clean_html(informacio.get('descripcio')),
             'duration': duration,
             'timestamp': timestamp,
-            'thumnails': thumbnails,
+            'thumbnails': thumbnails,
             'subtitles': subtitles,
             'formats': formats,
         }
diff --git a/youtube_dl/extractor/collegerama.py b/youtube_dl/extractor/collegerama.py
deleted file mode 100644 (file)
index 6a41db8..0000000
--- a/youtube_dl/extractor/collegerama.py
+++ /dev/null
@@ -1,93 +0,0 @@
-from __future__ import unicode_literals
-
-import json
-
-from .common import InfoExtractor
-from ..utils import (
-    float_or_none,
-    int_or_none,
-    sanitized_Request,
-)
-
-
-class CollegeRamaIE(InfoExtractor):
-    _VALID_URL = r'https?://collegerama\.tudelft\.nl/Mediasite/Play/(?P<id>[\da-f]+)'
-    _TESTS = [
-        {
-            'url': 'https://collegerama.tudelft.nl/Mediasite/Play/585a43626e544bdd97aeb71a0ec907a01d',
-            'md5': '481fda1c11f67588c0d9d8fbdced4e39',
-            'info_dict': {
-                'id': '585a43626e544bdd97aeb71a0ec907a01d',
-                'ext': 'mp4',
-                'title': 'Een nieuwe wereld: waarden, bewustzijn en techniek van de mensheid 2.0.',
-                'description': '',
-                'thumbnail': r're:^https?://.*\.jpg(?:\?.*?)?$',
-                'duration': 7713.088,
-                'timestamp': 1413309600,
-                'upload_date': '20141014',
-            },
-        },
-        {
-            'url': 'https://collegerama.tudelft.nl/Mediasite/Play/86a9ea9f53e149079fbdb4202b521ed21d?catalog=fd32fd35-6c99-466c-89d4-cd3c431bc8a4',
-            'md5': 'ef1fdded95bdf19b12c5999949419c92',
-            'info_dict': {
-                'id': '86a9ea9f53e149079fbdb4202b521ed21d',
-                'ext': 'wmv',
-                'title': '64ste Vakantiecursus: Afvalwater',
-                'description': 'md5:7fd774865cc69d972f542b157c328305',
-                'thumbnail': r're:^https?://.*\.jpg(?:\?.*?)?$',
-                'duration': 10853,
-                'timestamp': 1326446400,
-                'upload_date': '20120113',
-            },
-        },
-    ]
-
-    def _real_extract(self, url):
-        video_id = self._match_id(url)
-
-        player_options_request = {
-            'getPlayerOptionsRequest': {
-                'ResourceId': video_id,
-                'QueryString': '',
-            }
-        }
-
-        request = sanitized_Request(
-            'http://collegerama.tudelft.nl/Mediasite/PlayerService/PlayerService.svc/json/GetPlayerOptions',
-            json.dumps(player_options_request))
-        request.add_header('Content-Type', 'application/json')
-
-        player_options = self._download_json(request, video_id)
-
-        presentation = player_options['d']['Presentation']
-        title = presentation['Title']
-        description = presentation.get('Description')
-        thumbnail = None
-        duration = float_or_none(presentation.get('Duration'), 1000)
-        timestamp = int_or_none(presentation.get('UnixTime'), 1000)
-
-        formats = []
-        for stream in presentation['Streams']:
-            for video in stream['VideoUrls']:
-                thumbnail_url = stream.get('ThumbnailUrl')
-                if thumbnail_url:
-                    thumbnail = 'http://collegerama.tudelft.nl' + thumbnail_url
-                format_id = video['MediaType']
-                if format_id == 'SS':
-                    continue
-                formats.append({
-                    'url': video['Location'],
-                    'format_id': format_id,
-                })
-        self._sort_formats(formats)
-
-        return {
-            'id': video_id,
-            'title': title,
-            'description': description,
-            'thumbnail': thumbnail,
-            'duration': duration,
-            'timestamp': timestamp,
-            'formats': formats,
-        }
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index e2d9f52b018c25abc5a58a93785473ff88d90b74..5b6a09c0b7f732ef5e9361775e42719af0deb8b7 100644 (file)
@@ -301,8 +301,9 @@ class InfoExtractor(object):
     There must be a key "entries", which is a list, an iterable, or a PagedList
     object, each element of which is a valid dictionary by this specification.
 
-    Additionally, playlists can have "title", "description" and "id" attributes
-    with the same semantics as videos (see above).
+    Additionally, playlists can have "id", "title", "description", "uploader",
+    "uploader_id", "uploader_url" attributes with the same semantics as videos
+    (see above).
 
 
     _type "multi_video" indicates that there are multiple videos that
@@ -494,6 +495,16 @@ class InfoExtractor(object):
                 self.to_screen('%s' % (note,))
             else:
                 self.to_screen('%s: %s' % (video_id, note))
+
+        # Some sites check X-Forwarded-For HTTP header in order to figure out
+        # the origin of the client behind proxy. This allows bypassing geo
+        # restriction by faking this header's value to IP that belongs to some
+        # geo unrestricted country. We will do so once we encounter any
+        # geo restriction error.
+        if self._x_forwarded_for_ip:
+            if 'X-Forwarded-For' not in headers:
+                headers['X-Forwarded-For'] = self._x_forwarded_for_ip
+
         if isinstance(url_or_request, compat_urllib_request.Request):
             url_or_request = update_Request(
                 url_or_request, data=data, headers=headers, query=query)
@@ -523,15 +534,6 @@ class InfoExtractor(object):
         if isinstance(url_or_request, (compat_str, str)):
             url_or_request = url_or_request.partition('#')[0]
 
-        # Some sites check X-Forwarded-For HTTP header in order to figure out
-        # the origin of the client behind proxy. This allows bypassing geo
-        # restriction by faking this header's value to IP that belongs to some
-        # geo unrestricted country. We will do so once we encounter any
-        # geo restriction error.
-        if self._x_forwarded_for_ip:
-            if 'X-Forwarded-For' not in headers:
-                headers['X-Forwarded-For'] = self._x_forwarded_for_ip
-
         urlh = self._request_webpage(url_or_request, video_id, note, errnote, fatal, data=data, headers=headers, query=query)
         if urlh is False:
             assert not fatal
@@ -592,19 +594,11 @@ class InfoExtractor(object):
         if not encoding:
             encoding = self._guess_encoding_from_content(content_type, webpage_bytes)
         if self._downloader.params.get('dump_intermediate_pages', False):
-            try:
-                url = url_or_request.get_full_url()
-            except AttributeError:
-                url = url_or_request
-            self.to_screen('Dumping request to ' + url)
+            self.to_screen('Dumping request to ' + urlh.geturl())
             dump = base64.b64encode(webpage_bytes).decode('ascii')
             self._downloader.to_screen(dump)
         if self._downloader.params.get('write_pages', False):
-            try:
-                url = url_or_request.get_full_url()
-            except AttributeError:
-                url = url_or_request
-            basen = '%s_%s' % (video_id, url)
+            basen = '%s_%s' % (video_id, urlh.geturl())
             if len(basen) > 240:
                 h = '___' + hashlib.md5(basen.encode('utf-8')).hexdigest()
                 basen = basen[:240 - len(h)] + h
@@ -1356,6 +1350,9 @@ class InfoExtractor(object):
         if '#EXT-X-FAXS-CM:' in m3u8_doc:  # Adobe Flash Access
             return []
 
+        if re.search(r'#EXT-X-SESSION-KEY:.*?URI="skd://', m3u8_doc):  # Apple FairPlay
+            return []
+
         formats = []
 
         format_url = lambda u: (
@@ -1883,6 +1880,7 @@ class InfoExtractor(object):
                             'language': lang if lang not in ('mul', 'und', 'zxx', 'mis') else None,
                             'format_note': 'DASH %s' % content_type,
                             'filesize': filesize,
+                            'container': mimetype2ext(mime_type) + '_dash',
                         }
                         f.update(parse_codecs(representation_attrib.get('codecs')))
                         representation_ms_info = extract_multisegment_info(representation, adaption_set_ms_info)
@@ -1980,6 +1978,22 @@ class InfoExtractor(object):
                                     })
                                     segment_index += 1
                             representation_ms_info['fragments'] = fragments
+                        elif 'segment_urls' in representation_ms_info:
+                            # Segment URLs with no SegmentTimeline
+                            # Example: https://www.seznam.cz/zpravy/clanek/cesko-zasahne-vitr-o-sile-vichrice-muze-byt-i-zivotu-nebezpecny-39091
+                            # https://github.com/rg3/youtube-dl/pull/14844
+                            fragments = []
+                            segment_duration = float_or_none(
+                                representation_ms_info['segment_duration'],
+                                representation_ms_info['timescale']) if 'segment_duration' in representation_ms_info else None
+                            for segment_url in representation_ms_info['segment_urls']:
+                                fragment = {
+                                    location_key(segment_url): segment_url,
+                                }
+                                if segment_duration:
+                                    fragment['duration'] = segment_duration
+                                fragments.append(fragment)
+                            representation_ms_info['fragments'] = fragments
                         # NB: MPD manifest may contain direct URLs to unfragmented media.
                         # No fragments key is present in this case.
                         if 'fragments' in representation_ms_info:
@@ -1994,16 +2008,14 @@ class InfoExtractor(object):
                                     f['url'] = initialization_url
                                 f['fragments'].append({location_key(initialization_url): initialization_url})
                             f['fragments'].extend(representation_ms_info['fragments'])
-                        try:
-                            existing_format = next(
-                                fo for fo in formats
-                                if fo['format_id'] == representation_id)
-                        except StopIteration:
-                            full_info = formats_dict.get(representation_id, {}).copy()
-                            full_info.update(f)
-                            formats.append(full_info)
-                        else:
-                            existing_format.update(f)
+                        # According to [1, 5.3.5.2, Table 7, page 35] @id of Representation
+                        # is not necessarily unique within a Period thus formats with
+                        # the same `format_id` are quite possible. There are numerous examples
+                        # of such manifests (see https://github.com/rg3/youtube-dl/issues/15111,
+                        # https://github.com/rg3/youtube-dl/issues/13919)
+                        full_info = formats_dict.get(representation_id, {}).copy()
+                        full_info.update(f)
+                        formats.append(full_info)
                     else:
                         self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type)
         return formats
@@ -2043,7 +2055,7 @@ class InfoExtractor(object):
             stream_timescale = int_or_none(stream.get('TimeScale')) or timescale
             stream_name = stream.get('Name')
             for track in stream.findall('QualityLevel'):
-                fourcc = track.get('FourCC')
+                fourcc = track.get('FourCC', 'AACL' if track.get('AudioTag') == '255' else None)
                 # TODO: add support for WVC1 and WMAP
                 if fourcc not in ('H264', 'AVC1', 'AACL'):
                     self.report_warning('%s is not a supported codec' % fourcc)
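
The segment_urls branch added to _extract_mpd_formats above converts a plain SegmentList (segment URLs but no SegmentTimeline) into youtube-dl's internal fragment list, giving every fragment the same duration computed from @duration divided by @timescale. A minimal standalone sketch of that conversion, with made-up segment names and a simplified location_key:

    # Sketch only: float_or_none and location_key stand in for the helpers
    # used by the extractor; the sample values are illustrative.
    def float_or_none(v, scale=1):
        try:
            return float(v) / scale
        except (TypeError, ValueError):
            return None

    def build_fragments(representation_ms_info, location_key=lambda u: 'url'):
        segment_duration = None
        if 'segment_duration' in representation_ms_info:
            segment_duration = float_or_none(
                representation_ms_info['segment_duration'],
                representation_ms_info['timescale'])
        fragments = []
        for segment_url in representation_ms_info['segment_urls']:
            fragment = {location_key(segment_url): segment_url}
            if segment_duration:
                fragment['duration'] = segment_duration
            fragments.append(fragment)
        return fragments

    # Three 6-second segments declared as duration=540000 at timescale=90000:
    print(build_fragments({
        'segment_urls': ['seg1.m4s', 'seg2.m4s', 'seg3.m4s'],
        'segment_duration': 540000,
        'timescale': 90000,
    }))
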
diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py
index 8bdaf0c2c5af7aa1a562033487a16867c3304f19..b92f2544799769abc9690a04d4d1090af435ca79 100644 (file)
@@ -38,11 +38,32 @@ class CrunchyrollBaseIE(InfoExtractor):
     _LOGIN_FORM = 'login_form'
     _NETRC_MACHINE = 'crunchyroll'
 
+    def _call_rpc_api(self, method, video_id, note=None, data=None):
+        data = data or {}
+        data['req'] = 'RpcApi' + method
+        data = compat_urllib_parse_urlencode(data).encode('utf-8')
+        return self._download_xml(
+            'http://www.crunchyroll.com/xml/',
+            video_id, note, fatal=False, data=data, headers={
+                'Content-Type': 'application/x-www-form-urlencoded',
+            })
+
     def _login(self):
         (username, password) = self._get_login_info()
         if username is None:
             return
 
+        self._download_webpage(
+            'https://www.crunchyroll.com/?a=formhandler',
+            None, 'Logging in', 'Wrong login info',
+            data=urlencode_postdata({
+                'formname': 'RpcApiUser_Login',
+                'next_url': 'https://www.crunchyroll.com/acct/membership',
+                'name': username,
+                'password': password,
+            }))
+
+        '''
         login_page = self._download_webpage(
             self._LOGIN_URL, None, 'Downloading login page')
 
@@ -86,6 +107,7 @@ class CrunchyrollBaseIE(InfoExtractor):
             raise ExtractorError('Unable to login: %s' % error, expected=True)
 
         raise ExtractorError('Unable to log in')
+        '''
 
     def _real_initialize(self):
         self._login()
@@ -365,15 +387,19 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
     def _get_subtitles(self, video_id, webpage):
         subtitles = {}
         for sub_id, sub_name in re.findall(r'\bssid=([0-9]+)"[^>]+?\btitle="([^"]+)', webpage):
-            sub_page = self._download_webpage(
-                'http://www.crunchyroll.com/xml/?req=RpcApiSubtitle_GetXml&subtitle_script_id=' + sub_id,
-                video_id, note='Downloading subtitles for ' + sub_name)
-            id = self._search_regex(r'id=\'([0-9]+)', sub_page, 'subtitle_id', fatal=False)
-            iv = self._search_regex(r'<iv>([^<]+)', sub_page, 'subtitle_iv', fatal=False)
-            data = self._search_regex(r'<data>([^<]+)', sub_page, 'subtitle_data', fatal=False)
-            if not id or not iv or not data:
+            sub_doc = self._call_rpc_api(
+                'Subtitle_GetXml', video_id,
+                'Downloading subtitles for ' + sub_name, data={
+                    'subtitle_script_id': sub_id,
+                })
+            if sub_doc is None:
                 continue
-            subtitle = self._decrypt_subtitles(data, iv, id).decode('utf-8')
+            sid = sub_doc.get('id')
+            iv = xpath_text(sub_doc, 'iv', 'subtitle iv')
+            data = xpath_text(sub_doc, 'data', 'subtitle data')
+            if not sid or not iv or not data:
+                continue
+            subtitle = self._decrypt_subtitles(data, iv, sid).decode('utf-8')
             lang_code = self._search_regex(r'lang_code=["\']([^"\']+)', subtitle, 'subtitle_lang_code', fatal=False)
             if not lang_code:
                 continue
@@ -444,65 +470,79 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
         for fmt in available_fmts:
             stream_quality, stream_format = self._FORMAT_IDS[fmt]
             video_format = fmt + 'p'
-            streamdata_req = sanitized_Request(
-                'http://www.crunchyroll.com/xml/?req=RpcApiVideoPlayer_GetStandardConfig&media_id=%s&video_format=%s&video_quality=%s'
-                % (video_id, stream_format, stream_quality),
-                compat_urllib_parse_urlencode({'current_page': url}).encode('utf-8'))
-            streamdata_req.add_header('Content-Type', 'application/x-www-form-urlencoded')
-            streamdata = self._download_xml(
-                streamdata_req, video_id,
-                note='Downloading media info for %s' % video_format)
-            stream_info = streamdata.find('./{default}preload/stream_info')
-            video_encode_id = xpath_text(stream_info, './video_encode_id')
-            if video_encode_id in video_encode_ids:
-                continue
-            video_encode_ids.append(video_encode_id)
-
-            video_file = xpath_text(stream_info, './file')
-            if not video_file:
-                continue
-            if video_file.startswith('http'):
-                formats.extend(self._extract_m3u8_formats(
-                    video_file, video_id, 'mp4', entry_protocol='m3u8_native',
-                    m3u8_id='hls', fatal=False))
-                continue
-
-            video_url = xpath_text(stream_info, './host')
-            if not video_url:
-                continue
-            metadata = stream_info.find('./metadata')
-            format_info = {
-                'format': video_format,
-                'format_id': video_format,
-                'height': int_or_none(xpath_text(metadata, './height')),
-                'width': int_or_none(xpath_text(metadata, './width')),
-            }
-
-            if '.fplive.net/' in video_url:
-                video_url = re.sub(r'^rtmpe?://', 'http://', video_url.strip())
-                parsed_video_url = compat_urlparse.urlparse(video_url)
-                direct_video_url = compat_urlparse.urlunparse(parsed_video_url._replace(
-                    netloc='v.lvlt.crcdn.net',
-                    path='%s/%s' % (remove_end(parsed_video_url.path, '/'), video_file.split(':')[-1])))
-                if self._is_valid_url(direct_video_url, video_id, video_format):
-                    format_info.update({
-                        'url': direct_video_url,
-                    })
-                    formats.append(format_info)
+            stream_infos = []
+            streamdata = self._call_rpc_api(
+                'VideoPlayer_GetStandardConfig', video_id,
+                'Downloading media info for %s' % video_format, data={
+                    'media_id': video_id,
+                    'video_format': stream_format,
+                    'video_quality': stream_quality,
+                    'current_page': url,
+                })
+            if streamdata is not None:
+                stream_info = streamdata.find('./{default}preload/stream_info')
+                if stream_info is not None:
+                    stream_infos.append(stream_info)
+            stream_info = self._call_rpc_api(
+                'VideoEncode_GetStreamInfo', video_id,
+                'Downloading stream info for %s' % video_format, data={
+                    'media_id': video_id,
+                    'video_format': stream_format,
+                    'video_encode_quality': stream_quality,
+                })
+            if stream_info is not None:
+                stream_infos.append(stream_info)
+            for stream_info in stream_infos:
+                video_encode_id = xpath_text(stream_info, './video_encode_id')
+                if video_encode_id in video_encode_ids:
                     continue
+                video_encode_ids.append(video_encode_id)
 
-            format_info.update({
-                'url': video_url,
-                'play_path': video_file,
-                'ext': 'flv',
-            })
-            formats.append(format_info)
-        self._sort_formats(formats)
+                video_file = xpath_text(stream_info, './file')
+                if not video_file:
+                    continue
+                if video_file.startswith('http'):
+                    formats.extend(self._extract_m3u8_formats(
+                        video_file, video_id, 'mp4', entry_protocol='m3u8_native',
+                        m3u8_id='hls', fatal=False))
+                    continue
 
-        metadata = self._download_xml(
-            'http://www.crunchyroll.com/xml', video_id,
-            note='Downloading media info', query={
-                'req': 'RpcApiVideoPlayer_GetMediaMetadata',
+                video_url = xpath_text(stream_info, './host')
+                if not video_url:
+                    continue
+                metadata = stream_info.find('./metadata')
+                format_info = {
+                    'format': video_format,
+                    'height': int_or_none(xpath_text(metadata, './height')),
+                    'width': int_or_none(xpath_text(metadata, './width')),
+                }
+
+                if '.fplive.net/' in video_url:
+                    video_url = re.sub(r'^rtmpe?://', 'http://', video_url.strip())
+                    parsed_video_url = compat_urlparse.urlparse(video_url)
+                    direct_video_url = compat_urlparse.urlunparse(parsed_video_url._replace(
+                        netloc='v.lvlt.crcdn.net',
+                        path='%s/%s' % (remove_end(parsed_video_url.path, '/'), video_file.split(':')[-1])))
+                    if self._is_valid_url(direct_video_url, video_id, video_format):
+                        format_info.update({
+                            'format_id': 'http-' + video_format,
+                            'url': direct_video_url,
+                        })
+                        formats.append(format_info)
+                        continue
+
+                format_info.update({
+                    'format_id': 'rtmp-' + video_format,
+                    'url': video_url,
+                    'play_path': video_file,
+                    'ext': 'flv',
+                })
+                formats.append(format_info)
+        self._sort_formats(formats, ('height', 'width', 'tbr', 'fps'))
+
+        metadata = self._call_rpc_api(
+            'VideoPlayer_GetMediaMetadata', video_id,
+            note='Downloading media info', data={
                 'media_id': video_id,
             })
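
The _call_rpc_api helper introduced above wraps Crunchyroll's RPC endpoint: every method becomes a form-encoded POST to /xml/ with req=RpcApi<Method>, and the reply is an XML document. A rough standalone Python 3 sketch of the same request shape (the endpoint's behaviour may have changed since this release; error handling and the extractor's download plumbing are omitted):

    import xml.etree.ElementTree as etree
    from urllib.parse import urlencode
    from urllib.request import Request, urlopen

    def call_rpc_api(method, data=None):
        # e.g. method='Subtitle_GetXml' -> req=RpcApiSubtitle_GetXml
        data = dict(data or {}, req='RpcApi' + method)
        request = Request(
            'http://www.crunchyroll.com/xml/',
            data=urlencode(data).encode('utf-8'),
            headers={'Content-Type': 'application/x-www-form-urlencoded'})
        with urlopen(request) as response:
            return etree.fromstring(response.read())

    # As in _get_subtitles above (the script id is a placeholder):
    # sub_doc = call_rpc_api('Subtitle_GetXml', {'subtitle_script_id': '12345'})
    # iv, enc_data = sub_doc.findtext('iv'), sub_doc.findtext('data')
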
 
diff --git a/youtube_dl/extractor/cspan.py b/youtube_dl/extractor/cspan.py
index 171820e2722b1c8eb1f7fdcfb5991b3186c58f01..67d6df4b0eb285f588b1352eae4a46e0c4b220fd 100644 (file)
@@ -4,13 +4,14 @@ import re
 
 from .common import InfoExtractor
 from ..utils import (
-    int_or_none,
-    unescapeHTML,
-    find_xpath_attr,
-    smuggle_url,
     determine_ext,
     ExtractorError,
     extract_attributes,
+    find_xpath_attr,
+    get_element_by_class,
+    int_or_none,
+    smuggle_url,
+    unescapeHTML,
 )
 from .senateisvp import SenateISVPIE
 from .ustream import UstreamIE
@@ -68,6 +69,10 @@ class CSpanIE(InfoExtractor):
             'uploader': 'HouseCommittee',
             'uploader_id': '12987475',
         },
+    }, {
+        # Audio Only
+        'url': 'https://www.c-span.org/video/?437336-1/judiciary-antitrust-competition-policy-consumer-rights',
+        'only_matching': True,
     }]
     BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/%s_%s/index.html?videoId=%s'
 
@@ -111,7 +116,15 @@ class CSpanIE(InfoExtractor):
                     title = self._og_search_title(webpage)
                     surl = smuggle_url(senate_isvp_url, {'force_title': title})
                     return self.url_result(surl, 'SenateISVP', video_id, title)
+                video_id = self._search_regex(
+                    r'jwsetup\.clipprog\s*=\s*(\d+);',
+                    webpage, 'jwsetup program id', default=None)
+                if video_id:
+                    video_type = 'program'
         if video_type is None or video_id is None:
+            error_message = get_element_by_class('VLplayer-error-message', webpage)
+            if error_message:
+                raise ExtractorError(error_message)
             raise ExtractorError('unable to find video id and type')
 
         def get_text_attr(d, attr):
@@ -138,7 +151,7 @@ class CSpanIE(InfoExtractor):
         entries = []
         for partnum, f in enumerate(files):
             formats = []
-            for quality in f['qualities']:
+            for quality in f.get('qualities', []):
                 formats.append({
                     'format_id': '%s-%sp' % (get_text_attr(quality, 'bitrate'), get_text_attr(quality, 'height')),
                     'url': unescapeHTML(get_text_attr(quality, 'file')),
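
The fallback added to CSpanIE above recovers the program id from the inline "jwsetup.clipprog = <id>;" assignment when the usual id markers are missing, and surfaces the player's own error text via get_element_by_class before giving up. A trivial check of the id regex against a made-up page fragment:

    import re

    webpage = '<script>jwsetup.clipprog = 12345;</script>'
    mobj = re.search(r'jwsetup\.clipprog\s*=\s*(\d+);', webpage)
    print(mobj.group(1) if mobj else None)  # 12345
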
diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py
index 21a2d02392a7ad0393b7db499eb1b4a0ee054547..0e7d587dd47c539254468913d4215c2e57d1d5be 100644 (file)
@@ -413,52 +413,3 @@ class DailymotionUserIE(DailymotionPlaylistIE):
             'title': full_user,
             'entries': self._extract_entries(user),
         }
-
-
-class DailymotionCloudIE(DailymotionBaseInfoExtractor):
-    _VALID_URL_PREFIX = r'https?://api\.dmcloud\.net/(?:player/)?embed/'
-    _VALID_URL = r'%s[^/]+/(?P<id>[^/?]+)' % _VALID_URL_PREFIX
-    _VALID_EMBED_URL = r'%s[^/]+/[^\'"]+' % _VALID_URL_PREFIX
-
-    _TESTS = [{
-        # From http://www.francetvinfo.fr/economie/entreprises/les-entreprises-familiales-le-secret-de-la-reussite_933271.html
-        # Tested at FranceTvInfo_2
-        'url': 'http://api.dmcloud.net/embed/4e7343f894a6f677b10006b4/556e03339473995ee145930c?auth=1464865870-0-jyhsm84b-ead4c701fb750cf9367bf4447167a3db&autoplay=1',
-        'only_matching': True,
-    }, {
-        # http://www.francetvinfo.fr/societe/larguez-les-amarres-le-cobaturage-se-developpe_980101.html
-        'url': 'http://api.dmcloud.net/player/embed/4e7343f894a6f677b10006b4/559545469473996d31429f06?auth=1467430263-0-90tglw2l-a3a4b64ed41efe48d7fccad85b8b8fda&autoplay=1',
-        'only_matching': True,
-    }]
-
-    @classmethod
-    def _extract_dmcloud_url(cls, webpage):
-        mobj = re.search(r'<iframe[^>]+src=[\'"](%s)[\'"]' % cls._VALID_EMBED_URL, webpage)
-        if mobj:
-            return mobj.group(1)
-
-        mobj = re.search(
-            r'<input[^>]+id=[\'"]dmcloudUrlEmissionSelect[\'"][^>]+value=[\'"](%s)[\'"]' % cls._VALID_EMBED_URL,
-            webpage)
-        if mobj:
-            return mobj.group(1)
-
-    def _real_extract(self, url):
-        video_id = self._match_id(url)
-
-        webpage = self._download_webpage_no_ff(url, video_id)
-
-        title = self._html_search_regex(r'<title>([^>]+)</title>', webpage, 'title')
-
-        video_info = self._parse_json(self._search_regex(
-            r'var\s+info\s*=\s*([^;]+);', webpage, 'video info'), video_id)
-
-        # TODO: parse ios_url, which is in fact a manifest
-        video_url = video_info['mp4_url']
-
-        return {
-            'id': video_id,
-            'url': video_url,
-            'title': title,
-            'thumbnail': video_info.get('thumbnail_url'),
-        }
diff --git a/youtube_dl/extractor/daisuki.py b/youtube_dl/extractor/daisuki.py
index 58cc986666c26466a6a67b971369e1fd6b987514..5c9ac68a02590b3eccea83cd4d9a49ffca47a2e8 100644 (file)
@@ -13,33 +13,30 @@ from ..aes import (
 from ..utils import (
     bytes_to_intlist,
     bytes_to_long,
-    clean_html,
+    extract_attributes,
     ExtractorError,
     intlist_to_bytes,
-    get_element_by_id,
     js_to_json,
     int_or_none,
     long_to_bytes,
     pkcs1pad,
-    remove_end,
 )
 
 
-class DaisukiIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?daisuki\.net/[^/]+/[^/]+/[^/]+/watch\.[^.]+\.(?P<id>\d+)\.html'
+class DaisukiMottoIE(InfoExtractor):
+    _VALID_URL = r'https?://motto\.daisuki\.net/framewatch/embed/[^/]+/(?P<id>[0-9a-zA-Z]{3})'
 
     _TEST = {
-        'url': 'http://www.daisuki.net/tw/en/anime/watch.TheIdolMasterCG.11213.html',
+        'url': 'http://motto.daisuki.net/framewatch/embed/embedDRAGONBALLSUPERUniverseSurvivalsaga/V2e/760/428',
         'info_dict': {
-            'id': '11213',
+            'id': 'V2e',
             'ext': 'mp4',
-            'title': '#01 Who is in the pumpkin carriage? - THE IDOLM@STER CINDERELLA GIRLS',
+            'title': '#117 SHOWDOWN OF LOVE! ANDROIDS VS UNIVERSE 2!!',
             'subtitles': {
                 'mul': [{
                     'ext': 'ttml',
                 }],
             },
-            'creator': 'BANDAI NAMCO Entertainment',
         },
         'params': {
             'skip_download': True,  # AES-encrypted HLS stream
@@ -73,15 +70,17 @@ class DaisukiIE(InfoExtractor):
 
             n, e = self._RSA_KEY
             encrypted_aeskey = long_to_bytes(pow(bytes_to_long(padded_aeskey), e, n))
-            init_data = self._download_json('http://www.daisuki.net/bin/bgn/init', video_id, query={
-                's': flashvars.get('s', ''),
-                'c': flashvars.get('ss3_prm', ''),
-                'e': url,
-                'd': base64.b64encode(intlist_to_bytes(aes_cbc_encrypt(
-                    bytes_to_intlist(json.dumps(data)),
-                    aes_key, iv))).decode('ascii'),
-                'a': base64.b64encode(encrypted_aeskey).decode('ascii'),
-            }, note='Downloading JSON metadata' + (' (try #%d)' % (idx + 1) if idx > 0 else ''))
+            init_data = self._download_json(
+                'http://motto.daisuki.net/fastAPI/bgn/init/',
+                video_id, query={
+                    's': flashvars.get('s', ''),
+                    'c': flashvars.get('ss3_prm', ''),
+                    'e': url,
+                    'd': base64.b64encode(intlist_to_bytes(aes_cbc_encrypt(
+                        bytes_to_intlist(json.dumps(data)),
+                        aes_key, iv))).decode('ascii'),
+                    'a': base64.b64encode(encrypted_aeskey).decode('ascii'),
+                }, note='Downloading JSON metadata' + (' (try #%d)' % (idx + 1) if idx > 0 else ''))
 
             if 'rtn' in init_data:
                 encrypted_rtn = init_data['rtn']
@@ -98,14 +97,11 @@ class DaisukiIE(InfoExtractor):
                 aes_key, iv)).decode('utf-8').rstrip('\0'),
             video_id)
 
+        title = rtn['title_str']
+
         formats = self._extract_m3u8_formats(
             rtn['play_url'], video_id, ext='mp4', entry_protocol='m3u8_native')
 
-        title = remove_end(self._og_search_title(webpage), ' - DAISUKI')
-
-        creator = self._html_search_regex(
-            r'Creator\s*:\s*([^<]+)', webpage, 'creator', fatal=False)
-
         subtitles = {}
         caption_url = rtn.get('caption_url')
         if caption_url:
@@ -120,21 +116,18 @@ class DaisukiIE(InfoExtractor):
             'title': title,
             'formats': formats,
             'subtitles': subtitles,
-            'creator': creator,
         }
 
 
-class DaisukiPlaylistIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)daisuki\.net/[^/]+/[^/]+/[^/]+/detail\.(?P<id>[a-zA-Z0-9]+)\.html'
+class DaisukiMottoPlaylistIE(InfoExtractor):
+    _VALID_URL = r'https?://motto\.daisuki\.net/(?P<id>information)/'
 
     _TEST = {
-        'url': 'http://www.daisuki.net/tw/en/anime/detail.TheIdolMasterCG.html',
+        'url': 'http://motto.daisuki.net/information/',
         'info_dict': {
-            'id': 'TheIdolMasterCG',
-            'title': 'THE IDOLM@STER CINDERELLA GIRLS',
-            'description': 'md5:0f2c028a9339f7a2c7fbf839edc5c5d8',
+            'title': 'DRAGON BALL SUPER',
         },
-        'playlist_count': 26,
+        'playlist_mincount': 117,
     }
 
     def _real_extract(self, url):
@@ -142,18 +135,19 @@ class DaisukiPlaylistIE(InfoExtractor):
 
         webpage = self._download_webpage(url, playlist_id)
 
-        episode_pattern = r'''(?sx)
-            <img[^>]+delay="[^"]+/(\d+)/movie\.jpg".+?
-            <p[^>]+class=".*?\bepisodeNumber\b.*?">(?:<a[^>]+>)?([^<]+)'''
-        entries = [{
-            '_type': 'url_transparent',
-            'url': url.replace('detail', 'watch').replace('.html', '.' + movie_id + '.html'),
-            'episode_id': episode_id,
-            'episode_number': int_or_none(episode_id),
-        } for movie_id, episode_id in re.findall(episode_pattern, webpage)]
-
-        playlist_title = remove_end(
-            self._og_search_title(webpage, fatal=False), ' - Anime - DAISUKI')
-        playlist_description = clean_html(get_element_by_id('synopsisTxt', webpage))
-
-        return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)
+        entries = []
+        for li in re.findall(r'(<li[^>]+?data-product_id="[a-zA-Z0-9]{3}"[^>]+>)', webpage):
+            attr = extract_attributes(li)
+            ad_id = attr.get('data-ad_id')
+            product_id = attr.get('data-product_id')
+            if ad_id and product_id:
+                episode_id = attr.get('data-chapter')
+                entries.append({
+                    '_type': 'url_transparent',
+                    'url': 'http://motto.daisuki.net/framewatch/embed/%s/%s/760/428' % (ad_id, product_id),
+                    'episode_id': episode_id,
+                    'episode_number': int_or_none(episode_id),
+                    'ie_key': 'DaisukiMotto',
+                })
+
+        return self.playlist_result(entries, playlist_title='DRAGON BALL SUPER')
diff --git a/youtube_dl/extractor/discovery.py b/youtube_dl/extractor/discovery.py
index 55853f76f91e97db19423c6cf8c1b8b56e006447..f9cec1d23db9e0ff389a9fa5ffd24ff9c8558d4b 100644 (file)
@@ -1,14 +1,18 @@
 from __future__ import unicode_literals
 
-from .common import InfoExtractor
+import random
+import re
+import string
+
+from .discoverygo import DiscoveryGoBaseIE
 from ..utils import (
-    parse_duration,
-    parse_iso8601,
+    ExtractorError,
+    update_url_query,
 )
-from ..compat import compat_str
+from ..compat import compat_HTTPError
 
 
-class DiscoveryIE(InfoExtractor):
+class DiscoveryIE(DiscoveryGoBaseIE):
     _VALID_URL = r'''(?x)https?://(?:www\.)?(?:
             discovery|
             investigationdiscovery|
@@ -19,79 +23,65 @@ class DiscoveryIE(InfoExtractor):
             sciencechannel|
             tlc|
             velocity
-        )\.com/(?:[^/]+/)*(?P<id>[^./?#]+)'''
+        )\.com(?P<path>/tv-shows/[^/]+/(?:video|full-episode)s/(?P<id>[^./?#]+))'''
     _TESTS = [{
-        'url': 'http://www.discovery.com/tv-shows/mythbusters/videos/mission-impossible-outtakes.htm',
+        'url': 'https://www.discovery.com/tv-shows/cash-cab/videos/dave-foley',
         'info_dict': {
-            'id': '20769',
+            'id': '5a2d9b4d6b66d17a5026e1fd',
             'ext': 'mp4',
-            'title': 'Mission Impossible Outtakes',
-            'description': ('Watch Jamie Hyneman and Adam Savage practice being'
-                            ' each other -- to the point of confusing Jamie\'s dog -- and '
-                            'don\'t miss Adam moon-walking as Jamie ... behind Jamie\'s'
-                            ' back.'),
-            'duration': 156,
-            'timestamp': 1302032462,
-            'upload_date': '20110405',
-            'uploader_id': '103207',
+            'title': 'Dave Foley',
+            'description': 'md5:4b39bcafccf9167ca42810eb5f28b01f',
+            'duration': 608,
         },
         'params': {
             'skip_download': True,  # requires ffmpeg
         }
     }, {
-        'url': 'http://www.discovery.com/tv-shows/mythbusters/videos/mythbusters-the-simpsons',
-        'info_dict': {
-            'id': 'mythbusters-the-simpsons',
-            'title': 'MythBusters: The Simpsons',
-        },
-        'playlist_mincount': 10,
-    }, {
-        'url': 'http://www.animalplanet.com/longfin-eels-maneaters/',
-        'info_dict': {
-            'id': '78326',
-            'ext': 'mp4',
-            'title': 'Longfin Eels: Maneaters?',
-            'description': 'Jeremy Wade tests whether or not New Zealand\'s longfin eels are man-eaters by covering himself in fish guts and getting in the water with them.',
-            'upload_date': '20140725',
-            'timestamp': 1406246400,
-            'duration': 116,
-            'uploader_id': '103207',
-        },
-        'params': {
-            'skip_download': True,  # requires ffmpeg
-        }
+        'url': 'https://www.investigationdiscovery.com/tv-shows/final-vision/full-episodes/final-vision',
+        'only_matching': True,
     }]
+    _GEO_COUNTRIES = ['US']
+    _GEO_BYPASS = False
 
     def _real_extract(self, url):
-        display_id = self._match_id(url)
-        info = self._download_json(url + '?flat=1', display_id)
-
-        video_title = info.get('playlist_title') or info.get('video_title')
+        path, display_id = re.match(self._VALID_URL, url).groups()
+        webpage = self._download_webpage(url, display_id)
 
-        entries = []
+        react_data = self._parse_json(self._search_regex(
+            r'window\.__reactTransmitPacket\s*=\s*({.+?});',
+            webpage, 'react data'), display_id)
+        content_blocks = react_data['layout'][path]['contentBlocks']
+        video = next(cb for cb in content_blocks if cb.get('type') == 'video')['content']['items'][0]
+        video_id = video['id']
 
-        for idx, video_info in enumerate(info['playlist']):
-            subtitles = {}
-            caption_url = video_info.get('captionsUrl')
-            if caption_url:
-                subtitles = {
-                    'en': [{
-                        'url': caption_url,
-                    }]
-                }
+        access_token = self._download_json(
+            'https://www.discovery.com/anonymous', display_id, query={
+                'authLink': update_url_query(
+                    'https://login.discovery.com/v1/oauth2/authorize', {
+                        'client_id': react_data['application']['apiClientId'],
+                        'redirect_uri': 'https://fusion.ddmcdn.com/app/mercury-sdk/180/redirectHandler.html',
+                        'response_type': 'anonymous',
+                        'state': 'nonce,' + ''.join([random.choice(string.ascii_letters) for _ in range(32)]),
+                    })
+            })['access_token']
 
-            entries.append({
-                '_type': 'url_transparent',
-                'url': 'http://players.brightcove.net/103207/default_default/index.html?videoId=ref:%s' % video_info['referenceId'],
-                'id': compat_str(video_info['id']),
-                'title': video_info['title'],
-                'description': video_info.get('description'),
-                'duration': parse_duration(video_info.get('video_length')),
-                'webpage_url': video_info.get('href') or video_info.get('url'),
-                'thumbnail': video_info.get('thumbnailURL'),
-                'alt_title': video_info.get('secondary_title'),
-                'timestamp': parse_iso8601(video_info.get('publishedDate')),
-                'subtitles': subtitles,
-            })
+        try:
+            stream = self._download_json(
+                'https://api.discovery.com/v1/streaming/video/' + video_id,
+                display_id, headers={
+                    'Authorization': 'Bearer ' + access_token,
+                })
+        except ExtractorError as e:
+            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
+                e_description = self._parse_json(
+                    e.cause.read().decode(), display_id)['description']
+                if 'resource not available for country' in e_description:
+                    self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
+                if 'Authorized Networks' in e_description:
+                    raise ExtractorError(
+                        'This video is only available via cable service provider subscription that'
+                        ' is not currently supported. You may want to use --cookies.', expected=True)
+                raise ExtractorError(e_description)
+            raise
 
-        return self.playlist_result(entries, display_id, video_title)
+        return self._extract_video_info(video, stream, display_id)
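
The rewritten DiscoveryIE above first obtains an anonymous OAuth token from discovery.com/anonymous, passing an authLink that points at login.discovery.com with a random nonce in the state parameter, and then sends the token as a Bearer header to api.discovery.com. A standalone sketch of that handshake; the client_id is normally read from the page's __reactTransmitPacket data, so the argument here is a placeholder, and the endpoints may have changed since this release:

    import json
    import random
    import string
    from urllib.parse import urlencode
    from urllib.request import urlopen

    def get_anonymous_token(client_id):
        nonce = ''.join(random.choice(string.ascii_letters) for _ in range(32))
        auth_link = 'https://login.discovery.com/v1/oauth2/authorize?' + urlencode({
            'client_id': client_id,
            'redirect_uri': 'https://fusion.ddmcdn.com/app/mercury-sdk/180/redirectHandler.html',
            'response_type': 'anonymous',
            'state': 'nonce,' + nonce,
        })
        url = 'https://www.discovery.com/anonymous?' + urlencode({'authLink': auth_link})
        with urlopen(url) as response:
            return json.loads(response.read().decode('utf-8'))['access_token']

    # The token is then used as
    #   Authorization: Bearer <token>
    # against https://api.discovery.com/v1/streaming/video/<video_id>.
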
diff --git a/youtube_dl/extractor/discoverygo.py b/youtube_dl/extractor/discoverygo.py
index 7cd5d4291668705f12fdd973756e36a1ace1cc9f..3368c4c075505df8a4ff03198d66a33a76f5b2f6 100644 (file)
@@ -5,6 +5,7 @@ import re
 from .common import InfoExtractor
 from ..compat import compat_str
 from ..utils import (
+    determine_ext,
     extract_attributes,
     ExtractorError,
     int_or_none,
@@ -27,42 +28,9 @@ class DiscoveryGoBaseIE(InfoExtractor):
             velocitychannel
         )go\.com/%s(?P<id>[^/?#&]+)'''
 
-
-class DiscoveryGoIE(DiscoveryGoBaseIE):
-    _VALID_URL = DiscoveryGoBaseIE._VALID_URL_TEMPLATE % r'(?:[^/]+/)+'
-    _GEO_COUNTRIES = ['US']
-    _TEST = {
-        'url': 'https://www.discoverygo.com/bering-sea-gold/reaper-madness/',
-        'info_dict': {
-            'id': '58c167d86b66d12f2addeb01',
-            'ext': 'mp4',
-            'title': 'Reaper Madness',
-            'description': 'md5:09f2c625c99afb8946ed4fb7865f6e78',
-            'duration': 2519,
-            'series': 'Bering Sea Gold',
-            'season_number': 8,
-            'episode_number': 6,
-            'age_limit': 14,
-        },
-    }
-
-    def _real_extract(self, url):
-        display_id = self._match_id(url)
-
-        webpage = self._download_webpage(url, display_id)
-
-        container = extract_attributes(
-            self._search_regex(
-                r'(<div[^>]+class=["\']video-player-container[^>]+>)',
-                webpage, 'video container'))
-
-        video = self._parse_json(
-            container.get('data-video') or container.get('data-json'),
-            display_id)
-
+    def _extract_video_info(self, video, stream, display_id):
         title = video['name']
 
-        stream = video.get('stream')
         if not stream:
             if video.get('authenticated') is True:
                 raise ExtractorError(
@@ -106,7 +74,11 @@ class DiscoveryGoIE(DiscoveryGoBaseIE):
                         not subtitle_url.startswith('http')):
                     continue
                 lang = caption.get('fileLang', 'en')
-                subtitles.setdefault(lang, []).append({'url': subtitle_url})
+                ext = determine_ext(subtitle_url)
+                subtitles.setdefault(lang, []).append({
+                    'url': subtitle_url,
+                    'ext': 'ttml' if ext == 'xml' else ext,
+                })
 
         return {
             'id': video_id,
@@ -124,6 +96,43 @@ class DiscoveryGoIE(DiscoveryGoBaseIE):
         }
 
 
+class DiscoveryGoIE(DiscoveryGoBaseIE):
+    _VALID_URL = DiscoveryGoBaseIE._VALID_URL_TEMPLATE % r'(?:[^/]+/)+'
+    _GEO_COUNTRIES = ['US']
+    _TEST = {
+        'url': 'https://www.discoverygo.com/bering-sea-gold/reaper-madness/',
+        'info_dict': {
+            'id': '58c167d86b66d12f2addeb01',
+            'ext': 'mp4',
+            'title': 'Reaper Madness',
+            'description': 'md5:09f2c625c99afb8946ed4fb7865f6e78',
+            'duration': 2519,
+            'series': 'Bering Sea Gold',
+            'season_number': 8,
+            'episode_number': 6,
+            'age_limit': 14,
+        },
+    }
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, display_id)
+
+        container = extract_attributes(
+            self._search_regex(
+                r'(<div[^>]+class=["\']video-player-container[^>]+>)',
+                webpage, 'video container'))
+
+        video = self._parse_json(
+            container.get('data-video') or container.get('data-json'),
+            display_id)
+
+        stream = video.get('stream')
+
+        return self._extract_video_info(video, stream, display_id)
+
+
 class DiscoveryGoPlaylistIE(DiscoveryGoBaseIE):
     _VALID_URL = DiscoveryGoBaseIE._VALID_URL_TEMPLATE % ''
     _TEST = {
diff --git a/youtube_dl/extractor/disney.py b/youtube_dl/extractor/disney.py
index 968c4c7fd5216f65741ee21a8052452fd3993d40..0eee82fd6b6e79c9aa309f4a18080a3039d82208 100644 (file)
@@ -10,6 +10,7 @@ from ..utils import (
     compat_str,
     determine_ext,
     ExtractorError,
+    update_url_query,
 )
 
 
@@ -108,9 +109,16 @@ class DisneyIE(InfoExtractor):
                 continue
             tbr = int_or_none(flavor.get('bitrate'))
             if tbr == 99999:
-                formats.extend(self._extract_m3u8_formats(
+                # wrong ks(Kaltura Signature) causes 404 Error
+                flavor_url = update_url_query(flavor_url, {'ks': ''})
+                m3u8_formats = self._extract_m3u8_formats(
                     flavor_url, video_id, 'mp4',
-                    m3u8_id=flavor_format, fatal=False))
+                    m3u8_id=flavor_format, fatal=False)
+                for f in m3u8_formats:
+                    # Apple FairPlay
+                    if '/fpshls/' in f['url']:
+                        continue
+                    formats.append(f)
                 continue
             format_id = []
             if flavor_format:
diff --git a/youtube_dl/extractor/dramafever.py b/youtube_dl/extractor/dramafever.py
index 95883a037f537de47a0ee9f3f18d913eb9ec1647..6b60e542b37418473e1ee2a959cb6ae08a60d2e5 100644 (file)
@@ -54,7 +54,7 @@ class DramaFeverBaseIE(AMPIE):
         request = sanitized_Request(
             self._LOGIN_URL, urlencode_postdata(login_form))
         response = self._download_webpage(
-            request, None, 'Logging in as %s' % username)
+            request, None, 'Logging in')
 
         if all(logout_pattern not in response
                for logout_pattern in ['href="/accounts/logout/"', '>Log out<']):
diff --git a/youtube_dl/extractor/drtuber.py b/youtube_dl/extractor/drtuber.py
index c5d56a9adf9c4ff8ad338d326d5548fb5bdf50e1..c88b3126b1676f11ee3696a2499e1f7a0a57d8b3 100644 (file)
@@ -10,7 +10,7 @@ from ..utils import (
 
 
 class DrTuberIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?drtuber\.com/(?:video|embed)/(?P<id>\d+)(?:/(?P<display_id>[\w-]+))?'
+    _VALID_URL = r'https?://(?:(?:www|m)\.)?drtuber\.com/(?:video|embed)/(?P<id>\d+)(?:/(?P<display_id>[\w-]+))?'
     _TESTS = [{
         'url': 'http://www.drtuber.com/video/1740434/hot-perky-blonde-naked-golf',
         'md5': '93e680cf2536ad0dfb7e74d94a89facd',
@@ -28,6 +28,9 @@ class DrTuberIE(InfoExtractor):
     }, {
         'url': 'http://www.drtuber.com/embed/489939',
         'only_matching': True,
+    }, {
+        'url': 'http://m.drtuber.com/video/3893529/lingerie-blowjob-from-beautiful-teen',
+        'only_matching': True,
     }]
 
     @staticmethod
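
The widened DrTuber pattern now accepts the mobile subdomain in addition to www. A quick self-check of the regex (titles are made up):

    import re

    _VALID_URL = r'https?://(?:(?:www|m)\.)?drtuber\.com/(?:video|embed)/(?P<id>\d+)(?:/(?P<display_id>[\w-]+))?'

    for url in ('http://www.drtuber.com/video/1740434/some-title',
                'http://m.drtuber.com/video/3893529/some-title',
                'http://www.drtuber.com/embed/489939'):
        mobj = re.match(_VALID_URL, url)
        print(url, '->', mobj.group('id') if mobj else None)
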
diff --git a/youtube_dl/extractor/ellentube.py b/youtube_dl/extractor/ellentube.py
new file mode 100644 (file)
index 0000000..5444732
--- /dev/null
@@ -0,0 +1,133 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    clean_html,
+    extract_attributes,
+    float_or_none,
+    int_or_none,
+    try_get,
+)
+
+
+class EllenTubeBaseIE(InfoExtractor):
+    def _extract_data_config(self, webpage, video_id):
+        details = self._search_regex(
+            r'(<[^>]+\bdata-component=(["\'])[Dd]etails.+?></div>)', webpage,
+            'details')
+        return self._parse_json(
+            extract_attributes(details)['data-config'], video_id)
+
+    def _extract_video(self, data, video_id):
+        title = data['title']
+
+        formats = []
+        duration = None
+        for entry in data.get('media'):
+            if entry.get('id') == 'm3u8':
+                formats = self._extract_m3u8_formats(
+                    entry['url'], video_id, 'mp4',
+                    entry_protocol='m3u8_native', m3u8_id='hls')
+                duration = int_or_none(entry.get('duration'))
+                break
+        self._sort_formats(formats)
+
+        def get_insight(kind):
+            return int_or_none(try_get(
+                data, lambda x: x['insight']['%ss' % kind]))
+
+        return {
+            'extractor_key': EllenTubeIE.ie_key(),
+            'id': video_id,
+            'title': title,
+            'description': data.get('description'),
+            'duration': duration,
+            'thumbnail': data.get('thumbnail'),
+            'timestamp': float_or_none(data.get('publishTime'), scale=1000),
+            'view_count': get_insight('view'),
+            'like_count': get_insight('like'),
+            'formats': formats,
+        }
+
+
+class EllenTubeIE(EllenTubeBaseIE):
+    _VALID_URL = r'''(?x)
+                        (?:
+                            ellentube:|
+                            https://api-prod\.ellentube\.com/ellenapi/api/item/
+                        )
+                        (?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})
+                    '''
+    _TESTS = [{
+        'url': 'https://api-prod.ellentube.com/ellenapi/api/item/0822171c-3829-43bf-b99f-d77358ae75e3',
+        'md5': '2fabc277131bddafdd120e0fc0f974c9',
+        'info_dict': {
+            'id': '0822171c-3829-43bf-b99f-d77358ae75e3',
+            'ext': 'mp4',
+            'title': 'Ellen Meets Las Vegas Survivors Jesus Campos and Stephen Schuck',
+            'description': 'md5:76e3355e2242a78ad9e3858e5616923f',
+            'thumbnail': r're:^https?://.+?',
+            'duration': 514,
+            'timestamp': 1508505120,
+            'upload_date': '20171020',
+            'view_count': int,
+            'like_count': int,
+        }
+    }, {
+        'url': 'ellentube:734a3353-f697-4e79-9ca9-bfc3002dc1e0',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        data = self._download_json(
+            'https://api-prod.ellentube.com/ellenapi/api/item/%s' % video_id,
+            video_id)
+        return self._extract_video(data, video_id)
+
+
+class EllenTubeVideoIE(EllenTubeBaseIE):
+    _VALID_URL = r'https?://(?:www\.)?ellentube\.com/video/(?P<id>.+?)\.html'
+    _TEST = {
+        'url': 'https://www.ellentube.com/video/ellen-meets-las-vegas-survivors-jesus-campos-and-stephen-schuck.html',
+        'only_matching': True,
+    }
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+        video_id = self._extract_data_config(webpage, display_id)['id']
+        return self.url_result(
+            'ellentube:%s' % video_id, ie=EllenTubeIE.ie_key(),
+            video_id=video_id)
+
+
+class EllenTubePlaylistIE(EllenTubeBaseIE):
+    _VALID_URL = r'https?://(?:www\.)?ellentube\.com/(?:episode|studios)/(?P<id>.+?)\.html'
+    _TESTS = [{
+        'url': 'https://www.ellentube.com/episode/dax-shepard-jordan-fisher-haim.html',
+        'info_dict': {
+            'id': 'dax-shepard-jordan-fisher-haim',
+            'title': "Dax Shepard, 'DWTS' Team Jordan Fisher & Lindsay Arnold, HAIM",
+            'description': 'md5:bfc982194dabb3f4e325e43aa6b2e21c',
+        },
+        'playlist_count': 6,
+    }, {
+        'url': 'https://www.ellentube.com/studios/macey-goes-rving0.html',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+        data = self._extract_data_config(webpage, display_id)['data']
+        feed = self._download_json(
+            'https://api-prod.ellentube.com/ellenapi/api/feed/?%s'
+            % data['filter'], display_id)
+        entries = [
+            self._extract_video(elem, elem['id'])
+            for elem in feed if elem.get('type') == 'VIDEO' and elem.get('id')]
+        return self.playlist_result(
+            entries, display_id, data.get('title'),
+            clean_html(data.get('description')))
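
EllenTubeIE above resolves a video by UUID through the ellenapi item endpoint and takes the media entry whose id is "m3u8" as the HLS master playlist. A standalone sketch of that lookup (the UUID is the one from the test case above; the API may no longer respond):

    import json
    from urllib.request import urlopen

    API_ITEM = 'https://api-prod.ellentube.com/ellenapi/api/item/%s'
    video_id = '0822171c-3829-43bf-b99f-d77358ae75e3'

    data = json.loads(urlopen(API_ITEM % video_id).read().decode('utf-8'))
    hls_url = next(
        (entry['url'] for entry in data.get('media', [])
         if entry.get('id') == 'm3u8'), None)
    print(data.get('title'), hls_url)
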
diff --git a/youtube_dl/extractor/ellentv.py b/youtube_dl/extractor/ellentv.py
deleted file mode 100644 (file)
index e0a13dd..0000000
+++ /dev/null
@@ -1,101 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-from .kaltura import KalturaIE
-from ..utils import NO_DEFAULT
-
-
-class EllenTVIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?(?:ellentv|ellentube)\.com/videos/(?P<id>[a-z0-9_-]+)'
-    _TESTS = [{
-        'url': 'http://www.ellentv.com/videos/0-ipq1gsai/',
-        'md5': '4294cf98bc165f218aaa0b89e0fd8042',
-        'info_dict': {
-            'id': '0_ipq1gsai',
-            'ext': 'mov',
-            'title': 'Fast Fingers of Fate',
-            'description': 'md5:3539013ddcbfa64b2a6d1b38d910868a',
-            'timestamp': 1428035648,
-            'upload_date': '20150403',
-            'uploader_id': 'batchUser',
-        },
-    }, {
-        # not available via http://widgets.ellentube.com/
-        'url': 'http://www.ellentv.com/videos/1-szkgu2m2/',
-        'info_dict': {
-            'id': '1_szkgu2m2',
-            'ext': 'flv',
-            'title': "Ellen's Amazingly Talented Audience",
-            'description': 'md5:86ff1e376ff0d717d7171590e273f0a5',
-            'timestamp': 1255140900,
-            'upload_date': '20091010',
-            'uploader_id': 'ellenkaltura@gmail.com',
-        },
-        'params': {
-            'skip_download': True,
-        },
-    }]
-
-    def _real_extract(self, url):
-        video_id = self._match_id(url)
-
-        URLS = ('http://widgets.ellentube.com/videos/%s' % video_id, url)
-
-        for num, url_ in enumerate(URLS, 1):
-            webpage = self._download_webpage(
-                url_, video_id, fatal=num == len(URLS))
-
-            default = NO_DEFAULT if num == len(URLS) else None
-
-            partner_id = self._search_regex(
-                r"var\s+partnerId\s*=\s*'([^']+)", webpage, 'partner id',
-                default=default)
-
-            kaltura_id = self._search_regex(
-                [r'id="kaltura_player_([^"]+)"',
-                 r"_wb_entry_id\s*:\s*'([^']+)",
-                 r'data-kaltura-entry-id="([^"]+)'],
-                webpage, 'kaltura id', default=default)
-
-            if partner_id and kaltura_id:
-                break
-
-        return self.url_result('kaltura:%s:%s' % (partner_id, kaltura_id), KalturaIE.ie_key())
-
-
-class EllenTVClipsIE(InfoExtractor):
-    IE_NAME = 'EllenTV:clips'
-    _VALID_URL = r'https?://(?:www\.)?ellentv\.com/episodes/(?P<id>[a-z0-9_-]+)'
-    _TEST = {
-        'url': 'http://www.ellentv.com/episodes/meryl-streep-vanessa-hudgens/',
-        'info_dict': {
-            'id': 'meryl-streep-vanessa-hudgens',
-            'title': 'Meryl Streep, Vanessa Hudgens',
-        },
-        'playlist_mincount': 5,
-    }
-
-    def _real_extract(self, url):
-        playlist_id = self._match_id(url)
-
-        webpage = self._download_webpage(url, playlist_id)
-        playlist = self._extract_playlist(webpage, playlist_id)
-
-        return {
-            '_type': 'playlist',
-            'id': playlist_id,
-            'title': self._og_search_title(webpage),
-            'entries': self._extract_entries(playlist)
-        }
-
-    def _extract_playlist(self, webpage, playlist_id):
-        json_string = self._search_regex(r'playerView.addClips\(\[\{(.*?)\}\]\);', webpage, 'json')
-        return self._parse_json('[{' + json_string + '}]', playlist_id)
-
-    def _extract_entries(self, playlist):
-        return [
-            self.url_result(
-                'kaltura:%s:%s' % (item['kaltura_partner_id'], item['kaltura_entry_id']),
-                KalturaIE.ie_key(), video_id=item['kaltura_entry_id'])
-            for item in playlist]
diff --git a/youtube_dl/extractor/espn.py b/youtube_dl/extractor/espn.py
index 7a743606836517736e858411e626a5e2a502890e..127c69b2eb668b85236eccfaccc63dcb7dd9d714 100644 (file)
@@ -1,6 +1,9 @@
 from __future__ import unicode_literals
 
+import re
+
 from .common import InfoExtractor
+from .once import OnceIE
 from ..compat import compat_str
 from ..utils import (
     determine_ext,
@@ -9,22 +12,27 @@ from ..utils import (
 )
 
 
-class ESPNIE(InfoExtractor):
+class ESPNIE(OnceIE):
     _VALID_URL = r'''(?x)
                     https?://
                         (?:
-                            (?:(?:\w+\.)+)?espn\.go|
-                            (?:www\.)?espn
-                        )\.com/
-                        (?:
-                            (?:
-                                video/clip|
-                                watch/player
-                            )
                             (?:
-                                \?.*?\bid=|
-                                /_/id/
-                            )
+                                (?:
+                                    (?:(?:\w+\.)+)?espn\.go|
+                                    (?:www\.)?espn
+                                )\.com/
+                                (?:
+                                    (?:
+                                        video/(?:clip|iframe/twitter)|
+                                        watch/player
+                                    )
+                                    (?:
+                                        .*?\?.*?\bid=|
+                                        /_/id/
+                                    )
+                                )
+                            )|
+                            (?:www\.)espnfc\.(?:com|us)/(?:video/)?[^/]+/\d+/video/
                         )
                         (?P<id>\d+)
                     '''
@@ -77,6 +85,15 @@ class ESPNIE(InfoExtractor):
     }, {
         'url': 'http://www.espn.com/video/clip/_/id/17989860',
         'only_matching': True,
+    }, {
+        'url': 'https://espn.go.com/video/iframe/twitter/?cms=espn&id=10365079',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.espnfc.us/video/espn-fc-tv/86/video/3319154/nashville-unveiled-as-the-newest-club-in-mls',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.espnfc.com/english-premier-league/23/video/3324163/premier-league-in-90-seconds-golden-tweets',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
@@ -93,7 +110,9 @@ class ESPNIE(InfoExtractor):
 
         def traverse_source(source, base_source_id=None):
             for source_id, source in source.items():
-                if isinstance(source, compat_str):
+                if source_id == 'alert':
+                    continue
+                elif isinstance(source, compat_str):
                     extract_source(source, base_source_id)
                 elif isinstance(source, dict):
                     traverse_source(
@@ -106,7 +125,9 @@ class ESPNIE(InfoExtractor):
                 return
             format_urls.add(source_url)
             ext = determine_ext(source_url)
-            if ext == 'smil':
+            if OnceIE.suitable(source_url):
+                formats.extend(self._extract_once_formats(source_url))
+            elif ext == 'smil':
                 formats.extend(self._extract_smil_formats(
                     source_url, video_id, fatal=False))
             elif ext == 'f4m':
@@ -117,12 +138,24 @@ class ESPNIE(InfoExtractor):
                     source_url, video_id, 'mp4', entry_protocol='m3u8_native',
                     m3u8_id=source_id, fatal=False))
             else:
-                formats.append({
+                f = {
                     'url': source_url,
                     'format_id': source_id,
-                })
-
-        traverse_source(clip['links']['source'])
+                }
+                mobj = re.search(r'(\d+)p(\d+)_(\d+)k\.', source_url)
+                if mobj:
+                    f.update({
+                        'height': int(mobj.group(1)),
+                        'fps': int(mobj.group(2)),
+                        'tbr': int(mobj.group(3)),
+                    })
+                if source_id == 'mezzanine':
+                    f['preference'] = 1
+                formats.append(f)
+
+        links = clip.get('links', {})
+        traverse_source(links.get('source', {}))
+        traverse_source(links.get('mobile', {}))
         self._sort_formats(formats)
 
         description = clip.get('caption') or clip.get('description')
@@ -144,9 +177,6 @@ class ESPNIE(InfoExtractor):
 class ESPNArticleIE(InfoExtractor):
     _VALID_URL = r'https?://(?:espn\.go|(?:www\.)?espn)\.com/(?:[^/]+/)*(?P<id>[^/]+)'
     _TESTS = [{
-        'url': 'https://espn.go.com/video/iframe/twitter/?cms=espn&id=10365079',
-        'only_matching': True,
-    }, {
         'url': 'http://espn.go.com/nba/recap?gameId=400793786',
         'only_matching': True,
     }, {
@@ -175,3 +205,34 @@ class ESPNArticleIE(InfoExtractor):
 
         return self.url_result(
             'http://espn.go.com/video/clip?id=%s' % video_id, ESPNIE.ie_key())
+
+
+class FiveThirtyEightIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?fivethirtyeight\.com/features/(?P<id>[^/?#]+)'
+    _TEST = {
+        'url': 'http://fivethirtyeight.com/features/how-the-6-8-raiders-can-still-make-the-playoffs/',
+        'info_dict': {
+            'id': '21846851',
+            'ext': 'mp4',
+            'title': 'FiveThirtyEight: The Raiders can still make the playoffs',
+            'description': 'Neil Paine breaks down the simplest scenario that will put the Raiders into the playoffs at 8-8.',
+            'timestamp': 1513960621,
+            'upload_date': '20171222',
+        },
+        'params': {
+            'skip_download': True,
+        },
+        'expected_warnings': ['Unable to download f4m manifest'],
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+
+        video_id = self._search_regex(
+            r'data-video-id=["\'](?P<id>\d+)',
+            webpage, 'video id', group='id')
+
+        return self.url_result(
+            'http://espn.go.com/video/clip?id=%s' % video_id, ESPNIE.ie_key())
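
The ESPN format entries above derive height, frame rate and bitrate from source file names shaped like "<height>p<fps>_<tbr>k.". A self-contained check of that pattern with a made-up URL:

    import re

    source_url = 'https://media.example.com/espn/clip_720p30_2896k.mp4'
    mobj = re.search(r'(\d+)p(\d+)_(\d+)k\.', source_url)
    if mobj:
        height, fps, tbr = (int(g) for g in mobj.groups())
        print(height, fps, tbr)  # 720 30 2896
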
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index d084707ee81e9e137e260e45220e68e0efcc95d0..e64defe624dd9158b4cb36320b76d2a867a2d602 100644 (file)
@@ -127,7 +127,10 @@ from .bloomberg import BloombergIE
 from .bokecc import BokeCCIE
 from .bostonglobe import BostonGlobeIE
 from .bpb import BpbIE
-from .br import BRIE
+from .br import (
+    BRIE,
+    BRMediathekIE,
+)
 from .bravotv import BravoTVIE
 from .breakcom import BreakIE
 from .brightcove import (
@@ -135,10 +138,7 @@ from .brightcove import (
     BrightcoveNewIE,
 )
 from .buzzfeed import BuzzFeedIE
-from .byutv import (
-    BYUtvIE,
-    BYUtvEventIE,
-)
+from .byutv import BYUtvIE
 from .c56 import C56IE
 from .camdemy import (
     CamdemyIE,
@@ -205,7 +205,6 @@ from .cnn import (
     CNNArticleIE,
 )
 from .coub import CoubIE
-from .collegerama import CollegeRamaIE
 from .comedycentral import (
     ComedyCentralFullEpisodesIE,
     ComedyCentralIE,
@@ -243,11 +242,10 @@ from .dailymotion import (
     DailymotionIE,
     DailymotionPlaylistIE,
     DailymotionUserIE,
-    DailymotionCloudIE,
 )
 from .daisuki import (
-    DaisukiIE,
-    DaisukiPlaylistIE,
+    DaisukiMottoIE,
+    DaisukiMottoPlaylistIE,
 )
 from .daum import (
     DaumIE,
@@ -309,9 +307,10 @@ from .ehow import EHowIE
 from .eighttracks import EightTracksIE
 from .einthusan import EinthusanIE
 from .eitb import EitbIE
-from .ellentv import (
-    EllenTVIE,
-    EllenTVClipsIE,
+from .ellentube import (
+    EllenTubeIE,
+    EllenTubeVideoIE,
+    EllenTubePlaylistIE,
 )
 from .elpais import ElPaisIE
 from .embedly import EmbedlyIE
@@ -322,6 +321,7 @@ from .escapist import EscapistIE
 from .espn import (
     ESPNIE,
     ESPNArticleIE,
+    FiveThirtyEightIE,
 )
 from .esri import EsriVideoIE
 from .etonline import ETOnlineIE
@@ -344,11 +344,10 @@ from .filmon import (
     FilmOnIE,
     FilmOnChannelIE,
 )
-from .firstpost import FirstpostIE
+from .filmweb import FilmwebIE
 from .firsttv import FirstTVIE
 from .fivemin import FiveMinIE
 from .fivetv import FiveTVIE
-from .fktv import FKTVIE
 from .flickr import FlickrIE
 from .flipagram import FlipagramIE
 from .folketinget import FolketingetIE
@@ -375,7 +374,7 @@ from .francetv import (
     FranceTVIE,
     FranceTVEmbedIE,
     FranceTVInfoIE,
-    GenerationQuoiIE,
+    GenerationWhatIE,
     CultureboxIE,
 )
 from .freesound import FreesoundIE
@@ -391,7 +390,6 @@ from .gameone import (
     GameOneIE,
     GameOnePlaylistIE,
 )
-from .gamersyde import GamersydeIE
 from .gamespot import GameSpotIE
 from .gamestar import GameStarIE
 from .gaskrank import GaskrankIE
@@ -467,6 +465,7 @@ from .indavideo import (
 )
 from .infoq import InfoQIE
 from .instagram import InstagramIE, InstagramUserIE
+from .internazionale import InternazionaleIE
 from .internetvideoarchive import InternetVideoArchiveIE
 from .iprima import IPrimaIE
 from .iqiyi import IqiyiIE
@@ -572,9 +571,11 @@ from .mangomolo import (
     MangomoloLiveIE,
 )
 from .manyvids import ManyVidsIE
+from .massengeschmacktv import MassengeschmackTVIE
 from .matchtv import MatchTVIE
 from .mdr import MDRIE
 from .mediaset import MediasetIE
+from .mediasite import MediasiteIE
 from .medici import MediciIE
 from .megaphone import MegaphoneIE
 from .meipai import MeipaiIE
@@ -688,6 +689,7 @@ from .nhl import (
 )
 from .nick import (
     NickIE,
+    NickBrIE,
     NickDeIE,
     NickNightIE,
     NickRuIE,
@@ -720,10 +722,6 @@ from .nowness import (
     NownessPlaylistIE,
     NownessSeriesIE,
 )
-from .nowtv import (
-    NowTVIE,
-    NowTVListIE,
-)
 from .noz import NozIE
 from .npo import (
     AndereTijdenIE,
@@ -789,6 +787,7 @@ from .patreon import PatreonIE
 from .pbs import PBSIE
 from .pearvideo import PearVideoIE
 from .people import PeopleIE
+from .performgroup import PerformGroupIE
 from .periscope import (
     PeriscopeIE,
     PeriscopeUserIE,
@@ -855,6 +854,7 @@ from .radiofrance import RadioFranceIE
 from .rai import (
     RaiPlayIE,
     RaiPlayLiveIE,
+    RaiPlayPlaylistIE,
     RaiIE,
 )
 from .rbmaradio import RBMARadioIE
@@ -912,7 +912,6 @@ from .rutube import (
 from .rutv import RUTVIE
 from .ruutu import RuutuIE
 from .ruv import RuvIE
-from .sandia import SandiaIE
 from .safari import (
     SafariIE,
     SafariApiIE,
@@ -929,8 +928,12 @@ from .senateisvp import SenateISVPIE
 from .sendtonews import SendtoNewsIE
 from .servingsys import ServingSysIE
 from .servus import ServusIE
+from .sevenplus import SevenPlusIE
 from .sexu import SexuIE
-from .shahid import ShahidIE
+from .shahid import (
+    ShahidIE,
+    ShahidShowIE,
+)
 from .shared import (
     SharedIE,
     VivoIE,
@@ -998,6 +1001,7 @@ from .streamango import StreamangoIE
 from .streamcloud import StreamcloudIE
 from .streamcz import StreamCZIE
 from .streetvoice import StreetVoiceIE
+from .stretchinternet import StretchInternetIE
 from .sunporno import SunPornoIE
 from .svt import (
     SVTIE,
@@ -1100,6 +1104,10 @@ from .tvigle import TvigleIE
 from .tvland import TVLandIE
 from .tvn24 import TVN24IE
 from .tvnoe import TVNoeIE
+from .tvnow import (
+    TVNowIE,
+    TVNowListIE,
+)
 from .tvp import (
     TVPEmbedIE,
     TVPIE,
@@ -1113,6 +1121,7 @@ from .tvplayer import TVPlayerIE
 from .tweakers import TweakersIE
 from .twentyfourvideo import TwentyFourVideoIE
 from .twentymin import TwentyMinutenIE
+from .twentythreevideo import TwentyThreeVideoIE
 from .twitch import (
     TwitchVideoIE,
     TwitchChapterIE,
@@ -1135,8 +1144,10 @@ from .udemy import (
     UdemyCourseIE
 )
 from .udn import UDNEmbedIE
+from .ufctv import UFCTVIE
 from .uktvplay import UKTVPlayIE
 from .digiteka import DigitekaIE
+from .umg import UMGDeIE
 from .unistra import UnistraIE
 from .unity import UnityIE
 from .uol import UOLIE
diff --git a/youtube_dl/extractor/faz.py b/youtube_dl/extractor/faz.py
index 4bc8fc5127010e1b3ced207da04f8926716cc94d..312ee2aeed953400df55086a13bfa40ebbe31650 100644 (file)
@@ -1,7 +1,10 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
+import re
+
 from .common import InfoExtractor
+from ..compat import compat_etree_fromstring
 from ..utils import (
     xpath_element,
     xpath_text,
@@ -43,10 +46,15 @@ class FazIE(InfoExtractor):
 
         webpage = self._download_webpage(url, video_id)
         description = self._og_search_description(webpage)
-        config_xml_url = self._search_regex(
-            r'videoXMLURL\s*=\s*"([^"]+)', webpage, 'config xml url')
-        config = self._download_xml(
-            config_xml_url, video_id, 'Downloading config xml')
+        media = self._html_search_regex(
+            r"data-videojs-media='([^']+)",
+            webpage, 'media')
+        if media == 'extern':
+            perform_url = self._search_regex(
+                r"<iframe[^>]+?src='((?:http:)?//player\.performgroup\.com/eplayer/eplayer\.html#/?[0-9a-f]{26}\.[0-9a-z]{26})",
+                webpage, 'perform url')
+            return self.url_result(perform_url)
+        config = compat_etree_fromstring(media)
 
         encodings = xpath_element(config, 'ENCODINGS', 'encodings', True)
         formats = []
@@ -55,12 +63,24 @@ class FazIE(InfoExtractor):
             if encoding is not None:
                 encoding_url = xpath_text(encoding, 'FILENAME')
                 if encoding_url:
-                    formats.append({
+                    tbr = xpath_text(encoding, 'AVERAGEBITRATE', 1000)
+                    if tbr:
+                        tbr = int_or_none(tbr.replace(',', '.'))
+                    f = {
                         'url': encoding_url,
                         'format_id': code.lower(),
                         'quality': pref,
-                        'tbr': int_or_none(xpath_text(encoding, 'AVERAGEBITRATE')),
-                    })
+                        'tbr': tbr,
+                        'vcodec': xpath_text(encoding, 'CODEC'),
+                    }
+                    mobj = re.search(r'(\d+)x(\d+)_(\d+)\.mp4', encoding_url)
+                    if mobj:
+                        f.update({
+                            'width': int(mobj.group(1)),
+                            'height': int(mobj.group(2)),
+                            'tbr': tbr or int(mobj.group(3)),
+                        })
+                    formats.append(f)
         self._sort_formats(formats)
 
         return {
diff --git a/youtube_dl/extractor/fczenit.py b/youtube_dl/extractor/fczenit.py
index 8d1010b88c83dcbfd3e71e9f20275bf6fb9c9d21..8db7c59638704ee4febe9d371c0c85c397fb50f9 100644 (file)
@@ -2,7 +2,10 @@
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
-from ..compat import compat_urlparse
+from ..utils import (
+    int_or_none,
+    float_or_none,
+)
 
 
 class FczenitIE(InfoExtractor):
@@ -14,6 +17,8 @@ class FczenitIE(InfoExtractor):
             'id': '41044',
             'ext': 'mp4',
             'title': 'Так пишется история: казанский разгром ЦСКА на «Зенит-ТВ»',
+            'timestamp': 1462283735,
+            'upload_date': '20160503',
         },
     }
 
@@ -21,28 +26,31 @@ class FczenitIE(InfoExtractor):
         video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
 
-        video_title = self._html_search_regex(
-            r'<[^>]+class=\"photoalbum__title\">([^<]+)', webpage, 'title')
+        msi_id = self._search_regex(
+            r"(?s)config\s*=\s*{.+?video_id\s*:\s*'([^']+)'", webpage, 'msi id')
 
-        video_items = self._parse_json(self._search_regex(
-            r'arrPath\s*=\s*JSON\.parse\(\'(.+)\'\)', webpage, 'video items'),
-            video_id)
-
-        def merge_dicts(*dicts):
-            ret = {}
-            for a_dict in dicts:
-                ret.update(a_dict)
-            return ret
+        msi_data = self._download_json(
+            'http://player.fc-zenit.ru/msi/video', msi_id, query={
+                'video': msi_id,
+            })['data']
+        title = msi_data['name']
 
         formats = [{
-            'url': compat_urlparse.urljoin(url, video_url),
-            'tbr': int(tbr),
-        } for tbr, video_url in merge_dicts(*video_items).items()]
+            'format_id': q.get('label'),
+            'url': q['url'],
+            'height': int_or_none(q.get('label')),
+        } for q in msi_data['qualities'] if q.get('url')]
 
         self._sort_formats(formats)
 
+        tags = [tag['label'] for tag in msi_data.get('tags', []) if tag.get('label')]
+
         return {
             'id': video_id,
-            'title': video_title,
+            'title': title,
+            'thumbnail': msi_data.get('preview'),
             'formats': formats,
+            'duration': float_or_none(msi_data.get('duration')),
+            'timestamp': int_or_none(msi_data.get('date')),
+            'tags': tags,
         }
diff --git a/youtube_dl/extractor/filmweb.py b/youtube_dl/extractor/filmweb.py
new file mode 100644 (file)
index 0000000..56000bc
--- /dev/null
@@ -0,0 +1,42 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+
+class FilmwebIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?filmweb\.no/(?P<type>trailere|filmnytt)/article(?P<id>\d+)\.ece'
+    _TEST = {
+        'url': 'http://www.filmweb.no/trailere/article1264921.ece',
+        'md5': 'e353f47df98e557d67edaceda9dece89',
+        'info_dict': {
+            'id': '13033574',
+            'ext': 'mp4',
+            'title': 'Det som en gang var',
+            'upload_date': '20160316',
+            'timestamp': 1458140101,
+            'uploader_id': '12639966',
+            'uploader': 'Live Roaldset',
+        }
+    }
+
+    def _real_extract(self, url):
+        article_type, article_id = re.match(self._VALID_URL, url).groups()
+        if article_type == 'filmnytt':
+            webpage = self._download_webpage(url, article_id)
+            article_id = self._search_regex(r'data-videoid="(\d+)"', webpage, 'article id')
+        embed_code = self._download_json(
+            'https://www.filmweb.no/template_v2/ajax/json_trailerEmbed.jsp',
+            article_id, query={
+                'articleId': article_id,
+            })['embedCode']
+        iframe_url = self._proto_relative_url(self._search_regex(
+            r'<iframe[^>]+src="([^"]+)', embed_code, 'iframe url'))
+
+        return {
+            '_type': 'url_transparent',
+            'id': article_id,
+            'url': iframe_url,
+            'ie_key': 'TwentyThreeVideo',
+        }
diff --git a/youtube_dl/extractor/firstpost.py b/youtube_dl/extractor/firstpost.py
deleted file mode 100644 (file)
index e8936cb..0000000
+++ /dev/null
@@ -1,50 +0,0 @@
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-
-
-class FirstpostIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?firstpost\.com/[^/]+/.*-(?P<id>[0-9]+)\.html'
-
-    _TEST = {
-        'url': 'http://www.firstpost.com/india/india-to-launch-indigenous-aircraft-carrier-monday-1025403.html',
-        'md5': 'ee9114957692f01fb1263ed87039112a',
-        'info_dict': {
-            'id': '1025403',
-            'ext': 'mp4',
-            'title': 'India to launch indigenous aircraft carrier INS Vikrant today',
-            'description': 'md5:feef3041cb09724e0bdc02843348f5f4',
-        }
-    }
-
-    def _real_extract(self, url):
-        video_id = self._match_id(url)
-        page = self._download_webpage(url, video_id)
-
-        title = self._html_search_meta('twitter:title', page, 'title', fatal=True)
-        description = self._html_search_meta('twitter:description', page, 'title')
-
-        data = self._download_xml(
-            'http://www.firstpost.com/getvideoxml-%s.xml' % video_id, video_id,
-            'Downloading video XML')
-
-        item = data.find('./playlist/item')
-        thumbnail = item.find('./image').text
-
-        formats = [
-            {
-                'url': details.find('./file').text,
-                'format_id': details.find('./label').text.strip(),
-                'width': int(details.find('./width').text.strip()),
-                'height': int(details.find('./height').text.strip()),
-            } for details in item.findall('./source/file_details') if details.find('./file').text
-        ]
-        self._sort_formats(formats)
-
-        return {
-            'id': video_id,
-            'title': title,
-            'description': description,
-            'thumbnail': thumbnail,
-            'formats': formats,
-        }
diff --git a/youtube_dl/extractor/fktv.py b/youtube_dl/extractor/fktv.py
deleted file mode 100644 (file)
index 2958452..0000000
+++ /dev/null
@@ -1,51 +0,0 @@
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-from ..utils import (
-    clean_html,
-    determine_ext,
-    js_to_json,
-)
-
-
-class FKTVIE(InfoExtractor):
-    IE_NAME = 'fernsehkritik.tv'
-    _VALID_URL = r'https?://(?:www\.)?fernsehkritik\.tv/folge-(?P<id>[0-9]+)(?:/.*)?'
-
-    _TEST = {
-        'url': 'http://fernsehkritik.tv/folge-1',
-        'md5': '21f0b0c99bce7d5b524eb1b17b1c6d79',
-        'info_dict': {
-            'id': '1',
-            'ext': 'mp4',
-            'title': 'Folge 1 vom 10. April 2007',
-            'thumbnail': r're:^https?://.*\.jpg$',
-        },
-    }
-
-    def _real_extract(self, url):
-        episode = self._match_id(url)
-
-        webpage = self._download_webpage(
-            'http://fernsehkritik.tv/folge-%s/play' % episode, episode)
-        title = clean_html(self._html_search_regex(
-            '<h3>([^<]+)</h3>', webpage, 'title'))
-        thumbnail = self._search_regex(r'POSTER\s*=\s*"([^"]+)', webpage, 'thumbnail', fatal=False)
-        sources = self._parse_json(self._search_regex(r'(?s)MEDIA\s*=\s*(\[.+?\]);', webpage, 'media'), episode, js_to_json)
-
-        formats = []
-        for source in sources:
-            furl = source.get('src')
-            if furl:
-                formats.append({
-                    'url': furl,
-                    'format_id': determine_ext(furl),
-                })
-        self._sort_formats(formats)
-
-        return {
-            'id': episode,
-            'title': title,
-            'formats': formats,
-            'thumbnail': thumbnail,
-        }
diff --git a/youtube_dl/extractor/fox.py b/youtube_dl/extractor/fox.py
index 5f98d017b84aae5a88eb07655b68ac52726dd643..11d6c9c3251ff86c6ea26a27a83dec064963ac2c 100644 (file)
@@ -11,6 +11,7 @@ from ..utils import (
     parse_duration,
     try_get,
     unified_timestamp,
+    update_url_query,
 )
 
 
@@ -62,7 +63,8 @@ class FOXIE(AdobePassIE):
         duration = int_or_none(video.get('durationInSeconds')) or int_or_none(
             video.get('duration')) or parse_duration(video.get('duration'))
         timestamp = unified_timestamp(video.get('datePublished'))
-        age_limit = parse_age_limit(video.get('contentRating'))
+        rating = video.get('contentRating')
+        age_limit = parse_age_limit(rating)
 
         data = try_get(
             video, lambda x: x['trackingData']['properties'], dict) or {}
@@ -77,8 +79,24 @@ class FOXIE(AdobePassIE):
         release_year = int_or_none(video.get('releaseYear'))
 
         if data.get('authRequired'):
-            # TODO: AP
-            pass
+            resource = self._get_mvpd_resource(
+                'fbc-fox', title, video.get('guid'), rating)
+            release_url = update_url_query(
+                release_url, {
+                    'auth': self._extract_mvpd_auth(
+                        url, video_id, 'fbc-fox', resource)
+                })
+
+        subtitles = {}
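+        # use the SCC caption document, if present, as English subtitles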
+        for doc_rel in video.get('documentReleases', []):
+            rel_url = doc_rel.get('url')
+            if not rel_url or doc_rel.get('format') != 'SCC':
+                continue
+            subtitles['en'] = [{
+                'url': rel_url,
+                'ext': 'scc',
+            }]
+            break
 
         info = {
             'id': video_id,
@@ -93,6 +111,7 @@ class FOXIE(AdobePassIE):
             'episode': episode,
             'episode_number': episode_number,
             'release_year': release_year,
+            'subtitles': subtitles,
         }
 
         urlh = self._request_webpage(HEADRequest(release_url), video_id)
diff --git a/youtube_dl/extractor/fox9.py b/youtube_dl/extractor/fox9.py
index 56d9975d0e05936d23bf622c2ac34dd7ffa26332..17dfffa7b70867adaa5a8995e4e24e03ed18df4d 100644 (file)
@@ -2,7 +2,6 @@
 from __future__ import unicode_literals
 
 from .anvato import AnvatoIE
-from ..utils import js_to_json
 
 
 class FOX9IE(AnvatoIE):
@@ -34,9 +33,9 @@ class FOX9IE(AnvatoIE):
 
         video_id = self._parse_json(
             self._search_regex(
-                r'AnvatoPlaylist\s*\(\s*(\[.+?\])\s*\)\s*;',
+                r"this\.videosJson\s*=\s*'(\[.+?\])';",
                 webpage, 'anvato playlist'),
-            video_id, transform_source=js_to_json)[0]['video']
+            video_id)[0]['video']
 
         return self._get_anvato_videos(
             'anvato_epfox_app_web_prod_b3373168e12f423f41504f207000188daf88251b',
diff --git a/youtube_dl/extractor/francetv.py b/youtube_dl/extractor/francetv.py
index 2bcbb3e39bb5502e83f64b45b3ca115d7a3bbaa9..095bb3954c523852be8d7d09d498d324d429c826 100644 (file)
@@ -3,7 +3,6 @@
 from __future__ import unicode_literals
 
 import re
-import json
 
 from .common import InfoExtractor
 from ..compat import compat_urlparse
@@ -14,10 +13,7 @@ from ..utils import (
     parse_duration,
     determine_ext,
 )
-from .dailymotion import (
-    DailymotionIE,
-    DailymotionCloudIE,
-)
+from .dailymotion import DailymotionIE
 
 
 class FranceTVBaseInfoExtractor(InfoExtractor):
@@ -291,10 +287,6 @@ class FranceTVInfoIE(FranceTVBaseInfoExtractor):
         page_title = mobj.group('title')
         webpage = self._download_webpage(url, page_title)
 
-        dmcloud_url = DailymotionCloudIE._extract_dmcloud_url(webpage)
-        if dmcloud_url:
-            return self.url_result(dmcloud_url, DailymotionCloudIE.ie_key())
-
         dailymotion_urls = DailymotionIE._extract_urls(webpage)
         if dailymotion_urls:
             return self.playlist_result([
@@ -308,31 +300,32 @@ class FranceTVInfoIE(FranceTVBaseInfoExtractor):
         return self._extract_video(video_id, catalogue)
 
 
-class GenerationQuoiIE(InfoExtractor):
-    IE_NAME = 'france2.fr:generation-quoi'
-    _VALID_URL = r'https?://generation-quoi\.france2\.fr/portrait/(?P<id>[^/?#]+)'
+class GenerationWhatIE(InfoExtractor):
+    IE_NAME = 'france2.fr:generation-what'
+    _VALID_URL = r'https?://generation-what\.francetv\.fr/[^/]+/video/(?P<id>[^/?#]+)'
 
-    _TEST = {
-        'url': 'http://generation-quoi.france2.fr/portrait/garde-a-vous',
+    _TESTS = [{
+        'url': 'http://generation-what.francetv.fr/portrait/video/present-arms',
         'info_dict': {
-            'id': 'k7FJX8VBcvvLmX4wA5Q',
+            'id': 'wtvKYUG45iw',
             'ext': 'mp4',
-            'title': 'Génération Quoi - Garde à Vous',
-            'uploader': 'Génération Quoi',
-        },
-        'params': {
-            # It uses Dailymotion
-            'skip_download': True,
+            'title': 'Generation What - Garde à vous - FRA',
+            'uploader': 'Generation What',
+            'uploader_id': 'UCHH9p1eetWCgt4kXBYCb3_w',
+            'upload_date': '20160411',
         },
-    }
+    }, {
+        'url': 'http://generation-what.francetv.fr/europe/video/present-arms',
+        'only_matching': True,
+    }]
 
     def _real_extract(self, url):
         display_id = self._match_id(url)
-        info_url = compat_urlparse.urljoin(url, '/medias/video/%s.json' % display_id)
-        info_json = self._download_webpage(info_url, display_id)
-        info = json.loads(info_json)
-        return self.url_result('http://www.dailymotion.com/video/%s' % info['id'],
-                               ie='Dailymotion')
+        webpage = self._download_webpage(url, display_id)
+        youtube_id = self._search_regex(
+            r"window\.videoURL\s*=\s*'([0-9A-Za-z_-]{11})';",
+            webpage, 'youtube id')
+        return self.url_result(youtube_id, 'Youtube', youtube_id)
 
 
 class CultureboxIE(FranceTVBaseInfoExtractor):
@@ -363,6 +356,7 @@ class CultureboxIE(FranceTVBaseInfoExtractor):
             raise ExtractorError('Video %s is not available' % name, expected=True)
 
         video_id, catalogue = self._search_regex(
-            r'"http://videos\.francetv\.fr/video/([^@]+@[^"]+)"', webpage, 'video id').split('@')
+            r'["\'>]https?://videos\.francetv\.fr/video/([^@]+@.+?)["\'<]',
+            webpage, 'video id').split('@')
 
         return self._extract_video(video_id, catalogue)
diff --git a/youtube_dl/extractor/freespeech.py b/youtube_dl/extractor/freespeech.py
index 7fa271b51fa59e14342d0de21cd50be719f3467d..486a49c05271c37280b059fb3a6c6bb4fd0de3ed 100644 (file)
@@ -1,37 +1,34 @@
 from __future__ import unicode_literals
 
-import re
-import json
-
 from .common import InfoExtractor
 
 
 class FreespeechIE(InfoExtractor):
     IE_NAME = 'freespeech.org'
-    _VALID_URL = r'https?://(?:www\.)?freespeech\.org/video/(?P<title>.+)'
+    _VALID_URL = r'https?://(?:www\.)?freespeech\.org/stories/(?P<id>.+)'
     _TEST = {
         'add_ie': ['Youtube'],
-        'url': 'https://www.freespeech.org/video/obama-romney-campaign-colorado-ahead-debate-0',
+        'url': 'http://www.freespeech.org/stories/fcc-announces-net-neutrality-rollback-whats-stake/',
         'info_dict': {
-            'id': 'poKsVCZ64uU',
-            'ext': 'webm',
-            'title': 'Obama, Romney Campaign in Colorado Ahead of Debate',
-            'description': 'Obama, Romney Campaign in Colorado Ahead of Debate',
-            'uploader': 'freespeechtv',
+            'id': 'waRk6IPqyWM',
+            'ext': 'mp4',
+            'title': 'What\'s At Stake - Net Neutrality Special',
+            'description': 'Presented by MNN and FSTV',
+            'upload_date': '20170728',
             'uploader_id': 'freespeechtv',
-            'upload_date': '20121002',
+            'uploader': 'freespeechtv',
         },
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        title = mobj.group('title')
-        webpage = self._download_webpage(url, title)
-        info_json = self._search_regex(r'jQuery\.extend\(Drupal\.settings, ({.*?})\);', webpage, 'info')
-        info = json.loads(info_json)
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+        youtube_url = self._search_regex(
+            r'data-video-url="([^"]+)"',
+            webpage, 'youtube url')
 
         return {
             '_type': 'url',
-            'url': info['jw_player']['basic_video_node_player']['file'],
+            'url': youtube_url,
             'ie_key': 'Youtube',
         }
diff --git a/youtube_dl/extractor/funimation.py b/youtube_dl/extractor/funimation.py
index 8c37509ec60f132671952c8ad018e4ba79a5fac6..107f658baf2c393036dd4d2c770c01258e29e1a9 100644 (file)
@@ -57,7 +57,7 @@ class FunimationIE(InfoExtractor):
         try:
             data = self._download_json(
                 'https://prod-api-funimationnow.dadcdigital.com/api/auth/login/',
-                None, 'Logging in as %s' % username, data=urlencode_postdata({
+                None, 'Logging in', data=urlencode_postdata({
                     'username': username,
                     'password': password,
                 }))
diff --git a/youtube_dl/extractor/gamersyde.py b/youtube_dl/extractor/gamersyde.py
deleted file mode 100644 (file)
index a218a69..0000000
+++ /dev/null
@@ -1,70 +0,0 @@
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..utils import (
-    js_to_json,
-    parse_duration,
-    remove_start,
-)
-
-
-class GamersydeIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?gamersyde\.com/hqstream_(?P<display_id>[\da-z_]+)-(?P<id>\d+)_[a-z]{2}\.html'
-    _TEST = {
-        'url': 'http://www.gamersyde.com/hqstream_bloodborne_birth_of_a_hero-34371_en.html',
-        'md5': 'f38d400d32f19724570040d5ce3a505f',
-        'info_dict': {
-            'id': '34371',
-            'ext': 'mp4',
-            'duration': 372,
-            'title': 'Bloodborne - Birth of a hero',
-            'thumbnail': r're:^https?://.*\.jpg$',
-        }
-    }
-
-    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-        display_id = mobj.group('display_id')
-
-        webpage = self._download_webpage(url, display_id)
-
-        playlist = self._parse_json(
-            self._search_regex(
-                r'(?s)playlist: \[({.+?})\]\s*}\);', webpage, 'files'),
-            display_id, transform_source=js_to_json)
-
-        formats = []
-        for source in playlist['sources']:
-            video_url = source.get('file')
-            if not video_url:
-                continue
-            format_id = source.get('label')
-            f = {
-                'url': video_url,
-                'format_id': format_id,
-            }
-            m = re.search(r'^(?P<height>\d+)[pP](?P<fps>\d+)fps', format_id)
-            if m:
-                f.update({
-                    'height': int(m.group('height')),
-                    'fps': int(m.group('fps')),
-                })
-            formats.append(f)
-        self._sort_formats(formats)
-
-        title = remove_start(playlist['title'], '%s - ' % video_id)
-        thumbnail = playlist.get('image')
-        duration = parse_duration(self._search_regex(
-            r'Length:</label>([^<]+)<', webpage, 'duration', fatal=False))
-
-        return {
-            'id': video_id,
-            'display_id': display_id,
-            'title': title,
-            'thumbnail': thumbnail,
-            'duration': duration,
-            'formats': formats,
-        }
diff --git a/youtube_dl/extractor/gamespot.py b/youtube_dl/extractor/gamespot.py
index 6d177cbaf388afb1bb1a71873558ee55ca70ee2c..ab647dd4154cdc996c455ca45a5d6fa9d1e20f70 100644 (file)
@@ -14,7 +14,7 @@ from ..utils import (
 
 
 class GameSpotIE(OnceIE):
-    _VALID_URL = r'https?://(?:www\.)?gamespot\.com/videos/(?:[^/]+/\d+-|embed/)(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www\.)?gamespot\.com/(?:video|article)s/(?:[^/]+/\d+-|embed/)(?P<id>\d+)'
     _TESTS = [{
         'url': 'http://www.gamespot.com/videos/arma-3-community-guide-sitrep-i/2300-6410818/',
         'md5': 'b2a30deaa8654fcccd43713a6b6a4825',
@@ -38,6 +38,9 @@ class GameSpotIE(OnceIE):
     }, {
         'url': 'https://www.gamespot.com/videos/embed/6439218/',
         'only_matching': True,
+    }, {
+        'url': 'https://www.gamespot.com/articles/the-last-of-us-2-receives-new-ps4-trailer/1100-6454469/',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
@@ -108,7 +111,8 @@ class GameSpotIE(OnceIE):
             onceux_url = self._parse_json(unescapeHTML(onceux_json), page_id).get('metadataUri')
             if onceux_url:
                 formats.extend(self._extract_once_formats(re.sub(
-                    r'https?://[^/]+', 'http://once.unicornmedia.com', onceux_url)))
+                    r'https?://[^/]+', 'http://once.unicornmedia.com', onceux_url),
+                    http_formats_preference=-1))
 
         if not formats:
             for quality in ['sd', 'hd']:
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index 2a9c3e2dea0f397bc24b783550216ea42a582283..cc4c90b8cef194952e1b7d7517d6036329eb1c79 100644 (file)
@@ -59,10 +59,7 @@ from .tnaflix import TNAFlixNetworkEmbedIE
 from .drtuber import DrTuberIE
 from .redtube import RedTubeIE
 from .vimeo import VimeoIE
-from .dailymotion import (
-    DailymotionIE,
-    DailymotionCloudIE,
-)
+from .dailymotion import DailymotionIE
 from .dailymail import DailyMailIE
 from .onionstudios import OnionStudiosIE
 from .viewlift import ViewLiftEmbedIE
@@ -102,6 +99,8 @@ from .joj import JojIE
 from .megaphone import MegaphoneIE
 from .vzaar import VzaarIE
 from .channel9 import Channel9IE
+from .vshare import VShareIE
+from .mediasite import MediasiteIE
 
 
 class GenericIE(InfoExtractor):
@@ -1098,9 +1097,9 @@ class GenericIE(InfoExtractor):
         },
         # jwplayer rtmp
         {
-            'url': 'http://www.suffolk.edu/sjc/',
+            'url': 'http://www.suffolk.edu/sjc/live.php',
             'info_dict': {
-                'id': 'sjclive',
+                'id': 'live',
                 'ext': 'flv',
                 'title': 'Massachusetts Supreme Judicial Court Oral Arguments',
                 'uploader': 'www.suffolk.edu',
@@ -1108,7 +1107,7 @@ class GenericIE(InfoExtractor):
             'params': {
                 'skip_download': True,
             },
-            'skip': 'does not contain a video anymore',
+            'skip': 'Only has video a few mornings per month, see http://www.suffolk.edu/sjc/',
         },
         # Complex jwplayer
         {
@@ -1135,6 +1134,19 @@ class GenericIE(InfoExtractor):
                 'skip_download': True,
             }
         },
+        {
+            # JWPlatform iframe
+            'url': 'https://www.mediaite.com/tv/dem-senator-claims-gary-cohn-faked-a-bad-connection-during-trump-call-to-get-him-off-the-phone/',
+            'md5': 'ca00a040364b5b439230e7ebfd02c4e9',
+            'info_dict': {
+                'id': 'O0c5JcKT',
+                'ext': 'mp4',
+                'upload_date': '20171122',
+                'timestamp': 1511366290,
+                'title': 'Dem Senator Claims Gary Cohn Faked a Bad Connection During Trump Call to Get Him Off the Phone',
+            },
+            'add_ie': [JWPlatformIE.ie_key()],
+        },
         {
             # Video.js embed, multiple formats
             'url': 'http://ortcam.com/solidworks-урок-6-настройка-чертежа_33f9b7351.html',
@@ -1458,23 +1470,6 @@ class GenericIE(InfoExtractor):
                 'timestamp': 1432570283,
             },
         },
-        # Dailymotion Cloud video
-        {
-            'url': 'http://replay.publicsenat.fr/vod/le-debat/florent-kolandjian,dominique-cena,axel-decourtye,laurence-abeille,bruno-parmentier/175910',
-            'md5': 'dcaf23ad0c67a256f4278bce6e0bae38',
-            'info_dict': {
-                'id': 'x2uy8t3',
-                'ext': 'mp4',
-                'title': 'Sauvons les abeilles ! - Le débat',
-                'description': 'md5:d9082128b1c5277987825d684939ca26',
-                'thumbnail': r're:^https?://.*\.jpe?g$',
-                'timestamp': 1434970506,
-                'upload_date': '20150622',
-                'uploader': 'Public Sénat',
-                'uploader_id': 'xa9gza',
-            },
-            'skip': 'File not found.',
-        },
         # OnionStudios embed
         {
             'url': 'http://www.clickhole.com/video/dont-understand-bitcoin-man-will-mumble-explanatio-2537',
@@ -1921,6 +1916,28 @@ class GenericIE(InfoExtractor):
                 'title': 'Rescue Kit 14 Free Edition - Getting started',
             },
             'playlist_count': 4,
+        },
+        {
+            # vshare embed
+            'url': 'https://youtube-dl-demo.neocities.org/vshare.html',
+            'md5': '17b39f55b5497ae8b59f5fbce8e35886',
+            'info_dict': {
+                'id': '0f64ce6',
+                'title': 'vl14062007715967',
+                'ext': 'mp4',
+            }
+        },
+        {
+            'url': 'http://www.heidelberg-laureate-forum.org/blog/video/lecture-friday-september-23-2016-sir-c-antony-r-hoare/',
+            'md5': 'aecd089f55b1cb5a59032cb049d3a356',
+            'info_dict': {
+                'id': '90227f51a80c4d8f86c345a7fa62bd9a1d',
+                'ext': 'mp4',
+                'title': 'Lecture: Friday, September 23, 2016 - Sir Tony Hoare',
+                'description': 'md5:5a51db84a62def7b7054df2ade403c6c',
+                'timestamp': 1474354800,
+                'upload_date': '20160920',
+            }
         }
         # {
         #     # TODO: find another test
@@ -2171,7 +2188,7 @@ class GenericIE(InfoExtractor):
                 return self.playlist_result(self._parse_xspf(doc, video_id), video_id)
             elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag):
                 info_dict['formats'] = self._parse_mpd_formats(
-                    doc, video_id,
+                    doc,
                     mpd_base_url=compat_str(full_response.geturl()).rpartition('/')[0],
                     mpd_url=url)
                 self._sort_formats(info_dict['formats'])
@@ -2680,11 +2697,6 @@ class GenericIE(InfoExtractor):
         if senate_isvp_url:
             return self.url_result(senate_isvp_url, 'SenateISVP')
 
-        # Look for Dailymotion Cloud videos
-        dmcloud_url = DailymotionCloudIE._extract_dmcloud_url(webpage)
-        if dmcloud_url:
-            return self.url_result(dmcloud_url, 'DailymotionCloud')
-
         # Look for OnionStudios embeds
         onionstudios_url = OnionStudiosIE._extract_url(webpage)
         if onionstudios_url:
@@ -2879,6 +2891,21 @@ class GenericIE(InfoExtractor):
             return self.playlist_from_matches(
                 channel9_urls, video_id, video_title, ie=Channel9IE.ie_key())
 
+        vshare_urls = VShareIE._extract_urls(webpage)
+        if vshare_urls:
+            return self.playlist_from_matches(
+                vshare_urls, video_id, video_title, ie=VShareIE.ie_key())
+
+        # Look for Mediasite embeds
+        mediasite_urls = MediasiteIE._extract_urls(webpage)
+        if mediasite_urls:
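+            # the embedding page URL is smuggled along as UrlReferrer for MediasiteIE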
+            entries = [
+                self.url_result(smuggle_url(
+                    compat_urlparse.urljoin(url, mediasite_url),
+                    {'UrlReferrer': url}), ie=MediasiteIE.ie_key())
+                for mediasite_url in mediasite_urls]
+            return self.playlist_result(entries, video_id, video_title)
+
         def merge_dicts(dict1, dict2):
             merged = {}
             for k, v in dict1.items():
diff --git a/youtube_dl/extractor/instagram.py b/youtube_dl/extractor/instagram.py
index 4667335e03faa2a9fac1433718f17eb714b79d64..a77f619d291ba4e02ecdc2c785795229fd0de2ca 100644 (file)
@@ -1,5 +1,6 @@
 from __future__ import unicode_literals
 
+import itertools
 import re
 
 from .common import InfoExtractor
@@ -7,7 +8,6 @@ from ..compat import compat_str
 from ..utils import (
     get_element_by_attribute,
     int_or_none,
-    limit_length,
     lowercase_escape,
     try_get,
 )
@@ -130,13 +130,21 @@ class InstagramIE(InfoExtractor):
                 video_url = media.get('video_url')
                 height = int_or_none(media.get('dimensions', {}).get('height'))
                 width = int_or_none(media.get('dimensions', {}).get('width'))
-                description = media.get('caption')
+                description = try_get(
+                    media, lambda x: x['edge_media_to_caption']['edges'][0]['node']['text'],
+                    compat_str) or media.get('caption')
                 thumbnail = media.get('display_src')
-                timestamp = int_or_none(media.get('date'))
+                timestamp = int_or_none(media.get('taken_at_timestamp') or media.get('date'))
                 uploader = media.get('owner', {}).get('full_name')
                 uploader_id = media.get('owner', {}).get('username')
-                like_count = int_or_none(media.get('likes', {}).get('count'))
-                comment_count = int_or_none(media.get('comments', {}).get('count'))
+
+                def get_count(key, kind):
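+                    # prefer the new GraphQL edge counts, fall back to the legacy keys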
+                    return int_or_none(try_get(
+                        media, (lambda x: x['edge_media_%s' % key]['count'],
+                                lambda x: x['%ss' % kind]['count'])))
+                like_count = get_count('preview_like', 'like')
+                comment_count = get_count('to_comment', 'comment')
+
                 comments = [{
                     'author': comment.get('user', {}).get('username'),
                     'author_id': comment.get('user', {}).get('id'),
@@ -212,7 +220,7 @@ class InstagramIE(InfoExtractor):
 
 
 class InstagramUserIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?instagram\.com/(?P<username>[^/]{2,})/?(?:$|[?#])'
+    _VALID_URL = r'https?://(?:www\.)?instagram\.com/(?P<id>[^/]{2,})/?(?:$|[?#])'
     IE_DESC = 'Instagram user profile'
     IE_NAME = 'instagram:user'
     _TEST = {
@@ -221,82 +229,79 @@ class InstagramUserIE(InfoExtractor):
             'id': 'porsche',
             'title': 'porsche',
         },
-        'playlist_mincount': 2,
-        'playlist': [{
-            'info_dict': {
-                'id': '614605558512799803_462752227',
-                'ext': 'mp4',
-                'title': '#Porsche Intelligent Performance.',
-                'thumbnail': r're:^https?://.*\.jpg',
-                'uploader': 'Porsche',
-                'uploader_id': 'porsche',
-                'timestamp': 1387486713,
-                'upload_date': '20131219',
-            },
-        }],
+        'playlist_count': 5,
         'params': {
             'extract_flat': True,
             'skip_download': True,
+            'playlistend': 5,
         }
     }
 
-    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        uploader_id = mobj.group('username')
+    def _entries(self, uploader_id):
+        query = {
+            '__a': 1,
+        }
 
-        entries = []
-        page_count = 0
-        media_url = 'http://instagram.com/%s/media' % uploader_id
-        while True:
+        def get_count(kind):
+            return int_or_none(try_get(
+                node, lambda x: x['%ss' % kind]['count']))
+
+        for page_num in itertools.count(1):
             page = self._download_json(
-                media_url, uploader_id,
-                note='Downloading page %d ' % (page_count + 1),
-            )
-            page_count += 1
+                'https://instagram.com/%s/' % uploader_id, uploader_id,
+                note='Downloading page %d' % page_num,
+                fatal=False, query=query)
+            if not page:
+                break
+
+            nodes = try_get(page, lambda x: x['user']['media']['nodes'], list)
+            if not nodes:
+                break
+
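+            # the id of the last node on a page becomes max_id for the next request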
+            max_id = None
 
-            for it in page['items']:
-                if it.get('type') != 'video':
+            for node in nodes:
+                node_id = node.get('id')
+                if node_id:
+                    max_id = node_id
+
+                if node.get('__typename') != 'GraphVideo' and node.get('is_video') is not True:
+                    continue
+                video_id = node.get('code')
+                if not video_id:
                     continue
-                like_count = int_or_none(it.get('likes', {}).get('count'))
-                user = it.get('user', {})
-
-                formats = [{
-                    'format_id': k,
-                    'height': v.get('height'),
-                    'width': v.get('width'),
-                    'url': v['url'],
-                } for k, v in it['videos'].items()]
-                self._sort_formats(formats)
-
-                thumbnails_el = it.get('images', {})
-                thumbnail = thumbnails_el.get('thumbnail', {}).get('url')
-
-                # In some cases caption is null, which corresponds to None
-                # in python. As a result, it.get('caption', {}) gives None
-                title = (it.get('caption') or {}).get('text', it['id'])
-
-                entries.append({
-                    'id': it['id'],
-                    'title': limit_length(title, 80),
-                    'formats': formats,
+
+                info = self.url_result(
+                    'https://instagram.com/p/%s/' % video_id,
+                    ie=InstagramIE.ie_key(), video_id=video_id)
+
+                description = try_get(
+                    node, [lambda x: x['caption'], lambda x: x['text']['id']],
+                    compat_str)
+                thumbnail = node.get('thumbnail_src') or node.get('display_src')
+                timestamp = int_or_none(node.get('date'))
+
+                comment_count = get_count('comment')
+                like_count = get_count('like')
+                view_count = int_or_none(node.get('video_views'))
+
+                info.update({
+                    'description': description,
                     'thumbnail': thumbnail,
-                    'webpage_url': it.get('link'),
-                    'uploader': user.get('full_name'),
-                    'uploader_id': user.get('username'),
+                    'timestamp': timestamp,
+                    'comment_count': comment_count,
                     'like_count': like_count,
-                    'timestamp': int_or_none(it.get('created_time')),
+                    'view_count': view_count,
                 })
 
-            if not page['items']:
+                yield info
+
+            if not max_id:
                 break
-            max_id = page['items'][-1]['id'].split('_')[0]
-            media_url = (
-                'http://instagram.com/%s/media?max_id=%s' % (
-                    uploader_id, max_id))
 
-        return {
-            '_type': 'playlist',
-            'entries': entries,
-            'id': uploader_id,
-            'title': uploader_id,
-        }
+            query['max_id'] = max_id
+
+    def _real_extract(self, url):
+        uploader_id = self._match_id(url)
+        return self.playlist_result(
+            self._entries(uploader_id), uploader_id, uploader_id)
diff --git a/youtube_dl/extractor/internazionale.py b/youtube_dl/extractor/internazionale.py
new file mode 100644 (file)
index 0000000..10ba1f6
--- /dev/null
@@ -0,0 +1,64 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import unified_timestamp
+
+
+class InternazionaleIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?internazionale\.it/video/(?:[^/]+/)*(?P<id>[^/?#&]+)'
+    _TEST = {
+        'url': 'https://www.internazionale.it/video/2015/02/19/richard-linklater-racconta-una-scena-di-boyhood',
+        'md5': '3e39d32b66882c1218e305acbf8348ca',
+        'info_dict': {
+            'id': '265968',
+            'display_id': 'richard-linklater-racconta-una-scena-di-boyhood',
+            'ext': 'mp4',
+            'title': 'Richard Linklater racconta una scena di Boyhood',
+            'description': 'md5:efb7e5bbfb1a54ae2ed5a4a015f0e665',
+            'timestamp': 1424354635,
+            'upload_date': '20150219',
+            'thumbnail': r're:^https?://.*\.jpg$',
+        },
+        'params': {
+            'format': 'bestvideo',
+        },
+    }
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, display_id)
+
+        DATA_RE = r'data-%s=(["\'])(?P<value>(?:(?!\1).)+)\1'
+
+        title = self._search_regex(
+            DATA_RE % 'video-title', webpage, 'title', default=None,
+            group='value') or self._og_search_title(webpage)
+
+        video_id = self._search_regex(
+            DATA_RE % 'job-id', webpage, 'video id', group='value')
+        video_path = self._search_regex(
+            DATA_RE % 'video-path', webpage, 'video path', group='value')
+
+        video_base = 'https://video.internazionale.it/%s/%s.' % (video_path, video_id)
+
+        formats = self._extract_m3u8_formats(
+            video_base + 'm3u8', display_id, 'mp4',
+            entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)
+        formats.extend(self._extract_mpd_formats(
+            video_base + 'mpd', display_id, mpd_id='dash', fatal=False))
+        self._sort_formats(formats)
+
+        timestamp = unified_timestamp(self._html_search_meta(
+            'article:published_time', webpage, 'timestamp'))
+
+        return {
+            'id': video_id,
+            'display_id': display_id,
+            'title': title,
+            'thumbnail': self._og_search_thumbnail(webpage),
+            'description': self._og_search_description(webpage),
+            'timestamp': timestamp,
+            'formats': formats,
+        }
diff --git a/youtube_dl/extractor/itv.py b/youtube_dl/extractor/itv.py
index 26c48e4b889545f4e2ffacccfdb859678c4bf2c3..18a7d7f8cde2f2434d0db003836b5e7a437b2c9a 100644 (file)
@@ -4,6 +4,7 @@ from __future__ import unicode_literals
 import uuid
 import xml.etree.ElementTree as etree
 import json
+import re
 
 from .common import InfoExtractor
 from ..compat import (
@@ -25,7 +26,7 @@ from ..utils import (
 class ITVIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?itv\.com/hub/[^/]+/(?P<id>[0-9a-zA-Z]+)'
     _GEO_COUNTRIES = ['GB']
-    _TEST = {
+    _TESTS = [{
         'url': 'http://www.itv.com/hub/mr-bean-animated-series/2a2936a0053',
         'info_dict': {
             'id': '2a2936a0053',
@@ -36,7 +37,11 @@ class ITVIE(InfoExtractor):
             # rtmp download
             'skip_download': True,
         },
-    }
+    }, {
+        # unavailable via data-playlist-url
+        'url': 'https://www.itv.com/hub/through-the-keyhole/2a2271a0033',
+        'only_matching': True,
+    }]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
@@ -100,6 +105,18 @@ class ITVIE(InfoExtractor):
             'Content-Type': 'text/xml; charset=utf-8',
             'SOAPAction': 'http://tempuri.org/PlaylistService/GetPlaylist',
         })
+
+        info = self._search_json_ld(webpage, video_id, default={})
+        formats = []
+        subtitles = {}
+
+        def extract_subtitle(sub_url):
+            ext = determine_ext(sub_url, 'ttml')
+            subtitles.setdefault('en', []).append({
+                'url': sub_url,
+                'ext': 'ttml' if ext == 'xml' else ext,
+            })
+
         resp_env = self._download_xml(
             params['data-playlist-url'], video_id,
             headers=headers, data=etree.tostring(req_env))
@@ -110,41 +127,59 @@ class ITVIE(InfoExtractor):
             if fault_code == 'InvalidGeoRegion':
                 self.raise_geo_restricted(
                     msg=fault_string, countries=self._GEO_COUNTRIES)
-            raise ExtractorError('%s said: %s' % (self.IE_NAME, fault_string))
-        title = xpath_text(playlist, 'EpisodeTitle', fatal=True)
-        video_element = xpath_element(playlist, 'VideoEntries/Video', fatal=True)
-        media_files = xpath_element(video_element, 'MediaFiles', fatal=True)
-        rtmp_url = media_files.attrib['base']
+            elif fault_code != 'InvalidEntity':
+                raise ExtractorError(
+                    '%s said: %s' % (self.IE_NAME, fault_string), expected=True)
+            info.update({
+                'title': self._og_search_title(webpage),
+                'episode_title': params.get('data-video-episode'),
+                'series': params.get('data-video-title'),
+            })
+        else:
+            title = xpath_text(playlist, 'EpisodeTitle', default=None)
+            info.update({
+                'title': title,
+                'episode_title': title,
+                'episode_number': int_or_none(xpath_text(playlist, 'EpisodeNumber')),
+                'series': xpath_text(playlist, 'ProgrammeTitle'),
+                'duration': parse_duration(xpath_text(playlist, 'Duration')),
+            })
+            video_element = xpath_element(playlist, 'VideoEntries/Video', fatal=True)
+            media_files = xpath_element(video_element, 'MediaFiles', fatal=True)
+            rtmp_url = media_files.attrib['base']
 
-        formats = []
-        for media_file in media_files.findall('MediaFile'):
-            play_path = xpath_text(media_file, 'URL')
-            if not play_path:
-                continue
-            tbr = int_or_none(media_file.get('bitrate'), 1000)
-            f = {
-                'format_id': 'rtmp' + ('-%d' % tbr if tbr else ''),
-                'play_path': play_path,
-                # Providing this swfVfy allows to avoid truncated downloads
-                'player_url': 'http://www.itv.com/mercury/Mercury_VideoPlayer.swf',
-                'page_url': url,
-                'tbr': tbr,
-                'ext': 'flv',
-            }
-            app = self._search_regex(
-                'rtmpe?://[^/]+/(.+)$', rtmp_url, 'app', default=None)
-            if app:
-                f.update({
-                    'url': rtmp_url.split('?', 1)[0],
-                    'app': app,
-                })
-            else:
-                f['url'] = rtmp_url
-            formats.append(f)
-
-        ios_playlist_url = params.get('data-video-playlist')
+            for media_file in media_files.findall('MediaFile'):
+                play_path = xpath_text(media_file, 'URL')
+                if not play_path:
+                    continue
+                tbr = int_or_none(media_file.get('bitrate'), 1000)
+                f = {
+                    'format_id': 'rtmp' + ('-%d' % tbr if tbr else ''),
+                    'play_path': play_path,
+                    # Providing this swfVfy allows to avoid truncated downloads
+                    'player_url': 'http://www.itv.com/mercury/Mercury_VideoPlayer.swf',
+                    'page_url': url,
+                    'tbr': tbr,
+                    'ext': 'flv',
+                }
+                app = self._search_regex(
+                    'rtmpe?://[^/]+/(.+)$', rtmp_url, 'app', default=None)
+                if app:
+                    f.update({
+                        'url': rtmp_url.split('?', 1)[0],
+                        'app': app,
+                    })
+                else:
+                    f['url'] = rtmp_url
+                formats.append(f)
+
+            for caption_url in video_element.findall('ClosedCaptioningURIs/URL'):
+                if caption_url.text:
+                    extract_subtitle(caption_url.text)
+
+        ios_playlist_url = params.get('data-video-playlist') or params.get('data-video-id')
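+        # data-video-id may hold the playlist URL; the scheme check below skips plain ids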
         hmac = params.get('data-video-hmac')
-        if ios_playlist_url and hmac:
+        if ios_playlist_url and hmac and re.match(r'https?://', ios_playlist_url):
             headers = self.geo_verification_headers()
             headers.update({
                 'Accept': 'application/vnd.itv.vod.playlist.v2+json',
@@ -159,12 +194,12 @@ class ITVIE(InfoExtractor):
                         'token': ''
                     },
                     'device': {
-                        'manufacturer': 'Apple',
-                        'model': 'iPad',
+                        'manufacturer': 'Safari',
+                        'model': '5',
                         'os': {
-                            'name': 'iPhone OS',
-                            'version': '9.3',
-                            'type': 'ios'
+                            'name': 'Windows NT',
+                            'version': '6.1',
+                            'type': 'desktop'
                         }
                     },
                     'client': {
@@ -173,10 +208,10 @@ class ITVIE(InfoExtractor):
                     },
                     'variantAvailability': {
                         'featureset': {
-                            'min': ['hls', 'aes'],
-                            'max': ['hls', 'aes']
+                            'min': ['hls', 'aes', 'outband-webvtt'],
+                            'max': ['hls', 'aes', 'outband-webvtt']
                         },
-                        'platformTag': 'mobile'
+                        'platformTag': 'dotcom'
                     }
                 }).encode(), headers=headers, fatal=False)
             if ios_playlist:
@@ -197,27 +232,22 @@ class ITVIE(InfoExtractor):
                         formats.append({
                             'url': href,
                         })
-        self._sort_formats(formats)
+                subs = video_data.get('Subtitles')
+                if isinstance(subs, list):
+                    for sub in subs:
+                        if not isinstance(sub, dict):
+                            continue
+                        href = sub.get('Href')
+                        if isinstance(href, compat_str):
+                            extract_subtitle(href)
+                if not info.get('duration'):
+                    info['duration'] = parse_duration(video_data.get('Duration'))
 
-        subtitles = {}
-        for caption_url in video_element.findall('ClosedCaptioningURIs/URL'):
-            if not caption_url.text:
-                continue
-            ext = determine_ext(caption_url.text, 'ttml')
-            subtitles.setdefault('en', []).append({
-                'url': caption_url.text,
-                'ext': 'ttml' if ext == 'xml' else ext,
-            })
+        self._sort_formats(formats)
 
-        info = self._search_json_ld(webpage, video_id, default={})
         info.update({
             'id': video_id,
-            'title': title,
             'formats': formats,
             'subtitles': subtitles,
-            'episode_title': title,
-            'episode_number': int_or_none(xpath_text(playlist, 'EpisodeNumber')),
-            'series': xpath_text(playlist, 'ProgrammeTitle'),
-            'duartion': parse_duration(xpath_text(playlist, 'Duration')),
         })
         return info
diff --git a/youtube_dl/extractor/jwplatform.py b/youtube_dl/extractor/jwplatform.py
index 33d55f7706d79e0b87a9830ae9abce3de6f33826..c9bcbb08f787ef74bea78c4b402ecb7b167556e7 100644 (file)
@@ -24,7 +24,7 @@ class JWPlatformIE(InfoExtractor):
     @staticmethod
     def _extract_url(webpage):
         mobj = re.search(
-            r'<script[^>]+?src=["\'](?P<url>(?:https?:)?//content.jwplatform.com/players/[a-zA-Z0-9]{8})',
+            r'<(?:script|iframe)[^>]+?src=["\'](?P<url>(?:https?:)?//content.jwplatform.com/players/[a-zA-Z0-9]{8})',
             webpage)
         if mobj:
             return mobj.group('url')
diff --git a/youtube_dl/extractor/kaltura.py b/youtube_dl/extractor/kaltura.py
index bdac2df3e52ffeafa55d4d6f05af18edcaf9e0dd..562e25f6d3ca979fe40ef3a5e46ee05ca8e92244 100644 (file)
@@ -125,9 +125,12 @@ class KalturaIE(InfoExtractor):
                         (?:https?:)?//cdnapi(?:sec)?\.kaltura\.com(?::\d+)?/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P<partner_id>\d+)(?:(?!(?P=q1)).)*
                     (?P=q1).*?
                     (?:
-                        entry_?[Ii]d|
-                        (?P<q2>["'])entry_?[Ii]d(?P=q2)
-                    )\s*:\s*
+                        (?:
+                            entry_?[Ii]d|
+                            (?P<q2>["'])entry_?[Ii]d(?P=q2)
+                        )\s*:\s*|
+                        \[\s*(?P<q2_1>["'])entry_?[Ii]d(?P=q2_1)\s*\]\s*=\s*
+                    )
                     (?P<q3>["'])(?P<id>(?:(?!(?P=q3)).)+)(?P=q3)
                 ''', webpage) or
             re.search(
diff --git a/youtube_dl/extractor/livestream.py b/youtube_dl/extractor/livestream.py
index 317ebbc4ee60d17051574ae05b8575526216de05..c4776bbf3b5aa590e2b014b4d23e55f0de569cc6 100644 (file)
@@ -114,7 +114,7 @@ class LivestreamIE(InfoExtractor):
 
         smil_url = video_data.get('smil_url')
         if smil_url:
-            formats.extend(self._extract_smil_formats(smil_url, video_id))
+            formats.extend(self._extract_smil_formats(smil_url, video_id, fatal=False))
 
         m3u8_url = video_data.get('m3u8_url')
         if m3u8_url:
diff --git a/youtube_dl/extractor/mailru.py b/youtube_dl/extractor/mailru.py
index f7cc3c83289f1101207c385d5bfed2055c7b7f67..6b7c5e3e03dc5b2cb6012a6e03d23a71fe359fee 100644 (file)
@@ -13,8 +13,15 @@ from ..utils import (
 class MailRuIE(InfoExtractor):
     IE_NAME = 'mailru'
     IE_DESC = 'Видео@Mail.Ru'
-    _VALID_URL = r'https?://(?:(?:www|m)\.)?my\.mail\.ru/(?:video/.*#video=/?(?P<idv1>(?:[^/]+/){3}\d+)|(?:(?P<idv2prefix>(?:[^/]+/){2})video/(?P<idv2suffix>[^/]+/\d+))\.html)'
-
+    _VALID_URL = r'''(?x)
+                    https?://
+                        (?:(?:www|m)\.)?my\.mail\.ru/
+                        (?:
+                            video/.*\#video=/?(?P<idv1>(?:[^/]+/){3}\d+)|
+                            (?:(?P<idv2prefix>(?:[^/]+/){2})video/(?P<idv2suffix>[^/]+/\d+))\.html|
+                            (?:video/embed|\+/video/meta)/(?P<metaid>\d+)
+                        )
+                    '''
     _TESTS = [
         {
             'url': 'http://my.mail.ru/video/top#video=/mail/sonypicturesrus/75/76',
@@ -23,7 +30,7 @@ class MailRuIE(InfoExtractor):
                 'id': '46301138_76',
                 'ext': 'mp4',
                 'title': 'Новый Человек-Паук. Высокое напряжение. Восстание Электро',
-                'timestamp': 1393232740,
+                'timestamp': 1393235077,
                 'upload_date': '20140224',
                 'uploader': 'sonypicturesrus',
                 'uploader_id': 'sonypicturesrus@mail.ru',
@@ -40,7 +47,7 @@ class MailRuIE(InfoExtractor):
                 'title': 'Samsung Galaxy S5 Hammer Smash Fail Battery Explosion',
                 'timestamp': 1397039888,
                 'upload_date': '20140409',
-                'uploader': 'hitech@corp.mail.ru',
+                'uploader': 'hitech',
                 'uploader_id': 'hitech@corp.mail.ru',
                 'duration': 245,
             },
@@ -65,28 +72,42 @@ class MailRuIE(InfoExtractor):
         {
             'url': 'http://m.my.mail.ru/mail/3sktvtr/video/_myvideo/138.html',
             'only_matching': True,
+        },
+        {
+            'url': 'https://my.mail.ru/video/embed/7949340477499637815',
+            'only_matching': True,
+        },
+        {
+            'url': 'http://my.mail.ru/+/video/meta/7949340477499637815',
+            'only_matching': True,
         }
     ]
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('idv1')
-
-        if not video_id:
-            video_id = mobj.group('idv2prefix') + mobj.group('idv2suffix')
-
-        webpage = self._download_webpage(url, video_id)
+        meta_id = mobj.group('metaid')
+
+        video_id = None
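+        # embed and meta URLs only carry a numeric id, so the meta JSON is fetched directly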
+        if meta_id:
+            meta_url = 'https://my.mail.ru/+/video/meta/%s' % meta_id
+        else:
+            video_id = mobj.group('idv1')
+            if not video_id:
+                video_id = mobj.group('idv2prefix') + mobj.group('idv2suffix')
+            webpage = self._download_webpage(url, video_id)
+            page_config = self._parse_json(self._search_regex(
+                r'(?s)<script[^>]+class="sp-video__page-config"[^>]*>(.+?)</script>',
+                webpage, 'page config', default='{}'), video_id, fatal=False)
+            if page_config:
+                meta_url = page_config.get('metaUrl') or page_config.get('video', {}).get('metaUrl')
+            else:
+                meta_url = None
 
         video_data = None
-
-        page_config = self._parse_json(self._search_regex(
-            r'(?s)<script[^>]+class="sp-video__page-config"[^>]*>(.+?)</script>',
-            webpage, 'page config', default='{}'), video_id, fatal=False)
-        if page_config:
-            meta_url = page_config.get('metaUrl') or page_config.get('video', {}).get('metaUrl')
-            if meta_url:
-                video_data = self._download_json(
-                    meta_url, video_id, 'Downloading video meta JSON', fatal=False)
+        if meta_url:
+            video_data = self._download_json(
+                meta_url, video_id or meta_id, 'Downloading video meta JSON',
+                fatal=not video_id)
 
         # Fallback old approach
         if not video_data:
diff --git a/youtube_dl/extractor/massengeschmacktv.py b/youtube_dl/extractor/massengeschmacktv.py
new file mode 100644 (file)
index 0000000..cfcc6b2
--- /dev/null
@@ -0,0 +1,77 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    clean_html,
+    determine_ext,
+    int_or_none,
+    js_to_json,
+    mimetype2ext,
+    parse_filesize,
+)
+
+
+class MassengeschmackTVIE(InfoExtractor):
+    IE_NAME = 'massengeschmack.tv'
+    _VALID_URL = r'https?://(?:www\.)?massengeschmack\.tv/play/(?P<id>[^?&#]+)'
+
+    _TEST = {
+        'url': 'https://massengeschmack.tv/play/fktv202',
+        'md5': 'a9e054db9c2b5a08f0a0527cc201e8d3',
+        'info_dict': {
+            'id': 'fktv202',
+            'ext': 'mp4',
+            'title': 'Fernsehkritik-TV - Folge 202',
+        },
+    }
+
+    def _real_extract(self, url):
+        episode = self._match_id(url)
+
+        webpage = self._download_webpage(url, episode)
+        title = clean_html(self._html_search_regex(
+            '<h3>([^<]+)</h3>', webpage, 'title'))
+        thumbnail = self._search_regex(r'POSTER\s*=\s*"([^"]+)', webpage, 'thumbnail', fatal=False)
+        sources = self._parse_json(self._search_regex(r'(?s)MEDIA\s*=\s*(\[.+?\]);', webpage, 'media'), episode, js_to_json)
+
+        formats = []
+        for source in sources:
+            furl = source.get('src')
+            if not furl:
+                continue
+            furl = self._proto_relative_url(furl)
+            ext = determine_ext(furl) or mimetype2ext(source.get('type'))
+            if ext == 'm3u8':
+                formats.extend(self._extract_m3u8_formats(
+                    furl, episode, 'mp4', 'm3u8_native',
+                    m3u8_id='hls', fatal=False))
+            else:
+                formats.append({
+                    'url': furl,
+                    'format_id': determine_ext(furl),
+                })
+
+        for (durl, format_id, width, height, filesize) in re.findall(r'''(?x)
+                                   <a[^>]+?href="(?P<url>(?:https:)?//[^"]+)".*?
+                                   <strong>(?P<format_id>.+?)</strong>.*?
+                                   <small>(?:(?P<width>\d+)x(?P<height>\d+))?\s+?\((?P<filesize>[\d,]+\s*[GM]iB)\)</small>
+                                ''', webpage):
+            formats.append({
+                'url': durl,
+                'format_id': format_id,
+                'width': int_or_none(width),
+                'height': int_or_none(height),
+                'filesize': parse_filesize(filesize),
+                'vcodec': 'none' if format_id.startswith('Audio') else None,
+            })
+
+        self._sort_formats(formats, ('width', 'height', 'filesize', 'tbr'))
+
+        return {
+            'id': episode,
+            'title': title,
+            'formats': formats,
+            'thumbnail': thumbnail,
+        }
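
For reference, the MEDIA-array handling above boils down to two steps: pull the JavaScript array literal out of the page, then relax it into JSON. A minimal standalone sketch against a made-up page snippet (URLs and file names below are placeholders, not real site output; js_to_json is the real helper from youtube_dl.utils):

import json
import re

from youtube_dl.utils import js_to_json

SAMPLE_PAGE = '''
<script>
var POSTER = "https://example.invalid/poster.jpg";
var MEDIA = [
    {src: "//example.invalid/fktv202.m3u8", type: "application/x-mpegURL"},
    {src: "//example.invalid/fktv202.mp4", type: "video/mp4"},
];
</script>
'''

# Grab the JS array literal, same idea as the extractor's _search_regex call...
media_js = re.search(r'(?s)MEDIA\s*=\s*(\[.+?\]);', SAMPLE_PAGE).group(1)
# ...then turn relaxed JavaScript (unquoted keys, trailing comma) into JSON.
sources = json.loads(js_to_json(media_js))

for source in sources:
    print(source['type'], source['src'])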
diff --git a/youtube_dl/extractor/mediasite.py b/youtube_dl/extractor/mediasite.py
new file mode 100644 (file)
index 0000000..0e2645c
--- /dev/null
@@ -0,0 +1,214 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+import json
+
+from .common import InfoExtractor
+from ..compat import (
+    compat_str,
+    compat_urlparse,
+)
+from ..utils import (
+    ExtractorError,
+    float_or_none,
+    mimetype2ext,
+    unescapeHTML,
+    unsmuggle_url,
+    urljoin,
+)
+
+
+class MediasiteIE(InfoExtractor):
+    _VALID_URL = r'(?xi)https?://[^/]+/Mediasite/Play/(?P<id>[0-9a-f]{32,34})(?P<query>\?[^#]+|)'
+    _TESTS = [
+        {
+            'url': 'https://hitsmediaweb.h-its.org/mediasite/Play/2db6c271681e4f199af3c60d1f82869b1d',
+            'info_dict': {
+                'id': '2db6c271681e4f199af3c60d1f82869b1d',
+                'ext': 'mp4',
+                'title': 'Lecture: Tuesday, September 20, 2016 - Sir Andrew Wiles',
+                'description': 'Sir Andrew Wiles: “Equations in arithmetic”\\n\\nI will describe some of the interactions between modern number theory and the problem of solving equations in rational numbers or integers\\u0027.',
+                'timestamp': 1474268400.0,
+                'upload_date': '20160919',
+            },
+        },
+        {
+            'url': 'http://mediasite.uib.no/Mediasite/Play/90bb363295d945d6b548c867d01181361d?catalog=a452b7df-9ae1-46b7-a3ba-aceeb285f3eb',
+            'info_dict': {
+                'id': '90bb363295d945d6b548c867d01181361d',
+                'ext': 'mp4',
+                'upload_date': '20150429',
+                'title': '5) IT-forum 2015-Dag 1  - Dungbeetle -  How and why Rain created a tiny bug tracker for Unity',
+                'timestamp': 1430311380.0,
+            },
+        },
+        {
+            'url': 'https://collegerama.tudelft.nl/Mediasite/Play/585a43626e544bdd97aeb71a0ec907a01d',
+            'md5': '481fda1c11f67588c0d9d8fbdced4e39',
+            'info_dict': {
+                'id': '585a43626e544bdd97aeb71a0ec907a01d',
+                'ext': 'mp4',
+                'title': 'Een nieuwe wereld: waarden, bewustzijn en techniek van de mensheid 2.0.',
+                'description': '',
+                'thumbnail': r're:^https?://.*\.jpg(?:\?.*)?$',
+                'duration': 7713.088,
+                'timestamp': 1413309600,
+                'upload_date': '20141014',
+            },
+        },
+        {
+            'url': 'https://collegerama.tudelft.nl/Mediasite/Play/86a9ea9f53e149079fbdb4202b521ed21d?catalog=fd32fd35-6c99-466c-89d4-cd3c431bc8a4',
+            'md5': 'ef1fdded95bdf19b12c5999949419c92',
+            'info_dict': {
+                'id': '86a9ea9f53e149079fbdb4202b521ed21d',
+                'ext': 'wmv',
+                'title': '64ste Vakantiecursus: Afvalwater',
+                'description': 'md5:7fd774865cc69d972f542b157c328305',
+                'thumbnail': r're:^https?://.*\.jpg(?:\?.*?)?$',
+                'duration': 10853,
+                'timestamp': 1326446400,
+                'upload_date': '20120113',
+            },
+        },
+        {
+            'url': 'http://digitalops.sandia.gov/Mediasite/Play/24aace4429fc450fb5b38cdbf424a66e1d',
+            'md5': '9422edc9b9a60151727e4b6d8bef393d',
+            'info_dict': {
+                'id': '24aace4429fc450fb5b38cdbf424a66e1d',
+                'ext': 'mp4',
+                'title': 'Xyce Software Training - Section 1',
+                'description': r're:(?s)SAND Number: SAND 2013-7800.{200,}',
+                'upload_date': '20120409',
+                'timestamp': 1333983600,
+                'duration': 7794,
+            }
+        }
+    ]
+
+    # look in Mediasite.Core.js (Mediasite.ContentStreamType[*])
+    _STREAM_TYPES = {
+        0: 'video1',  # the main video
+        2: 'slide',
+        3: 'presentation',
+        4: 'video2',  # screencast?
+        5: 'video3',
+    }
+
+    @staticmethod
+    def _extract_urls(webpage):
+        return [
+            unescapeHTML(mobj.group('url'))
+            for mobj in re.finditer(
+                r'(?xi)<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:(?:https?:)?//[^/]+)?/Mediasite/Play/[0-9a-f]{32,34}(?:\?.*?)?)\1',
+                webpage)]
+
+    def _real_extract(self, url):
+        url, data = unsmuggle_url(url, {})
+        mobj = re.match(self._VALID_URL, url)
+        resource_id = mobj.group('id')
+        query = mobj.group('query')
+
+        webpage, urlh = self._download_webpage_handle(url, resource_id)  # XXX: add UrlReferrer?
+        redirect_url = compat_str(urlh.geturl())
+
+        # XXX: might have also extracted UrlReferrer and QueryString from the html
+        service_path = compat_urlparse.urljoin(redirect_url, self._html_search_regex(
+            r'<div[^>]+\bid=["\']ServicePath[^>]+>(.+?)</div>', webpage, resource_id,
+            default='/Mediasite/PlayerService/PlayerService.svc/json'))
+
+        player_options = self._download_json(
+            '%s/GetPlayerOptions' % service_path, resource_id,
+            headers={
+                'Content-type': 'application/json; charset=utf-8',
+                'X-Requested-With': 'XMLHttpRequest',
+            },
+            data=json.dumps({
+                'getPlayerOptionsRequest': {
+                    'ResourceId': resource_id,
+                    'QueryString': query,
+                    'UrlReferrer': data.get('UrlReferrer', ''),
+                    'UseScreenReader': False,
+                }
+            }).encode('utf-8'))['d']
+
+        presentation = player_options['Presentation']
+        if presentation is None:
+            raise ExtractorError(
+                'Mediasite says: %s' % player_options['PlayerPresentationStatusMessage'],
+                expected=True)
+
+        title = presentation['Title']
+
+        thumbnails = []
+        formats = []
+        for snum, Stream in enumerate(presentation['Streams']):
+            stream_type = Stream.get('StreamType')
+            if stream_type is None:
+                continue
+
+            video_urls = Stream.get('VideoUrls')
+            if not isinstance(video_urls, list):
+                video_urls = []
+
+            stream_id = self._STREAM_TYPES.get(
+                stream_type, 'type%u' % stream_type)
+
+            stream_formats = []
+            for unum, VideoUrl in enumerate(video_urls):
+                video_url = VideoUrl.get('Location')
+                if not video_url or not isinstance(video_url, compat_str):
+                    continue
+                # XXX: if Stream.get('CanChangeScheme', False), switch scheme to HTTP/HTTPS
+
+                media_type = VideoUrl.get('MediaType')
+                if media_type == 'SS':
+                    stream_formats.extend(self._extract_ism_formats(
+                        video_url, resource_id,
+                        ism_id='%s-%u.%u' % (stream_id, snum, unum),
+                        fatal=False))
+                elif media_type == 'Dash':
+                    stream_formats.extend(self._extract_mpd_formats(
+                        video_url, resource_id,
+                        mpd_id='%s-%u.%u' % (stream_id, snum, unum),
+                        fatal=False))
+                else:
+                    stream_formats.append({
+                        'format_id': '%s-%u.%u' % (stream_id, snum, unum),
+                        'url': video_url,
+                        'ext': mimetype2ext(VideoUrl.get('MimeType')),
+                    })
+
+            # TODO: if Stream['HasSlideContent']:
+            # synthesise an MJPEG video stream '%s-%u.slides' % (stream_type, snum)
+            # from Stream['Slides']
+            # this will require writing a custom downloader...
+
+            # disprefer 'secondary' streams
+            if stream_type != 0:
+                for fmt in stream_formats:
+                    fmt['preference'] = -1
+
+            thumbnail_url = Stream.get('ThumbnailUrl')
+            if thumbnail_url:
+                thumbnails.append({
+                    'id': '%s-%u' % (stream_id, snum),
+                    'url': urljoin(redirect_url, thumbnail_url),
+                    'preference': -1 if stream_type != 0 else 0,
+                })
+            formats.extend(stream_formats)
+
+        self._sort_formats(formats)
+
+        # XXX: Presentation['Presenters']
+        # XXX: Presentation['Transcript']
+
+        return {
+            'id': resource_id,
+            'title': title,
+            'description': presentation.get('Description'),
+            'duration': float_or_none(presentation.get('Duration'), 1000),
+            'timestamp': float_or_none(presentation.get('UnixTime'), 1000),
+            'formats': formats,
+            'thumbnails': thumbnails,
+        }
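
For reference, the GetPlayerOptions call this extractor builds can be reproduced standalone; the host, resource id and empty query string below are placeholders, and only the request shape visible in the diff is assumed:

import json
from urllib.request import Request, urlopen  # plain Python 3 stdlib for brevity

SERVICE_PATH = 'https://example-mediasite.invalid/Mediasite/PlayerService/PlayerService.svc/json'
RESOURCE_ID = '0' * 34  # placeholder resource id

payload = json.dumps({
    'getPlayerOptionsRequest': {
        'ResourceId': RESOURCE_ID,
        'QueryString': '',
        'UrlReferrer': '',
        'UseScreenReader': False,
    }
}).encode('utf-8')

request = Request(SERVICE_PATH + '/GetPlayerOptions', data=payload, headers={
    'Content-type': 'application/json; charset=utf-8',
    'X-Requested-With': 'XMLHttpRequest',
})
player_options = json.loads(urlopen(request).read().decode('utf-8'))['d']
presentation = player_options.get('Presentation') or {}
print(presentation.get('Title'), len(presentation.get('Streams') or []))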
index 6a85dcbd522cfb087499daab81482fac84d75a0a..0e26ca1b302c11f6b9a7707f99928fd904f8c13d 100644 (file)
@@ -40,21 +40,29 @@ class MnetIE(InfoExtractor):
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
+        # TODO: extract rtmp formats
+        # no stype -> rtmp url
+        # stype=H -> m3u8 url
+        # stype=M -> mpd url
         info = self._download_json(
-            'http://content.api.mnet.com/player/vodConfig?id=%s&ctype=CLIP' % video_id,
-            video_id, 'Downloading vod config JSON')['data']['info']
+            'http://content.api.mnet.com/player/vodConfig',
+            video_id, 'Downloading vod config JSON', query={
+                'id': video_id,
+                'ctype': 'CLIP',
+                'stype': 'H',
+            })['data']['info']
 
         title = info['title']
 
-        rtmp_info = self._download_json(
-            info['cdn'], video_id, 'Downloading vod cdn JSON')
-
-        formats = [{
-            'url': rtmp_info['serverurl'] + rtmp_info['fileurl'],
-            'ext': 'flv',
-            'page_url': url,
-            'player_url': 'http://flvfile.mnet.com/service/player/201602/cjem_player_tv.swf?v=201602191318',
-        }]
+        cdn_data = self._download_json(
+            info['cdn'], video_id, 'Downloading vod cdn JSON')['data'][0]
+        m3u8_url = cdn_data['url']
+        token = cdn_data.get('token')
+        if token and token != '-':
+            m3u8_url += '?' + token
+        formats = self._extract_wowza_formats(
+            m3u8_url, video_id, skip_protocols=['rtmp', 'rtsp', 'f4m'])
+        self._sort_formats(formats)
 
         description = info.get('ment')
         duration = parse_duration(info.get('time'))
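
The stype mapping spelled out in the TODO comments above can be captured in a small helper; the parameter values are taken from those comments only and are not verified against the Mnet API:

def vod_config_query(video_id, manifest='hls'):
    """Build the vodConfig query for a given manifest type (per the TODO above)."""
    stype = {'hls': 'H', 'dash': 'M'}.get(manifest)  # omitting stype reportedly yields an RTMP URL
    query = {'id': video_id, 'ctype': 'CLIP'}
    if stype:
        query['stype'] = stype
    return query

# vod_config_query('12345', 'dash') -> {'id': '12345', 'ctype': 'CLIP', 'stype': 'M'}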
index 1154a35365ca9b388ebfdbe2a5790f67a25ee750..7a3b57abd852f4ffb8e49bdce9d9e8029fca65c3 100644 (file)
@@ -115,10 +115,17 @@ class MTVServicesInfoExtractor(InfoExtractor):
             if transcript.get('kind') != 'captions':
                 continue
             lang = transcript.get('srclang')
-            subtitles[lang] = [{
-                'url': compat_str(typographic.get('src')),
-                'ext': typographic.get('format')
-            } for typographic in transcript.findall('./typographic')]
+            for typographic in transcript.findall('./typographic'):
+                sub_src = typographic.get('src')
+                if not sub_src:
+                    continue
+                ext = typographic.get('format')
+                if ext == 'cea-608':
+                    ext = 'scc'
+                subtitles.setdefault(lang, []).append({
+                    'url': compat_str(sub_src),
+                    'ext': ext
+                })
         return subtitles
 
     def _get_video_info(self, itemdoc, use_hls=True):
index 071879ba4f5317326ebed99ee59e8a6f4e6b4ce4..9203c04777db9d84ab7efad05476878e6687080f 100644 (file)
@@ -28,7 +28,7 @@ class NexxIE(InfoExtractor):
     _TESTS = [{
         # movie
         'url': 'https://api.nexx.cloud/v3/748/videos/byid/128907',
-        'md5': '16746bfc28c42049492385c989b26c4a',
+        'md5': '828cea195be04e66057b846288295ba1',
         'info_dict': {
             'id': '128907',
             'ext': 'mp4',
@@ -42,9 +42,6 @@ class NexxIE(InfoExtractor):
             'timestamp': 1384264416,
             'upload_date': '20131112',
         },
-        'params': {
-            'format': 'bestvideo',
-        },
     }, {
         # episode
         'url': 'https://api.nexx.cloud/v3/741/videos/byid/247858',
@@ -62,7 +59,6 @@ class NexxIE(InfoExtractor):
             'season_number': 2,
         },
         'params': {
-            'format': 'bestvideo',
             'skip_download': True,
         },
     }, {
@@ -193,35 +189,67 @@ class NexxIE(InfoExtractor):
         stream_data = video['streamdata']
         language = general.get('language_raw') or ''
 
-        # TODO: reverse more cdns and formats
+        # TODO: reverse more cdns
 
         cdn = stream_data['cdnType']
         assert cdn == 'azure'
 
         azure_locator = stream_data['azureLocator']
 
-        AZURE_URL = 'http://nx-p%02d.akamaized.net/'
+        AZURE_URL = 'http://nx%s%02d.akamaized.net/'
 
-        for secure in ('s', ''):
-            cdn_shield = stream_data.get('cdnShieldHTTP%s' % secure.upper())
-            if cdn_shield:
-                azure_base = 'http%s://%s' % (secure, cdn_shield)
-                break
-        else:
-            azure_base = AZURE_URL % int(stream_data['azureAccount'].replace('nexxplayplus', ''))
+        def get_cdn_shield_base(shield_type='', prefix='-p'):
+            for secure in ('', 's'):
+                cdn_shield = stream_data.get('cdnShield%sHTTP%s' % (shield_type, secure.upper()))
+                if cdn_shield:
+                    return 'http%s://%s' % (secure, cdn_shield)
+            else:
+                return AZURE_URL % (prefix, int(stream_data['azureAccount'].replace('nexxplayplus', '')))
 
+        azure_stream_base = get_cdn_shield_base()
         is_ml = ',' in language
-        azure_m3u8_url = '%s%s/%s_src%s.ism/Manifest(format=m3u8-aapl)' % (
-            azure_base, azure_locator, video_id, ('_manifest' if is_ml else ''))
+        azure_manifest_url = '%s%s/%s_src%s.ism/Manifest' % (
+            azure_stream_base, azure_locator, video_id, ('_manifest' if is_ml else '')) + '%s'
 
         protection_token = try_get(
             video, lambda x: x['protectiondata']['token'], compat_str)
         if protection_token:
-            azure_m3u8_url += '?hdnts=%s' % protection_token
+            azure_manifest_url += '?hdnts=%s' % protection_token
 
         formats = self._extract_m3u8_formats(
-            azure_m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native',
-            m3u8_id='%s-hls' % cdn)
+            azure_manifest_url % '(format=m3u8-aapl)',
+            video_id, 'mp4', 'm3u8_native',
+            m3u8_id='%s-hls' % cdn, fatal=False)
+        formats.extend(self._extract_mpd_formats(
+            azure_manifest_url % '(format=mpd-time-csf)',
+            video_id, mpd_id='%s-dash' % cdn, fatal=False))
+        formats.extend(self._extract_ism_formats(
+            azure_manifest_url % '', video_id, ism_id='%s-mss' % cdn, fatal=False))
+
+        azure_progressive_base = get_cdn_shield_base('Prog', '-d')
+        azure_file_distribution = stream_data.get('azureFileDistribution')
+        if azure_file_distribution:
+            fds = azure_file_distribution.split(',')
+            if fds:
+                for fd in fds:
+                    ss = fd.split(':')
+                    if len(ss) == 2:
+                        tbr = int_or_none(ss[0])
+                        if tbr:
+                            f = {
+                                'url': '%s%s/%s_src_%s_%d.mp4' % (
+                                    azure_progressive_base, azure_locator, video_id, ss[1], tbr),
+                                'format_id': '%s-http-%d' % (cdn, tbr),
+                                'tbr': tbr,
+                            }
+                            width_height = ss[1].split('x')
+                            if len(width_height) == 2:
+                                f.update({
+                                    'width': int_or_none(width_height[0]),
+                                    'height': int_or_none(width_height[1]),
+                                })
+                            formats.append(f)
+
         self._sort_formats(formats)
 
         return {
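
The azureFileDistribution string handled above appears to be a comma-separated list of bitrate:WIDTHxHEIGHT entries. The parsing step in isolation, with a made-up sample value that follows the pattern the code implies:

def parse_file_distribution(value):
    formats = []
    for fd in value.split(','):
        parts = fd.split(':')
        if len(parts) != 2:
            continue
        try:
            tbr = int(parts[0])
        except ValueError:
            continue
        f = {'format_id': 'azure-http-%d' % tbr, 'tbr': tbr}
        width_height = parts[1].split('x')
        if len(width_height) == 2:
            f['width'], f['height'] = (int(v) for v in width_height)
        formats.append(f)
    return formats

print(parse_file_distribution('409:426x240,719:640x360,1508:1280x720'))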
index 310eea2cf054248260868515469962232b8562c2..7edd68472b1fd6b5a9410e94aec5a6d0933be064 100644 (file)
@@ -10,7 +10,7 @@ from ..utils import update_url_query
 class NickIE(MTVServicesInfoExtractor):
     # None of the videos on the website are still alive?
     IE_NAME = 'nick.com'
-    _VALID_URL = r'https?://(?:(?:www|beta)\.)?nick(?:jr)?\.com/(?:[^/]+/)?(?:videos/clip|[^/]+/videos)/(?P<id>[^/?#.]+)'
+    _VALID_URL = r'https?://(?P<domain>(?:(?:www|beta)\.)?nick(?:jr)?\.com)/(?:[^/]+/)?(?:videos/clip|[^/]+/videos)/(?P<id>[^/?#.]+)'
     _FEED_URL = 'http://udat.mtvnservices.com/service1/dispatch.htm'
     _GEO_COUNTRIES = ['US']
     _TESTS = [{
@@ -69,8 +69,59 @@ class NickIE(MTVServicesInfoExtractor):
             'mgid': uri,
         }
 
-    def _extract_mgid(self, webpage):
-        return self._search_regex(r'data-contenturi="([^"]+)', webpage, 'mgid')
+    def _real_extract(self, url):
+        domain, display_id = re.match(self._VALID_URL, url).groups()
+        video_data = self._download_json(
+            'http://%s/data/video.endLevel.json' % domain,
+            display_id, query={
+                'urlKey': display_id,
+            })
+        return self._get_videos_info(video_data['player'] + video_data['id'])
+
+
+class NickBrIE(MTVServicesInfoExtractor):
+    IE_NAME = 'nickelodeon:br'
+    _VALID_URL = r'https?://(?P<domain>(?:www\.)?nickjr|mundonick\.uol)\.com\.br/(?:programas/)?[^/]+/videos/(?:episodios/)?(?P<id>[^/?#.]+)'
+    _TESTS = [{
+        'url': 'http://www.nickjr.com.br/patrulha-canina/videos/210-labirinto-de-pipoca/',
+        'only_matching': True,
+    }, {
+        'url': 'http://mundonick.uol.com.br/programas/the-loud-house/videos/muitas-irmas/7ljo9j',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        domain, display_id = re.match(self._VALID_URL, url).groups()
+        webpage = self._download_webpage(url, display_id)
+        uri = self._search_regex(
+            r'data-(?:contenturi|mgid)="([^"]+)', webpage, 'mgid')
+        video_id = self._id_from_uri(uri)
+        config = self._download_json(
+            'http://media.mtvnservices.com/pmt/e1/access/index.html',
+            video_id, query={
+                'uri': uri,
+                'configtype': 'edge',
+            }, headers={
+                'Referer': url,
+            })
+        info_url = self._remove_template_parameter(config['feedWithQueryParams'])
+        if info_url == 'None':
+            if domain.startswith('www.'):
+                domain = domain[4:]
+            content_domain = {
+                'mundonick.uol': 'mundonick.com.br',
+                'nickjr': 'br.nickelodeonjunior.tv',
+            }[domain]
+            query = {
+                'mgid': uri,
+                'imageEp': content_domain,
+                'arcEp': content_domain,
+            }
+            if domain == 'nickjr':
+                query['ep'] = 'c4b16088'
+            info_url = update_url_query(
+                'http://feeds.mtvnservices.com/od/feed/intl-mrss-player-feed', query)
+        return self._get_videos_info_from_url(info_url, video_id)
 
 
 class NickDeIE(MTVServicesInfoExtractor):
index 8b83e1f760141bc1a6a0ff4c56761e09b6e3d920..a9f9b10c47b8b9f481a8ab3f88705b1b0e76eb0f 100644 (file)
@@ -70,7 +70,7 @@ class NocoIE(InfoExtractor):
             return
 
         login = self._download_json(
-            self._LOGIN_URL, None, 'Logging in as %s' % username,
+            self._LOGIN_URL, None, 'Logging in',
             data=urlencode_postdata({
                 'a': 'login',
                 'cookie': '1',
diff --git a/youtube_dl/extractor/nowtv.py b/youtube_dl/extractor/nowtv.py
deleted file mode 100644 (file)
index e43b371..0000000
+++ /dev/null
@@ -1,261 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..compat import compat_str
-from ..utils import (
-    ExtractorError,
-    determine_ext,
-    int_or_none,
-    parse_iso8601,
-    parse_duration,
-    remove_start,
-)
-
-
-class NowTVBaseIE(InfoExtractor):
-    _VIDEO_FIELDS = (
-        'id', 'title', 'free', 'geoblocked', 'articleLong', 'articleShort',
-        'broadcastStartDate', 'seoUrl', 'duration', 'files',
-        'format.defaultImage169Format', 'format.defaultImage169Logo')
-
-    def _extract_video(self, info, display_id=None):
-        video_id = compat_str(info['id'])
-
-        files = info['files']
-        if not files:
-            if info.get('geoblocked', False):
-                raise ExtractorError(
-                    'Video %s is not available from your location due to geo restriction' % video_id,
-                    expected=True)
-            if not info.get('free', True):
-                raise ExtractorError(
-                    'Video %s is not available for free' % video_id, expected=True)
-
-        formats = []
-        for item in files['items']:
-            if determine_ext(item['path']) != 'f4v':
-                continue
-            app, play_path = remove_start(item['path'], '/').split('/', 1)
-            formats.append({
-                'url': 'rtmpe://fms.rtl.de',
-                'app': app,
-                'play_path': 'mp4:%s' % play_path,
-                'ext': 'flv',
-                'page_url': 'http://rtlnow.rtl.de',
-                'player_url': 'http://cdn.static-fra.de/now/vodplayer.swf',
-                'tbr': int_or_none(item.get('bitrate')),
-            })
-        self._sort_formats(formats)
-
-        title = info['title']
-        description = info.get('articleLong') or info.get('articleShort')
-        timestamp = parse_iso8601(info.get('broadcastStartDate'), ' ')
-        duration = parse_duration(info.get('duration'))
-
-        f = info.get('format', {})
-        thumbnail = f.get('defaultImage169Format') or f.get('defaultImage169Logo')
-
-        return {
-            'id': video_id,
-            'display_id': display_id or info.get('seoUrl'),
-            'title': title,
-            'description': description,
-            'thumbnail': thumbnail,
-            'timestamp': timestamp,
-            'duration': duration,
-            'formats': formats,
-        }
-
-
-class NowTVIE(NowTVBaseIE):
-    _WORKING = False
-    _VALID_URL = r'https?://(?:www\.)?nowtv\.(?:de|at|ch)/(?:rtl|rtl2|rtlnitro|superrtl|ntv|vox)/(?P<show_id>[^/]+)/(?:(?:list/[^/]+|jahr/\d{4}/\d{1,2})/)?(?P<id>[^/]+)/(?:player|preview)'
-
-    _TESTS = [{
-        # rtl
-        'url': 'http://www.nowtv.de/rtl/bauer-sucht-frau/die-neuen-bauern-und-eine-hochzeit/player',
-        'info_dict': {
-            'id': '203519',
-            'display_id': 'bauer-sucht-frau/die-neuen-bauern-und-eine-hochzeit',
-            'ext': 'flv',
-            'title': 'Inka Bause stellt die neuen Bauern vor',
-            'description': 'md5:e234e1ed6d63cf06be5c070442612e7e',
-            'thumbnail': r're:^https?://.*\.jpg$',
-            'timestamp': 1432580700,
-            'upload_date': '20150525',
-            'duration': 2786,
-        },
-        'params': {
-            # rtmp download
-            'skip_download': True,
-        },
-    }, {
-        # rtl2
-        'url': 'http://www.nowtv.de/rtl2/berlin-tag-nacht/berlin-tag-nacht-folge-934/player',
-        'info_dict': {
-            'id': '203481',
-            'display_id': 'berlin-tag-nacht/berlin-tag-nacht-folge-934',
-            'ext': 'flv',
-            'title': 'Berlin - Tag & Nacht (Folge 934)',
-            'description': 'md5:c85e88c2e36c552dfe63433bc9506dd0',
-            'thumbnail': r're:^https?://.*\.jpg$',
-            'timestamp': 1432666800,
-            'upload_date': '20150526',
-            'duration': 2641,
-        },
-        'params': {
-            # rtmp download
-            'skip_download': True,
-        },
-    }, {
-        # rtlnitro
-        'url': 'http://www.nowtv.de/rtlnitro/alarm-fuer-cobra-11-die-autobahnpolizei/hals-und-beinbruch-2014-08-23-21-10-00/player',
-        'info_dict': {
-            'id': '165780',
-            'display_id': 'alarm-fuer-cobra-11-die-autobahnpolizei/hals-und-beinbruch-2014-08-23-21-10-00',
-            'ext': 'flv',
-            'title': 'Hals- und Beinbruch',
-            'description': 'md5:b50d248efffe244e6f56737f0911ca57',
-            'thumbnail': r're:^https?://.*\.jpg$',
-            'timestamp': 1432415400,
-            'upload_date': '20150523',
-            'duration': 2742,
-        },
-        'params': {
-            # rtmp download
-            'skip_download': True,
-        },
-    }, {
-        # superrtl
-        'url': 'http://www.nowtv.de/superrtl/medicopter-117/angst/player',
-        'info_dict': {
-            'id': '99205',
-            'display_id': 'medicopter-117/angst',
-            'ext': 'flv',
-            'title': 'Angst!',
-            'description': 'md5:30cbc4c0b73ec98bcd73c9f2a8c17c4e',
-            'thumbnail': r're:^https?://.*\.jpg$',
-            'timestamp': 1222632900,
-            'upload_date': '20080928',
-            'duration': 3025,
-        },
-        'params': {
-            # rtmp download
-            'skip_download': True,
-        },
-    }, {
-        # ntv
-        'url': 'http://www.nowtv.de/ntv/ratgeber-geld/thema-ua-der-erste-blick-die-apple-watch/player',
-        'info_dict': {
-            'id': '203521',
-            'display_id': 'ratgeber-geld/thema-ua-der-erste-blick-die-apple-watch',
-            'ext': 'flv',
-            'title': 'Thema u.a.: Der erste Blick: Die Apple Watch',
-            'description': 'md5:4312b6c9d839ffe7d8caf03865a531af',
-            'thumbnail': r're:^https?://.*\.jpg$',
-            'timestamp': 1432751700,
-            'upload_date': '20150527',
-            'duration': 1083,
-        },
-        'params': {
-            # rtmp download
-            'skip_download': True,
-        },
-    }, {
-        # vox
-        'url': 'http://www.nowtv.de/vox/der-hundeprofi/buero-fall-chihuahua-joel/player',
-        'info_dict': {
-            'id': '128953',
-            'display_id': 'der-hundeprofi/buero-fall-chihuahua-joel',
-            'ext': 'flv',
-            'title': "Büro-Fall / Chihuahua 'Joel'",
-            'description': 'md5:e62cb6bf7c3cc669179d4f1eb279ad8d',
-            'thumbnail': r're:^https?://.*\.jpg$',
-            'timestamp': 1432408200,
-            'upload_date': '20150523',
-            'duration': 3092,
-        },
-        'params': {
-            # rtmp download
-            'skip_download': True,
-        },
-    }, {
-        'url': 'http://www.nowtv.de/rtl/bauer-sucht-frau/die-neuen-bauern-und-eine-hochzeit/preview',
-        'only_matching': True,
-    }, {
-        'url': 'http://www.nowtv.at/rtl/bauer-sucht-frau/die-neuen-bauern-und-eine-hochzeit/preview?return=/rtl/bauer-sucht-frau/die-neuen-bauern-und-eine-hochzeit',
-        'only_matching': True,
-    }, {
-        'url': 'http://www.nowtv.de/rtl2/echtzeit/list/aktuell/schnelles-geld-am-ende-der-welt/player',
-        'only_matching': True,
-    }, {
-        'url': 'http://www.nowtv.de/rtl2/zuhause-im-glueck/jahr/2015/11/eine-erschuetternde-diagnose/player',
-        'only_matching': True,
-    }]
-
-    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        display_id = '%s/%s' % (mobj.group('show_id'), mobj.group('id'))
-
-        info = self._download_json(
-            'https://api.nowtv.de/v3/movies/%s?fields=%s'
-            % (display_id, ','.join(self._VIDEO_FIELDS)), display_id)
-
-        return self._extract_video(info, display_id)
-
-
-class NowTVListIE(NowTVBaseIE):
-    _VALID_URL = r'https?://(?:www\.)?nowtv\.(?:de|at|ch)/(?:rtl|rtl2|rtlnitro|superrtl|ntv|vox)/(?P<show_id>[^/]+)/list/(?P<id>[^?/#&]+)$'
-
-    _SHOW_FIELDS = ('title', )
-    _SEASON_FIELDS = ('id', 'headline', 'seoheadline', )
-
-    _TESTS = [{
-        'url': 'http://www.nowtv.at/rtl/stern-tv/list/aktuell',
-        'info_dict': {
-            'id': '17006',
-            'title': 'stern TV - Aktuell',
-        },
-        'playlist_count': 1,
-    }, {
-        'url': 'http://www.nowtv.at/rtl/das-supertalent/list/free-staffel-8',
-        'info_dict': {
-            'id': '20716',
-            'title': 'Das Supertalent - FREE Staffel 8',
-        },
-        'playlist_count': 14,
-    }]
-
-    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        show_id = mobj.group('show_id')
-        season_id = mobj.group('id')
-
-        fields = []
-        fields.extend(self._SHOW_FIELDS)
-        fields.extend('formatTabs.%s' % field for field in self._SEASON_FIELDS)
-        fields.extend(
-            'formatTabs.formatTabPages.container.movies.%s' % field
-            for field in self._VIDEO_FIELDS)
-
-        list_info = self._download_json(
-            'https://api.nowtv.de/v3/formats/seo?fields=%s&name=%s.php'
-            % (','.join(fields), show_id),
-            season_id)
-
-        season = next(
-            season for season in list_info['formatTabs']['items']
-            if season.get('seoheadline') == season_id)
-
-        title = '%s - %s' % (list_info['title'], season['headline'])
-
-        entries = []
-        for container in season['formatTabPages']['items']:
-            for info in ((container.get('container') or {}).get('movies') or {}).get('items') or []:
-                entries.append(self._extract_video(info))
-
-        return self.playlist_result(
-            entries, compat_str(season.get('id') or season_id), title)
index 854b6800c18bd47d2ed1b40cace0969a22eebc75..8e13bcf1fe4674e803f2a87f177980779fa62637 100644 (file)
@@ -14,6 +14,7 @@ from ..utils import (
     int_or_none,
     qualities,
     unescapeHTML,
+    urlencode_postdata,
 )
 
 
@@ -56,7 +57,7 @@ class OdnoklassnikiIE(InfoExtractor):
         'url': 'http://ok.ru/video/64211978996595-1',
         'md5': '2f206894ffb5dbfcce2c5a14b909eea5',
         'info_dict': {
-            'id': '64211978996595-1',
+            'id': 'V_VztHT5BzY',
             'ext': 'mp4',
             'title': 'Космическая среда от 26 августа 2015',
             'description': 'md5:848eb8b85e5e3471a3a803dae1343ed0',
@@ -127,9 +128,14 @@ class OdnoklassnikiIE(InfoExtractor):
         if metadata:
             metadata = self._parse_json(metadata, video_id)
         else:
+            data = {}
+            st_location = flashvars.get('location')
+            if st_location:
+                data['st.location'] = st_location
             metadata = self._download_json(
                 compat_urllib_parse_unquote(flashvars['metadataUrl']),
-                video_id, 'Downloading metadata JSON')
+                video_id, 'Downloading metadata JSON',
+                data=urlencode_postdata(data))
 
         movie = metadata['movie']
 
index a637c8ecfb0f03ecc46536363800c38f716fe557..8ae5fadd858396853ec27819a2586fde141f8cda 100644 (file)
@@ -11,7 +11,7 @@ class OnceIE(InfoExtractor):
     ADAPTIVE_URL_TEMPLATE = 'http://once.unicornmedia.com/now/master/playlist/%s/%s/%s/content.m3u8'
     PROGRESSIVE_URL_TEMPLATE = 'http://once.unicornmedia.com/now/media/progressive/%s/%s/%s/%s/content.mp4'
 
-    def _extract_once_formats(self, url):
+    def _extract_once_formats(self, url, http_formats_preference=None):
         domain_id, application_id, media_item_id = re.match(
             OnceIE._VALID_URL, url).groups()
         formats = self._extract_m3u8_formats(
@@ -35,6 +35,7 @@ class OnceIE(InfoExtractor):
                     'format_id': adaptive_format['format_id'].replace(
                         'hls', 'http'),
                     'protocol': 'http',
+                    'preference': http_formats_preference,
                 })
                 progressive_formats.append(progressive_format)
         self._check_formats(progressive_formats, media_item_id)
index b50d6c77b4e2fad46bf3d5ada798f1cb8f255a29..b282bcfd9d2abc950c4a0e24fe808adfbf1a0e41 100644 (file)
@@ -112,6 +112,8 @@ class PhantomJSwrapper(object):
         return get_exe_version('phantomjs', version_re=r'([0-9.]+)')
 
     def __init__(self, extractor, required_version=None, timeout=10000):
+        self._TMP_FILES = {}
+
         self.exe = check_executable('phantomjs', ['-v'])
         if not self.exe:
             raise ExtractorError('PhantomJS executable not found in PATH, '
@@ -130,7 +132,6 @@ class PhantomJSwrapper(object):
         self.options = {
             'timeout': timeout,
         }
-        self._TMP_FILES = {}
         for name in self._TMP_FILE_NAMES:
             tmp = tempfile.NamedTemporaryFile(delete=False)
             tmp.close()
@@ -140,7 +141,7 @@ class PhantomJSwrapper(object):
         for name in self._TMP_FILE_NAMES:
             try:
                 os.remove(self._TMP_FILES[name].name)
-            except:
+            except (IOError, OSError, KeyError):
                 pass
 
     def _save_cookies(self, url):
@@ -242,7 +243,7 @@ class PhantomJSwrapper(object):
 
 
 class OpenloadIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:openload\.(?:co|io)|oload\.tv)/(?:f|embed)/(?P<id>[a-zA-Z0-9-_]+)'
+    _VALID_URL = r'https?://(?:www\.)?(?:openload\.(?:co|io|link)|oload\.(?:tv|stream))/(?:f|embed)/(?P<id>[a-zA-Z0-9-_]+)'
 
     _TESTS = [{
         'url': 'https://openload.co/f/kUEfGclsU9o',
@@ -283,9 +284,20 @@ class OpenloadIE(InfoExtractor):
         # for title and ext
         'url': 'https://openload.co/embed/Sxz5sADo82g/',
         'only_matching': True,
+    }, {
+        # unavailable via https://openload.co/embed/e-Ixz9ZR5L0/ but available
+        # via https://openload.co/f/e-Ixz9ZR5L0/
+        'url': 'https://openload.co/f/e-Ixz9ZR5L0/',
+        'only_matching': True,
     }, {
         'url': 'https://oload.tv/embed/KnG-kKZdcfY/',
         'only_matching': True,
+    }, {
+        'url': 'http://www.openload.link/f/KnG-kKZdcfY',
+        'only_matching': True,
+    }, {
+        'url': 'https://oload.stream/f/KnG-kKZdcfY',
+        'only_matching': True,
     }]
 
     _USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36'
@@ -298,20 +310,30 @@ class OpenloadIE(InfoExtractor):
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
-        url = 'https://openload.co/embed/%s/' % video_id
+        url_pattern = 'https://openload.co/%%s/%s/' % video_id
         headers = {
             'User-Agent': self._USER_AGENT,
         }
 
-        webpage = self._download_webpage(url, video_id, headers=headers)
-
-        if 'File not found' in webpage or 'deleted by the owner' in webpage:
-            raise ExtractorError('File not found', expected=True, video_id=video_id)
+        for path in ('embed', 'f'):
+            page_url = url_pattern % path
+            last = path == 'f'
+            webpage = self._download_webpage(
+                page_url, video_id, 'Downloading %s webpage' % path,
+                headers=headers, fatal=last)
+            if not webpage:
+                continue
+            if 'File not found' in webpage or 'deleted by the owner' in webpage:
+                if not last:
+                    continue
+                raise ExtractorError('File not found', expected=True, video_id=video_id)
+            break
 
         phantom = PhantomJSwrapper(self, required_version='2.0')
-        webpage, _ = phantom.get(url, html=webpage, video_id=video_id, headers=headers)
+        webpage, _ = phantom.get(page_url, html=webpage, video_id=video_id, headers=headers)
 
-        decoded_id = get_element_by_id('streamurl', webpage)
+        decoded_id = (get_element_by_id('streamurl', webpage) or
+                      get_element_by_id('streamuri', webpage))
 
         video_url = 'https://openload.co/stream/%s?mime=true' % decoded_id
 
@@ -320,7 +342,7 @@ class OpenloadIE(InfoExtractor):
             'title', default=None) or self._html_search_meta(
             'description', webpage, 'title', fatal=True)
 
-        entries = self._parse_html5_media_entries(url, webpage, video_id)
+        entries = self._parse_html5_media_entries(page_url, webpage, video_id)
         entry = entries[0] if entries else {}
         subtitles = entry.get('subtitles')
 
index 74fe8017ee7ba2efa16137b83f8eab3678c3c3c0..c1fb580ca8d9a20d785d71194d2822e51b2ee1ae 100644 (file)
@@ -49,13 +49,13 @@ class ORFTVthekIE(InfoExtractor):
         'params': {
             'skip_download': True,  # rtsp downloads
         },
-        '_skip': 'Blocked outside of Austria / Germany',
+        'skip': 'Blocked outside of Austria / Germany',
     }, {
         'url': 'http://tvthek.orf.at/topic/Fluechtlingskrise/10463081/Heimat-Fremde-Heimat/13879132/Senioren-betreuen-Migrantenkinder/13879141',
-        'skip_download': True,
+        'only_matching': True,
     }, {
         'url': 'http://tvthek.orf.at/profile/Universum/35429',
-        'skip_download': True,
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
index c86d7077127e2da252bd90db257bc152aa18e166..13a2e7efc76e4f0fbd9e778644781c5218770f61 100644 (file)
@@ -33,7 +33,7 @@ class PandaTVIE(InfoExtractor):
         video_id = self._match_id(url)
 
         config = self._download_json(
-            'https://www.panda.tv/api_room?roomid=%s' % video_id, video_id)
+            'https://www.panda.tv/api_room_v2?roomid=%s' % video_id, video_id)
 
         error_code = config.get('errno', 0)
         if error_code is not 0:
@@ -66,6 +66,11 @@ class PandaTVIE(InfoExtractor):
             plflag1 = '4'
         live_panda = 'live_panda' if plflag0 < 1 else ''
 
+        plflag_auth = self._parse_json(video_info['plflag_list'], video_id)
+        sign = plflag_auth['auth']['sign']
+        ts = plflag_auth['auth']['time']
+        rid = plflag_auth['auth']['rid']
+
         quality_key = qualities(['OD', 'HD', 'SD'])
         suffix = ['_small', '_mid', '']
         formats = []
@@ -77,8 +82,8 @@ class PandaTVIE(InfoExtractor):
                 continue
             for pref, (ext, pl) in enumerate((('m3u8', '-hls'), ('flv', ''))):
                 formats.append({
-                    'url': 'https://pl%s%s.live.panda.tv/live_panda/%s%s%s.%s'
-                    % (pl, plflag1, room_key, live_panda, suffix[quality], ext),
+                    'url': 'https://pl%s%s.live.panda.tv/live_panda/%s%s%s.%s?sign=%s&ts=%s&rid=%s'
+                    % (pl, plflag1, room_key, live_panda, suffix[quality], ext, sign, ts, rid),
                     'format_id': '%s-%s' % (k, ext),
                     'quality': quality,
                     'source_preference': pref,
index a6a2c273f240db52c967a12a96484261bd37664a..d4b1d34ca8486442139409eda90ed95be6c0ffc9 100644 (file)
@@ -67,7 +67,7 @@ class PatreonIE(InfoExtractor):
             'https://www.patreon.com/processLogin',
             compat_urllib_parse_urlencode(login_form).encode('utf-8')
         )
-        login_page = self._download_webpage(request, None, note='Logging in as %s' % username)
+        login_page = self._download_webpage(request, None, note='Logging in')
 
         if re.search(r'onLoginFailed', login_page):
             raise ExtractorError('Unable to login, incorrect username and/or password', expected=True)
index b51dcbe10dd14136220516c76e6cada7f70a9b0c..f11d5da5271d91fb15f9ee006743b1360897b08e 100644 (file)
@@ -421,6 +421,7 @@ class PBSIE(InfoExtractor):
                 r'class="coveplayerid">([^<]+)<',                       # coveplayer
                 r'<section[^>]+data-coveid="(\d+)"',                    # coveplayer from http://www.pbs.org/wgbh/frontline/film/real-csi/
                 r'<input type="hidden" id="pbs_video_id_[0-9]+" value="([0-9]+)"/>',  # jwplayer
+                r"(?s)window\.PBS\.playerConfig\s*=\s*{.*?id\s*:\s*'([0-9]+)',",
             ]
 
             media_id = self._search_regex(
diff --git a/youtube_dl/extractor/performgroup.py b/youtube_dl/extractor/performgroup.py
new file mode 100644 (file)
index 0000000..26942bf
--- /dev/null
@@ -0,0 +1,83 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import int_or_none
+
+
+class PerformGroupIE(InfoExtractor):
+    _VALID_URL = r'https?://player\.performgroup\.com/eplayer(?:/eplayer\.html|\.js)#/?(?P<id>[0-9a-f]{26})\.(?P<auth_token>[0-9a-z]{26})'
+    _TESTS = [{
+        # http://www.faz.net/aktuell/sport/fussball/wm-2018-playoffs-schweiz-besiegt-nordirland-1-0-15286104.html
+        'url': 'http://player.performgroup.com/eplayer/eplayer.html#d478c41c5d192f56b9aa859de8.1w4crrej5w14e1ed4s1ce4ykab',
+        'md5': '259cb03d142e2e52471e8837ecacb29f',
+        'info_dict': {
+            'id': 'xgrwobuzumes1lwjxtcdpwgxd',
+            'ext': 'mp4',
+            'title': 'Liga MX: Keine Einsicht nach Horrorfoul',
+            'description': 'md5:7cd3b459c82725b021e046ab10bf1c5b',
+            'timestamp': 1511533477,
+            'upload_date': '20171124',
+        }
+    }]
+
+    def _call_api(self, service, auth_token, content_id, referer_url):
+        return self._download_json(
+            'http://ep3.performfeeds.com/ep%s/%s/%s/' % (service, auth_token, content_id),
+            content_id, headers={
+                'Referer': referer_url,
+                'Origin': 'http://player.performgroup.com',
+            }, query={
+                '_fmt': 'json',
+            })
+
+    def _real_extract(self, url):
+        player_id, auth_token = re.search(self._VALID_URL, url).groups()
+        bootstrap = self._call_api('bootstrap', auth_token, player_id, url)
+        video = bootstrap['config']['dataSource']['sourceItems'][0]['videos'][0]
+        video_id = video['uuid']
+        vod = self._call_api('vod', auth_token, video_id, url)
+        media = vod['videos']['video'][0]['media']
+
+        formats = []
+        hls_url = media.get('hls', {}).get('url')
+        if hls_url:
+            formats.extend(self._extract_m3u8_formats(hls_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
+
+        hds_url = media.get('hds', {}).get('url')
+        if hds_url:
+            formats.extend(self._extract_f4m_formats(hds_url + '?hdcore', video_id, f4m_id='hds', fatal=False))
+
+        for c in media.get('content', []):
+            c_url = c.get('url')
+            if not c_url:
+                continue
+            tbr = int_or_none(c.get('bitrate'), 1000)
+            format_id = 'http'
+            if tbr:
+                format_id += '-%d' % tbr
+            formats.append({
+                'format_id': format_id,
+                'url': c_url,
+                'tbr': tbr,
+                'width': int_or_none(c.get('width')),
+                'height': int_or_none(c.get('height')),
+                'filesize': int_or_none(c.get('fileSize')),
+                'vcodec': c.get('type'),
+                'fps': int_or_none(c.get('videoFrameRate')),
+                'vbr': int_or_none(c.get('videoRate'), 1000),
+                'abr': int_or_none(c.get('audioRate'), 1000),
+            })
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': video['title'],
+            'description': video.get('description'),
+            'thumbnail': video.get('poster'),
+            'duration': int_or_none(video.get('duration')),
+            'timestamp': int_or_none(video.get('publishedTime'), 1000),
+            'formats': formats,
+        }
index 391e1bd09ca5677d196c0f67a86c0cb1421b2158..4c5f57919b4e786b95570ce523790afd62b4e3ed 100644 (file)
@@ -24,7 +24,7 @@ class PlaytvakIE(InfoExtractor):
             'id': 'A150730_150323_hodinovy-manzel_kuko',
             'ext': 'mp4',
             'title': 'Vyžeňte vosy a sršně ze zahrady',
-            'description': 'md5:f93d398691044d303bc4a3de62f3e976',
+            'description': 'md5:4436e61b7df227a093778efb7e373571',
             'thumbnail': r're:(?i)^https?://.*\.(?:jpg|png)$',
             'duration': 279,
             'timestamp': 1438732860,
@@ -36,9 +36,19 @@ class PlaytvakIE(InfoExtractor):
         'info_dict': {
             'id': 'A150624_164934_planespotting_cat',
             'ext': 'flv',
-            'title': 're:^Přímý přenos iDNES.cz [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+            'title': 're:^Planespotting [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
             'description': 'Sledujte provoz na ranveji Letiště Václava Havla v Praze',
-            'thumbnail': r're:(?i)^https?://.*\.(?:jpg|png)$',
+            'is_live': True,
+        },
+        'params': {
+            'skip_download': True,  # requires rtmpdump
+        },
+    }, {  # another live stream, this one without Misc.videoFLV
+        'url': 'https://slowtv.playtvak.cz/zive-sledujte-vlaky-v-primem-prenosu-dwi-/hlavni-nadrazi.aspx?c=A151218_145728_hlavni-nadrazi_plap',
+        'info_dict': {
+            'id': 'A151218_145728_hlavni-nadrazi_plap',
+            'ext': 'flv',
+            'title': 're:^Hlavní nádraží [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
             'is_live': True,
         },
         'params': {
@@ -95,7 +105,7 @@ class PlaytvakIE(InfoExtractor):
         webpage = self._download_webpage(url, video_id)
 
         info_url = self._html_search_regex(
-            r'Misc\.videoFLV\(\s*{\s*data\s*:\s*"([^"]+)"', webpage, 'info url')
+            r'Misc\.video(?:FLV)?\(\s*{\s*data\s*:\s*"([^"]+)"', webpage, 'info url')
 
         parsed_url = compat_urlparse.urlparse(info_url)
 
@@ -160,7 +170,7 @@ class PlaytvakIE(InfoExtractor):
         if is_live:
             title = self._live_title(title)
         description = self._og_search_description(webpage, default=None) or self._html_search_meta(
-            'description', webpage, 'description')
+            'description', webpage, 'description', default=None)
         timestamp = None
         duration = None
         if not is_live:
index f6a9131b19bf693511a84cfa3118d89030053c6d..aacc5d4bb8a8c139b354be7f724612c308bfc6c5 100644 (file)
@@ -116,7 +116,7 @@ class PluralsightIE(PluralsightBaseIE):
             post_url = compat_urlparse.urljoin(self._LOGIN_URL, post_url)
 
         response = self._download_webpage(
-            post_url, None, 'Logging in as %s' % username,
+            post_url, None, 'Logging in',
             data=urlencode_postdata(login_form),
             headers={'Content-Type': 'application/x-www-form-urlencoded'})
 
@@ -131,6 +131,13 @@ class PluralsightIE(PluralsightBaseIE):
             if BLOCKED in response:
                 raise ExtractorError(
                     'Unable to login: %s' % BLOCKED, expected=True)
+            MUST_AGREE = 'To continue using Pluralsight, you must agree to'
+            if any(p in response for p in (MUST_AGREE, '>Disagree<', '>Agree<')):
+                raise ExtractorError(
+                    'Unable to login: %s some documents. Go to pluralsight.com, '
+                    'log in and agree with what Pluralsight requires.'
+                    % MUST_AGREE, expected=True)
+
             raise ExtractorError('Unable to log in')
 
     def _get_subtitles(self, author, clip_id, lang, name, duration, video_id):
@@ -164,12 +171,12 @@ class PluralsightIE(PluralsightBaseIE):
         for num, current in enumerate(subs):
             current = subs[num]
             start, text = (
-                float_or_none(dict_get(current, TIME_OFFSET_KEYS)),
+                float_or_none(dict_get(current, TIME_OFFSET_KEYS, skip_false_values=False)),
                 dict_get(current, TEXT_KEYS))
             if start is None or text is None:
                 continue
             end = duration if num == len(subs) - 1 else float_or_none(
-                dict_get(subs[num + 1], TIME_OFFSET_KEYS))
+                dict_get(subs[num + 1], TIME_OFFSET_KEYS, skip_false_values=False))
             if end is None:
                 continue
             srt += os.linesep.join(
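
The skip_false_values=False additions above matter because dict_get() treats falsy values as missing by default, so a caption cue starting at offset 0 would silently be dropped. A quick demonstration with a made-up cue (key names mirror the extractor's TIME_OFFSET_KEYS; dict_get is the real helper from youtube_dl.utils):

from youtube_dl.utils import dict_get

TIME_OFFSET_KEYS = ('displayTimeOffset', 'DisplayTimeOffset')
first_cue = {'displayTimeOffset': 0.0, 'text': 'Hello'}

print(dict_get(first_cue, TIME_OFFSET_KEYS))                           # None -> cue would be skipped
print(dict_get(first_cue, TIME_OFFSET_KEYS, skip_false_values=False))  # 0.0 -> cue is kept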
index 8218c7d3bf7ddc8cc7de74f2fc5d2d838cecc982..60ade06da37d2bd2a56e717f9761f525cb4b7436 100644 (file)
@@ -77,12 +77,14 @@ class PornComIE(InfoExtractor):
         self._sort_formats(formats)
 
         view_count = str_to_int(self._search_regex(
-            r'class=["\']views["\'][^>]*><p>([\d,.]+)', webpage,
+            (r'Views:\s*</span>\s*<span>\s*([\d,.]+)',
+             r'class=["\']views["\'][^>]*><p>([\d,.]+)'), webpage,
             'view count', fatal=False))
 
         def extract_list(kind):
             s = self._search_regex(
-                r'(?s)<p[^>]*>%s:(.+?)</p>' % kind.capitalize(),
+                (r'(?s)%s:\s*</span>\s*<span>(.+?)</span>' % kind.capitalize(),
+                 r'(?s)<p[^>]*>%s:(.+?)</p>' % kind.capitalize()),
                 webpage, kind, fatal=False)
             return re.findall(r'<a[^>]+>([^<]+)</a>', s or '')
 
index 5bf64a56b71d6e0d4282d69476410e1108555877..d22311031f42d5a4d35c66e9182db2993ddda8ce 100644 (file)
@@ -17,6 +17,7 @@ from ..utils import (
     parse_duration,
     strip_or_none,
     try_get,
+    unescapeHTML,
     unified_strdate,
     unified_timestamp,
     update_url_query,
@@ -249,6 +250,40 @@ class RaiPlayLiveIE(RaiBaseIE):
         }
 
 
+class RaiPlayPlaylistIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?raiplay\.it/programmi/(?P<id>[^/?#&]+)'
+    _TESTS = [{
+        'url': 'http://www.raiplay.it/programmi/nondirloalmiocapo/',
+        'info_dict': {
+            'id': 'nondirloalmiocapo',
+            'title': 'Non dirlo al mio capo',
+            'description': 'md5:9f3d603b2947c1c7abb098f3b14fac86',
+        },
+        'playlist_mincount': 12,
+    }]
+
+    def _real_extract(self, url):
+        playlist_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, playlist_id)
+
+        title = self._html_search_meta(
+            ('programma', 'nomeProgramma'), webpage, 'title')
+        description = unescapeHTML(self._html_search_meta(
+            ('description', 'og:description'), webpage, 'description'))
+
+        entries = []
+        for mobj in re.finditer(
+                r'<a\b[^>]+\bhref=(["\'])(?P<path>/raiplay/video/.+?)\1',
+                webpage):
+            video_url = urljoin(url, mobj.group('path'))
+            entries.append(self.url_result(
+                video_url, ie=RaiPlayIE.ie_key(),
+                video_id=RaiPlayIE._match_id(video_url)))
+
+        return self.playlist_result(entries, playlist_id, title, description)
+
+
 class RaiIE(RaiBaseIE):
     _VALID_URL = r'https?://[^/]+\.(?:rai\.(?:it|tv)|rainews\.it)/dl/.+?-(?P<id>%s)(?:-.+?)?\.html' % RaiBaseIE._UUID_RE
     _TESTS = [{
index 46dfc78f5edac0e9e8ef66f37efa4bbd7afcf3ea..8b703800ecbd595c5cb1e188fdb830b5ed1cfa35 100644 (file)
@@ -68,7 +68,7 @@ class RoosterTeethIE(InfoExtractor):
 
         login_request = self._download_webpage(
             self._LOGIN_URL, None,
-            note='Logging in as %s' % username,
+            note='Logging in',
             data=urlencode_postdata(login_form),
             headers={
                 'Referer': self._LOGIN_URL,
index f8eda8dea8136f73dacd9fc83e70c3c98ef117e8..fccf69401d7cf054a0ac556ac6322a02b5c59836 100644 (file)
@@ -21,7 +21,7 @@ class RozhlasIE(InfoExtractor):
         }
     }, {
         'url': 'http://prehravac.rozhlas.cz/audio/3421320/embed',
-        'skip_download': True,
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
index 909a6ba97993f161c6c39646b073185733626d18..cc6698f882a5859883372b32dc71578ebe37da8a 100644 (file)
@@ -61,7 +61,7 @@ class SafariBaseIE(InfoExtractor):
         request = sanitized_Request(
             self._LOGIN_URL, urlencode_postdata(login_form), headers=headers)
         login_page = self._download_webpage(
-            request, None, 'Logging in as %s' % username)
+            request, None, 'Logging in')
 
         if not is_logged(login_page):
             raise ExtractorError(
diff --git a/youtube_dl/extractor/sandia.py b/youtube_dl/extractor/sandia.py
deleted file mode 100644 (file)
index 96e43af..0000000
+++ /dev/null
@@ -1,65 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import json
-
-from .common import InfoExtractor
-from ..utils import (
-    int_or_none,
-    mimetype2ext,
-)
-
-
-class SandiaIE(InfoExtractor):
-    IE_DESC = 'Sandia National Laboratories'
-    _VALID_URL = r'https?://digitalops\.sandia\.gov/Mediasite/Play/(?P<id>[0-9a-f]+)'
-    _TEST = {
-        'url': 'http://digitalops.sandia.gov/Mediasite/Play/24aace4429fc450fb5b38cdbf424a66e1d',
-        'md5': '9422edc9b9a60151727e4b6d8bef393d',
-        'info_dict': {
-            'id': '24aace4429fc450fb5b38cdbf424a66e1d',
-            'ext': 'mp4',
-            'title': 'Xyce Software Training - Section 1',
-            'description': 're:(?s)SAND Number: SAND 2013-7800.{200,}',
-            'upload_date': '20120409',
-            'timestamp': 1333983600,
-            'duration': 7794,
-        }
-    }
-
-    def _real_extract(self, url):
-        video_id = self._match_id(url)
-
-        presentation_data = self._download_json(
-            'http://digitalops.sandia.gov/Mediasite/PlayerService/PlayerService.svc/json/GetPlayerOptions',
-            video_id, data=json.dumps({
-                'getPlayerOptionsRequest': {
-                    'ResourceId': video_id,
-                    'QueryString': '',
-                }
-            }), headers={
-                'Content-Type': 'application/json; charset=utf-8',
-            })['d']['Presentation']
-
-        title = presentation_data['Title']
-
-        formats = []
-        for stream in presentation_data.get('Streams', []):
-            for fd in stream.get('VideoUrls', []):
-                formats.append({
-                    'format_id': fd['MediaType'],
-                    'format_note': fd['MimeType'].partition('/')[2],
-                    'ext': mimetype2ext(fd['MimeType']),
-                    'url': fd['Location'],
-                    'protocol': 'f4m' if fd['MimeType'] == 'video/x-mp4-fragmented' else None,
-                })
-        self._sort_formats(formats)
-
-        return {
-            'id': video_id,
-            'title': title,
-            'description': presentation_data.get('Description'),
-            'formats': formats,
-            'timestamp': int_or_none(presentation_data.get('UnixTime'), 1000),
-            'duration': int_or_none(presentation_data.get('Duration'), 1000),
-        }
index b446a02bace5f3e2679db7579a38e106c4f681ad..4023aeef81e4b4094744b89842c3aab5576a61ab 100644 (file)
@@ -1,13 +1,11 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
-import datetime
 import json
 import hashlib
-import hmac
 import re
 
-from .common import InfoExtractor
+from .aws import AWSIE
 from .anvato import AnvatoIE
 from ..utils import (
     smuggle_url,
@@ -16,7 +14,7 @@ from ..utils import (
 )
 
 
-class ScrippsNetworksWatchIE(InfoExtractor):
+class ScrippsNetworksWatchIE(AWSIE):
     IE_NAME = 'scrippsnetworks:watch'
     _VALID_URL = r'''(?x)
                     https?://
@@ -64,44 +62,27 @@ class ScrippsNetworksWatchIE(InfoExtractor):
         'travelchannel': 'trav',
         'geniuskitchen': 'genius',
     }
-    _SNI_HOST = 'web.api.video.snidigital.com'
 
-    _AWS_REGION = 'us-east-1'
-    _AWS_IDENTITY_ID_JSON = json.dumps({
-        'IdentityId': '%s:7655847c-0ae7-4d9b-80d6-56c062927eb3' % _AWS_REGION
-    })
-    _AWS_USER_AGENT = 'aws-sdk-js/2.80.0 callback'
     _AWS_API_KEY = 'E7wSQmq0qK6xPrF13WmzKiHo4BQ7tip4pQcSXVl1'
-    _AWS_SERVICE = 'execute-api'
-    _AWS_REQUEST = 'aws4_request'
-    _AWS_SIGNED_HEADERS = ';'.join([
-        'host', 'x-amz-date', 'x-amz-security-token', 'x-api-key'])
-    _AWS_CANONICAL_REQUEST_TEMPLATE = '''GET
-%(uri)s
-
-host:%(host)s
-x-amz-date:%(date)s
-x-amz-security-token:%(token)s
-x-api-key:%(key)s
+    _AWS_PROXY_HOST = 'web.api.video.snidigital.com'
 
-%(signed_headers)s
-%(payload_hash)s'''
+    _AWS_USER_AGENT = 'aws-sdk-js/2.80.0 callback'
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         site_id, video_id = mobj.group('site', 'id')
 
-        def aws_hash(s):
-            return hashlib.sha256(s.encode('utf-8')).hexdigest()
-
+        aws_identity_id_json = json.dumps({
+            'IdentityId': '%s:7655847c-0ae7-4d9b-80d6-56c062927eb3' % self._AWS_REGION
+        }).encode('utf-8')
         token = self._download_json(
-            'https://cognito-identity.us-east-1.amazonaws.com/', video_id,
-            data=self._AWS_IDENTITY_ID_JSON.encode('utf-8'),
+            'https://cognito-identity.%s.amazonaws.com/' % self._AWS_REGION, video_id,
+            data=aws_identity_id_json,
             headers={
                 'Accept': '*/*',
                 'Content-Type': 'application/x-amz-json-1.1',
                 'Referer': url,
-                'X-Amz-Content-Sha256': aws_hash(self._AWS_IDENTITY_ID_JSON),
+                'X-Amz-Content-Sha256': hashlib.sha256(aws_identity_id_json).hexdigest(),
                 'X-Amz-Target': 'AWSCognitoIdentityService.GetOpenIdToken',
                 'X-Amz-User-Agent': self._AWS_USER_AGENT,
             })['Token']
@@ -124,64 +105,12 @@ x-api-key:%(key)s
                 sts, './/{https://sts.amazonaws.com/doc/2011-06-15/}%s' % key,
                 fatal=True)
 
-        access_key_id = get('AccessKeyId')
-        secret_access_key = get('SecretAccessKey')
-        session_token = get('SessionToken')
-
-        # Task 1: http://docs.aws.amazon.com/general/latest/gr/sigv4-create-canonical-request.html
-        uri = '/1/web/brands/%s/episodes/scrid/%s' % (self._SNI_TABLE[site_id], video_id)
-        datetime_now = datetime.datetime.utcnow().strftime('%Y%m%dT%H%M%SZ')
-        date = datetime_now[:8]
-        canonical_string = self._AWS_CANONICAL_REQUEST_TEMPLATE % {
-            'uri': uri,
-            'host': self._SNI_HOST,
-            'date': datetime_now,
-            'token': session_token,
-            'key': self._AWS_API_KEY,
-            'signed_headers': self._AWS_SIGNED_HEADERS,
-            'payload_hash': aws_hash(''),
-        }
-
-        # Task 2: http://docs.aws.amazon.com/general/latest/gr/sigv4-create-string-to-sign.html
-        credential_string = '/'.join([date, self._AWS_REGION, self._AWS_SERVICE, self._AWS_REQUEST])
-        string_to_sign = '\n'.join([
-            'AWS4-HMAC-SHA256', datetime_now, credential_string,
-            aws_hash(canonical_string)])
-
-        # Task 3: http://docs.aws.amazon.com/general/latest/gr/sigv4-calculate-signature.html
-        def aws_hmac(key, msg):
-            return hmac.new(key, msg.encode('utf-8'), hashlib.sha256)
-
-        def aws_hmac_digest(key, msg):
-            return aws_hmac(key, msg).digest()
-
-        def aws_hmac_hexdigest(key, msg):
-            return aws_hmac(key, msg).hexdigest()
-
-        k_secret = 'AWS4' + secret_access_key
-        k_date = aws_hmac_digest(k_secret.encode('utf-8'), date)
-        k_region = aws_hmac_digest(k_date, self._AWS_REGION)
-        k_service = aws_hmac_digest(k_region, self._AWS_SERVICE)
-        k_signing = aws_hmac_digest(k_service, self._AWS_REQUEST)
-
-        signature = aws_hmac_hexdigest(k_signing, string_to_sign)
-
-        auth_header = ', '.join([
-            'AWS4-HMAC-SHA256 Credential=%s' % '/'.join(
-                [access_key_id, date, self._AWS_REGION, self._AWS_SERVICE, self._AWS_REQUEST]),
-            'SignedHeaders=%s' % self._AWS_SIGNED_HEADERS,
-            'Signature=%s' % signature,
-        ])
-
-        mcp_id = self._download_json(
-            'https://%s%s' % (self._SNI_HOST, uri), video_id, headers={
-                'Accept': '*/*',
-                'Referer': url,
-                'Authorization': auth_header,
-                'X-Amz-Date': datetime_now,
-                'X-Amz-Security-Token': session_token,
-                'X-Api-Key': self._AWS_API_KEY,
-            })['results'][0]['mcpId']
+        mcp_id = self._aws_execute_api({
+            'uri': '/1/web/brands/%s/episodes/scrid/%s' % (self._SNI_TABLE[site_id], video_id),
+            'access_key': get('AccessKeyId'),
+            'secret_key': get('SecretAccessKey'),
+            'session_token': get('SessionToken'),
+        }, video_id)['results'][0]['mcpId']
 
         return self.url_result(
             smuggle_url(
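
The hand-rolled request signing that this hunk deletes now lives in the new youtube_dl/extractor/aws.py (AWSIE), so ScrippsNetworksWatchIE only passes the URI and the temporary STS credentials to _aws_execute_api. For reference, a minimal standalone sketch of the three SigV4 steps the deleted code performed; the function name and argument list here are illustrative, not the AWSIE interface:

import datetime
import hashlib
import hmac


def sigv4_sign_get(uri, host, api_key, access_key_id, secret_access_key,
                   session_token, region='us-east-1', service='execute-api'):
    # Returns (headers, url) for a SigV4-signed GET request with an empty query string.
    def aws_hash(s):
        return hashlib.sha256(s.encode('utf-8')).hexdigest()

    def aws_hmac_digest(key, msg):
        return hmac.new(key, msg.encode('utf-8'), hashlib.sha256).digest()

    amz_date = datetime.datetime.utcnow().strftime('%Y%m%dT%H%M%SZ')
    date = amz_date[:8]
    signed_headers = ';'.join(['host', 'x-amz-date', 'x-amz-security-token', 'x-api-key'])

    # Task 1: canonical request
    canonical_request = '\n'.join([
        'GET', uri, '',
        'host:%s' % host,
        'x-amz-date:%s' % amz_date,
        'x-amz-security-token:%s' % session_token,
        'x-api-key:%s' % api_key,
        '', signed_headers, aws_hash('')])

    # Task 2: string to sign
    credential_scope = '/'.join([date, region, service, 'aws4_request'])
    string_to_sign = '\n'.join([
        'AWS4-HMAC-SHA256', amz_date, credential_scope, aws_hash(canonical_request)])

    # Task 3: derive the signing key and compute the signature
    k_date = aws_hmac_digest(('AWS4' + secret_access_key).encode('utf-8'), date)
    k_region = aws_hmac_digest(k_date, region)
    k_service = aws_hmac_digest(k_region, service)
    k_signing = aws_hmac_digest(k_service, 'aws4_request')
    signature = hmac.new(
        k_signing, string_to_sign.encode('utf-8'), hashlib.sha256).hexdigest()

    headers = {
        'Authorization': ', '.join([
            'AWS4-HMAC-SHA256 Credential=%s' % '/'.join(
                [access_key_id, date, region, service, 'aws4_request']),
            'SignedHeaders=%s' % signed_headers,
            'Signature=%s' % signature,
        ]),
        'X-Amz-Date': amz_date,
        'X-Amz-Security-Token': session_token,
        'X-Api-Key': api_key,
    }
    return headers, 'https://%s%s' % (host, uri)
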
diff --git a/youtube_dl/extractor/sevenplus.py b/youtube_dl/extractor/sevenplus.py
new file mode 100644 (file)
index 0000000..9792f82
--- /dev/null
@@ -0,0 +1,67 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .brightcove import BrightcoveNewIE
+from ..utils import update_url_query
+
+
+class SevenPlusIE(BrightcoveNewIE):
+    IE_NAME = '7plus'
+    _VALID_URL = r'https?://(?:www\.)?7plus\.com\.au/(?P<path>[^?]+\?.*?\bepisode-id=(?P<id>[^&#]+))'
+    _TESTS = [{
+        'url': 'https://7plus.com.au/BEAT?episode-id=BEAT-001',
+        'info_dict': {
+            'id': 'BEAT-001',
+            'ext': 'mp4',
+            'title': 'S1 E1 - Help / Lucy In The Sky With Diamonds',
+            'description': 'md5:37718bea20a8eedaca7f7361af566131',
+            'uploader_id': '5303576322001',
+            'upload_date': '20171031',
+            'timestamp': 1509440068,
+        },
+        'params': {
+            'format': 'bestvideo',
+            'skip_download': True,
+        }
+    }, {
+        'url': 'https://7plus.com.au/UUUU?episode-id=AUMS43-001',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        path, episode_id = re.match(self._VALID_URL, url).groups()
+
+        media = self._download_json(
+            'https://videoservice.swm.digital/playback', episode_id, query={
+                'appId': '7plus',
+                'deviceType': 'web',
+                'platformType': 'web',
+                'accountId': 5303576322001,
+                'referenceId': 'ref:' + episode_id,
+                'deliveryId': 'csai',
+                'videoType': 'vod',
+            })['media']
+
+        for source in media.get('sources', {}):
+            src = source.get('src')
+            if not src:
+                continue
+            source['src'] = update_url_query(src, {'rule': ''})
+
+        info = self._parse_brightcove_metadata(media, episode_id)
+
+        content = self._download_json(
+            'https://component-cdn.swm.digital/content/' + path,
+            episode_id, headers={
+                'market-id': 4,
+            }, fatal=False) or {}
+        for item in content.get('items', {}):
+            if item.get('componentData', {}).get('componentType') == 'infoPanel':
+                for src_key, dst_key in [('title', 'title'), ('shortSynopsis', 'description')]:
+                    value = item.get(src_key)
+                    if value:
+                        info[dst_key] = value
+
+        return info
index 374f7faf9d0becc45f8dd5e49d6774fc45b8e002..5c2a6206be752da16025ec714c8465e940ed8d68 100644 (file)
@@ -1,22 +1,53 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
-import re
 import json
+import math
+import re
 
-from .common import InfoExtractor
+from .aws import AWSIE
 from ..compat import compat_HTTPError
 from ..utils import (
+    clean_html,
     ExtractorError,
+    InAdvancePagedList,
     int_or_none,
     parse_iso8601,
     str_or_none,
     urlencode_postdata,
-    clean_html,
 )
 
 
-class ShahidIE(InfoExtractor):
+class ShahidBaseIE(AWSIE):
+    _AWS_PROXY_HOST = 'api2.shahid.net'
+    _AWS_API_KEY = '2RRtuMHx95aNI1Kvtn2rChEuwsCogUd4samGPjLh'
+
+    def _handle_error(self, e):
+        fail_data = self._parse_json(
+            e.cause.read().decode('utf-8'), None, fatal=False)
+        if fail_data:
+            faults = fail_data.get('faults', [])
+            faults_message = ', '.join([clean_html(fault['userMessage']) for fault in faults if fault.get('userMessage')])
+            if faults_message:
+                raise ExtractorError(faults_message, expected=True)
+
+    def _call_api(self, path, video_id, request=None):
+        query = {}
+        if request:
+            query['request'] = json.dumps(request)
+        try:
+            return self._aws_execute_api({
+                'uri': '/proxy/v2/' + path,
+                'access_key': 'AKIAI6X4TYCIXM2B7MUQ',
+                'secret_key': '4WUUJWuFvtTkXbhaWTDv7MhO+0LqoYDWfEnUXoWn',
+            }, video_id, query)
+        except ExtractorError as e:
+            if isinstance(e.cause, compat_HTTPError):
+                self._handle_error(e)
+            raise
+
+
+class ShahidIE(ShahidBaseIE):
     _NETRC_MACHINE = 'shahid'
     _VALID_URL = r'https?://shahid\.mbc\.net/ar/(?:serie|show|movie)s/[^/]+/(?P<type>episode|clip|movie)-(?P<id>\d+)'
     _TESTS = [{
@@ -41,34 +72,25 @@ class ShahidIE(InfoExtractor):
         'only_matching': True
     }]
 
-    def _api2_request(self, *args, **kwargs):
-        try:
-            return self._download_json(*args, **kwargs)
-        except ExtractorError as e:
-            if isinstance(e.cause, compat_HTTPError):
-                fail_data = self._parse_json(
-                    e.cause.read().decode('utf-8'), None, fatal=False)
-                if fail_data:
-                    faults = fail_data.get('faults', [])
-                    faults_message = ', '.join([clean_html(fault['userMessage']) for fault in faults if fault.get('userMessage')])
-                    if faults_message:
-                        raise ExtractorError(faults_message, expected=True)
-            raise
-
     def _real_initialize(self):
         email, password = self._get_login_info()
         if email is None:
             return
 
-        user_data = self._api2_request(
-            'https://shahid.mbc.net/wd/service/users/login',
-            None, 'Logging in', data=json.dumps({
-                'email': email,
-                'password': password,
-                'basic': 'false',
-            }).encode('utf-8'), headers={
-                'Content-Type': 'application/json; charset=UTF-8',
-            })['user']
+        try:
+            user_data = self._download_json(
+                'https://shahid.mbc.net/wd/service/users/login',
+                None, 'Logging in', data=json.dumps({
+                    'email': email,
+                    'password': password,
+                    'basic': 'false',
+                }).encode('utf-8'), headers={
+                    'Content-Type': 'application/json; charset=UTF-8',
+                })['user']
+        except ExtractorError as e:
+            if isinstance(e.cause, compat_HTTPError):
+                self._handle_error(e)
+            raise
 
         self._download_webpage(
             'https://shahid.mbc.net/populateContext',
@@ -81,25 +103,13 @@ class ShahidIE(InfoExtractor):
                 'sessionId': user_data['sessionId'],
             }))
 
-    def _get_api_data(self, response):
-        data = response.get('data', {})
-
-        error = data.get('error')
-        if error:
-            raise ExtractorError(
-                '%s returned error: %s' % (self.IE_NAME, '\n'.join(error.values())),
-                expected=True)
-
-        return data
-
     def _real_extract(self, url):
         page_type, video_id = re.match(self._VALID_URL, url).groups()
         if page_type == 'clip':
             page_type = 'episode'
 
-        playout = self._api2_request(
-            'https://api2.shahid.net/proxy/v2/playout/url/' + video_id,
-            video_id, 'Downloading player JSON')['playout']
+        playout = self._call_api(
+            'playout/url/' + video_id, video_id)['playout']
 
         if playout.get('drm'):
             raise ExtractorError('This video is DRM protected.', expected=True)
@@ -107,13 +117,27 @@ class ShahidIE(InfoExtractor):
         formats = self._extract_m3u8_formats(playout['url'], video_id, 'mp4')
         self._sort_formats(formats)
 
-        video = self._get_api_data(self._download_json(
+        # video = self._call_api(
+        #     'product/id', video_id, {
+        #         'id': video_id,
+        #         'productType': 'ASSET',
+        #         'productSubType': page_type.upper()
+        #     })['productModel']
+
+        response = self._download_json(
             'http://api.shahid.net/api/v1_1/%s/%s' % (page_type, video_id),
             video_id, 'Downloading video JSON', query={
                 'apiKey': 'sh@hid0nlin3',
                 'hash': 'b2wMCTHpSmyxGqQjJFOycRmLSex+BpTK/ooxy6vHaqs=',
-            }))[page_type]
+            })
+        data = response.get('data', {})
+        error = data.get('error')
+        if error:
+            raise ExtractorError(
+                '%s returned error: %s' % (self.IE_NAME, '\n'.join(error.values())),
+                expected=True)
 
+        video = data[page_type]
         title = video['title']
         categories = [
             category['name']
@@ -135,3 +159,57 @@ class ShahidIE(InfoExtractor):
             'episode_id': video_id,
             'formats': formats,
         }
+
+
+class ShahidShowIE(ShahidBaseIE):
+    _VALID_URL = r'https?://shahid\.mbc\.net/ar/(?:show|serie)s/[^/]+/(?:show|series)-(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'https://shahid.mbc.net/ar/shows/%D8%B1%D8%A7%D9%85%D8%B2-%D9%82%D8%B1%D8%B4-%D8%A7%D9%84%D8%A8%D8%AD%D8%B1/show-79187',
+        'info_dict': {
+            'id': '79187',
+            'title': 'رامز قرش البحر',
+            'description': 'md5:c88fa7e0f02b0abd39d417aee0d046ff',
+        },
+        'playlist_mincount': 32,
+    }, {
+        'url': 'https://shahid.mbc.net/ar/series/How-to-live-Longer-(The-Big-Think)/series-291861',
+        'only_matching': True
+    }]
+    _PAGE_SIZE = 30
+
+    def _real_extract(self, url):
+        show_id = self._match_id(url)
+
+        product = self._call_api(
+            'playableAsset', show_id, {'showId': show_id})['productModel']
+        playlist = product['playlist']
+        playlist_id = playlist['id']
+        show = product.get('show', {})
+
+        def page_func(page_num):
+            playlist = self._call_api(
+                'product/playlist', show_id, {
+                    'playListId': playlist_id,
+                    'pageNumber': page_num,
+                    'pageSize': 30,
+                    'sorts': [{
+                        'order': 'DESC',
+                        'type': 'SORTDATE'
+                    }],
+                })
+            for product in playlist.get('productList', {}).get('products', []):
+                product_url = (product.get('productUrl') or {}).get('url')
+                if not product_url:
+                    continue
+                yield self.url_result(
+                    product_url, 'Shahid',
+                    str_or_none(product.get('id')),
+                    product.get('title'))
+
+        entries = InAdvancePagedList(
+            page_func,
+            math.ceil(playlist['count'] / self._PAGE_SIZE),
+            self._PAGE_SIZE)
+
+        return self.playlist_result(
+            entries, show_id, show.get('title'), show.get('description'))
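
ShahidShowIE pages through the playlist with InAdvancePagedList, so the number of pages has to be known up front from the playlist's count. A rough standalone equivalent of that pagination arithmetic, with generic names in place of the youtube-dl helpers:

import math

PAGE_SIZE = 30  # matches ShahidShowIE._PAGE_SIZE


def iter_products(total_count, fetch_page):
    # fetch_page(page_num) is assumed to return the list of products on that zero-based page
    num_pages = int(math.ceil(total_count / float(PAGE_SIZE)))
    for page_num in range(num_pages):
        for product in fetch_page(page_num):
            yield product
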
index 7145d285a0244acdbdb58dfbf330d2c75391dea3..6fc2ff60d2a923f763fc834127eb62b667c542d5 100644 (file)
@@ -1,11 +1,13 @@
 from __future__ import unicode_literals
 
+import re
+
 from .common import InfoExtractor
 
 
 class SlutloadIE(InfoExtractor):
     _VALID_URL = r'^https?://(?:\w+\.)?slutload\.com/video/[^/]+/(?P<id>[^/]+)/?$'
-    _TEST = {
+    _TESTS = [{
         'url': 'http://www.slutload.com/video/virginie-baisee-en-cam/TD73btpBqSxc/',
         'md5': '868309628ba00fd488cf516a113fd717',
         'info_dict': {
@@ -15,11 +17,17 @@ class SlutloadIE(InfoExtractor):
             'age_limit': 18,
             'thumbnail': r're:https?://.*?\.jpg'
         }
-    }
+    }, {
+        # mobile site
+        'url': 'http://mobile.slutload.com/video/masturbation-solo/fviFLmc6kzJ/',
+        'only_matching': True,
+    }]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
-        webpage = self._download_webpage(url, video_id)
+
+        desktop_url = re.sub(r'^(https?://)mobile\.', r'\1', url)
+        webpage = self._download_webpage(desktop_url, video_id)
 
         video_title = self._html_search_regex(r'<h1><strong>([^<]+)</strong>',
                                               webpage, 'title').strip()
index accd112aa284c4a91ee5c2c28c5b339014e4fc4a..c3078e285799ab9d159a072d67bf560ee03bd25b 100644 (file)
@@ -2,6 +2,7 @@
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
+from ..utils import smuggle_url
 
 
 class SonyLIVIE(InfoExtractor):
@@ -10,12 +11,12 @@ class SonyLIVIE(InfoExtractor):
         'url': "http://www.sonyliv.com/details/episodes/5024612095001/Ep.-1---Achaari-Cheese-Toast---Bachelor's-Delight",
         'info_dict': {
             'title': "Ep. 1 - Achaari Cheese Toast - Bachelor's Delight",
-            'id': '5024612095001',
+            'id': 'ref:5024612095001',
             'ext': 'mp4',
-            'upload_date': '20160707',
+            'upload_date': '20170923',
             'description': 'md5:7f28509a148d5be9d0782b4d5106410d',
-            'uploader_id': '4338955589001',
-            'timestamp': 1467870968,
+            'uploader_id': '5182475815001',
+            'timestamp': 1506200547,
         },
         'params': {
             'skip_download': True,
@@ -26,9 +27,11 @@ class SonyLIVIE(InfoExtractor):
         'only_matching': True,
     }]
 
-    BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/4338955589001/default_default/index.html?videoId=%s'
+    # BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/4338955589001/default_default/index.html?videoId=%s'
+    BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/5182475815001/default_default/index.html?videoId=ref:%s'
 
     def _real_extract(self, url):
         brightcove_id = self._match_id(url)
         return self.url_result(
-            self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 'BrightcoveNew', brightcove_id)
+            smuggle_url(self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, {'geo_countries': ['IN']}),
+            'BrightcoveNew', brightcove_id)
index 2863e53b5a47be353ae18df446c3ffd8a95d0913..e6c2dcfc438b758bf43080799817fb8f6a3e2075 100644 (file)
@@ -7,7 +7,7 @@ from ..utils import ExtractorError
 
 
 class SpankBangIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:(?:www|[a-z]{2})\.)?spankbang\.com/(?P<id>[\da-z]+)/video'
+    _VALID_URL = r'https?://(?:(?:www|m|[a-z]{2})\.)?spankbang\.com/(?P<id>[\da-z]+)/video'
     _TESTS = [{
         'url': 'http://spankbang.com/3vvn/video/fantasy+solo',
         'md5': '1cc433e1d6aa14bc376535b8679302f7',
@@ -15,7 +15,7 @@ class SpankBangIE(InfoExtractor):
             'id': '3vvn',
             'ext': 'mp4',
             'title': 'fantasy solo',
-            'description': 'Watch fantasy solo free HD porn video - 05 minutes - dillion harper masturbates on a bed free adult movies.',
+            'description': 'Watch fantasy solo free HD porn video - 05 minutes -  Babe,Masturbation,Solo,Toy  - dillion harper masturbates on a bed free adult movies sexy clips.',
             'thumbnail': r're:^https?://.*\.jpg$',
             'uploader': 'silly2587',
             'age_limit': 18,
@@ -28,6 +28,10 @@ class SpankBangIE(InfoExtractor):
         # no uploader
         'url': 'http://spankbang.com/lklg/video/sex+with+anyone+wedding+edition+2',
         'only_matching': True,
+    }, {
+        # mobile page
+        'url': 'http://m.spankbang.com/1o2de/video/can+t+remember+her+name',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
diff --git a/youtube_dl/extractor/stretchinternet.py b/youtube_dl/extractor/stretchinternet.py
new file mode 100644 (file)
index 0000000..ae2ac1b
--- /dev/null
@@ -0,0 +1,48 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import int_or_none
+
+
+class StretchInternetIE(InfoExtractor):
+    _VALID_URL = r'https?://portal\.stretchinternet\.com/[^/]+/portal\.htm\?.*?\beventId=(?P<id>\d+)'
+    _TEST = {
+        'url': 'https://portal.stretchinternet.com/umary/portal.htm?eventId=313900&streamType=video',
+        'info_dict': {
+            'id': '313900',
+            'ext': 'mp4',
+            'title': 'Augustana (S.D.) Baseball vs University of Mary',
+            'description': 'md5:7578478614aae3bdd4a90f578f787438',
+            'timestamp': 1490468400,
+            'upload_date': '20170325',
+        }
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        stream = self._download_json(
+            'https://neo-client.stretchinternet.com/streamservice/v1/media/stream/v%s'
+            % video_id, video_id)
+
+        video_url = 'https://%s' % stream['source']
+
+        event = self._download_json(
+            'https://neo-client.stretchinternet.com/portal-ws/getEvent.json',
+            video_id, query={
+                'clientID': 99997,
+                'eventID': video_id,
+                'token': 'asdf',
+            })['event']
+
+        title = event.get('title') or event['mobileTitle']
+        description = event.get('customText')
+        timestamp = int_or_none(event.get('longtime'))
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'timestamp': timestamp,
+            'url': video_url,
+        }
index e9474533f4dc66072539f75adf7abc7b20723240..eab22c38f187c50334cb43f48535e8982ad847a3 100644 (file)
@@ -4,58 +4,109 @@ from __future__ import unicode_literals
 import re
 
 from .turner import TurnerBaseIE
-from ..utils import extract_attributes
+from ..utils import (
+    float_or_none,
+    int_or_none,
+    strip_or_none,
+)
 
 
 class TBSIE(TurnerBaseIE):
-    # https://github.com/rg3/youtube-dl/issues/13658
-    _WORKING = False
-
-    _VALID_URL = r'https?://(?:www\.)?(?P<site>tbs|tntdrama)\.com/videos/(?:[^/]+/)+(?P<id>[^/?#]+)\.html'
+    _VALID_URL = r'https?://(?:www\.)?(?P<site>tbs|tntdrama)\.com/(?:movies|shows/[^/]+/(?:clips|season-\d+/episode-\d+))/(?P<id>[^/?#]+)'
     _TESTS = [{
-        'url': 'http://www.tbs.com/videos/people-of-earth/season-1/extras/2007318/theatrical-trailer.html',
-        'md5': '9e61d680e2285066ade7199e6408b2ee',
+        'url': 'http://www.tntdrama.com/shows/the-alienist/clips/monster',
         'info_dict': {
-            'id': '2007318',
+            'id': '8d384cde33b89f3a43ce5329de42903ed5099887',
             'ext': 'mp4',
-            'title': 'Theatrical Trailer',
-            'description': 'Catch the latest comedy from TBS, People of Earth, premiering Halloween night--Monday, October 31, at 9/8c.',
+            'title': 'Monster',
+            'description': 'Get a first look at the theatrical trailer for TNT’s highly anticipated new psychological thriller The Alienist, which premieres January 22 on TNT.',
+            'timestamp': 1508175329,
+            'upload_date': '20171016',
         },
-        'skip': 'TBS videos are deleted after a while',
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        }
     }, {
-        'url': 'http://www.tntdrama.com/videos/good-behavior/season-1/extras/1538823/you-better-run.html',
-        'md5': 'ce53c6ead5e9f3280b4ad2031a6fab56',
-        'info_dict': {
-            'id': '1538823',
-            'ext': 'mp4',
-            'title': 'You Better Run',
-            'description': 'Letty Raines must figure out what she\'s running toward while running away from her past. Good Behavior premieres November 15 at 9/8c.',
-        },
-        'skip': 'TBS videos are deleted after a while',
+        'url': 'http://www.tbs.com/shows/search-party/season-1/episode-1/explicit-the-mysterious-disappearance-of-the-girl-no-one-knew',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.tntdrama.com/movies/star-wars-a-new-hope',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
-        domain, display_id = re.match(self._VALID_URL, url).groups()
-        site = domain[:3]
+        site, display_id = re.match(self._VALID_URL, url).groups()
         webpage = self._download_webpage(url, display_id)
-        video_params = extract_attributes(self._search_regex(r'(<[^>]+id="page-video"[^>]*>)', webpage, 'video params'))
-        query = None
-        clip_id = video_params.get('clipid')
-        if clip_id:
-            query = 'id=' + clip_id
-        else:
-            query = 'titleId=' + video_params['titleid']
-        return self._extract_cvp_info(
-            'http://www.%s.com/service/cvpXml?%s' % (domain, query), display_id, {
-                'default': {
-                    'media_src': 'http://ht.cdn.turner.com/%s/big' % site,
-                },
-                'secure': {
-                    'media_src': 'http://androidhls-secure.cdn.turner.com/%s/big' % site,
-                    'tokenizer_src': 'http://www.%s.com/video/processors/services/token_ipadAdobe.do' % domain,
-                },
-            }, {
-                'url': url,
-                'site_name': site.upper(),
-                'auth_required': video_params.get('isAuthRequired') != 'false',
-            })
+        video_data = self._parse_json(self._search_regex(
+            r'<script[^>]+?data-drupal-selector="drupal-settings-json"[^>]*?>({.+?})</script>',
+            webpage, 'drupal setting'), display_id)['turner_playlist'][0]
+
+        media_id = video_data['mediaID']
+        title = video_data['title']
+
+        streams_data = self._download_json(
+            'http://medium.ngtv.io/media/%s/tv' % media_id,
+            media_id)['media']['tv']
+        duration = None
+        chapters = []
+        formats = []
+        for supported_type in ('unprotected', 'bulkaes'):
+            stream_data = streams_data.get(supported_type, {})
+            m3u8_url = stream_data.get('secureUrl') or stream_data.get('url')
+            if not m3u8_url:
+                continue
+            if stream_data.get('playlistProtection') == 'spe':
+                m3u8_url = self._add_akamai_spe_token(
+                    'http://www.%s.com/service/token_spe' % site,
+                    m3u8_url, media_id, {
+                        'url': url,
+                        'site_name': site[:3].upper(),
+                        'auth_required': video_data.get('authRequired') == '1',
+                    })
+            formats.extend(self._extract_m3u8_formats(
+                m3u8_url, media_id, 'mp4', m3u8_id='hls', fatal=False))
+
+            duration = float_or_none(stream_data.get('totalRuntime') or video_data.get('duration'))
+
+            if not chapters:
+                for chapter in stream_data.get('contentSegments', []):
+                    start_time = float_or_none(chapter.get('start'))
+                    chapter_duration = float_or_none(chapter.get('duration'))
+                    if start_time is None or chapter_duration is None:
+                        continue
+                    chapters.append({
+                        'start_time': start_time,
+                        'end_time': start_time + chapter_duration,
+                    })
+        self._sort_formats(formats)
+
+        thumbnails = []
+        for image_id, image in video_data.get('images', {}).items():
+            image_url = image.get('url')
+            if not image_url or image.get('type') != 'video':
+                continue
+            i = {
+                'id': image_id,
+                'url': image_url,
+            }
+            mobj = re.search(r'(\d+)x(\d+)', image_url)
+            if mobj:
+                i.update({
+                    'width': int(mobj.group(1)),
+                    'height': int(mobj.group(2)),
+                })
+            thumbnails.append(i)
+
+        return {
+            'id': media_id,
+            'title': title,
+            'description': strip_or_none(video_data.get('descriptionNoTags') or video_data.get('shortDescriptionNoTags')),
+            'duration': duration,
+            'timestamp': int_or_none(video_data.get('created')),
+            'season_number': int_or_none(video_data.get('season')),
+            'episode_number': int_or_none(video_data.get('episode')),
+            'chapters': chapters,
+            'thumbnails': thumbnails,
+            'formats': formats,
+        }
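
The rewritten TBS extractor now guesses thumbnail dimensions from the image URL itself via the (\d+)x(\d+) pattern rather than reading them from metadata. A small sketch of that heuristic (the sample URL is invented):

import re


def parse_dimensions(image_url):
    # e.g. '.../monster_1920x1080.jpg' -> (1920, 1080); (None, None) if no WxH token is present
    mobj = re.search(r'(\d+)x(\d+)', image_url)
    if not mobj:
        return None, None
    return int(mobj.group(1)), int(mobj.group(2))
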
index 7e6ec3430bda4bd042d0b598ad2c7ef4dea53e77..0c2f8f119fbc3d3535d571f430865a071f98e10f 100644 (file)
@@ -21,6 +21,8 @@ class TNAFlixNetworkBaseIE(InfoExtractor):
         r'flashvars\.config\s*=\s*escape\("([^"]+)"',
         r'<input[^>]+name="config\d?" value="([^"]+)"',
     ]
+    _HOST = 'tna'
+    _VKEY_SUFFIX = ''
     _TITLE_REGEX = r'<input[^>]+name="title" value="([^"]+)"'
     _DESCRIPTION_REGEX = r'<input[^>]+name="description" value="([^"]+)"'
     _UPLOADER_REGEX = r'<input[^>]+name="username" value="([^"]+)"'
@@ -72,7 +74,13 @@ class TNAFlixNetworkBaseIE(InfoExtractor):
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         video_id = mobj.group('id')
-        display_id = mobj.group('display_id') if 'display_id' in mobj.groupdict() else video_id
+        for display_id_key in ('display_id', 'display_id_2'):
+            if display_id_key in mobj.groupdict():
+                display_id = mobj.group(display_id_key)
+                if display_id:
+                    break
+        else:
+            display_id = video_id
 
         webpage = self._download_webpage(url, display_id)
 
@@ -81,8 +89,8 @@ class TNAFlixNetworkBaseIE(InfoExtractor):
 
         if not cfg_url:
             inputs = self._hidden_inputs(webpage)
-            cfg_url = ('https://cdn-fck.tnaflix.com/tnaflix/%s.fid?key=%s&VID=%s&premium=1&vip=1&alpha'
-                       % (inputs['vkey'], inputs['nkey'], video_id))
+            cfg_url = ('https://cdn-fck.%sflix.com/%sflix/%s%s.fid?key=%s&VID=%s&premium=1&vip=1&alpha'
+                       % (self._HOST, self._HOST, inputs['vkey'], self._VKEY_SUFFIX, inputs['nkey'], video_id))
 
         cfg_xml = self._download_xml(
             cfg_url, display_id, 'Downloading metadata',
@@ -91,7 +99,8 @@ class TNAFlixNetworkBaseIE(InfoExtractor):
         formats = []
 
         def extract_video_url(vl):
-            return re.sub(r'speed=\d+', 'speed=', unescapeHTML(vl.text))
+            # Any URL modification now results in HTTP Error 403: Forbidden
+            return unescapeHTML(vl.text)
 
         video_link = cfg_xml.find('./videoLink')
         if video_link is not None:
@@ -192,18 +201,21 @@ class TNAFlixNetworkEmbedIE(TNAFlixNetworkBaseIE):
             webpage)]
 
 
-class TNAFlixIE(TNAFlixNetworkBaseIE):
+class TNAEMPFlixBaseIE(TNAFlixNetworkBaseIE):
+    _DESCRIPTION_REGEX = r'(?s)>Description:</[^>]+>(.+?)<'
+    _UPLOADER_REGEX = r'<span>by\s*<a[^>]+\bhref=["\']/profile/[^>]+>([^<]+)<'
+    _CATEGORIES_REGEX = r'(?s)<span[^>]*>Categories:</span>(.+?)</div>'
+
+
+class TNAFlixIE(TNAEMPFlixBaseIE):
     _VALID_URL = r'https?://(?:www\.)?tnaflix\.com/[^/]+/(?P<display_id>[^/]+)/video(?P<id>\d+)'
 
     _TITLE_REGEX = r'<title>(.+?) - (?:TNAFlix Porn Videos|TNAFlix\.com)</title>'
-    _DESCRIPTION_REGEX = r'(?s)>Description:</[^>]+>(.+?)<'
-    _UPLOADER_REGEX = r'<i>\s*Verified Member\s*</i>\s*<h\d+>(.+?)<'
-    _CATEGORIES_REGEX = r'(?s)<span[^>]*>Categories:</span>(.+?)</div>'
 
     _TESTS = [{
         # anonymous uploader, no categories
         'url': 'http://www.tnaflix.com/porn-stars/Carmella-Decesare-striptease/video553878',
-        'md5': 'ecf3498417d09216374fc5907f9c6ec0',
+        'md5': '7e569419fe6d69543d01e6be22f5f7c4',
         'info_dict': {
             'id': '553878',
             'display_id': 'Carmella-Decesare-striptease',
@@ -228,7 +240,7 @@ class TNAFlixIE(TNAFlixNetworkBaseIE):
             'duration': 164,
             'age_limit': 18,
             'uploader': 'bobwhite39',
-            'categories': ['Amateur Porn', 'Squirting Videos', 'Teen Girls 18+'],
+            'categories': list,
         }
     }, {
         'url': 'https://www.tnaflix.com/amateur-porn/bunzHD-Ms.Donk/video358632',
@@ -236,14 +248,15 @@ class TNAFlixIE(TNAFlixNetworkBaseIE):
     }]
 
 
-class EMPFlixIE(TNAFlixNetworkBaseIE):
-    _VALID_URL = r'https?://(?:www\.)?empflix\.com/videos/(?P<display_id>.+?)-(?P<id>[0-9]+)\.html'
+class EMPFlixIE(TNAEMPFlixBaseIE):
+    _VALID_URL = r'https?://(?:www\.)?empflix\.com/(?:videos/(?P<display_id>.+?)-|[^/]+/(?P<display_id_2>[^/]+)/video)(?P<id>[0-9]+)'
 
-    _UPLOADER_REGEX = r'<span[^>]+class="infoTitle"[^>]*>Uploaded By:</span>(.+?)</li>'
+    _HOST = 'emp'
+    _VKEY_SUFFIX = '-1'
 
     _TESTS = [{
         'url': 'http://www.empflix.com/videos/Amateur-Finger-Fuck-33051.html',
-        'md5': 'b1bc15b6412d33902d6e5952035fcabc',
+        'md5': 'bc30d48b91a7179448a0bda465114676',
         'info_dict': {
             'id': '33051',
             'display_id': 'Amateur-Finger-Fuck',
@@ -259,6 +272,9 @@ class EMPFlixIE(TNAFlixNetworkBaseIE):
     }, {
         'url': 'http://www.empflix.com/videos/[AROMA][ARMD-718]-Aoi-Yoshino-Sawa-25826.html',
         'only_matching': True,
+    }, {
+        'url': 'https://www.empflix.com/amateur-porn/Amateur-Finger-Fuck/video33051',
+        'only_matching': True,
     }]
 
 
index e59ed266109d16e12e7c64a9ba499ef4c116e449..2e7876cc5b392be0f6524744c676664d66c2b5f8 100644 (file)
@@ -1,6 +1,8 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
+import re
+
 from .common import InfoExtractor
 from ..utils import (
     int_or_none,
@@ -14,7 +16,7 @@ from ..utils import (
 class TouTvIE(InfoExtractor):
     _NETRC_MACHINE = 'toutv'
     IE_NAME = 'tou.tv'
-    _VALID_URL = r'https?://ici\.tou\.tv/(?P<id>[a-zA-Z0-9_-]+(?:/S[0-9]+E[0-9]+)?)'
+    _VALID_URL = r'https?://ici\.tou\.tv/(?P<id>[a-zA-Z0-9_-]+(?:/S[0-9]+[EC][0-9]+)?)'
     _access_token = None
     _claims = None
 
@@ -35,13 +37,16 @@ class TouTvIE(InfoExtractor):
     }, {
         'url': 'http://ici.tou.tv/hackers',
         'only_matching': True,
+    }, {
+        'url': 'https://ici.tou.tv/l-age-adulte/S01C501',
+        'only_matching': True,
     }]
 
     def _real_initialize(self):
         email, password = self._get_login_info()
         if email is None:
             return
-        state = 'http://ici.tou.tv//'
+        state = 'http://ici.tou.tv/'
         webpage = self._download_webpage(state, None, 'Downloading homepage')
         toutvlogin = self._parse_json(self._search_regex(
             r'(?s)toutvlogin\s*=\s*({.+?});', webpage, 'toutvlogin'), None, js_to_json)
@@ -54,16 +59,30 @@ class TouTvIE(InfoExtractor):
                 'scope': 'media-drmt openid profile email id.write media-validation.read.privileged',
                 'state': state,
             })
-        login_form = self._search_regex(
-            r'(?s)(<form[^>]+(?:id|name)="Form-login".+?</form>)', login_webpage, 'login form')
-        form_data = self._hidden_inputs(login_form)
+
+        def extract_form_url_and_data(wp, default_form_url, form_spec_re=''):
+            form, form_elem = re.search(
+                r'(?s)((<form[^>]+?%s[^>]*?>).+?</form>)' % form_spec_re, wp).groups()
+            form_data = self._hidden_inputs(form)
+            form_url = extract_attributes(form_elem).get('action') or default_form_url
+            return form_url, form_data
+
+        post_url, form_data = extract_form_url_and_data(
+            login_webpage,
+            'https://services.radio-canada.ca/auth/oauth/v2/authorize/login',
+            r'(?:id|name)="Form-login"')
         form_data.update({
             'login-email': email,
             'login-password': password,
         })
-        post_url = extract_attributes(login_form).get('action') or authorize_url
-        _, urlh = self._download_webpage_handle(
+        consent_webpage = self._download_webpage(
             post_url, None, 'Logging in', data=urlencode_postdata(form_data))
+        post_url, form_data = extract_form_url_and_data(
+            consent_webpage,
+            'https://services.radio-canada.ca/auth/oauth/v2/authorize/consent')
+        _, urlh = self._download_webpage_handle(
+            post_url, None, 'Following Redirection',
+            data=urlencode_postdata(form_data))
         self._access_token = self._search_regex(
             r'access_token=([\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})',
             urlh.geturl(), 'access token')
index efeb677ee9a7be81e3b9289968ca9df41a2cb4ef..e73b64aebd80977cbf9551f20e5dadac2a15a9df 100644 (file)
@@ -18,9 +18,32 @@ from ..utils import (
 
 
 class TurnerBaseIE(AdobePassIE):
+    _AKAMAI_SPE_TOKEN_CACHE = {}
+
     def _extract_timestamp(self, video_data):
         return int_or_none(xpath_attr(video_data, 'dateCreated', 'uts'))
 
+    def _add_akamai_spe_token(self, tokenizer_src, video_url, content_id, ap_data):
+        secure_path = self._search_regex(r'https?://[^/]+(.+/)', video_url, 'secure path') + '*'
+        token = self._AKAMAI_SPE_TOKEN_CACHE.get(secure_path)
+        if not token:
+            query = {
+                'path': secure_path,
+                'videoId': content_id,
+            }
+            if ap_data.get('auth_required'):
+                query['accessToken'] = self._extract_mvpd_auth(ap_data['url'], content_id, ap_data['site_name'], ap_data['site_name'])
+            auth = self._download_xml(
+                tokenizer_src, content_id, query=query)
+            error_msg = xpath_text(auth, 'error/msg')
+            if error_msg:
+                raise ExtractorError(error_msg, expected=True)
+            token = xpath_text(auth, 'token')
+            if not token:
+                return video_url
+            self._AKAMAI_SPE_TOKEN_CACHE[secure_path] = token
+        return video_url + '?hdnea=' + token
+
     def _extract_cvp_info(self, data_src, video_id, path_data={}, ap_data={}):
         video_data = self._download_xml(data_src, video_id)
         video_id = video_data.attrib['id']
@@ -33,7 +56,6 @@ class TurnerBaseIE(AdobePassIE):
         #         rtmp_src = splited_rtmp_src[1]
         # aifp = xpath_text(video_data, 'akamai/aifp', default='')
 
-        tokens = {}
         urls = []
         formats = []
         rex = re.compile(
@@ -67,26 +89,10 @@ class TurnerBaseIE(AdobePassIE):
                 secure_path_data = path_data.get('secure')
                 if not secure_path_data:
                     continue
-                video_url = secure_path_data['media_src'] + video_url
-                secure_path = self._search_regex(r'https?://[^/]+(.+/)', video_url, 'secure path') + '*'
-                token = tokens.get(secure_path)
-                if not token:
-                    query = {
-                        'path': secure_path,
-                        'videoId': content_id,
-                    }
-                    if ap_data.get('auth_required'):
-                        query['accessToken'] = self._extract_mvpd_auth(ap_data['url'], video_id, ap_data['site_name'], ap_data['site_name'])
-                    auth = self._download_xml(
-                        secure_path_data['tokenizer_src'], video_id, query=query)
-                    error_msg = xpath_text(auth, 'error/msg')
-                    if error_msg:
-                        raise ExtractorError(error_msg, expected=True)
-                    token = xpath_text(auth, 'token')
-                    if not token:
-                        continue
-                    tokens[secure_path] = token
-                video_url = video_url + '?hdnea=' + token
+                video_url = self._add_akamai_spe_token(
+                    secure_path_data['tokenizer_src'],
+                    secure_path_data['media_src'] + video_url,
+                    content_id, ap_data)
             elif not re.match('https?://', video_url):
                 base_path_data = path_data.get(ext, path_data.get('default', {}))
                 media_src = base_path_data.get('media_src')
index b57abeaa49b0620184f9f4a96145303aec1ceeba..0b863df2ff4ad214162c6187ac7aaa65fe3fc6c9 100644 (file)
@@ -32,6 +32,8 @@ class TVAIE(InfoExtractor):
         video_data = self._download_json(
             'https://videos.tva.ca/proxy/item/_' + video_id, video_id, headers={
                 'Accept': 'application/json',
+            }, query={
+                'appId': '5955fc5f23eec60006c951f1',
             })
 
         def get_attribute(key):
diff --git a/youtube_dl/extractor/tvnow.py b/youtube_dl/extractor/tvnow.py
new file mode 100644 (file)
index 0000000..e2169f2
--- /dev/null
@@ -0,0 +1,175 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+    ExtractorError,
+    parse_iso8601,
+    parse_duration,
+    update_url_query,
+)
+
+
+class TVNowBaseIE(InfoExtractor):
+    _VIDEO_FIELDS = (
+        'id', 'title', 'free', 'geoblocked', 'articleLong', 'articleShort',
+        'broadcastStartDate', 'isDrm', 'duration', 'manifest.dashclear',
+        'format.defaultImage169Format', 'format.defaultImage169Logo')
+
+    def _call_api(self, path, video_id, query):
+        return self._download_json(
+            'https://api.tvnow.de/v3/' + path,
+            video_id, query=query)
+
+    def _extract_video(self, info, display_id):
+        video_id = compat_str(info['id'])
+        title = info['title']
+
+        mpd_url = info['manifest']['dashclear']
+        if not mpd_url:
+            if info.get('isDrm'):
+                raise ExtractorError(
+                    'Video %s is DRM protected' % video_id, expected=True)
+            if info.get('geoblocked'):
+                raise ExtractorError(
+                    'Video %s is not available from your location due to geo restriction' % video_id,
+                    expected=True)
+            if not info.get('free', True):
+                raise ExtractorError(
+                    'Video %s is not available for free' % video_id, expected=True)
+
+        mpd_url = update_url_query(mpd_url, {'filter': ''})
+        formats = self._extract_mpd_formats(mpd_url, video_id, mpd_id='dash', fatal=False)
+        formats.extend(self._extract_ism_formats(
+            mpd_url.replace('dash.', 'hss.').replace('/.mpd', '/Manifest'),
+            video_id, ism_id='mss', fatal=False))
+        formats.extend(self._extract_m3u8_formats(
+            mpd_url.replace('dash.', 'hls.').replace('/.mpd', '/.m3u8'),
+            video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
+        self._sort_formats(formats)
+
+        description = info.get('articleLong') or info.get('articleShort')
+        timestamp = parse_iso8601(info.get('broadcastStartDate'), ' ')
+        duration = parse_duration(info.get('duration'))
+
+        f = info.get('format', {})
+        thumbnail = f.get('defaultImage169Format') or f.get('defaultImage169Logo')
+
+        return {
+            'id': video_id,
+            'display_id': display_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'timestamp': timestamp,
+            'duration': duration,
+            'formats': formats,
+        }
+
+
+class TVNowIE(TVNowBaseIE):
+    _VALID_URL = r'https?://(?:www\.)?tvnow\.(?:de|at|ch)/(?:rtl(?:2|plus)?|nitro|superrtl|ntv|vox)/(?P<show_id>[^/]+)/(?:(?:list/[^/]+|jahr/\d{4}/\d{1,2})/)?(?P<id>[^/]+)/(?:player|preview)'
+
+    _TESTS = [{
+        # rtl
+        'url': 'https://www.tvnow.de/rtl/alarm-fuer-cobra-11/freier-fall/player?return=/rtl',
+        'info_dict': {
+            'id': '385314',
+            'display_id': 'alarm-fuer-cobra-11/freier-fall',
+            'ext': 'mp4',
+            'title': 'Freier Fall',
+            'description': 'md5:8c2d8f727261adf7e0dc18366124ca02',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'timestamp': 1512677700,
+            'upload_date': '20171207',
+            'duration': 2862.0,
+        },
+    }, {
+        # rtl2
+        'url': 'https://www.tvnow.de/rtl2/armes-deutschland/episode-0008/player',
+        'only_matching': True,
+    }, {
+        # rtlnitro
+        'url': 'https://www.tvnow.de/nitro/alarm-fuer-cobra-11-die-autobahnpolizei/auf-eigene-faust-pilot/player',
+        'only_matching': True,
+    }, {
+        # superrtl
+        'url': 'https://www.tvnow.de/superrtl/die-lustigsten-schlamassel-der-welt/u-a-ketchup-effekt/player',
+        'only_matching': True,
+    }, {
+        # ntv
+        'url': 'https://www.tvnow.de/ntv/startup-news/goetter-in-weiss/player',
+        'only_matching': True,
+    }, {
+        # vox
+        'url': 'https://www.tvnow.de/vox/auto-mobil/neues-vom-automobilmarkt-2017-11-19-17-00-00/player',
+        'only_matching': True,
+    }, {
+        # rtlplus
+        'url': 'https://www.tvnow.de/rtlplus/op-ruft-dr-bruckner/die-vernaehte-frau/player',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        display_id = '%s/%s' % re.match(self._VALID_URL, url).groups()
+
+        info = self._call_api(
+            'movies/' + display_id, display_id, query={
+                'fields': ','.join(self._VIDEO_FIELDS),
+            })
+
+        return self._extract_video(info, display_id)
+
+
+class TVNowListIE(TVNowBaseIE):
+    _VALID_URL = r'(?P<base_url>https?://(?:www\.)?tvnow\.(?:de|at|ch)/(?:rtl(?:2|plus)?|nitro|superrtl|ntv|vox)/(?P<show_id>[^/]+)/)list/(?P<id>[^?/#&]+)$'
+
+    _SHOW_FIELDS = ('title', )
+    _SEASON_FIELDS = ('id', 'headline', 'seoheadline', )
+    _VIDEO_FIELDS = ('id', 'headline', 'seoUrl', )
+
+    _TESTS = [{
+        'url': 'https://www.tvnow.de/rtl/30-minuten-deutschland/list/aktuell',
+        'info_dict': {
+            'id': '28296',
+            'title': '30 Minuten Deutschland - Aktuell',
+        },
+        'playlist_mincount': 1,
+    }]
+
+    def _real_extract(self, url):
+        base_url, show_id, season_id = re.match(self._VALID_URL, url).groups()
+
+        fields = []
+        fields.extend(self._SHOW_FIELDS)
+        fields.extend('formatTabs.%s' % field for field in self._SEASON_FIELDS)
+        fields.extend(
+            'formatTabs.formatTabPages.container.movies.%s' % field
+            for field in self._VIDEO_FIELDS)
+
+        list_info = self._call_api(
+            'formats/seo', season_id, query={
+                'fields': ','.join(fields),
+                'name': show_id + '.php'
+            })
+
+        season = next(
+            season for season in list_info['formatTabs']['items']
+            if season.get('seoheadline') == season_id)
+
+        title = '%s - %s' % (list_info['title'], season['headline'])
+
+        entries = []
+        for container in season['formatTabPages']['items']:
+            for info in ((container.get('container') or {}).get('movies') or {}).get('items') or []:
+                seo_url = info.get('seoUrl')
+                if not seo_url:
+                    continue
+                entries.append(self.url_result(
+                    base_url + seo_url + '/player', 'TVNow', info.get('id')))
+
+        return self.playlist_result(
+            entries, compat_str(season.get('id') or season_id), title)
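
TVNowBaseIE only receives the DASH manifest (manifest.dashclear) from the v3 API and derives the Smooth Streaming and HLS manifests from it by plain string substitution. A hedged sketch of that mapping, with an invented example URL:

def derive_manifest_urls(dash_url):
    return {
        'dash': dash_url,
        'mss': dash_url.replace('dash.', 'hss.').replace('/.mpd', '/Manifest'),
        'hls': dash_url.replace('dash.', 'hls.').replace('/.mpd', '/.m3u8'),
    }


# 'https://dash.example.invalid/abc/.mpd' ->
#   mss: 'https://hss.example.invalid/abc/Manifest'
#   hls: 'https://hls.example.invalid/abc/.m3u8'
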
diff --git a/youtube_dl/extractor/twentythreevideo.py b/youtube_dl/extractor/twentythreevideo.py
new file mode 100644 (file)
index 0000000..aa0c6e9
--- /dev/null
@@ -0,0 +1,77 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import int_or_none
+
+
+class TwentyThreeVideoIE(InfoExtractor):
+    IE_NAME = '23video'
+    _VALID_URL = r'https?://video\.(?P<domain>twentythree\.net|23video\.com|filmweb\.no)/v\.ihtml/player\.html\?(?P<query>.*?\bphoto(?:_|%5f)id=(?P<id>\d+).*)'
+    _TEST = {
+        'url': 'https://video.twentythree.net/v.ihtml/player.html?showDescriptions=0&source=site&photo%5fid=20448876&autoPlay=1',
+        'md5': '75fcf216303eb1dae9920d651f85ced4',
+        'info_dict': {
+            'id': '20448876',
+            'ext': 'mp4',
+            'title': 'Video Marketing Minute: Personalized Video',
+            'timestamp': 1513855354,
+            'upload_date': '20171221',
+            'uploader_id': '12258964',
+            'uploader': 'Rasmus Bysted',
+        }
+    }
+
+    def _real_extract(self, url):
+        domain, query, photo_id = re.match(self._VALID_URL, url).groups()
+        base_url = 'https://video.%s' % domain
+        photo_data = self._download_json(
+            base_url + '/api/photo/list?' + query, photo_id, query={
+                'format': 'json',
+            }, transform_source=lambda s: self._search_regex(r'(?s)({.+})', s, 'photo data'))['photo']
+        title = photo_data['title']
+
+        formats = []
+
+        audio_path = photo_data.get('audio_download')
+        if audio_path:
+            formats.append({
+                'format_id': 'audio',
+                'url': base_url + audio_path,
+                'filesize': int_or_none(photo_data.get('audio_size')),
+                'vcodec': 'none',
+            })
+
+        def add_common_info_to_list(l, template, id_field, id_value):
+            f_base = template % id_value
+            f_path = photo_data.get(f_base + 'download')
+            if not f_path:
+                return
+            l.append({
+                id_field: id_value,
+                'url': base_url + f_path,
+                'width': int_or_none(photo_data.get(f_base + 'width')),
+                'height': int_or_none(photo_data.get(f_base + 'height')),
+                'filesize': int_or_none(photo_data.get(f_base + 'size')),
+            })
+
+        for f in ('mobile_high', 'medium', 'hd', '1080p', '4k'):
+            add_common_info_to_list(formats, 'video_%s_', 'format_id', f)
+
+        thumbnails = []
+        for t in ('quad16', 'quad50', 'quad75', 'quad100', 'small', 'portrait', 'standard', 'medium', 'large', 'original'):
+            add_common_info_to_list(thumbnails, '%s_', 'id', t)
+
+        return {
+            'id': photo_id,
+            'title': title,
+            'timestamp': int_or_none(photo_data.get('creation_date_epoch')),
+            'duration': int_or_none(photo_data.get('video_length')),
+            'view_count': int_or_none(photo_data.get('view_count')),
+            'comment_count': int_or_none(photo_data.get('number_of_comments')),
+            'uploader_id': photo_data.get('user_id'),
+            'uploader': photo_data.get('display_name'),
+            'thumbnails': thumbnails,
+            'formats': formats,
+        }
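
add_common_info_to_list works because the 23video photo API exposes each rendition under a shared key prefix (video_hd_download, video_hd_width, video_hd_height, video_hd_size, and so on). A minimal sketch of that key expansion for a single format; the field names come from the code above, while the sample base URL and return shape are only illustrative:

def pick_rendition(photo_data, rendition, base_url='https://video.twentythree.net'):
    base = 'video_%s_' % rendition
    path = photo_data.get(base + 'download')
    if not path:
        return None
    return {
        'format_id': rendition,
        'url': base_url + path,
        'width': photo_data.get(base + 'width'),
        'height': photo_data.get(base + 'height'),
        'filesize': photo_data.get(base + 'size'),
    }


# pick_rendition({'video_hd_download': '/123/video_hd.mp4', 'video_hd_width': 1280}, 'hd')
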
index fefcd28078f6c69058366fbb7b8f9e616508fbb6..bf57eac01f2f42dd5dd4d4fc7bdce55f08ee7513 100644 (file)
@@ -101,7 +101,7 @@ class TwitchBaseIE(InfoExtractor):
             fail(clean_html(login_page))
 
         redirect_page, handle = login_step(
-            login_page, handle, 'Logging in as %s' % username, {
+            login_page, handle, 'Logging in', {
                 'username': username,
                 'password': password,
             })
index 1b0b9637160f3c096b4baf07f6146bc9d84a31c8..d7e425041f1246a20d587174619db6db3f7c0c83 100644 (file)
@@ -43,7 +43,7 @@ class TwitterBaseIE(InfoExtractor):
 
 class TwitterCardIE(TwitterBaseIE):
     IE_NAME = 'twitter:card'
-    _VALID_URL = r'https?://(?:www\.)?twitter\.com/i/(?:cards/tfw/v1|videos(?:/tweet)?)/(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www\.)?twitter\.com/i/(?P<path>cards/tfw/v1|videos(?:/tweet)?)/(?P<id>\d+)'
     _TESTS = [
         {
             'url': 'https://twitter.com/i/cards/tfw/v1/560070183650213889',
@@ -51,11 +51,10 @@ class TwitterCardIE(TwitterBaseIE):
             'info_dict': {
                 'id': '560070183650213889',
                 'ext': 'mp4',
-                'title': 'Twitter Card',
+                'title': 'Twitter web player',
                 'thumbnail': r're:^https?://.*\.jpg$',
                 'duration': 30.033,
             },
-            'skip': 'Video gone',
         },
         {
             'url': 'https://twitter.com/i/cards/tfw/v1/623160978427936768',
@@ -63,11 +62,9 @@ class TwitterCardIE(TwitterBaseIE):
             'info_dict': {
                 'id': '623160978427936768',
                 'ext': 'mp4',
-                'title': 'Twitter Card',
-                'thumbnail': r're:^https?://.*\.jpg',
-                'duration': 80.155,
+                'title': 'Twitter web player',
+                'thumbnail': r're:^https?://.*(?:\bformat=|\.)jpg',
             },
-            'skip': 'Video gone',
         },
         {
             'url': 'https://twitter.com/i/cards/tfw/v1/654001591733886977',
@@ -120,15 +117,15 @@ class TwitterCardIE(TwitterBaseIE):
             elif media_url.endswith('.mpd'):
                 formats.extend(self._extract_mpd_formats(media_url, video_id, mpd_id='dash'))
             else:
-                vbr = int_or_none(dict_get(media_variant, ('bitRate', 'bitrate')), scale=1000)
+                tbr = int_or_none(dict_get(media_variant, ('bitRate', 'bitrate')), scale=1000)
                 a_format = {
                     'url': media_url,
-                    'format_id': 'http-%d' % vbr if vbr else 'http',
-                    'vbr': vbr,
+                    'format_id': 'http-%d' % tbr if tbr else 'http',
+                    'tbr': tbr,
                 }
                 # Reported bitRate may be zero
-                if not a_format['vbr']:
-                    del a_format['vbr']
+                if not a_format['tbr']:
+                    del a_format['tbr']
 
                 self._search_dimensions_in_video_url(a_format, media_url)
 
@@ -150,79 +147,83 @@ class TwitterCardIE(TwitterBaseIE):
         bearer_token = self._search_regex(
             r'BEARER_TOKEN\s*:\s*"([^"]+)"',
             main_script, 'bearer token')
-        guest_token = self._search_regex(
-            r'document\.cookie\s*=\s*decodeURIComponent\("gt=(\d+)',
-            webpage, 'guest token')
+        # https://developer.twitter.com/en/docs/tweets/post-and-engage/api-reference/get-statuses-show-id
         api_data = self._download_json(
-            'https://api.twitter.com/2/timeline/conversation/%s.json' % video_id,
-            video_id, 'Downloading mobile API data',
+            'https://api.twitter.com/1.1/statuses/show/%s.json' % video_id,
+            video_id, 'Downloading API data',
             headers={
                 'Authorization': 'Bearer ' + bearer_token,
-                'x-guest-token': guest_token,
             })
-        media_info = try_get(api_data, lambda o: o['globalObjects']['tweets'][video_id]
-                                                  ['extended_entities']['media'][0]['video_info']) or {}
+        media_info = try_get(api_data, lambda o: o['extended_entities']['media'][0]['video_info']) or {}
         return self._parse_media_info(media_info, video_id)
 
     def _real_extract(self, url):
-        video_id = self._match_id(url)
+        path, video_id = re.search(self._VALID_URL, url).groups()
 
         config = None
         formats = []
         duration = None
 
-        webpage = self._download_webpage(url, video_id)
+        urls = [url]
+        if path.startswith('cards/'):
+            urls.append('https://twitter.com/i/videos/' + video_id)
 
-        iframe_url = self._html_search_regex(
-            r'<iframe[^>]+src="((?:https?:)?//(?:www\.youtube\.com/embed/[^"]+|(?:www\.)?vine\.co/v/\w+/card))"',
-            webpage, 'video iframe', default=None)
-        if iframe_url:
-            return self.url_result(iframe_url)
+        for u in urls:
+            webpage = self._download_webpage(u, video_id)
 
-        config = self._parse_json(self._html_search_regex(
-            r'data-(?:player-)?config="([^"]+)"', webpage,
-            'data player config', default='{}'),
-            video_id)
+            iframe_url = self._html_search_regex(
+                r'<iframe[^>]+src="((?:https?:)?//(?:www\.youtube\.com/embed/[^"]+|(?:www\.)?vine\.co/v/\w+/card))"',
+                webpage, 'video iframe', default=None)
+            if iframe_url:
+                return self.url_result(iframe_url)
 
-        if config.get('source_type') == 'vine':
-            return self.url_result(config['player_url'], 'Vine')
+            config = self._parse_json(self._html_search_regex(
+                r'data-(?:player-)?config="([^"]+)"', webpage,
+                'data player config', default='{}'),
+                video_id)
 
-        periscope_url = PeriscopeIE._extract_url(webpage)
-        if periscope_url:
-            return self.url_result(periscope_url, PeriscopeIE.ie_key())
+            if config.get('source_type') == 'vine':
+                return self.url_result(config['player_url'], 'Vine')
 
-        video_url = config.get('video_url') or config.get('playlist', [{}])[0].get('source')
+            periscope_url = PeriscopeIE._extract_url(webpage)
+            if periscope_url:
+                return self.url_result(periscope_url, PeriscopeIE.ie_key())
 
-        if video_url:
-            if determine_ext(video_url) == 'm3u8':
-                formats.extend(self._extract_m3u8_formats(video_url, video_id, ext='mp4', m3u8_id='hls'))
-            else:
-                f = {
-                    'url': video_url,
-                }
+            video_url = config.get('video_url') or config.get('playlist', [{}])[0].get('source')
+
+            if video_url:
+                if determine_ext(video_url) == 'm3u8':
+                    formats.extend(self._extract_m3u8_formats(video_url, video_id, ext='mp4', m3u8_id='hls'))
+                else:
+                    f = {
+                        'url': video_url,
+                    }
+
+                    self._search_dimensions_in_video_url(f, video_url)
 
-                self._search_dimensions_in_video_url(f, video_url)
+                    formats.append(f)
 
-                formats.append(f)
+            vmap_url = config.get('vmapUrl') or config.get('vmap_url')
+            if vmap_url:
+                formats.extend(
+                    self._extract_formats_from_vmap_url(vmap_url, video_id))
 
-        vmap_url = config.get('vmapUrl') or config.get('vmap_url')
-        if vmap_url:
-            formats.extend(
-                self._extract_formats_from_vmap_url(vmap_url, video_id))
+            media_info = None
 
-        media_info = None
+            for entity in config.get('status', {}).get('entities', []):
+                if 'mediaInfo' in entity:
+                    media_info = entity['mediaInfo']
 
-        for entity in config.get('status', {}).get('entities', []):
-            if 'mediaInfo' in entity:
-                media_info = entity['mediaInfo']
+            if media_info:
+                formats.extend(self._parse_media_info(media_info, video_id))
+                duration = float_or_none(media_info.get('duration', {}).get('nanos'), scale=1e9)
 
-        if media_info:
-            formats.extend(self._parse_media_info(media_info, video_id))
-            duration = float_or_none(media_info.get('duration', {}).get('nanos'), scale=1e9)
+            username = config.get('user', {}).get('screen_name')
+            if username:
+                formats.extend(self._extract_mobile_formats(username, video_id))
 
-        username = config.get('user', {}).get('screen_name')
-        if username:
-            formats.extend(self._extract_mobile_formats(username, video_id))
+            if formats:
+                break
 
         self._remove_duplicate_formats(formats)
         self._sort_formats(formats)
@@ -258,9 +259,6 @@ class TwitterIE(InfoExtractor):
             'uploader_id': 'freethenipple',
             'duration': 12.922,
         },
-        'params': {
-            'skip_download': True,  # requires ffmpeg
-        },
     }, {
         'url': 'https://twitter.com/giphz/status/657991469417025536/photo/1',
         'md5': 'f36dcd5fb92bf7057f155e7d927eeb42',
@@ -277,7 +275,6 @@ class TwitterIE(InfoExtractor):
         'skip': 'Account suspended',
     }, {
         'url': 'https://twitter.com/starwars/status/665052190608723968',
-        'md5': '39b7199856dee6cd4432e72c74bc69d4',
         'info_dict': {
             'id': '665052190608723968',
             'ext': 'mp4',
@@ -303,20 +300,16 @@ class TwitterIE(InfoExtractor):
         },
     }, {
         'url': 'https://twitter.com/jaydingeer/status/700207533655363584',
-        'md5': '',
         'info_dict': {
             'id': '700207533655363584',
             'ext': 'mp4',
-            'title': 'あかさ - BEAT PROD: @suhmeduh #Damndaniel',
-            'description': 'あかさ on Twitter: "BEAT PROD: @suhmeduh  https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ"',
+            'title': 'JG - BEAT PROD: @suhmeduh #Damndaniel',
+            'description': 'JG on Twitter: "BEAT PROD: @suhmeduh  https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ"',
             'thumbnail': r're:^https?://.*\.jpg',
-            'uploader': 'あかさ',
+            'uploader': 'JG',
             'uploader_id': 'jaydingeer',
             'duration': 30.0,
         },
-        'params': {
-            'skip_download': True,  # requires ffmpeg
-        },
     }, {
         'url': 'https://twitter.com/Filmdrunk/status/713801302971588609',
         'md5': '89a15ed345d13b86e9a5a5e051fa308a',
@@ -342,9 +335,6 @@ class TwitterIE(InfoExtractor):
             'uploader': 'Captain America',
             'duration': 3.17,
         },
-        'params': {
-            'skip_download': True,  # requires ffmpeg
-        },
     }, {
         'url': 'https://twitter.com/OPP_HSD/status/779210622571536384',
         'info_dict': {
@@ -370,9 +360,6 @@ class TwitterIE(InfoExtractor):
             'uploader_id': 'news_al3alm',
             'duration': 277.4,
         },
-        'params': {
-            'format': 'best[format_id^=http-]',
-        },
     }, {
         'url': 'https://twitter.com/i/web/status/910031516746514432',
         'info_dict': {
index 207c4a6a7ee8131c3e2e5d5823aefb336ad47c47..195f5ce78d308126a1077cda11a4c00b437343fe 100644 (file)
@@ -62,11 +62,11 @@ class UdemyIE(InfoExtractor):
     def _extract_course_info(self, webpage, video_id):
         course = self._parse_json(
             unescapeHTML(self._search_regex(
-                r'ng-init=["\'].*\bcourse=({.+?});', webpage, 'course', default='{}')),
+                r'ng-init=["\'].*\bcourse=({.+?})[;"\']',
+                webpage, 'course', default='{}')),
             video_id, fatal=False) or {}
         course_id = course.get('id') or self._search_regex(
-            (r'&quot;id&quot;\s*:\s*(\d+)', r'data-course-id=["\'](\d+)'),
-            webpage, 'course id')
+            r'data-course-id=["\'](\d+)', webpage, 'course id')
         return course_id, course.get('title')
 
     def _enroll_course(self, base_url, webpage, course_id):
@@ -164,7 +164,7 @@ class UdemyIE(InfoExtractor):
         })
 
         response = self._download_webpage(
-            self._LOGIN_URL, None, 'Logging in as %s' % username,
+            self._LOGIN_URL, None, 'Logging in',
             data=urlencode_postdata(login_form),
             headers={
                 'Referer': self._ORIGIN_URL,
@@ -257,6 +257,11 @@ class UdemyIE(InfoExtractor):
                 video_url = source.get('file') or source.get('src')
                 if not video_url or not isinstance(video_url, compat_str):
                     continue
+                if source.get('type') == 'application/x-mpegURL' or determine_ext(video_url) == 'm3u8':
+                    formats.extend(self._extract_m3u8_formats(
+                        video_url, video_id, 'mp4', entry_protocol='m3u8_native',
+                        m3u8_id='hls', fatal=False))
+                    continue
                 format_id = source.get('label')
                 f = {
                     'url': video_url,
diff --git a/youtube_dl/extractor/ufctv.py b/youtube_dl/extractor/ufctv.py
new file mode 100644 (file)
index 0000000..ab82381
--- /dev/null
@@ -0,0 +1,55 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    parse_duration,
+    parse_iso8601,
+)
+
+
+class UFCTVIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?ufc\.tv/video/(?P<id>[^/]+)'
+    _TEST = {
+        'url': 'https://www.ufc.tv/video/ufc-219-countdown-full-episode',
+        'info_dict': {
+            'id': '34167',
+            'ext': 'mp4',
+            'title': 'UFC 219 Countdown: Full Episode',
+            'description': 'md5:26d4e8bf4665ae5878842d7050c3c646',
+            'timestamp': 1513962360,
+            'upload_date': '20171222',
+        },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        }
+    }
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+        video_data = self._download_json(url, display_id, query={
+            'format': 'json',
+        })
+        video_id = str(video_data['id'])
+        title = video_data['name']
+        m3u8_url = self._download_json(
+            'https://www.ufc.tv/service/publishpoint', video_id, query={
+                'type': 'video',
+                'format': 'json',
+                'id': video_id,
+            }, headers={
+                'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 11_0_1 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A402 Safari/604.1',
+            })['path']
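+        # the publishpoint API was queried with an iPhone user agent;
+        # drop the iPhone-specific suffix from the returned playlist path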
+        m3u8_url = m3u8_url.replace('_iphone.', '.')
+        formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4')
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': video_data.get('description'),
+            'duration': parse_duration(video_data.get('runtime')),
+            'timestamp': parse_iso8601(video_data.get('releaseDate')),
+            'formats': formats,
+        }
diff --git a/youtube_dl/extractor/umg.py b/youtube_dl/extractor/umg.py
new file mode 100644 (file)
index 0000000..d815cd9
--- /dev/null
@@ -0,0 +1,103 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    int_or_none,
+    parse_filesize,
+    parse_iso8601,
+)
+
+
+class UMGDeIE(InfoExtractor):
+    IE_NAME = 'umg:de'
+    IE_DESC = 'Universal Music Deutschland'
+    _VALID_URL = r'https?://(?:www\.)?universal-music\.de/[^/]+/videos/[^/?#]+-(?P<id>\d+)'
+    _TEST = {
+        'url': 'https://www.universal-music.de/sido/videos/jedes-wort-ist-gold-wert-457803',
+        'md5': 'ebd90f48c80dcc82f77251eb1902634f',
+        'info_dict': {
+            'id': '457803',
+            'ext': 'mp4',
+            'title': 'Jedes Wort ist Gold wert',
+            'timestamp': 1513591800,
+            'upload_date': '20171218',
+        }
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        video_data = self._download_json(
+            'https://api.universal-music.de/graphql',
+            video_id, query={
+                'query': '''{
+  universalMusic(channel:16) {
+    video(id:%s) {
+      headline
+      formats {
+        formatId
+        url
+        type
+        width
+        height
+        mimeType
+        fileSize
+      }
+      duration
+      createdDate
+    }
+  }
+}''' % video_id})['data']['universalMusic']['video']
+
+        title = video_data['headline']
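+        # the CDN storage path spreads the numeric video ID over one
+        # directory per digit, e.g. 457803 -> 4/5/7/8/0/3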
+        hls_url_template = 'http://mediadelivery.universal-music-services.de/vod/mp4:autofill/storage/' + '/'.join(list(video_id)) + '/content/%s/file/playlist.m3u8'
+
+        thumbnails = []
+        formats = []
+
+        def add_m3u8_format(format_id):
+            m3u8_formats = self._extract_m3u8_formats(
+                hls_url_template % format_id, video_id, 'mp4',
+                'm3u8_native', m3u8_id='hls', fatal=False)
+            if m3u8_formats and m3u8_formats[0].get('height'):
+                formats.extend(m3u8_formats)
+
+        for f in video_data.get('formats', []):
+            f_url = f.get('url')
+            mime_type = f.get('mimeType')
+            if not f_url or mime_type == 'application/mxf':
+                continue
+            fmt = {
+                'url': f_url,
+                'width': int_or_none(f.get('width')),
+                'height': int_or_none(f.get('height')),
+                'filesize': parse_filesize(f.get('fileSize')),
+            }
+            f_type = f.get('type')
+            if f_type == 'Image':
+                thumbnails.append(fmt)
+            elif f_type == 'Video':
+                format_id = f.get('formatId')
+                if format_id:
+                    fmt['format_id'] = format_id
+                    if mime_type == 'video/mp4':
+                        add_m3u8_format(format_id)
+                urlh = self._request_webpage(f_url, video_id, fatal=False)
+                if urlh:
+                    first_byte = urlh.read(1)
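+                    # FLV content starts with 'F' and MP4 with a zero byte
+                    # (box size prefix); anything else is skipped as unusable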
+                    if first_byte not in (b'F', b'\x00'):
+                        continue
+                    formats.append(fmt)
+        if not formats:
+            for format_id in (867, 836, 940):
+                add_m3u8_format(format_id)
+        self._sort_formats(formats, ('width', 'height', 'filesize', 'tbr'))
+
+        return {
+            'id': video_id,
+            'title': title,
+            'duration': int_or_none(video_data.get('duration')),
+            'timestamp': parse_iso8601(video_data.get('createdDate'), ' '),
+            'thumbnails': thumbnails,
+            'formats': formats,
+        }
index e64873bce230295f76de7fdd7973363afd5e3867..ac35d55a9505808144fed105d535d53779afb1cb 100644 (file)
@@ -28,10 +28,10 @@ class VidziIE(InfoExtractor):
         },
     }, {
         'url': 'http://vidzi.tv/embed-4z2yb0rzphe9-600x338.html',
-        'skip_download': True,
+        'only_matching': True,
     }, {
         'url': 'http://vidzi.cc/cghql9yq6emu.html',
-        'skip_download': True,
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
index 853e5c75f431da9bbc7383c31006d29c705bb881..ad2a2a4b70fdde18548e4dde62fcf9ccfc264ba4 100644 (file)
@@ -99,7 +99,7 @@ class VikiBaseIE(InfoExtractor):
 
         login = self._call_api(
             'sessions.json', None,
-            'Logging in as %s' % username, post_data=login_form)
+            'Logging in', post_data=login_form)
 
         self._token = login.get('token')
         if not self._token:
index cedb548767e84a512b8ca5e0253d81f62a8ee502..6af70565781e391915d807f49639a859c8f1b9ff 100644 (file)
@@ -468,11 +468,12 @@ class VimeoIE(VimeoBaseInfoExtractor):
         request = sanitized_Request(url, headers=headers)
         try:
             webpage, urlh = self._download_webpage_handle(request, video_id)
+            redirect_url = compat_str(urlh.geturl())
             # Some URLs that redirect to ondemand can't be extracted with
             # this extractor right away and thus should be passed through
             # the ondemand extractor (e.g. https://vimeo.com/73445910)
-            if VimeoOndemandIE.suitable(urlh.geturl()):
-                return self.url_result(urlh.geturl(), VimeoOndemandIE.ie_key())
+            if VimeoOndemandIE.suitable(redirect_url):
+                return self.url_result(redirect_url, VimeoOndemandIE.ie_key())
         except ExtractorError as ee:
             if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 403:
                 errmsg = ee.cause.read()
@@ -541,15 +542,15 @@ class VimeoIE(VimeoBaseInfoExtractor):
             if re.search(r'<form[^>]+?id="pw_form"', webpage) is not None:
                 if '_video_password_verified' in data:
                     raise ExtractorError('video password verification failed!')
-                self._verify_video_password(url, video_id, webpage)
+                self._verify_video_password(redirect_url, video_id, webpage)
                 return self._real_extract(
-                    smuggle_url(url, {'_video_password_verified': 'verified'}))
+                    smuggle_url(redirect_url, {'_video_password_verified': 'verified'}))
             else:
                 raise ExtractorError('Unable to extract info section',
                                      cause=e)
         else:
             if config.get('view') == 4:
-                config = self._verify_player_video_password(url, video_id)
+                config = self._verify_player_video_password(redirect_url, video_id)
 
         def is_rented():
             if '>You rented this title.<' in webpage:
index 105e172d539d4376e4534df31266c489f3d62518..d4838b3e5f59410c7e3db3b99823261d51350925 100644 (file)
@@ -67,7 +67,7 @@ class VKBaseIE(InfoExtractor):
 
         login_page = self._download_webpage(
             'https://login.vk.com/?act=login', None,
-            note='Logging in as %s' % username,
+            note='Logging in',
             data=urlencode_postdata(login_form))
 
         if re.search(r'onLoginFailed', login_page):
@@ -414,7 +414,7 @@ class VKIE(VKBaseIE):
 
         view_count = str_to_int(self._search_regex(
             r'class=["\']mv_views_count[^>]+>\s*([\d,.]+)',
-            info_page, 'view count', fatal=False))
+            info_page, 'view count', default=None))
 
         formats = []
         for format_id, format_url in data.items():
index 5de3deb8c0e8f95335f49c69dc3a7a4746cb05a2..751b21ee517a174f28082ce21e94995e073203f9 100644 (file)
@@ -2,7 +2,6 @@
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
-from .kaltura import KalturaIE
 from ..utils import (
     ExtractorError,
     int_or_none,
@@ -21,7 +20,6 @@ class VootIE(InfoExtractor):
             'ext': 'mp4',
             'title': 'Ishq Ka Rang Safed - Season 01 - Episode 340',
             'description': 'md5:06291fbbbc4dcbe21235c40c262507c1',
-            'uploader_id': 'batchUser',
             'timestamp': 1472162937,
             'upload_date': '20160825',
             'duration': 1146,
@@ -63,6 +61,10 @@ class VootIE(InfoExtractor):
 
         entry_id = media['EntryId']
         title = media['MediaName']
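+        # fetch HLS formats straight from Kaltura's playManifest endpoint
+        # instead of delegating to the Kaltura extractor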
+        formats = self._extract_m3u8_formats(
+            'https://cdnapisec.kaltura.com/p/1982551/playManifest/pt/https/f/applehttp/t/web/e/' + entry_id,
+            video_id, 'mp4', m3u8_id='hls')
+        self._sort_formats(formats)
 
         description, series, season_number, episode, episode_number = [None] * 5
 
@@ -82,9 +84,8 @@ class VootIE(InfoExtractor):
                 episode_number = int_or_none(value)
 
         return {
-            '_type': 'url_transparent',
-            'url': 'kaltura:1982551:%s' % entry_id,
-            'ie_key': KalturaIE.ie_key(),
+            'extractor_key': 'Kaltura',
+            'id': entry_id,
             'title': title,
             'description': description,
             'series': series,
@@ -95,4 +96,5 @@ class VootIE(InfoExtractor):
             'duration': int_or_none(media.get('Duration')),
             'view_count': int_or_none(media.get('ViewCounter')),
             'like_count': int_or_none(media.get('like_counter')),
+            'formats': formats,
         }
index 5addbc2803d213f4fcb987ecc26aa8c2ae0de6aa..e4ec778896692c84ce7be54d9628b64be93de5f7 100644 (file)
@@ -1,14 +1,21 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
+import re
+
 from .common import InfoExtractor
+from ..compat import compat_chr
+from ..utils import (
+    decode_packed_codes,
+    ExtractorError,
+)
 
 
 class VShareIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?vshare\.io/[dv]/(?P<id>[^/?#&]+)'
     _TESTS = [{
         'url': 'https://vshare.io/d/0f64ce6',
-        'md5': '16d7b8fef58846db47419199ff1ab3e7',
+        'md5': '17b39f55b5497ae8b59f5fbce8e35886',
         'info_dict': {
             'id': '0f64ce6',
             'title': 'vl14062007715967',
@@ -19,20 +26,49 @@ class VShareIE(InfoExtractor):
         'only_matching': True,
     }]
 
+    @staticmethod
+    def _extract_urls(webpage):
+        return re.findall(
+            r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?vshare\.io/v/[^/?#&]+)',
+            webpage)
+
+    def _extract_packed(self, webpage):
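+        # the embed page hides its media markup in P.A.C.K.E.R.-packed JS:
+        # unpack it, then shift each number in the embedded array by the key
+        # digit to recover the original characters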
+        packed = self._search_regex(
+            r'(eval\(function.+)', webpage, 'packed code')
+        unpacked = decode_packed_codes(packed)
+        digits = self._search_regex(r'\[((?:\d+,?)+)\]', unpacked, 'digits')
+        digits = [int(digit) for digit in digits.split(',')]
+        key_digit = self._search_regex(
+            r'fromCharCode\(.+?(\d+)\)}', unpacked, 'key digit')
+        chars = [compat_chr(d - int(key_digit)) for d in digits]
+        return ''.join(chars)
+
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
         webpage = self._download_webpage(
-            'https://vshare.io/d/%s' % video_id, video_id)
+            'https://vshare.io/v/%s/width-650/height-430/1' % video_id,
+            video_id)
 
         title = self._html_search_regex(
-            r'(?s)<div id="root-container">(.+?)<br/>', webpage, 'title')
-        video_url = self._search_regex(
-            r'<a[^>]+href=(["\'])(?P<url>(?:https?:)?//.+?)\1[^>]*>[Cc]lick\s+here',
-            webpage, 'video url', group='url')
+            r'<title>([^<]+)</title>', webpage, 'title')
+        title = title.split(' - ')[0]
+
+        error = self._html_search_regex(
+            r'(?s)<div[^>]+\bclass=["\']xxx-error[^>]+>(.+?)</div', webpage,
+            'error', default=None)
+        if error:
+            raise ExtractorError(error, expected=True)
 
-        return {
+        info = self._parse_html5_media_entries(
+            url, '<video>%s</video>' % self._extract_packed(webpage),
+            video_id)[0]
+
+        self._sort_formats(info['formats'])
+
+        info.update({
             'id': video_id,
             'title': title,
-            'url': video_url,
-        }
+        })
+
+        return info
index 656a4b9e5a68afddcee721321dccab2104074aeb..3d0dc403b6dc3a6b9ff5b2c50b55d3b1b033d855 100644 (file)
@@ -22,6 +22,9 @@ class VVVVIDIE(InfoExtractor):
             'ext': 'mp4',
             'title': 'Ping Pong',
         },
+        'params': {
+            'skip_download': True,
+        },
     }, {
         # video_type == 'video/rcs'
         'url': 'https://www.vvvvid.it/#!show/376/death-note-live-action/377/482493/episodio-01',
@@ -31,6 +34,9 @@ class VVVVIDIE(InfoExtractor):
             'ext': 'mp4',
             'title': 'Episodio 01',
         },
+        'params': {
+            'skip_download': True,
+        },
     }]
     _conn_id = None
 
@@ -116,8 +122,20 @@ class VVVVIDIE(InfoExtractor):
             embed_code = ds(embed_code)
             video_type = video_data.get('video_type')
             if video_type in ('video/rcs', 'video/kenc'):
-                formats.extend(self._extract_akamai_formats(
-                    embed_code, video_id))
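+                # rewrite the Akamai HDS URL (/z/.../manifest.f4m) into its
+                # HLS counterpart (/i/.../master.m3u8)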
+                embed_code = re.sub(r'https?://([^/]+)/z/', r'https://\1/i/', embed_code).replace('/manifest.f4m', '/master.m3u8')
+                if video_type == 'video/kenc':
+                    kenc = self._download_json(
+                        'https://www.vvvvid.it/kenc', video_id, query={
+                            'action': 'kt',
+                            'conn_id': self._conn_id,
+                            'url': embed_code,
+                        }, fatal=False) or {}
+                    kenc_message = kenc.get('message')
+                    if kenc_message:
+                        embed_code += '?' + ds(kenc_message)
+                formats.extend(self._extract_m3u8_formats(
+                    embed_code, video_id, 'mp4',
+                    m3u8_id='hls', fatal=False))
             else:
                 formats.extend(self._extract_wowza_formats(
                     'http://sb.top-ix.org/videomg/_definst_/mp4:%s/playlist.m3u8' % embed_code, video_id))
index 9b5487710e9d1b73a2e26983bacde9e2e8891761..67236f377d2714b517b1f27e687258547c75ac62 100644 (file)
@@ -13,7 +13,7 @@ class WSJIE(InfoExtractor):
     _VALID_URL = r'''(?x)
                         (?:
                             https?://video-api\.wsj\.com/api-video/player/iframe\.html\?.*?\bguid=|
-                            https?://(?:www\.)?(?:wsj|barrons)\.com/video/[^/]+/|
+                            https?://(?:www\.)?(?:wsj|barrons)\.com/video/(?:[^/]+/)+|
                             wsj:
                         )
                         (?P<id>[a-fA-F0-9-]{36})
@@ -38,6 +38,9 @@ class WSJIE(InfoExtractor):
     }, {
         'url': 'http://www.barrons.com/video/capitalism-deserves-more-respect-from-millennials/F301217E-6F46-43AE-B8D2-B7180D642EE9.html',
         'only_matching': True,
+    }, {
+        'url': 'https://www.wsj.com/video/series/a-brief-history-of/the-modern-cell-carrier-how-we-got-here/980E2187-401D-48A1-B82B-1486CEE06CB9',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
index be3624ef2ea889835992e55949271c0028e9783b..68652a22fc7453d01802334b3af021557cc1cedd 100644 (file)
@@ -6,10 +6,12 @@ from .common import InfoExtractor
 from ..compat import compat_str
 from ..utils import (
     clean_html,
+    determine_ext,
     dict_get,
     ExtractorError,
     int_or_none,
     parse_duration,
+    try_get,
     unified_strdate,
 )
 
@@ -32,6 +34,7 @@ class XHamsterIE(InfoExtractor):
             'display_id': 'femaleagent_shy_beauty_takes_the_bait',
             'ext': 'mp4',
             'title': 'FemaleAgent Shy beauty takes the bait',
+            'timestamp': 1350194821,
             'upload_date': '20121014',
             'uploader': 'Ruseful2011',
             'duration': 893,
@@ -45,6 +48,7 @@ class XHamsterIE(InfoExtractor):
             'display_id': 'britney_spears_sexy_booty',
             'ext': 'mp4',
             'title': 'Britney Spears  Sexy Booty',
+            'timestamp': 1379123460,
             'upload_date': '20130914',
             'uploader': 'jojo747400',
             'duration': 200,
@@ -61,6 +65,7 @@ class XHamsterIE(InfoExtractor):
             'id': '5667973',
             'ext': 'mp4',
             'title': '....',
+            'timestamp': 1454948101,
             'upload_date': '20160208',
             'uploader': 'parejafree',
             'duration': 72,
@@ -70,6 +75,10 @@ class XHamsterIE(InfoExtractor):
         'params': {
             'skip_download': True,
         },
+    }, {
+        # mobile site
+        'url': 'https://m.xhamster.com/videos/cute-teen-jacqueline-solo-masturbation-8559111',
+        'only_matching': True,
     }, {
         'url': 'https://xhamster.com/movies/2272726/amber_slayed_by_the_knight.html',
         'only_matching': True,
@@ -88,7 +97,8 @@ class XHamsterIE(InfoExtractor):
         video_id = mobj.group('id') or mobj.group('id_2')
         display_id = mobj.group('display_id') or mobj.group('display_id_2')
 
-        webpage = self._download_webpage(url, video_id)
+        desktop_url = re.sub(r'^(https?://(?:.+?\.)?)m\.', r'\1', url)
+        webpage = self._download_webpage(desktop_url, video_id)
 
         error = self._html_search_regex(
             r'<div[^>]+id=["\']videoClosed["\'][^>]*>(.+?)</div>',
@@ -96,6 +106,83 @@ class XHamsterIE(InfoExtractor):
         if error:
             raise ExtractorError(error, expected=True)
 
+        age_limit = self._rta_search(webpage)
+
+        def get_height(s):
+            return int_or_none(self._search_regex(
+                r'^(\d+)[pP]', s, 'height', default=None))
+
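+        # newer pages embed all metadata in a window.initials JSON object;
+        # use it when present and fall back to the old HTML layout below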
+        initials = self._parse_json(
+            self._search_regex(
+                r'window\.initials\s*=\s*({.+?})\s*;\s*\n', webpage, 'initials',
+                default='{}'),
+            video_id, fatal=False)
+        if initials:
+            video = initials['videoModel']
+            title = video['title']
+            formats = []
+            for format_id, formats_dict in video['sources'].items():
+                if not isinstance(formats_dict, dict):
+                    continue
+                for quality, format_item in formats_dict.items():
+                    if format_id == 'download':
+                        # Download link takes some time to be generated,
+                        # skipping for now
+                        continue
+                        if not isinstance(format_item, dict):
+                            continue
+                        format_url = format_item.get('link')
+                        filesize = int_or_none(
+                            format_item.get('size'), invscale=1000000)
+                    else:
+                        format_url = format_item
+                        filesize = None
+                    if not isinstance(format_url, compat_str):
+                        continue
+                    formats.append({
+                        'format_id': '%s-%s' % (format_id, quality),
+                        'url': format_url,
+                        'ext': determine_ext(format_url, 'mp4'),
+                        'height': get_height(quality),
+                        'filesize': filesize,
+                    })
+            self._sort_formats(formats)
+
+            categories_list = video.get('categories')
+            if isinstance(categories_list, list):
+                categories = []
+                for c in categories_list:
+                    if not isinstance(c, dict):
+                        continue
+                    c_name = c.get('name')
+                    if isinstance(c_name, compat_str):
+                        categories.append(c_name)
+            else:
+                categories = None
+
+            return {
+                'id': video_id,
+                'display_id': display_id,
+                'title': title,
+                'description': video.get('description'),
+                'timestamp': int_or_none(video.get('created')),
+                'uploader': try_get(
+                    video, lambda x: x['author']['name'], compat_str),
+                'thumbnail': video.get('thumbURL'),
+                'duration': int_or_none(video.get('duration')),
+                'view_count': int_or_none(video.get('views')),
+                'like_count': int_or_none(try_get(
+                    video, lambda x: x['rating']['likes'], int)),
+                'dislike_count': int_or_none(try_get(
+                    video, lambda x: x['rating']['dislikes'], int)),
+                'comment_count': int_or_none(video.get('comments')),
+                'age_limit': age_limit,
+                'categories': categories,
+                'formats': formats,
+            }
+
+        # Old layout fallback
+
         title = self._html_search_regex(
             [r'<h1[^>]*>([^<]+)</h1>',
              r'<meta[^>]+itemprop=".*?caption.*?"[^>]+content="(.+?)"',
@@ -119,8 +206,7 @@ class XHamsterIE(InfoExtractor):
             formats.append({
                 'format_id': format_id,
                 'url': format_url,
-                'height': int_or_none(self._search_regex(
-                    r'^(\d+)[pP]', format_id, 'height', default=None))
+                'height': get_height(format_id),
             })
 
         video_url = self._search_regex(
@@ -148,8 +234,8 @@ class XHamsterIE(InfoExtractor):
             webpage, 'uploader', default='anonymous')
 
         thumbnail = self._search_regex(
-            [r'''thumb\s*:\s*(?P<q>["'])(?P<thumbnail>.+?)(?P=q)''',
-             r'''<video[^>]+poster=(?P<q>["'])(?P<thumbnail>.+?)(?P=q)[^>]*>'''],
+            [r'''["']thumbUrl["']\s*:\s*(?P<q>["'])(?P<thumbnail>.+?)(?P=q)''',
+             r'''<video[^>]+"poster"=(?P<q>["'])(?P<thumbnail>.+?)(?P=q)[^>]*>'''],
             webpage, 'thumbnail', fatal=False, group='thumbnail')
 
         duration = parse_duration(self._search_regex(
@@ -167,8 +253,6 @@ class XHamsterIE(InfoExtractor):
         mobj = re.search(r'</label>Comments \((?P<commentcount>\d+)\)</div>', webpage)
         comment_count = mobj.group('commentcount') if mobj else 0
 
-        age_limit = self._rta_search(webpage)
-
         categories_html = self._search_regex(
             r'(?s)<table.+?(<span>Categories:.+?)</table>', webpage,
             'categories', default=None)
@@ -195,15 +279,16 @@ class XHamsterIE(InfoExtractor):
 
 
 class XHamsterEmbedIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?xhamster\.com/xembed\.php\?video=(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:.+?\.)?xhamster\.com/xembed\.php\?video=(?P<id>\d+)'
     _TEST = {
         'url': 'http://xhamster.com/xembed.php?video=3328539',
         'info_dict': {
             'id': '3328539',
             'ext': 'mp4',
             'title': 'Pen Masturbation',
+            'timestamp': 1406581861,
             'upload_date': '20140728',
-            'uploader_id': 'anonymous',
+            'uploader': 'ManyakisArt',
             'duration': 5,
             'age_limit': 18,
         }
index d017e03de2092c8726bdad7e86b364b57e44e136..7f871c8ec7c65b8e969517165126824342dbf2e4 100644 (file)
@@ -40,9 +40,12 @@ class XiamiBaseIE(InfoExtractor):
             'subtitles': subtitles,
         }
 
-    def _extract_tracks(self, item_id, typ=None):
+    def _extract_tracks(self, item_id, referer, typ=None):
         playlist = self._download_json(
-            '%s/%s%s' % (self._API_BASE_URL, item_id, '/type/%s' % typ if typ else ''), item_id)
+            '%s/%s%s' % (self._API_BASE_URL, item_id, '/type/%s' % typ if typ else ''),
+            item_id, headers={
+                'Referer': referer,
+            })
         return [
             self._extract_track(track, item_id)
             for track in playlist['data']['trackList']]
@@ -135,13 +138,13 @@ class XiamiSongIE(XiamiBaseIE):
     }]
 
     def _real_extract(self, url):
-        return self._extract_tracks(self._match_id(url))[0]
+        return self._extract_tracks(self._match_id(url), url)[0]
 
 
 class XiamiPlaylistBaseIE(XiamiBaseIE):
     def _real_extract(self, url):
         item_id = self._match_id(url)
-        return self.playlist_result(self._extract_tracks(item_id, self._TYPE), item_id)
+        return self.playlist_result(self._extract_tracks(item_id, url, self._TYPE), item_id)
 
 
 class XiamiAlbumIE(XiamiPlaylistBaseIE):
index 0c4bc2edab616cceff7cf68f7f64d12010c6e16d..c7947d4a1165212d0bbeb1ad571d9b6a1c04590a 100644 (file)
@@ -154,7 +154,7 @@ class YoukuIE(InfoExtractor):
         # request basic data
         basic_data_params = {
             'vid': video_id,
-            'ccode': '0402' if 'tudou.com' in url else '0401',
+            'ccode': '0507',
             'client_ip': '192.168.1.1',
             'utid': cna,
             'client_ts': time.time() / 1000,
@@ -240,7 +240,11 @@ class YoukuShowIE(InfoExtractor):
     }, {
         # Ongoing playlist. The initial page is the last one
         'url': 'http://list.youku.com/show/id_za7c275ecd7b411e1a19e.html',
-        'only_matchine': True,
+        'only_matching': True,
+    }, {
+        # No data-id value.
+        'url': 'http://list.youku.com/show/id_zefbfbd61237fefbfbdef.html',
+        'only_matching': True,
     }]
 
     def _extract_entries(self, playlist_data_url, show_id, note, query):
@@ -276,9 +280,9 @@ class YoukuShowIE(InfoExtractor):
             r'<div[^>]+id="(reload_\d+)', first_page, 'first page reload id')
         # The first reload_id has the same items as first_page
         reload_ids = re.findall('<li[^>]+data-id="([^"]+)">', first_page)
+        entries.extend(initial_entries)
         for idx, reload_id in enumerate(reload_ids):
             if reload_id == first_page_reload_id:
-                entries.extend(initial_entries)
                 continue
             _, new_entries = self._extract_entries(
                 'http://list.youku.com/show/episode', show_id,
index 9943dddc13b478ce23e71c43d19f5cf18694f90b..0919bef0e06ae1b839a7debc38bdf457dafae192 100644 (file)
@@ -2270,6 +2270,19 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
             r'(?s)<h1 class="pl-header-title[^"]*"[^>]*>\s*(.*?)\s*</h1>',
             page, 'title', default=None)
 
+        _UPLOADER_BASE = r'class=["\']pl-header-details[^>]+>\s*<li>\s*<a[^>]+\bhref='
+        uploader = self._search_regex(
+            r'%s["\']/(?:user|channel)/[^>]+>([^<]+)' % _UPLOADER_BASE,
+            page, 'uploader', default=None)
+        mobj = re.search(
+            r'%s(["\'])(?P<path>/(?:user|channel)/(?P<uploader_id>.+?))\1' % _UPLOADER_BASE,
+            page)
+        if mobj:
+            uploader_id = mobj.group('uploader_id')
+            uploader_url = compat_urlparse.urljoin(url, mobj.group('path'))
+        else:
+            uploader_id = uploader_url = None
+
         has_videos = True
 
         if not playlist_title:
@@ -2280,8 +2293,15 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
             except StopIteration:
                 has_videos = False
 
-        return has_videos, self.playlist_result(
+        playlist = self.playlist_result(
             self._entries(page, playlist_id), playlist_id, playlist_title)
+        playlist.update({
+            'uploader': uploader,
+            'uploader_id': uploader_id,
+            'uploader_url': uploader_url,
+        })
+
+        return has_videos, playlist
 
     def _check_download_just_video(self, url, playlist_id):
         # Check if it's a video-specific URL
index fbdfa02acc88ff8ba82684a2e5545aebe3fce5da..b0aed9ca7b2da21d09223b8822a0c530c2a7b4cc 100644 (file)
@@ -42,6 +42,7 @@ class XAttrMetadataPP(PostProcessor):
                 'user.dublincore.format': 'format',
             }
 
+            num_written = 0
             for xattrname, infoname in xattr_mapping.items():
 
                 value = info.get(infoname)
@@ -52,6 +53,7 @@ class XAttrMetadataPP(PostProcessor):
 
                     byte_value = value.encode('utf-8')
                     write_xattr(filename, xattrname, byte_value)
+                    num_written += 1
 
             return [], info
 
@@ -62,8 +64,8 @@ class XAttrMetadataPP(PostProcessor):
         except XAttrMetadataError as e:
             if e.reason == 'NO_SPACE':
                 self._downloader.report_warning(
-                    'There\'s no disk space left or disk quota exceeded. ' +
-                    'Extended attributes are not written.')
+                    'There\'s no disk space left, disk quota exceeded or filesystem xattr limit exceeded. ' +
+                    (('Some ' if num_written else '') + 'extended attributes are not written.').capitalize())
             elif e.reason == 'VALUE_TOO_LONG':
                 self._downloader.report_warning(
                     'Unable to write extended attributes due to too long values.')
index 34866a54b6efc122f4d0edb22712c503fa448ec0..2843a3dc06be1b7b4d27ecf6678cb0ebdb971070 100644 (file)
@@ -159,6 +159,8 @@ DATE_FORMATS = (
     '%Y-%m-%dT%H:%M',
     '%b %d %Y at %H:%M',
     '%b %d %Y at %H:%M:%S',
+    '%B %d %Y at %H:%M',
+    '%B %d %Y at %H:%M:%S',
 )
 
 DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS)
@@ -2350,6 +2352,7 @@ def mimetype2ext(mt):
         'ttml+xml': 'ttml',
         'x-flv': 'flv',
         'x-mp4-fragmented': 'mp4',
+        'x-ms-sami': 'sami',
         'x-ms-wmv': 'wmv',
         'mpegurl': 'm3u8',
         'x-mpegurl': 'm3u8',
@@ -2372,7 +2375,7 @@ def parse_codecs(codecs_str):
     vcodec, acodec = None, None
     for full_codec in splited_codecs:
         codec = full_codec.split('.')[0]
-        if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v'):
+        if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1'):
             if not vcodec:
                 vcodec = full_codec
         elif codec in ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'):
index 8b67d23fee344f6192b34c6ab218577538e5575b..a3f84b9ea46ed2a801bc3edb6c34e3551699f61f 100644 (file)
@@ -1,3 +1,3 @@
 from __future__ import unicode_literals
 
-__version__ = '2017.11.06'
+__version__ = '2017.12.31'