From: Rogério Brito Date: Thu, 4 Jan 2018 05:48:53 +0000 (-0200) Subject: Update upstream source from tag 'upstream/2017.12.31' X-Git-Url: https://git.rapsys.eu/youtubedl/commitdiff_plain/9113dfef91df19343cf76c6274dd0a85258c1004?hp=80893415fd8cecb59cb8ffbea17a183d4202f02e Update upstream source from tag 'upstream/2017.12.31' Update to upstream version '2017.12.31' with Debian dir 1882f6f128562a71691f1092c6b611d46798c5c4 --- diff --git a/AUTHORS b/AUTHORS new file mode 100644 index 0000000..7e01224 --- /dev/null +++ b/AUTHORS @@ -0,0 +1,233 @@ +Ricardo Garcia Gonzalez +Danny Colligan +Benjamin Johnson +Vasyl' Vavrychuk +Witold Baryluk +Paweł Paprota +Gergely Imreh +Rogério Brito +Philipp Hagemeister +Sören Schulze +Kevin Ngo +Ori Avtalion +shizeeg +Filippo Valsorda +Christian Albrecht +Dave Vasilevsky +Jaime Marquínez Ferrándiz +Jeff Crouse +Osama Khalid +Michael Walter +M. Yasoob Ullah Khalid +Julien Fraichard +Johny Mo Swag +Axel Noack +Albert Kim +Pierre Rudloff +Huarong Huo +Ismael Mejía +Steffan Donal +Andras Elso +Jelle van der Waa +Marcin Cieślak +Anton Larionov +Takuya Tsuchida +Sergey M. +Michael Orlitzky +Chris Gahan +Saimadhav Heblikar +Mike Col +Oleg Prutz +pulpe +Andreas Schmitz +Michael Kaiser +Niklas Laxström +David Triendl +Anthony Weems +David Wagner +Juan C. 
Olivares +Mattias Harrysson +phaer +Sainyam Kapoor +Nicolas Évrard +Jason Normore +Hoje Lee +Adam Thalhammer +Georg Jähnig +Ralf Haring +Koki Takahashi +Ariset Llerena +Adam Malcontenti-Wilson +Tobias Bell +Naglis Jonaitis +Charles Chen +Hassaan Ali +Dobrosław Żybort +David Fabijan +Sebastian Haas +Alexander Kirk +Erik Johnson +Keith Beckman +Ole Ernst +Aaron McDaniel (mcd1992) +Magnus Kolstad +Hari Padmanaban +Carlos Ramos +5moufl +lenaten +Dennis Scheiba +Damon Timm +winwon +Xavier Beynon +Gabriel Schubiner +xantares +Jan Matějka +Mauroy Sébastien +William Sewell +Dao Hoang Son +Oskar Jauch +Matthew Rayfield +t0mm0 +Tithen-Firion +Zack Fernandes +cryptonaut +Adrian Kretz +Mathias Rav +Petr Kutalek +Will Glynn +Max Reimann +Cédric Luthi +Thijs Vermeir +Joel Leclerc +Christopher Krooss +Ondřej Caletka +Dinesh S +Johan K. Jensen +Yen Chi Hsuan +Enam Mijbah Noor +David Luhmer +Shaya Goldberg +Paul Hartmann +Frans de Jonge +Robin de Rooij +Ryan Schmidt +Leslie P. Polzer +Duncan Keall +Alexander Mamay +Devin J. Pohly +Eduardo Ferro Aldama +Jeff Buchbinder +Amish Bhadeshia +Joram Schrijver +Will W. +Mohammad Teimori Pabandi +Roman Le Négrate +Matthias Küch +Julian Richen +Ping O. +Mister Hat +Peter Ding +jackyzy823 +George Brighton +Remita Amine +Aurélio A. 
Heckert +Bernhard Minks +sceext +Zach Bruggeman +Tjark Saul +slangangular +Behrouz Abbasi +ngld +nyuszika7h +Shaun Walbridge +Lee Jenkins +Anssi Hannula +Lukáš Lalinský +Qijiang Fan +Rémy Léone +Marco Ferragina +reiv +Muratcan Simsek +Evan Lu +flatgreen +Brian Foley +Vignesh Venkat +Tom Gijselinck +Founder Fang +Andrew Alexeyew +Saso Bezlaj +Erwin de Haan +Jens Wille +Robin Houtevelts +Patrick Griffis +Aidan Rowe +mutantmonkey +Ben Congdon +Kacper Michajłow +José Joaquín Atria +Viťas Strádal +Kagami Hiiragi +Philip Huppert +blahgeek +Kevin Deldycke +inondle +Tomáš Čech +Déstin Reed +Roman Tsiupa +Artur Krysiak +Jakub Adam Wieczorek +Aleksandar Topuzović +Nehal Patel +Rob van Bekkum +Petr Zvoníček +Pratyush Singh +Aleksander Nitecki +Sebastian Blunt +Matěj Cepl +Xie Yanbo +Philip Xu +John Hawkinson +Rich Leeper +Zhong Jianxin +Thor77 +Mattias Wadman +Arjan Verwer +Costy Petrisor +Logan B +Alex Seiler +Vijay Singh +Paul Hartmann +Stephen Chen +Fabian Stahl +Bagira +Odd Stråbø +Philip Herzog +Thomas Christlieb +Marek Rusinowski +Tobias Gruetzmacher +Olivier Bilodeau +Lars Vierbergen +Juanjo Benages +Xiao Di Guan +Thomas Winant +Daniel Twardowski +Jeremie Jarosh +Gerard Rovira +Marvin Ewald +Frédéric Bournival +Timendum +gritstub +Adam Voss +Mike Fährmann +Jan Kundrát +Giuseppe Fabiano +Örn Guðjónsson +Parmjit Virk +Genki Sky +Ľuboš Katrinec +Corey Nicholson +Ashutosh Chaudhary +John Dong +Tatsuyuki Ishi +Daniel Weber +Kay Bouché diff --git a/ChangeLog b/ChangeLog index 8af3682..bfffb1f 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,198 @@ +version 2017.12.31 + +Core ++ [extractor/common] Add container meta field for formats extracted + in _parse_mpd_formats (#13616) ++ [downloader/hls] Use HTTP headers for key request +* [common] Use AACL as the default fourcc when AudioTag is 255 +* [extractor/common] Fix extraction of DASH formats with the same + representation id (#15111) + +Extractors ++ [slutload] Add support for mobile URLs (#14806) +* [abc:iview] Bypass 
geo restriction +* [abc:iview] Fix extraction (#14711, #14782, #14838, #14917, #14963, #14985, + #15035, #15057, #15061, #15071, #15095, #15106) +* [openload] Fix extraction (#15118) +- [sandia] Remove extractor +- [collegerama] Remove extractor ++ [mediasite] Add support for sites based on Mediasite Video Platform (#5428, + #11185, #14343) ++ [ufctv] Add support for ufc.tv (#14520) +* [pluralsight] Fix missing first line of subtitles (#11118) +* [openload] Fallback on f-page extraction (#14665, #14879) +* [vimeo] Improve password protected videos extraction (#15114) +* [aws] Fix canonical/signed headers generation on python 2 (#15102) + + +version 2017.12.28 + +Extractors ++ [internazionale] Add support for internazionale.it (#14973) +* [playtvak] Relax video regular expression and make description optional + (#15037) ++ [filmweb] Add support for filmweb.no (#8773, #10368) ++ [23video] Add support for 23video.com ++ [espn] Add support for fivethirtyeight.com (#6864) ++ [umg:de] Add support for universal-music.de (#11582, #11584) ++ [espn] Add support for espnfc and extract more formats (#8053) +* [youku] Update ccode (#14880) ++ [openload] Add support for oload.stream (#15070) +* [youku] Fix list extraction (#15065) + + +version 2017.12.23 + +Core +* [extractor/common] Move X-Forwarded-For setup code into _request_webpage ++ [YoutubeDL] Add support for playlist_uploader and playlist_uploader_id in + output template (#11427, #15018) ++ [extractor/common] Introduce uploader, uploader_id and uploader_url + meta fields for playlists (#11427, #15018) +* [downloader/fragment] Encode filename of fragment being removed (#15020) ++ [utils] Add another date format pattern (#14999) + +Extractors ++ [kaltura] Add another embed pattern for entry_id ++ [7plus] Add support for 7plus.com.au (#15043) +* [animeondemand] Relax login error regular expression ++ [shahid] Add support for show pages (#7401) ++ [youtube] Extract uploader, uploader_id and uploader_url for playlists + 
(#11427, #15018) +* [afreecatv] Improve format extraction (#15019) ++ [cspan] Add support for audio only pages and catch page errors (#14995) ++ [mailru] Add support for embed URLs (#14904) +* [crunchyroll] Future-proof XML element checks (#15013) +* [cbslocal] Fix timestamp extraction (#14999, #15000) +* [discoverygo] Correct TTML subtitle extension +* [vk] Make view count optional (#14979) +* [disney] Skip Apple FairPlay formats (#14982) +* [voot] Fix format extraction (#14758) + + +version 2017.12.14 + +Core +* [postprocessor/xattr] Clarify NO_SPACE message (#14970) +* [downloader/http] Return actual download result from real_download (#14971) + +Extractors ++ [itv] Extract more subtitles and duration +* [itv] Improve extraction (#14944) ++ [byutv] Add support for geo restricted videos +* [byutv] Fix extraction (#14966, #14967) ++ [bbccouk] Fix extraction for 320k HLS streams ++ [toutv] Add support for special video URLs (#14179) +* [discovery] Fix free videos extraction (#14157, #14954) +* [tvnow] Fix extraction (#7831) ++ [nickelodeon:br] Add support for nickelodeon brazil websites (#14893) +* [nick] Improve extraction (#14876) +* [tbs] Fix extraction (#13658) + + +version 2017.12.10 + +Core ++ [utils] Add sami mimetype to mimetype2ext + +Extractors +* [culturebox] Improve video id extraction (#14947) +* [twitter] Improve extraction (#14197) ++ [udemy] Extract more HLS formats +* [udemy] Improve course id extraction (#14938) ++ [stretchinternet] Add support for portal.stretchinternet.com (#14576) +* [ellentube] Fix extraction (#14407, #14570) ++ [raiplay:playlist] Add support for playlists (#14563) +* [sonyliv] Bypass geo restriction +* [sonyliv] Extract higher quality formats (#14922) +* [fox] Extract subtitles ++ [fox] Add support for Adobe Pass authentication (#14205, #14489) +- [dailymotion:cloud] Remove extractor (#6794) +* [xhamster] Fix thumbnail extraction (#14780) ++ [xhamster] Add support for mobile URLs (#14780) +* [generic] Don't pass video id as 
mpd id while extracting DASH (#14902) +* [ard] Skip invalid stream URLs (#14906) +* [porncom] Fix metadata extraction (#14911) +* [pluralsight] Detect agreement request (#14913) +* [toutv] Fix login (#14614) + + +version 2017.12.02 + +Core ++ [downloader/fragment] Commit part file after each fragment ++ [extractor/common] Add durations for DASH fragments with bare SegmentURLs ++ [extractor/common] Add support for DASH manifests with SegmentLists with + bare SegmentURLs (#14844) ++ [utils] Add hvc1 codec code to parse_codecs + +Extractors +* [xhamster] Fix extraction (#14884) +* [youku] Update ccode (#14872) +* [mnet] Fix format extraction (#14883) ++ [xiami] Add Referer header to API request +* [mtv] Correct scc extention in extracted subtitles (#13730) +* [vvvvid] Fix extraction for kenc videos (#13406) ++ [br] Add support for BR Mediathek videos (#14560, #14788) ++ [daisuki] Add support for motto.daisuki.com (#14681) +* [odnoklassniki] Fix API metadata request (#14862) +* [itv] Fix HLS formats extraction ++ [pbs] Add another media id regular expression + + +version 2017.11.26 + +Core +* [extractor/common] Use final URL when dumping request (#14769) + +Extractors +* [fczenit] Fix extraction +- [firstpost] Remove extractor +* [freespeech] Fix extraction +* [nexx] Extract more formats ++ [openload] Add support for openload.link (#14763) +* [empflix] Relax URL regular expression +* [empflix] Fix extractrion +* [tnaflix] Don't modify download URLs (#14811) +- [gamersyde] Remove extractor +* [francetv:generationwhat] Fix extraction ++ [massengeschmacktv] Add support for Massengeschmack TV +* [fox9] Fix extraction +* [faz] Fix extraction and add support for Perform Group embeds (#14714) ++ [performgroup] Add support for performgroup.com ++ [jwplatform] Add support for iframes (#14828) +* [culturebox] Fix extraction (#14827) +* [youku] Fix extraction; update ccode (#14815) +* [livestream] Make SMIL extraction non fatal (#14792) ++ [drtuber] Add support for mobile URLs 
(#14772) ++ [spankbang] Add support for mobile URLs (#14771) +* [instagram] Fix description, timestamp and counters extraction (#14755) + + +version 2017.11.15 + +Core +* [common] Skip Apple FairPlay m3u8 manifests (#14741) +* [YoutubeDL] Fix playlist range optimization for --playlist-items (#14740) + +Extractors +* [vshare] Capture and output error message +* [vshare] Fix extraction (#14473) +* [crunchyroll] Extract old RTMP formats +* [tva] Fix extraction (#14736) +* [gamespot] Lower preference of HTTP formats (#14652) +* [instagram:user] Fix extraction (#14699) +* [ccma] Fix typo (#14730) +- Remove sensitive data from logging in messages +* [instagram:user] Fix extraction (#14699) ++ [gamespot] Add support for article URLs (#14652) +* [gamespot] Skip Brightcove Once HTTP formats (#14652) +* [cartoonnetwork] Update tokenizer_src (#14666) ++ [wsj] Recognize another URL pattern (#14704) +* [pandatv] Update API URL and sign format URLs (#14693) +* [crunchyroll] Use old login method (#11572) + + version 2017.11.06 Core @@ -25,8 +220,8 @@ Extractors + [fxnetworks] Extract series metadata (#14603) + [younow] Add support for younow.com (#9255, #9432, #12436) * [dctptv] Fix extraction (#14599) -* [youtube] Restrict embed regex (#14600) -* [vimeo] Restrict iframe embed regex (#14600) +* [youtube] Restrict embed regular expression (#14600) +* [vimeo] Restrict iframe embed regular expression (#14600) * [soundgasm] Improve extraction (#14588) - [myvideo] Remove extractor (#8557) + [nbc] Add support for classic-tv videos (#14575) diff --git a/MANIFEST.in b/MANIFEST.in index 5743f60..4e43e99 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,7 +1,9 @@ include README.md -include test/*.py -include test/*.json +include LICENSE +include AUTHORS +include ChangeLog include youtube-dl.bash-completion include youtube-dl.fish include youtube-dl.1 recursive-include docs Makefile conf.py *.rst +recursive-include test * diff --git a/Makefile b/Makefile index c74eea7..fe24781 100644 --- 
a/Makefile +++ b/Makefile @@ -36,8 +36,17 @@ test: ot: offlinetest +# Keep this list in sync with devscripts/run_tests.sh offlinetest: codetest - $(PYTHON) -m nose --verbose test --exclude test_download.py --exclude test_age_restriction.py --exclude test_subtitles.py --exclude test_write_annotations.py --exclude test_youtube_lists.py --exclude test_iqiyi_sdk_interpreter.py --exclude test_socks.py + $(PYTHON) -m nose --verbose test \ + --exclude test_age_restriction.py \ + --exclude test_download.py \ + --exclude test_iqiyi_sdk_interpreter.py \ + --exclude test_socks.py \ + --exclude test_subtitles.py \ + --exclude test_write_annotations.py \ + --exclude test_youtube_lists.py \ + --exclude test_youtube_signature.py tar: youtube-dl.tar.gz @@ -101,7 +110,7 @@ _EXTRACTOR_FILES = $(shell find youtube_dl/extractor -iname '*.py' -and -not -in youtube_dl/extractor/lazy_extractors.py: devscripts/make_lazy_extractors.py devscripts/lazy_load_template.py $(_EXTRACTOR_FILES) $(PYTHON) devscripts/make_lazy_extractors.py $@ -youtube-dl.tar.gz: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish ChangeLog +youtube-dl.tar.gz: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish ChangeLog AUTHORS @tar -czf youtube-dl.tar.gz --transform "s|^|youtube-dl/|" --owner 0 --group 0 \ --exclude '*.DS_Store' \ --exclude '*.kate-swp' \ @@ -110,11 +119,10 @@ youtube-dl.tar.gz: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash- --exclude '*~' \ --exclude '__pycache__' \ --exclude '.git' \ - --exclude 'testdata' \ --exclude 'docs/_build' \ -- \ bin devscripts test youtube_dl docs \ - ChangeLog LICENSE README.md README.txt \ + ChangeLog AUTHORS LICENSE README.md README.txt \ Makefile MANIFEST.in youtube-dl.1 youtube-dl.bash-completion \ - youtube-dl.zsh youtube-dl.fish setup.py \ + youtube-dl.zsh youtube-dl.fish setup.py setup.cfg \ youtube-dl diff --git a/README.md b/README.md index 
ea321d5..47b0640 100644 --- a/README.md +++ b/README.md @@ -511,6 +511,9 @@ The basic usage is not to set any template arguments when downloading a single f - `average_rating` (numeric): Average rating give by users, the scale used depends on the webpage - `comment_count` (numeric): Number of comments on the video - `age_limit` (numeric): Age restriction for the video (years) + - `is_live` (boolean): Whether this video is a live stream or a fixed-length video + - `start_time` (numeric): Time in seconds where the reproduction should start, as specified in the URL + - `end_time` (numeric): Time in seconds where the reproduction should end, as specified in the URL - `format` (string): A human-readable description of the format - `format_id` (string): Format code specified by `--format` - `format_note` (string): Additional info about the format @@ -536,6 +539,8 @@ The basic usage is not to set any template arguments when downloading a single f - `playlist_index` (numeric): Index of the video in the playlist padded with leading zeros according to the total length of the playlist - `playlist_id` (string): Playlist identifier - `playlist_title` (string): Playlist title + - `playlist_uploader` (string): Full name of the playlist uploader + - `playlist_uploader_id` (string): Nickname or id of the playlist uploader Available for the video that belongs to some logical chapter or section: diff --git a/README.txt b/README.txt index 4b7adfd..0a748ea 100644 --- a/README.txt +++ b/README.txt @@ -596,6 +596,12 @@ with sequence type are: used depends on the webpage - comment_count (numeric): Number of comments on the video - age_limit (numeric): Age restriction for the video (years) +- is_live (boolean): Whether this video is a live stream or a + fixed-length video +- start_time (numeric): Time in seconds where the reproduction should + start, as specified in the URL +- end_time (numeric): Time in seconds where the reproduction should + end, as specified in the URL - format 
(string): A human-readable description of the format - format_id (string): Format code specified by --format - format_note (string): Additional info about the format @@ -625,6 +631,9 @@ with sequence type are: with leading zeros according to the total length of the playlist - playlist_id (string): Playlist identifier - playlist_title (string): Playlist title +- playlist_uploader (string): Full name of the playlist uploader +- playlist_uploader_id (string): Nickname or id of the playlist + uploader Available for the video that belongs to some logical chapter or section: diff --git a/devscripts/run_tests.sh b/devscripts/run_tests.sh index 6ba2672..dd37a80 100755 --- a/devscripts/run_tests.sh +++ b/devscripts/run_tests.sh @@ -1,6 +1,7 @@ #!/bin/bash -DOWNLOAD_TESTS="age_restriction|download|subtitles|write_annotations|iqiyi_sdk_interpreter|youtube_lists" +# Keep this list in sync with the `offlinetest` target in Makefile +DOWNLOAD_TESTS="age_restriction|download|iqiyi_sdk_interpreter|socks|subtitles|write_annotations|youtube_lists|youtube_signature" test_set="" multiprocess_args="" diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 6009df5..75bd5c9 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -3,6 +3,7 @@ - **1up.com** - **20min** - **220.ro** + - **23video** - **24video** - **3qsdn**: 3Q SDN - **3sat** @@ -10,6 +11,7 @@ - **56.com** - **5min** - **6play** + - **7plus** - **8tracks** - **91porn** - **9c9media** @@ -112,16 +114,16 @@ - **BokeCC** - **BostonGlobe** - **Bpb**: Bundeszentrale für politische Bildung - - **BR**: Bayerischer Rundfunk Mediathek + - **BR**: Bayerischer Rundfunk - **BravoTV** - **Break** - **brightcove:legacy** - **brightcove:new** + - **BRMediathek**: Bayerischer Rundfunk Mediathek - **bt:article**: Bergens Tidende Articles - **bt:vestlendingen**: Bergens Tidende - Vestlendingen - **BuzzFeed** - **BYUtv** - - **BYUtvEvent** - **Camdemy** - **CamdemyFolder** - **CamWithHer** @@ -169,7 +171,6 @@ - **CNN** - 
**CNNArticle** - **CNNBlogs** - - **CollegeRama** - **ComCarCoff** - **ComedyCentral** - **ComedyCentralFullEpisodes** @@ -197,9 +198,8 @@ - **dailymotion** - **dailymotion:playlist** - **dailymotion:user** - - **DailymotionCloud** - - **Daisuki** - - **DaisukiPlaylist** + - **DaisukiMotto** + - **DaisukiMottoPlaylist** - **daum.net** - **daum.net:clip** - **daum.net:playlist** @@ -242,8 +242,9 @@ - **eHow** - **Einthusan** - **eitb.tv** - - **EllenTV** - - **EllenTV:clips** + - **EllenTube** + - **EllenTubePlaylist** + - **EllenTubeVideo** - **ElPais**: El País - **Embedly** - **EMPFlix** @@ -266,10 +267,10 @@ - **fc2** - **fc2:embed** - **Fczenit** - - **fernsehkritik.tv** - **filmon** - **filmon:channel** - - **Firstpost** + - **Filmweb** + - **FiveThirtyEight** - **FiveTV** - **Flickr** - **Flipagram** @@ -283,7 +284,7 @@ - **foxnews:article** - **foxnews:insider** - **FoxSports** - - **france2.fr:generation-quoi** + - **france2.fr:generation-what** - **FranceCulture** - **FranceInter** - **FranceTV** @@ -301,7 +302,6 @@ - **GameInformer** - **GameOne** - **gameone:playlist** - - **Gamersyde** - **GameSpot** - **GameStar** - **Gaskrank** @@ -361,6 +361,7 @@ - **InfoQ** - **Instagram** - **instagram:user**: Instagram user profile + - **Internazionale** - **InternetVideoArchive** - **IPrima** - **iqiyi**: 爱奇艺 @@ -441,11 +442,13 @@ - **mangomolo:live** - **mangomolo:video** - **ManyVids** + - **massengeschmack.tv** - **MatchTV** - **MDR**: MDR.DE and KiKA - **media.ccc.de** - **Medialaan** - **Mediaset** + - **Mediasite** - **Medici** - **megaphone.fm**: megaphone.fm embedded players - **Meipai**: 美拍 @@ -539,6 +542,7 @@ - **nhl.com:videocenter:category**: NHL videocenter category - **nick.com** - **nick.de** + - **nickelodeon:br** - **nickelodeonru** - **nicknight** - **niconico**: ニコニコ動画 @@ -557,8 +561,6 @@ - **nowness** - **nowness:playlist** - **nowness:series** - - **NowTV** (Currently broken) - - **NowTVList** - **nowvideo**: NowVideo - **Noz** - **npo**: 
npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl @@ -608,6 +610,7 @@ - **pcmag** - **PearVideo** - **People** + - **PerformGroup** - **periscope**: Periscope - **periscope:user**: Periscope user videos - **PhilharmonieDeParis**: Philharmonie de Paris @@ -662,6 +665,7 @@ - **Rai** - **RaiPlay** - **RaiPlayLive** + - **RaiPlayPlaylist** - **RBMARadio** - **RDS**: RDS.ca - **RedBullTV** @@ -713,7 +717,6 @@ - **safari**: safaribooksonline.com online video - **safari:api** - **safari:course**: safaribooksonline.com online courses - - **Sandia**: Sandia National Laboratories - **Sapo**: SAPO Vídeos - **savefrom.net** - **SBS**: sbs.com.au @@ -729,6 +732,7 @@ - **Servus** - **Sexu** - **Shahid** + - **ShahidShow** - **Shared**: shared.sx - **ShowRoomLive** - **Sina** @@ -781,6 +785,7 @@ - **streamcloud.eu** - **StreamCZ** - **StreetVoice** + - **StretchInternet** - **SunPorno** - **SVT** - **SVTPlay**: SVT Play and Öppet arkiv @@ -792,7 +797,7 @@ - **tagesschau:player** - **Tass** - **TastyTrade** - - **TBS** (Currently broken) + - **TBS** - **TDSLifeway** - **teachertube**: teachertube.com videos - **teachertube:user:collection**: teachertube.com user and collection videos @@ -863,6 +868,8 @@ - **tvland.com** - **TVN24** - **TVNoe** + - **TVNow** + - **TVNowList** - **tvp**: Telewizja Polska - **tvp:embed**: Telewizja Polska - **tvp:series** @@ -884,7 +891,9 @@ - **udemy** - **udemy:course** - **UDNEmbed**: 聯合影音 + - **UFCTV** - **UKTVPlay** + - **umg:de**: Universal Music Deutschland - **Unistra** - **Unity** - **uol.com.br** diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 0000000..2dc06ff --- /dev/null +++ b/setup.cfg @@ -0,0 +1,6 @@ +[wheel] +universal = True + +[flake8] +exclude = youtube_dl/extractor/__init__.py,devscripts/buildserver.py,devscripts/lazy_load_template.py,devscripts/make_issue_template.py,setup.py,build,.git +ignore = E402,E501,E731 diff --git a/setup.py b/setup.py index 67d6633..7dbb580 100644 --- a/setup.py +++ b/setup.py @@ -109,6 
+109,7 @@ setup( author_email='ytdl@yt-dl.org', maintainer='Sergey M.', maintainer_email='dstftw@gmail.com', + license='Unlicense', packages=[ 'youtube_dl', 'youtube_dl.extractor', 'youtube_dl.downloader', diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py index 686c63e..7b31d51 100644 --- a/test/test_InfoExtractor.py +++ b/test/test_InfoExtractor.py @@ -493,9 +493,20 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ _TEST_CASES = [ ( # https://github.com/rg3/youtube-dl/issues/13919 + # Also tests duplicate representation ids, see + # https://github.com/rg3/youtube-dl/issues/15111 'float_duration', 'http://unknown/manifest.mpd', [{ + 'manifest_url': 'http://unknown/manifest.mpd', + 'ext': 'm4a', + 'format_id': '318597', + 'format_note': 'DASH audio', + 'protocol': 'http_dash_segments', + 'acodec': 'mp4a.40.2', + 'vcodec': 'none', + 'tbr': 61.587, + }, { 'manifest_url': 'http://unknown/manifest.mpd', 'ext': 'mp4', 'format_id': '318597', @@ -562,7 +573,89 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ 'width': 1920, 'height': 1080, }] - ), + ), ( + # https://github.com/rg3/youtube-dl/pull/14844 + 'urls_only', + 'http://unknown/manifest.mpd', + [{ + 'manifest_url': 'http://unknown/manifest.mpd', + 'ext': 'mp4', + 'format_id': 'h264_aac_144p_m4s', + 'format_note': 'DASH video', + 'protocol': 'http_dash_segments', + 'acodec': 'mp4a.40.2', + 'vcodec': 'avc3.42c01e', + 'tbr': 200, + 'width': 256, + 'height': 144, + }, { + 'manifest_url': 'http://unknown/manifest.mpd', + 'ext': 'mp4', + 'format_id': 'h264_aac_240p_m4s', + 'format_note': 'DASH video', + 'protocol': 'http_dash_segments', + 'acodec': 'mp4a.40.2', + 'vcodec': 'avc3.42c01e', + 'tbr': 400, + 'width': 424, + 'height': 240, + }, { + 'manifest_url': 'http://unknown/manifest.mpd', + 'ext': 'mp4', + 'format_id': 'h264_aac_360p_m4s', + 'format_note': 'DASH video', + 'protocol': 'http_dash_segments', + 'acodec': 'mp4a.40.2', + 
'vcodec': 'avc3.42c01e', + 'tbr': 800, + 'width': 640, + 'height': 360, + }, { + 'manifest_url': 'http://unknown/manifest.mpd', + 'ext': 'mp4', + 'format_id': 'h264_aac_480p_m4s', + 'format_note': 'DASH video', + 'protocol': 'http_dash_segments', + 'acodec': 'mp4a.40.2', + 'vcodec': 'avc3.42c01e', + 'tbr': 1200, + 'width': 856, + 'height': 480, + }, { + 'manifest_url': 'http://unknown/manifest.mpd', + 'ext': 'mp4', + 'format_id': 'h264_aac_576p_m4s', + 'format_note': 'DASH video', + 'protocol': 'http_dash_segments', + 'acodec': 'mp4a.40.2', + 'vcodec': 'avc3.42c01e', + 'tbr': 1600, + 'width': 1024, + 'height': 576, + }, { + 'manifest_url': 'http://unknown/manifest.mpd', + 'ext': 'mp4', + 'format_id': 'h264_aac_720p_m4s', + 'format_note': 'DASH video', + 'protocol': 'http_dash_segments', + 'acodec': 'mp4a.40.2', + 'vcodec': 'avc3.42c01e', + 'tbr': 2400, + 'width': 1280, + 'height': 720, + }, { + 'manifest_url': 'http://unknown/manifest.mpd', + 'ext': 'mp4', + 'format_id': 'h264_aac_1080p_m4s', + 'format_note': 'DASH video', + 'protocol': 'http_dash_segments', + 'acodec': 'mp4a.40.2', + 'vcodec': 'avc3.42c01e', + 'tbr': 4400, + 'width': 1920, + 'height': 1080, + }] + ) ] for mpd_file, mpd_url, expected_formats in _TEST_CASES: @@ -601,5 +694,6 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ self.ie._sort_formats(formats) expect_value(self, formats, expected_formats, None) + if __name__ == '__main__': unittest.main() diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index 4af92fb..f0f5a84 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -466,11 +466,11 @@ class TestFormatSelection(unittest.TestCase): ydl = YDL({'simulate': True}) self.assertEqual(ydl._default_format_spec({}), 'bestvideo+bestaudio/best') - ydl = YDL({'is_live': True}) - self.assertEqual(ydl._default_format_spec({}), 'best/bestvideo+bestaudio') + ydl = YDL({}) + self.assertEqual(ydl._default_format_spec({'is_live': True}), 
'best/bestvideo+bestaudio') - ydl = YDL({'simulate': True, 'is_live': True}) - self.assertEqual(ydl._default_format_spec({}), 'bestvideo+bestaudio/best') + ydl = YDL({'simulate': True}) + self.assertEqual(ydl._default_format_spec({'is_live': True}), 'bestvideo+bestaudio/best') ydl = YDL({'outtmpl': '-'}) self.assertEqual(ydl._default_format_spec({}), 'best/bestvideo+bestaudio') diff --git a/test/test_utils.py b/test/test_utils.py index cc13f79..0857c0f 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -343,6 +343,7 @@ class TestUtil(unittest.TestCase): self.assertEqual(unified_timestamp('Feb 7, 2016 at 6:35 pm'), 1454870100) self.assertEqual(unified_timestamp('2017-03-30T17:52:41Q'), 1490896361) self.assertEqual(unified_timestamp('Sep 11, 2013 | 5:49 AM'), 1378878540) + self.assertEqual(unified_timestamp('December 15, 2017 at 7:49 am'), 1513324140) def test_determine_ext(self): self.assertEqual(determine_ext('http://example.com/foo/bar.mp4/?download'), 'mp4') diff --git a/test/testdata/f4m/custom_base_url.f4m b/test/testdata/f4m/custom_base_url.f4m new file mode 100644 index 0000000..74e1539 --- /dev/null +++ b/test/testdata/f4m/custom_base_url.f4m @@ -0,0 +1,10 @@ + + + recorded + http://vod.livestream.com/events/0000000000673980/ + 269.293 + AAAAm2Fic3QAAAAAAAAAAQAAAAPoAAAAAAAEG+0AAAAAAAAAAAAAAAAAAQAAABlhc3J0AAAAAAAAAAABAAAAAQAAAC4BAAAAVmFmcnQAAAAAAAAD6AAAAAAEAAAAAQAAAAAAAAAAAAAXcAAAAC0AAAAAAAQHQAAAE5UAAAAuAAAAAAAEGtUAAAEYAAAAAAAAAAAAAAAAAAAAAAA= + + AgAKb25NZXRhRGF0YQgAAAAIAAhkdXJhdGlvbgBAcNSwIMSbpgAFd2lkdGgAQJQAAAAAAAAABmhlaWdodABAhoAAAAAAAAAJZnJhbWVyYXRlAEA4/7DoLwW3AA12aWRlb2RhdGFyYXRlAECe1DLgjcobAAx2aWRlb2NvZGVjaWQAQBwAAAAAAAAADWF1ZGlvZGF0YXJhdGUAQGSimlvaPKQADGF1ZGlvY29kZWNpZABAJAAAAAAAAAAACQ== + + diff --git a/test/testdata/m3u8/pluzz_francetv_11507.m3u8 b/test/testdata/m3u8/pluzz_francetv_11507.m3u8 new file mode 100644 index 0000000..0809f5a --- /dev/null +++ b/test/testdata/m3u8/pluzz_francetv_11507.m3u8 @@ -0,0 +1,14 @@ +#EXTM3U + 
#EXT-X-VERSION:5 + #EXT-X-MEDIA:TYPE=SUBTITLES,GROUP-ID="subs",NAME="Francais",DEFAULT=NO,FORCED=NO,URI="http://replayftv-pmd.francetv.fr/subtitles/2017/16/156589847-1492488987.m3u8",LANGUAGE="fra" + #EXT-X-MEDIA:TYPE=AUDIO,GROUP-ID="aac",LANGUAGE="fra",NAME="Francais",DEFAULT=YES, AUTOSELECT=YES +#EXT-X-STREAM-INF:SUBTITLES="subs",AUDIO="aac",PROGRAM-ID=1,BANDWIDTH=180000,RESOLUTION=256x144,CODECS="avc1.66.30, mp4a.40.2" +http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/index_0_av.m3u8?null=0 +#EXT-X-STREAM-INF:SUBTITLES="subs",AUDIO="aac",PROGRAM-ID=1,BANDWIDTH=303000,RESOLUTION=320x180,CODECS="avc1.66.30, mp4a.40.2" +http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/index_1_av.m3u8?null=0 +#EXT-X-STREAM-INF:SUBTITLES="subs",AUDIO="aac",PROGRAM-ID=1,BANDWIDTH=575000,RESOLUTION=512x288,CODECS="avc1.66.30, mp4a.40.2" +http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/index_2_av.m3u8?null=0 +#EXT-X-STREAM-INF:SUBTITLES="subs",AUDIO="aac",PROGRAM-ID=1,BANDWIDTH=831000,RESOLUTION=704x396,CODECS="avc1.77.30, mp4a.40.2" +http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/index_3_av.m3u8?null=0 +#EXT-X-STREAM-INF:SUBTITLES="subs",AUDIO="aac",PROGRAM-ID=1,BANDWIDTH=1467000,RESOLUTION=1024x576,CODECS="avc1.77.30, mp4a.40.2" +http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/index_4_av.m3u8?null=0 diff --git a/test/testdata/m3u8/teamcoco_11995.m3u8 b/test/testdata/m3u8/teamcoco_11995.m3u8 new 
file mode 100644 index 0000000..a6e4216 --- /dev/null +++ b/test/testdata/m3u8/teamcoco_11995.m3u8 @@ -0,0 +1,16 @@ +#EXTM3U +#EXT-X-MEDIA:TYPE=AUDIO,GROUP-ID="audio-0",NAME="Default",AUTOSELECT=YES,DEFAULT=YES,URI="hls/CONAN_020217_Highlight_show-audio-160k_v4.m3u8" +#EXT-X-MEDIA:TYPE=AUDIO,GROUP-ID="audio-1",NAME="Default",AUTOSELECT=YES,DEFAULT=YES,URI="hls/CONAN_020217_Highlight_show-audio-64k_v4.m3u8" +#EXT-X-I-FRAME-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=37862000,CODECS="avc1.4d001f",URI="hls/CONAN_020217_Highlight_show-2m_iframe.m3u8" +#EXT-X-I-FRAME-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=18750000,CODECS="avc1.4d001e",URI="hls/CONAN_020217_Highlight_show-1m_iframe.m3u8" +#EXT-X-I-FRAME-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=6535000,CODECS="avc1.42001e",URI="hls/CONAN_020217_Highlight_show-400k_iframe.m3u8" +#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=2374000,RESOLUTION=1024x576,CODECS="avc1.4d001f,mp4a.40.2",AUDIO="audio-0" +hls/CONAN_020217_Highlight_show-2m_v4.m3u8 +#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=1205000,RESOLUTION=640x360,CODECS="avc1.4d001e,mp4a.40.2",AUDIO="audio-0" +hls/CONAN_020217_Highlight_show-1m_v4.m3u8 +#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=522000,RESOLUTION=400x224,CODECS="avc1.42001e,mp4a.40.2",AUDIO="audio-0" +hls/CONAN_020217_Highlight_show-400k_v4.m3u8 +#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=413000,RESOLUTION=400x224,CODECS="avc1.42001e,mp4a.40.5",AUDIO="audio-1" +hls/CONAN_020217_Highlight_show-400k_v4.m3u8 +#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=71000,CODECS="mp4a.40.5",AUDIO="audio-1" +hls/CONAN_020217_Highlight_show-audio-64k_v4.m3u8 diff --git a/test/testdata/m3u8/toggle_mobile_12211.m3u8 b/test/testdata/m3u8/toggle_mobile_12211.m3u8 new file mode 100644 index 0000000..69604e6 --- /dev/null +++ b/test/testdata/m3u8/toggle_mobile_12211.m3u8 @@ -0,0 +1,13 @@ +#EXTM3U +#EXT-X-VERSION:4 
+#EXT-X-MEDIA:TYPE=AUDIO,GROUP-ID="audio",LANGUAGE="eng",NAME="English",URI="http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/2/pv/1/flavorId/0_sa2ntrdg/name/a.mp4/index.m3u8" +#EXT-X-MEDIA:TYPE=AUDIO,GROUP-ID="audio",LANGUAGE="und",NAME="Undefined",URI="http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/2/pv/1/flavorId/0_r7y0nitg/name/a.mp4/index.m3u8" + +#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=155648,RESOLUTION=320x180,AUDIO="audio" +http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/2/pv/1/flavorId/0_qlk9hlzr/name/a.mp4/index.m3u8 +#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=502784,RESOLUTION=480x270,AUDIO="audio" +http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/2/pv/1/flavorId/0_oefackmi/name/a.mp4/index.m3u8 +#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=827392,RESOLUTION=640x360,AUDIO="audio" +http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/12/pv/1/flavorId/0_vyg9pj7k/name/a.mp4/index.m3u8 +#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=1396736,RESOLUTION=854x480,AUDIO="audio" +http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/12/pv/1/flavorId/0_50n4psvx/name/a.mp4/index.m3u8 diff --git a/test/testdata/m3u8/twitch_vod.m3u8 b/test/testdata/m3u8/twitch_vod.m3u8 new file mode 100644 index 0000000..7617277 --- /dev/null +++ b/test/testdata/m3u8/twitch_vod.m3u8 @@ -0,0 +1,20 @@ +#EXTM3U +#EXT-X-TWITCH-INFO:ORIGIN="s3",CLUSTER="edgecast_vod",REGION="EU",MANIFEST-CLUSTER="edgecast_vod",USER-IP="109.171.17.81" +#EXT-X-MEDIA:TYPE=VIDEO,GROUP-ID="chunked",NAME="Source",AUTOSELECT=YES,DEFAULT=YES +#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=3214134,CODECS="avc1.100.31,mp4a.40.2",RESOLUTION="1280x720",VIDEO="chunked" +https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/chunked/index-muted-HM49I092CC.m3u8 
+#EXT-X-MEDIA:TYPE=VIDEO,GROUP-ID="high",NAME="High",AUTOSELECT=YES,DEFAULT=YES +#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=1603789,CODECS="avc1.42C01F,mp4a.40.2",RESOLUTION="1280x720",VIDEO="high" +https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/high/index-muted-HM49I092CC.m3u8 +#EXT-X-MEDIA:TYPE=VIDEO,GROUP-ID="medium",NAME="Medium",AUTOSELECT=YES,DEFAULT=YES +#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=893387,CODECS="avc1.42C01E,mp4a.40.2",RESOLUTION="852x480",VIDEO="medium" +https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/medium/index-muted-HM49I092CC.m3u8 +#EXT-X-MEDIA:TYPE=VIDEO,GROUP-ID="low",NAME="Low",AUTOSELECT=YES,DEFAULT=YES +#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=628347,CODECS="avc1.42C01E,mp4a.40.2",RESOLUTION="640x360",VIDEO="low" +https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/low/index-muted-HM49I092CC.m3u8 +#EXT-X-MEDIA:TYPE=VIDEO,GROUP-ID="mobile",NAME="Mobile",AUTOSELECT=YES,DEFAULT=YES +#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=280474,CODECS="avc1.42C00D,mp4a.40.2",RESOLUTION="400x226",VIDEO="mobile" +https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/mobile/index-muted-HM49I092CC.m3u8 +#EXT-X-MEDIA:TYPE=VIDEO,GROUP-ID="audio_only",NAME="Audio Only",AUTOSELECT=NO,DEFAULT=NO +#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=182725,CODECS="mp4a.40.2",VIDEO="audio_only" +https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/audio_only/index-muted-HM49I092CC.m3u8 diff --git a/test/testdata/m3u8/vidio.m3u8 b/test/testdata/m3u8/vidio.m3u8 new file mode 100644 index 0000000..89c2444 --- /dev/null +++ b/test/testdata/m3u8/vidio.m3u8 @@ -0,0 +1,10 @@ +#EXTM3U + +#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=300000,RESOLUTION=480x270,NAME="270p 3G" +https://cdn1-a.production.vidio.static6.com/uploads/165683/dj_ambred-4383-b300.mp4.m3u8 + +#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=600000,RESOLUTION=640x360,NAME="360p SD" 
+https://cdn1-a.production.vidio.static6.com/uploads/165683/dj_ambred-4383-b600.mp4.m3u8 + +#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=1200000,RESOLUTION=1280x720,NAME="720p HD" +https://cdn1-a.production.vidio.static6.com/uploads/165683/dj_ambred-4383-b1200.mp4.m3u8 diff --git a/test/testdata/mpd/float_duration.mpd b/test/testdata/mpd/float_duration.mpd new file mode 100644 index 0000000..8dc1d2d --- /dev/null +++ b/test/testdata/mpd/float_duration.mpd @@ -0,0 +1,18 @@ + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/test/testdata/mpd/urls_only.mpd b/test/testdata/mpd/urls_only.mpd new file mode 100644 index 0000000..2b9d595 --- /dev/null +++ b/test/testdata/mpd/urls_only.mpd @@ -0,0 +1,218 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/youtube-dl b/youtube-dl index 3b69288..d00c30e 100755 Binary files a/youtube-dl and b/youtube-dl differ diff --git a/youtube-dl.1 b/youtube-dl.1 index 9ab22b0..410fce1 100644 --- a/youtube-dl.1 +++ b/youtube-dl.1 @@ -1066,6 +1066,15 @@ scale used depends on the webpage .IP \[bu] 2 \f[C]age_limit\f[] (numeric): Age restriction for the video (years) .IP \[bu] 2 +\f[C]is_live\f[] (boolean): Whether this video is a live stream or a +fixed\-length video +.IP \[bu] 2 +\f[C]start_time\f[] (numeric): Time in seconds where the reproduction +should start, as specified in the URL +.IP \[bu] 2 +\f[C]end_time\f[] (numeric): Time in seconds where the reproduction +should end, as specified in the URL +.IP \[bu] 2 \f[C]format\f[] (string): A human\-readable description of the format .IP \[bu] 2 \f[C]format_id\f[] (string): 
Format code specified by @@ -1120,6 +1129,11 @@ padded with leading zeros according to the total length of the playlist \f[C]playlist_id\f[] (string): Playlist identifier .IP \[bu] 2 \f[C]playlist_title\f[] (string): Playlist title +.IP \[bu] 2 +\f[C]playlist_uploader\f[] (string): Full name of the playlist uploader +.IP \[bu] 2 +\f[C]playlist_uploader_id\f[] (string): Nickname or id of the playlist +uploader .PP Available for the video that belongs to some logical chapter or section: .IP \[bu] 2 diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 342d6b4..ace80f1 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -948,7 +948,8 @@ class YoutubeDL(object): report_download(n_entries) else: # iterable if playlistitems: - entries = make_playlistitems_entries(list(ie_entries)) + entries = make_playlistitems_entries(list(itertools.islice( + ie_entries, 0, max(playlistitems)))) else: entries = list(itertools.islice( ie_entries, playliststart, playlistend)) @@ -974,6 +975,8 @@ class YoutubeDL(object): 'playlist': playlist, 'playlist_id': ie_result.get('id'), 'playlist_title': ie_result.get('title'), + 'playlist_uploader': ie_result.get('uploader'), + 'playlist_uploader_id': ie_result.get('uploader_id'), 'playlist_index': i + playliststart, 'extractor': ie_result['extractor'], 'webpage_url': ie_result['webpage_url'], diff --git a/youtube_dl/downloader/fragment.py b/youtube_dl/downloader/fragment.py index 93002e4..ea5e3a4 100644 --- a/youtube_dl/downloader/fragment.py +++ b/youtube_dl/downloader/fragment.py @@ -107,11 +107,12 @@ class FragmentFD(FileDownloader): def _append_fragment(self, ctx, frag_content): try: ctx['dest_stream'].write(frag_content) + ctx['dest_stream'].flush() finally: if self.__do_ytdl_file(ctx): self._write_ytdl_file(ctx) if not self.params.get('keep_fragments', False): - os.remove(ctx['fragment_filename_sanitized']) + os.remove(encodeFilename(ctx['fragment_filename_sanitized'])) del ctx['fragment_filename_sanitized'] 
def _prepare_frag_download(self, ctx): diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py index 1a6e226..4dc3ab4 100644 --- a/youtube_dl/downloader/hls.py +++ b/youtube_dl/downloader/hls.py @@ -163,7 +163,8 @@ class HlsFD(FragmentFD): return False if decrypt_info['METHOD'] == 'AES-128': iv = decrypt_info.get('IV') or compat_struct_pack('>8xq', media_sequence) - decrypt_info['KEY'] = decrypt_info.get('KEY') or self.ydl.urlopen(decrypt_info['URI']).read() + decrypt_info['KEY'] = decrypt_info.get('KEY') or self.ydl.urlopen( + self._prepare_url(info_dict, decrypt_info['URI'])).read() frag_content = AES.new( decrypt_info['KEY'], AES.MODE_CBC, iv).decrypt(frag_content) self._append_fragment(ctx, frag_content) diff --git a/youtube_dl/downloader/http.py b/youtube_dl/downloader/http.py index 8a6638c..3ff26ff 100644 --- a/youtube_dl/downloader/http.py +++ b/youtube_dl/downloader/http.py @@ -284,8 +284,7 @@ class HttpFD(FileDownloader): while count <= retries: try: establish_connection() - download() - return True + return download() except RetryDownload as e: count += 1 if count <= retries: diff --git a/youtube_dl/extractor/abc.py b/youtube_dl/extractor/abc.py index 60f753b..87017ed 100644 --- a/youtube_dl/extractor/abc.py +++ b/youtube_dl/extractor/abc.py @@ -1,6 +1,9 @@ from __future__ import unicode_literals +import hashlib +import hmac import re +import time from .common import InfoExtractor from ..compat import compat_str @@ -10,6 +13,7 @@ from ..utils import ( int_or_none, parse_iso8601, try_get, + update_url_query, ) @@ -101,21 +105,24 @@ class ABCIE(InfoExtractor): class ABCIViewIE(InfoExtractor): IE_NAME = 'abc.net.au:iview' _VALID_URL = r'https?://iview\.abc\.net\.au/programs/[^/]+/(?P[^/?#]+)' + _GEO_COUNTRIES = ['AU'] # ABC iview programs are normally available for 14 days only. 
_TESTS = [{ - 'url': 'http://iview.abc.net.au/programs/diaries-of-a-broken-mind/ZX9735A001S00', + 'url': 'http://iview.abc.net.au/programs/call-the-midwife/ZW0898A003S00', 'md5': 'cde42d728b3b7c2b32b1b94b4a548afc', 'info_dict': { - 'id': 'ZX9735A001S00', + 'id': 'ZW0898A003S00', 'ext': 'mp4', - 'title': 'Diaries Of A Broken Mind', - 'description': 'md5:7de3903874b7a1be279fe6b68718fc9e', - 'upload_date': '20161010', - 'uploader_id': 'abc2', - 'timestamp': 1476064920, + 'title': 'Series 5 Ep 3', + 'description': 'md5:e0ef7d4f92055b86c4f33611f180ed79', + 'upload_date': '20171228', + 'uploader_id': 'abc1', + 'timestamp': 1514499187, + }, + 'params': { + 'skip_download': True, }, - 'skip': 'Video gone', }] def _real_extract(self, url): @@ -126,20 +133,30 @@ class ABCIViewIE(InfoExtractor): title = video_params.get('title') or video_params['seriesTitle'] stream = next(s for s in video_params['playlist'] if s.get('type') == 'program') - format_urls = [ - try_get(stream, lambda x: x['hds-unmetered'], compat_str)] - - # May have higher quality video - sd_url = try_get( - stream, lambda x: x['streams']['hds']['sd'], compat_str) - if sd_url: - format_urls.append(sd_url.replace('metered', 'um')) - - formats = [] - for format_url in format_urls: - if format_url: - formats.extend( - self._extract_akamai_formats(format_url, video_id)) + house_number = video_params.get('episodeHouseNumber') + path = '/auth/hls/sign?ts={0}&hn={1}&d=android-mobile'.format( + int(time.time()), house_number) + sig = hmac.new( + 'android.content.res.Resources'.encode('utf-8'), + path.encode('utf-8'), hashlib.sha256).hexdigest() + token = self._download_webpage( + 'http://iview.abc.net.au{0}&sig={1}'.format(path, sig), video_id) + + def tokenize_url(url, token): + return update_url_query(url, { + 'hdnea': token, + }) + + for sd in ('sd', 'sd-low'): + sd_url = try_get( + stream, lambda x: x['streams']['hls'][sd], compat_str) + if not sd_url: + continue + formats = self._extract_m3u8_formats( + 
tokenize_url(sd_url, token), video_id, 'mp4', + entry_protocol='m3u8_native', m3u8_id='hls', fatal=False) + if formats: + break self._sort_formats(formats) subtitles = {} diff --git a/youtube_dl/extractor/afreecatv.py b/youtube_dl/extractor/afreecatv.py index e6513c7..513dd81 100644 --- a/youtube_dl/extractor/afreecatv.py +++ b/youtube_dl/extractor/afreecatv.py @@ -228,10 +228,19 @@ class AfreecaTVIE(InfoExtractor): r'^(\d{8})_', key, 'upload date', default=None) file_duration = int_or_none(file_element.get('duration')) format_id = key if key else '%s_%s' % (video_id, file_num) - formats = self._extract_m3u8_formats( - file_url, video_id, 'mp4', entry_protocol='m3u8_native', - m3u8_id='hls', - note='Downloading part %d m3u8 information' % file_num) + if determine_ext(file_url) == 'm3u8': + formats = self._extract_m3u8_formats( + file_url, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls', + note='Downloading part %d m3u8 information' % file_num) + else: + formats = [{ + 'url': file_url, + 'format_id': 'http', + }] + if not formats: + continue + self._sort_formats(formats) file_info = common_entry.copy() file_info.update({ 'id': format_id, diff --git a/youtube_dl/extractor/animeondemand.py b/youtube_dl/extractor/animeondemand.py index 69d3633..e4fa72f 100644 --- a/youtube_dl/extractor/animeondemand.py +++ b/youtube_dl/extractor/animeondemand.py @@ -78,15 +78,15 @@ class AnimeOnDemandIE(InfoExtractor): post_url = urljoin(self._LOGIN_URL, post_url) response = self._download_webpage( - post_url, None, 'Logging in as %s' % username, + post_url, None, 'Logging in', data=urlencode_postdata(login_form), headers={ 'Referer': self._LOGIN_URL, }) if all(p not in response for p in ('>Logout<', 'href="/users/sign_out"')): error = self._search_regex( - r'

<p class="alert alert-danger">(.+?)</p>', - response, 'error', default=None) + r'<p[^>]+\bclass=(["\'])(?:(?!\1).)*\balert\b(?:(?!\1).)*\1[^>]*>(?P<error>.+?)</p>

', + response, 'error', default=None, group='error') if error: raise ExtractorError('Unable to login: %s' % error, expected=True) raise ExtractorError('Unable to log in') diff --git a/youtube_dl/extractor/ard.py b/youtube_dl/extractor/ard.py index 915f886..ef73d5a 100644 --- a/youtube_dl/extractor/ard.py +++ b/youtube_dl/extractor/ard.py @@ -5,6 +5,7 @@ import re from .common import InfoExtractor from .generic import GenericIE +from ..compat import compat_str from ..utils import ( determine_ext, ExtractorError, @@ -126,6 +127,8 @@ class ARDMediathekIE(InfoExtractor): quality = stream.get('_quality') server = stream.get('_server') for stream_url in stream_urls: + if not isinstance(stream_url, compat_str) or '//' not in stream_url: + continue ext = determine_ext(stream_url) if quality != 'auto' and ext in ('f4m', 'm3u8'): continue @@ -146,13 +149,11 @@ class ARDMediathekIE(InfoExtractor): 'play_path': stream_url, 'format_id': 'a%s-rtmp-%s' % (num, quality), } - elif stream_url.startswith('http'): + else: f = { 'url': stream_url, 'format_id': 'a%s-%s-%s' % (num, ext, quality) } - else: - continue m = re.search(r'_(?P\d+)x(?P\d+)\.mp4$', stream_url) if m: f.update({ diff --git a/youtube_dl/extractor/atresplayer.py b/youtube_dl/extractor/atresplayer.py index 01fa308..1a31ebe 100644 --- a/youtube_dl/extractor/atresplayer.py +++ b/youtube_dl/extractor/atresplayer.py @@ -87,7 +87,7 @@ class AtresPlayerIE(InfoExtractor): self._LOGIN_URL, urlencode_postdata(login_form)) request.add_header('Content-Type', 'application/x-www-form-urlencoded') response = self._download_webpage( - request, None, 'Logging in as %s' % username) + request, None, 'Logging in') error = self._html_search_regex( r'(?s)]+class="[^"]*\blist_error\b[^"]*">(.+?)', diff --git a/youtube_dl/extractor/aws.py b/youtube_dl/extractor/aws.py new file mode 100644 index 0000000..dccfeaf --- /dev/null +++ b/youtube_dl/extractor/aws.py @@ -0,0 +1,78 @@ +# coding: utf-8 +from __future__ import unicode_literals + 
+import datetime +import hashlib +import hmac + +from .common import InfoExtractor +from ..compat import compat_urllib_parse_urlencode + + +class AWSIE(InfoExtractor): + _AWS_ALGORITHM = 'AWS4-HMAC-SHA256' + _AWS_REGION = 'us-east-1' + + def _aws_execute_api(self, aws_dict, video_id, query=None): + query = query or {} + amz_date = datetime.datetime.utcnow().strftime('%Y%m%dT%H%M%SZ') + date = amz_date[:8] + headers = { + 'Accept': 'application/json', + 'Host': self._AWS_PROXY_HOST, + 'X-Amz-Date': amz_date, + 'X-Api-Key': self._AWS_API_KEY + } + session_token = aws_dict.get('session_token') + if session_token: + headers['X-Amz-Security-Token'] = session_token + + def aws_hash(s): + return hashlib.sha256(s.encode('utf-8')).hexdigest() + + # Task 1: http://docs.aws.amazon.com/general/latest/gr/sigv4-create-canonical-request.html + canonical_querystring = compat_urllib_parse_urlencode(query) + canonical_headers = '' + for header_name, header_value in sorted(headers.items()): + canonical_headers += '%s:%s\n' % (header_name.lower(), header_value) + signed_headers = ';'.join([header.lower() for header in sorted(headers.keys())]) + canonical_request = '\n'.join([ + 'GET', + aws_dict['uri'], + canonical_querystring, + canonical_headers, + signed_headers, + aws_hash('') + ]) + + # Task 2: http://docs.aws.amazon.com/general/latest/gr/sigv4-create-string-to-sign.html + credential_scope_list = [date, self._AWS_REGION, 'execute-api', 'aws4_request'] + credential_scope = '/'.join(credential_scope_list) + string_to_sign = '\n'.join([self._AWS_ALGORITHM, amz_date, credential_scope, aws_hash(canonical_request)]) + + # Task 3: http://docs.aws.amazon.com/general/latest/gr/sigv4-calculate-signature.html + def aws_hmac(key, msg): + return hmac.new(key, msg.encode('utf-8'), hashlib.sha256) + + def aws_hmac_digest(key, msg): + return aws_hmac(key, msg).digest() + + def aws_hmac_hexdigest(key, msg): + return aws_hmac(key, msg).hexdigest() + + k_signing = ('AWS4' + 
aws_dict['secret_key']).encode('utf-8') + for value in credential_scope_list: + k_signing = aws_hmac_digest(k_signing, value) + + signature = aws_hmac_hexdigest(k_signing, string_to_sign) + + # Task 4: http://docs.aws.amazon.com/general/latest/gr/sigv4-add-signature-to-request.html + headers['Authorization'] = ', '.join([ + '%s Credential=%s/%s' % (self._AWS_ALGORITHM, aws_dict['access_key'], credential_scope), + 'SignedHeaders=%s' % signed_headers, + 'Signature=%s' % signature, + ]) + + return self._download_json( + 'https://%s%s%s' % (self._AWS_PROXY_HOST, aws_dict['uri'], '?' + canonical_querystring if canonical_querystring else ''), + video_id, headers=headers) diff --git a/youtube_dl/extractor/bambuser.py b/youtube_dl/extractor/bambuser.py index 0eb1930..633c575 100644 --- a/youtube_dl/extractor/bambuser.py +++ b/youtube_dl/extractor/bambuser.py @@ -59,7 +59,7 @@ class BambuserIE(InfoExtractor): self._LOGIN_URL, urlencode_postdata(login_form)) request.add_header('Referer', self._LOGIN_URL) response = self._download_webpage( - request, None, 'Logging in as %s' % username) + request, None, 'Logging in') login_error = self._html_search_regex( r'(?s)
<div id="messages">(.+?)</div>
', diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py index 5525f7c..8b20c03 100644 --- a/youtube_dl/extractor/bbc.py +++ b/youtube_dl/extractor/bbc.py @@ -386,7 +386,7 @@ class BBCCoUkIE(InfoExtractor): m3u8_id=format_id, fatal=False)) if re.search(self._USP_RE, href): usp_formats = self._extract_m3u8_formats( - re.sub(self._USP_RE, r'/\1\.ism/\1\.m3u8', href), + re.sub(self._USP_RE, r'/\1.ism/\1.m3u8', href), programme_id, ext='mp4', entry_protocol='m3u8_native', m3u8_id=format_id, fatal=False) for f in usp_formats: diff --git a/youtube_dl/extractor/br.py b/youtube_dl/extractor/br.py index 2c32b6a..9bde7f2 100644 --- a/youtube_dl/extractor/br.py +++ b/youtube_dl/extractor/br.py @@ -1,20 +1,23 @@ # coding: utf-8 from __future__ import unicode_literals +import json import re from .common import InfoExtractor from ..utils import ( + determine_ext, ExtractorError, int_or_none, parse_duration, + parse_iso8601, xpath_element, xpath_text, ) class BRIE(InfoExtractor): - IE_DESC = 'Bayerischer Rundfunk Mediathek' + IE_DESC = 'Bayerischer Rundfunk' _VALID_URL = r'(?Phttps?://(?:www\.)?br(?:-klassik)?\.de)/(?:[a-z0-9\-_]+/)+(?P[a-z0-9\-_]+)\.html' _TESTS = [ @@ -123,10 +126,10 @@ class BRIE(InfoExtractor): for asset in assets.findall('asset'): format_url = xpath_text(asset, ['downloadUrl', 'url']) asset_type = asset.get('type') - if asset_type == 'HDS': + if asset_type.startswith('HDS'): formats.extend(self._extract_f4m_formats( format_url + '?hdcore=3.2.0', media_id, f4m_id='hds', fatal=False)) - elif asset_type == 'HLS': + elif asset_type.startswith('HLS'): formats.extend(self._extract_m3u8_formats( format_url, media_id, 'mp4', 'm3u8_native', m3u8_id='hds', fatal=False)) else: @@ -169,3 +172,140 @@ class BRIE(InfoExtractor): } for variant in variants.findall('variant') if xpath_text(variant, 'url')] thumbnails.sort(key=lambda x: x['width'] * x['height'], reverse=True) return thumbnails + + +class BRMediathekIE(InfoExtractor): + IE_DESC = 'Bayerischer 
Rundfunk Mediathek' + _VALID_URL = r'https?://(?:www\.)?br\.de/mediathek/video/[^/?&#]*?-(?Pav:[0-9a-f]{24})' + + _TESTS = [{ + 'url': 'https://www.br.de/mediathek/video/gesundheit-die-sendung-vom-28112017-av:5a1e6a6e8fce6d001871cc8e', + 'md5': 'fdc3d485835966d1622587d08ba632ec', + 'info_dict': { + 'id': 'av:5a1e6a6e8fce6d001871cc8e', + 'ext': 'mp4', + 'title': 'Die Sendung vom 28.11.2017', + 'description': 'md5:6000cdca5912ab2277e5b7339f201ccc', + 'timestamp': 1511942766, + 'upload_date': '20171129', + } + }] + + def _real_extract(self, url): + clip_id = self._match_id(url) + + clip = self._download_json( + 'https://proxy-base.master.mango.express/graphql', + clip_id, data=json.dumps({ + "query": """{ + viewer { + clip(id: "%s") { + title + description + duration + createdAt + ageRestriction + videoFiles { + edges { + node { + publicLocation + fileSize + videoProfile { + width + height + bitrate + encoding + } + } + } + } + captionFiles { + edges { + node { + publicLocation + } + } + } + teaserImages { + edges { + node { + imageFiles { + edges { + node { + publicLocation + width + height + } + } + } + } + } + } + } + } +}""" % clip_id}).encode(), headers={ + 'Content-Type': 'application/json', + })['data']['viewer']['clip'] + title = clip['title'] + + formats = [] + for edge in clip.get('videoFiles', {}).get('edges', []): + node = edge.get('node', {}) + n_url = node.get('publicLocation') + if not n_url: + continue + ext = determine_ext(n_url) + if ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + n_url, clip_id, 'mp4', 'm3u8_native', + m3u8_id='hls', fatal=False)) + else: + video_profile = node.get('videoProfile', {}) + tbr = int_or_none(video_profile.get('bitrate')) + format_id = 'http' + if tbr: + format_id += '-%d' % tbr + formats.append({ + 'format_id': format_id, + 'url': n_url, + 'width': int_or_none(video_profile.get('width')), + 'height': int_or_none(video_profile.get('height')), + 'tbr': tbr, + 'filesize': int_or_none(node.get('fileSize')), + 
}) + self._sort_formats(formats) + + subtitles = {} + for edge in clip.get('captionFiles', {}).get('edges', []): + node = edge.get('node', {}) + n_url = node.get('publicLocation') + if not n_url: + continue + subtitles.setdefault('de', []).append({ + 'url': n_url, + }) + + thumbnails = [] + for edge in clip.get('teaserImages', {}).get('edges', []): + for image_edge in edge.get('node', {}).get('imageFiles', {}).get('edges', []): + node = image_edge.get('node', {}) + n_url = node.get('publicLocation') + if not n_url: + continue + thumbnails.append({ + 'url': n_url, + 'width': int_or_none(node.get('width')), + 'height': int_or_none(node.get('height')), + }) + + return { + 'id': clip_id, + 'title': title, + 'description': clip.get('description'), + 'duration': int_or_none(clip.get('duration')), + 'timestamp': parse_iso8601(clip.get('createdAt')), + 'age_limit': int_or_none(clip.get('ageRestriction')), + 'formats': formats, + 'subtitles': subtitles, + 'thumbnails': thumbnails, + } diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index 0ed59bc..f045050 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -464,7 +464,7 @@ class BrightcoveNewIE(AdobePassIE): 'timestamp': 1441391203, 'upload_date': '20150904', 'uploader_id': '929656772001', - 'formats': 'mincount:22', + 'formats': 'mincount:20', }, }, { # with rtmp streams @@ -478,7 +478,7 @@ class BrightcoveNewIE(AdobePassIE): 'timestamp': 1433556729, 'upload_date': '20150606', 'uploader_id': '4036320279001', - 'formats': 'mincount:41', + 'formats': 'mincount:39', }, 'params': { # m3u8 download @@ -564,59 +564,7 @@ class BrightcoveNewIE(AdobePassIE): return entries - def _real_extract(self, url): - url, smuggled_data = unsmuggle_url(url, {}) - self._initialize_geo_bypass(smuggled_data.get('geo_countries')) - - account_id, player_id, embed, video_id = re.match(self._VALID_URL, url).groups() - - webpage = self._download_webpage( - 
'http://players.brightcove.net/%s/%s_%s/index.min.js' - % (account_id, player_id, embed), video_id) - - policy_key = None - - catalog = self._search_regex( - r'catalog\(({.+?})\);', webpage, 'catalog', default=None) - if catalog: - catalog = self._parse_json( - js_to_json(catalog), video_id, fatal=False) - if catalog: - policy_key = catalog.get('policyKey') - - if not policy_key: - policy_key = self._search_regex( - r'policyKey\s*:\s*(["\'])(?P.+?)\1', - webpage, 'policy key', group='pk') - - api_url = 'https://edge.api.brightcove.com/playback/v1/accounts/%s/videos/%s' % (account_id, video_id) - try: - json_data = self._download_json(api_url, video_id, headers={ - 'Accept': 'application/json;pk=%s' % policy_key - }) - except ExtractorError as e: - if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: - json_data = self._parse_json(e.cause.read().decode(), video_id)[0] - message = json_data.get('message') or json_data['error_code'] - if json_data.get('error_subcode') == 'CLIENT_GEO': - self.raise_geo_restricted(msg=message) - raise ExtractorError(message, expected=True) - raise - - errors = json_data.get('errors') - if errors and errors[0].get('error_subcode') == 'TVE_AUTH': - custom_fields = json_data['custom_fields'] - tve_token = self._extract_mvpd_auth( - smuggled_data['source_url'], video_id, - custom_fields['bcadobepassrequestorid'], - custom_fields['bcadobepassresourceid']) - json_data = self._download_json( - api_url, video_id, headers={ - 'Accept': 'application/json;pk=%s' % policy_key - }, query={ - 'tveToken': tve_token, - }) - + def _parse_brightcove_metadata(self, json_data, video_id): title = json_data['name'].strip() formats = [] @@ -682,6 +630,7 @@ class BrightcoveNewIE(AdobePassIE): }) formats.append(f) + errors = json_data.get('errors') if not formats and errors: error = errors[0] raise ExtractorError( @@ -708,9 +657,64 @@ class BrightcoveNewIE(AdobePassIE): 'thumbnail': json_data.get('thumbnail') or json_data.get('poster'), 'duration': 
duration, 'timestamp': parse_iso8601(json_data.get('published_at')), - 'uploader_id': account_id, + 'uploader_id': json_data.get('account_id'), 'formats': formats, 'subtitles': subtitles, 'tags': json_data.get('tags', []), 'is_live': is_live, } + + def _real_extract(self, url): + url, smuggled_data = unsmuggle_url(url, {}) + self._initialize_geo_bypass(smuggled_data.get('geo_countries')) + + account_id, player_id, embed, video_id = re.match(self._VALID_URL, url).groups() + + webpage = self._download_webpage( + 'http://players.brightcove.net/%s/%s_%s/index.min.js' + % (account_id, player_id, embed), video_id) + + policy_key = None + + catalog = self._search_regex( + r'catalog\(({.+?})\);', webpage, 'catalog', default=None) + if catalog: + catalog = self._parse_json( + js_to_json(catalog), video_id, fatal=False) + if catalog: + policy_key = catalog.get('policyKey') + + if not policy_key: + policy_key = self._search_regex( + r'policyKey\s*:\s*(["\'])(?P.+?)\1', + webpage, 'policy key', group='pk') + + api_url = 'https://edge.api.brightcove.com/playback/v1/accounts/%s/videos/%s' % (account_id, video_id) + try: + json_data = self._download_json(api_url, video_id, headers={ + 'Accept': 'application/json;pk=%s' % policy_key + }) + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: + json_data = self._parse_json(e.cause.read().decode(), video_id)[0] + message = json_data.get('message') or json_data['error_code'] + if json_data.get('error_subcode') == 'CLIENT_GEO': + self.raise_geo_restricted(msg=message) + raise ExtractorError(message, expected=True) + raise + + errors = json_data.get('errors') + if errors and errors[0].get('error_subcode') == 'TVE_AUTH': + custom_fields = json_data['custom_fields'] + tve_token = self._extract_mvpd_auth( + smuggled_data['source_url'], video_id, + custom_fields['bcadobepassrequestorid'], + custom_fields['bcadobepassresourceid']) + json_data = self._download_json( + api_url, video_id, headers={ + 
'Accept': 'application/json;pk=%s' % policy_key + }, query={ + 'tveToken': tve_token, + }) + + return self._parse_brightcove_metadata(json_data, video_id) diff --git a/youtube_dl/extractor/byutv.py b/youtube_dl/extractor/byutv.py index 8ef0896..4bf4efe 100644 --- a/youtube_dl/extractor/byutv.py +++ b/youtube_dl/extractor/byutv.py @@ -3,20 +3,19 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import ExtractorError class BYUtvIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?byutv\.org/watch/(?!event/)(?P[0-9a-f-]+)(?:/(?P[^/?#&]+))?' + _VALID_URL = r'https?://(?:www\.)?byutv\.org/(?:watch|player)/(?!event/)(?P[0-9a-f-]+)(?:/(?P[^/?#&]+))?' _TESTS = [{ 'url': 'http://www.byutv.org/watch/6587b9a3-89d2-42a6-a7f7-fd2f81840a7d/studio-c-season-5-episode-5', 'info_dict': { - 'id': '6587b9a3-89d2-42a6-a7f7-fd2f81840a7d', + 'id': 'ZvanRocTpW-G5_yZFeltTAMv6jxOU9KH', 'display_id': 'studio-c-season-5-episode-5', 'ext': 'mp4', 'title': 'Season 5 Episode 5', - 'description': 'md5:e07269172baff037f8e8bf9956bc9747', - 'thumbnail': r're:^https?://.*\.jpg$', + 'description': 'md5:1d31dc18ef4f075b28f6a65937d22c65', + 'thumbnail': r're:^https?://.*', 'duration': 1486.486, }, 'params': { @@ -26,6 +25,9 @@ class BYUtvIE(InfoExtractor): }, { 'url': 'http://www.byutv.org/watch/6587b9a3-89d2-42a6-a7f7-fd2f81840a7d', 'only_matching': True, + }, { + 'url': 'https://www.byutv.org/player/27741493-dc83-40b0-8420-e7ae38a2ae98/byu-football-toledo-vs-byu-93016?listid=4fe0fee5-0d3c-4a29-b725-e4948627f472&listindex=0&q=toledo', + 'only_matching': True, }] def _real_extract(self, url): @@ -33,16 +35,16 @@ class BYUtvIE(InfoExtractor): video_id = mobj.group('id') display_id = mobj.group('display_id') or video_id - webpage = self._download_webpage(url, display_id) - episode_code = self._search_regex( - r'(?s)episode:(.*?\}),\s*\n', webpage, 'episode information') - - ep = self._parse_json( - episode_code, display_id, transform_source=lambda s: - 
re.sub(r'(\n\s+)([a-zA-Z]+):\s+\'(.*?)\'', r'\1"\2": "\3"', s)) - - if ep['providerType'] != 'Ooyala': - raise ExtractorError('Unsupported provider %s' % ep['provider']) + ep = self._download_json( + 'https://api.byutv.org/api3/catalog/getvideosforcontent', video_id, + query={ + 'contentid': video_id, + 'channel': 'byutv', + 'x-byutv-context': 'web$US', + }, headers={ + 'x-byutv-context': 'web$US', + 'x-byutv-platformkey': 'xsaaw9c7y5', + })['ooyalaVOD'] return { '_type': 'url_transparent', @@ -50,44 +52,7 @@ class BYUtvIE(InfoExtractor): 'url': 'ooyala:%s' % ep['providerId'], 'id': video_id, 'display_id': display_id, - 'title': ep['title'], + 'title': ep.get('title'), 'description': ep.get('description'), 'thumbnail': ep.get('imageThumbnail'), } - - -class BYUtvEventIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?byutv\.org/watch/event/(?P[0-9a-f-]+)' - _TEST = { - 'url': 'http://www.byutv.org/watch/event/29941b9b-8bf6-48d2-aebf-7a87add9e34b', - 'info_dict': { - 'id': '29941b9b-8bf6-48d2-aebf-7a87add9e34b', - 'ext': 'mp4', - 'title': 'Toledo vs. BYU (9/30/16)', - }, - 'params': { - 'skip_download': True, - }, - 'add_ie': ['Ooyala'], - } - - def _real_extract(self, url): - video_id = self._match_id(url) - - webpage = self._download_webpage(url, video_id) - - ooyala_id = self._search_regex( - r'providerId\s*:\s*(["\'])(?P(?:(?!\1).)+)\1', - webpage, 'ooyala id', group='id') - - title = self._search_regex( - r'class=["\']description["\'][^>]*>\s*

([^<]+)

', webpage, - 'title').strip() - - return { - '_type': 'url_transparent', - 'ie_key': 'Ooyala', - 'url': 'ooyala:%s' % ooyala_id, - 'id': video_id, - 'title': title, - } diff --git a/youtube_dl/extractor/cartoonnetwork.py b/youtube_dl/extractor/cartoonnetwork.py index 086ec90..6aeebd7 100644 --- a/youtube_dl/extractor/cartoonnetwork.py +++ b/youtube_dl/extractor/cartoonnetwork.py @@ -31,7 +31,7 @@ class CartoonNetworkIE(TurnerBaseIE): 'http://www.cartoonnetwork.com/video-seo-svc/episodeservices/getCvpPlaylist?networkName=CN2&' + query, video_id, { 'secure': { 'media_src': 'http://androidhls-secure.cdn.turner.com/toon/big', - 'tokenizer_src': 'http://www.cartoonnetwork.com/cntv/mvpd/processors/services/token_ipadAdobe.do', + 'tokenizer_src': 'https://token.vgtf.net/token/token_mobile', }, }, { 'url': url, diff --git a/youtube_dl/extractor/cbslocal.py b/youtube_dl/extractor/cbslocal.py index 7d78e3a..90852a9 100644 --- a/youtube_dl/extractor/cbslocal.py +++ b/youtube_dl/extractor/cbslocal.py @@ -91,12 +91,10 @@ class CBSLocalIE(AnvatoIE): info_dict = self._extract_anvato_videos(webpage, display_id) - time_str = self._html_search_regex( - r'class="entry-date">([^<]+)<', webpage, 'released date', default=None) - if time_str: - timestamp = unified_timestamp(time_str) - else: - timestamp = parse_iso8601(self._html_search_meta('uploadDate', webpage)) + timestamp = unified_timestamp(self._html_search_regex( + r'class="(?:entry|post)-date"[^>]*>([^<]+)', webpage, + 'released date', default=None)) or parse_iso8601( + self._html_search_meta('uploadDate', webpage)) info_dict.update({ 'display_id': display_id, diff --git a/youtube_dl/extractor/ccma.py b/youtube_dl/extractor/ccma.py index 39938c9..bec0a82 100644 --- a/youtube_dl/extractor/ccma.py +++ b/youtube_dl/extractor/ccma.py @@ -93,7 +93,7 @@ class CCMAIE(InfoExtractor): 'description': clean_html(informacio.get('descripcio')), 'duration': duration, 'timestamp': timestamp, - 'thumnails': thumbnails, + 'thumbnails': 
thumbnails, 'subtitles': subtitles, 'formats': formats, } diff --git a/youtube_dl/extractor/collegerama.py b/youtube_dl/extractor/collegerama.py deleted file mode 100644 index 6a41db8..0000000 --- a/youtube_dl/extractor/collegerama.py +++ /dev/null @@ -1,93 +0,0 @@ -from __future__ import unicode_literals - -import json - -from .common import InfoExtractor -from ..utils import ( - float_or_none, - int_or_none, - sanitized_Request, -) - - -class CollegeRamaIE(InfoExtractor): - _VALID_URL = r'https?://collegerama\.tudelft\.nl/Mediasite/Play/(?P[\da-f]+)' - _TESTS = [ - { - 'url': 'https://collegerama.tudelft.nl/Mediasite/Play/585a43626e544bdd97aeb71a0ec907a01d', - 'md5': '481fda1c11f67588c0d9d8fbdced4e39', - 'info_dict': { - 'id': '585a43626e544bdd97aeb71a0ec907a01d', - 'ext': 'mp4', - 'title': 'Een nieuwe wereld: waarden, bewustzijn en techniek van de mensheid 2.0.', - 'description': '', - 'thumbnail': r're:^https?://.*\.jpg(?:\?.*?)?$', - 'duration': 7713.088, - 'timestamp': 1413309600, - 'upload_date': '20141014', - }, - }, - { - 'url': 'https://collegerama.tudelft.nl/Mediasite/Play/86a9ea9f53e149079fbdb4202b521ed21d?catalog=fd32fd35-6c99-466c-89d4-cd3c431bc8a4', - 'md5': 'ef1fdded95bdf19b12c5999949419c92', - 'info_dict': { - 'id': '86a9ea9f53e149079fbdb4202b521ed21d', - 'ext': 'wmv', - 'title': '64ste Vakantiecursus: Afvalwater', - 'description': 'md5:7fd774865cc69d972f542b157c328305', - 'thumbnail': r're:^https?://.*\.jpg(?:\?.*?)?$', - 'duration': 10853, - 'timestamp': 1326446400, - 'upload_date': '20120113', - }, - }, - ] - - def _real_extract(self, url): - video_id = self._match_id(url) - - player_options_request = { - 'getPlayerOptionsRequest': { - 'ResourceId': video_id, - 'QueryString': '', - } - } - - request = sanitized_Request( - 'http://collegerama.tudelft.nl/Mediasite/PlayerService/PlayerService.svc/json/GetPlayerOptions', - json.dumps(player_options_request)) - request.add_header('Content-Type', 'application/json') - - player_options = 
self._download_json(request, video_id) - - presentation = player_options['d']['Presentation'] - title = presentation['Title'] - description = presentation.get('Description') - thumbnail = None - duration = float_or_none(presentation.get('Duration'), 1000) - timestamp = int_or_none(presentation.get('UnixTime'), 1000) - - formats = [] - for stream in presentation['Streams']: - for video in stream['VideoUrls']: - thumbnail_url = stream.get('ThumbnailUrl') - if thumbnail_url: - thumbnail = 'http://collegerama.tudelft.nl' + thumbnail_url - format_id = video['MediaType'] - if format_id == 'SS': - continue - formats.append({ - 'url': video['Location'], - 'format_id': format_id, - }) - self._sort_formats(formats) - - return { - 'id': video_id, - 'title': title, - 'description': description, - 'thumbnail': thumbnail, - 'duration': duration, - 'timestamp': timestamp, - 'formats': formats, - } diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index e2d9f52..5b6a09c 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -301,8 +301,9 @@ class InfoExtractor(object): There must be a key "entries", which is a list, an iterable, or a PagedList object, each element of which is a valid dictionary by this specification. - Additionally, playlists can have "title", "description" and "id" attributes - with the same semantics as videos (see above). + Additionally, playlists can have "id", "title", "description", "uploader", + "uploader_id", "uploader_url" attributes with the same semantics as videos + (see above). _type "multi_video" indicates that there are multiple videos that @@ -494,6 +495,16 @@ class InfoExtractor(object): self.to_screen('%s' % (note,)) else: self.to_screen('%s: %s' % (video_id, note)) + + # Some sites check X-Forwarded-For HTTP header in order to figure out + # the origin of the client behind proxy. 
This allows bypassing geo + # restriction by faking this header's value to IP that belongs to some + # geo unrestricted country. We will do so once we encounter any + # geo restriction error. + if self._x_forwarded_for_ip: + if 'X-Forwarded-For' not in headers: + headers['X-Forwarded-For'] = self._x_forwarded_for_ip + if isinstance(url_or_request, compat_urllib_request.Request): url_or_request = update_Request( url_or_request, data=data, headers=headers, query=query) @@ -523,15 +534,6 @@ class InfoExtractor(object): if isinstance(url_or_request, (compat_str, str)): url_or_request = url_or_request.partition('#')[0] - # Some sites check X-Forwarded-For HTTP header in order to figure out - # the origin of the client behind proxy. This allows bypassing geo - # restriction by faking this header's value to IP that belongs to some - # geo unrestricted country. We will do so once we encounter any - # geo restriction error. - if self._x_forwarded_for_ip: - if 'X-Forwarded-For' not in headers: - headers['X-Forwarded-For'] = self._x_forwarded_for_ip - urlh = self._request_webpage(url_or_request, video_id, note, errnote, fatal, data=data, headers=headers, query=query) if urlh is False: assert not fatal @@ -592,19 +594,11 @@ class InfoExtractor(object): if not encoding: encoding = self._guess_encoding_from_content(content_type, webpage_bytes) if self._downloader.params.get('dump_intermediate_pages', False): - try: - url = url_or_request.get_full_url() - except AttributeError: - url = url_or_request - self.to_screen('Dumping request to ' + url) + self.to_screen('Dumping request to ' + urlh.geturl()) dump = base64.b64encode(webpage_bytes).decode('ascii') self._downloader.to_screen(dump) if self._downloader.params.get('write_pages', False): - try: - url = url_or_request.get_full_url() - except AttributeError: - url = url_or_request - basen = '%s_%s' % (video_id, url) + basen = '%s_%s' % (video_id, urlh.geturl()) if len(basen) > 240: h = '___' + 
hashlib.md5(basen.encode('utf-8')).hexdigest() basen = basen[:240 - len(h)] + h @@ -1356,6 +1350,9 @@ class InfoExtractor(object): if '#EXT-X-FAXS-CM:' in m3u8_doc: # Adobe Flash Access return [] + if re.search(r'#EXT-X-SESSION-KEY:.*?URI="skd://', m3u8_doc): # Apple FairPlay + return [] + formats = [] format_url = lambda u: ( @@ -1883,6 +1880,7 @@ class InfoExtractor(object): 'language': lang if lang not in ('mul', 'und', 'zxx', 'mis') else None, 'format_note': 'DASH %s' % content_type, 'filesize': filesize, + 'container': mimetype2ext(mime_type) + '_dash', } f.update(parse_codecs(representation_attrib.get('codecs'))) representation_ms_info = extract_multisegment_info(representation, adaption_set_ms_info) @@ -1980,6 +1978,22 @@ class InfoExtractor(object): }) segment_index += 1 representation_ms_info['fragments'] = fragments + elif 'segment_urls' in representation_ms_info: + # Segment URLs with no SegmentTimeline + # Example: https://www.seznam.cz/zpravy/clanek/cesko-zasahne-vitr-o-sile-vichrice-muze-byt-i-zivotu-nebezpecny-39091 + # https://github.com/rg3/youtube-dl/pull/14844 + fragments = [] + segment_duration = float_or_none( + representation_ms_info['segment_duration'], + representation_ms_info['timescale']) if 'segment_duration' in representation_ms_info else None + for segment_url in representation_ms_info['segment_urls']: + fragment = { + location_key(segment_url): segment_url, + } + if segment_duration: + fragment['duration'] = segment_duration + fragments.append(fragment) + representation_ms_info['fragments'] = fragments # NB: MPD manifest may contain direct URLs to unfragmented media. # No fragments key is present in this case. 
if 'fragments' in representation_ms_info: @@ -1994,16 +2008,14 @@ class InfoExtractor(object): f['url'] = initialization_url f['fragments'].append({location_key(initialization_url): initialization_url}) f['fragments'].extend(representation_ms_info['fragments']) - try: - existing_format = next( - fo for fo in formats - if fo['format_id'] == representation_id) - except StopIteration: - full_info = formats_dict.get(representation_id, {}).copy() - full_info.update(f) - formats.append(full_info) - else: - existing_format.update(f) + # According to [1, 5.3.5.2, Table 7, page 35] @id of Representation + # is not necessarily unique within a Period thus formats with + # the same `format_id` are quite possible. There are numerous examples + # of such manifests (see https://github.com/rg3/youtube-dl/issues/15111, + # https://github.com/rg3/youtube-dl/issues/13919) + full_info = formats_dict.get(representation_id, {}).copy() + full_info.update(f) + formats.append(full_info) else: self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type) return formats @@ -2043,7 +2055,7 @@ class InfoExtractor(object): stream_timescale = int_or_none(stream.get('TimeScale')) or timescale stream_name = stream.get('Name') for track in stream.findall('QualityLevel'): - fourcc = track.get('FourCC') + fourcc = track.get('FourCC', 'AACL' if track.get('AudioTag') == '255' else None) # TODO: add support for WVC1 and WMAP if fourcc not in ('H264', 'AVC1', 'AACL'): self.report_warning('%s is not a supported codec' % fourcc) diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py index 8bdaf0c..b92f254 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ b/youtube_dl/extractor/crunchyroll.py @@ -38,11 +38,32 @@ class CrunchyrollBaseIE(InfoExtractor): _LOGIN_FORM = 'login_form' _NETRC_MACHINE = 'crunchyroll' + def _call_rpc_api(self, method, video_id, note=None, data=None): + data = data or {} + data['req'] = 'RpcApi' + method + data = 
compat_urllib_parse_urlencode(data).encode('utf-8') + return self._download_xml( + 'http://www.crunchyroll.com/xml/', + video_id, note, fatal=False, data=data, headers={ + 'Content-Type': 'application/x-www-form-urlencoded', + }) + def _login(self): (username, password) = self._get_login_info() if username is None: return + self._download_webpage( + 'https://www.crunchyroll.com/?a=formhandler', + None, 'Logging in', 'Wrong login info', + data=urlencode_postdata({ + 'formname': 'RpcApiUser_Login', + 'next_url': 'https://www.crunchyroll.com/acct/membership', + 'name': username, + 'password': password, + })) + + ''' login_page = self._download_webpage( self._LOGIN_URL, None, 'Downloading login page') @@ -86,6 +107,7 @@ class CrunchyrollBaseIE(InfoExtractor): raise ExtractorError('Unable to login: %s' % error, expected=True) raise ExtractorError('Unable to log in') + ''' def _real_initialize(self): self._login() @@ -365,15 +387,19 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text def _get_subtitles(self, video_id, webpage): subtitles = {} for sub_id, sub_name in re.findall(r'\bssid=([0-9]+)"[^>]+?\btitle="([^"]+)', webpage): - sub_page = self._download_webpage( - 'http://www.crunchyroll.com/xml/?req=RpcApiSubtitle_GetXml&subtitle_script_id=' + sub_id, - video_id, note='Downloading subtitles for ' + sub_name) - id = self._search_regex(r'id=\'([0-9]+)', sub_page, 'subtitle_id', fatal=False) - iv = self._search_regex(r'([^<]+)', sub_page, 'subtitle_iv', fatal=False) - data = self._search_regex(r'([^<]+)', sub_page, 'subtitle_data', fatal=False) - if not id or not iv or not data: + sub_doc = self._call_rpc_api( + 'Subtitle_GetXml', video_id, + 'Downloading subtitles for ' + sub_name, data={ + 'subtitle_script_id': sub_id, + }) + if sub_doc is None: continue - subtitle = self._decrypt_subtitles(data, iv, id).decode('utf-8') + sid = sub_doc.get('id') + iv = xpath_text(sub_doc, 'iv', 'subtitle iv') + data = xpath_text(sub_doc, 'data', 
'subtitle data') + if not sid or not iv or not data: + continue + subtitle = self._decrypt_subtitles(data, iv, sid).decode('utf-8') lang_code = self._search_regex(r'lang_code=["\']([^"\']+)', subtitle, 'subtitle_lang_code', fatal=False) if not lang_code: continue @@ -444,65 +470,79 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text for fmt in available_fmts: stream_quality, stream_format = self._FORMAT_IDS[fmt] video_format = fmt + 'p' - streamdata_req = sanitized_Request( - 'http://www.crunchyroll.com/xml/?req=RpcApiVideoPlayer_GetStandardConfig&media_id=%s&video_format=%s&video_quality=%s' - % (video_id, stream_format, stream_quality), - compat_urllib_parse_urlencode({'current_page': url}).encode('utf-8')) - streamdata_req.add_header('Content-Type', 'application/x-www-form-urlencoded') - streamdata = self._download_xml( - streamdata_req, video_id, - note='Downloading media info for %s' % video_format) - stream_info = streamdata.find('./{default}preload/stream_info') - video_encode_id = xpath_text(stream_info, './video_encode_id') - if video_encode_id in video_encode_ids: - continue - video_encode_ids.append(video_encode_id) - - video_file = xpath_text(stream_info, './file') - if not video_file: - continue - if video_file.startswith('http'): - formats.extend(self._extract_m3u8_formats( - video_file, video_id, 'mp4', entry_protocol='m3u8_native', - m3u8_id='hls', fatal=False)) - continue - - video_url = xpath_text(stream_info, './host') - if not video_url: - continue - metadata = stream_info.find('./metadata') - format_info = { - 'format': video_format, - 'format_id': video_format, - 'height': int_or_none(xpath_text(metadata, './height')), - 'width': int_or_none(xpath_text(metadata, './width')), - } - - if '.fplive.net/' in video_url: - video_url = re.sub(r'^rtmpe?://', 'http://', video_url.strip()) - parsed_video_url = compat_urlparse.urlparse(video_url) - direct_video_url = compat_urlparse.urlunparse(parsed_video_url._replace( - 
netloc='v.lvlt.crcdn.net', - path='%s/%s' % (remove_end(parsed_video_url.path, '/'), video_file.split(':')[-1]))) - if self._is_valid_url(direct_video_url, video_id, video_format): - format_info.update({ - 'url': direct_video_url, - }) - formats.append(format_info) + stream_infos = [] + streamdata = self._call_rpc_api( + 'VideoPlayer_GetStandardConfig', video_id, + 'Downloading media info for %s' % video_format, data={ + 'media_id': video_id, + 'video_format': stream_format, + 'video_quality': stream_quality, + 'current_page': url, + }) + if streamdata is not None: + stream_info = streamdata.find('./{default}preload/stream_info') + if stream_info is not None: + stream_infos.append(stream_info) + stream_info = self._call_rpc_api( + 'VideoEncode_GetStreamInfo', video_id, + 'Downloading stream info for %s' % video_format, data={ + 'media_id': video_id, + 'video_format': stream_format, + 'video_encode_quality': stream_quality, + }) + if stream_info is not None: + stream_infos.append(stream_info) + for stream_info in stream_infos: + video_encode_id = xpath_text(stream_info, './video_encode_id') + if video_encode_id in video_encode_ids: continue + video_encode_ids.append(video_encode_id) - format_info.update({ - 'url': video_url, - 'play_path': video_file, - 'ext': 'flv', - }) - formats.append(format_info) - self._sort_formats(formats) + video_file = xpath_text(stream_info, './file') + if not video_file: + continue + if video_file.startswith('http'): + formats.extend(self._extract_m3u8_formats( + video_file, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls', fatal=False)) + continue - metadata = self._download_xml( - 'http://www.crunchyroll.com/xml', video_id, - note='Downloading media info', query={ - 'req': 'RpcApiVideoPlayer_GetMediaMetadata', + video_url = xpath_text(stream_info, './host') + if not video_url: + continue + metadata = stream_info.find('./metadata') + format_info = { + 'format': video_format, + 'height': int_or_none(xpath_text(metadata, 
'./height')), + 'width': int_or_none(xpath_text(metadata, './width')), + } + + if '.fplive.net/' in video_url: + video_url = re.sub(r'^rtmpe?://', 'http://', video_url.strip()) + parsed_video_url = compat_urlparse.urlparse(video_url) + direct_video_url = compat_urlparse.urlunparse(parsed_video_url._replace( + netloc='v.lvlt.crcdn.net', + path='%s/%s' % (remove_end(parsed_video_url.path, '/'), video_file.split(':')[-1]))) + if self._is_valid_url(direct_video_url, video_id, video_format): + format_info.update({ + 'format_id': 'http-' + video_format, + 'url': direct_video_url, + }) + formats.append(format_info) + continue + + format_info.update({ + 'format_id': 'rtmp-' + video_format, + 'url': video_url, + 'play_path': video_file, + 'ext': 'flv', + }) + formats.append(format_info) + self._sort_formats(formats, ('height', 'width', 'tbr', 'fps')) + + metadata = self._call_rpc_api( + 'VideoPlayer_GetMediaMetadata', video_id, + note='Downloading media info', data={ 'media_id': video_id, }) diff --git a/youtube_dl/extractor/cspan.py b/youtube_dl/extractor/cspan.py index 171820e..67d6df4 100644 --- a/youtube_dl/extractor/cspan.py +++ b/youtube_dl/extractor/cspan.py @@ -4,13 +4,14 @@ import re from .common import InfoExtractor from ..utils import ( - int_or_none, - unescapeHTML, - find_xpath_attr, - smuggle_url, determine_ext, ExtractorError, extract_attributes, + find_xpath_attr, + get_element_by_class, + int_or_none, + smuggle_url, + unescapeHTML, ) from .senateisvp import SenateISVPIE from .ustream import UstreamIE @@ -68,6 +69,10 @@ class CSpanIE(InfoExtractor): 'uploader': 'HouseCommittee', 'uploader_id': '12987475', }, + }, { + # Audio Only + 'url': 'https://www.c-span.org/video/?437336-1/judiciary-antitrust-competition-policy-consumer-rights', + 'only_matching': True, }] BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/%s_%s/index.html?videoId=%s' @@ -111,7 +116,15 @@ class CSpanIE(InfoExtractor): title = self._og_search_title(webpage) surl = 
smuggle_url(senate_isvp_url, {'force_title': title}) return self.url_result(surl, 'SenateISVP', video_id, title) + video_id = self._search_regex( + r'jwsetup\.clipprog\s*=\s*(\d+);', + webpage, 'jwsetup program id', default=None) + if video_id: + video_type = 'program' if video_type is None or video_id is None: + error_message = get_element_by_class('VLplayer-error-message', webpage) + if error_message: + raise ExtractorError(error_message) raise ExtractorError('unable to find video id and type') def get_text_attr(d, attr): @@ -138,7 +151,7 @@ class CSpanIE(InfoExtractor): entries = [] for partnum, f in enumerate(files): formats = [] - for quality in f['qualities']: + for quality in f.get('qualities', []): formats.append({ 'format_id': '%s-%sp' % (get_text_attr(quality, 'bitrate'), get_text_attr(quality, 'height')), 'url': unescapeHTML(get_text_attr(quality, 'file')), diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py index 21a2d02..0e7d587 100644 --- a/youtube_dl/extractor/dailymotion.py +++ b/youtube_dl/extractor/dailymotion.py @@ -413,52 +413,3 @@ class DailymotionUserIE(DailymotionPlaylistIE): 'title': full_user, 'entries': self._extract_entries(user), } - - -class DailymotionCloudIE(DailymotionBaseInfoExtractor): - _VALID_URL_PREFIX = r'https?://api\.dmcloud\.net/(?:player/)?embed/' - _VALID_URL = r'%s[^/]+/(?P[^/?]+)' % _VALID_URL_PREFIX - _VALID_EMBED_URL = r'%s[^/]+/[^\'"]+' % _VALID_URL_PREFIX - - _TESTS = [{ - # From http://www.francetvinfo.fr/economie/entreprises/les-entreprises-familiales-le-secret-de-la-reussite_933271.html - # Tested at FranceTvInfo_2 - 'url': 'http://api.dmcloud.net/embed/4e7343f894a6f677b10006b4/556e03339473995ee145930c?auth=1464865870-0-jyhsm84b-ead4c701fb750cf9367bf4447167a3db&autoplay=1', - 'only_matching': True, - }, { - # http://www.francetvinfo.fr/societe/larguez-les-amarres-le-cobaturage-se-developpe_980101.html - 'url': 
'http://api.dmcloud.net/player/embed/4e7343f894a6f677b10006b4/559545469473996d31429f06?auth=1467430263-0-90tglw2l-a3a4b64ed41efe48d7fccad85b8b8fda&autoplay=1', - 'only_matching': True, - }] - - @classmethod - def _extract_dmcloud_url(cls, webpage): - mobj = re.search(r']+src=[\'"](%s)[\'"]' % cls._VALID_EMBED_URL, webpage) - if mobj: - return mobj.group(1) - - mobj = re.search( - r']+id=[\'"]dmcloudUrlEmissionSelect[\'"][^>]+value=[\'"](%s)[\'"]' % cls._VALID_EMBED_URL, - webpage) - if mobj: - return mobj.group(1) - - def _real_extract(self, url): - video_id = self._match_id(url) - - webpage = self._download_webpage_no_ff(url, video_id) - - title = self._html_search_regex(r'([^>]+)', webpage, 'title') - - video_info = self._parse_json(self._search_regex( - r'var\s+info\s*=\s*([^;]+);', webpage, 'video info'), video_id) - - # TODO: parse ios_url, which is in fact a manifest - video_url = video_info['mp4_url'] - - return { - 'id': video_id, - 'url': video_url, - 'title': title, - 'thumbnail': video_info.get('thumbnail_url'), - } diff --git a/youtube_dl/extractor/daisuki.py b/youtube_dl/extractor/daisuki.py index 58cc986..5c9ac68 100644 --- a/youtube_dl/extractor/daisuki.py +++ b/youtube_dl/extractor/daisuki.py @@ -13,33 +13,30 @@ from ..aes import ( from ..utils import ( bytes_to_intlist, bytes_to_long, - clean_html, + extract_attributes, ExtractorError, intlist_to_bytes, - get_element_by_id, js_to_json, int_or_none, long_to_bytes, pkcs1pad, - remove_end, ) -class DaisukiIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?daisuki\.net/[^/]+/[^/]+/[^/]+/watch\.[^.]+\.(?P\d+)\.html' +class DaisukiMottoIE(InfoExtractor): + _VALID_URL = r'https?://motto\.daisuki\.net/framewatch/embed/[^/]+/(?P[0-9a-zA-Z]{3})' _TEST = { - 'url': 'http://www.daisuki.net/tw/en/anime/watch.TheIdolMasterCG.11213.html', + 'url': 'http://motto.daisuki.net/framewatch/embed/embedDRAGONBALLSUPERUniverseSurvivalsaga/V2e/760/428', 'info_dict': { - 'id': '11213', + 'id': 'V2e', 'ext': 'mp4', - 
'title': '#01 Who is in the pumpkin carriage? - THE IDOLM@STER CINDERELLA GIRLS', + 'title': '#117 SHOWDOWN OF LOVE! ANDROIDS VS UNIVERSE 2!!', 'subtitles': { 'mul': [{ 'ext': 'ttml', }], }, - 'creator': 'BANDAI NAMCO Entertainment', }, 'params': { 'skip_download': True, # AES-encrypted HLS stream @@ -73,15 +70,17 @@ class DaisukiIE(InfoExtractor): n, e = self._RSA_KEY encrypted_aeskey = long_to_bytes(pow(bytes_to_long(padded_aeskey), e, n)) - init_data = self._download_json('http://www.daisuki.net/bin/bgn/init', video_id, query={ - 's': flashvars.get('s', ''), - 'c': flashvars.get('ss3_prm', ''), - 'e': url, - 'd': base64.b64encode(intlist_to_bytes(aes_cbc_encrypt( - bytes_to_intlist(json.dumps(data)), - aes_key, iv))).decode('ascii'), - 'a': base64.b64encode(encrypted_aeskey).decode('ascii'), - }, note='Downloading JSON metadata' + (' (try #%d)' % (idx + 1) if idx > 0 else '')) + init_data = self._download_json( + 'http://motto.daisuki.net/fastAPI/bgn/init/', + video_id, query={ + 's': flashvars.get('s', ''), + 'c': flashvars.get('ss3_prm', ''), + 'e': url, + 'd': base64.b64encode(intlist_to_bytes(aes_cbc_encrypt( + bytes_to_intlist(json.dumps(data)), + aes_key, iv))).decode('ascii'), + 'a': base64.b64encode(encrypted_aeskey).decode('ascii'), + }, note='Downloading JSON metadata' + (' (try #%d)' % (idx + 1) if idx > 0 else '')) if 'rtn' in init_data: encrypted_rtn = init_data['rtn'] @@ -98,14 +97,11 @@ class DaisukiIE(InfoExtractor): aes_key, iv)).decode('utf-8').rstrip('\0'), video_id) + title = rtn['title_str'] + formats = self._extract_m3u8_formats( rtn['play_url'], video_id, ext='mp4', entry_protocol='m3u8_native') - title = remove_end(self._og_search_title(webpage), ' - DAISUKI') - - creator = self._html_search_regex( - r'Creator\s*:\s*([^<]+)', webpage, 'creator', fatal=False) - subtitles = {} caption_url = rtn.get('caption_url') if caption_url: @@ -120,21 +116,18 @@ class DaisukiIE(InfoExtractor): 'title': title, 'formats': formats, 'subtitles': subtitles, 
- 'creator': creator, } -class DaisukiPlaylistIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)daisuki\.net/[^/]+/[^/]+/[^/]+/detail\.(?P[a-zA-Z0-9]+)\.html' +class DaisukiMottoPlaylistIE(InfoExtractor): + _VALID_URL = r'https?://motto\.daisuki\.net/(?Pinformation)/' _TEST = { - 'url': 'http://www.daisuki.net/tw/en/anime/detail.TheIdolMasterCG.html', + 'url': 'http://motto.daisuki.net/information/', 'info_dict': { - 'id': 'TheIdolMasterCG', - 'title': 'THE IDOLM@STER CINDERELLA GIRLS', - 'description': 'md5:0f2c028a9339f7a2c7fbf839edc5c5d8', + 'title': 'DRAGON BALL SUPER', }, - 'playlist_count': 26, + 'playlist_mincount': 117, } def _real_extract(self, url): @@ -142,18 +135,19 @@ class DaisukiPlaylistIE(InfoExtractor): webpage = self._download_webpage(url, playlist_id) - episode_pattern = r'''(?sx) - ]+delay="[^"]+/(\d+)/movie\.jpg".+? - ]+class=".*?\bepisodeNumber\b.*?">(?:]+>)?([^<]+)''' - entries = [{ - '_type': 'url_transparent', - 'url': url.replace('detail', 'watch').replace('.html', '.' 
+ movie_id + '.html'), - 'episode_id': episode_id, - 'episode_number': int_or_none(episode_id), - } for movie_id, episode_id in re.findall(episode_pattern, webpage)] - - playlist_title = remove_end( - self._og_search_title(webpage, fatal=False), ' - Anime - DAISUKI') - playlist_description = clean_html(get_element_by_id('synopsisTxt', webpage)) - - return self.playlist_result(entries, playlist_id, playlist_title, playlist_description) + entries = [] + for li in re.findall(r'(]+?data-product_id="[a-zA-Z0-9]{3}"[^>]+>)', webpage): + attr = extract_attributes(li) + ad_id = attr.get('data-ad_id') + product_id = attr.get('data-product_id') + if ad_id and product_id: + episode_id = attr.get('data-chapter') + entries.append({ + '_type': 'url_transparent', + 'url': 'http://motto.daisuki.net/framewatch/embed/%s/%s/760/428' % (ad_id, product_id), + 'episode_id': episode_id, + 'episode_number': int_or_none(episode_id), + 'ie_key': 'DaisukiMotto', + }) + + return self.playlist_result(entries, playlist_title='DRAGON BALL SUPER') diff --git a/youtube_dl/extractor/discovery.py b/youtube_dl/extractor/discovery.py index 55853f7..f9cec1d 100644 --- a/youtube_dl/extractor/discovery.py +++ b/youtube_dl/extractor/discovery.py @@ -1,14 +1,18 @@ from __future__ import unicode_literals -from .common import InfoExtractor +import random +import re +import string + +from .discoverygo import DiscoveryGoBaseIE from ..utils import ( - parse_duration, - parse_iso8601, + ExtractorError, + update_url_query, ) -from ..compat import compat_str +from ..compat import compat_HTTPError -class DiscoveryIE(InfoExtractor): +class DiscoveryIE(DiscoveryGoBaseIE): _VALID_URL = r'''(?x)https?://(?:www\.)?(?: discovery| investigationdiscovery| @@ -19,79 +23,65 @@ class DiscoveryIE(InfoExtractor): sciencechannel| tlc| velocity - )\.com/(?:[^/]+/)*(?P[^./?#]+)''' + )\.com(?P/tv-shows/[^/]+/(?:video|full-episode)s/(?P[^./?#]+))''' _TESTS = [{ - 'url': 
'http://www.discovery.com/tv-shows/mythbusters/videos/mission-impossible-outtakes.htm', + 'url': 'https://www.discovery.com/tv-shows/cash-cab/videos/dave-foley', 'info_dict': { - 'id': '20769', + 'id': '5a2d9b4d6b66d17a5026e1fd', 'ext': 'mp4', - 'title': 'Mission Impossible Outtakes', - 'description': ('Watch Jamie Hyneman and Adam Savage practice being' - ' each other -- to the point of confusing Jamie\'s dog -- and ' - 'don\'t miss Adam moon-walking as Jamie ... behind Jamie\'s' - ' back.'), - 'duration': 156, - 'timestamp': 1302032462, - 'upload_date': '20110405', - 'uploader_id': '103207', + 'title': 'Dave Foley', + 'description': 'md5:4b39bcafccf9167ca42810eb5f28b01f', + 'duration': 608, }, 'params': { 'skip_download': True, # requires ffmpeg } }, { - 'url': 'http://www.discovery.com/tv-shows/mythbusters/videos/mythbusters-the-simpsons', - 'info_dict': { - 'id': 'mythbusters-the-simpsons', - 'title': 'MythBusters: The Simpsons', - }, - 'playlist_mincount': 10, - }, { - 'url': 'http://www.animalplanet.com/longfin-eels-maneaters/', - 'info_dict': { - 'id': '78326', - 'ext': 'mp4', - 'title': 'Longfin Eels: Maneaters?', - 'description': 'Jeremy Wade tests whether or not New Zealand\'s longfin eels are man-eaters by covering himself in fish guts and getting in the water with them.', - 'upload_date': '20140725', - 'timestamp': 1406246400, - 'duration': 116, - 'uploader_id': '103207', - }, - 'params': { - 'skip_download': True, # requires ffmpeg - } + 'url': 'https://www.investigationdiscovery.com/tv-shows/final-vision/full-episodes/final-vision', + 'only_matching': True, }] + _GEO_COUNTRIES = ['US'] + _GEO_BYPASS = False def _real_extract(self, url): - display_id = self._match_id(url) - info = self._download_json(url + '?flat=1', display_id) - - video_title = info.get('playlist_title') or info.get('video_title') + path, display_id = re.match(self._VALID_URL, url).groups() + webpage = self._download_webpage(url, display_id) - entries = [] + react_data = 
self._parse_json(self._search_regex( + r'window\.__reactTransmitPacket\s*=\s*({.+?});', + webpage, 'react data'), display_id) + content_blocks = react_data['layout'][path]['contentBlocks'] + video = next(cb for cb in content_blocks if cb.get('type') == 'video')['content']['items'][0] + video_id = video['id'] - for idx, video_info in enumerate(info['playlist']): - subtitles = {} - caption_url = video_info.get('captionsUrl') - if caption_url: - subtitles = { - 'en': [{ - 'url': caption_url, - }] - } + access_token = self._download_json( + 'https://www.discovery.com/anonymous', display_id, query={ + 'authLink': update_url_query( + 'https://login.discovery.com/v1/oauth2/authorize', { + 'client_id': react_data['application']['apiClientId'], + 'redirect_uri': 'https://fusion.ddmcdn.com/app/mercury-sdk/180/redirectHandler.html', + 'response_type': 'anonymous', + 'state': 'nonce,' + ''.join([random.choice(string.ascii_letters) for _ in range(32)]), + }) + })['access_token'] - entries.append({ - '_type': 'url_transparent', - 'url': 'http://players.brightcove.net/103207/default_default/index.html?videoId=ref:%s' % video_info['referenceId'], - 'id': compat_str(video_info['id']), - 'title': video_info['title'], - 'description': video_info.get('description'), - 'duration': parse_duration(video_info.get('video_length')), - 'webpage_url': video_info.get('href') or video_info.get('url'), - 'thumbnail': video_info.get('thumbnailURL'), - 'alt_title': video_info.get('secondary_title'), - 'timestamp': parse_iso8601(video_info.get('publishedDate')), - 'subtitles': subtitles, - }) + try: + stream = self._download_json( + 'https://api.discovery.com/v1/streaming/video/' + video_id, + display_id, headers={ + 'Authorization': 'Bearer ' + access_token, + }) + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: + e_description = self._parse_json( + e.cause.read().decode(), display_id)['description'] + if 'resource not available for country' in 
e_description: + self.raise_geo_restricted(countries=self._GEO_COUNTRIES) + if 'Authorized Networks' in e_description: + raise ExtractorError( + 'This video is only available via cable service provider subscription that' + ' is not currently supported. You may want to use --cookies.', expected=True) + raise ExtractorError(e_description) + raise - return self.playlist_result(entries, display_id, video_title) + return self._extract_video_info(video, stream, display_id) diff --git a/youtube_dl/extractor/discoverygo.py b/youtube_dl/extractor/discoverygo.py index 7cd5d42..3368c4c 100644 --- a/youtube_dl/extractor/discoverygo.py +++ b/youtube_dl/extractor/discoverygo.py @@ -5,6 +5,7 @@ import re from .common import InfoExtractor from ..compat import compat_str from ..utils import ( + determine_ext, extract_attributes, ExtractorError, int_or_none, @@ -27,42 +28,9 @@ class DiscoveryGoBaseIE(InfoExtractor): velocitychannel )go\.com/%s(?P[^/?#&]+)''' - -class DiscoveryGoIE(DiscoveryGoBaseIE): - _VALID_URL = DiscoveryGoBaseIE._VALID_URL_TEMPLATE % r'(?:[^/]+/)+' - _GEO_COUNTRIES = ['US'] - _TEST = { - 'url': 'https://www.discoverygo.com/bering-sea-gold/reaper-madness/', - 'info_dict': { - 'id': '58c167d86b66d12f2addeb01', - 'ext': 'mp4', - 'title': 'Reaper Madness', - 'description': 'md5:09f2c625c99afb8946ed4fb7865f6e78', - 'duration': 2519, - 'series': 'Bering Sea Gold', - 'season_number': 8, - 'episode_number': 6, - 'age_limit': 14, - }, - } - - def _real_extract(self, url): - display_id = self._match_id(url) - - webpage = self._download_webpage(url, display_id) - - container = extract_attributes( - self._search_regex( - r'(]+class=["\']video-player-container[^>]+>)', - webpage, 'video container')) - - video = self._parse_json( - container.get('data-video') or container.get('data-json'), - display_id) - + def _extract_video_info(self, video, stream, display_id): title = video['name'] - stream = video.get('stream') if not stream: if video.get('authenticated') is True: raise 
ExtractorError( @@ -106,7 +74,11 @@ class DiscoveryGoIE(DiscoveryGoBaseIE): not subtitle_url.startswith('http')): continue lang = caption.get('fileLang', 'en') - subtitles.setdefault(lang, []).append({'url': subtitle_url}) + ext = determine_ext(subtitle_url) + subtitles.setdefault(lang, []).append({ + 'url': subtitle_url, + 'ext': 'ttml' if ext == 'xml' else ext, + }) return { 'id': video_id, @@ -124,6 +96,43 @@ class DiscoveryGoIE(DiscoveryGoBaseIE): } +class DiscoveryGoIE(DiscoveryGoBaseIE): + _VALID_URL = DiscoveryGoBaseIE._VALID_URL_TEMPLATE % r'(?:[^/]+/)+' + _GEO_COUNTRIES = ['US'] + _TEST = { + 'url': 'https://www.discoverygo.com/bering-sea-gold/reaper-madness/', + 'info_dict': { + 'id': '58c167d86b66d12f2addeb01', + 'ext': 'mp4', + 'title': 'Reaper Madness', + 'description': 'md5:09f2c625c99afb8946ed4fb7865f6e78', + 'duration': 2519, + 'series': 'Bering Sea Gold', + 'season_number': 8, + 'episode_number': 6, + 'age_limit': 14, + }, + } + + def _real_extract(self, url): + display_id = self._match_id(url) + + webpage = self._download_webpage(url, display_id) + + container = extract_attributes( + self._search_regex( + r'(]+class=["\']video-player-container[^>]+>)', + webpage, 'video container')) + + video = self._parse_json( + container.get('data-video') or container.get('data-json'), + display_id) + + stream = video.get('stream') + + return self._extract_video_info(video, stream, display_id) + + class DiscoveryGoPlaylistIE(DiscoveryGoBaseIE): _VALID_URL = DiscoveryGoBaseIE._VALID_URL_TEMPLATE % '' _TEST = { diff --git a/youtube_dl/extractor/disney.py b/youtube_dl/extractor/disney.py index 968c4c7..0eee82f 100644 --- a/youtube_dl/extractor/disney.py +++ b/youtube_dl/extractor/disney.py @@ -10,6 +10,7 @@ from ..utils import ( compat_str, determine_ext, ExtractorError, + update_url_query, ) @@ -108,9 +109,16 @@ class DisneyIE(InfoExtractor): continue tbr = int_or_none(flavor.get('bitrate')) if tbr == 99999: - formats.extend(self._extract_m3u8_formats( + # wrong 
ks(Kaltura Signature) causes 404 Error + flavor_url = update_url_query(flavor_url, {'ks': ''}) + m3u8_formats = self._extract_m3u8_formats( flavor_url, video_id, 'mp4', - m3u8_id=flavor_format, fatal=False)) + m3u8_id=flavor_format, fatal=False) + for f in m3u8_formats: + # Apple FairPlay + if '/fpshls/' in f['url']: + continue + formats.append(f) continue format_id = [] if flavor_format: diff --git a/youtube_dl/extractor/dramafever.py b/youtube_dl/extractor/dramafever.py index 95883a0..6b60e54 100644 --- a/youtube_dl/extractor/dramafever.py +++ b/youtube_dl/extractor/dramafever.py @@ -54,7 +54,7 @@ class DramaFeverBaseIE(AMPIE): request = sanitized_Request( self._LOGIN_URL, urlencode_postdata(login_form)) response = self._download_webpage( - request, None, 'Logging in as %s' % username) + request, None, 'Logging in') if all(logout_pattern not in response for logout_pattern in ['href="/accounts/logout/"', '>Log out<']): diff --git a/youtube_dl/extractor/drtuber.py b/youtube_dl/extractor/drtuber.py index c5d56a9..c88b312 100644 --- a/youtube_dl/extractor/drtuber.py +++ b/youtube_dl/extractor/drtuber.py @@ -10,7 +10,7 @@ from ..utils import ( class DrTuberIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?drtuber\.com/(?:video|embed)/(?P\d+)(?:/(?P[\w-]+))?' + _VALID_URL = r'https?://(?:(?:www|m)\.)?drtuber\.com/(?:video|embed)/(?P\d+)(?:/(?P[\w-]+))?' 
_TESTS = [{ 'url': 'http://www.drtuber.com/video/1740434/hot-perky-blonde-naked-golf', 'md5': '93e680cf2536ad0dfb7e74d94a89facd', @@ -28,6 +28,9 @@ class DrTuberIE(InfoExtractor): }, { 'url': 'http://www.drtuber.com/embed/489939', 'only_matching': True, + }, { + 'url': 'http://m.drtuber.com/video/3893529/lingerie-blowjob-from-beautiful-teen', + 'only_matching': True, }] @staticmethod diff --git a/youtube_dl/extractor/ellentube.py b/youtube_dl/extractor/ellentube.py new file mode 100644 index 0000000..5444732 --- /dev/null +++ b/youtube_dl/extractor/ellentube.py @@ -0,0 +1,133 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + clean_html, + extract_attributes, + float_or_none, + int_or_none, + try_get, +) + + +class EllenTubeBaseIE(InfoExtractor): + def _extract_data_config(self, webpage, video_id): + details = self._search_regex( + r'(<[^>]+\bdata-component=(["\'])[Dd]etails.+?>)', webpage, + 'details') + return self._parse_json( + extract_attributes(details)['data-config'], video_id) + + def _extract_video(self, data, video_id): + title = data['title'] + + formats = [] + duration = None + for entry in data.get('media'): + if entry.get('id') == 'm3u8': + formats = self._extract_m3u8_formats( + entry['url'], video_id, 'mp4', + entry_protocol='m3u8_native', m3u8_id='hls') + duration = int_or_none(entry.get('duration')) + break + self._sort_formats(formats) + + def get_insight(kind): + return int_or_none(try_get( + data, lambda x: x['insight']['%ss' % kind])) + + return { + 'extractor_key': EllenTubeIE.ie_key(), + 'id': video_id, + 'title': title, + 'description': data.get('description'), + 'duration': duration, + 'thumbnail': data.get('thumbnail'), + 'timestamp': float_or_none(data.get('publishTime'), scale=1000), + 'view_count': get_insight('view'), + 'like_count': get_insight('like'), + 'formats': formats, + } + + +class EllenTubeIE(EllenTubeBaseIE): + _VALID_URL = r'''(?x) + (?: + ellentube:| 
+ https://api-prod\.ellentube\.com/ellenapi/api/item/ + ) + (?P[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}) + ''' + _TESTS = [{ + 'url': 'https://api-prod.ellentube.com/ellenapi/api/item/0822171c-3829-43bf-b99f-d77358ae75e3', + 'md5': '2fabc277131bddafdd120e0fc0f974c9', + 'info_dict': { + 'id': '0822171c-3829-43bf-b99f-d77358ae75e3', + 'ext': 'mp4', + 'title': 'Ellen Meets Las Vegas Survivors Jesus Campos and Stephen Schuck', + 'description': 'md5:76e3355e2242a78ad9e3858e5616923f', + 'thumbnail': r're:^https?://.+?', + 'duration': 514, + 'timestamp': 1508505120, + 'upload_date': '20171020', + 'view_count': int, + 'like_count': int, + } + }, { + 'url': 'ellentube:734a3353-f697-4e79-9ca9-bfc3002dc1e0', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + data = self._download_json( + 'https://api-prod.ellentube.com/ellenapi/api/item/%s' % video_id, + video_id) + return self._extract_video(data, video_id) + + +class EllenTubeVideoIE(EllenTubeBaseIE): + _VALID_URL = r'https?://(?:www\.)?ellentube\.com/video/(?P.+?)\.html' + _TEST = { + 'url': 'https://www.ellentube.com/video/ellen-meets-las-vegas-survivors-jesus-campos-and-stephen-schuck.html', + 'only_matching': True, + } + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + video_id = self._extract_data_config(webpage, display_id)['id'] + return self.url_result( + 'ellentube:%s' % video_id, ie=EllenTubeIE.ie_key(), + video_id=video_id) + + +class EllenTubePlaylistIE(EllenTubeBaseIE): + _VALID_URL = r'https?://(?:www\.)?ellentube\.com/(?:episode|studios)/(?P.+?)\.html' + _TESTS = [{ + 'url': 'https://www.ellentube.com/episode/dax-shepard-jordan-fisher-haim.html', + 'info_dict': { + 'id': 'dax-shepard-jordan-fisher-haim', + 'title': "Dax Shepard, 'DWTS' Team Jordan Fisher & Lindsay Arnold, HAIM", + 'description': 'md5:bfc982194dabb3f4e325e43aa6b2e21c', + }, + 'playlist_count': 6, + }, { + 
'url': 'https://www.ellentube.com/studios/macey-goes-rving0.html', + 'only_matching': True, + }] + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + data = self._extract_data_config(webpage, display_id)['data'] + feed = self._download_json( + 'https://api-prod.ellentube.com/ellenapi/api/feed/?%s' + % data['filter'], display_id) + entries = [ + self._extract_video(elem, elem['id']) + for elem in feed if elem.get('type') == 'VIDEO' and elem.get('id')] + return self.playlist_result( + entries, display_id, data.get('title'), + clean_html(data.get('description'))) diff --git a/youtube_dl/extractor/ellentv.py b/youtube_dl/extractor/ellentv.py deleted file mode 100644 index e0a13dd..0000000 --- a/youtube_dl/extractor/ellentv.py +++ /dev/null @@ -1,101 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .common import InfoExtractor -from .kaltura import KalturaIE -from ..utils import NO_DEFAULT - - -class EllenTVIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?(?:ellentv|ellentube)\.com/videos/(?P[a-z0-9_-]+)' - _TESTS = [{ - 'url': 'http://www.ellentv.com/videos/0-ipq1gsai/', - 'md5': '4294cf98bc165f218aaa0b89e0fd8042', - 'info_dict': { - 'id': '0_ipq1gsai', - 'ext': 'mov', - 'title': 'Fast Fingers of Fate', - 'description': 'md5:3539013ddcbfa64b2a6d1b38d910868a', - 'timestamp': 1428035648, - 'upload_date': '20150403', - 'uploader_id': 'batchUser', - }, - }, { - # not available via http://widgets.ellentube.com/ - 'url': 'http://www.ellentv.com/videos/1-szkgu2m2/', - 'info_dict': { - 'id': '1_szkgu2m2', - 'ext': 'flv', - 'title': "Ellen's Amazingly Talented Audience", - 'description': 'md5:86ff1e376ff0d717d7171590e273f0a5', - 'timestamp': 1255140900, - 'upload_date': '20091010', - 'uploader_id': 'ellenkaltura@gmail.com', - }, - 'params': { - 'skip_download': True, - }, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - - URLS = 
('http://widgets.ellentube.com/videos/%s' % video_id, url) - - for num, url_ in enumerate(URLS, 1): - webpage = self._download_webpage( - url_, video_id, fatal=num == len(URLS)) - - default = NO_DEFAULT if num == len(URLS) else None - - partner_id = self._search_regex( - r"var\s+partnerId\s*=\s*'([^']+)", webpage, 'partner id', - default=default) - - kaltura_id = self._search_regex( - [r'id="kaltura_player_([^"]+)"', - r"_wb_entry_id\s*:\s*'([^']+)", - r'data-kaltura-entry-id="([^"]+)'], - webpage, 'kaltura id', default=default) - - if partner_id and kaltura_id: - break - - return self.url_result('kaltura:%s:%s' % (partner_id, kaltura_id), KalturaIE.ie_key()) - - -class EllenTVClipsIE(InfoExtractor): - IE_NAME = 'EllenTV:clips' - _VALID_URL = r'https?://(?:www\.)?ellentv\.com/episodes/(?P[a-z0-9_-]+)' - _TEST = { - 'url': 'http://www.ellentv.com/episodes/meryl-streep-vanessa-hudgens/', - 'info_dict': { - 'id': 'meryl-streep-vanessa-hudgens', - 'title': 'Meryl Streep, Vanessa Hudgens', - }, - 'playlist_mincount': 5, - } - - def _real_extract(self, url): - playlist_id = self._match_id(url) - - webpage = self._download_webpage(url, playlist_id) - playlist = self._extract_playlist(webpage, playlist_id) - - return { - '_type': 'playlist', - 'id': playlist_id, - 'title': self._og_search_title(webpage), - 'entries': self._extract_entries(playlist) - } - - def _extract_playlist(self, webpage, playlist_id): - json_string = self._search_regex(r'playerView.addClips\(\[\{(.*?)\}\]\);', webpage, 'json') - return self._parse_json('[{' + json_string + '}]', playlist_id) - - def _extract_entries(self, playlist): - return [ - self.url_result( - 'kaltura:%s:%s' % (item['kaltura_partner_id'], item['kaltura_entry_id']), - KalturaIE.ie_key(), video_id=item['kaltura_entry_id']) - for item in playlist] diff --git a/youtube_dl/extractor/espn.py b/youtube_dl/extractor/espn.py index 7a74360..127c69b 100644 --- a/youtube_dl/extractor/espn.py +++ b/youtube_dl/extractor/espn.py @@ -1,6 +1,9 @@ 
from __future__ import unicode_literals +import re + from .common import InfoExtractor +from .once import OnceIE from ..compat import compat_str from ..utils import ( determine_ext, @@ -9,22 +12,27 @@ from ..utils import ( ) -class ESPNIE(InfoExtractor): +class ESPNIE(OnceIE): _VALID_URL = r'''(?x) https?:// (?: - (?:(?:\w+\.)+)?espn\.go| - (?:www\.)?espn - )\.com/ - (?: - (?: - video/clip| - watch/player - ) (?: - \?.*?\bid=| - /_/id/ - ) + (?: + (?:(?:\w+\.)+)?espn\.go| + (?:www\.)?espn + )\.com/ + (?: + (?: + video/(?:clip|iframe/twitter)| + watch/player + ) + (?: + .*?\?.*?\bid=| + /_/id/ + ) + ) + )| + (?:www\.)espnfc\.(?:com|us)/(?:video/)?[^/]+/\d+/video/ ) (?P\d+) ''' @@ -77,6 +85,15 @@ class ESPNIE(InfoExtractor): }, { 'url': 'http://www.espn.com/video/clip/_/id/17989860', 'only_matching': True, + }, { + 'url': 'https://espn.go.com/video/iframe/twitter/?cms=espn&id=10365079', + 'only_matching': True, + }, { + 'url': 'http://www.espnfc.us/video/espn-fc-tv/86/video/3319154/nashville-unveiled-as-the-newest-club-in-mls', + 'only_matching': True, + }, { + 'url': 'http://www.espnfc.com/english-premier-league/23/video/3324163/premier-league-in-90-seconds-golden-tweets', + 'only_matching': True, }] def _real_extract(self, url): @@ -93,7 +110,9 @@ class ESPNIE(InfoExtractor): def traverse_source(source, base_source_id=None): for source_id, source in source.items(): - if isinstance(source, compat_str): + if source_id == 'alert': + continue + elif isinstance(source, compat_str): extract_source(source, base_source_id) elif isinstance(source, dict): traverse_source( @@ -106,7 +125,9 @@ class ESPNIE(InfoExtractor): return format_urls.add(source_url) ext = determine_ext(source_url) - if ext == 'smil': + if OnceIE.suitable(source_url): + formats.extend(self._extract_once_formats(source_url)) + elif ext == 'smil': formats.extend(self._extract_smil_formats( source_url, video_id, fatal=False)) elif ext == 'f4m': @@ -117,12 +138,24 @@ class ESPNIE(InfoExtractor): source_url, 
video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id=source_id, fatal=False)) else: - formats.append({ + f = { 'url': source_url, 'format_id': source_id, - }) - - traverse_source(clip['links']['source']) + } + mobj = re.search(r'(\d+)p(\d+)_(\d+)k\.', source_url) + if mobj: + f.update({ + 'height': int(mobj.group(1)), + 'fps': int(mobj.group(2)), + 'tbr': int(mobj.group(3)), + }) + if source_id == 'mezzanine': + f['preference'] = 1 + formats.append(f) + + links = clip.get('links', {}) + traverse_source(links.get('source', {})) + traverse_source(links.get('mobile', {})) self._sort_formats(formats) description = clip.get('caption') or clip.get('description') @@ -144,9 +177,6 @@ class ESPNIE(InfoExtractor): class ESPNArticleIE(InfoExtractor): _VALID_URL = r'https?://(?:espn\.go|(?:www\.)?espn)\.com/(?:[^/]+/)*(?P[^/]+)' _TESTS = [{ - 'url': 'https://espn.go.com/video/iframe/twitter/?cms=espn&id=10365079', - 'only_matching': True, - }, { 'url': 'http://espn.go.com/nba/recap?gameId=400793786', 'only_matching': True, }, { @@ -175,3 +205,34 @@ class ESPNArticleIE(InfoExtractor): return self.url_result( 'http://espn.go.com/video/clip?id=%s' % video_id, ESPNIE.ie_key()) + + +class FiveThirtyEightIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?fivethirtyeight\.com/features/(?P[^/?#]+)' + _TEST = { + 'url': 'http://fivethirtyeight.com/features/how-the-6-8-raiders-can-still-make-the-playoffs/', + 'info_dict': { + 'id': '21846851', + 'ext': 'mp4', + 'title': 'FiveThirtyEight: The Raiders can still make the playoffs', + 'description': 'Neil Paine breaks down the simplest scenario that will put the Raiders into the playoffs at 8-8.', + 'timestamp': 1513960621, + 'upload_date': '20171222', + }, + 'params': { + 'skip_download': True, + }, + 'expected_warnings': ['Unable to download f4m manifest'], + } + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + video_id = self._search_regex( + 
r'data-video-id=["\'](?P\d+)', + webpage, 'video id', group='id') + + return self.url_result( + 'http://espn.go.com/video/clip?id=%s' % video_id, ESPNIE.ie_key()) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index d084707..e64defe 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -127,7 +127,10 @@ from .bloomberg import BloombergIE from .bokecc import BokeCCIE from .bostonglobe import BostonGlobeIE from .bpb import BpbIE -from .br import BRIE +from .br import ( + BRIE, + BRMediathekIE, +) from .bravotv import BravoTVIE from .breakcom import BreakIE from .brightcove import ( @@ -135,10 +138,7 @@ from .brightcove import ( BrightcoveNewIE, ) from .buzzfeed import BuzzFeedIE -from .byutv import ( - BYUtvIE, - BYUtvEventIE, -) +from .byutv import BYUtvIE from .c56 import C56IE from .camdemy import ( CamdemyIE, @@ -205,7 +205,6 @@ from .cnn import ( CNNArticleIE, ) from .coub import CoubIE -from .collegerama import CollegeRamaIE from .comedycentral import ( ComedyCentralFullEpisodesIE, ComedyCentralIE, @@ -243,11 +242,10 @@ from .dailymotion import ( DailymotionIE, DailymotionPlaylistIE, DailymotionUserIE, - DailymotionCloudIE, ) from .daisuki import ( - DaisukiIE, - DaisukiPlaylistIE, + DaisukiMottoIE, + DaisukiMottoPlaylistIE, ) from .daum import ( DaumIE, @@ -309,9 +307,10 @@ from .ehow import EHowIE from .eighttracks import EightTracksIE from .einthusan import EinthusanIE from .eitb import EitbIE -from .ellentv import ( - EllenTVIE, - EllenTVClipsIE, +from .ellentube import ( + EllenTubeIE, + EllenTubeVideoIE, + EllenTubePlaylistIE, ) from .elpais import ElPaisIE from .embedly import EmbedlyIE @@ -322,6 +321,7 @@ from .escapist import EscapistIE from .espn import ( ESPNIE, ESPNArticleIE, + FiveThirtyEightIE, ) from .esri import EsriVideoIE from .etonline import ETOnlineIE @@ -344,11 +344,10 @@ from .filmon import ( FilmOnIE, FilmOnChannelIE, ) -from .firstpost import FirstpostIE +from 
.filmweb import FilmwebIE from .firsttv import FirstTVIE from .fivemin import FiveMinIE from .fivetv import FiveTVIE -from .fktv import FKTVIE from .flickr import FlickrIE from .flipagram import FlipagramIE from .folketinget import FolketingetIE @@ -375,7 +374,7 @@ from .francetv import ( FranceTVIE, FranceTVEmbedIE, FranceTVInfoIE, - GenerationQuoiIE, + GenerationWhatIE, CultureboxIE, ) from .freesound import FreesoundIE @@ -391,7 +390,6 @@ from .gameone import ( GameOneIE, GameOnePlaylistIE, ) -from .gamersyde import GamersydeIE from .gamespot import GameSpotIE from .gamestar import GameStarIE from .gaskrank import GaskrankIE @@ -467,6 +465,7 @@ from .indavideo import ( ) from .infoq import InfoQIE from .instagram import InstagramIE, InstagramUserIE +from .internazionale import InternazionaleIE from .internetvideoarchive import InternetVideoArchiveIE from .iprima import IPrimaIE from .iqiyi import IqiyiIE @@ -572,9 +571,11 @@ from .mangomolo import ( MangomoloLiveIE, ) from .manyvids import ManyVidsIE +from .massengeschmacktv import MassengeschmackTVIE from .matchtv import MatchTVIE from .mdr import MDRIE from .mediaset import MediasetIE +from .mediasite import MediasiteIE from .medici import MediciIE from .megaphone import MegaphoneIE from .meipai import MeipaiIE @@ -688,6 +689,7 @@ from .nhl import ( ) from .nick import ( NickIE, + NickBrIE, NickDeIE, NickNightIE, NickRuIE, @@ -720,10 +722,6 @@ from .nowness import ( NownessPlaylistIE, NownessSeriesIE, ) -from .nowtv import ( - NowTVIE, - NowTVListIE, -) from .noz import NozIE from .npo import ( AndereTijdenIE, @@ -789,6 +787,7 @@ from .patreon import PatreonIE from .pbs import PBSIE from .pearvideo import PearVideoIE from .people import PeopleIE +from .performgroup import PerformGroupIE from .periscope import ( PeriscopeIE, PeriscopeUserIE, @@ -855,6 +854,7 @@ from .radiofrance import RadioFranceIE from .rai import ( RaiPlayIE, RaiPlayLiveIE, + RaiPlayPlaylistIE, RaiIE, ) from .rbmaradio import RBMARadioIE @@ 
-912,7 +912,6 @@ from .rutube import ( from .rutv import RUTVIE from .ruutu import RuutuIE from .ruv import RuvIE -from .sandia import SandiaIE from .safari import ( SafariIE, SafariApiIE, @@ -929,8 +928,12 @@ from .senateisvp import SenateISVPIE from .sendtonews import SendtoNewsIE from .servingsys import ServingSysIE from .servus import ServusIE +from .sevenplus import SevenPlusIE from .sexu import SexuIE -from .shahid import ShahidIE +from .shahid import ( + ShahidIE, + ShahidShowIE, +) from .shared import ( SharedIE, VivoIE, @@ -998,6 +1001,7 @@ from .streamango import StreamangoIE from .streamcloud import StreamcloudIE from .streamcz import StreamCZIE from .streetvoice import StreetVoiceIE +from .stretchinternet import StretchInternetIE from .sunporno import SunPornoIE from .svt import ( SVTIE, @@ -1100,6 +1104,10 @@ from .tvigle import TvigleIE from .tvland import TVLandIE from .tvn24 import TVN24IE from .tvnoe import TVNoeIE +from .tvnow import ( + TVNowIE, + TVNowListIE, +) from .tvp import ( TVPEmbedIE, TVPIE, @@ -1113,6 +1121,7 @@ from .tvplayer import TVPlayerIE from .tweakers import TweakersIE from .twentyfourvideo import TwentyFourVideoIE from .twentymin import TwentyMinutenIE +from .twentythreevideo import TwentyThreeVideoIE from .twitch import ( TwitchVideoIE, TwitchChapterIE, @@ -1135,8 +1144,10 @@ from .udemy import ( UdemyCourseIE ) from .udn import UDNEmbedIE +from .ufctv import UFCTVIE from .uktvplay import UKTVPlayIE from .digiteka import DigitekaIE +from .umg import UMGDeIE from .unistra import UnistraIE from .unity import UnityIE from .uol import UOLIE diff --git a/youtube_dl/extractor/faz.py b/youtube_dl/extractor/faz.py index 4bc8fc5..312ee2a 100644 --- a/youtube_dl/extractor/faz.py +++ b/youtube_dl/extractor/faz.py @@ -1,7 +1,10 @@ # coding: utf-8 from __future__ import unicode_literals +import re + from .common import InfoExtractor +from ..compat import compat_etree_fromstring from ..utils import ( xpath_element, xpath_text, @@ -43,10 
+46,15 @@ class FazIE(InfoExtractor): webpage = self._download_webpage(url, video_id) description = self._og_search_description(webpage) - config_xml_url = self._search_regex( - r'videoXMLURL\s*=\s*"([^"]+)', webpage, 'config xml url') - config = self._download_xml( - config_xml_url, video_id, 'Downloading config xml') + media = self._html_search_regex( + r"data-videojs-media='([^']+)", + webpage, 'media') + if media == 'extern': + perform_url = self._search_regex( + r"]+?src='((?:http:)?//player\.performgroup\.com/eplayer/eplayer\.html#/?[0-9a-f]{26}\.[0-9a-z]{26})", + webpage, 'perform url') + return self.url_result(perform_url) + config = compat_etree_fromstring(media) encodings = xpath_element(config, 'ENCODINGS', 'encodings', True) formats = [] @@ -55,12 +63,24 @@ class FazIE(InfoExtractor): if encoding is not None: encoding_url = xpath_text(encoding, 'FILENAME') if encoding_url: - formats.append({ + tbr = xpath_text(encoding, 'AVERAGEBITRATE', 1000) + if tbr: + tbr = int_or_none(tbr.replace(',', '.')) + f = { 'url': encoding_url, 'format_id': code.lower(), 'quality': pref, - 'tbr': int_or_none(xpath_text(encoding, 'AVERAGEBITRATE')), - }) + 'tbr': tbr, + 'vcodec': xpath_text(encoding, 'CODEC'), + } + mobj = re.search(r'(\d+)x(\d+)_(\d+)\.mp4', encoding_url) + if mobj: + f.update({ + 'width': int(mobj.group(1)), + 'height': int(mobj.group(2)), + 'tbr': tbr or int(mobj.group(3)), + }) + formats.append(f) self._sort_formats(formats) return { diff --git a/youtube_dl/extractor/fczenit.py b/youtube_dl/extractor/fczenit.py index 8d1010b..8db7c59 100644 --- a/youtube_dl/extractor/fczenit.py +++ b/youtube_dl/extractor/fczenit.py @@ -2,7 +2,10 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..compat import compat_urlparse +from ..utils import ( + int_or_none, + float_or_none, +) class FczenitIE(InfoExtractor): @@ -14,6 +17,8 @@ class FczenitIE(InfoExtractor): 'id': '41044', 'ext': 'mp4', 'title': 'Так пишется история: казанский 
разгром ЦСКА на «Зенит-ТВ»', + 'timestamp': 1462283735, + 'upload_date': '20160503', }, } @@ -21,28 +26,31 @@ class FczenitIE(InfoExtractor): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - video_title = self._html_search_regex( - r'<[^>]+class=\"photoalbum__title\">([^<]+)', webpage, 'title') + msi_id = self._search_regex( + r"(?s)config\s*=\s*{.+?video_id\s*:\s*'([^']+)'", webpage, 'msi id') - video_items = self._parse_json(self._search_regex( - r'arrPath\s*=\s*JSON\.parse\(\'(.+)\'\)', webpage, 'video items'), - video_id) - - def merge_dicts(*dicts): - ret = {} - for a_dict in dicts: - ret.update(a_dict) - return ret + msi_data = self._download_json( + 'http://player.fc-zenit.ru/msi/video', msi_id, query={ + 'video': msi_id, + })['data'] + title = msi_data['name'] formats = [{ - 'url': compat_urlparse.urljoin(url, video_url), - 'tbr': int(tbr), - } for tbr, video_url in merge_dicts(*video_items).items()] + 'format_id': q.get('label'), + 'url': q['url'], + 'height': int_or_none(q.get('label')), + } for q in msi_data['qualities'] if q.get('url')] self._sort_formats(formats) + tags = [tag['label'] for tag in msi_data.get('tags', []) if tag.get('label')] + return { 'id': video_id, - 'title': video_title, + 'title': title, + 'thumbnail': msi_data.get('preview'), 'formats': formats, + 'duration': float_or_none(msi_data.get('duration')), + 'timestamp': int_or_none(msi_data.get('date')), + 'tags': tags, } diff --git a/youtube_dl/extractor/filmweb.py b/youtube_dl/extractor/filmweb.py new file mode 100644 index 0000000..56000bc --- /dev/null +++ b/youtube_dl/extractor/filmweb.py @@ -0,0 +1,42 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor + + +class FilmwebIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?filmweb\.no/(?Ptrailere|filmnytt)/article(?P\d+)\.ece' + _TEST = { + 'url': 'http://www.filmweb.no/trailere/article1264921.ece', + 'md5': 'e353f47df98e557d67edaceda9dece89', + 
'info_dict': { + 'id': '13033574', + 'ext': 'mp4', + 'title': 'Det som en gang var', + 'upload_date': '20160316', + 'timestamp': 1458140101, + 'uploader_id': '12639966', + 'uploader': 'Live Roaldset', + } + } + + def _real_extract(self, url): + article_type, article_id = re.match(self._VALID_URL, url).groups() + if article_type == 'filmnytt': + webpage = self._download_webpage(url, article_id) + article_id = self._search_regex(r'data-videoid="(\d+)"', webpage, 'article id') + embed_code = self._download_json( + 'https://www.filmweb.no/template_v2/ajax/json_trailerEmbed.jsp', + article_id, query={ + 'articleId': article_id, + })['embedCode'] + iframe_url = self._proto_relative_url(self._search_regex( + r']+src="([^"]+)', embed_code, 'iframe url')) + + return { + '_type': 'url_transparent', + 'id': article_id, + 'url': iframe_url, + 'ie_key': 'TwentyThreeVideo', + } diff --git a/youtube_dl/extractor/firstpost.py b/youtube_dl/extractor/firstpost.py deleted file mode 100644 index e8936cb..0000000 --- a/youtube_dl/extractor/firstpost.py +++ /dev/null @@ -1,50 +0,0 @@ -from __future__ import unicode_literals - -from .common import InfoExtractor - - -class FirstpostIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?firstpost\.com/[^/]+/.*-(?P[0-9]+)\.html' - - _TEST = { - 'url': 'http://www.firstpost.com/india/india-to-launch-indigenous-aircraft-carrier-monday-1025403.html', - 'md5': 'ee9114957692f01fb1263ed87039112a', - 'info_dict': { - 'id': '1025403', - 'ext': 'mp4', - 'title': 'India to launch indigenous aircraft carrier INS Vikrant today', - 'description': 'md5:feef3041cb09724e0bdc02843348f5f4', - } - } - - def _real_extract(self, url): - video_id = self._match_id(url) - page = self._download_webpage(url, video_id) - - title = self._html_search_meta('twitter:title', page, 'title', fatal=True) - description = self._html_search_meta('twitter:description', page, 'title') - - data = self._download_xml( - 'http://www.firstpost.com/getvideoxml-%s.xml' % video_id, 
video_id, - 'Downloading video XML') - - item = data.find('./playlist/item') - thumbnail = item.find('./image').text - - formats = [ - { - 'url': details.find('./file').text, - 'format_id': details.find('./label').text.strip(), - 'width': int(details.find('./width').text.strip()), - 'height': int(details.find('./height').text.strip()), - } for details in item.findall('./source/file_details') if details.find('./file').text - ] - self._sort_formats(formats) - - return { - 'id': video_id, - 'title': title, - 'description': description, - 'thumbnail': thumbnail, - 'formats': formats, - } diff --git a/youtube_dl/extractor/fktv.py b/youtube_dl/extractor/fktv.py deleted file mode 100644 index 2958452..0000000 --- a/youtube_dl/extractor/fktv.py +++ /dev/null @@ -1,51 +0,0 @@ -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..utils import ( - clean_html, - determine_ext, - js_to_json, -) - - -class FKTVIE(InfoExtractor): - IE_NAME = 'fernsehkritik.tv' - _VALID_URL = r'https?://(?:www\.)?fernsehkritik\.tv/folge-(?P[0-9]+)(?:/.*)?' - - _TEST = { - 'url': 'http://fernsehkritik.tv/folge-1', - 'md5': '21f0b0c99bce7d5b524eb1b17b1c6d79', - 'info_dict': { - 'id': '1', - 'ext': 'mp4', - 'title': 'Folge 1 vom 10. April 2007', - 'thumbnail': r're:^https?://.*\.jpg$', - }, - } - - def _real_extract(self, url): - episode = self._match_id(url) - - webpage = self._download_webpage( - 'http://fernsehkritik.tv/folge-%s/play' % episode, episode) - title = clean_html(self._html_search_regex( - '

([^<]+)

', webpage, 'title')) - thumbnail = self._search_regex(r'POSTER\s*=\s*"([^"]+)', webpage, 'thumbnail', fatal=False) - sources = self._parse_json(self._search_regex(r'(?s)MEDIA\s*=\s*(\[.+?\]);', webpage, 'media'), episode, js_to_json) - - formats = [] - for source in sources: - furl = source.get('src') - if furl: - formats.append({ - 'url': furl, - 'format_id': determine_ext(furl), - }) - self._sort_formats(formats) - - return { - 'id': episode, - 'title': title, - 'formats': formats, - 'thumbnail': thumbnail, - } diff --git a/youtube_dl/extractor/fox.py b/youtube_dl/extractor/fox.py index 5f98d01..11d6c9c 100644 --- a/youtube_dl/extractor/fox.py +++ b/youtube_dl/extractor/fox.py @@ -11,6 +11,7 @@ from ..utils import ( parse_duration, try_get, unified_timestamp, + update_url_query, ) @@ -62,7 +63,8 @@ class FOXIE(AdobePassIE): duration = int_or_none(video.get('durationInSeconds')) or int_or_none( video.get('duration')) or parse_duration(video.get('duration')) timestamp = unified_timestamp(video.get('datePublished')) - age_limit = parse_age_limit(video.get('contentRating')) + rating = video.get('contentRating') + age_limit = parse_age_limit(rating) data = try_get( video, lambda x: x['trackingData']['properties'], dict) or {} @@ -77,8 +79,24 @@ class FOXIE(AdobePassIE): release_year = int_or_none(video.get('releaseYear')) if data.get('authRequired'): - # TODO: AP - pass + resource = self._get_mvpd_resource( + 'fbc-fox', title, video.get('guid'), rating) + release_url = update_url_query( + release_url, { + 'auth': self._extract_mvpd_auth( + url, video_id, 'fbc-fox', resource) + }) + + subtitles = {} + for doc_rel in video.get('documentReleases', []): + rel_url = doc_rel.get('url') + if not url or doc_rel.get('format') != 'SCC': + continue + subtitles['en'] = [{ + 'url': rel_url, + 'ext': 'scc', + }] + break info = { 'id': video_id, @@ -93,6 +111,7 @@ class FOXIE(AdobePassIE): 'episode': episode, 'episode_number': episode_number, 'release_year': release_year, + 
'subtitles': subtitles, } urlh = self._request_webpage(HEADRequest(release_url), video_id) diff --git a/youtube_dl/extractor/fox9.py b/youtube_dl/extractor/fox9.py index 56d9975..17dfffa 100644 --- a/youtube_dl/extractor/fox9.py +++ b/youtube_dl/extractor/fox9.py @@ -2,7 +2,6 @@ from __future__ import unicode_literals from .anvato import AnvatoIE -from ..utils import js_to_json class FOX9IE(AnvatoIE): @@ -34,9 +33,9 @@ class FOX9IE(AnvatoIE): video_id = self._parse_json( self._search_regex( - r'AnvatoPlaylist\s*\(\s*(\[.+?\])\s*\)\s*;', + r"this\.videosJson\s*=\s*'(\[.+?\])';", webpage, 'anvato playlist'), - video_id, transform_source=js_to_json)[0]['video'] + video_id)[0]['video'] return self._get_anvato_videos( 'anvato_epfox_app_web_prod_b3373168e12f423f41504f207000188daf88251b', diff --git a/youtube_dl/extractor/francetv.py b/youtube_dl/extractor/francetv.py index 2bcbb3e..095bb39 100644 --- a/youtube_dl/extractor/francetv.py +++ b/youtube_dl/extractor/francetv.py @@ -3,7 +3,6 @@ from __future__ import unicode_literals import re -import json from .common import InfoExtractor from ..compat import compat_urlparse @@ -14,10 +13,7 @@ from ..utils import ( parse_duration, determine_ext, ) -from .dailymotion import ( - DailymotionIE, - DailymotionCloudIE, -) +from .dailymotion import DailymotionIE class FranceTVBaseInfoExtractor(InfoExtractor): @@ -291,10 +287,6 @@ class FranceTVInfoIE(FranceTVBaseInfoExtractor): page_title = mobj.group('title') webpage = self._download_webpage(url, page_title) - dmcloud_url = DailymotionCloudIE._extract_dmcloud_url(webpage) - if dmcloud_url: - return self.url_result(dmcloud_url, DailymotionCloudIE.ie_key()) - dailymotion_urls = DailymotionIE._extract_urls(webpage) if dailymotion_urls: return self.playlist_result([ @@ -308,31 +300,32 @@ class FranceTVInfoIE(FranceTVBaseInfoExtractor): return self._extract_video(video_id, catalogue) -class GenerationQuoiIE(InfoExtractor): - IE_NAME = 'france2.fr:generation-quoi' - _VALID_URL = 
r'https?://generation-quoi\.france2\.fr/portrait/(?P[^/?#]+)' +class GenerationWhatIE(InfoExtractor): + IE_NAME = 'france2.fr:generation-what' + _VALID_URL = r'https?://generation-what\.francetv\.fr/[^/]+/video/(?P[^/?#]+)' - _TEST = { - 'url': 'http://generation-quoi.france2.fr/portrait/garde-a-vous', + _TESTS = [{ + 'url': 'http://generation-what.francetv.fr/portrait/video/present-arms', 'info_dict': { - 'id': 'k7FJX8VBcvvLmX4wA5Q', + 'id': 'wtvKYUG45iw', 'ext': 'mp4', - 'title': 'Génération Quoi - Garde à Vous', - 'uploader': 'Génération Quoi', - }, - 'params': { - # It uses Dailymotion - 'skip_download': True, + 'title': 'Generation What - Garde à vous - FRA', + 'uploader': 'Generation What', + 'uploader_id': 'UCHH9p1eetWCgt4kXBYCb3_w', + 'upload_date': '20160411', }, - } + }, { + 'url': 'http://generation-what.francetv.fr/europe/video/present-arms', + 'only_matching': True, + }] def _real_extract(self, url): display_id = self._match_id(url) - info_url = compat_urlparse.urljoin(url, '/medias/video/%s.json' % display_id) - info_json = self._download_webpage(info_url, display_id) - info = json.loads(info_json) - return self.url_result('http://www.dailymotion.com/video/%s' % info['id'], - ie='Dailymotion') + webpage = self._download_webpage(url, display_id) + youtube_id = self._search_regex( + r"window\.videoURL\s*=\s*'([0-9A-Za-z_-]{11})';", + webpage, 'youtube id') + return self.url_result(youtube_id, 'Youtube', youtube_id) class CultureboxIE(FranceTVBaseInfoExtractor): @@ -363,6 +356,7 @@ class CultureboxIE(FranceTVBaseInfoExtractor): raise ExtractorError('Video %s is not available' % name, expected=True) video_id, catalogue = self._search_regex( - r'"http://videos\.francetv\.fr/video/([^@]+@[^"]+)"', webpage, 'video id').split('@') + r'["\'>]https?://videos\.francetv\.fr/video/([^@]+@.+?)["\'<]', + webpage, 'video id').split('@') return self._extract_video(video_id, catalogue) diff --git a/youtube_dl/extractor/freespeech.py b/youtube_dl/extractor/freespeech.py 
index 7fa271b..486a49c 100644 --- a/youtube_dl/extractor/freespeech.py +++ b/youtube_dl/extractor/freespeech.py @@ -1,37 +1,34 @@ from __future__ import unicode_literals -import re -import json - from .common import InfoExtractor class FreespeechIE(InfoExtractor): IE_NAME = 'freespeech.org' - _VALID_URL = r'https?://(?:www\.)?freespeech\.org/video/(?P.+)' + _VALID_URL = r'https?://(?:www\.)?freespeech\.org/stories/(?P<id>.+)' _TEST = { 'add_ie': ['Youtube'], - 'url': 'https://www.freespeech.org/video/obama-romney-campaign-colorado-ahead-debate-0', + 'url': 'http://www.freespeech.org/stories/fcc-announces-net-neutrality-rollback-whats-stake/', 'info_dict': { - 'id': 'poKsVCZ64uU', - 'ext': 'webm', - 'title': 'Obama, Romney Campaign in Colorado Ahead of Debate', - 'description': 'Obama, Romney Campaign in Colorado Ahead of Debate', - 'uploader': 'freespeechtv', + 'id': 'waRk6IPqyWM', + 'ext': 'mp4', + 'title': 'What\'s At Stake - Net Neutrality Special', + 'description': 'Presented by MNN and FSTV', + 'upload_date': '20170728', 'uploader_id': 'freespeechtv', - 'upload_date': '20121002', + 'uploader': 'freespeechtv', }, } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - title = mobj.group('title') - webpage = self._download_webpage(url, title) - info_json = self._search_regex(r'jQuery\.extend\(Drupal\.settings, ({.*?})\);', webpage, 'info') - info = json.loads(info_json) + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + youtube_url = self._search_regex( + r'data-video-url="([^"]+)"', + webpage, 'youtube url') return { '_type': 'url', - 'url': info['jw_player']['basic_video_node_player']['file'], + 'url': youtube_url, 'ie_key': 'Youtube', } diff --git a/youtube_dl/extractor/funimation.py b/youtube_dl/extractor/funimation.py index 8c37509..107f658 100644 --- a/youtube_dl/extractor/funimation.py +++ b/youtube_dl/extractor/funimation.py @@ -57,7 +57,7 @@ class FunimationIE(InfoExtractor): try: data = 
self._download_json( 'https://prod-api-funimationnow.dadcdigital.com/api/auth/login/', - None, 'Logging in as %s' % username, data=urlencode_postdata({ + None, 'Logging in', data=urlencode_postdata({ 'username': username, 'password': password, })) diff --git a/youtube_dl/extractor/gamersyde.py b/youtube_dl/extractor/gamersyde.py deleted file mode 100644 index a218a69..0000000 --- a/youtube_dl/extractor/gamersyde.py +++ /dev/null @@ -1,70 +0,0 @@ -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..utils import ( - js_to_json, - parse_duration, - remove_start, -) - - -class GamersydeIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?gamersyde\.com/hqstream_(?P<display_id>[\da-z_]+)-(?P<id>\d+)_[a-z]{2}\.html' - _TEST = { - 'url': 'http://www.gamersyde.com/hqstream_bloodborne_birth_of_a_hero-34371_en.html', - 'md5': 'f38d400d32f19724570040d5ce3a505f', - 'info_dict': { - 'id': '34371', - 'ext': 'mp4', - 'duration': 372, - 'title': 'Bloodborne - Birth of a hero', - 'thumbnail': r're:^https?://.*\.jpg$', - } - } - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - display_id = mobj.group('display_id') - - webpage = self._download_webpage(url, display_id) - - playlist = self._parse_json( - self._search_regex( - r'(?s)playlist: \[({.+?})\]\s*}\);', webpage, 'files'), - display_id, transform_source=js_to_json) - - formats = [] - for source in playlist['sources']: - video_url = source.get('file') - if not video_url: - continue - format_id = source.get('label') - f = { - 'url': video_url, - 'format_id': format_id, - } - m = re.search(r'^(?P<height>\d+)[pP](?P<fps>\d+)fps', format_id) - if m: - f.update({ - 'height': int(m.group('height')), - 'fps': int(m.group('fps')), - }) - formats.append(f) - self._sort_formats(formats) - - title = remove_start(playlist['title'], '%s - ' % video_id) - thumbnail = playlist.get('image') - duration = parse_duration(self._search_regex( - 
r'Length:</label>([^<]+)<', webpage, 'duration', fatal=False)) - - return { - 'id': video_id, - 'display_id': display_id, - 'title': title, - 'thumbnail': thumbnail, - 'duration': duration, - 'formats': formats, - } diff --git a/youtube_dl/extractor/gamespot.py b/youtube_dl/extractor/gamespot.py index 6d177cb..ab647dd 100644 --- a/youtube_dl/extractor/gamespot.py +++ b/youtube_dl/extractor/gamespot.py @@ -14,7 +14,7 @@ from ..utils import ( class GameSpotIE(OnceIE): - _VALID_URL = r'https?://(?:www\.)?gamespot\.com/videos/(?:[^/]+/\d+-|embed/)(?P<id>\d+)' + _VALID_URL = r'https?://(?:www\.)?gamespot\.com/(?:video|article)s/(?:[^/]+/\d+-|embed/)(?P<id>\d+)' _TESTS = [{ 'url': 'http://www.gamespot.com/videos/arma-3-community-guide-sitrep-i/2300-6410818/', 'md5': 'b2a30deaa8654fcccd43713a6b6a4825', @@ -38,6 +38,9 @@ class GameSpotIE(OnceIE): }, { 'url': 'https://www.gamespot.com/videos/embed/6439218/', 'only_matching': True, + }, { + 'url': 'https://www.gamespot.com/articles/the-last-of-us-2-receives-new-ps4-trailer/1100-6454469/', + 'only_matching': True, }] def _real_extract(self, url): @@ -108,7 +111,8 @@ class GameSpotIE(OnceIE): onceux_url = self._parse_json(unescapeHTML(onceux_json), page_id).get('metadataUri') if onceux_url: formats.extend(self._extract_once_formats(re.sub( - r'https?://[^/]+', 'http://once.unicornmedia.com', onceux_url))) + r'https?://[^/]+', 'http://once.unicornmedia.com', onceux_url), + http_formats_preference=-1)) if not formats: for quality in ['sd', 'hd']: diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 2a9c3e2..cc4c90b 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -59,10 +59,7 @@ from .tnaflix import TNAFlixNetworkEmbedIE from .drtuber import DrTuberIE from .redtube import RedTubeIE from .vimeo import VimeoIE -from .dailymotion import ( - DailymotionIE, - DailymotionCloudIE, -) +from .dailymotion import DailymotionIE from .dailymail import DailyMailIE from 
.onionstudios import OnionStudiosIE from .viewlift import ViewLiftEmbedIE @@ -102,6 +99,8 @@ from .joj import JojIE from .megaphone import MegaphoneIE from .vzaar import VzaarIE from .channel9 import Channel9IE +from .vshare import VShareIE +from .mediasite import MediasiteIE class GenericIE(InfoExtractor): @@ -1098,9 +1097,9 @@ class GenericIE(InfoExtractor): }, # jwplayer rtmp { - 'url': 'http://www.suffolk.edu/sjc/', + 'url': 'http://www.suffolk.edu/sjc/live.php', 'info_dict': { - 'id': 'sjclive', + 'id': 'live', 'ext': 'flv', 'title': 'Massachusetts Supreme Judicial Court Oral Arguments', 'uploader': 'www.suffolk.edu', @@ -1108,7 +1107,7 @@ class GenericIE(InfoExtractor): 'params': { 'skip_download': True, }, - 'skip': 'does not contain a video anymore', + 'skip': 'Only has video a few mornings per month, see http://www.suffolk.edu/sjc/', }, # Complex jwplayer { @@ -1135,6 +1134,19 @@ class GenericIE(InfoExtractor): 'skip_download': True, } }, + { + # JWPlatform iframe + 'url': 'https://www.mediaite.com/tv/dem-senator-claims-gary-cohn-faked-a-bad-connection-during-trump-call-to-get-him-off-the-phone/', + 'md5': 'ca00a040364b5b439230e7ebfd02c4e9', + 'info_dict': { + 'id': 'O0c5JcKT', + 'ext': 'mp4', + 'upload_date': '20171122', + 'timestamp': 1511366290, + 'title': 'Dem Senator Claims Gary Cohn Faked a Bad Connection During Trump Call to Get Him Off the Phone', + }, + 'add_ie': [JWPlatformIE.ie_key()], + }, { # Video.js embed, multiple formats 'url': 'http://ortcam.com/solidworks-урок-6-настройка-чертежа_33f9b7351.html', @@ -1458,23 +1470,6 @@ class GenericIE(InfoExtractor): 'timestamp': 1432570283, }, }, - # Dailymotion Cloud video - { - 'url': 'http://replay.publicsenat.fr/vod/le-debat/florent-kolandjian,dominique-cena,axel-decourtye,laurence-abeille,bruno-parmentier/175910', - 'md5': 'dcaf23ad0c67a256f4278bce6e0bae38', - 'info_dict': { - 'id': 'x2uy8t3', - 'ext': 'mp4', - 'title': 'Sauvons les abeilles ! 
- Le débat', - 'description': 'md5:d9082128b1c5277987825d684939ca26', - 'thumbnail': r're:^https?://.*\.jpe?g$', - 'timestamp': 1434970506, - 'upload_date': '20150622', - 'uploader': 'Public Sénat', - 'uploader_id': 'xa9gza', - }, - 'skip': 'File not found.', - }, # OnionStudios embed { 'url': 'http://www.clickhole.com/video/dont-understand-bitcoin-man-will-mumble-explanatio-2537', @@ -1921,6 +1916,28 @@ class GenericIE(InfoExtractor): 'title': 'Rescue Kit 14 Free Edition - Getting started', }, 'playlist_count': 4, + }, + { + # vshare embed + 'url': 'https://youtube-dl-demo.neocities.org/vshare.html', + 'md5': '17b39f55b5497ae8b59f5fbce8e35886', + 'info_dict': { + 'id': '0f64ce6', + 'title': 'vl14062007715967', + 'ext': 'mp4', + } + }, + { + 'url': 'http://www.heidelberg-laureate-forum.org/blog/video/lecture-friday-september-23-2016-sir-c-antony-r-hoare/', + 'md5': 'aecd089f55b1cb5a59032cb049d3a356', + 'info_dict': { + 'id': '90227f51a80c4d8f86c345a7fa62bd9a1d', + 'ext': 'mp4', + 'title': 'Lecture: Friday, September 23, 2016 - Sir Tony Hoare', + 'description': 'md5:5a51db84a62def7b7054df2ade403c6c', + 'timestamp': 1474354800, + 'upload_date': '20160920', + } } # { # # TODO: find another test @@ -2171,7 +2188,7 @@ class GenericIE(InfoExtractor): return self.playlist_result(self._parse_xspf(doc, video_id), video_id) elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag): info_dict['formats'] = self._parse_mpd_formats( - doc, video_id, + doc, mpd_base_url=compat_str(full_response.geturl()).rpartition('/')[0], mpd_url=url) self._sort_formats(info_dict['formats']) @@ -2680,11 +2697,6 @@ class GenericIE(InfoExtractor): if senate_isvp_url: return self.url_result(senate_isvp_url, 'SenateISVP') - # Look for Dailymotion Cloud videos - dmcloud_url = DailymotionCloudIE._extract_dmcloud_url(webpage) - if dmcloud_url: - return self.url_result(dmcloud_url, 'DailymotionCloud') - # Look for OnionStudios embeds onionstudios_url = OnionStudiosIE._extract_url(webpage) if onionstudios_url: 
@@ -2879,6 +2891,21 @@ class GenericIE(InfoExtractor): return self.playlist_from_matches( channel9_urls, video_id, video_title, ie=Channel9IE.ie_key()) + vshare_urls = VShareIE._extract_urls(webpage) + if vshare_urls: + return self.playlist_from_matches( + vshare_urls, video_id, video_title, ie=VShareIE.ie_key()) + + # Look for Mediasite embeds + mediasite_urls = MediasiteIE._extract_urls(webpage) + if mediasite_urls: + entries = [ + self.url_result(smuggle_url( + compat_urlparse.urljoin(url, mediasite_url), + {'UrlReferrer': url}), ie=MediasiteIE.ie_key()) + for mediasite_url in mediasite_urls] + return self.playlist_result(entries, video_id, video_title) + def merge_dicts(dict1, dict2): merged = {} for k, v in dict1.items(): diff --git a/youtube_dl/extractor/instagram.py b/youtube_dl/extractor/instagram.py index 4667335..a77f619 100644 --- a/youtube_dl/extractor/instagram.py +++ b/youtube_dl/extractor/instagram.py @@ -1,5 +1,6 @@ from __future__ import unicode_literals +import itertools import re from .common import InfoExtractor @@ -7,7 +8,6 @@ from ..compat import compat_str from ..utils import ( get_element_by_attribute, int_or_none, - limit_length, lowercase_escape, try_get, ) @@ -130,13 +130,21 @@ class InstagramIE(InfoExtractor): video_url = media.get('video_url') height = int_or_none(media.get('dimensions', {}).get('height')) width = int_or_none(media.get('dimensions', {}).get('width')) - description = media.get('caption') + description = try_get( + media, lambda x: x['edge_media_to_caption']['edges'][0]['node']['text'], + compat_str) or media.get('caption') thumbnail = media.get('display_src') - timestamp = int_or_none(media.get('date')) + timestamp = int_or_none(media.get('taken_at_timestamp') or media.get('date')) uploader = media.get('owner', {}).get('full_name') uploader_id = media.get('owner', {}).get('username') - like_count = int_or_none(media.get('likes', {}).get('count')) - comment_count = int_or_none(media.get('comments', {}).get('count')) + + 
def get_count(key, kind): + return int_or_none(try_get( + media, (lambda x: x['edge_media_%s' % key]['count'], + lambda x: x['%ss' % kind]['count']))) + like_count = get_count('preview_like', 'like') + comment_count = get_count('to_comment', 'comment') + comments = [{ 'author': comment.get('user', {}).get('username'), 'author_id': comment.get('user', {}).get('id'), @@ -212,7 +220,7 @@ class InstagramIE(InfoExtractor): class InstagramUserIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?instagram\.com/(?P<username>[^/]{2,})/?(?:$|[?#])' + _VALID_URL = r'https?://(?:www\.)?instagram\.com/(?P<id>[^/]{2,})/?(?:$|[?#])' IE_DESC = 'Instagram user profile' IE_NAME = 'instagram:user' _TEST = { @@ -221,82 +229,79 @@ class InstagramUserIE(InfoExtractor): 'id': 'porsche', 'title': 'porsche', }, - 'playlist_mincount': 2, - 'playlist': [{ - 'info_dict': { - 'id': '614605558512799803_462752227', - 'ext': 'mp4', - 'title': '#Porsche Intelligent Performance.', - 'thumbnail': r're:^https?://.*\.jpg', - 'uploader': 'Porsche', - 'uploader_id': 'porsche', - 'timestamp': 1387486713, - 'upload_date': '20131219', - }, - }], + 'playlist_count': 5, 'params': { 'extract_flat': True, 'skip_download': True, + 'playlistend': 5, } } - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - uploader_id = mobj.group('username') + def _entries(self, uploader_id): + query = { + '__a': 1, + } - entries = [] - page_count = 0 - media_url = 'http://instagram.com/%s/media' % uploader_id - while True: + def get_count(kind): + return int_or_none(try_get( + node, lambda x: x['%ss' % kind]['count'])) + + for page_num in itertools.count(1): page = self._download_json( - media_url, uploader_id, - note='Downloading page %d ' % (page_count + 1), - ) - page_count += 1 + 'https://instagram.com/%s/' % uploader_id, uploader_id, + note='Downloading page %d' % page_num, + fatal=False, query=query) + if not page: + break + + nodes = try_get(page, lambda x: x['user']['media']['nodes'], list) + if 
not nodes: + break + + max_id = None - for it in page['items']: - if it.get('type') != 'video': + for node in nodes: + node_id = node.get('id') + if node_id: + max_id = node_id + + if node.get('__typename') != 'GraphVideo' and node.get('is_video') is not True: + continue + video_id = node.get('code') + if not video_id: continue - like_count = int_or_none(it.get('likes', {}).get('count')) - user = it.get('user', {}) - - formats = [{ - 'format_id': k, - 'height': v.get('height'), - 'width': v.get('width'), - 'url': v['url'], - } for k, v in it['videos'].items()] - self._sort_formats(formats) - - thumbnails_el = it.get('images', {}) - thumbnail = thumbnails_el.get('thumbnail', {}).get('url') - - # In some cases caption is null, which corresponds to None - # in python. As a result, it.get('caption', {}) gives None - title = (it.get('caption') or {}).get('text', it['id']) - - entries.append({ - 'id': it['id'], - 'title': limit_length(title, 80), - 'formats': formats, + + info = self.url_result( + 'https://instagram.com/p/%s/' % video_id, + ie=InstagramIE.ie_key(), video_id=video_id) + + description = try_get( + node, [lambda x: x['caption'], lambda x: x['text']['id']], + compat_str) + thumbnail = node.get('thumbnail_src') or node.get('display_src') + timestamp = int_or_none(node.get('date')) + + comment_count = get_count('comment') + like_count = get_count('like') + view_count = int_or_none(node.get('video_views')) + + info.update({ + 'description': description, 'thumbnail': thumbnail, - 'webpage_url': it.get('link'), - 'uploader': user.get('full_name'), - 'uploader_id': user.get('username'), + 'timestamp': timestamp, + 'comment_count': comment_count, 'like_count': like_count, - 'timestamp': int_or_none(it.get('created_time')), + 'view_count': view_count, }) - if not page['items']: + yield info + + if not max_id: break - max_id = page['items'][-1]['id'].split('_')[0] - media_url = ( - 'http://instagram.com/%s/media?max_id=%s' % ( - uploader_id, max_id)) - return { - 
'_type': 'playlist', - 'entries': entries, - 'id': uploader_id, - 'title': uploader_id, - } + query['max_id'] = max_id + + def _real_extract(self, url): + uploader_id = self._match_id(url) + return self.playlist_result( + self._entries(uploader_id), uploader_id, uploader_id) diff --git a/youtube_dl/extractor/internazionale.py b/youtube_dl/extractor/internazionale.py new file mode 100644 index 0000000..10ba1f6 --- /dev/null +++ b/youtube_dl/extractor/internazionale.py @@ -0,0 +1,64 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import unified_timestamp + + +class InternazionaleIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?internazionale\.it/video/(?:[^/]+/)*(?P<id>[^/?#&]+)' + _TEST = { + 'url': 'https://www.internazionale.it/video/2015/02/19/richard-linklater-racconta-una-scena-di-boyhood', + 'md5': '3e39d32b66882c1218e305acbf8348ca', + 'info_dict': { + 'id': '265968', + 'display_id': 'richard-linklater-racconta-una-scena-di-boyhood', + 'ext': 'mp4', + 'title': 'Richard Linklater racconta una scena di Boyhood', + 'description': 'md5:efb7e5bbfb1a54ae2ed5a4a015f0e665', + 'timestamp': 1424354635, + 'upload_date': '20150219', + 'thumbnail': r're:^https?://.*\.jpg$', + }, + 'params': { + 'format': 'bestvideo', + }, + } + + def _real_extract(self, url): + display_id = self._match_id(url) + + webpage = self._download_webpage(url, display_id) + + DATA_RE = r'data-%s=(["\'])(?P<value>(?:(?!\1).)+)\1' + + title = self._search_regex( + DATA_RE % 'video-title', webpage, 'title', default=None, + group='value') or self._og_search_title(webpage) + + video_id = self._search_regex( + DATA_RE % 'job-id', webpage, 'video id', group='value') + video_path = self._search_regex( + DATA_RE % 'video-path', webpage, 'video path', group='value') + + video_base = 'https://video.internazionale.it/%s/%s.' 
% (video_path, video_id) + + formats = self._extract_m3u8_formats( + video_base + 'm3u8', display_id, 'mp4', + entry_protocol='m3u8_native', m3u8_id='hls', fatal=False) + formats.extend(self._extract_mpd_formats( + video_base + 'mpd', display_id, mpd_id='dash', fatal=False)) + self._sort_formats(formats) + + timestamp = unified_timestamp(self._html_search_meta( + 'article:published_time', webpage, 'timestamp')) + + return { + 'id': video_id, + 'display_id': display_id, + 'title': title, + 'thumbnail': self._og_search_thumbnail(webpage), + 'description': self._og_search_description(webpage), + 'timestamp': timestamp, + 'formats': formats, + } diff --git a/youtube_dl/extractor/itv.py b/youtube_dl/extractor/itv.py index 26c48e4..18a7d7f 100644 --- a/youtube_dl/extractor/itv.py +++ b/youtube_dl/extractor/itv.py @@ -4,6 +4,7 @@ from __future__ import unicode_literals import uuid import xml.etree.ElementTree as etree import json +import re from .common import InfoExtractor from ..compat import ( @@ -25,7 +26,7 @@ from ..utils import ( class ITVIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?itv\.com/hub/[^/]+/(?P<id>[0-9a-zA-Z]+)' _GEO_COUNTRIES = ['GB'] - _TEST = { + _TESTS = [{ 'url': 'http://www.itv.com/hub/mr-bean-animated-series/2a2936a0053', 'info_dict': { 'id': '2a2936a0053', @@ -36,7 +37,11 @@ class ITVIE(InfoExtractor): # rtmp download 'skip_download': True, }, - } + }, { + # unavailable via data-playlist-url + 'url': 'https://www.itv.com/hub/through-the-keyhole/2a2271a0033', + 'only_matching': True, + }] def _real_extract(self, url): video_id = self._match_id(url) @@ -100,6 +105,18 @@ class ITVIE(InfoExtractor): 'Content-Type': 'text/xml; charset=utf-8', 'SOAPAction': 'http://tempuri.org/PlaylistService/GetPlaylist', }) + + info = self._search_json_ld(webpage, video_id, default={}) + formats = [] + subtitles = {} + + def extract_subtitle(sub_url): + ext = determine_ext(sub_url, 'ttml') + subtitles.setdefault('en', []).append({ + 'url': sub_url, + 'ext': 
'ttml' if ext == 'xml' else ext, + }) + resp_env = self._download_xml( params['data-playlist-url'], video_id, headers=headers, data=etree.tostring(req_env)) @@ -110,41 +127,59 @@ class ITVIE(InfoExtractor): if fault_code == 'InvalidGeoRegion': self.raise_geo_restricted( msg=fault_string, countries=self._GEO_COUNTRIES) - raise ExtractorError('%s said: %s' % (self.IE_NAME, fault_string)) - title = xpath_text(playlist, 'EpisodeTitle', fatal=True) - video_element = xpath_element(playlist, 'VideoEntries/Video', fatal=True) - media_files = xpath_element(video_element, 'MediaFiles', fatal=True) - rtmp_url = media_files.attrib['base'] + elif fault_code != 'InvalidEntity': + raise ExtractorError( + '%s said: %s' % (self.IE_NAME, fault_string), expected=True) + info.update({ + 'title': self._og_search_title(webpage), + 'episode_title': params.get('data-video-episode'), + 'series': params.get('data-video-title'), + }) + else: + title = xpath_text(playlist, 'EpisodeTitle', default=None) + info.update({ + 'title': title, + 'episode_title': title, + 'episode_number': int_or_none(xpath_text(playlist, 'EpisodeNumber')), + 'series': xpath_text(playlist, 'ProgrammeTitle'), + 'duration': parse_duration(xpath_text(playlist, 'Duration')), + }) + video_element = xpath_element(playlist, 'VideoEntries/Video', fatal=True) + media_files = xpath_element(video_element, 'MediaFiles', fatal=True) + rtmp_url = media_files.attrib['base'] - formats = [] - for media_file in media_files.findall('MediaFile'): - play_path = xpath_text(media_file, 'URL') - if not play_path: - continue - tbr = int_or_none(media_file.get('bitrate'), 1000) - f = { - 'format_id': 'rtmp' + ('-%d' % tbr if tbr else ''), - 'play_path': play_path, - # Providing this swfVfy allows to avoid truncated downloads - 'player_url': 'http://www.itv.com/mercury/Mercury_VideoPlayer.swf', - 'page_url': url, - 'tbr': tbr, - 'ext': 'flv', - } - app = self._search_regex( - 'rtmpe?://[^/]+/(.+)$', rtmp_url, 'app', default=None) - if app: - 
f.update({ - 'url': rtmp_url.split('?', 1)[0], - 'app': app, - }) - else: - f['url'] = rtmp_url - formats.append(f) - - ios_playlist_url = params.get('data-video-playlist') + for media_file in media_files.findall('MediaFile'): + play_path = xpath_text(media_file, 'URL') + if not play_path: + continue + tbr = int_or_none(media_file.get('bitrate'), 1000) + f = { + 'format_id': 'rtmp' + ('-%d' % tbr if tbr else ''), + 'play_path': play_path, + # Providing this swfVfy allows to avoid truncated downloads + 'player_url': 'http://www.itv.com/mercury/Mercury_VideoPlayer.swf', + 'page_url': url, + 'tbr': tbr, + 'ext': 'flv', + } + app = self._search_regex( + 'rtmpe?://[^/]+/(.+)$', rtmp_url, 'app', default=None) + if app: + f.update({ + 'url': rtmp_url.split('?', 1)[0], + 'app': app, + }) + else: + f['url'] = rtmp_url + formats.append(f) + + for caption_url in video_element.findall('ClosedCaptioningURIs/URL'): + if caption_url.text: + extract_subtitle(caption_url.text) + + ios_playlist_url = params.get('data-video-playlist') or params.get('data-video-id') hmac = params.get('data-video-hmac') - if ios_playlist_url and hmac: + if ios_playlist_url and hmac and re.match(r'https?://', ios_playlist_url): headers = self.geo_verification_headers() headers.update({ 'Accept': 'application/vnd.itv.vod.playlist.v2+json', @@ -159,12 +194,12 @@ class ITVIE(InfoExtractor): 'token': '' }, 'device': { - 'manufacturer': 'Apple', - 'model': 'iPad', + 'manufacturer': 'Safari', + 'model': '5', 'os': { - 'name': 'iPhone OS', - 'version': '9.3', - 'type': 'ios' + 'name': 'Windows NT', + 'version': '6.1', + 'type': 'desktop' } }, 'client': { @@ -173,10 +208,10 @@ class ITVIE(InfoExtractor): }, 'variantAvailability': { 'featureset': { - 'min': ['hls', 'aes'], - 'max': ['hls', 'aes'] + 'min': ['hls', 'aes', 'outband-webvtt'], + 'max': ['hls', 'aes', 'outband-webvtt'] }, - 'platformTag': 'mobile' + 'platformTag': 'dotcom' } }).encode(), headers=headers, fatal=False) if ios_playlist: @@ -197,27 
+232,22 @@ class ITVIE(InfoExtractor): formats.append({ 'url': href, }) - self._sort_formats(formats) + subs = video_data.get('Subtitles') + if isinstance(subs, list): + for sub in subs: + if not isinstance(sub, dict): + continue + href = sub.get('Href') + if isinstance(href, compat_str): + extract_subtitle(href) + if not info.get('duration'): + info['duration'] = parse_duration(video_data.get('Duration')) - subtitles = {} - for caption_url in video_element.findall('ClosedCaptioningURIs/URL'): - if not caption_url.text: - continue - ext = determine_ext(caption_url.text, 'ttml') - subtitles.setdefault('en', []).append({ - 'url': caption_url.text, - 'ext': 'ttml' if ext == 'xml' else ext, - }) + self._sort_formats(formats) - info = self._search_json_ld(webpage, video_id, default={}) info.update({ 'id': video_id, - 'title': title, 'formats': formats, 'subtitles': subtitles, - 'episode_title': title, - 'episode_number': int_or_none(xpath_text(playlist, 'EpisodeNumber')), - 'series': xpath_text(playlist, 'ProgrammeTitle'), - 'duartion': parse_duration(xpath_text(playlist, 'Duration')), }) return info diff --git a/youtube_dl/extractor/jwplatform.py b/youtube_dl/extractor/jwplatform.py index 33d55f7..c9bcbb0 100644 --- a/youtube_dl/extractor/jwplatform.py +++ b/youtube_dl/extractor/jwplatform.py @@ -24,7 +24,7 @@ class JWPlatformIE(InfoExtractor): @staticmethod def _extract_url(webpage): mobj = re.search( - r'<script[^>]+?src=["\'](?P<url>(?:https?:)?//content.jwplatform.com/players/[a-zA-Z0-9]{8})', + r'<(?:script|iframe)[^>]+?src=["\'](?P<url>(?:https?:)?//content.jwplatform.com/players/[a-zA-Z0-9]{8})', webpage) if mobj: return mobj.group('url') diff --git a/youtube_dl/extractor/kaltura.py b/youtube_dl/extractor/kaltura.py index bdac2df..562e25f 100644 --- a/youtube_dl/extractor/kaltura.py +++ b/youtube_dl/extractor/kaltura.py @@ -125,9 +125,12 @@ class KalturaIE(InfoExtractor): 
(?:https?:)?//cdnapi(?:sec)?\.kaltura\.com(?::\d+)?/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P<partner_id>\d+)(?:(?!(?P=q1)).)* (?P=q1).*? (?: - entry_?[Ii]d| - (?P<q2>["'])entry_?[Ii]d(?P=q2) - )\s*:\s* + (?: + entry_?[Ii]d| + (?P<q2>["'])entry_?[Ii]d(?P=q2) + )\s*:\s*| + \[\s*(?P<q2_1>["'])entry_?[Ii]d(?P=q2_1)\s*\]\s*=\s* + ) (?P<q3>["'])(?P<id>(?:(?!(?P=q3)).)+)(?P=q3) ''', webpage) or re.search( diff --git a/youtube_dl/extractor/livestream.py b/youtube_dl/extractor/livestream.py index 317ebbc..c4776bb 100644 --- a/youtube_dl/extractor/livestream.py +++ b/youtube_dl/extractor/livestream.py @@ -114,7 +114,7 @@ class LivestreamIE(InfoExtractor): smil_url = video_data.get('smil_url') if smil_url: - formats.extend(self._extract_smil_formats(smil_url, video_id)) + formats.extend(self._extract_smil_formats(smil_url, video_id, fatal=False)) m3u8_url = video_data.get('m3u8_url') if m3u8_url: diff --git a/youtube_dl/extractor/mailru.py b/youtube_dl/extractor/mailru.py index f7cc3c8..6b7c5e3 100644 --- a/youtube_dl/extractor/mailru.py +++ b/youtube_dl/extractor/mailru.py @@ -13,8 +13,15 @@ from ..utils import ( class MailRuIE(InfoExtractor): IE_NAME = 'mailru' IE_DESC = 'Видео@Mail.Ru' - _VALID_URL = r'https?://(?:(?:www|m)\.)?my\.mail\.ru/(?:video/.*#video=/?(?P<idv1>(?:[^/]+/){3}\d+)|(?:(?P<idv2prefix>(?:[^/]+/){2})video/(?P<idv2suffix>[^/]+/\d+))\.html)' - + _VALID_URL = r'''(?x) + https?:// + (?:(?:www|m)\.)?my\.mail\.ru/ + (?: + video/.*\#video=/?(?P<idv1>(?:[^/]+/){3}\d+)| + (?:(?P<idv2prefix>(?:[^/]+/){2})video/(?P<idv2suffix>[^/]+/\d+))\.html| + (?:video/embed|\+/video/meta)/(?P<metaid>\d+) + ) + ''' _TESTS = [ { 'url': 'http://my.mail.ru/video/top#video=/mail/sonypicturesrus/75/76', @@ -23,7 +30,7 @@ class MailRuIE(InfoExtractor): 'id': '46301138_76', 'ext': 'mp4', 'title': 'Новый Человек-Паук. Высокое напряжение. 
Восстание Электро', - 'timestamp': 1393232740, + 'timestamp': 1393235077, 'upload_date': '20140224', 'uploader': 'sonypicturesrus', 'uploader_id': 'sonypicturesrus@mail.ru', @@ -40,7 +47,7 @@ class MailRuIE(InfoExtractor): 'title': 'Samsung Galaxy S5 Hammer Smash Fail Battery Explosion', 'timestamp': 1397039888, 'upload_date': '20140409', - 'uploader': 'hitech@corp.mail.ru', + 'uploader': 'hitech', 'uploader_id': 'hitech@corp.mail.ru', 'duration': 245, }, @@ -65,28 +72,42 @@ class MailRuIE(InfoExtractor): { 'url': 'http://m.my.mail.ru/mail/3sktvtr/video/_myvideo/138.html', 'only_matching': True, + }, + { + 'url': 'https://my.mail.ru/video/embed/7949340477499637815', + 'only_matching': True, + }, + { + 'url': 'http://my.mail.ru/+/video/meta/7949340477499637815', + 'only_matching': True, } ] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('idv1') - - if not video_id: - video_id = mobj.group('idv2prefix') + mobj.group('idv2suffix') - - webpage = self._download_webpage(url, video_id) + meta_id = mobj.group('metaid') + + video_id = None + if meta_id: + meta_url = 'https://my.mail.ru/+/video/meta/%s' % meta_id + else: + video_id = mobj.group('idv1') + if not video_id: + video_id = mobj.group('idv2prefix') + mobj.group('idv2suffix') + webpage = self._download_webpage(url, video_id) + page_config = self._parse_json(self._search_regex( + r'(?s)<script[^>]+class="sp-video__page-config"[^>]*>(.+?)</script>', + webpage, 'page config', default='{}'), video_id, fatal=False) + if page_config: + meta_url = page_config.get('metaUrl') or page_config.get('video', {}).get('metaUrl') + else: + meta_url = None video_data = None - - page_config = self._parse_json(self._search_regex( - r'(?s)<script[^>]+class="sp-video__page-config"[^>]*>(.+?)</script>', - webpage, 'page config', default='{}'), video_id, fatal=False) - if page_config: - meta_url = page_config.get('metaUrl') or page_config.get('video', {}).get('metaUrl') - if meta_url: - 
video_data = self._download_json( - meta_url, video_id, 'Downloading video meta JSON', fatal=False) + if meta_url: + video_data = self._download_json( + meta_url, video_id or meta_id, 'Downloading video meta JSON', + fatal=not video_id) # Fallback old approach if not video_data: diff --git a/youtube_dl/extractor/massengeschmacktv.py b/youtube_dl/extractor/massengeschmacktv.py new file mode 100644 index 0000000..cfcc6b2 --- /dev/null +++ b/youtube_dl/extractor/massengeschmacktv.py @@ -0,0 +1,77 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + clean_html, + determine_ext, + int_or_none, + js_to_json, + mimetype2ext, + parse_filesize, +) + + +class MassengeschmackTVIE(InfoExtractor): + IE_NAME = 'massengeschmack.tv' + _VALID_URL = r'https?://(?:www\.)?massengeschmack\.tv/play/(?P<id>[^?&#]+)' + + _TEST = { + 'url': 'https://massengeschmack.tv/play/fktv202', + 'md5': 'a9e054db9c2b5a08f0a0527cc201e8d3', + 'info_dict': { + 'id': 'fktv202', + 'ext': 'mp4', + 'title': 'Fernsehkritik-TV - Folge 202', + }, + } + + def _real_extract(self, url): + episode = self._match_id(url) + + webpage = self._download_webpage(url, episode) + title = clean_html(self._html_search_regex( + '<h3>([^<]+)</h3>', webpage, 'title')) + thumbnail = self._search_regex(r'POSTER\s*=\s*"([^"]+)', webpage, 'thumbnail', fatal=False) + sources = self._parse_json(self._search_regex(r'(?s)MEDIA\s*=\s*(\[.+?\]);', webpage, 'media'), episode, js_to_json) + + formats = [] + for source in sources: + furl = source.get('src') + if not furl: + continue + furl = self._proto_relative_url(furl) + ext = determine_ext(furl) or mimetype2ext(source.get('type')) + if ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + furl, episode, 'mp4', 'm3u8_native', + m3u8_id='hls', fatal=False)) + else: + formats.append({ + 'url': furl, + 'format_id': determine_ext(furl), + }) + + for (durl, format_id, width, height, filesize) in re.findall(r'''(?x) + 
<a[^>]+?href="(?P<url>(?:https:)?//[^"]+)".*? + <strong>(?P<format_id>.+?)</strong>.*? + <small>(?:(?P<width>\d+)x(?P<height>\d+))?\s+?\((?P<filesize>[\d,]+\s*[GM]iB)\)</small> + ''', webpage): + formats.append({ + 'url': durl, + 'format_id': format_id, + 'width': int_or_none(width), + 'height': int_or_none(height), + 'filesize': parse_filesize(filesize), + 'vcodec': 'none' if format_id.startswith('Audio') else None, + }) + + self._sort_formats(formats, ('width', 'height', 'filesize', 'tbr')) + + return { + 'id': episode, + 'title': title, + 'formats': formats, + 'thumbnail': thumbnail, + } diff --git a/youtube_dl/extractor/mediasite.py b/youtube_dl/extractor/mediasite.py new file mode 100644 index 0000000..0e2645c --- /dev/null +++ b/youtube_dl/extractor/mediasite.py @@ -0,0 +1,214 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re +import json + +from .common import InfoExtractor +from ..compat import ( + compat_str, + compat_urlparse, +) +from ..utils import ( + ExtractorError, + float_or_none, + mimetype2ext, + unescapeHTML, + unsmuggle_url, + urljoin, +) + + +class MediasiteIE(InfoExtractor): + _VALID_URL = r'(?xi)https?://[^/]+/Mediasite/Play/(?P<id>[0-9a-f]{32,34})(?P<query>\?[^#]+|)' + _TESTS = [ + { + 'url': 'https://hitsmediaweb.h-its.org/mediasite/Play/2db6c271681e4f199af3c60d1f82869b1d', + 'info_dict': { + 'id': '2db6c271681e4f199af3c60d1f82869b1d', + 'ext': 'mp4', + 'title': 'Lecture: Tuesday, September 20, 2016 - Sir Andrew Wiles', + 'description': 'Sir Andrew Wiles: “Equations in arithmetic”\\n\\nI will describe some of the interactions between modern number theory and the problem of solving equations in rational numbers or integers\\u0027.', + 'timestamp': 1474268400.0, + 'upload_date': '20160919', + }, + }, + { + 'url': 'http://mediasite.uib.no/Mediasite/Play/90bb363295d945d6b548c867d01181361d?catalog=a452b7df-9ae1-46b7-a3ba-aceeb285f3eb', + 'info_dict': { + 'id': '90bb363295d945d6b548c867d01181361d', + 'ext': 'mp4', + 
'upload_date': '20150429', + 'title': '5) IT-forum 2015-Dag 1 - Dungbeetle - How and why Rain created a tiny bug tracker for Unity', + 'timestamp': 1430311380.0, + }, + }, + { + 'url': 'https://collegerama.tudelft.nl/Mediasite/Play/585a43626e544bdd97aeb71a0ec907a01d', + 'md5': '481fda1c11f67588c0d9d8fbdced4e39', + 'info_dict': { + 'id': '585a43626e544bdd97aeb71a0ec907a01d', + 'ext': 'mp4', + 'title': 'Een nieuwe wereld: waarden, bewustzijn en techniek van de mensheid 2.0.', + 'description': '', + 'thumbnail': r're:^https?://.*\.jpg(?:\?.*)?$', + 'duration': 7713.088, + 'timestamp': 1413309600, + 'upload_date': '20141014', + }, + }, + { + 'url': 'https://collegerama.tudelft.nl/Mediasite/Play/86a9ea9f53e149079fbdb4202b521ed21d?catalog=fd32fd35-6c99-466c-89d4-cd3c431bc8a4', + 'md5': 'ef1fdded95bdf19b12c5999949419c92', + 'info_dict': { + 'id': '86a9ea9f53e149079fbdb4202b521ed21d', + 'ext': 'wmv', + 'title': '64ste Vakantiecursus: Afvalwater', + 'description': 'md5:7fd774865cc69d972f542b157c328305', + 'thumbnail': r're:^https?://.*\.jpg(?:\?.*?)?$', + 'duration': 10853, + 'timestamp': 1326446400, + 'upload_date': '20120113', + }, + }, + { + 'url': 'http://digitalops.sandia.gov/Mediasite/Play/24aace4429fc450fb5b38cdbf424a66e1d', + 'md5': '9422edc9b9a60151727e4b6d8bef393d', + 'info_dict': { + 'id': '24aace4429fc450fb5b38cdbf424a66e1d', + 'ext': 'mp4', + 'title': 'Xyce Software Training - Section 1', + 'description': r're:(?s)SAND Number: SAND 2013-7800.{200,}', + 'upload_date': '20120409', + 'timestamp': 1333983600, + 'duration': 7794, + } + } + ] + + # look in Mediasite.Core.js (Mediasite.ContentStreamType[*]) + _STREAM_TYPES = { + 0: 'video1', # the main video + 2: 'slide', + 3: 'presentation', + 4: 'video2', # screencast? 
+ 5: 'video3', + } + + @staticmethod + def _extract_urls(webpage): + return [ + unescapeHTML(mobj.group('url')) + for mobj in re.finditer( + r'(?xi)<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:(?:https?:)?//[^/]+)?/Mediasite/Play/[0-9a-f]{32,34}(?:\?.*?)?)\1', + webpage)] + + def _real_extract(self, url): + url, data = unsmuggle_url(url, {}) + mobj = re.match(self._VALID_URL, url) + resource_id = mobj.group('id') + query = mobj.group('query') + + webpage, urlh = self._download_webpage_handle(url, resource_id) # XXX: add UrlReferrer? + redirect_url = compat_str(urlh.geturl()) + + # XXX: might have also extracted UrlReferrer and QueryString from the html + service_path = compat_urlparse.urljoin(redirect_url, self._html_search_regex( + r'<div[^>]+\bid=["\']ServicePath[^>]+>(.+?)</div>', webpage, resource_id, + default='/Mediasite/PlayerService/PlayerService.svc/json')) + + player_options = self._download_json( + '%s/GetPlayerOptions' % service_path, resource_id, + headers={ + 'Content-type': 'application/json; charset=utf-8', + 'X-Requested-With': 'XMLHttpRequest', + }, + data=json.dumps({ + 'getPlayerOptionsRequest': { + 'ResourceId': resource_id, + 'QueryString': query, + 'UrlReferrer': data.get('UrlReferrer', ''), + 'UseScreenReader': False, + } + }).encode('utf-8'))['d'] + + presentation = player_options['Presentation'] + title = presentation['Title'] + + if presentation is None: + raise ExtractorError( + 'Mediasite says: %s' % player_options['PlayerPresentationStatusMessage'], + expected=True) + + thumbnails = [] + formats = [] + for snum, Stream in enumerate(presentation['Streams']): + stream_type = Stream.get('StreamType') + if stream_type is None: + continue + + video_urls = Stream.get('VideoUrls') + if not isinstance(video_urls, list): + video_urls = [] + + stream_id = self._STREAM_TYPES.get( + stream_type, 'type%u' % stream_type) + + stream_formats = [] + for unum, VideoUrl in enumerate(video_urls): + video_url = VideoUrl.get('Location') + if not video_url or not 
isinstance(video_url, compat_str): + continue + # XXX: if Stream.get('CanChangeScheme', False), switch scheme to HTTP/HTTPS + + media_type = VideoUrl.get('MediaType') + if media_type == 'SS': + stream_formats.extend(self._extract_ism_formats( + video_url, resource_id, + ism_id='%s-%u.%u' % (stream_id, snum, unum), + fatal=False)) + elif media_type == 'Dash': + stream_formats.extend(self._extract_mpd_formats( + video_url, resource_id, + mpd_id='%s-%u.%u' % (stream_id, snum, unum), + fatal=False)) + else: + stream_formats.append({ + 'format_id': '%s-%u.%u' % (stream_id, snum, unum), + 'url': video_url, + 'ext': mimetype2ext(VideoUrl.get('MimeType')), + }) + + # TODO: if Stream['HasSlideContent']: + # synthesise an MJPEG video stream '%s-%u.slides' % (stream_type, snum) + # from Stream['Slides'] + # this will require writing a custom downloader... + + # disprefer 'secondary' streams + if stream_type != 0: + for fmt in stream_formats: + fmt['preference'] = -1 + + thumbnail_url = Stream.get('ThumbnailUrl') + if thumbnail_url: + thumbnails.append({ + 'id': '%s-%u' % (stream_id, snum), + 'url': urljoin(redirect_url, thumbnail_url), + 'preference': -1 if stream_type != 0 else 0, + }) + formats.extend(stream_formats) + + self._sort_formats(formats) + + # XXX: Presentation['Presenters'] + # XXX: Presentation['Transcript'] + + return { + 'id': resource_id, + 'title': title, + 'description': presentation.get('Description'), + 'duration': float_or_none(presentation.get('Duration'), 1000), + 'timestamp': float_or_none(presentation.get('UnixTime'), 1000), + 'formats': formats, + 'thumbnails': thumbnails, + } diff --git a/youtube_dl/extractor/mnet.py b/youtube_dl/extractor/mnet.py index 6a85dcb..0e26ca1 100644 --- a/youtube_dl/extractor/mnet.py +++ b/youtube_dl/extractor/mnet.py @@ -40,21 +40,29 @@ class MnetIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) + # TODO: extract rtmp formats + # no stype -> rtmp url + # stype=H -> m3u8 url + # stype=M -> 
mpd url info = self._download_json( - 'http://content.api.mnet.com/player/vodConfig?id=%s&ctype=CLIP' % video_id, - video_id, 'Downloading vod config JSON')['data']['info'] + 'http://content.api.mnet.com/player/vodConfig', + video_id, 'Downloading vod config JSON', query={ + 'id': video_id, + 'ctype': 'CLIP', + 'stype': 'H', + })['data']['info'] title = info['title'] - rtmp_info = self._download_json( - info['cdn'], video_id, 'Downloading vod cdn JSON') - - formats = [{ - 'url': rtmp_info['serverurl'] + rtmp_info['fileurl'], - 'ext': 'flv', - 'page_url': url, - 'player_url': 'http://flvfile.mnet.com/service/player/201602/cjem_player_tv.swf?v=201602191318', - }] + cdn_data = self._download_json( + info['cdn'], video_id, 'Downloading vod cdn JSON')['data'][0] + m3u8_url = cdn_data['url'] + token = cdn_data.get('token') + if token and token != '-': + m3u8_url += '?' + token + formats = self._extract_wowza_formats( + m3u8_url, video_id, skip_protocols=['rtmp', 'rtsp', 'f4m']) + self._sort_formats(formats) description = info.get('ment') duration = parse_duration(info.get('time')) diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py index 1154a35..7a3b57a 100644 --- a/youtube_dl/extractor/mtv.py +++ b/youtube_dl/extractor/mtv.py @@ -115,10 +115,17 @@ class MTVServicesInfoExtractor(InfoExtractor): if transcript.get('kind') != 'captions': continue lang = transcript.get('srclang') - subtitles[lang] = [{ - 'url': compat_str(typographic.get('src')), - 'ext': typographic.get('format') - } for typographic in transcript.findall('./typographic')] + for typographic in transcript.findall('./typographic'): + sub_src = typographic.get('src') + if not sub_src: + continue + ext = typographic.get('format') + if ext == 'cea-608': + ext = 'scc' + subtitles.setdefault(lang, []).append({ + 'url': compat_str(sub_src), + 'ext': ext + }) return subtitles def _get_video_info(self, itemdoc, use_hls=True): diff --git a/youtube_dl/extractor/nexx.py b/youtube_dl/extractor/nexx.py 
index 071879b..9203c04 100644 --- a/youtube_dl/extractor/nexx.py +++ b/youtube_dl/extractor/nexx.py @@ -28,7 +28,7 @@ class NexxIE(InfoExtractor): _TESTS = [{ # movie 'url': 'https://api.nexx.cloud/v3/748/videos/byid/128907', - 'md5': '16746bfc28c42049492385c989b26c4a', + 'md5': '828cea195be04e66057b846288295ba1', 'info_dict': { 'id': '128907', 'ext': 'mp4', @@ -42,9 +42,6 @@ class NexxIE(InfoExtractor): 'timestamp': 1384264416, 'upload_date': '20131112', }, - 'params': { - 'format': 'bestvideo', - }, }, { # episode 'url': 'https://api.nexx.cloud/v3/741/videos/byid/247858', @@ -62,7 +59,6 @@ class NexxIE(InfoExtractor): 'season_number': 2, }, 'params': { - 'format': 'bestvideo', 'skip_download': True, }, }, { @@ -193,35 +189,67 @@ class NexxIE(InfoExtractor): stream_data = video['streamdata'] language = general.get('language_raw') or '' - # TODO: reverse more cdns and formats + # TODO: reverse more cdns cdn = stream_data['cdnType'] assert cdn == 'azure' azure_locator = stream_data['azureLocator'] - AZURE_URL = 'http://nx-p%02d.akamaized.net/' + AZURE_URL = 'http://nx%s%02d.akamaized.net/' - for secure in ('s', ''): - cdn_shield = stream_data.get('cdnShieldHTTP%s' % secure.upper()) - if cdn_shield: - azure_base = 'http%s://%s' % (secure, cdn_shield) - break - else: - azure_base = AZURE_URL % int(stream_data['azureAccount'].replace('nexxplayplus', '')) + def get_cdn_shield_base(shield_type='', prefix='-p'): + for secure in ('', 's'): + cdn_shield = stream_data.get('cdnShield%sHTTP%s' % (shield_type, secure.upper())) + if cdn_shield: + return 'http%s://%s' % (secure, cdn_shield) + else: + return AZURE_URL % (prefix, int(stream_data['azureAccount'].replace('nexxplayplus', ''))) + azure_stream_base = get_cdn_shield_base() is_ml = ',' in language - azure_m3u8_url = '%s%s/%s_src%s.ism/Manifest(format=m3u8-aapl)' % ( - azure_base, azure_locator, video_id, ('_manifest' if is_ml else '')) + azure_manifest_url = '%s%s/%s_src%s.ism/Manifest' % ( + azure_stream_base, 
azure_locator, video_id, ('_manifest' if is_ml else '')) + '%s' protection_token = try_get( video, lambda x: x['protectiondata']['token'], compat_str) if protection_token: - azure_m3u8_url += '?hdnts=%s' % protection_token + azure_manifest_url += '?hdnts=%s' % protection_token formats = self._extract_m3u8_formats( - azure_m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native', - m3u8_id='%s-hls' % cdn) + azure_manifest_url % '(format=m3u8-aapl)', + video_id, 'mp4', 'm3u8_native', + m3u8_id='%s-hls' % cdn, fatal=False) + formats.extend(self._extract_mpd_formats( + azure_manifest_url % '(format=mpd-time-csf)', + video_id, mpd_id='%s-dash' % cdn, fatal=False)) + formats.extend(self._extract_ism_formats( + azure_manifest_url % '', video_id, ism_id='%s-mss' % cdn, fatal=False)) + + azure_progressive_base = get_cdn_shield_base('Prog', '-d') + azure_file_distribution = stream_data.get('azureFileDistribution') + if azure_file_distribution: + fds = azure_file_distribution.split(',') + if fds: + for fd in fds: + ss = fd.split(':') + if len(ss) == 2: + tbr = int_or_none(ss[0]) + if tbr: + f = { + 'url': '%s%s/%s_src_%s_%d.mp4' % ( + azure_progressive_base, azure_locator, video_id, ss[1], tbr), + 'format_id': '%s-http-%d' % (cdn, tbr), + 'tbr': tbr, + } + width_height = ss[1].split('x') + if len(width_height) == 2: + f.update({ + 'width': int_or_none(width_height[0]), + 'height': int_or_none(width_height[1]), + }) + formats.append(f) + self._sort_formats(formats) return { diff --git a/youtube_dl/extractor/nick.py b/youtube_dl/extractor/nick.py index 310eea2..7edd684 100644 --- a/youtube_dl/extractor/nick.py +++ b/youtube_dl/extractor/nick.py @@ -10,7 +10,7 @@ from ..utils import update_url_query class NickIE(MTVServicesInfoExtractor): # None of videos on the website are still alive? 
IE_NAME = 'nick.com' - _VALID_URL = r'https?://(?:(?:www|beta)\.)?nick(?:jr)?\.com/(?:[^/]+/)?(?:videos/clip|[^/]+/videos)/(?P<id>[^/?#.]+)' + _VALID_URL = r'https?://(?P<domain>(?:(?:www|beta)\.)?nick(?:jr)?\.com)/(?:[^/]+/)?(?:videos/clip|[^/]+/videos)/(?P<id>[^/?#.]+)' _FEED_URL = 'http://udat.mtvnservices.com/service1/dispatch.htm' _GEO_COUNTRIES = ['US'] _TESTS = [{ @@ -69,8 +69,59 @@ class NickIE(MTVServicesInfoExtractor): 'mgid': uri, } - def _extract_mgid(self, webpage): - return self._search_regex(r'data-contenturi="([^"]+)', webpage, 'mgid') + def _real_extract(self, url): + domain, display_id = re.match(self._VALID_URL, url).groups() + video_data = self._download_json( + 'http://%s/data/video.endLevel.json' % domain, + display_id, query={ + 'urlKey': display_id, + }) + return self._get_videos_info(video_data['player'] + video_data['id']) + + +class NickBrIE(MTVServicesInfoExtractor): + IE_NAME = 'nickelodeon:br' + _VALID_URL = r'https?://(?P<domain>(?:www\.)?nickjr|mundonick\.uol)\.com\.br/(?:programas/)?[^/]+/videos/(?:episodios/)?(?P<id>[^/?#.]+)' + _TESTS = [{ + 'url': 'http://www.nickjr.com.br/patrulha-canina/videos/210-labirinto-de-pipoca/', + 'only_matching': True, + }, { + 'url': 'http://mundonick.uol.com.br/programas/the-loud-house/videos/muitas-irmas/7ljo9j', + 'only_matching': True, + }] + + def _real_extract(self, url): + domain, display_id = re.match(self._VALID_URL, url).groups() + webpage = self._download_webpage(url, display_id) + uri = self._search_regex( + r'data-(?:contenturi|mgid)="([^"]+)', webpage, 'mgid') + video_id = self._id_from_uri(uri) + config = self._download_json( + 'http://media.mtvnservices.com/pmt/e1/access/index.html', + video_id, query={ + 'uri': uri, + 'configtype': 'edge', + }, headers={ + 'Referer': url, + }) + info_url = self._remove_template_parameter(config['feedWithQueryParams']) + if info_url == 'None': + if domain.startswith('www.'): + domain = domain[4:] + content_domain = { + 'mundonick.uol': 
'mundonick.com.br', + 'nickjr': 'br.nickelodeonjunior.tv', + }[domain] + query = { + 'mgid': uri, + 'imageEp': content_domain, + 'arcEp': content_domain, + } + if domain == 'nickjr.com.br': + query['ep'] = 'c4b16088' + info_url = update_url_query( + 'http://feeds.mtvnservices.com/od/feed/intl-mrss-player-feed', query) + return self._get_videos_info_from_url(info_url, video_id) class NickDeIE(MTVServicesInfoExtractor): diff --git a/youtube_dl/extractor/noco.py b/youtube_dl/extractor/noco.py index 8b83e1f..a9f9b10 100644 --- a/youtube_dl/extractor/noco.py +++ b/youtube_dl/extractor/noco.py @@ -70,7 +70,7 @@ class NocoIE(InfoExtractor): return login = self._download_json( - self._LOGIN_URL, None, 'Logging in as %s' % username, + self._LOGIN_URL, None, 'Logging in', data=urlencode_postdata({ 'a': 'login', 'cookie': '1', diff --git a/youtube_dl/extractor/nowtv.py b/youtube_dl/extractor/nowtv.py deleted file mode 100644 index e43b371..0000000 --- a/youtube_dl/extractor/nowtv.py +++ /dev/null @@ -1,261 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..compat import compat_str -from ..utils import ( - ExtractorError, - determine_ext, - int_or_none, - parse_iso8601, - parse_duration, - remove_start, -) - - -class NowTVBaseIE(InfoExtractor): - _VIDEO_FIELDS = ( - 'id', 'title', 'free', 'geoblocked', 'articleLong', 'articleShort', - 'broadcastStartDate', 'seoUrl', 'duration', 'files', - 'format.defaultImage169Format', 'format.defaultImage169Logo') - - def _extract_video(self, info, display_id=None): - video_id = compat_str(info['id']) - - files = info['files'] - if not files: - if info.get('geoblocked', False): - raise ExtractorError( - 'Video %s is not available from your location due to geo restriction' % video_id, - expected=True) - if not info.get('free', True): - raise ExtractorError( - 'Video %s is not available for free' % video_id, expected=True) - - formats = [] - for item in files['items']: - 
if determine_ext(item['path']) != 'f4v': - continue - app, play_path = remove_start(item['path'], '/').split('/', 1) - formats.append({ - 'url': 'rtmpe://fms.rtl.de', - 'app': app, - 'play_path': 'mp4:%s' % play_path, - 'ext': 'flv', - 'page_url': 'http://rtlnow.rtl.de', - 'player_url': 'http://cdn.static-fra.de/now/vodplayer.swf', - 'tbr': int_or_none(item.get('bitrate')), - }) - self._sort_formats(formats) - - title = info['title'] - description = info.get('articleLong') or info.get('articleShort') - timestamp = parse_iso8601(info.get('broadcastStartDate'), ' ') - duration = parse_duration(info.get('duration')) - - f = info.get('format', {}) - thumbnail = f.get('defaultImage169Format') or f.get('defaultImage169Logo') - - return { - 'id': video_id, - 'display_id': display_id or info.get('seoUrl'), - 'title': title, - 'description': description, - 'thumbnail': thumbnail, - 'timestamp': timestamp, - 'duration': duration, - 'formats': formats, - } - - -class NowTVIE(NowTVBaseIE): - _WORKING = False - _VALID_URL = r'https?://(?:www\.)?nowtv\.(?:de|at|ch)/(?:rtl|rtl2|rtlnitro|superrtl|ntv|vox)/(?P<show_id>[^/]+)/(?:(?:list/[^/]+|jahr/\d{4}/\d{1,2})/)?(?P<id>[^/]+)/(?:player|preview)' - - _TESTS = [{ - # rtl - 'url': 'http://www.nowtv.de/rtl/bauer-sucht-frau/die-neuen-bauern-und-eine-hochzeit/player', - 'info_dict': { - 'id': '203519', - 'display_id': 'bauer-sucht-frau/die-neuen-bauern-und-eine-hochzeit', - 'ext': 'flv', - 'title': 'Inka Bause stellt die neuen Bauern vor', - 'description': 'md5:e234e1ed6d63cf06be5c070442612e7e', - 'thumbnail': r're:^https?://.*\.jpg$', - 'timestamp': 1432580700, - 'upload_date': '20150525', - 'duration': 2786, - }, - 'params': { - # rtmp download - 'skip_download': True, - }, - }, { - # rtl2 - 'url': 'http://www.nowtv.de/rtl2/berlin-tag-nacht/berlin-tag-nacht-folge-934/player', - 'info_dict': { - 'id': '203481', - 'display_id': 'berlin-tag-nacht/berlin-tag-nacht-folge-934', - 'ext': 'flv', - 'title': 'Berlin - Tag & Nacht (Folge 934)', 
- 'description': 'md5:c85e88c2e36c552dfe63433bc9506dd0', - 'thumbnail': r're:^https?://.*\.jpg$', - 'timestamp': 1432666800, - 'upload_date': '20150526', - 'duration': 2641, - }, - 'params': { - # rtmp download - 'skip_download': True, - }, - }, { - # rtlnitro - 'url': 'http://www.nowtv.de/rtlnitro/alarm-fuer-cobra-11-die-autobahnpolizei/hals-und-beinbruch-2014-08-23-21-10-00/player', - 'info_dict': { - 'id': '165780', - 'display_id': 'alarm-fuer-cobra-11-die-autobahnpolizei/hals-und-beinbruch-2014-08-23-21-10-00', - 'ext': 'flv', - 'title': 'Hals- und Beinbruch', - 'description': 'md5:b50d248efffe244e6f56737f0911ca57', - 'thumbnail': r're:^https?://.*\.jpg$', - 'timestamp': 1432415400, - 'upload_date': '20150523', - 'duration': 2742, - }, - 'params': { - # rtmp download - 'skip_download': True, - }, - }, { - # superrtl - 'url': 'http://www.nowtv.de/superrtl/medicopter-117/angst/player', - 'info_dict': { - 'id': '99205', - 'display_id': 'medicopter-117/angst', - 'ext': 'flv', - 'title': 'Angst!', - 'description': 'md5:30cbc4c0b73ec98bcd73c9f2a8c17c4e', - 'thumbnail': r're:^https?://.*\.jpg$', - 'timestamp': 1222632900, - 'upload_date': '20080928', - 'duration': 3025, - }, - 'params': { - # rtmp download - 'skip_download': True, - }, - }, { - # ntv - 'url': 'http://www.nowtv.de/ntv/ratgeber-geld/thema-ua-der-erste-blick-die-apple-watch/player', - 'info_dict': { - 'id': '203521', - 'display_id': 'ratgeber-geld/thema-ua-der-erste-blick-die-apple-watch', - 'ext': 'flv', - 'title': 'Thema u.a.: Der erste Blick: Die Apple Watch', - 'description': 'md5:4312b6c9d839ffe7d8caf03865a531af', - 'thumbnail': r're:^https?://.*\.jpg$', - 'timestamp': 1432751700, - 'upload_date': '20150527', - 'duration': 1083, - }, - 'params': { - # rtmp download - 'skip_download': True, - }, - }, { - # vox - 'url': 'http://www.nowtv.de/vox/der-hundeprofi/buero-fall-chihuahua-joel/player', - 'info_dict': { - 'id': '128953', - 'display_id': 'der-hundeprofi/buero-fall-chihuahua-joel', - 'ext': 
'flv', - 'title': "Büro-Fall / Chihuahua 'Joel'", - 'description': 'md5:e62cb6bf7c3cc669179d4f1eb279ad8d', - 'thumbnail': r're:^https?://.*\.jpg$', - 'timestamp': 1432408200, - 'upload_date': '20150523', - 'duration': 3092, - }, - 'params': { - # rtmp download - 'skip_download': True, - }, - }, { - 'url': 'http://www.nowtv.de/rtl/bauer-sucht-frau/die-neuen-bauern-und-eine-hochzeit/preview', - 'only_matching': True, - }, { - 'url': 'http://www.nowtv.at/rtl/bauer-sucht-frau/die-neuen-bauern-und-eine-hochzeit/preview?return=/rtl/bauer-sucht-frau/die-neuen-bauern-und-eine-hochzeit', - 'only_matching': True, - }, { - 'url': 'http://www.nowtv.de/rtl2/echtzeit/list/aktuell/schnelles-geld-am-ende-der-welt/player', - 'only_matching': True, - }, { - 'url': 'http://www.nowtv.de/rtl2/zuhause-im-glueck/jahr/2015/11/eine-erschuetternde-diagnose/player', - 'only_matching': True, - }] - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - display_id = '%s/%s' % (mobj.group('show_id'), mobj.group('id')) - - info = self._download_json( - 'https://api.nowtv.de/v3/movies/%s?fields=%s' - % (display_id, ','.join(self._VIDEO_FIELDS)), display_id) - - return self._extract_video(info, display_id) - - -class NowTVListIE(NowTVBaseIE): - _VALID_URL = r'https?://(?:www\.)?nowtv\.(?:de|at|ch)/(?:rtl|rtl2|rtlnitro|superrtl|ntv|vox)/(?P<show_id>[^/]+)/list/(?P<id>[^?/#&]+)$' - - _SHOW_FIELDS = ('title', ) - _SEASON_FIELDS = ('id', 'headline', 'seoheadline', ) - - _TESTS = [{ - 'url': 'http://www.nowtv.at/rtl/stern-tv/list/aktuell', - 'info_dict': { - 'id': '17006', - 'title': 'stern TV - Aktuell', - }, - 'playlist_count': 1, - }, { - 'url': 'http://www.nowtv.at/rtl/das-supertalent/list/free-staffel-8', - 'info_dict': { - 'id': '20716', - 'title': 'Das Supertalent - FREE Staffel 8', - }, - 'playlist_count': 14, - }] - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - show_id = mobj.group('show_id') - season_id = mobj.group('id') - - fields = [] - 
fields.extend(self._SHOW_FIELDS) - fields.extend('formatTabs.%s' % field for field in self._SEASON_FIELDS) - fields.extend( - 'formatTabs.formatTabPages.container.movies.%s' % field - for field in self._VIDEO_FIELDS) - - list_info = self._download_json( - 'https://api.nowtv.de/v3/formats/seo?fields=%s&name=%s.php' - % (','.join(fields), show_id), - season_id) - - season = next( - season for season in list_info['formatTabs']['items'] - if season.get('seoheadline') == season_id) - - title = '%s - %s' % (list_info['title'], season['headline']) - - entries = [] - for container in season['formatTabPages']['items']: - for info in ((container.get('container') or {}).get('movies') or {}).get('items') or []: - entries.append(self._extract_video(info)) - - return self.playlist_result( - entries, compat_str(season.get('id') or season_id), title) diff --git a/youtube_dl/extractor/odnoklassniki.py b/youtube_dl/extractor/odnoklassniki.py index 854b680..8e13bcf 100644 --- a/youtube_dl/extractor/odnoklassniki.py +++ b/youtube_dl/extractor/odnoklassniki.py @@ -14,6 +14,7 @@ from ..utils import ( int_or_none, qualities, unescapeHTML, + urlencode_postdata, ) @@ -56,7 +57,7 @@ class OdnoklassnikiIE(InfoExtractor): 'url': 'http://ok.ru/video/64211978996595-1', 'md5': '2f206894ffb5dbfcce2c5a14b909eea5', 'info_dict': { - 'id': '64211978996595-1', + 'id': 'V_VztHT5BzY', 'ext': 'mp4', 'title': 'Космическая среда от 26 августа 2015', 'description': 'md5:848eb8b85e5e3471a3a803dae1343ed0', @@ -127,9 +128,14 @@ class OdnoklassnikiIE(InfoExtractor): if metadata: metadata = self._parse_json(metadata, video_id) else: + data = {} + st_location = flashvars.get('location') + if st_location: + data['st.location'] = st_location metadata = self._download_json( compat_urllib_parse_unquote(flashvars['metadataUrl']), - video_id, 'Downloading metadata JSON') + video_id, 'Downloading metadata JSON', + data=urlencode_postdata(data)) movie = metadata['movie'] diff --git a/youtube_dl/extractor/once.py 
b/youtube_dl/extractor/once.py index a637c8e..8ae5fad 100644 --- a/youtube_dl/extractor/once.py +++ b/youtube_dl/extractor/once.py @@ -11,7 +11,7 @@ class OnceIE(InfoExtractor): ADAPTIVE_URL_TEMPLATE = 'http://once.unicornmedia.com/now/master/playlist/%s/%s/%s/content.m3u8' PROGRESSIVE_URL_TEMPLATE = 'http://once.unicornmedia.com/now/media/progressive/%s/%s/%s/%s/content.mp4' - def _extract_once_formats(self, url): + def _extract_once_formats(self, url, http_formats_preference=None): domain_id, application_id, media_item_id = re.match( OnceIE._VALID_URL, url).groups() formats = self._extract_m3u8_formats( @@ -35,6 +35,7 @@ class OnceIE(InfoExtractor): 'format_id': adaptive_format['format_id'].replace( 'hls', 'http'), 'protocol': 'http', + 'preference': http_formats_preference, }) progressive_formats.append(progressive_format) self._check_formats(progressive_formats, media_item_id) diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index b50d6c7..b282bcf 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -112,6 +112,8 @@ class PhantomJSwrapper(object): return get_exe_version('phantomjs', version_re=r'([0-9.]+)') def __init__(self, extractor, required_version=None, timeout=10000): + self._TMP_FILES = {} + self.exe = check_executable('phantomjs', ['-v']) if not self.exe: raise ExtractorError('PhantomJS executable not found in PATH, ' @@ -130,7 +132,6 @@ class PhantomJSwrapper(object): self.options = { 'timeout': timeout, } - self._TMP_FILES = {} for name in self._TMP_FILE_NAMES: tmp = tempfile.NamedTemporaryFile(delete=False) tmp.close() @@ -140,7 +141,7 @@ class PhantomJSwrapper(object): for name in self._TMP_FILE_NAMES: try: os.remove(self._TMP_FILES[name].name) - except: + except (IOError, OSError, KeyError): pass def _save_cookies(self, url): @@ -242,7 +243,7 @@ class PhantomJSwrapper(object): class OpenloadIE(InfoExtractor): - _VALID_URL = 
r'https?://(?:openload\.(?:co|io)|oload\.tv)/(?:f|embed)/(?P<id>[a-zA-Z0-9-_]+)' + _VALID_URL = r'https?://(?:www\.)?(?:openload\.(?:co|io|link)|oload\.(?:tv|stream))/(?:f|embed)/(?P<id>[a-zA-Z0-9-_]+)' _TESTS = [{ 'url': 'https://openload.co/f/kUEfGclsU9o', @@ -283,9 +284,20 @@ class OpenloadIE(InfoExtractor): # for title and ext 'url': 'https://openload.co/embed/Sxz5sADo82g/', 'only_matching': True, + }, { + # unavailable via https://openload.co/embed/e-Ixz9ZR5L0/ but available + # via https://openload.co/f/e-Ixz9ZR5L0/ + 'url': 'https://openload.co/f/e-Ixz9ZR5L0/', + 'only_matching': True, }, { 'url': 'https://oload.tv/embed/KnG-kKZdcfY/', 'only_matching': True, + }, { + 'url': 'http://www.openload.link/f/KnG-kKZdcfY', + 'only_matching': True, + }, { + 'url': 'https://oload.stream/f/KnG-kKZdcfY', + 'only_matching': True, }] _USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36' @@ -298,20 +310,30 @@ class OpenloadIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - url = 'https://openload.co/embed/%s/' % video_id + url_pattern = 'https://openload.co/%%s/%s/' % video_id headers = { 'User-Agent': self._USER_AGENT, } - webpage = self._download_webpage(url, video_id, headers=headers) - - if 'File not found' in webpage or 'deleted by the owner' in webpage: - raise ExtractorError('File not found', expected=True, video_id=video_id) + for path in ('embed', 'f'): + page_url = url_pattern % path + last = path == 'f' + webpage = self._download_webpage( + page_url, video_id, 'Downloading %s webpage' % path, + headers=headers, fatal=last) + if not webpage: + continue + if 'File not found' in webpage or 'deleted by the owner' in webpage: + if not last: + continue + raise ExtractorError('File not found', expected=True, video_id=video_id) + break phantom = PhantomJSwrapper(self, required_version='2.0') - webpage, _ = phantom.get(url, html=webpage, video_id=video_id, 
headers=headers) + webpage, _ = phantom.get(page_url, html=webpage, video_id=video_id, headers=headers) - decoded_id = get_element_by_id('streamurl', webpage) + decoded_id = (get_element_by_id('streamurl', webpage) or + get_element_by_id('streamuri', webpage)) video_url = 'https://openload.co/stream/%s?mime=true' % decoded_id @@ -320,7 +342,7 @@ class OpenloadIE(InfoExtractor): 'title', default=None) or self._html_search_meta( 'description', webpage, 'title', fatal=True) - entries = self._parse_html5_media_entries(url, webpage, video_id) + entries = self._parse_html5_media_entries(page_url, webpage, video_id) entry = entries[0] if entries else {} subtitles = entry.get('subtitles') diff --git a/youtube_dl/extractor/orf.py b/youtube_dl/extractor/orf.py index 74fe801..c1fb580 100644 --- a/youtube_dl/extractor/orf.py +++ b/youtube_dl/extractor/orf.py @@ -49,13 +49,13 @@ class ORFTVthekIE(InfoExtractor): 'params': { 'skip_download': True, # rtsp downloads }, - '_skip': 'Blocked outside of Austria / Germany', + 'skip': 'Blocked outside of Austria / Germany', }, { 'url': 'http://tvthek.orf.at/topic/Fluechtlingskrise/10463081/Heimat-Fremde-Heimat/13879132/Senioren-betreuen-Migrantenkinder/13879141', - 'skip_download': True, + 'only_matching': True, }, { 'url': 'http://tvthek.orf.at/profile/Universum/35429', - 'skip_download': True, + 'only_matching': True, }] def _real_extract(self, url): diff --git a/youtube_dl/extractor/pandatv.py b/youtube_dl/extractor/pandatv.py index c86d707..13a2e7e 100644 --- a/youtube_dl/extractor/pandatv.py +++ b/youtube_dl/extractor/pandatv.py @@ -33,7 +33,7 @@ class PandaTVIE(InfoExtractor): video_id = self._match_id(url) config = self._download_json( - 'https://www.panda.tv/api_room?roomid=%s' % video_id, video_id) + 'https://www.panda.tv/api_room_v2?roomid=%s' % video_id, video_id) error_code = config.get('errno', 0) if error_code is not 0: @@ -66,6 +66,11 @@ class PandaTVIE(InfoExtractor): plflag1 = '4' live_panda = 'live_panda' if plflag0 < 
1 else '' + plflag_auth = self._parse_json(video_info['plflag_list'], video_id) + sign = plflag_auth['auth']['sign'] + ts = plflag_auth['auth']['time'] + rid = plflag_auth['auth']['rid'] + quality_key = qualities(['OD', 'HD', 'SD']) suffix = ['_small', '_mid', ''] formats = [] @@ -77,8 +82,8 @@ class PandaTVIE(InfoExtractor): continue for pref, (ext, pl) in enumerate((('m3u8', '-hls'), ('flv', ''))): formats.append({ - 'url': 'https://pl%s%s.live.panda.tv/live_panda/%s%s%s.%s' - % (pl, plflag1, room_key, live_panda, suffix[quality], ext), + 'url': 'https://pl%s%s.live.panda.tv/live_panda/%s%s%s.%s?sign=%s&ts=%s&rid=%s' + % (pl, plflag1, room_key, live_panda, suffix[quality], ext, sign, ts, rid), 'format_id': '%s-%s' % (k, ext), 'quality': quality, 'source_preference': pref, diff --git a/youtube_dl/extractor/patreon.py b/youtube_dl/extractor/patreon.py index a6a2c27..d4b1d34 100644 --- a/youtube_dl/extractor/patreon.py +++ b/youtube_dl/extractor/patreon.py @@ -67,7 +67,7 @@ class PatreonIE(InfoExtractor): 'https://www.patreon.com/processLogin', compat_urllib_parse_urlencode(login_form).encode('utf-8') ) - login_page = self._download_webpage(request, None, note='Logging in as %s' % username) + login_page = self._download_webpage(request, None, note='Logging in') if re.search(r'onLoginFailed', login_page): raise ExtractorError('Unable to login, incorrect username and/or password', expected=True) diff --git a/youtube_dl/extractor/pbs.py b/youtube_dl/extractor/pbs.py index b51dcbe..f11d5da 100644 --- a/youtube_dl/extractor/pbs.py +++ b/youtube_dl/extractor/pbs.py @@ -421,6 +421,7 @@ class PBSIE(InfoExtractor): r'class="coveplayerid">([^<]+)<', # coveplayer r'<section[^>]+data-coveid="(\d+)"', # coveplayer from http://www.pbs.org/wgbh/frontline/film/real-csi/ r'<input type="hidden" id="pbs_video_id_[0-9]+" value="([0-9]+)"/>', # jwplayer + r"(?s)window\.PBS\.playerConfig\s*=\s*{.*?id\s*:\s*'([0-9]+)',", ] media_id = self._search_regex( diff --git 
a/youtube_dl/extractor/performgroup.py b/youtube_dl/extractor/performgroup.py new file mode 100644 index 0000000..26942bf --- /dev/null +++ b/youtube_dl/extractor/performgroup.py @@ -0,0 +1,83 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import int_or_none + + +class PerformGroupIE(InfoExtractor): + _VALID_URL = r'https?://player\.performgroup\.com/eplayer(?:/eplayer\.html|\.js)#/?(?P<id>[0-9a-f]{26})\.(?P<auth_token>[0-9a-z]{26})' + _TESTS = [{ + # http://www.faz.net/aktuell/sport/fussball/wm-2018-playoffs-schweiz-besiegt-nordirland-1-0-15286104.html + 'url': 'http://player.performgroup.com/eplayer/eplayer.html#d478c41c5d192f56b9aa859de8.1w4crrej5w14e1ed4s1ce4ykab', + 'md5': '259cb03d142e2e52471e8837ecacb29f', + 'info_dict': { + 'id': 'xgrwobuzumes1lwjxtcdpwgxd', + 'ext': 'mp4', + 'title': 'Liga MX: Keine Einsicht nach Horrorfoul', + 'description': 'md5:7cd3b459c82725b021e046ab10bf1c5b', + 'timestamp': 1511533477, + 'upload_date': '20171124', + } + }] + + def _call_api(self, service, auth_token, content_id, referer_url): + return self._download_json( + 'http://ep3.performfeeds.com/ep%s/%s/%s/' % (service, auth_token, content_id), + content_id, headers={ + 'Referer': referer_url, + 'Origin': 'http://player.performgroup.com', + }, query={ + '_fmt': 'json', + }) + + def _real_extract(self, url): + player_id, auth_token = re.search(self._VALID_URL, url).groups() + bootstrap = self._call_api('bootstrap', auth_token, player_id, url) + video = bootstrap['config']['dataSource']['sourceItems'][0]['videos'][0] + video_id = video['uuid'] + vod = self._call_api('vod', auth_token, video_id, url) + media = vod['videos']['video'][0]['media'] + + formats = [] + hls_url = media.get('hls', {}).get('url') + if hls_url: + formats.extend(self._extract_m3u8_formats(hls_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) + + hds_url = media.get('hds', {}).get('url') + if hds_url: + 
formats.extend(self._extract_f4m_formats(hds_url + '?hdcore', video_id, f4m_id='hds', fatal=False)) + + for c in media.get('content', []): + c_url = c.get('url') + if not c_url: + continue + tbr = int_or_none(c.get('bitrate'), 1000) + format_id = 'http' + if tbr: + format_id += '-%d' % tbr + formats.append({ + 'format_id': format_id, + 'url': c_url, + 'tbr': tbr, + 'width': int_or_none(c.get('width')), + 'height': int_or_none(c.get('height')), + 'filesize': int_or_none(c.get('fileSize')), + 'vcodec': c.get('type'), + 'fps': int_or_none(c.get('videoFrameRate')), + 'vbr': int_or_none(c.get('videoRate'), 1000), + 'abr': int_or_none(c.get('audioRate'), 1000), + }) + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': video['title'], + 'description': video.get('description'), + 'thumbnail': video.get('poster'), + 'duration': int_or_none(video.get('duration')), + 'timestamp': int_or_none(video.get('publishedTime'), 1000), + 'formats': formats, + } diff --git a/youtube_dl/extractor/playtvak.py b/youtube_dl/extractor/playtvak.py index 391e1bd..4c5f579 100644 --- a/youtube_dl/extractor/playtvak.py +++ b/youtube_dl/extractor/playtvak.py @@ -24,7 +24,7 @@ class PlaytvakIE(InfoExtractor): 'id': 'A150730_150323_hodinovy-manzel_kuko', 'ext': 'mp4', 'title': 'Vyžeňte vosy a sršně ze zahrady', - 'description': 'md5:f93d398691044d303bc4a3de62f3e976', + 'description': 'md5:4436e61b7df227a093778efb7e373571', 'thumbnail': r're:(?i)^https?://.*\.(?:jpg|png)$', 'duration': 279, 'timestamp': 1438732860, @@ -36,9 +36,19 @@ class PlaytvakIE(InfoExtractor): 'info_dict': { 'id': 'A150624_164934_planespotting_cat', 'ext': 'flv', - 'title': 're:^Přímý přenos iDNES.cz [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', + 'title': 're:^Planespotting [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', 'description': 'Sledujte provoz na ranveji Letiště Václava Havla v Praze', - 'thumbnail': r're:(?i)^https?://.*\.(?:jpg|png)$', + 'is_live': True, + }, + 'params': { + 'skip_download': 
True, # requires rtmpdump + }, + }, { # another live stream, this one without Misc.videoFLV + 'url': 'https://slowtv.playtvak.cz/zive-sledujte-vlaky-v-primem-prenosu-dwi-/hlavni-nadrazi.aspx?c=A151218_145728_hlavni-nadrazi_plap', + 'info_dict': { + 'id': 'A151218_145728_hlavni-nadrazi_plap', + 'ext': 'flv', + 'title': 're:^Hlavní nádraží [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', 'is_live': True, }, 'params': { @@ -95,7 +105,7 @@ class PlaytvakIE(InfoExtractor): webpage = self._download_webpage(url, video_id) info_url = self._html_search_regex( - r'Misc\.videoFLV\(\s*{\s*data\s*:\s*"([^"]+)"', webpage, 'info url') + r'Misc\.video(?:FLV)?\(\s*{\s*data\s*:\s*"([^"]+)"', webpage, 'info url') parsed_url = compat_urlparse.urlparse(info_url) @@ -160,7 +170,7 @@ class PlaytvakIE(InfoExtractor): if is_live: title = self._live_title(title) description = self._og_search_description(webpage, default=None) or self._html_search_meta( - 'description', webpage, 'description') + 'description', webpage, 'description', default=None) timestamp = None duration = None if not is_live: diff --git a/youtube_dl/extractor/pluralsight.py b/youtube_dl/extractor/pluralsight.py index f6a9131..aacc5d4 100644 --- a/youtube_dl/extractor/pluralsight.py +++ b/youtube_dl/extractor/pluralsight.py @@ -116,7 +116,7 @@ class PluralsightIE(PluralsightBaseIE): post_url = compat_urlparse.urljoin(self._LOGIN_URL, post_url) response = self._download_webpage( - post_url, None, 'Logging in as %s' % username, + post_url, None, 'Logging in', data=urlencode_postdata(login_form), headers={'Content-Type': 'application/x-www-form-urlencoded'}) @@ -131,6 +131,13 @@ class PluralsightIE(PluralsightBaseIE): if BLOCKED in response: raise ExtractorError( 'Unable to login: %s' % BLOCKED, expected=True) + MUST_AGREE = 'To continue using Pluralsight, you must agree to' + if any(p in response for p in (MUST_AGREE, '>Disagree<', '>Agree<')): + raise ExtractorError( + 'Unable to login: %s some documents. 
Go to pluralsight.com, ' + 'log in and agree with what Pluralsight requires.' + % MUST_AGREE, expected=True) + raise ExtractorError('Unable to log in') def _get_subtitles(self, author, clip_id, lang, name, duration, video_id): @@ -164,12 +171,12 @@ class PluralsightIE(PluralsightBaseIE): for num, current in enumerate(subs): current = subs[num] start, text = ( - float_or_none(dict_get(current, TIME_OFFSET_KEYS)), + float_or_none(dict_get(current, TIME_OFFSET_KEYS, skip_false_values=False)), dict_get(current, TEXT_KEYS)) if start is None or text is None: continue end = duration if num == len(subs) - 1 else float_or_none( - dict_get(subs[num + 1], TIME_OFFSET_KEYS)) + dict_get(subs[num + 1], TIME_OFFSET_KEYS, skip_false_values=False)) if end is None: continue srt += os.linesep.join( diff --git a/youtube_dl/extractor/porncom.py b/youtube_dl/extractor/porncom.py index 8218c7d..60ade06 100644 --- a/youtube_dl/extractor/porncom.py +++ b/youtube_dl/extractor/porncom.py @@ -77,12 +77,14 @@ class PornComIE(InfoExtractor): self._sort_formats(formats) view_count = str_to_int(self._search_regex( - r'class=["\']views["\'][^>]*><p>([\d,.]+)', webpage, + (r'Views:\s*</span>\s*<span>\s*([\d,.]+)', + r'class=["\']views["\'][^>]*><p>([\d,.]+)'), webpage, 'view count', fatal=False)) def extract_list(kind): s = self._search_regex( - r'(?s)<p[^>]*>%s:(.+?)</p>' % kind.capitalize(), + (r'(?s)%s:\s*</span>\s*<span>(.+?)</span>' % kind.capitalize(), + r'(?s)<p[^>]*>%s:(.+?)</p>' % kind.capitalize()), webpage, kind, fatal=False) return re.findall(r'<a[^>]+>([^<]+)</a>', s or '') diff --git a/youtube_dl/extractor/rai.py b/youtube_dl/extractor/rai.py index 5bf64a5..d223110 100644 --- a/youtube_dl/extractor/rai.py +++ b/youtube_dl/extractor/rai.py @@ -17,6 +17,7 @@ from ..utils import ( parse_duration, strip_or_none, try_get, + unescapeHTML, unified_strdate, unified_timestamp, update_url_query, @@ -249,6 +250,41 @@ class RaiPlayLiveIE(RaiBaseIE): } +class RaiPlayPlaylistIE(InfoExtractor): + 
_VALID_URL = r'https?://(?:www\.)?raiplay\.it/programmi/(?P<id>[^/?#&]+)' + _TESTS = [{ + 'url': 'http://www.raiplay.it/programmi/nondirloalmiocapo/', + 'info_dict': { + 'id': 'nondirloalmiocapo', + 'title': 'Non dirlo al mio capo', + 'description': 'md5:9f3d603b2947c1c7abb098f3b14fac86', + }, + 'playlist_mincount': 12, + }] + + def _real_extract(self, url): + playlist_id = self._match_id(url) + + webpage = self._download_webpage(url, playlist_id) + + title = self._html_search_meta( + ('programma', 'nomeProgramma'), webpage, 'title') + description = unescapeHTML(self._html_search_meta( + ('description', 'og:description'), webpage, 'description')) + print(description) + + entries = [] + for mobj in re.finditer( + r'<a\b[^>]+\bhref=(["\'])(?P<path>/raiplay/video/.+?)\1', + webpage): + video_url = urljoin(url, mobj.group('path')) + entries.append(self.url_result( + video_url, ie=RaiPlayIE.ie_key(), + video_id=RaiPlayIE._match_id(video_url))) + + return self.playlist_result(entries, playlist_id, title, description) + + class RaiIE(RaiBaseIE): _VALID_URL = r'https?://[^/]+\.(?:rai\.(?:it|tv)|rainews\.it)/dl/.+?-(?P<id>%s)(?:-.+?)?\.html' % RaiBaseIE._UUID_RE _TESTS = [{ diff --git a/youtube_dl/extractor/roosterteeth.py b/youtube_dl/extractor/roosterteeth.py index 46dfc78..8b70380 100644 --- a/youtube_dl/extractor/roosterteeth.py +++ b/youtube_dl/extractor/roosterteeth.py @@ -68,7 +68,7 @@ class RoosterTeethIE(InfoExtractor): login_request = self._download_webpage( self._LOGIN_URL, None, - note='Logging in as %s' % username, + note='Logging in', data=urlencode_postdata(login_form), headers={ 'Referer': self._LOGIN_URL, diff --git a/youtube_dl/extractor/rozhlas.py b/youtube_dl/extractor/rozhlas.py index f8eda8d..fccf694 100644 --- a/youtube_dl/extractor/rozhlas.py +++ b/youtube_dl/extractor/rozhlas.py @@ -21,7 +21,7 @@ class RozhlasIE(InfoExtractor): } }, { 'url': 'http://prehravac.rozhlas.cz/audio/3421320/embed', - 'skip_download': True, + 'only_matching': True, }] def 
_real_extract(self, url): diff --git a/youtube_dl/extractor/safari.py b/youtube_dl/extractor/safari.py index 909a6ba..cc6698f 100644 --- a/youtube_dl/extractor/safari.py +++ b/youtube_dl/extractor/safari.py @@ -61,7 +61,7 @@ class SafariBaseIE(InfoExtractor): request = sanitized_Request( self._LOGIN_URL, urlencode_postdata(login_form), headers=headers) login_page = self._download_webpage( - request, None, 'Logging in as %s' % username) + request, None, 'Logging in') if not is_logged(login_page): raise ExtractorError( diff --git a/youtube_dl/extractor/sandia.py b/youtube_dl/extractor/sandia.py deleted file mode 100644 index 96e43af..0000000 --- a/youtube_dl/extractor/sandia.py +++ /dev/null @@ -1,65 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import json - -from .common import InfoExtractor -from ..utils import ( - int_or_none, - mimetype2ext, -) - - -class SandiaIE(InfoExtractor): - IE_DESC = 'Sandia National Laboratories' - _VALID_URL = r'https?://digitalops\.sandia\.gov/Mediasite/Play/(?P<id>[0-9a-f]+)' - _TEST = { - 'url': 'http://digitalops.sandia.gov/Mediasite/Play/24aace4429fc450fb5b38cdbf424a66e1d', - 'md5': '9422edc9b9a60151727e4b6d8bef393d', - 'info_dict': { - 'id': '24aace4429fc450fb5b38cdbf424a66e1d', - 'ext': 'mp4', - 'title': 'Xyce Software Training - Section 1', - 'description': 're:(?s)SAND Number: SAND 2013-7800.{200,}', - 'upload_date': '20120409', - 'timestamp': 1333983600, - 'duration': 7794, - } - } - - def _real_extract(self, url): - video_id = self._match_id(url) - - presentation_data = self._download_json( - 'http://digitalops.sandia.gov/Mediasite/PlayerService/PlayerService.svc/json/GetPlayerOptions', - video_id, data=json.dumps({ - 'getPlayerOptionsRequest': { - 'ResourceId': video_id, - 'QueryString': '', - } - }), headers={ - 'Content-Type': 'application/json; charset=utf-8', - })['d']['Presentation'] - - title = presentation_data['Title'] - - formats = [] - for stream in presentation_data.get('Streams', []): - 
for fd in stream.get('VideoUrls', []): - formats.append({ - 'format_id': fd['MediaType'], - 'format_note': fd['MimeType'].partition('/')[2], - 'ext': mimetype2ext(fd['MimeType']), - 'url': fd['Location'], - 'protocol': 'f4m' if fd['MimeType'] == 'video/x-mp4-fragmented' else None, - }) - self._sort_formats(formats) - - return { - 'id': video_id, - 'title': title, - 'description': presentation_data.get('Description'), - 'formats': formats, - 'timestamp': int_or_none(presentation_data.get('UnixTime'), 1000), - 'duration': int_or_none(presentation_data.get('Duration'), 1000), - } diff --git a/youtube_dl/extractor/scrippsnetworks.py b/youtube_dl/extractor/scrippsnetworks.py index b446a02..4023aee 100644 --- a/youtube_dl/extractor/scrippsnetworks.py +++ b/youtube_dl/extractor/scrippsnetworks.py @@ -1,13 +1,11 @@ # coding: utf-8 from __future__ import unicode_literals -import datetime import json import hashlib -import hmac import re -from .common import InfoExtractor +from .aws import AWSIE from .anvato import AnvatoIE from ..utils import ( smuggle_url, @@ -16,7 +14,7 @@ from ..utils import ( ) -class ScrippsNetworksWatchIE(InfoExtractor): +class ScrippsNetworksWatchIE(AWSIE): IE_NAME = 'scrippsnetworks:watch' _VALID_URL = r'''(?x) https?:// @@ -64,44 +62,27 @@ class ScrippsNetworksWatchIE(InfoExtractor): 'travelchannel': 'trav', 'geniuskitchen': 'genius', } - _SNI_HOST = 'web.api.video.snidigital.com' - _AWS_REGION = 'us-east-1' - _AWS_IDENTITY_ID_JSON = json.dumps({ - 'IdentityId': '%s:7655847c-0ae7-4d9b-80d6-56c062927eb3' % _AWS_REGION - }) - _AWS_USER_AGENT = 'aws-sdk-js/2.80.0 callback' _AWS_API_KEY = 'E7wSQmq0qK6xPrF13WmzKiHo4BQ7tip4pQcSXVl1' - _AWS_SERVICE = 'execute-api' - _AWS_REQUEST = 'aws4_request' - _AWS_SIGNED_HEADERS = ';'.join([ - 'host', 'x-amz-date', 'x-amz-security-token', 'x-api-key']) - _AWS_CANONICAL_REQUEST_TEMPLATE = '''GET -%(uri)s - -host:%(host)s -x-amz-date:%(date)s -x-amz-security-token:%(token)s -x-api-key:%(key)s + _AWS_PROXY_HOST = 
'web.api.video.snidigital.com' -%(signed_headers)s -%(payload_hash)s''' + _AWS_USER_AGENT = 'aws-sdk-js/2.80.0 callback' def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) site_id, video_id = mobj.group('site', 'id') - def aws_hash(s): - return hashlib.sha256(s.encode('utf-8')).hexdigest() - + aws_identity_id_json = json.dumps({ + 'IdentityId': '%s:7655847c-0ae7-4d9b-80d6-56c062927eb3' % self._AWS_REGION + }).encode('utf-8') token = self._download_json( - 'https://cognito-identity.us-east-1.amazonaws.com/', video_id, - data=self._AWS_IDENTITY_ID_JSON.encode('utf-8'), + 'https://cognito-identity.%s.amazonaws.com/' % self._AWS_REGION, video_id, + data=aws_identity_id_json, headers={ 'Accept': '*/*', 'Content-Type': 'application/x-amz-json-1.1', 'Referer': url, - 'X-Amz-Content-Sha256': aws_hash(self._AWS_IDENTITY_ID_JSON), + 'X-Amz-Content-Sha256': hashlib.sha256(aws_identity_id_json).hexdigest(), 'X-Amz-Target': 'AWSCognitoIdentityService.GetOpenIdToken', 'X-Amz-User-Agent': self._AWS_USER_AGENT, })['Token'] @@ -124,64 +105,12 @@ x-api-key:%(key)s sts, './/{https://sts.amazonaws.com/doc/2011-06-15/}%s' % key, fatal=True) - access_key_id = get('AccessKeyId') - secret_access_key = get('SecretAccessKey') - session_token = get('SessionToken') - - # Task 1: http://docs.aws.amazon.com/general/latest/gr/sigv4-create-canonical-request.html - uri = '/1/web/brands/%s/episodes/scrid/%s' % (self._SNI_TABLE[site_id], video_id) - datetime_now = datetime.datetime.utcnow().strftime('%Y%m%dT%H%M%SZ') - date = datetime_now[:8] - canonical_string = self._AWS_CANONICAL_REQUEST_TEMPLATE % { - 'uri': uri, - 'host': self._SNI_HOST, - 'date': datetime_now, - 'token': session_token, - 'key': self._AWS_API_KEY, - 'signed_headers': self._AWS_SIGNED_HEADERS, - 'payload_hash': aws_hash(''), - } - - # Task 2: http://docs.aws.amazon.com/general/latest/gr/sigv4-create-string-to-sign.html - credential_string = '/'.join([date, self._AWS_REGION, self._AWS_SERVICE, self._AWS_REQUEST]) 
- string_to_sign = '\n'.join([ - 'AWS4-HMAC-SHA256', datetime_now, credential_string, - aws_hash(canonical_string)]) - - # Task 3: http://docs.aws.amazon.com/general/latest/gr/sigv4-calculate-signature.html - def aws_hmac(key, msg): - return hmac.new(key, msg.encode('utf-8'), hashlib.sha256) - - def aws_hmac_digest(key, msg): - return aws_hmac(key, msg).digest() - - def aws_hmac_hexdigest(key, msg): - return aws_hmac(key, msg).hexdigest() - - k_secret = 'AWS4' + secret_access_key - k_date = aws_hmac_digest(k_secret.encode('utf-8'), date) - k_region = aws_hmac_digest(k_date, self._AWS_REGION) - k_service = aws_hmac_digest(k_region, self._AWS_SERVICE) - k_signing = aws_hmac_digest(k_service, self._AWS_REQUEST) - - signature = aws_hmac_hexdigest(k_signing, string_to_sign) - - auth_header = ', '.join([ - 'AWS4-HMAC-SHA256 Credential=%s' % '/'.join( - [access_key_id, date, self._AWS_REGION, self._AWS_SERVICE, self._AWS_REQUEST]), - 'SignedHeaders=%s' % self._AWS_SIGNED_HEADERS, - 'Signature=%s' % signature, - ]) - - mcp_id = self._download_json( - 'https://%s%s' % (self._SNI_HOST, uri), video_id, headers={ - 'Accept': '*/*', - 'Referer': url, - 'Authorization': auth_header, - 'X-Amz-Date': datetime_now, - 'X-Amz-Security-Token': session_token, - 'X-Api-Key': self._AWS_API_KEY, - })['results'][0]['mcpId'] + mcp_id = self._aws_execute_api({ + 'uri': '/1/web/brands/%s/episodes/scrid/%s' % (self._SNI_TABLE[site_id], video_id), + 'access_key': get('AccessKeyId'), + 'secret_key': get('SecretAccessKey'), + 'session_token': get('SessionToken'), + }, video_id)['results'][0]['mcpId'] return self.url_result( smuggle_url( diff --git a/youtube_dl/extractor/sevenplus.py b/youtube_dl/extractor/sevenplus.py new file mode 100644 index 0000000..9792f82 --- /dev/null +++ b/youtube_dl/extractor/sevenplus.py @@ -0,0 +1,67 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .brightcove import BrightcoveNewIE +from ..utils import update_url_query + + +class 
SevenPlusIE(BrightcoveNewIE): + IE_NAME = '7plus' + _VALID_URL = r'https?://(?:www\.)?7plus\.com\.au/(?P<path>[^?]+\?.*?\bepisode-id=(?P<id>[^&#]+))' + _TESTS = [{ + 'url': 'https://7plus.com.au/BEAT?episode-id=BEAT-001', + 'info_dict': { + 'id': 'BEAT-001', + 'ext': 'mp4', + 'title': 'S1 E1 - Help / Lucy In The Sky With Diamonds', + 'description': 'md5:37718bea20a8eedaca7f7361af566131', + 'uploader_id': '5303576322001', + 'upload_date': '20171031', + 'timestamp': 1509440068, + }, + 'params': { + 'format': 'bestvideo', + 'skip_download': True, + } + }, { + 'url': 'https://7plus.com.au/UUUU?episode-id=AUMS43-001', + 'only_matching': True, + }] + + def _real_extract(self, url): + path, episode_id = re.match(self._VALID_URL, url).groups() + + media = self._download_json( + 'https://videoservice.swm.digital/playback', episode_id, query={ + 'appId': '7plus', + 'deviceType': 'web', + 'platformType': 'web', + 'accountId': 5303576322001, + 'referenceId': 'ref:' + episode_id, + 'deliveryId': 'csai', + 'videoType': 'vod', + })['media'] + + for source in media.get('sources', {}): + src = source.get('src') + if not src: + continue + source['src'] = update_url_query(src, {'rule': ''}) + + info = self._parse_brightcove_metadata(media, episode_id) + + content = self._download_json( + 'https://component-cdn.swm.digital/content/' + path, + episode_id, headers={ + 'market-id': 4, + }, fatal=False) or {} + for item in content.get('items', {}): + if item.get('componentData', {}).get('componentType') == 'infoPanel': + for src_key, dst_key in [('title', 'title'), ('shortSynopsis', 'description')]: + value = item.get(src_key) + if value: + info[dst_key] = value + + return info diff --git a/youtube_dl/extractor/shahid.py b/youtube_dl/extractor/shahid.py index 374f7fa..5c2a620 100644 --- a/youtube_dl/extractor/shahid.py +++ b/youtube_dl/extractor/shahid.py @@ -1,22 +1,53 @@ # coding: utf-8 from __future__ import unicode_literals -import re import json +import math +import re -from .common 
import InfoExtractor +from .aws import AWSIE from ..compat import compat_HTTPError from ..utils import ( + clean_html, ExtractorError, + InAdvancePagedList, int_or_none, parse_iso8601, str_or_none, urlencode_postdata, - clean_html, ) -class ShahidIE(InfoExtractor): +class ShahidBaseIE(AWSIE): + _AWS_PROXY_HOST = 'api2.shahid.net' + _AWS_API_KEY = '2RRtuMHx95aNI1Kvtn2rChEuwsCogUd4samGPjLh' + + def _handle_error(self, e): + fail_data = self._parse_json( + e.cause.read().decode('utf-8'), None, fatal=False) + if fail_data: + faults = fail_data.get('faults', []) + faults_message = ', '.join([clean_html(fault['userMessage']) for fault in faults if fault.get('userMessage')]) + if faults_message: + raise ExtractorError(faults_message, expected=True) + + def _call_api(self, path, video_id, request=None): + query = {} + if request: + query['request'] = json.dumps(request) + try: + return self._aws_execute_api({ + 'uri': '/proxy/v2/' + path, + 'access_key': 'AKIAI6X4TYCIXM2B7MUQ', + 'secret_key': '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', + }, video_id, query) + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError): + self._handle_error(e) + raise + + +class ShahidIE(ShahidBaseIE): _NETRC_MACHINE = 'shahid' _VALID_URL = r'https?://shahid\.mbc\.net/ar/(?:serie|show|movie)s/[^/]+/(?P<type>episode|clip|movie)-(?P<id>\d+)' _TESTS = [{ @@ -41,34 +72,25 @@ class ShahidIE(InfoExtractor): 'only_matching': True }] - def _api2_request(self, *args, **kwargs): - try: - return self._download_json(*args, **kwargs) - except ExtractorError as e: - if isinstance(e.cause, compat_HTTPError): - fail_data = self._parse_json( - e.cause.read().decode('utf-8'), None, fatal=False) - if fail_data: - faults = fail_data.get('faults', []) - faults_message = ', '.join([clean_html(fault['userMessage']) for fault in faults if fault.get('userMessage')]) - if faults_message: - raise ExtractorError(faults_message, expected=True) - raise - def _real_initialize(self): email, password = 
self._get_login_info() if email is None: return - user_data = self._api2_request( - 'https://shahid.mbc.net/wd/service/users/login', - None, 'Logging in', data=json.dumps({ - 'email': email, - 'password': password, - 'basic': 'false', - }).encode('utf-8'), headers={ - 'Content-Type': 'application/json; charset=UTF-8', - })['user'] + try: + user_data = self._download_json( + 'https://shahid.mbc.net/wd/service/users/login', + None, 'Logging in', data=json.dumps({ + 'email': email, + 'password': password, + 'basic': 'false', + }).encode('utf-8'), headers={ + 'Content-Type': 'application/json; charset=UTF-8', + })['user'] + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError): + self._handle_error(e) + raise self._download_webpage( 'https://shahid.mbc.net/populateContext', @@ -81,25 +103,13 @@ class ShahidIE(InfoExtractor): 'sessionId': user_data['sessionId'], })) - def _get_api_data(self, response): - data = response.get('data', {}) - - error = data.get('error') - if error: - raise ExtractorError( - '%s returned error: %s' % (self.IE_NAME, '\n'.join(error.values())), - expected=True) - - return data - def _real_extract(self, url): page_type, video_id = re.match(self._VALID_URL, url).groups() if page_type == 'clip': page_type = 'episode' - playout = self._api2_request( - 'https://api2.shahid.net/proxy/v2/playout/url/' + video_id, - video_id, 'Downloading player JSON')['playout'] + playout = self._call_api( + 'playout/url/' + video_id, video_id)['playout'] if playout.get('drm'): raise ExtractorError('This video is DRM protected.', expected=True) @@ -107,13 +117,27 @@ class ShahidIE(InfoExtractor): formats = self._extract_m3u8_formats(playout['url'], video_id, 'mp4') self._sort_formats(formats) - video = self._get_api_data(self._download_json( + # video = self._call_api( + # 'product/id', video_id, { + # 'id': video_id, + # 'productType': 'ASSET', + # 'productSubType': page_type.upper() + # })['productModel'] + + response = self._download_json( 
'http://api.shahid.net/api/v1_1/%s/%s' % (page_type, video_id), video_id, 'Downloading video JSON', query={ 'apiKey': 'sh@hid0nlin3', 'hash': 'b2wMCTHpSmyxGqQjJFOycRmLSex+BpTK/ooxy6vHaqs=', - }))[page_type] + }) + data = response.get('data', {}) + error = data.get('error') + if error: + raise ExtractorError( + '%s returned error: %s' % (self.IE_NAME, '\n'.join(error.values())), + expected=True) + video = data[page_type] title = video['title'] categories = [ category['name'] @@ -135,3 +159,57 @@ class ShahidIE(InfoExtractor): 'episode_id': video_id, 'formats': formats, } + + +class ShahidShowIE(ShahidBaseIE): + _VALID_URL = r'https?://shahid\.mbc\.net/ar/(?:show|serie)s/[^/]+/(?:show|series)-(?P<id>\d+)' + _TESTS = [{ + 'url': 'https://shahid.mbc.net/ar/shows/%D8%B1%D8%A7%D9%85%D8%B2-%D9%82%D8%B1%D8%B4-%D8%A7%D9%84%D8%A8%D8%AD%D8%B1/show-79187', + 'info_dict': { + 'id': '79187', + 'title': 'رامز قرش البحر', + 'description': 'md5:c88fa7e0f02b0abd39d417aee0d046ff', + }, + 'playlist_mincount': 32, + }, { + 'url': 'https://shahid.mbc.net/ar/series/How-to-live-Longer-(The-Big-Think)/series-291861', + 'only_matching': True + }] + _PAGE_SIZE = 30 + + def _real_extract(self, url): + show_id = self._match_id(url) + + product = self._call_api( + 'playableAsset', show_id, {'showId': show_id})['productModel'] + playlist = product['playlist'] + playlist_id = playlist['id'] + show = product.get('show', {}) + + def page_func(page_num): + playlist = self._call_api( + 'product/playlist', show_id, { + 'playListId': playlist_id, + 'pageNumber': page_num, + 'pageSize': 30, + 'sorts': [{ + 'order': 'DESC', + 'type': 'SORTDATE' + }], + }) + for product in playlist.get('productList', {}).get('products', []): + product_url = product.get('productUrl', []).get('url') + if not product_url: + continue + yield self.url_result( + product_url, 'Shahid', + str_or_none(product.get('id')), + product.get('title')) + + entries = InAdvancePagedList( + page_func, + math.ceil(playlist['count'] / 
self._PAGE_SIZE), + self._PAGE_SIZE) + + return self.playlist_result( + entries, show_id, show.get('title'), show.get('description')) diff --git a/youtube_dl/extractor/slutload.py b/youtube_dl/extractor/slutload.py index 7145d28..6fc2ff6 100644 --- a/youtube_dl/extractor/slutload.py +++ b/youtube_dl/extractor/slutload.py @@ -1,11 +1,13 @@ from __future__ import unicode_literals +import re + from .common import InfoExtractor class SlutloadIE(InfoExtractor): _VALID_URL = r'^https?://(?:\w+\.)?slutload\.com/video/[^/]+/(?P<id>[^/]+)/?$' - _TEST = { + _TESTS = [{ 'url': 'http://www.slutload.com/video/virginie-baisee-en-cam/TD73btpBqSxc/', 'md5': '868309628ba00fd488cf516a113fd717', 'info_dict': { @@ -15,11 +17,17 @@ class SlutloadIE(InfoExtractor): 'age_limit': 18, 'thumbnail': r're:https?://.*?\.jpg' } - } + }, { + # mobile site + 'url': 'http://mobile.slutload.com/video/masturbation-solo/fviFLmc6kzJ/', + 'only_matching': True, + }] def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) + + desktop_url = re.sub(r'^(https?://)mobile\.', r'\1', url) + webpage = self._download_webpage(desktop_url, video_id) video_title = self._html_search_regex(r'<h1><strong>([^<]+)</strong>', webpage, 'title').strip() diff --git a/youtube_dl/extractor/sonyliv.py b/youtube_dl/extractor/sonyliv.py index accd112..c3078e2 100644 --- a/youtube_dl/extractor/sonyliv.py +++ b/youtube_dl/extractor/sonyliv.py @@ -2,6 +2,7 @@ from __future__ import unicode_literals from .common import InfoExtractor +from ..utils import smuggle_url class SonyLIVIE(InfoExtractor): @@ -10,12 +11,12 @@ class SonyLIVIE(InfoExtractor): 'url': "http://www.sonyliv.com/details/episodes/5024612095001/Ep.-1---Achaari-Cheese-Toast---Bachelor's-Delight", 'info_dict': { 'title': "Ep. 
1 - Achaari Cheese Toast - Bachelor's Delight", - 'id': '5024612095001', + 'id': 'ref:5024612095001', 'ext': 'mp4', - 'upload_date': '20160707', + 'upload_date': '20170923', 'description': 'md5:7f28509a148d5be9d0782b4d5106410d', - 'uploader_id': '4338955589001', - 'timestamp': 1467870968, + 'uploader_id': '5182475815001', + 'timestamp': 1506200547, }, 'params': { 'skip_download': True, @@ -26,9 +27,11 @@ class SonyLIVIE(InfoExtractor): 'only_matching': True, }] - BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/4338955589001/default_default/index.html?videoId=%s' + # BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/4338955589001/default_default/index.html?videoId=%s' + BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/5182475815001/default_default/index.html?videoId=ref:%s' def _real_extract(self, url): brightcove_id = self._match_id(url) return self.url_result( - self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 'BrightcoveNew', brightcove_id) + smuggle_url(self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, {'geo_countries': ['IN']}), + 'BrightcoveNew', brightcove_id) diff --git a/youtube_dl/extractor/spankbang.py b/youtube_dl/extractor/spankbang.py index 2863e53..e6c2dcf 100644 --- a/youtube_dl/extractor/spankbang.py +++ b/youtube_dl/extractor/spankbang.py @@ -7,7 +7,7 @@ from ..utils import ExtractorError class SpankBangIE(InfoExtractor): - _VALID_URL = r'https?://(?:(?:www|[a-z]{2})\.)?spankbang\.com/(?P<id>[\da-z]+)/video' + _VALID_URL = r'https?://(?:(?:www|m|[a-z]{2})\.)?spankbang\.com/(?P<id>[\da-z]+)/video' _TESTS = [{ 'url': 'http://spankbang.com/3vvn/video/fantasy+solo', 'md5': '1cc433e1d6aa14bc376535b8679302f7', @@ -15,7 +15,7 @@ class SpankBangIE(InfoExtractor): 'id': '3vvn', 'ext': 'mp4', 'title': 'fantasy solo', - 'description': 'Watch fantasy solo free HD porn video - 05 minutes - dillion harper masturbates on a bed free adult movies.', + 'description': 'Watch fantasy solo free HD porn video - 05 minutes - Babe,Masturbation,Solo,Toy - 
dillion harper masturbates on a bed free adult movies sexy clips.', 'thumbnail': r're:^https?://.*\.jpg$', 'uploader': 'silly2587', 'age_limit': 18, @@ -28,6 +28,10 @@ class SpankBangIE(InfoExtractor): # no uploader 'url': 'http://spankbang.com/lklg/video/sex+with+anyone+wedding+edition+2', 'only_matching': True, + }, { + # mobile page + 'url': 'http://m.spankbang.com/1o2de/video/can+t+remember+her+name', + 'only_matching': True, }] def _real_extract(self, url): diff --git a/youtube_dl/extractor/stretchinternet.py b/youtube_dl/extractor/stretchinternet.py new file mode 100644 index 0000000..ae2ac1b --- /dev/null +++ b/youtube_dl/extractor/stretchinternet.py @@ -0,0 +1,48 @@ +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import int_or_none + + +class StretchInternetIE(InfoExtractor): + _VALID_URL = r'https?://portal\.stretchinternet\.com/[^/]+/portal\.htm\?.*?\beventId=(?P<id>\d+)' + _TEST = { + 'url': 'https://portal.stretchinternet.com/umary/portal.htm?eventId=313900&streamType=video', + 'info_dict': { + 'id': '313900', + 'ext': 'mp4', + 'title': 'Augustana (S.D.) 
Baseball vs University of Mary', + 'description': 'md5:7578478614aae3bdd4a90f578f787438', + 'timestamp': 1490468400, + 'upload_date': '20170325', + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + + stream = self._download_json( + 'https://neo-client.stretchinternet.com/streamservice/v1/media/stream/v%s' + % video_id, video_id) + + video_url = 'https://%s' % stream['source'] + + event = self._download_json( + 'https://neo-client.stretchinternet.com/portal-ws/getEvent.json', + video_id, query={ + 'clientID': 99997, + 'eventID': video_id, + 'token': 'asdf', + })['event'] + + title = event.get('title') or event['mobileTitle'] + description = event.get('customText') + timestamp = int_or_none(event.get('longtime')) + + return { + 'id': video_id, + 'title': title, + 'description': description, + 'timestamp': timestamp, + 'url': video_url, + } diff --git a/youtube_dl/extractor/tbs.py b/youtube_dl/extractor/tbs.py index e947453..eab22c3 100644 --- a/youtube_dl/extractor/tbs.py +++ b/youtube_dl/extractor/tbs.py @@ -4,58 +4,109 @@ from __future__ import unicode_literals import re from .turner import TurnerBaseIE -from ..utils import extract_attributes +from ..utils import ( + float_or_none, + int_or_none, + strip_or_none, +) class TBSIE(TurnerBaseIE): - # https://github.com/rg3/youtube-dl/issues/13658 - _WORKING = False - - _VALID_URL = r'https?://(?:www\.)?(?P<site>tbs|tntdrama)\.com/videos/(?:[^/]+/)+(?P<id>[^/?#]+)\.html' + _VALID_URL = r'https?://(?:www\.)?(?P<site>tbs|tntdrama)\.com/(?:movies|shows/[^/]+/(?:clips|season-\d+/episode-\d+))/(?P<id>[^/?#]+)' _TESTS = [{ - 'url': 'http://www.tbs.com/videos/people-of-earth/season-1/extras/2007318/theatrical-trailer.html', - 'md5': '9e61d680e2285066ade7199e6408b2ee', + 'url': 'http://www.tntdrama.com/shows/the-alienist/clips/monster', 'info_dict': { - 'id': '2007318', + 'id': '8d384cde33b89f3a43ce5329de42903ed5099887', 'ext': 'mp4', - 'title': 'Theatrical Trailer', - 'description': 'Catch the latest 
comedy from TBS, People of Earth, premiering Halloween night--Monday, October 31, at 9/8c.', + 'title': 'Monster', + 'description': 'Get a first look at the theatrical trailer for TNT’s highly anticipated new psychological thriller The Alienist, which premieres January 22 on TNT.', + 'timestamp': 1508175329, + 'upload_date': '20171016', }, - 'skip': 'TBS videos are deleted after a while', + 'params': { + # m3u8 download + 'skip_download': True, + } }, { - 'url': 'http://www.tntdrama.com/videos/good-behavior/season-1/extras/1538823/you-better-run.html', - 'md5': 'ce53c6ead5e9f3280b4ad2031a6fab56', - 'info_dict': { - 'id': '1538823', - 'ext': 'mp4', - 'title': 'You Better Run', - 'description': 'Letty Raines must figure out what she\'s running toward while running away from her past. Good Behavior premieres November 15 at 9/8c.', - }, - 'skip': 'TBS videos are deleted after a while', + 'url': 'http://www.tbs.com/shows/search-party/season-1/episode-1/explicit-the-mysterious-disappearance-of-the-girl-no-one-knew', + 'only_matching': True, + }, { + 'url': 'http://www.tntdrama.com/movies/star-wars-a-new-hope', + 'only_matching': True, }] def _real_extract(self, url): - domain, display_id = re.match(self._VALID_URL, url).groups() - site = domain[:3] + site, display_id = re.match(self._VALID_URL, url).groups() webpage = self._download_webpage(url, display_id) - video_params = extract_attributes(self._search_regex(r'(<[^>]+id="page-video"[^>]*>)', webpage, 'video params')) - query = None - clip_id = video_params.get('clipid') - if clip_id: - query = 'id=' + clip_id - else: - query = 'titleId=' + video_params['titleid'] - return self._extract_cvp_info( - 'http://www.%s.com/service/cvpXml?%s' % (domain, query), display_id, { - 'default': { - 'media_src': 'http://ht.cdn.turner.com/%s/big' % site, - }, - 'secure': { - 'media_src': 'http://androidhls-secure.cdn.turner.com/%s/big' % site, - 'tokenizer_src': 'http://www.%s.com/video/processors/services/token_ipadAdobe.do' % 
domain, - }, - }, { - 'url': url, - 'site_name': site.upper(), - 'auth_required': video_params.get('isAuthRequired') != 'false', - }) + video_data = self._parse_json(self._search_regex( + r'<script[^>]+?data-drupal-selector="drupal-settings-json"[^>]*?>({.+?})</script>', + webpage, 'drupal setting'), display_id)['turner_playlist'][0] + + media_id = video_data['mediaID'] + title = video_data['title'] + + streams_data = self._download_json( + 'http://medium.ngtv.io/media/%s/tv' % media_id, + media_id)['media']['tv'] + duration = None + chapters = [] + formats = [] + for supported_type in ('unprotected', 'bulkaes'): + stream_data = streams_data.get(supported_type, {}) + m3u8_url = stream_data.get('secureUrl') or stream_data.get('url') + if not m3u8_url: + continue + if stream_data.get('playlistProtection') == 'spe': + m3u8_url = self._add_akamai_spe_token( + 'http://www.%s.com/service/token_spe' % site, + m3u8_url, media_id, { + 'url': url, + 'site_name': site[:3].upper(), + 'auth_required': video_data.get('authRequired') == '1', + }) + formats.extend(self._extract_m3u8_formats( + m3u8_url, media_id, 'mp4', m3u8_id='hls', fatal=False)) + + duration = float_or_none(stream_data.get('totalRuntime') or video_data.get('duration')) + + if not chapters: + for chapter in stream_data.get('contentSegments', []): + start_time = float_or_none(chapter.get('start')) + duration = float_or_none(chapter.get('duration')) + if start_time is None or duration is None: + continue + chapters.append({ + 'start_time': start_time, + 'end_time': start_time + duration, + }) + self._sort_formats(formats) + + thumbnails = [] + for image_id, image in video_data.get('images', {}).items(): + image_url = image.get('url') + if not image_url or image.get('type') != 'video': + continue + i = { + 'id': image_id, + 'url': image_url, + } + mobj = re.search(r'(\d+)x(\d+)', image_url) + if mobj: + i.update({ + 'width': int(mobj.group(1)), + 'height': int(mobj.group(2)), + }) + thumbnails.append(i) + + return 
{ + 'id': media_id, + 'title': title, + 'description': strip_or_none(video_data.get('descriptionNoTags') or video_data.get('shortDescriptionNoTags')), + 'duration': duration, + 'timestamp': int_or_none(video_data.get('created')), + 'season_number': int_or_none(video_data.get('season')), + 'episode_number': int_or_none(video_data.get('episode')), + 'cahpters': chapters, + 'thumbnails': thumbnails, + 'formats': formats, + } diff --git a/youtube_dl/extractor/tnaflix.py b/youtube_dl/extractor/tnaflix.py index 7e6ec34..0c2f8f1 100644 --- a/youtube_dl/extractor/tnaflix.py +++ b/youtube_dl/extractor/tnaflix.py @@ -21,6 +21,8 @@ class TNAFlixNetworkBaseIE(InfoExtractor): r'flashvars\.config\s*=\s*escape\("([^"]+)"', r'<input[^>]+name="config\d?" value="([^"]+)"', ] + _HOST = 'tna' + _VKEY_SUFFIX = '' _TITLE_REGEX = r'<input[^>]+name="title" value="([^"]+)"' _DESCRIPTION_REGEX = r'<input[^>]+name="description" value="([^"]+)"' _UPLOADER_REGEX = r'<input[^>]+name="username" value="([^"]+)"' @@ -72,7 +74,13 @@ class TNAFlixNetworkBaseIE(InfoExtractor): def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') - display_id = mobj.group('display_id') if 'display_id' in mobj.groupdict() else video_id + for display_id_key in ('display_id', 'display_id_2'): + if display_id_key in mobj.groupdict(): + display_id = mobj.group(display_id_key) + if display_id: + break + else: + display_id = video_id webpage = self._download_webpage(url, display_id) @@ -81,8 +89,8 @@ class TNAFlixNetworkBaseIE(InfoExtractor): if not cfg_url: inputs = self._hidden_inputs(webpage) - cfg_url = ('https://cdn-fck.tnaflix.com/tnaflix/%s.fid?key=%s&VID=%s&premium=1&vip=1&alpha' - % (inputs['vkey'], inputs['nkey'], video_id)) + cfg_url = ('https://cdn-fck.%sflix.com/%sflix/%s%s.fid?key=%s&VID=%s&premium=1&vip=1&alpha' + % (self._HOST, self._HOST, inputs['vkey'], self._VKEY_SUFFIX, inputs['nkey'], video_id)) cfg_xml = self._download_xml( cfg_url, display_id, 'Downloading 
metadata', @@ -91,7 +99,8 @@ class TNAFlixNetworkBaseIE(InfoExtractor): formats = [] def extract_video_url(vl): - return re.sub(r'speed=\d+', 'speed=', unescapeHTML(vl.text)) + # Any URL modification now results in HTTP Error 403: Forbidden + return unescapeHTML(vl.text) video_link = cfg_xml.find('./videoLink') if video_link is not None: @@ -192,18 +201,21 @@ class TNAFlixNetworkEmbedIE(TNAFlixNetworkBaseIE): webpage)] -class TNAFlixIE(TNAFlixNetworkBaseIE): +class TNAEMPFlixBaseIE(TNAFlixNetworkBaseIE): + _DESCRIPTION_REGEX = r'(?s)>Description:</[^>]+>(.+?)<' + _UPLOADER_REGEX = r'<span>by\s*<a[^>]+\bhref=["\']/profile/[^>]+>([^<]+)<' + _CATEGORIES_REGEX = r'(?s)<span[^>]*>Categories:</span>(.+?)</div>' + + +class TNAFlixIE(TNAEMPFlixBaseIE): _VALID_URL = r'https?://(?:www\.)?tnaflix\.com/[^/]+/(?P<display_id>[^/]+)/video(?P<id>\d+)' _TITLE_REGEX = r'<title>(.+?) - (?:TNAFlix Porn Videos|TNAFlix\.com)' - _DESCRIPTION_REGEX = r'(?s)>Description:]+>(.+?)<' - _UPLOADER_REGEX = r'\s*Verified Member\s*\s*(.+?)<' - _CATEGORIES_REGEX = r'(?s)]*>Categories:(.+?)' _TESTS = [{ # anonymous uploader, no categories 'url': 'http://www.tnaflix.com/porn-stars/Carmella-Decesare-striptease/video553878', - 'md5': 'ecf3498417d09216374fc5907f9c6ec0', + 'md5': '7e569419fe6d69543d01e6be22f5f7c4', 'info_dict': { 'id': '553878', 'display_id': 'Carmella-Decesare-striptease', @@ -228,7 +240,7 @@ class TNAFlixIE(TNAFlixNetworkBaseIE): 'duration': 164, 'age_limit': 18, 'uploader': 'bobwhite39', - 'categories': ['Amateur Porn', 'Squirting Videos', 'Teen Girls 18+'], + 'categories': list, } }, { 'url': 'https://www.tnaflix.com/amateur-porn/bunzHD-Ms.Donk/video358632', @@ -236,14 +248,15 @@ class TNAFlixIE(TNAFlixNetworkBaseIE): }] -class EMPFlixIE(TNAFlixNetworkBaseIE): - _VALID_URL = r'https?://(?:www\.)?empflix\.com/videos/(?P.+?)-(?P[0-9]+)\.html' +class EMPFlixIE(TNAEMPFlixBaseIE): + _VALID_URL = r'https?://(?:www\.)?empflix\.com/(?:videos/(?P.+?)-|[^/]+/(?P[^/]+)/video)(?P[0-9]+)' - 
_UPLOADER_REGEX = r']+class="infoTitle"[^>]*>Uploaded By:(.+?)' + _HOST = 'emp' + _VKEY_SUFFIX = '-1' _TESTS = [{ 'url': 'http://www.empflix.com/videos/Amateur-Finger-Fuck-33051.html', - 'md5': 'b1bc15b6412d33902d6e5952035fcabc', + 'md5': 'bc30d48b91a7179448a0bda465114676', 'info_dict': { 'id': '33051', 'display_id': 'Amateur-Finger-Fuck', @@ -259,6 +272,9 @@ class EMPFlixIE(TNAFlixNetworkBaseIE): }, { 'url': 'http://www.empflix.com/videos/[AROMA][ARMD-718]-Aoi-Yoshino-Sawa-25826.html', 'only_matching': True, + }, { + 'url': 'https://www.empflix.com/amateur-porn/Amateur-Finger-Fuck/video33051', + 'only_matching': True, }] diff --git a/youtube_dl/extractor/toutv.py b/youtube_dl/extractor/toutv.py index e59ed26..2e7876c 100644 --- a/youtube_dl/extractor/toutv.py +++ b/youtube_dl/extractor/toutv.py @@ -1,6 +1,8 @@ # coding: utf-8 from __future__ import unicode_literals +import re + from .common import InfoExtractor from ..utils import ( int_or_none, @@ -14,7 +16,7 @@ from ..utils import ( class TouTvIE(InfoExtractor): _NETRC_MACHINE = 'toutv' IE_NAME = 'tou.tv' - _VALID_URL = r'https?://ici\.tou\.tv/(?P[a-zA-Z0-9_-]+(?:/S[0-9]+E[0-9]+)?)' + _VALID_URL = r'https?://ici\.tou\.tv/(?P[a-zA-Z0-9_-]+(?:/S[0-9]+[EC][0-9]+)?)' _access_token = None _claims = None @@ -35,13 +37,16 @@ class TouTvIE(InfoExtractor): }, { 'url': 'http://ici.tou.tv/hackers', 'only_matching': True, + }, { + 'url': 'https://ici.tou.tv/l-age-adulte/S01C501', + 'only_matching': True, }] def _real_initialize(self): email, password = self._get_login_info() if email is None: return - state = 'http://ici.tou.tv//' + state = 'http://ici.tou.tv/' webpage = self._download_webpage(state, None, 'Downloading homepage') toutvlogin = self._parse_json(self._search_regex( r'(?s)toutvlogin\s*=\s*({.+?});', webpage, 'toutvlogin'), None, js_to_json) @@ -54,16 +59,30 @@ class TouTvIE(InfoExtractor): 'scope': 'media-drmt openid profile email id.write media-validation.read.privileged', 'state': state, }) - login_form = 
self._search_regex( - r'(?s)(]+(?:id|name)="Form-login".+?)', login_webpage, 'login form') - form_data = self._hidden_inputs(login_form) + + def extract_form_url_and_data(wp, default_form_url, form_spec_re=''): + form, form_elem = re.search( + r'(?s)((]+?%s[^>]*?>).+?)' % form_spec_re, wp).groups() + form_data = self._hidden_inputs(form) + form_url = extract_attributes(form_elem).get('action') or default_form_url + return form_url, form_data + + post_url, form_data = extract_form_url_and_data( + login_webpage, + 'https://services.radio-canada.ca/auth/oauth/v2/authorize/login', + r'(?:id|name)="Form-login"') form_data.update({ 'login-email': email, 'login-password': password, }) - post_url = extract_attributes(login_form).get('action') or authorize_url - _, urlh = self._download_webpage_handle( + consent_webpage = self._download_webpage( post_url, None, 'Logging in', data=urlencode_postdata(form_data)) + post_url, form_data = extract_form_url_and_data( + consent_webpage, + 'https://services.radio-canada.ca/auth/oauth/v2/authorize/consent') + _, urlh = self._download_webpage_handle( + post_url, None, 'Following Redirection', + data=urlencode_postdata(form_data)) self._access_token = self._search_regex( r'access_token=([\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})', urlh.geturl(), 'access token') diff --git a/youtube_dl/extractor/turner.py b/youtube_dl/extractor/turner.py index efeb677..e73b64a 100644 --- a/youtube_dl/extractor/turner.py +++ b/youtube_dl/extractor/turner.py @@ -18,9 +18,32 @@ from ..utils import ( class TurnerBaseIE(AdobePassIE): + _AKAMAI_SPE_TOKEN_CACHE = {} + def _extract_timestamp(self, video_data): return int_or_none(xpath_attr(video_data, 'dateCreated', 'uts')) + def _add_akamai_spe_token(self, tokenizer_src, video_url, content_id, ap_data): + secure_path = self._search_regex(r'https?://[^/]+(.+/)', video_url, 'secure path') + '*' + token = self._AKAMAI_SPE_TOKEN_CACHE.get(secure_path) + if not token: + query = { + 'path': 
secure_path, + 'videoId': content_id, + } + if ap_data.get('auth_required'): + query['accessToken'] = self._extract_mvpd_auth(ap_data['url'], content_id, ap_data['site_name'], ap_data['site_name']) + auth = self._download_xml( + tokenizer_src, content_id, query=query) + error_msg = xpath_text(auth, 'error/msg') + if error_msg: + raise ExtractorError(error_msg, expected=True) + token = xpath_text(auth, 'token') + if not token: + return video_url + self._AKAMAI_SPE_TOKEN_CACHE[secure_path] = token + return video_url + '?hdnea=' + token + def _extract_cvp_info(self, data_src, video_id, path_data={}, ap_data={}): video_data = self._download_xml(data_src, video_id) video_id = video_data.attrib['id'] @@ -33,7 +56,6 @@ class TurnerBaseIE(AdobePassIE): # rtmp_src = splited_rtmp_src[1] # aifp = xpath_text(video_data, 'akamai/aifp', default='') - tokens = {} urls = [] formats = [] rex = re.compile( @@ -67,26 +89,10 @@ class TurnerBaseIE(AdobePassIE): secure_path_data = path_data.get('secure') if not secure_path_data: continue - video_url = secure_path_data['media_src'] + video_url - secure_path = self._search_regex(r'https?://[^/]+(.+/)', video_url, 'secure path') + '*' - token = tokens.get(secure_path) - if not token: - query = { - 'path': secure_path, - 'videoId': content_id, - } - if ap_data.get('auth_required'): - query['accessToken'] = self._extract_mvpd_auth(ap_data['url'], video_id, ap_data['site_name'], ap_data['site_name']) - auth = self._download_xml( - secure_path_data['tokenizer_src'], video_id, query=query) - error_msg = xpath_text(auth, 'error/msg') - if error_msg: - raise ExtractorError(error_msg, expected=True) - token = xpath_text(auth, 'token') - if not token: - continue - tokens[secure_path] = token - video_url = video_url + '?hdnea=' + token + video_url = self._add_akamai_spe_token( + secure_path_data['tokenizer_src'], + secure_path_data['media_src'] + video_url, + content_id, ap_data) elif not re.match('https?://', video_url): base_path_data = 
path_data.get(ext, path_data.get('default', {})) media_src = base_path_data.get('media_src') diff --git a/youtube_dl/extractor/tva.py b/youtube_dl/extractor/tva.py index b57abea..0b863df 100644 --- a/youtube_dl/extractor/tva.py +++ b/youtube_dl/extractor/tva.py @@ -32,6 +32,8 @@ class TVAIE(InfoExtractor): video_data = self._download_json( 'https://videos.tva.ca/proxy/item/_' + video_id, video_id, headers={ 'Accept': 'application/json', + }, query={ + 'appId': '5955fc5f23eec60006c951f1', }) def get_attribute(key): diff --git a/youtube_dl/extractor/tvnow.py b/youtube_dl/extractor/tvnow.py new file mode 100644 index 0000000..e2169f2 --- /dev/null +++ b/youtube_dl/extractor/tvnow.py @@ -0,0 +1,175 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..compat import compat_str +from ..utils import ( + ExtractorError, + parse_iso8601, + parse_duration, + update_url_query, +) + + +class TVNowBaseIE(InfoExtractor): + _VIDEO_FIELDS = ( + 'id', 'title', 'free', 'geoblocked', 'articleLong', 'articleShort', + 'broadcastStartDate', 'isDrm', 'duration', 'manifest.dashclear', + 'format.defaultImage169Format', 'format.defaultImage169Logo') + + def _call_api(self, path, video_id, query): + return self._download_json( + 'https://api.tvnow.de/v3/' + path, + video_id, query=query) + + def _extract_video(self, info, display_id): + video_id = compat_str(info['id']) + title = info['title'] + + mpd_url = info['manifest']['dashclear'] + if not mpd_url: + if info.get('isDrm'): + raise ExtractorError( + 'Video %s is DRM protected' % video_id, expected=True) + if info.get('geoblocked'): + raise ExtractorError( + 'Video %s is not available from your location due to geo restriction' % video_id, + expected=True) + if not info.get('free', True): + raise ExtractorError( + 'Video %s is not available for free' % video_id, expected=True) + + mpd_url = update_url_query(mpd_url, {'filter': ''}) + formats = 
self._extract_mpd_formats(mpd_url, video_id, mpd_id='dash', fatal=False) + formats.extend(self._extract_ism_formats( + mpd_url.replace('dash.', 'hss.').replace('/.mpd', '/Manifest'), + video_id, ism_id='mss', fatal=False)) + formats.extend(self._extract_m3u8_formats( + mpd_url.replace('dash.', 'hls.').replace('/.mpd', '/.m3u8'), + video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) + self._sort_formats(formats) + + description = info.get('articleLong') or info.get('articleShort') + timestamp = parse_iso8601(info.get('broadcastStartDate'), ' ') + duration = parse_duration(info.get('duration')) + + f = info.get('format', {}) + thumbnail = f.get('defaultImage169Format') or f.get('defaultImage169Logo') + + return { + 'id': video_id, + 'display_id': display_id, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'timestamp': timestamp, + 'duration': duration, + 'formats': formats, + } + + +class TVNowIE(TVNowBaseIE): + _VALID_URL = r'https?://(?:www\.)?tvnow\.(?:de|at|ch)/(?:rtl(?:2|plus)?|nitro|superrtl|ntv|vox)/(?P[^/]+)/(?:(?:list/[^/]+|jahr/\d{4}/\d{1,2})/)?(?P[^/]+)/(?:player|preview)' + + _TESTS = [{ + # rtl + 'url': 'https://www.tvnow.de/rtl/alarm-fuer-cobra-11/freier-fall/player?return=/rtl', + 'info_dict': { + 'id': '385314', + 'display_id': 'alarm-fuer-cobra-11/freier-fall', + 'ext': 'mp4', + 'title': 'Freier Fall', + 'description': 'md5:8c2d8f727261adf7e0dc18366124ca02', + 'thumbnail': r're:^https?://.*\.jpg$', + 'timestamp': 1512677700, + 'upload_date': '20171207', + 'duration': 2862.0, + }, + }, { + # rtl2 + 'url': 'https://www.tvnow.de/rtl2/armes-deutschland/episode-0008/player', + 'only_matching': 'True', + }, { + # rtlnitro + 'url': 'https://www.tvnow.de/nitro/alarm-fuer-cobra-11-die-autobahnpolizei/auf-eigene-faust-pilot/player', + 'only_matching': 'True', + }, { + # superrtl + 'url': 'https://www.tvnow.de/superrtl/die-lustigsten-schlamassel-der-welt/u-a-ketchup-effekt/player', + 'only_matching': 'True', + }, { + # ntv + 
'url': 'https://www.tvnow.de/ntv/startup-news/goetter-in-weiss/player', + 'only_matching': 'True', + }, { + # vox + 'url': 'https://www.tvnow.de/vox/auto-mobil/neues-vom-automobilmarkt-2017-11-19-17-00-00/player', + 'only_matching': 'True', + }, { + # rtlplus + 'url': 'https://www.tvnow.de/rtlplus/op-ruft-dr-bruckner/die-vernaehte-frau/player', + 'only_matching': 'True', + }] + + def _real_extract(self, url): + display_id = '%s/%s' % re.match(self._VALID_URL, url).groups() + + info = self._call_api( + 'movies/' + display_id, display_id, query={ + 'fields': ','.join(self._VIDEO_FIELDS), + }) + + return self._extract_video(info, display_id) + + +class TVNowListIE(TVNowBaseIE): + _VALID_URL = r'(?Phttps?://(?:www\.)?tvnow\.(?:de|at|ch)/(?:rtl(?:2|plus)?|nitro|superrtl|ntv|vox)/(?P[^/]+)/)list/(?P[^?/#&]+)$' + + _SHOW_FIELDS = ('title', ) + _SEASON_FIELDS = ('id', 'headline', 'seoheadline', ) + _VIDEO_FIELDS = ('id', 'headline', 'seoUrl', ) + + _TESTS = [{ + 'url': 'https://www.tvnow.de/rtl/30-minuten-deutschland/list/aktuell', + 'info_dict': { + 'id': '28296', + 'title': '30 Minuten Deutschland - Aktuell', + }, + 'playlist_mincount': 1, + }] + + def _real_extract(self, url): + base_url, show_id, season_id = re.match(self._VALID_URL, url).groups() + + fields = [] + fields.extend(self._SHOW_FIELDS) + fields.extend('formatTabs.%s' % field for field in self._SEASON_FIELDS) + fields.extend( + 'formatTabs.formatTabPages.container.movies.%s' % field + for field in self._VIDEO_FIELDS) + + list_info = self._call_api( + 'formats/seo', season_id, query={ + 'fields': ','.join(fields), + 'name': show_id + '.php' + }) + + season = next( + season for season in list_info['formatTabs']['items'] + if season.get('seoheadline') == season_id) + + title = '%s - %s' % (list_info['title'], season['headline']) + + entries = [] + for container in season['formatTabPages']['items']: + for info in ((container.get('container') or {}).get('movies') or {}).get('items') or []: + seo_url = 
info.get('seoUrl') + if not seo_url: + continue + entries.append(self.url_result( + base_url + seo_url + '/player', 'TVNow', info.get('id'))) + + return self.playlist_result( + entries, compat_str(season.get('id') or season_id), title) diff --git a/youtube_dl/extractor/twentythreevideo.py b/youtube_dl/extractor/twentythreevideo.py new file mode 100644 index 0000000..aa0c6e9 --- /dev/null +++ b/youtube_dl/extractor/twentythreevideo.py @@ -0,0 +1,77 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import int_or_none + + +class TwentyThreeVideoIE(InfoExtractor): + IE_NAME = '23video' + _VALID_URL = r'https?://video\.(?Ptwentythree\.net|23video\.com|filmweb\.no)/v\.ihtml/player\.html\?(?P.*?\bphoto(?:_|%5f)id=(?P\d+).*)' + _TEST = { + 'url': 'https://video.twentythree.net/v.ihtml/player.html?showDescriptions=0&source=site&photo%5fid=20448876&autoPlay=1', + 'md5': '75fcf216303eb1dae9920d651f85ced4', + 'info_dict': { + 'id': '20448876', + 'ext': 'mp4', + 'title': 'Video Marketing Minute: Personalized Video', + 'timestamp': 1513855354, + 'upload_date': '20171221', + 'uploader_id': '12258964', + 'uploader': 'Rasmus Bysted', + } + } + + def _real_extract(self, url): + domain, query, photo_id = re.match(self._VALID_URL, url).groups() + base_url = 'https://video.%s' % domain + photo_data = self._download_json( + base_url + '/api/photo/list?' 
+ query, photo_id, query={ + 'format': 'json', + }, transform_source=lambda s: self._search_regex(r'(?s)({.+})', s, 'photo data'))['photo'] + title = photo_data['title'] + + formats = [] + + audio_path = photo_data.get('audio_download') + if audio_path: + formats.append({ + 'format_id': 'audio', + 'url': base_url + audio_path, + 'filesize': int_or_none(photo_data.get('audio_size')), + 'vcodec': 'none', + }) + + def add_common_info_to_list(l, template, id_field, id_value): + f_base = template % id_value + f_path = photo_data.get(f_base + 'download') + if not f_path: + return + l.append({ + id_field: id_value, + 'url': base_url + f_path, + 'width': int_or_none(photo_data.get(f_base + 'width')), + 'height': int_or_none(photo_data.get(f_base + 'height')), + 'filesize': int_or_none(photo_data.get(f_base + 'size')), + }) + + for f in ('mobile_high', 'medium', 'hd', '1080p', '4k'): + add_common_info_to_list(formats, 'video_%s_', 'format_id', f) + + thumbnails = [] + for t in ('quad16', 'quad50', 'quad75', 'quad100', 'small', 'portrait', 'standard', 'medium', 'large', 'original'): + add_common_info_to_list(thumbnails, '%s_', 'id', t) + + return { + 'id': photo_id, + 'title': title, + 'timestamp': int_or_none(photo_data.get('creation_date_epoch')), + 'duration': int_or_none(photo_data.get('video_length')), + 'view_count': int_or_none(photo_data.get('view_count')), + 'comment_count': int_or_none(photo_data.get('number_of_comments')), + 'uploader_id': photo_data.get('user_id'), + 'uploader': photo_data.get('display_name'), + 'thumbnails': thumbnails, + 'formats': formats, + } diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index fefcd28..bf57eac 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -101,7 +101,7 @@ class TwitchBaseIE(InfoExtractor): fail(clean_html(login_page)) redirect_page, handle = login_step( - login_page, handle, 'Logging in as %s' % username, { + login_page, handle, 'Logging in', { 'username': 
username, 'password': password, }) diff --git a/youtube_dl/extractor/twitter.py b/youtube_dl/extractor/twitter.py index 1b0b963..d7e4250 100644 --- a/youtube_dl/extractor/twitter.py +++ b/youtube_dl/extractor/twitter.py @@ -43,7 +43,7 @@ class TwitterBaseIE(InfoExtractor): class TwitterCardIE(TwitterBaseIE): IE_NAME = 'twitter:card' - _VALID_URL = r'https?://(?:www\.)?twitter\.com/i/(?:cards/tfw/v1|videos(?:/tweet)?)/(?P\d+)' + _VALID_URL = r'https?://(?:www\.)?twitter\.com/i/(?Pcards/tfw/v1|videos(?:/tweet)?)/(?P\d+)' _TESTS = [ { 'url': 'https://twitter.com/i/cards/tfw/v1/560070183650213889', @@ -51,11 +51,10 @@ class TwitterCardIE(TwitterBaseIE): 'info_dict': { 'id': '560070183650213889', 'ext': 'mp4', - 'title': 'Twitter Card', + 'title': 'Twitter web player', 'thumbnail': r're:^https?://.*\.jpg$', 'duration': 30.033, }, - 'skip': 'Video gone', }, { 'url': 'https://twitter.com/i/cards/tfw/v1/623160978427936768', @@ -63,11 +62,9 @@ class TwitterCardIE(TwitterBaseIE): 'info_dict': { 'id': '623160978427936768', 'ext': 'mp4', - 'title': 'Twitter Card', - 'thumbnail': r're:^https?://.*\.jpg', - 'duration': 80.155, + 'title': 'Twitter web player', + 'thumbnail': r're:^https?://.*(?:\bformat=|\.)jpg', }, - 'skip': 'Video gone', }, { 'url': 'https://twitter.com/i/cards/tfw/v1/654001591733886977', @@ -120,15 +117,15 @@ class TwitterCardIE(TwitterBaseIE): elif media_url.endswith('.mpd'): formats.extend(self._extract_mpd_formats(media_url, video_id, mpd_id='dash')) else: - vbr = int_or_none(dict_get(media_variant, ('bitRate', 'bitrate')), scale=1000) + tbr = int_or_none(dict_get(media_variant, ('bitRate', 'bitrate')), scale=1000) a_format = { 'url': media_url, - 'format_id': 'http-%d' % vbr if vbr else 'http', - 'vbr': vbr, + 'format_id': 'http-%d' % tbr if tbr else 'http', + 'tbr': tbr, } # Reported bitRate may be zero - if not a_format['vbr']: - del a_format['vbr'] + if not a_format['tbr']: + del a_format['tbr'] self._search_dimensions_in_video_url(a_format, media_url) 
@@ -150,79 +147,83 @@ class TwitterCardIE(TwitterBaseIE): bearer_token = self._search_regex( r'BEARER_TOKEN\s*:\s*"([^"]+)"', main_script, 'bearer token') - guest_token = self._search_regex( - r'document\.cookie\s*=\s*decodeURIComponent\("gt=(\d+)', - webpage, 'guest token') + # https://developer.twitter.com/en/docs/tweets/post-and-engage/api-reference/get-statuses-show-id api_data = self._download_json( - 'https://api.twitter.com/2/timeline/conversation/%s.json' % video_id, - video_id, 'Downloading mobile API data', + 'https://api.twitter.com/1.1/statuses/show/%s.json' % video_id, + video_id, 'Downloading API data', headers={ 'Authorization': 'Bearer ' + bearer_token, - 'x-guest-token': guest_token, }) - media_info = try_get(api_data, lambda o: o['globalObjects']['tweets'][video_id] - ['extended_entities']['media'][0]['video_info']) or {} + media_info = try_get(api_data, lambda o: o['extended_entities']['media'][0]['video_info']) or {} return self._parse_media_info(media_info, video_id) def _real_extract(self, url): - video_id = self._match_id(url) + path, video_id = re.search(self._VALID_URL, url).groups() config = None formats = [] duration = None - webpage = self._download_webpage(url, video_id) + urls = [url] + if path.startswith('cards/'): + urls.append('https://twitter.com/i/videos/' + video_id) - iframe_url = self._html_search_regex( - r']+src="((?:https?:)?//(?:www\.youtube\.com/embed/[^"]+|(?:www\.)?vine\.co/v/\w+/card))"', - webpage, 'video iframe', default=None) - if iframe_url: - return self.url_result(iframe_url) + for u in urls: + webpage = self._download_webpage(u, video_id) - config = self._parse_json(self._html_search_regex( - r'data-(?:player-)?config="([^"]+)"', webpage, - 'data player config', default='{}'), - video_id) + iframe_url = self._html_search_regex( + r']+src="((?:https?:)?//(?:www\.youtube\.com/embed/[^"]+|(?:www\.)?vine\.co/v/\w+/card))"', + webpage, 'video iframe', default=None) + if iframe_url: + return self.url_result(iframe_url) 
- if config.get('source_type') == 'vine': - return self.url_result(config['player_url'], 'Vine') + config = self._parse_json(self._html_search_regex( + r'data-(?:player-)?config="([^"]+)"', webpage, + 'data player config', default='{}'), + video_id) - periscope_url = PeriscopeIE._extract_url(webpage) - if periscope_url: - return self.url_result(periscope_url, PeriscopeIE.ie_key()) + if config.get('source_type') == 'vine': + return self.url_result(config['player_url'], 'Vine') - video_url = config.get('video_url') or config.get('playlist', [{}])[0].get('source') + periscope_url = PeriscopeIE._extract_url(webpage) + if periscope_url: + return self.url_result(periscope_url, PeriscopeIE.ie_key()) - if video_url: - if determine_ext(video_url) == 'm3u8': - formats.extend(self._extract_m3u8_formats(video_url, video_id, ext='mp4', m3u8_id='hls')) - else: - f = { - 'url': video_url, - } + video_url = config.get('video_url') or config.get('playlist', [{}])[0].get('source') + + if video_url: + if determine_ext(video_url) == 'm3u8': + formats.extend(self._extract_m3u8_formats(video_url, video_id, ext='mp4', m3u8_id='hls')) + else: + f = { + 'url': video_url, + } + + self._search_dimensions_in_video_url(f, video_url) - self._search_dimensions_in_video_url(f, video_url) + formats.append(f) - formats.append(f) + vmap_url = config.get('vmapUrl') or config.get('vmap_url') + if vmap_url: + formats.extend( + self._extract_formats_from_vmap_url(vmap_url, video_id)) - vmap_url = config.get('vmapUrl') or config.get('vmap_url') - if vmap_url: - formats.extend( - self._extract_formats_from_vmap_url(vmap_url, video_id)) + media_info = None - media_info = None + for entity in config.get('status', {}).get('entities', []): + if 'mediaInfo' in entity: + media_info = entity['mediaInfo'] - for entity in config.get('status', {}).get('entities', []): - if 'mediaInfo' in entity: - media_info = entity['mediaInfo'] + if media_info: + formats.extend(self._parse_media_info(media_info, video_id)) + 
duration = float_or_none(media_info.get('duration', {}).get('nanos'), scale=1e9) - if media_info: - formats.extend(self._parse_media_info(media_info, video_id)) - duration = float_or_none(media_info.get('duration', {}).get('nanos'), scale=1e9) + username = config.get('user', {}).get('screen_name') + if username: + formats.extend(self._extract_mobile_formats(username, video_id)) - username = config.get('user', {}).get('screen_name') - if username: - formats.extend(self._extract_mobile_formats(username, video_id)) + if formats: + break self._remove_duplicate_formats(formats) self._sort_formats(formats) @@ -258,9 +259,6 @@ class TwitterIE(InfoExtractor): 'uploader_id': 'freethenipple', 'duration': 12.922, }, - 'params': { - 'skip_download': True, # requires ffmpeg - }, }, { 'url': 'https://twitter.com/giphz/status/657991469417025536/photo/1', 'md5': 'f36dcd5fb92bf7057f155e7d927eeb42', @@ -277,7 +275,6 @@ class TwitterIE(InfoExtractor): 'skip': 'Account suspended', }, { 'url': 'https://twitter.com/starwars/status/665052190608723968', - 'md5': '39b7199856dee6cd4432e72c74bc69d4', 'info_dict': { 'id': '665052190608723968', 'ext': 'mp4', @@ -303,20 +300,16 @@ class TwitterIE(InfoExtractor): }, }, { 'url': 'https://twitter.com/jaydingeer/status/700207533655363584', - 'md5': '', 'info_dict': { 'id': '700207533655363584', 'ext': 'mp4', - 'title': 'あかさ - BEAT PROD: @suhmeduh #Damndaniel', - 'description': 'あかさ on Twitter: "BEAT PROD: @suhmeduh https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ"', + 'title': 'JG - BEAT PROD: @suhmeduh #Damndaniel', + 'description': 'JG on Twitter: "BEAT PROD: @suhmeduh https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ"', 'thumbnail': r're:^https?://.*\.jpg', - 'uploader': 'あかさ', + 'uploader': 'JG', 'uploader_id': 'jaydingeer', 'duration': 30.0, }, - 'params': { - 'skip_download': True, # requires ffmpeg - }, }, { 'url': 'https://twitter.com/Filmdrunk/status/713801302971588609', 'md5': '89a15ed345d13b86e9a5a5e051fa308a', @@ 
-342,9 +335,6 @@ class TwitterIE(InfoExtractor): 'uploader': 'Captain America', 'duration': 3.17, }, - 'params': { - 'skip_download': True, # requires ffmpeg - }, }, { 'url': 'https://twitter.com/OPP_HSD/status/779210622571536384', 'info_dict': { @@ -370,9 +360,6 @@ class TwitterIE(InfoExtractor): 'uploader_id': 'news_al3alm', 'duration': 277.4, }, - 'params': { - 'format': 'best[format_id^=http-]', - }, }, { 'url': 'https://twitter.com/i/web/status/910031516746514432', 'info_dict': { diff --git a/youtube_dl/extractor/udemy.py b/youtube_dl/extractor/udemy.py index 207c4a6..195f5ce 100644 --- a/youtube_dl/extractor/udemy.py +++ b/youtube_dl/extractor/udemy.py @@ -62,11 +62,11 @@ class UdemyIE(InfoExtractor): def _extract_course_info(self, webpage, video_id): course = self._parse_json( unescapeHTML(self._search_regex( - r'ng-init=["\'].*\bcourse=({.+?});', webpage, 'course', default='{}')), + r'ng-init=["\'].*\bcourse=({.+?})[;"\']', + webpage, 'course', default='{}')), video_id, fatal=False) or {} course_id = course.get('id') or self._search_regex( - (r'"id"\s*:\s*(\d+)', r'data-course-id=["\'](\d+)'), - webpage, 'course id') + r'data-course-id=["\'](\d+)', webpage, 'course id') return course_id, course.get('title') def _enroll_course(self, base_url, webpage, course_id): @@ -164,7 +164,7 @@ class UdemyIE(InfoExtractor): }) response = self._download_webpage( - self._LOGIN_URL, None, 'Logging in as %s' % username, + self._LOGIN_URL, None, 'Logging in', data=urlencode_postdata(login_form), headers={ 'Referer': self._ORIGIN_URL, @@ -257,6 +257,11 @@ class UdemyIE(InfoExtractor): video_url = source.get('file') or source.get('src') if not video_url or not isinstance(video_url, compat_str): continue + if source.get('type') == 'application/x-mpegURL' or determine_ext(video_url) == 'm3u8': + formats.extend(self._extract_m3u8_formats( + video_url, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls', fatal=False)) + continue format_id = source.get('label') f = { 
'url': video_url, diff --git a/youtube_dl/extractor/ufctv.py b/youtube_dl/extractor/ufctv.py new file mode 100644 index 0000000..ab82381 --- /dev/null +++ b/youtube_dl/extractor/ufctv.py @@ -0,0 +1,55 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + parse_duration, + parse_iso8601, +) + + +class UFCTVIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?ufc\.tv/video/(?P[^/]+)' + _TEST = { + 'url': 'https://www.ufc.tv/video/ufc-219-countdown-full-episode', + 'info_dict': { + 'id': '34167', + 'ext': 'mp4', + 'title': 'UFC 219 Countdown: Full Episode', + 'description': 'md5:26d4e8bf4665ae5878842d7050c3c646', + 'timestamp': 1513962360, + 'upload_date': '20171222', + }, + 'params': { + # m3u8 download + 'skip_download': True, + } + } + + def _real_extract(self, url): + display_id = self._match_id(url) + video_data = self._download_json(url, display_id, query={ + 'format': 'json', + }) + video_id = str(video_data['id']) + title = video_data['name'] + m3u8_url = self._download_json( + 'https://www.ufc.tv/service/publishpoint', video_id, query={ + 'type': 'video', + 'format': 'json', + 'id': video_id, + }, headers={ + 'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 11_0_1 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A402 Safari/604.1', + })['path'] + m3u8_url = m3u8_url.replace('_iphone.', '.') + formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4') + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': title, + 'description': video_data.get('description'), + 'duration': parse_duration(video_data.get('runtime')), + 'timestamp': parse_iso8601(video_data.get('releaseDate')), + 'formats': formats, + } diff --git a/youtube_dl/extractor/umg.py b/youtube_dl/extractor/umg.py new file mode 100644 index 0000000..d815cd9 --- /dev/null +++ b/youtube_dl/extractor/umg.py @@ -0,0 +1,103 @@ +# coding: utf-8 +from __future__ import unicode_literals + 
+from .common import InfoExtractor +from ..utils import ( + int_or_none, + parse_filesize, + parse_iso8601, +) + + +class UMGDeIE(InfoExtractor): + IE_NAME = 'umg:de' + IE_DESC = 'Universal Music Deutschland' + _VALID_URL = r'https?://(?:www\.)?universal-music\.de/[^/]+/videos/[^/?#]+-(?P\d+)' + _TEST = { + 'url': 'https://www.universal-music.de/sido/videos/jedes-wort-ist-gold-wert-457803', + 'md5': 'ebd90f48c80dcc82f77251eb1902634f', + 'info_dict': { + 'id': '457803', + 'ext': 'mp4', + 'title': 'Jedes Wort ist Gold wert', + 'timestamp': 1513591800, + 'upload_date': '20171218', + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + video_data = self._download_json( + 'https://api.universal-music.de/graphql', + video_id, query={ + 'query': '''{ + universalMusic(channel:16) { + video(id:%s) { + headline + formats { + formatId + url + type + width + height + mimeType + fileSize + } + duration + createdDate + } + } +}''' % video_id})['data']['universalMusic']['video'] + + title = video_data['headline'] + hls_url_template = 'http://mediadelivery.universal-music-services.de/vod/mp4:autofill/storage/' + '/'.join(list(video_id)) + '/content/%s/file/playlist.m3u8' + + thumbnails = [] + formats = [] + + def add_m3u8_format(format_id): + m3u8_formats = self._extract_m3u8_formats( + hls_url_template % format_id, video_id, 'mp4', + 'm3u8_native', m3u8_id='hls', fatal='False') + if m3u8_formats and m3u8_formats[0].get('height'): + formats.extend(m3u8_formats) + + for f in video_data.get('formats', []): + f_url = f.get('url') + mime_type = f.get('mimeType') + if not f_url or mime_type == 'application/mxf': + continue + fmt = { + 'url': f_url, + 'width': int_or_none(f.get('width')), + 'height': int_or_none(f.get('height')), + 'filesize': parse_filesize(f.get('fileSize')), + } + f_type = f.get('type') + if f_type == 'Image': + thumbnails.append(fmt) + elif f_type == 'Video': + format_id = f.get('formatId') + if format_id: + fmt['format_id'] = format_id + if 
mime_type == 'video/mp4': + add_m3u8_format(format_id) + urlh = self._request_webpage(f_url, video_id, fatal=False) + if urlh: + first_byte = urlh.read(1) + if first_byte not in (b'F', b'\x00'): + continue + formats.append(fmt) + if not formats: + for format_id in (867, 836, 940): + add_m3u8_format(format_id) + self._sort_formats(formats, ('width', 'height', 'filesize', 'tbr')) + + return { + 'id': video_id, + 'title': title, + 'duration': int_or_none(video_data.get('duration')), + 'timestamp': parse_iso8601(video_data.get('createdDate'), ' '), + 'thumbnails': thumbnails, + 'formats': formats, + } diff --git a/youtube_dl/extractor/vidzi.py b/youtube_dl/extractor/vidzi.py index e64873b..ac35d55 100644 --- a/youtube_dl/extractor/vidzi.py +++ b/youtube_dl/extractor/vidzi.py @@ -28,10 +28,10 @@ class VidziIE(InfoExtractor): }, }, { 'url': 'http://vidzi.tv/embed-4z2yb0rzphe9-600x338.html', - 'skip_download': True, + 'only_matching': True, }, { 'url': 'http://vidzi.cc/cghql9yq6emu.html', - 'skip_download': True, + 'only_matching': True, }] def _real_extract(self, url): diff --git a/youtube_dl/extractor/viki.py b/youtube_dl/extractor/viki.py index 853e5c7..ad2a2a4 100644 --- a/youtube_dl/extractor/viki.py +++ b/youtube_dl/extractor/viki.py @@ -99,7 +99,7 @@ class VikiBaseIE(InfoExtractor): login = self._call_api( 'sessions.json', None, - 'Logging in as %s' % username, post_data=login_form) + 'Logging in', post_data=login_form) self._token = login.get('token') if not self._token: diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index cedb548..6af7056 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -468,11 +468,12 @@ class VimeoIE(VimeoBaseInfoExtractor): request = sanitized_Request(url, headers=headers) try: webpage, urlh = self._download_webpage_handle(request, video_id) + redirect_url = compat_str(urlh.geturl()) # Some URLs redirect to ondemand can't be extracted with # this extractor right away thus should be 
passed through # ondemand extractor (e.g. https://vimeo.com/73445910) - if VimeoOndemandIE.suitable(urlh.geturl()): - return self.url_result(urlh.geturl(), VimeoOndemandIE.ie_key()) + if VimeoOndemandIE.suitable(redirect_url): + return self.url_result(redirect_url, VimeoOndemandIE.ie_key()) except ExtractorError as ee: if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 403: errmsg = ee.cause.read() @@ -541,15 +542,15 @@ class VimeoIE(VimeoBaseInfoExtractor): if re.search(r']+?id="pw_form"', webpage) is not None: if '_video_password_verified' in data: raise ExtractorError('video password verification failed!') - self._verify_video_password(url, video_id, webpage) + self._verify_video_password(redirect_url, video_id, webpage) return self._real_extract( - smuggle_url(url, {'_video_password_verified': 'verified'})) + smuggle_url(redirect_url, {'_video_password_verified': 'verified'})) else: raise ExtractorError('Unable to extract info section', cause=e) else: if config.get('view') == 4: - config = self._verify_player_video_password(url, video_id) + config = self._verify_player_video_password(redirect_url, video_id) def is_rented(): if '>You rented this title.<' in webpage: diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py index 105e172..d4838b3 100644 --- a/youtube_dl/extractor/vk.py +++ b/youtube_dl/extractor/vk.py @@ -67,7 +67,7 @@ class VKBaseIE(InfoExtractor): login_page = self._download_webpage( 'https://login.vk.com/?act=login', None, - note='Logging in as %s' % username, + note='Logging in', data=urlencode_postdata(login_form)) if re.search(r'onLoginFailed', login_page): @@ -414,7 +414,7 @@ class VKIE(VKBaseIE): view_count = str_to_int(self._search_regex( r'class=["\']mv_views_count[^>]+>\s*([\d,.]+)', - info_page, 'view count', fatal=False)) + info_page, 'view count', default=None)) formats = [] for format_id, format_url in data.items(): diff --git a/youtube_dl/extractor/voot.py b/youtube_dl/extractor/voot.py index 
5de3deb..751b21e 100644 --- a/youtube_dl/extractor/voot.py +++ b/youtube_dl/extractor/voot.py @@ -2,7 +2,6 @@ from __future__ import unicode_literals from .common import InfoExtractor -from .kaltura import KalturaIE from ..utils import ( ExtractorError, int_or_none, @@ -21,7 +20,6 @@ class VootIE(InfoExtractor): 'ext': 'mp4', 'title': 'Ishq Ka Rang Safed - Season 01 - Episode 340', 'description': 'md5:06291fbbbc4dcbe21235c40c262507c1', - 'uploader_id': 'batchUser', 'timestamp': 1472162937, 'upload_date': '20160825', 'duration': 1146, @@ -63,6 +61,10 @@ class VootIE(InfoExtractor): entry_id = media['EntryId'] title = media['MediaName'] + formats = self._extract_m3u8_formats( + 'https://cdnapisec.kaltura.com/p/1982551/playManifest/pt/https/f/applehttp/t/web/e/' + entry_id, + video_id, 'mp4', m3u8_id='hls') + self._sort_formats(formats) description, series, season_number, episode, episode_number = [None] * 5 @@ -82,9 +84,8 @@ class VootIE(InfoExtractor): episode_number = int_or_none(value) return { - '_type': 'url_transparent', - 'url': 'kaltura:1982551:%s' % entry_id, - 'ie_key': KalturaIE.ie_key(), + 'extractor_key': 'Kaltura', + 'id': entry_id, 'title': title, 'description': description, 'series': series, @@ -95,4 +96,5 @@ class VootIE(InfoExtractor): 'duration': int_or_none(media.get('Duration')), 'view_count': int_or_none(media.get('ViewCounter')), 'like_count': int_or_none(media.get('like_counter')), + 'formats': formats, } diff --git a/youtube_dl/extractor/vshare.py b/youtube_dl/extractor/vshare.py index 5addbc2..e4ec778 100644 --- a/youtube_dl/extractor/vshare.py +++ b/youtube_dl/extractor/vshare.py @@ -1,14 +1,21 @@ # coding: utf-8 from __future__ import unicode_literals +import re + from .common import InfoExtractor +from ..compat import compat_chr +from ..utils import ( + decode_packed_codes, + ExtractorError, +) class VShareIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?vshare\.io/[dv]/(?P[^/?#&]+)' _TESTS = [{ 'url': 'https://vshare.io/d/0f64ce6', - 
'md5': '16d7b8fef58846db47419199ff1ab3e7', + 'md5': '17b39f55b5497ae8b59f5fbce8e35886', 'info_dict': { 'id': '0f64ce6', 'title': 'vl14062007715967', @@ -19,20 +26,49 @@ class VShareIE(InfoExtractor): 'only_matching': True, }] + @staticmethod + def _extract_urls(webpage): + return re.findall( + r']+?src=["\'](?P(?:https?:)?//(?:www\.)?vshare\.io/v/[^/?#&]+)', + webpage) + + def _extract_packed(self, webpage): + packed = self._search_regex( + r'(eval\(function.+)', webpage, 'packed code') + unpacked = decode_packed_codes(packed) + digits = self._search_regex(r'\[((?:\d+,?)+)\]', unpacked, 'digits') + digits = [int(digit) for digit in digits.split(',')] + key_digit = self._search_regex( + r'fromCharCode\(.+?(\d+)\)}', unpacked, 'key digit') + chars = [compat_chr(d - int(key_digit)) for d in digits] + return ''.join(chars) + def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage( - 'https://vshare.io/d/%s' % video_id, video_id) + 'https://vshare.io/v/%s/width-650/height-430/1' % video_id, + video_id) title = self._html_search_regex( - r'(?s)
(.+?)
', webpage, 'title') - video_url = self._search_regex( - r']+href=(["\'])(?P(?:https?:)?//.+?)\1[^>]*>[Cc]lick\s+here', - webpage, 'video url', group='url') + r'([^<]+)', webpage, 'title') + title = title.split(' - ')[0] + + error = self._html_search_regex( + r'(?s)]+\bclass=["\']xxx-error[^>]+>(.+?)%s' % self._extract_packed(webpage), + video_id)[0] + + self._sort_formats(info['formats']) + + info.update({ 'id': video_id, 'title': title, - 'url': video_url, - } + }) + + return info diff --git a/youtube_dl/extractor/vvvvid.py b/youtube_dl/extractor/vvvvid.py index 656a4b9..3d0dc40 100644 --- a/youtube_dl/extractor/vvvvid.py +++ b/youtube_dl/extractor/vvvvid.py @@ -22,6 +22,9 @@ class VVVVIDIE(InfoExtractor): 'ext': 'mp4', 'title': 'Ping Pong', }, + 'params': { + 'skip_download': True, + }, }, { # video_type == 'video/rcs' 'url': 'https://www.vvvvid.it/#!show/376/death-note-live-action/377/482493/episodio-01', @@ -31,6 +34,9 @@ class VVVVIDIE(InfoExtractor): 'ext': 'mp4', 'title': 'Episodio 01', }, + 'params': { + 'skip_download': True, + }, }] _conn_id = None @@ -116,8 +122,20 @@ class VVVVIDIE(InfoExtractor): embed_code = ds(embed_code) video_type = video_data.get('video_type') if video_type in ('video/rcs', 'video/kenc'): - formats.extend(self._extract_akamai_formats( - embed_code, video_id)) + embed_code = re.sub(r'https?://([^/]+)/z/', r'https://\1/i/', embed_code).replace('/manifest.f4m', '/master.m3u8') + if video_type == 'video/kenc': + kenc = self._download_json( + 'https://www.vvvvid.it/kenc', video_id, query={ + 'action': 'kt', + 'conn_id': self._conn_id, + 'url': embed_code, + }, fatal=False) or {} + kenc_message = kenc.get('message') + if kenc_message: + embed_code += '?' 
+ ds(kenc_message) + formats.extend(self._extract_m3u8_formats( + embed_code, video_id, 'mp4', + m3u8_id='hls', fatal=False)) else: formats.extend(self._extract_wowza_formats( 'http://sb.top-ix.org/videomg/_definst_/mp4:%s/playlist.m3u8' % embed_code, video_id)) diff --git a/youtube_dl/extractor/wsj.py b/youtube_dl/extractor/wsj.py index 9b54877..67236f3 100644 --- a/youtube_dl/extractor/wsj.py +++ b/youtube_dl/extractor/wsj.py @@ -13,7 +13,7 @@ class WSJIE(InfoExtractor): _VALID_URL = r'''(?x) (?: https?://video-api\.wsj\.com/api-video/player/iframe\.html\?.*?\bguid=| - https?://(?:www\.)?(?:wsj|barrons)\.com/video/[^/]+/| + https?://(?:www\.)?(?:wsj|barrons)\.com/video/(?:[^/]+/)+| wsj: ) (?P[a-fA-F0-9-]{36}) @@ -38,6 +38,9 @@ class WSJIE(InfoExtractor): }, { 'url': 'http://www.barrons.com/video/capitalism-deserves-more-respect-from-millennials/F301217E-6F46-43AE-B8D2-B7180D642EE9.html', 'only_matching': True, + }, { + 'url': 'https://www.wsj.com/video/series/a-brief-history-of/the-modern-cell-carrier-how-we-got-here/980E2187-401D-48A1-B82B-1486CEE06CB9', + 'only_matching': True, }] def _real_extract(self, url): diff --git a/youtube_dl/extractor/xhamster.py b/youtube_dl/extractor/xhamster.py index be3624e..68652a2 100644 --- a/youtube_dl/extractor/xhamster.py +++ b/youtube_dl/extractor/xhamster.py @@ -6,10 +6,12 @@ from .common import InfoExtractor from ..compat import compat_str from ..utils import ( clean_html, + determine_ext, dict_get, ExtractorError, int_or_none, parse_duration, + try_get, unified_strdate, ) @@ -32,6 +34,7 @@ class XHamsterIE(InfoExtractor): 'display_id': 'femaleagent_shy_beauty_takes_the_bait', 'ext': 'mp4', 'title': 'FemaleAgent Shy beauty takes the bait', + 'timestamp': 1350194821, 'upload_date': '20121014', 'uploader': 'Ruseful2011', 'duration': 893, @@ -45,6 +48,7 @@ class XHamsterIE(InfoExtractor): 'display_id': 'britney_spears_sexy_booty', 'ext': 'mp4', 'title': 'Britney Spears Sexy Booty', + 'timestamp': 1379123460, 'upload_date': 
'20130914', 'uploader': 'jojo747400', 'duration': 200, @@ -61,6 +65,7 @@ class XHamsterIE(InfoExtractor): 'id': '5667973', 'ext': 'mp4', 'title': '....', + 'timestamp': 1454948101, 'upload_date': '20160208', 'uploader': 'parejafree', 'duration': 72, @@ -70,6 +75,10 @@ class XHamsterIE(InfoExtractor): 'params': { 'skip_download': True, }, + }, { + # mobile site + 'url': 'https://m.xhamster.com/videos/cute-teen-jacqueline-solo-masturbation-8559111', + 'only_matching': True, }, { 'url': 'https://xhamster.com/movies/2272726/amber_slayed_by_the_knight.html', 'only_matching': True, @@ -88,7 +97,8 @@ class XHamsterIE(InfoExtractor): video_id = mobj.group('id') or mobj.group('id_2') display_id = mobj.group('display_id') or mobj.group('display_id_2') - webpage = self._download_webpage(url, video_id) + desktop_url = re.sub(r'^(https?://(?:.+?\.)?)m\.', r'\1', url) + webpage = self._download_webpage(desktop_url, video_id) error = self._html_search_regex( r']+id=["\']videoClosed["\'][^>]*>(.+?)
', @@ -96,6 +106,83 @@ class XHamsterIE(InfoExtractor): if error: raise ExtractorError(error, expected=True) + age_limit = self._rta_search(webpage) + + def get_height(s): + return int_or_none(self._search_regex( + r'^(\d+)[pP]', s, 'height', default=None)) + + initials = self._parse_json( + self._search_regex( + r'window\.initials\s*=\s*({.+?})\s*;\s*\n', webpage, 'initials', + default='{}'), + video_id, fatal=False) + if initials: + video = initials['videoModel'] + title = video['title'] + formats = [] + for format_id, formats_dict in video['sources'].items(): + if not isinstance(formats_dict, dict): + continue + for quality, format_item in formats_dict.items(): + if format_id == 'download': + # Download link takes some time to be generated, + # skipping for now + continue + if not isinstance(format_item, dict): + continue + format_url = format_item.get('link') + filesize = int_or_none( + format_item.get('size'), invscale=1000000) + else: + format_url = format_item + filesize = None + if not isinstance(format_url, compat_str): + continue + formats.append({ + 'format_id': '%s-%s' % (format_id, quality), + 'url': format_url, + 'ext': determine_ext(format_url, 'mp4'), + 'height': get_height(quality), + 'filesize': filesize, + }) + self._sort_formats(formats) + + categories_list = video.get('categories') + if isinstance(categories_list, list): + categories = [] + for c in categories_list: + if not isinstance(c, dict): + continue + c_name = c.get('name') + if isinstance(c_name, compat_str): + categories.append(c_name) + else: + categories = None + + return { + 'id': video_id, + 'display_id': display_id, + 'title': title, + 'description': video.get('description'), + 'timestamp': int_or_none(video.get('created')), + 'uploader': try_get( + video, lambda x: x['author']['name'], compat_str), + 'thumbnail': video.get('thumbURL'), + 'duration': int_or_none(video.get('duration')), + 'view_count': int_or_none(video.get('views')), + 'like_count': int_or_none(try_get( + video, 
lambda x: x['rating']['likes'], int)), + 'dislike_count': int_or_none(try_get( + video, lambda x: x['rating']['dislikes'], int)), + 'comment_count': int_or_none(video.get('views')), + 'age_limit': age_limit, + 'categories': categories, + 'formats': formats, + } + + # Old layout fallback + title = self._html_search_regex( [r']*>([^<]+)', r']+itemprop=".*?caption.*?"[^>]+content="(.+?)"', @@ -119,8 +206,7 @@ class XHamsterIE(InfoExtractor): formats.append({ 'format_id': format_id, 'url': format_url, - 'height': int_or_none(self._search_regex( - r'^(\d+)[pP]', format_id, 'height', default=None)) + 'height': get_height(format_id), }) video_url = self._search_regex( @@ -148,8 +234,8 @@ class XHamsterIE(InfoExtractor): webpage, 'uploader', default='anonymous') thumbnail = self._search_regex( - [r'''thumb\s*:\s*(?P["'])(?P.+?)(?P=q)''', - r''']+poster=(?P["'])(?P.+?)(?P=q)[^>]*>'''], + [r'''["']thumbUrl["']\s*:\s*(?P["'])(?P.+?)(?P=q)''', + r''']+"poster"=(?P["'])(?P.+?)(?P=q)[^>]*>'''], webpage, 'thumbnail', fatal=False, group='thumbnail') duration = parse_duration(self._search_regex( @@ -167,8 +253,6 @@ class XHamsterIE(InfoExtractor): mobj = re.search(r'Comments \((?P\d+)\)', webpage) comment_count = mobj.group('commentcount') if mobj else 0 - age_limit = self._rta_search(webpage) - categories_html = self._search_regex( r'(?s)Categories:.+?)', webpage, 'categories', default=None) @@ -195,15 +279,16 @@ class XHamsterIE(InfoExtractor): class XHamsterEmbedIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?xhamster\.com/xembed\.php\?video=(?P\d+)' + _VALID_URL = r'https?://(?:.+?\.)?xhamster\.com/xembed\.php\?video=(?P\d+)' _TEST = { 'url': 'http://xhamster.com/xembed.php?video=3328539', 'info_dict': { 'id': '3328539', 'ext': 'mp4', 'title': 'Pen Masturbation', + 'timestamp': 1406581861, 'upload_date': '20140728', - 'uploader_id': 'anonymous', + 'uploader': 'ManyakisArt', 'duration': 5, 'age_limit': 18, } diff --git a/youtube_dl/extractor/xiami.py 
b/youtube_dl/extractor/xiami.py index d017e03..7f871c8 100644 --- a/youtube_dl/extractor/xiami.py +++ b/youtube_dl/extractor/xiami.py @@ -40,9 +40,12 @@ class XiamiBaseIE(InfoExtractor): 'subtitles': subtitles, } - def _extract_tracks(self, item_id, typ=None): + def _extract_tracks(self, item_id, referer, typ=None): playlist = self._download_json( - '%s/%s%s' % (self._API_BASE_URL, item_id, '/type/%s' % typ if typ else ''), item_id) + '%s/%s%s' % (self._API_BASE_URL, item_id, '/type/%s' % typ if typ else ''), + item_id, headers={ + 'Referer': referer, + }) return [ self._extract_track(track, item_id) for track in playlist['data']['trackList']] @@ -135,13 +138,13 @@ class XiamiSongIE(XiamiBaseIE): }] def _real_extract(self, url): - return self._extract_tracks(self._match_id(url))[0] + return self._extract_tracks(self._match_id(url), url)[0] class XiamiPlaylistBaseIE(XiamiBaseIE): def _real_extract(self, url): item_id = self._match_id(url) - return self.playlist_result(self._extract_tracks(item_id, self._TYPE), item_id) + return self.playlist_result(self._extract_tracks(item_id, url, self._TYPE), item_id) class XiamiAlbumIE(XiamiPlaylistBaseIE): diff --git a/youtube_dl/extractor/youku.py b/youtube_dl/extractor/youku.py index 0c4bc2e..c7947d4 100644 --- a/youtube_dl/extractor/youku.py +++ b/youtube_dl/extractor/youku.py @@ -154,7 +154,7 @@ class YoukuIE(InfoExtractor): # request basic data basic_data_params = { 'vid': video_id, - 'ccode': '0402' if 'tudou.com' in url else '0401', + 'ccode': '0507', 'client_ip': '192.168.1.1', 'utid': cna, 'client_ts': time.time() / 1000, @@ -240,7 +240,11 @@ class YoukuShowIE(InfoExtractor): }, { # Ongoing playlist. The initial page is the last one 'url': 'http://list.youku.com/show/id_za7c275ecd7b411e1a19e.html', - 'only_matchine': True, + 'only_matching': True, + }, { + # No data-id value. 
+ 'url': 'http://list.youku.com/show/id_zefbfbd61237fefbfbdef.html', + 'only_matching': True, }] def _extract_entries(self, playlist_data_url, show_id, note, query): @@ -276,9 +280,9 @@ class YoukuShowIE(InfoExtractor): r']+id="(reload_\d+)', first_page, 'first page reload id') # The first reload_id has the same items as first_page reload_ids = re.findall(']+data-id="([^"]+)">', first_page) + entries.extend(initial_entries) for idx, reload_id in enumerate(reload_ids): if reload_id == first_page_reload_id: - entries.extend(initial_entries) continue _, new_entries = self._extract_entries( 'http://list.youku.com/show/episode', show_id, diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 9943ddd..0919bef 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -2270,6 +2270,19 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): r'(?s)

]*>\s*(.*?)\s*

', page, 'title', default=None) + _UPLOADER_BASE = r'class=["\']pl-header-details[^>]+>\s*
  • \s*]+\bhref=' + uploader = self._search_regex( + r'%s["\']/(?:user|channel)/[^>]+>([^<]+)' % _UPLOADER_BASE, + page, 'uploader', default=None) + mobj = re.search( + r'%s(["\'])(?P/(?:user|channel)/(?P.+?))\1' % _UPLOADER_BASE, + page) + if mobj: + uploader_id = mobj.group('uploader_id') + uploader_url = compat_urlparse.urljoin(url, mobj.group('path')) + else: + uploader_id = uploader_url = None + has_videos = True if not playlist_title: @@ -2280,8 +2293,15 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): except StopIteration: has_videos = False - return has_videos, self.playlist_result( + playlist = self.playlist_result( self._entries(page, playlist_id), playlist_id, playlist_title) + playlist.update({ + 'uploader': uploader, + 'uploader_id': uploader_id, + 'uploader_url': uploader_url, + }) + + return has_videos, playlist def _check_download_just_video(self, url, playlist_id): # Check if it's a video-specific URL diff --git a/youtube_dl/postprocessor/xattrpp.py b/youtube_dl/postprocessor/xattrpp.py index fbdfa02..b0aed9c 100644 --- a/youtube_dl/postprocessor/xattrpp.py +++ b/youtube_dl/postprocessor/xattrpp.py @@ -42,6 +42,7 @@ class XAttrMetadataPP(PostProcessor): 'user.dublincore.format': 'format', } + num_written = 0 for xattrname, infoname in xattr_mapping.items(): value = info.get(infoname) @@ -52,6 +53,7 @@ class XAttrMetadataPP(PostProcessor): byte_value = value.encode('utf-8') write_xattr(filename, xattrname, byte_value) + num_written += 1 return [], info @@ -62,8 +64,8 @@ class XAttrMetadataPP(PostProcessor): except XAttrMetadataError as e: if e.reason == 'NO_SPACE': self._downloader.report_warning( - 'There\'s no disk space left or disk quota exceeded. ' + - 'Extended attributes are not written.') + 'There\'s no disk space left, disk quota exceeded or filesystem xattr limit exceeded. 
' + + (('Some ' if num_written else '') + 'extended attributes are not written.').capitalize()) elif e.reason == 'VALUE_TOO_LONG': self._downloader.report_warning( 'Unable to write extended attributes due to too long values.') diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 34866a5..2843a3d 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -159,6 +159,8 @@ DATE_FORMATS = ( '%Y-%m-%dT%H:%M', '%b %d %Y at %H:%M', '%b %d %Y at %H:%M:%S', + '%B %d %Y at %H:%M', + '%B %d %Y at %H:%M:%S', ) DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS) @@ -2350,6 +2352,7 @@ def mimetype2ext(mt): 'ttml+xml': 'ttml', 'x-flv': 'flv', 'x-mp4-fragmented': 'mp4', + 'x-ms-sami': 'sami', 'x-ms-wmv': 'wmv', 'mpegurl': 'm3u8', 'x-mpegurl': 'm3u8', @@ -2372,7 +2375,7 @@ def parse_codecs(codecs_str): vcodec, acodec = None, None for full_codec in splited_codecs: codec = full_codec.split('.')[0] - if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v'): + if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1'): if not vcodec: vcodec = full_codec elif codec in ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'): diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 8b67d23..a3f84b9 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.11.06' +__version__ = '2017.12.31'