Raphaël G. Git Repositories - youtubedl/commitdiff
New upstream version 2016.12.01
author Rogério Brito <rbrito@ime.usp.br>
Wed, 30 Nov 2016 19:22:52 +0000 (17:22 -0200)
committer Rogério Brito <rbrito@ime.usp.br>
Wed, 30 Nov 2016 19:22:52 +0000 (17:22 -0200)
401 files changed:
.github/ISSUE_TEMPLATE.md
.github/ISSUE_TEMPLATE_tmpl.md
.github/PULL_REQUEST_TEMPLATE.md
.gitignore
AUTHORS
CONTRIBUTING.md
ChangeLog
Makefile
README.md
devscripts/bash-completion.py
devscripts/create-github-release.py
devscripts/fish-completion.py
devscripts/generate_aes_testdata.py
devscripts/gh-pages/update-sites.py
devscripts/lazy_load_template.py
devscripts/make_contributing.py
devscripts/make_lazy_extractors.py
devscripts/make_supportedsites.py
devscripts/prepare_manpage.py
devscripts/release.sh
devscripts/zsh-completion.py
docs/conf.py
docs/supportedsites.md
setup.py
test/test_InfoExtractor.py
test/test_YoutubeDL.py
test/test_aes.py
test/test_download.py
test/test_execution.py
test/test_http.py
test/test_iqiyi_sdk_interpreter.py
test/test_jsinterp.py
test/test_utils.py
test/test_verbose_output.py
test/test_write_annotations.py
test/test_youtube_lists.py
test/test_youtube_signature.py
youtube_dl/YoutubeDL.py
youtube_dl/__init__.py
youtube_dl/aes.py
youtube_dl/compat.py
youtube_dl/downloader/__init__.py
youtube_dl/downloader/common.py
youtube_dl/downloader/dash.py
youtube_dl/downloader/external.py
youtube_dl/downloader/f4m.py
youtube_dl/downloader/fragment.py
youtube_dl/downloader/hls.py
youtube_dl/downloader/http.py
youtube_dl/downloader/ism.py [new file with mode: 0644]
youtube_dl/extractor/abc.py
youtube_dl/extractor/abcnews.py
youtube_dl/extractor/abcotvs.py [moved from youtube_dl/extractor/abc7news.py with 52% similarity]
youtube_dl/extractor/adobepass.py
youtube_dl/extractor/adultswim.py
youtube_dl/extractor/afreecatv.py
youtube_dl/extractor/aftonbladet.py [deleted file]
youtube_dl/extractor/aljazeera.py
youtube_dl/extractor/allocine.py
youtube_dl/extractor/amcnetworks.py
youtube_dl/extractor/anvato.py
youtube_dl/extractor/ard.py
youtube_dl/extractor/arte.py
youtube_dl/extractor/audioboom.py
youtube_dl/extractor/awaan.py [moved from youtube_dl/extractor/dcn.py with 51% similarity]
youtube_dl/extractor/azubu.py
youtube_dl/extractor/bandcamp.py
youtube_dl/extractor/bbc.py
youtube_dl/extractor/beatport.py [moved from youtube_dl/extractor/beatportpro.py with 89% similarity]
youtube_dl/extractor/beeg.py
youtube_dl/extractor/bellmedia.py [new file with mode: 0644]
youtube_dl/extractor/bet.py
youtube_dl/extractor/bilibili.py
youtube_dl/extractor/bpb.py
youtube_dl/extractor/bravotv.py
youtube_dl/extractor/brightcove.py
youtube_dl/extractor/byutv.py
youtube_dl/extractor/camdemy.py
youtube_dl/extractor/canalplus.py
youtube_dl/extractor/canvas.py
youtube_dl/extractor/carambatv.py
youtube_dl/extractor/cartoonnetwork.py [new file with mode: 0644]
youtube_dl/extractor/cbc.py
youtube_dl/extractor/cbs.py
youtube_dl/extractor/cbsinteractive.py
youtube_dl/extractor/cbslocal.py
youtube_dl/extractor/cbsnews.py
youtube_dl/extractor/cbssports.py
youtube_dl/extractor/cctv.py [new file with mode: 0644]
youtube_dl/extractor/cda.py
youtube_dl/extractor/ceskatelevize.py
youtube_dl/extractor/charlierose.py [new file with mode: 0644]
youtube_dl/extractor/chirbit.py
youtube_dl/extractor/clipfish.py
youtube_dl/extractor/clubic.py
youtube_dl/extractor/cmt.py
youtube_dl/extractor/cnn.py
youtube_dl/extractor/comcarcoff.py
youtube_dl/extractor/comedycentral.py
youtube_dl/extractor/common.py
youtube_dl/extractor/commonprotocols.py
youtube_dl/extractor/crackle.py
youtube_dl/extractor/criterion.py
youtube_dl/extractor/crunchyroll.py
youtube_dl/extractor/ctv.py [deleted file]
youtube_dl/extractor/cultureunplugged.py
youtube_dl/extractor/curiositystream.py [new file with mode: 0644]
youtube_dl/extractor/dailymotion.py
youtube_dl/extractor/daum.py
youtube_dl/extractor/dbtv.py
youtube_dl/extractor/dctp.py
youtube_dl/extractor/democracynow.py
youtube_dl/extractor/discoverygo.py
youtube_dl/extractor/dotsub.py
youtube_dl/extractor/douyutv.py
youtube_dl/extractor/dramafever.py
youtube_dl/extractor/drtuber.py
youtube_dl/extractor/drtv.py
youtube_dl/extractor/eagleplatform.py
youtube_dl/extractor/einthusan.py
youtube_dl/extractor/eitb.py
youtube_dl/extractor/embedly.py
youtube_dl/extractor/engadget.py
youtube_dl/extractor/espn.py
youtube_dl/extractor/exfm.py [deleted file]
youtube_dl/extractor/expotv.py
youtube_dl/extractor/extractors.py
youtube_dl/extractor/extremetube.py
youtube_dl/extractor/facebook.py
youtube_dl/extractor/faz.py
youtube_dl/extractor/fc2.py
youtube_dl/extractor/firsttv.py
youtube_dl/extractor/folketinget.py
youtube_dl/extractor/footyroom.py
youtube_dl/extractor/formula1.py
youtube_dl/extractor/fox.py
youtube_dl/extractor/fox9.py [new file with mode: 0644]
youtube_dl/extractor/foxgay.py
youtube_dl/extractor/foxnews.py
youtube_dl/extractor/franceculture.py
youtube_dl/extractor/franceinter.py
youtube_dl/extractor/francetv.py
youtube_dl/extractor/freespeech.py
youtube_dl/extractor/funnyordie.py
youtube_dl/extractor/gamestar.py
youtube_dl/extractor/generic.py
youtube_dl/extractor/glide.py
youtube_dl/extractor/globo.py
youtube_dl/extractor/go.py [new file with mode: 0644]
youtube_dl/extractor/googleplus.py
youtube_dl/extractor/googlesearch.py
youtube_dl/extractor/goshgay.py
youtube_dl/extractor/hark.py
youtube_dl/extractor/hbo.py
youtube_dl/extractor/hellporno.py
youtube_dl/extractor/helsinki.py
youtube_dl/extractor/hgtv.py
youtube_dl/extractor/hornbunny.py
youtube_dl/extractor/hotnewhiphop.py
youtube_dl/extractor/huajiao.py [new file with mode: 0644]
youtube_dl/extractor/imdb.py
youtube_dl/extractor/imgur.py
youtube_dl/extractor/ina.py
youtube_dl/extractor/instagram.py
youtube_dl/extractor/internetvideoarchive.py
youtube_dl/extractor/iprima.py
youtube_dl/extractor/ivi.py
youtube_dl/extractor/iwara.py [new file with mode: 0644]
youtube_dl/extractor/jamendo.py [new file with mode: 0644]
youtube_dl/extractor/jpopsukitv.py
youtube_dl/extractor/jwplatform.py
youtube_dl/extractor/kaltura.py
youtube_dl/extractor/karaoketv.py
youtube_dl/extractor/keezmovies.py
youtube_dl/extractor/ketnet.py [new file with mode: 0644]
youtube_dl/extractor/kickstarter.py
youtube_dl/extractor/kontrtube.py
youtube_dl/extractor/krasview.py
youtube_dl/extractor/kusi.py
youtube_dl/extractor/kuwo.py
youtube_dl/extractor/lci.py [new file with mode: 0644]
youtube_dl/extractor/leeco.py
youtube_dl/extractor/lego.py [new file with mode: 0644]
youtube_dl/extractor/lifenews.py
youtube_dl/extractor/limelight.py
youtube_dl/extractor/litv.py
youtube_dl/extractor/liveleak.py
youtube_dl/extractor/lrt.py
youtube_dl/extractor/lynda.py
youtube_dl/extractor/m6.py
youtube_dl/extractor/macgamestore.py
youtube_dl/extractor/mailru.py
youtube_dl/extractor/mangomolo.py [new file with mode: 0644]
youtube_dl/extractor/metacritic.py
youtube_dl/extractor/mgtv.py
youtube_dl/extractor/miaopai.py [new file with mode: 0644]
youtube_dl/extractor/microsoftvirtualacademy.py
youtube_dl/extractor/ministrygrid.py
youtube_dl/extractor/miomio.py
youtube_dl/extractor/mitele.py
youtube_dl/extractor/moevideo.py
youtube_dl/extractor/motorsport.py
youtube_dl/extractor/movieclips.py
youtube_dl/extractor/moviezine.py
youtube_dl/extractor/movingimage.py [moved from youtube_dl/extractor/ssa.py with 65% similarity]
youtube_dl/extractor/msn.py
youtube_dl/extractor/mtv.py
youtube_dl/extractor/musicplayon.py
youtube_dl/extractor/mwave.py
youtube_dl/extractor/myspace.py
youtube_dl/extractor/myspass.py
youtube_dl/extractor/myvidster.py
youtube_dl/extractor/nationalgeographic.py
youtube_dl/extractor/naver.py
youtube_dl/extractor/nba.py
youtube_dl/extractor/nbc.py
youtube_dl/extractor/ndr.py
youtube_dl/extractor/newgrounds.py
youtube_dl/extractor/newstube.py
youtube_dl/extractor/nextmedia.py
youtube_dl/extractor/nfl.py
youtube_dl/extractor/nhk.py [new file with mode: 0644]
youtube_dl/extractor/nhl.py
youtube_dl/extractor/nick.py
youtube_dl/extractor/niconico.py
youtube_dl/extractor/ninecninemedia.py
youtube_dl/extractor/ninenow.py
youtube_dl/extractor/nobelprize.py [new file with mode: 0644]
youtube_dl/extractor/noco.py
youtube_dl/extractor/normalboots.py
youtube_dl/extractor/nova.py
youtube_dl/extractor/nowness.py
youtube_dl/extractor/npo.py
youtube_dl/extractor/nrk.py
youtube_dl/extractor/ntvde.py
youtube_dl/extractor/ntvru.py
youtube_dl/extractor/nuevo.py
youtube_dl/extractor/nytimes.py
youtube_dl/extractor/nzz.py [new file with mode: 0644]
youtube_dl/extractor/oktoberfesttv.py
youtube_dl/extractor/onet.py
youtube_dl/extractor/ooyala.py
youtube_dl/extractor/openload.py
youtube_dl/extractor/orf.py
youtube_dl/extractor/pandatv.py [new file with mode: 0644]
youtube_dl/extractor/pandoratv.py
youtube_dl/extractor/parliamentliveuk.py
youtube_dl/extractor/patreon.py
youtube_dl/extractor/periscope.py
youtube_dl/extractor/played.py [deleted file]
youtube_dl/extractor/plays.py
youtube_dl/extractor/playvid.py
youtube_dl/extractor/pluralsight.py
youtube_dl/extractor/polskieradio.py
youtube_dl/extractor/porn91.py
youtube_dl/extractor/porncom.py [new file with mode: 0644]
youtube_dl/extractor/pornhub.py
youtube_dl/extractor/pornovoisines.py
youtube_dl/extractor/pornoxo.py
youtube_dl/extractor/promptfile.py
youtube_dl/extractor/prosiebensat1.py
youtube_dl/extractor/puls4.py
youtube_dl/extractor/pyvideo.py
youtube_dl/extractor/qqmusic.py
youtube_dl/extractor/radiobremen.py
youtube_dl/extractor/radiocanada.py
youtube_dl/extractor/redtube.py
youtube_dl/extractor/rentv.py [new file with mode: 0644]
youtube_dl/extractor/reverbnation.py
youtube_dl/extractor/rmcdecouverte.py [new file with mode: 0644]
youtube_dl/extractor/rottentomatoes.py
youtube_dl/extractor/roxwel.py
youtube_dl/extractor/rtl2.py
youtube_dl/extractor/rtve.py
youtube_dl/extractor/rudo.py
youtube_dl/extractor/ruhd.py
youtube_dl/extractor/rutube.py
youtube_dl/extractor/rutv.py
youtube_dl/extractor/ruutu.py
youtube_dl/extractor/safari.py
youtube_dl/extractor/sapo.py
youtube_dl/extractor/sbs.py
youtube_dl/extractor/screencast.py
youtube_dl/extractor/screenjunkies.py
youtube_dl/extractor/screenwavemedia.py [deleted file]
youtube_dl/extractor/senateisvp.py
youtube_dl/extractor/shahid.py
youtube_dl/extractor/shared.py
youtube_dl/extractor/slideshare.py
youtube_dl/extractor/slutload.py
youtube_dl/extractor/smotri.py
youtube_dl/extractor/snotr.py
youtube_dl/extractor/sohu.py
youtube_dl/extractor/soundcloud.py
youtube_dl/extractor/southpark.py
youtube_dl/extractor/spankbang.py
youtube_dl/extractor/spiegel.py
youtube_dl/extractor/spike.py
youtube_dl/extractor/srmediathek.py
youtube_dl/extractor/streamable.py
youtube_dl/extractor/streamcz.py
youtube_dl/extractor/svt.py
youtube_dl/extractor/swrmediathek.py
youtube_dl/extractor/syfy.py
youtube_dl/extractor/sztvhu.py
youtube_dl/extractor/tagesschau.py
youtube_dl/extractor/tass.py
youtube_dl/extractor/tbs.py [new file with mode: 0644]
youtube_dl/extractor/teachertube.py
youtube_dl/extractor/teachingchannel.py
youtube_dl/extractor/teamcoco.py
youtube_dl/extractor/teamfourstar.py [new file with mode: 0644]
youtube_dl/extractor/techtalks.py
youtube_dl/extractor/telecinco.py
youtube_dl/extractor/telequebec.py [new file with mode: 0644]
youtube_dl/extractor/telewebion.py
youtube_dl/extractor/tfo.py [new file with mode: 0644]
youtube_dl/extractor/theintercept.py
youtube_dl/extractor/theplatform.py
youtube_dl/extractor/thescene.py
youtube_dl/extractor/thestar.py
youtube_dl/extractor/theweatherchannel.py [new file with mode: 0644]
youtube_dl/extractor/thisav.py
youtube_dl/extractor/thisoldhouse.py [new file with mode: 0644]
youtube_dl/extractor/thvideo.py [deleted file]
youtube_dl/extractor/tlc.py
youtube_dl/extractor/tmz.py
youtube_dl/extractor/tnaflix.py
youtube_dl/extractor/tonline.py [new file with mode: 0644]
youtube_dl/extractor/toutv.py
youtube_dl/extractor/toypics.py
youtube_dl/extractor/trollvids.py [deleted file]
youtube_dl/extractor/trutube.py [deleted file]
youtube_dl/extractor/trutv.py [new file with mode: 0644]
youtube_dl/extractor/tube8.py
youtube_dl/extractor/tubitv.py
youtube_dl/extractor/tumblr.py
youtube_dl/extractor/turner.py [new file with mode: 0644]
youtube_dl/extractor/tv2.py
youtube_dl/extractor/tv4.py
youtube_dl/extractor/tvanouvelles.py [new file with mode: 0644]
youtube_dl/extractor/tvigle.py
youtube_dl/extractor/tvland.py
youtube_dl/extractor/tvnoe.py [new file with mode: 0644]
youtube_dl/extractor/tvp.py
youtube_dl/extractor/tvplay.py
youtube_dl/extractor/twitch.py
youtube_dl/extractor/twitter.py
youtube_dl/extractor/udemy.py
youtube_dl/extractor/uplynk.py
youtube_dl/extractor/urplay.py
youtube_dl/extractor/usanetwork.py [new file with mode: 0644]
youtube_dl/extractor/ustream.py
youtube_dl/extractor/vbox7.py
youtube_dl/extractor/vessel.py
youtube_dl/extractor/vesti.py
youtube_dl/extractor/vevo.py
youtube_dl/extractor/vgtv.py
youtube_dl/extractor/vice.py
youtube_dl/extractor/viceland.py
youtube_dl/extractor/videodetective.py
youtube_dl/extractor/videomore.py
youtube_dl/extractor/vidzi.py
youtube_dl/extractor/vier.py
youtube_dl/extractor/viki.py
youtube_dl/extractor/vimeo.py
youtube_dl/extractor/vimple.py
youtube_dl/extractor/vk.py
youtube_dl/extractor/vlive.py
youtube_dl/extractor/vodlocker.py
youtube_dl/extractor/vodplatform.py
youtube_dl/extractor/voxmedia.py
youtube_dl/extractor/vrt.py
youtube_dl/extractor/vuclip.py
youtube_dl/extractor/vyborymos.py [new file with mode: 0644]
youtube_dl/extractor/vzaar.py [new file with mode: 0644]
youtube_dl/extractor/wat.py
youtube_dl/extractor/wdr.py
youtube_dl/extractor/webcaster.py [new file with mode: 0644]
youtube_dl/extractor/weiqitv.py
youtube_dl/extractor/wrzuta.py
youtube_dl/extractor/wsj.py
youtube_dl/extractor/xboxclips.py
youtube_dl/extractor/xfileshare.py
youtube_dl/extractor/xnxx.py
youtube_dl/extractor/xuite.py
youtube_dl/extractor/yahoo.py
youtube_dl/extractor/yam.py
youtube_dl/extractor/youjizz.py
youtube_dl/extractor/youporn.py
youtube_dl/extractor/youtube.py
youtube_dl/extractor/zingmp3.py
youtube_dl/jsinterp.py
youtube_dl/options.py
youtube_dl/postprocessor/embedthumbnail.py
youtube_dl/postprocessor/ffmpeg.py
youtube_dl/postprocessor/xattrpp.py
youtube_dl/socks.py
youtube_dl/swfinterp.py
youtube_dl/utils.py
youtube_dl/version.py

index ae28d83d50d64d73b88993eeaa5bfc3e6c421b16..36559dd7ba92a65299ad8f1924354ba30433b7ab 100644 (file)
@@ -6,8 +6,8 @@
 
 ---
 
-### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.08.17*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
-- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.08.17**
+### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.12.01*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.12.01**
 
 ### Before submitting an *issue* make sure you have:
 - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
@@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
 [debug] User config: []
 [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
 [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
-[debug] youtube-dl version 2016.08.17
+[debug] youtube-dl version 2016.12.01
 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
 [debug] Proxy map: {}
@@ -55,4 +55,4 @@ $ youtube-dl -v <your command line>
 ### Description of your *issue*, suggested solution and other information
 
 Explanation of your *issue* in arbitrary form goes here. Please make sure the [description is worded well enough to be understood](https://github.com/rg3/youtube-dl#is-the-description-of-the-issue-itself-sufficient). Provide as much context and examples as possible.
-If work on your *issue* required an account credentials please provide them or explain how one can obtain them.
+If work on your *issue* requires account credentials please provide them or explain how one can obtain them.
index a5e6a4233d88a2b5cdae3d668d2874fb39e6adf0..ab9968129f33790aaf6471f0f41f6b21164fe0a7 100644 (file)
@@ -55,4 +55,4 @@ $ youtube-dl -v <your command line>
 ### Description of your *issue*, suggested solution and other information
 
 Explanation of your *issue* in arbitrary form goes here. Please make sure the [description is worded well enough to be understood](https://github.com/rg3/youtube-dl#is-the-description-of-the-issue-itself-sufficient). Provide as much context and examples as possible.
-If work on your *issue* required an account credentials please provide them or explain how one can obtain them.
+If work on your *issue* requires account credentials please provide them or explain how one can obtain them.
index f24bb4b09c184302cbfc34c5c777d58cd3d8cdf0..46fa26f02d97ff8cc93a6fe2bc1a3864a9044280 100644 (file)
 - [ ] At least skimmed through [adding new extractor tutorial](https://github.com/rg3/youtube-dl#adding-support-for-a-new-site) and [youtube-dl coding conventions](https://github.com/rg3/youtube-dl#youtube-dl-coding-conventions) sections
 - [ ] [Searched](https://github.com/rg3/youtube-dl/search?q=is%3Apr&type=Issues) the bugtracker for similar pull requests
 
+### In order to be accepted and merged into youtube-dl each piece of code must be in public domain or released under [Unlicense](http://unlicense.org/). Check one of the following options:
+- [ ] I am the original author of this code and I am willing to release it under [Unlicense](http://unlicense.org/)
+- [ ] I am not the original author of this code but it is in public domain or released under [Unlicense](http://unlicense.org/) (provide reliable evidence)
+
 ### What is the purpose of your *pull request*?
 - [ ] Bug fix
+- [ ] Improvement
 - [ ] New extractor
 - [ ] New feature
 
index a802c75a10225f53f8da414fa34fd5422de5bea2..9ce4b5e2d5d78771b718313ff942afe77db92f02 100644 (file)
@@ -29,6 +29,11 @@ updates_key.pem
 *.m4a
 *.m4v
 *.mp3
+*.3gp
+*.wav
+*.ape
+*.mkv
+*.swf
 *.part
 *.swp
 test/testdata
diff --git a/AUTHORS b/AUTHORS
index 1fd4be78522b5d9e66e8531cb3d885a7ae65c73d..4a6f7e13f45fd72ae3da87c475fadf892d2f7a4f 100644 (file)
--- a/AUTHORS
+++ b/AUTHORS
@@ -26,7 +26,7 @@ Albert Kim
 Pierre Rudloff
 Huarong Huo
 Ismael Mejía
-Steffan 'Ruirize' James
+Steffan Donal
 Andras Elso
 Jelle van der Waa
 Marcin Cieślak
@@ -181,3 +181,12 @@ Nehal Patel
 Rob van Bekkum
 Petr Zvoníček
 Pratyush Singh
+Aleksander Nitecki
+Sebastian Blunt
+Matěj Cepl
+Xie Yanbo
+Philip Xu
+John Hawkinson
+Rich Leeper
+Zhong Jianxin
+Thor77
index 95392030ea2fc7f78cbb23b368ee37a9fd5ee010..495955bb571b80c2679b80705306b0b6abf0fac5 100644 (file)
@@ -12,7 +12,7 @@ $ youtube-dl -v <your command line>
 [debug] Proxy map: {}
 ...
 ```
-**Do not post screenshots of verbose log only plain text is acceptable.**
+**Do not post screenshots of verbose logs; only plain text is acceptable.**
 
 The output (including the first lines) contains important debugging information. Issues without the full output are often not reproducible and therefore do not get solved in short order, if ever.
 
@@ -66,7 +66,7 @@ Only post features that you (or an incapacitated friend you can personally talk
 
 ###  Is your question about youtube-dl?
 
-It may sound strange, but some bug reports we receive are completely unrelated to youtube-dl and relate to a different or even the reporter's own application. Please make sure that you are actually using youtube-dl. If you are using a UI for youtube-dl, report the bug to the maintainer of the actual application providing the UI. On the other hand, if your UI for youtube-dl fails in some way you believe is related to youtube-dl, by all means, go ahead and report the bug.
+It may sound strange, but some bug reports we receive are completely unrelated to youtube-dl and relate to a different, or even the reporter's own, application. Please make sure that you are actually using youtube-dl. If you are using a UI for youtube-dl, report the bug to the maintainer of the actual application providing the UI. On the other hand, if your UI for youtube-dl fails in some way you believe is related to youtube-dl, by all means, go ahead and report the bug.
 
 # DEVELOPER INSTRUCTIONS
 
@@ -85,14 +85,14 @@ To run the test, simply invoke your favorite test runner, or execute a test file
 If you want to create a build of youtube-dl yourself, you'll need
 
 * python
-* make (both GNU make and BSD make are supported)
+* make (only GNU make is supported)
 * pandoc
 * zip
 * nosetests
 
 ### Adding support for a new site
 
-If you want to add support for a new site, first of all **make sure** this site is **not dedicated to [copyright infringement](#can-you-add-support-for-this-anime-video-site-or-site-which-shows-current-movies-for-free)**. youtube-dl does **not support** such sites thus pull requests adding support for them **will be rejected**.
+If you want to add support for a new site, first of all **make sure** this site is **not dedicated to [copyright infringement](README.md#can-you-add-support-for-this-anime-video-site-or-site-which-shows-current-movies-for-free)**. youtube-dl does **not support** such sites thus pull requests adding support for them **will be rejected**.
 
 After you have ensured this site is distributing its content legally, you can follow this quick list (assuming your service is called `yourextractor`):
 
@@ -167,19 +167,19 @@ In any case, thank you very much for your contributions!
 
 This section introduces guidelines for writing idiomatic, robust and future-proof extractor code.
 
-Extractors are very fragile by nature since they depend on the layout of the source data provided by 3rd party media hoster out of your control and this layout tend to change. As an extractor implementer your task is not only to write code that will extract media links and metadata correctly but also to minimize code dependency on source's layout changes and even to make the code foresee potential future changes and be ready for that. This is important because it will allow extractor not to break on minor layout changes thus keeping old youtube-dl versions working. Even though this breakage issue is easily fixed by emitting a new version of youtube-dl with fix incorporated all the previous version become broken in all repositories and distros' packages that may not be so prompt in fetching the update from us. Needless to say some may never receive an update at all that is possible for non rolling release distros.
+Extractors are very fragile by nature since they depend on the layout of the source data provided by 3rd party media hosters out of your control and this layout tends to change. As an extractor implementer your task is not only to write code that will extract media links and metadata correctly but also to minimize dependency on the source's layout and even to make the code foresee potential future changes and be ready for that. This is important because it will allow the extractor not to break on minor layout changes thus keeping old youtube-dl versions working. Even though this breakage issue is easily fixed by emitting a new version of youtube-dl with a fix incorporated, all the previous versions become broken in all repositories and distros' packages that may not be so prompt in fetching the update from us. Needless to say, some non rolling release distros may never receive an update at all.
 
 ### Mandatory and optional metafields
 
-For extraction to work youtube-dl relies on metadata your extractor extracts and provides to youtube-dl expressed by [information dictionary](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L75-L257) or simply *info dict*. Only the following meta fields in *info dict* are considered mandatory for successful extraction process by youtube-dl:
+For extraction to work youtube-dl relies on metadata your extractor extracts and provides to youtube-dl expressed by an [information dictionary](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L75-L257) or simply *info dict*. Only the following meta fields in the *info dict* are considered mandatory for a successful extraction process by youtube-dl:
 
  - `id` (media identifier)
  - `title` (media title)
  - `url` (media download URL) or `formats`
 
-In fact only the last option is technically mandatory (i.e. if you can't figure out the download location of the media the extraction does not make any sense). But by convention youtube-dl also treats `id` and `title` to be mandatory. Thus aforementioned metafields are the critical data the extraction does not make any sense without and if any of them fail to be extracted then extractor is considered completely broken.
+In fact only the last option is technically mandatory (i.e. if you can't figure out the download location of the media the extraction does not make any sense). But by convention youtube-dl also treats `id` and `title` as mandatory. Thus the aforementioned metafields are the critical data that the extraction does not make any sense without and if any of them fail to be extracted then the extractor is considered completely broken.
 
-[Any field](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L149-L257) apart from the aforementioned ones are considered **optional**. That means that extraction should be **tolerate** to situations when sources for these fields can potentially be unavailable (even if they are always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields.
+[Any field](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L149-L257) apart from the aforementioned ones are considered **optional**. That means that extraction should be **tolerant** to situations when sources for these fields can potentially be unavailable (even if they are always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields.
 
 #### Example
 
@@ -199,7 +199,7 @@ Assume at this point `meta`'s layout is:
 }
 ```
 
-Assume you want to extract `summary` and put into resulting info dict as `description`. Since `description` is optional metafield you should be ready that this key may be missing from the `meta` dict, so that you should extract it like:
+Assume you want to extract `summary` and put it into the resulting info dict as `description`. Since `description` is an optional metafield you should be ready that this key may be missing from the `meta` dict, so that you should extract it like:
 
 ```python
 description = meta.get('summary')  # correct
@@ -211,7 +211,7 @@ and not like:
 description = meta['summary']  # incorrect
 ```
 
-The latter will break extraction process with `KeyError` if `summary` disappears from `meta` at some time later but with former approach extraction will just go ahead with `description` set to `None` that is perfectly fine (remember `None` is equivalent for absence of data). 
+The latter will break extraction process with `KeyError` if `summary` disappears from `meta` at some later time but with the former approach extraction will just go ahead with `description` set to `None` which is perfectly fine (remember `None` is equivalent to the absence of data).
 
 Similarly, you should pass `fatal=False` when extracting optional data from a webpage with `_search_regex`, `_html_search_regex` or similar methods, for instance:
 
@@ -231,21 +231,21 @@ description = self._search_regex(
     webpage, 'description', default=None)
 ```
 
-On failure this code will silently continue the extraction with `description` set to `None`. That is useful for metafields that are known to may or may not be present.
+On failure this code will silently continue the extraction with `description` set to `None`. That is useful for metafields that may or may not be present.
  
 ### Provide fallbacks
 
-When extracting metadata try to provide several scenarios for that. For example if `title` is present in several places/sources try extracting from at least some of them. This would make it more future-proof in case some of the sources became unavailable.
+When extracting metadata try to do so from multiple sources. For example if `title` is present in several places, try extracting from at least some of them. This makes it more future-proof in case some of the sources become unavailable.
 
 #### Example
 
-Say `meta` from previous example has a `title` and you are about to extract it. Since `title` is mandatory meta field you should end up with something like:
+Say `meta` from the previous example has a `title` and you are about to extract it. Since `title` is a mandatory meta field you should end up with something like:
 
 ```python
 title = meta['title']
 ```
 
-If `title` disappeares from `meta` in future due to some changes on hoster's side the extraction would fail since `title` is mandatory. That's expected.
+If `title` disappears from `meta` in future due to some changes on the hoster's side the extraction would fail since `title` is mandatory. That's expected.
 
 Assume that you have some other source you can extract `title` from, for example `og:title` HTML meta of a `webpage`. In this case you can provide a fallback scenario:
 
@@ -282,7 +282,7 @@ title = self._search_regex(
     webpage, 'title', group='title')
 ```
 
-Note how you tolerate potential changes in `style` attribute's value or switch from using double quotes to single for `class` attribute: 
+Note how you tolerate potential changes in the `style` attribute's value or switch from using double quotes to single for `class` attribute: 
 
 The code definitely should not look like:
 
index 354306a976e0c1c39084b5c9bef90ffbc8aa3cd5..a91de7b63d91b99d606be44cb27e4e72f4e251b2 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,599 @@
+version 2016.12.01
+
+Extractors
+* [soundcloud] Update client id (#11327)
+* [ruutu] Detect DRM protected videos
++ [liveleak] Add support for youtube embeds (#10688)
+* [spike] Fix full episodes support (#11312)
+* [comedycentral] Fix full episodes support
+* [normalboots] Rewrite in terms of JWPlatform (#11184)
+* [teamfourstar] Rewrite in terms of JWPlatform (#11184)
+- [screenwavemedia] Remove extractor (#11184)
+
+
+version 2016.11.27
+
+Extractors
++ [webcaster] Add support for webcaster.pro
++ [azubu] Add support for azubu.uol.com.br (#11305)
+* [viki] Prefer hls formats
+* [viki] Fix rtmp formats extraction (#11255)
+* [puls4] Relax URL regular expression (#11267)
+* [vevo] Improve artist extraction (#10911)
+* [mitele] Relax URL regular expression and extract more metadata (#11244)
++ [cbslocal] Recognize New York site (#11285)
++ [youtube:playlist] Pass disable_polymer in URL query (#11193)
+
+
+version 2016.11.22
+
+Extractors
+* [hellporno] Fix video extension extraction (#11247)
++ [hellporno] Add support for hellporno.net (#11247)
++ [amcnetworks] Recognize more BBC America URLs (#11263)
+* [funnyordie] Improve extraction (#11208)
+* [extractor/generic] Improve limelight embeds support
+- [crunchyroll] Remove ScaledBorderAndShadow from ASS subtitles (#8207, #9028)
+* [bandcamp] Fix free downloads extraction and extract all formats (#11067)
+* [twitter:card] Relax URL regular expression (#11225)
++ [tvanouvelles] Add support for tvanouvelles.ca (#10616)
+
+
+version 2016.11.18
+
+Extractors
+* [youtube:live] Relax URL regular expression (#11164)
+* [openload] Fix extraction (#10408, #11122)
+* [vlive] Prefer locale over language for subtitles id (#11203)
+
+
+version 2016.11.14.1
+
+Core
++ [downloader/fragment,f4m,hls] Respect HTTP headers from info dict
+* [extractor/common] Fix media templates with Bandwidth substitution pattern in
+  MPD manifests (#11175)
+* [extractor/common] Improve thumbnail extraction from JSON-LD
+
+Extractors
++ [nrk] Workaround geo restriction
++ [nrk] Improve error detection and messages
++ [afreecatv] Add support for vod.afreecatv.com (#11174)
+* [cda] Fix and improve extraction (#10929, #10936)
+* [plays] Fix extraction (#11165)
+* [eagleplatform] Fix extraction (#11160)
++ [audioboom] Recognize /posts/ URLs (#11149)
+
+
+version 2016.11.08.1
+
+Extractors
+* [espn:article] Fix support for espn.com articles
+* [franceculture] Fix extraction (#11140)
+
+
+version 2016.11.08
+
+Extractors
+* [tmz:article] Fix extraction (#11052)
+* [espn] Fix extraction (#11041)
+* [mitele] Fix extraction after website redesign (#10824)
+- [ard] Remove age restriction check (#11129)
+* [generic] Improve support for pornhub.com embeds (#11100)
++ [generic] Add support for redtube.com embeds (#11099)
++ [generic] Add support for drtuber.com embeds (#11098)
++ [redtube] Add support for embed URLs
++ [drtuber] Add support for embed URLs
++ [yahoo] Improve content id extraction (#11088)
+* [toutv] Relax URL regular expression (#11121)
+
+
+version 2016.11.04
+
+Core
+* [extractor/common] Tolerate malformed RESOLUTION attribute in m3u8
+  manifests (#11113)
+* [downloader/ism] Fix AVC Decoder Configuration Record
+
+Extractors
++ [fox9] Add support for fox9.com (#11110)
++ [anvato] Extract more metadata and improve formats extraction
+* [vodlocker] Improve removed videos detection (#11106)
++ [vzaar] Add support for vzaar.com (#11093)
++ [vice] Add support for uplynk preplay videos (#11101)
+* [tubitv] Fix extraction (#11061)
++ [shahid] Add support for authentication (#11091)
++ [radiocanada] Add subtitles support (#11096)
++ [generic] Add support for ISM manifests
+
+
+version 2016.11.02
+
+Core
++ Add basic support for Smooth Streaming protocol (#8118, #10969)
+* Improve MPD manifest base URL extraction (#10909, #11079)
+* Fix --match-filter for int-like strings (#11082)
+
+Extractors
++ [mva] Add support for ISM formats
++ [msn] Add support for ISM formats
++ [onet] Add support for ISM formats
++ [tvp] Add support for ISM formats
++ [nicknight] Add support for nicknight sites (#10769)
+
+
+version 2016.10.30
+
+Extractors
+* [facebook] Improve 1080P video detection (#11073)
+* [imgur] Recognize /r/ URLs (#11071)
+* [beeg] Fix extraction (#11069)
+* [openload] Fix extraction (#10408)
+* [gvsearch] Modernize and fix search request (#11051)
+* [adultswim] Fix extraction (#10979)
++ [nobelprize] Add support for nobelprize.org (#9999)
+* [hornbunny] Fix extraction (#10981)
+* [tvp] Improve video id extraction (#10585)
+
+
+version 2016.10.26
+
+Extractors
++ [rentv] Add support for ren.tv (#10620)
++ [ard] Detect unavailable videos (#11018)
+* [vk] Fix extraction (#11022)
+
+
+version 2016.10.25
+
+Core
+* Running youtube-dl in the background is fixed (#10996, #10706, #955)
+
+Extractors
++ [jamendo] Add support for jamendo.com (#10132, #10736)
++ [pandatv] Add support for panda.tv (#10736)
++ [dotsub] Support Vimeo embed (#10964)
+* [litv] Fix extraction
++ [vimeo] Delegate ondemand redirects to ondemand extractor (#10994)
+* [vivo] Fix extraction (#11003)
++ [twitch:stream] Add support for rebroadcasts (#10995)
+* [pluralsight] Fix subtitles conversion (#10990)
+
+
+version 2016.10.21.1
+
+Extractors
++ [pluralsight] Process all clip URLs (#10984)
+
+
+version 2016.10.21
+
+Core
+- Disable thumbnails embedding in mkv
++ Add support for Comcast multiple-system operator (#10819)
+
+Extractors
+* [pluralsight] Adapt to new API (#10972)
+* [openload] Fix extraction (#10408, #10971)
++ [natgeo] Extract m3u8 formats (#10959)
+
+
+version 2016.10.19
+
+Core
++ [utils] Expose PACKED_CODES_RE
++ [extractor/common] Extract non smil wowza mpd manifests
++ [extractor/common] Detect f4m audio-only formats
+
+Extractors
+* [vidzi] Fix extraction (#10908, #10952)
+* [urplay] Fix subtitles extraction
++ [urplay] Add support for urskola.se (#10915)
++ [orf] Add subtitles support (#10939)
+* [youtube] Fix --no-playlist behavior for youtu.be/id URLs (#10896)
+* [nrk] Relax URL regular expression (#10928)
++ [nytimes] Add support for podcasts (#10926)
+* [pluralsight] Relax URL regular expression (#10941)
+
+
+version 2016.10.16
+
+Core
+* [postprocessor/ffmpeg] Return correct filepath and ext in updated information
+  in FFmpegExtractAudioPP (#10879)
+
+Extractors
++ [ruutu] Add support for supla.fi (#10849)
++ [theoperaplatform] Add support for theoperaplatform.eu (#10914)
+* [lynda] Fix height for prioritized streams
++ [lynda] Add fallback extraction scenario
+* [lynda] Switch to https (#10916)
++ [huajiao] New extractor (#10917)
+* [cmt] Fix mgid extraction (#10813)
++ [safari:course] Add support for techbus.safaribooksonline.com
+* [orf:tvthek] Fix extraction and modernize (#10898)
+* [chirbit] Fix extraction of user profile pages
+* [carambatv] Fix extraction
+* [canalplus] Fix extraction for some videos
+* [cbsinteractive] Fix extraction for cnet.com
+* [parliamentliveuk] Lower case URLs are now recognized (#10912)
+
+
+version 2016.10.12
+
+Core
++ Support HTML media elements without child nodes
+* [Makefile] Support for GNU make < 4 is fixed; BSD make dropped (#9387)
+
+Extractors
+* [dailymotion] Fix extraction (#10901)
+* [vimeo:review] Fix extraction (#10900)
+* [nhl] Correctly handle invalid formats (#10713)
+* [footyroom] Fix extraction (#10810)
+* [abc.net.au:iview] Fix for standalone (non series) videos (#10895)
++ [hbo] Add support for episode pages (#10892)
+* [allocine] Fix extraction (#10860)
++ [nextmedia] Recognize action news on AppleDaily
+* [lego] Improve info extraction and bypass geo restriction (#10872)
+
+
+version 2016.10.07
+
+Extractors
++ [iprima] Detect geo restriction
+* [facebook] Fix video extraction (#10846)
++ [commonprotocols] Support direct MMS links (#10838)
++ [generic] Add support for multiple vimeo embeds (#10862)
++ [nzz] Add support for nzz.ch (#4407)
++ [npo] Detect geo restriction
++ [npo] Add support for 2doc.nl (#10842)
++ [lego] Add support for lego.com (#10369)
++ [tonline] Add support for t-online.de (#10376)
+* [techtalks] Relax URL regular expression (#10840)
+* [youtube:live] Extend URL regular expression (#10839)
++ [theweatherchannel] Add support for weather.com (#7188)
++ [thisoldhouse] Add support for thisoldhouse.com (#10837)
++ [nhl] Add support for wch2016.com (#10833)
+* [pornoxo] Use JWPlatform to improve metadata extraction
+
+
+version 2016.10.02
+
+Core
+* Fix possibly lost extended attributes during post-processing
++ Support pyxattr as well as python-xattr for --xattrs and
+  --xattr-set-filesize (#9054)
+
+Extractors
++ [jwplatform] Support DASH streams in JWPlayer
++ [jwplatform] Support old-style JWPlayer playlists
++ [byutv:event] Add extractor
+* [periscope:user] Fix extraction (#10820)
+* [dctp] Fix extraction (#10734)
++ [instagram] Extract video dimensions (#10790)
++ [tvland] Extend URL regular expression (#10812)
++ [vgtv] Add support for tv.aftonbladet.se (#10800)
+- [aftonbladet] Remove extractor
+* [vk] Fix timestamp and view count extraction (#10760)
++ [vk] Add support for running and finished live streams (#10799)
++ [leeco] Recognize more Le Sports URLs (#10794)
++ [instagram] Extract comments (#10788)
++ [ketnet] Extract mzsource formats (#10770)
+* [limelight:media] Improve HTTP formats extraction
+
+
+version 2016.09.27
+
+Core
++ Add hdcore query parameter to akamai f4m formats
++ Delegate HLS live streams downloading to ffmpeg
++ Improved support for HTML5 subtitles
+
+Extractors
++ [vk] Add support for dailymotion embeds (#10661)
+* [promptfile] Fix extraction (#10634)
+* [kaltura] Speed up embed regular expressions (#10764)
++ [npo] Add support for anderetijden.nl (#10754)
++ [prosiebensat1] Add support for advopedia sites
+* [mwave] Relax URL regular expression (#10735, #10748)
+* [prosiebensat1] Fix playlist support (#10745)
++ [prosiebensat1] Add support for sat1gold sites (#10745)
++ [cbsnews:livevideo] Fix extraction and extract m3u8 formats
++ [brightcove:new] Add support for live streams
+* [soundcloud] Generalize playlist entries extraction (#10733)
++ [mtv] Add support for new URL schema (#8169, #9808)
+* [einthusan] Fix extraction (#10714)
++ [twitter] Support Periscope embeds (#10737)
++ [openload] Support subtitles (#10625)
+
+
+version 2016.09.24
+
+Core
++ Add support for watchTVeverywhere.com authentication provider based MSOs for
+  Adobe Pass authentication (#10709)
+
+Extractors
++ [soundcloud:playlist] Provide video id for early playlist entries (#10733)
++ [prosiebensat1] Add support for kabeleinsdoku (#10732)
+* [cbs] Extract info from thunder videoPlayerService (#10728)
+* [openload] Fix extraction (#10408)
++ [ustream] Support the new HLS streams (#10698)
++ [ooyala] Extract all HLS formats
++ [cartoonnetwork] Add support for Adobe Pass authentication
++ [soundcloud] Extract license metadata
++ [fox] Add support for Adobe Pass authentication (#8584)
++ [tbs] Add support for Adobe Pass authentication (#10642, #10222)
++ [trutv] Add support for Adobe Pass authentication (#10519)
++ [turner] Add support for Adobe Pass authentication
+
+
+version 2016.09.19
+
+Extractors
++ [crunchyroll] Check if already authenticated (#10700)
+- [twitch:stream] Remove fallback to profile extraction when stream is offline
+* [thisav] Improve title extraction (#10682)
+* [vyborymos] Improve station info extraction
+
+
+version 2016.09.18
+
+Core
++ Introduce manifest_url and fragments fields in formats dictionary for
+  fragmented media
++ Provide manifest_url field for DASH segments, HLS and HDS
++ Provide fragments field for DASH segments
+* Rework DASH segments downloader to use fragments field
++ Add helper method for Wowza Streaming Engine formats extraction
+
+Extractors
++ [vyborymos] Add extractor for vybory.mos.ru (#10692)
++ [xfileshare] Add title regular expression for streamin.to (#10646)
++ [globo:article] Add support for multiple videos (#10653)
++ [thisav] Recognize HTML5 videos (#10447)
+* [jwplatform] Improve JWPlayer detection
++ [mangomolo] Add support for Mangomolo embeds
++ [toutv] Add support for authentication (#10669)
+* [franceinter] Fix upload date extraction
+* [tv4] Fix HLS and HDS formats extraction (#10659)
+
+
+version 2016.09.15
+
+Core
+* Improve _hidden_inputs
++ Introduce improved explicit Adobe Pass support
++ Add --ap-mso to provide multiple-system operator identifier
++ Add --ap-username to provide MSO account username
++ Add --ap-password to provide MSO account password
++ Add --ap-list-mso to list all supported MSOs
++ Add support for Rogers Cable multiple-system operator (#10606)
+
+Extractors
+* [crunchyroll] Fix authentication (#10655)
+* [twitch] Fix API calls (#10654, #10660)
++ [bellmedia] Add support for more Bell Media Television sites
+* [franceinter] Fix extraction (#10538, #2105)
+* [kuwo] Improve error detection (#10650)
++ [go] Add support for free full episodes (#10439)
+* [bilibili] Fix extraction for specific videos (#10647)
+* [nhk] Fix extraction (#10633)
+* [kaltura] Improve audio detection
+* [kaltura] Skip chun format
++ [vimeo:ondemand] Pass Referer along with embed URL (#10624)
++ [nbc] Add support for NBC Olympics (#10361)
+
+
+version 2016.09.11.1
+
+Extractors
++ [tube8] Extract categories and tags (#10579)
++ [pornhub] Extract categories and tags (#10499)
+* [openload] Temporary fix (#10408)
++ [foxnews] Add support for Fox News articles (#10598)
+* [viafree] Improve video id extraction (#10615)
+* [iwara] Fix extraction after relaunch (#10462, #3215)
++ [tfo] Add extractor for tfo.org
+* [lrt] Fix audio extraction (#10566)
+* [9now] Fix extraction (#10561)
++ [canalplus] Add support for c8.fr (#10577)
+* [newgrounds] Fix uploader extraction (#10584)
++ [polskieradio:category] Add support for category lists (#10576)
++ [ketnet] Add extractor for ketnet.be (#10343)
++ [canvas] Add support for een.be (#10605)
++ [telequebec] Add extractor for telequebec.tv (#1999)
+* [parliamentliveuk] Fix extraction (#9137)
+
+
+version 2016.09.08
+
+Extractors
++ [jwplatform] Extract height from format label
++ [yahoo] Extract Brightcove Legacy Studio embeds (#9345)
+* [videomore] Fix extraction (#10592)
+* [foxgay] Fix extraction (#10480)
++ [rmcdecouverte] Add extractor for rmcdecouverte.bfmtv.com (#9709)
+* [gamestar] Fix metadata extraction (#10479)
+* [puls4] Fix extraction (#10583)
++ [cctv] Add extractor for CCTV and CNTV (#8153)
++ [lci] Add extractor for lci.fr (#10573)
++ [wat] Extract DASH formats
++ [viafree] Improve video id detection (#10569)
++ [trutv] Add extractor for trutv.com (#10519)
++ [nick] Add support for nickelodeon.nl (#10559)
++ [abcotvs:clips] Add support for clips.abcotvs.com
++ [abcotvs] Add support for ABC Owned Television Stations sites (#9551)
++ [miaopai] Add extractor for miaopai.com (#10556)
+* [gamestar] Fix metadata extraction (#10479)
++ [bilibili] Add support for episodes (#10190)
++ [tvnoe] Add extractor for tvnoe.cz (#10524)
+
+
+version 2016.09.04.1
+
+Core
+* In DASH downloader if the first segment fails, abort the whole download
+  process to prevent throttling (#10497)
++ Add support for --skip-unavailable-fragments and --fragment-retries in
+  hlsnative downloader (#10165, #10448).
++ Add support for --skip-unavailable-fragments in DASH downloader
++ Introduce --skip-unavailable-fragments option for fragment based downloaders
+  that allows skipping fragments unavailable due to an HTTP error
+* Fix extraction of video/audio entries with src attribute in
+  _parse_html5_media_entries (#10540)
+
+Extractors
+* [theplatform] Relax URL regular expression (#10546)
+* [youtube:playlist] Extend URL regular expression
+* [rottentomatoes] Delegate extraction to internetvideoarchive extractor
+* [internetvideoarchive] Extract all formats
+* [pornvoisines] Fix extraction (#10469)
+* [rottentomatoes] Fix extraction (#10467)
+* [espn] Extend URL regular expression (#10549)
+* [vimple] Extend URL regular expression (#10547)
+* [youtube:watchlater] Fix extraction (#10544)
+* [youjizz] Fix extraction (#10437)
++ [foxnews] Add support for FoxNews Insider (#10445)
++ [fc2] Recognize Flash player URLs (#10512)
+
+
+version 2016.09.03
+
+Core
+* Restore usage of NAME attribute from EXT-X-MEDIA tag for formats codes in
+  _extract_m3u8_formats (#10522)
+* Handle semicolon in mimetype2ext
+
+Extractors
++ [youtube] Add support for rental videos' previews (#10532)
+* [youtube:playlist] Fallback to video extraction for video/playlist URLs when
+  no playlist is actually served (#10537)
++ [drtv] Add support for dr.dk/nyheder (#10536)
++ [facebook:plugins:video] Add extractor (#10530)
++ [go] Add extractor for *.go.com sites
+* [adobepass] Check for authz_token expiration (#10527)
+* [nytimes] Improve extraction
+* [thestar] Fix extraction (#10465)
+* [glide] Fix extraction (#10478)
+- [exfm] Remove extractor (#10482)
+* [youporn] Fix categories and tags extraction (#10521)
++ [curiositystream] Add extractor for app.curiositystream.com
+- [thvideo] Remove extractor (#10464)
+* [movingimage] Fix for the new site name (#10466)
++ [cbs] Add support for once formats (#10515)
+* [limelight] Skip ism and duplicate manifests
++ [porncom] Extract categories and tags (#10510)
++ [facebook] Extract timestamp (#10508)
++ [yahoo] Extract more formats
+
+
+version 2016.08.31
+
+Extractors
+* [soundcloud] Fix URL regular expression to avoid clashes with sets (#10505)
+* [bandcamp:album] Fix title extraction (#10455)
+* [pyvideo] Fix extraction (#10468)
++ [ctv] Add support for tsn.ca, bnn.ca and thecomedynetwork.ca (#10016)
+* [9c9media] Extract more metadata
+* [9c9media] Fix multiple stacks extraction (#10016)
+* [adultswim] Improve video info extraction (#10492)
+* [vodplatform] Improve embed regular expression
+- [played] Remove extractor (#10470)
++ [tbs] Add extractor for tbs.com and tntdrama.com (#10222)
++ [cartoonnetwork] Add extractor for cartoonnetwork.com (#10110)
+* [adultswim] Rework in terms of turner extractor
+* [cnn] Rework in terms of turner extractor
+* [nba] Rework in terms of turner extractor
++ [turner] Add base extractor for Turner Broadcasting System based sites
+* [bilibili] Fix extraction (#10375)
+* [openload] Fix extraction (#10408)
+
+
+version 2016.08.28
+
+Core
++ Add warning message that ffmpeg doesn't support SOCKS
+* Improve thumbnail sorting
++ Extract formats from #EXT-X-MEDIA tags in _extract_m3u8_formats
+* Fill IV with leading zeros for IVs shorter than 16 octets in hlsnative
++ Add ac-3 to the list of audio codecs in parse_codecs
+
+Extractors
+* [periscope:user] Fix extraction (#10453)
+* [douyutv] Fix extraction (#10153, #10318, #10444)
++ [nhk:vod] Add extractor for www3.nhk.or.jp on demand (#4437, #10424)
+- [trutube] Remove extractor (#10438)
++ [usanetwork] Add extractor for usanetwork.com
+* [crackle] Fix extraction (#10333)
+* [spankbang] Fix description and uploader extraction (#10339)
+* [discoverygo] Detect cable provider restricted videos (#10425)
++ [cbc] Add support for watch.cbc.ca
+* [kickstarter] Silence the warning for og:description (#10415)
+* [mtvservices:embedded] Fix extraction for the new 'edge' player (#10363)
+
+
+version 2016.08.24.1
+
+Extractors
++ [pluralsight] Add support for subtitles (#9681)
+
+
+version 2016.08.24
+
+Extractors
+* [youtube] Fix authentication (#10392)
+* [openload] Fix extraction (#10408)
++ [bravotv] Add support for Adobe Pass (#10407)
+* [bravotv] Fix clip info extraction (#10407)
+* [eagleplatform] Improve embedded videos detection (#10409)
+* [awaan] Fix extraction
+* [mtvservices:embedded] Update config URL
++ [abc:iview] Add extractor (#6148)
+
+
+version 2016.08.22
+
+Core
+* Improve formats and subtitles extension auto calculation
++ Recognize full unit names in parse_filesize
++ Add support for m3u8 manifests in HTML5 multimedia tags
+* Fix octal/hexadecimal number detection in js_to_json
+
+Extractors
++ [ivi] Add support for 720p and 1080p
++ [charlierose] Add new extractor (#10382)
+* [1tv] Fix extraction (#9249)
+* [twitch] Renew authentication
+* [kaltura] Improve subtitles extension calculation
++ [zingmp3] Add support for video clips
+* [zingmp3] Fix extraction (#10041)
+* [kaltura] Improve subtitles extraction (#10279)
+* [cultureunplugged] Fix extraction (#10330)
++ [cnn] Add support for money.cnn.com (#2797)
+* [cbsnews] Fix extraction (#10362)
+* [cbs] Fix extraction (#10393)
++ [litv] Support 'promo' URLs (#10385)
+* [snotr] Fix extraction (#10338)
+* [n-tv.de] Fix extraction (#10331)
+* [globo:article] Relax URL and video id regular expressions (#10379)
+
+
+version 2016.08.19
+
+Core
+- Remove output template description from --help
+* Recognize lowercase units in parse_filesize
+
+Extractors
++ [porncom] Add extractor for porn.com (#2251, #10251)
++ [generic] Add support for DBTV embeds
+* [vk:wallpost] Fix audio extraction for new site layout
+* [vk] Fix authentication
++ [hgtvcom:show] Add extractor for hgtv.com shows (#10365)
++ [discoverygo] Add support for other GO network sites
+
+
 version 2016.08.17
 
 Core
index 354052c50822b716423c7bc4d27606e90ed58796..9d1ddc9d1b106cb16d5a1c879d0b6e7e7150d584 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 all: youtube-dl README.md CONTRIBUTING.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish supportedsites
 
 clean:
-       rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish youtube_dl/extractor/lazy_extractors.py *.dump *.part *.info.json *.mp4 *.m4a *.flv *.mp3 *.avi *.mkv *.webm *.jpg *.png CONTRIBUTING.md.tmp ISSUE_TEMPLATE.md.tmp youtube-dl youtube-dl.exe
+       rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish youtube_dl/extractor/lazy_extractors.py *.dump *.part* *.info.json *.mp4 *.m4a *.flv *.mp3 *.avi *.mkv *.webm *.3gp *.wav *.ape *.swf *.jpg *.png CONTRIBUTING.md.tmp ISSUE_TEMPLATE.md.tmp youtube-dl youtube-dl.exe
        find . -name "*.pyc" -delete
        find . -name "*.class" -delete
 
@@ -12,7 +12,7 @@ SHAREDIR ?= $(PREFIX)/share
 PYTHON ?= /usr/bin/env python
 
 # set SYSCONFDIR to /etc if PREFIX=/usr or PREFIX=/usr/local
-SYSCONFDIR != if [ $(PREFIX) = /usr -o $(PREFIX) = /usr/local ]; then echo /etc; else echo $(PREFIX)/etc; fi
+SYSCONFDIR = $(shell if [ $(PREFIX) = /usr -o $(PREFIX) = /usr/local ]; then echo /etc; else echo $(PREFIX)/etc; fi)
 
 install: youtube-dl youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish
        install -d $(DESTDIR)$(BINDIR)
@@ -90,7 +90,7 @@ fish-completion: youtube-dl.fish
 
 lazy-extractors: youtube_dl/extractor/lazy_extractors.py
 
-_EXTRACTOR_FILES != find youtube_dl/extractor -iname '*.py' -and -not -iname 'lazy_extractors.py'
+_EXTRACTOR_FILES = $(shell find youtube_dl/extractor -iname '*.py' -and -not -iname 'lazy_extractors.py')
 youtube_dl/extractor/lazy_extractors.py: devscripts/make_lazy_extractors.py devscripts/lazy_load_template.py $(_EXTRACTOR_FILES)
        $(PYTHON) devscripts/make_lazy_extractors.py $@
 
index cabbbef76690a6e2ce6513d8f7a03291507aa343..ea9131c3ab88e944c34b7bf50dc87fad8113bc20 100644 (file)
--- a/README.md
+++ b/README.md
@@ -89,6 +89,8 @@ which means you can modify it, redistribute it or use it however you like.
     --mark-watched                   Mark videos watched (YouTube only)
     --no-mark-watched                Do not mark videos watched (YouTube only)
     --no-color                       Do not emit color codes in output
+    --abort-on-unavailable-fragment  Abort downloading when some fragment is not
+                                     available
 
 ## Network Options:
     --proxy URL                      Use the specified HTTP/HTTPS/SOCKS proxy.
@@ -173,7 +175,10 @@ which means you can modify it, redistribute it or use it however you like.
     -R, --retries RETRIES            Number of retries (default is 10), or
                                      "infinite".
     --fragment-retries RETRIES       Number of retries for a fragment (default
-                                     is 10), or "infinite" (DASH only)
+                                     is 10), or "infinite" (DASH and hlsnative
+                                     only)
+    --skip-unavailable-fragments     Skip unavailable fragments (DASH and
+                                     hlsnative only)
     --buffer-size SIZE               Size of download buffer (e.g. 1024 or 16K)
                                      (default is 1024)
     --no-resize-buffer               Do not automatically adjust the buffer
@@ -201,32 +206,8 @@ which means you can modify it, redistribute it or use it however you like.
     -a, --batch-file FILE            File containing URLs to download ('-' for
                                      stdin)
     --id                             Use only video ID in file name
-    -o, --output TEMPLATE            Output filename template. Use %(title)s to
-                                     get the title, %(uploader)s for the
-                                     uploader name, %(uploader_id)s for the
-                                     uploader nickname if different,
-                                     %(autonumber)s to get an automatically
-                                     incremented number, %(ext)s for the
-                                     filename extension, %(format)s for the
-                                     format description (like "22 - 1280x720" or
-                                     "HD"), %(format_id)s for the unique id of
-                                     the format (like YouTube's itags: "137"),
-                                     %(upload_date)s for the upload date
-                                     (YYYYMMDD), %(extractor)s for the provider
-                                     (youtube, metacafe, etc), %(id)s for the
-                                     video id, %(playlist_title)s,
-                                     %(playlist_id)s, or %(playlist)s (=title if
-                                     present, ID otherwise) for the playlist the
-                                     video is in, %(playlist_index)s for the
-                                     position in the playlist. %(height)s and
-                                     %(width)s for the width and height of the
-                                     video format. %(resolution)s for a textual
-                                     description of the resolution of the video
-                                     format. %% for a literal percent. Use - to
-                                     output to stdout. Can also be used to
-                                     download to a different directory, for
-                                     example with -o '/my/downloads/%(uploader)s
-                                     /%(title)s-%(id)s.%(ext)s' .
+    -o, --output TEMPLATE            Output filename template, see the "OUTPUT
+                                     TEMPLATE" for all the info
     --autonumber-size NUMBER         Specify the number of digits in
                                      %(autonumber)s when it is present in output
                                      filename template or --auto-number option
@@ -377,6 +358,17 @@ which means you can modify it, redistribute it or use it however you like.
     -n, --netrc                      Use .netrc authentication data
     --video-password PASSWORD        Video password (vimeo, smotri, youku)
 
+## Adobe Pass Options:
+    --ap-mso MSO                     Adobe Pass multiple-system operator (TV
+                                     provider) identifier, use --ap-list-mso for
+                                     a list of available MSOs
+    --ap-username USERNAME           Multiple-system operator account login
+    --ap-password PASSWORD           Multiple-system operator account password.
+                                     If this option is left out, youtube-dl will
+                                     ask interactively.
+    --ap-list-mso                    List all supported multiple-system
+                                     operators
+
 ## Post-processing Options:
     -x, --extract-audio              Convert video files to audio-only files
                                      (requires ffmpeg or avconv and ffprobe or
@@ -436,11 +428,19 @@ You can configure youtube-dl by placing any supported command line option to a c
 
 For example, with the following configuration file youtube-dl will always extract the audio, not copy the mtime, use a proxy and save all videos under `Movies` directory in your home directory:
 ```
+# Lines starting with # are comments
+
+# Always extract audio
 -x
+
+# Do not copy the mtime
 --no-mtime
+
+# Use this proxy
 --proxy 127.0.0.1:3128
+
+# Save all videos under Movies directory in your home directory
 -o ~/Movies/%(title)s.%(ext)s
-# Lines starting with # are comments
 ```
 
 Note that options in configuration file are just the same options aka switches used in regular command line calls thus there **must be no whitespace** after `-` or `--`, e.g. `-o` or `--proxy` but not `- o` or `-- proxy`.
@@ -449,12 +449,12 @@ You can use `--ignore-config` if you want to disable the configuration file for
 
 ### Authentication with `.netrc` file
 
-You may also want to configure automatic credentials storage for extractors that support authentication (by providing login and password with `--username` and `--password`) in order not to pass credentials as command line arguments on every youtube-dl execution and prevent tracking plain text passwords in the shell command history. You can achieve this using a [`.netrc` file](http://stackoverflow.com/tags/.netrc/info) on per extractor basis. For that you will need to create a `.netrc` file in your `$HOME` and restrict permissions to read/write by you only:
+You may also want to configure automatic credentials storage for extractors that support authentication (by providing login and password with `--username` and `--password`) in order not to pass credentials as command line arguments on every youtube-dl execution and prevent tracking plain text passwords in the shell command history. You can achieve this using a [`.netrc` file](http://stackoverflow.com/tags/.netrc/info) on a per extractor basis. For that you will need to create a `.netrc` file in your `$HOME` and restrict permissions to read/write by only you:
 ```
 touch $HOME/.netrc
 chmod a-rwx,u+rw $HOME/.netrc
 ```
-After that you can add credentials for extractor in the following format, where *extractor* is the name of extractor in lowercase:
+After that you can add credentials for an extractor in the following format, where *extractor* is the name of the extractor in lowercase:
 ```
 machine <extractor> login <login> password <password>
 ```
@@ -550,13 +550,13 @@ Available for the media that is a track or a part of a music album:
  - `disc_number`: Number of the disc or other physical medium the track belongs to
  - `release_year`: Year (YYYY) when the album was released
 
-Each aforementioned sequence when referenced in output template will be replaced by the actual value corresponding to the sequence name. Note that some of the sequences are not guaranteed to be present since they depend on the metadata obtained by particular extractor, such sequences will be replaced with `NA`.
+Each aforementioned sequence when referenced in an output template will be replaced by the actual value corresponding to the sequence name. Note that some of the sequences are not guaranteed to be present since they depend on the metadata obtained by a particular extractor. Such sequences will be replaced with `NA`.
 
-For example for `-o %(title)s-%(id)s.%(ext)s` and mp4 video with title `youtube-dl test video` and id `BaW_jenozKcj` this will result in a `youtube-dl test video-BaW_jenozKcj.mp4` file created in the current directory.
+For example for `-o %(title)s-%(id)s.%(ext)s` and an mp4 video with title `youtube-dl test video` and id `BaW_jenozKcj`, this will result in a `youtube-dl test video-BaW_jenozKcj.mp4` file created in the current directory.
 
-Output template can also contain arbitrary hierarchical path, e.g. `-o '%(playlist)s/%(playlist_index)s - %(title)s.%(ext)s'` that will result in downloading each video in a directory corresponding to this path template. Any missing directory will be automatically created for you.
+Output templates can also contain an arbitrary hierarchical path, e.g. `-o '%(playlist)s/%(playlist_index)s - %(title)s.%(ext)s'` which will result in downloading each video in a directory corresponding to this path template. Any missing directory will be automatically created for you.
 
-To specify percent literal in output template use `%%`. To output to stdout use `-o -`.
+To use percent literals in an output template use `%%`. To output to stdout use `-o -`.
 
 The current default template is `%(title)s-%(id)s.%(ext)s`.
 
@@ -564,7 +564,7 @@ In some cases, you don't want special characters such as 中, spaces, or &, such
 
 #### Output template and Windows batch files
 
-If you are using output template inside a Windows batch file then you must escape plain percent characters (`%`) by doubling, so that `-o "%(title)s-%(id)s.%(ext)s"` should become `-o "%%(title)s-%%(id)s.%%(ext)s"`. However you should not touch `%`'s that are not plain characters, e.g. environment variables for expansion should stay intact: `-o "C:\%HOMEPATH%\Desktop\%%(title)s.%%(ext)s"`.
+If you are using an output template inside a Windows batch file then you must escape plain percent characters (`%`) by doubling, so that `-o "%(title)s-%(id)s.%(ext)s"` should become `-o "%%(title)s-%%(id)s.%%(ext)s"`. However you should not touch `%`'s that are not plain characters, e.g. environment variables for expansion should stay intact: `-o "C:\%HOMEPATH%\Desktop\%%(title)s.%%(ext)s"`.
 
 #### Output template examples
 
@@ -597,7 +597,7 @@ $ youtube-dl -o - BaW_jenozKc
 
 By default youtube-dl tries to download the best available quality, i.e. if you want the best quality you **don't need** to pass any special options, youtube-dl will guess it for you by **default**.
 
-But sometimes you may want to download in a different format, for example when you are on a slow or intermittent connection. The key mechanism for achieving this is so called *format selection* based on which you can explicitly specify desired format, select formats based on some criterion or criteria, setup precedence and much more.
+But sometimes you may want to download in a different format, for example when you are on a slow or intermittent connection. The key mechanism for achieving this is so-called *format selection*, based on which you can explicitly specify the desired format, select formats based on some criterion or criteria, set up precedence and much more.
 
 The general syntax for format selection is `--format FORMAT` or shorter `-f FORMAT` where `FORMAT` is a *selector expression*, i.e. an expression that describes format or formats you would like to download.
 
@@ -605,21 +605,21 @@ The general syntax for format selection is `--format FORMAT` or shorter `-f FORM
 
 The simplest case is requesting a specific format, for example with `-f 22` you can download the format with format code equal to 22. You can get the list of available format codes for particular video using `--list-formats` or `-F`. Note that these format codes are extractor specific. 
 
-You can also use a file extension (currently `3gp`, `aac`, `flv`, `m4a`, `mp3`, `mp4`, `ogg`, `wav`, `webm` are supported) to download best quality format of particular file extension served as a single file, e.g. `-f webm` will download best quality format with `webm` extension served as a single file.
+You can also use a file extension (currently `3gp`, `aac`, `flv`, `m4a`, `mp3`, `mp4`, `ogg`, `wav`, `webm` are supported) to download the best quality format of a particular file extension served as a single file, e.g. `-f webm` will download the best quality format with the `webm` extension served as a single file.
 
-You can also use special names to select particular edge case format:
- - `best`: Select best quality format represented by single file with video and audio
- - `worst`: Select worst quality format represented by single file with video and audio
- - `bestvideo`: Select best quality video only format (e.g. DASH video), may not be available
- - `worstvideo`: Select worst quality video only format, may not be available
- - `bestaudio`: Select best quality audio only format, may not be available
- - `worstaudio`: Select worst quality audio only format, may not be available
+You can also use special names to select particular edge case formats:
+ - `best`: Select the best quality format represented by a single file with video and audio.
+ - `worst`: Select the worst quality format represented by a single file with video and audio.
+ - `bestvideo`: Select the best quality video-only format (e.g. DASH video). May not be available.
+ - `worstvideo`: Select the worst quality video-only format. May not be available.
+ - `bestaudio`: Select the best quality audio-only format. May not be available.
+ - `worstaudio`: Select the worst quality audio-only format. May not be available.
 
-For example, to download worst quality video only format you can use `-f worstvideo`.
+For example, to download the worst quality video-only format you can use `-f worstvideo`.
 
 If you want to download multiple videos and they don't have the same formats available, you can specify the order of preference using slashes. Note that slash is left-associative, i.e. formats on the left hand side are preferred, for example `-f 22/17/18` will download format 22 if it's available, otherwise it will download format 17 if it's available, otherwise it will download format 18 if it's available, otherwise it will complain that no suitable formats are available for download.
 
-If you want to download several formats of the same video use comma as a separator, e.g. `-f 22,17,18` will download all these three formats, of course if they are available. Or more sophisticated example combined with precedence feature `-f 136/137/mp4/bestvideo,140/m4a/bestaudio`.
+If you want to download several formats of the same video use a comma as a separator, e.g. `-f 22,17,18` will download all these three formats, of course if they are available. Or a more sophisticated example combined with the precedence feature: `-f 136/137/mp4/bestvideo,140/m4a/bestaudio`.
 
 You can also filter the video formats by putting a condition in brackets, as in `-f "best[height=720]"` (or `-f "[filesize>10M]"`).
 
@@ -641,15 +641,15 @@ Also filtering work for comparisons `=` (equals), `!=` (not equals), `^=` (begin
  - `protocol`: The protocol that will be used for the actual download, lower-case. `http`, `https`, `rtsp`, `rtmp`, `rtmpe`, `m3u8`, or `m3u8_native`
  - `format_id`: A short description of the format
 
-Note that none of the aforementioned meta fields are guaranteed to be present since this solely depends on the metadata obtained by particular extractor, i.e. the metadata offered by video hoster.
+Note that none of the aforementioned meta fields are guaranteed to be present since this solely depends on the metadata obtained by a particular extractor, i.e. the metadata offered by the video hoster.
 
 Formats for which the value is not known are excluded unless you put a question mark (`?`) after the operator. You can combine format filters, so `-f "[height <=? 720][tbr>500]"` selects up to 720p videos (or videos where the height is not known) with a bitrate of at least 500 KBit/s.
 
-You can merge the video and audio of two formats into a single file using `-f <video-format>+<audio-format>` (requires ffmpeg or avconv installed), for example `-f bestvideo+bestaudio` will download best video only format, best audio only format and mux them together with ffmpeg/avconv.
+You can merge the video and audio of two formats into a single file using `-f <video-format>+<audio-format>` (requires ffmpeg or avconv installed), for example `-f bestvideo+bestaudio` will download the best video-only format, the best audio-only format and mux them together with ffmpeg/avconv.
 
 Format selectors can also be grouped using parentheses, for example if you want to download the best mp4 and webm formats with a height lower than 480 you can use `-f '(mp4,webm)[height<480]'`.
 
-Since the end of April 2015 and version 2015.04.26 youtube-dl uses `-f bestvideo+bestaudio/best` as default format selection (see [#5447](https://github.com/rg3/youtube-dl/issues/5447), [#5456](https://github.com/rg3/youtube-dl/issues/5456)). If ffmpeg or avconv are installed this results in downloading `bestvideo` and `bestaudio` separately and muxing them together into a single file giving the best overall quality available. Otherwise it falls back to `best` and results in downloading the best available quality served as a single file. `best` is also needed for videos that don't come from YouTube because they don't provide the audio and video in two different files. If you want to only download some DASH formats (for example if you are not interested in getting videos with a resolution higher than 1080p), you can add `-f bestvideo[height<=?1080]+bestaudio/best` to your configuration file. Note that if you use youtube-dl to stream to `stdout` (and most likely to pipe it to your media player then), i.e. you explicitly specify output template as `-o -`, youtube-dl still uses `-f best` format selection in order to start content delivery immediately to your player and not to wait until `bestvideo` and `bestaudio` are downloaded and muxed.
+Since the end of April 2015 and version 2015.04.26, youtube-dl uses `-f bestvideo+bestaudio/best` as the default format selection (see [#5447](https://github.com/rg3/youtube-dl/issues/5447), [#5456](https://github.com/rg3/youtube-dl/issues/5456)). If ffmpeg or avconv are installed this results in downloading `bestvideo` and `bestaudio` separately and muxing them together into a single file giving the best overall quality available. Otherwise it falls back to `best` and results in downloading the best available quality served as a single file. `best` is also needed for videos that don't come from YouTube because they don't provide the audio and video in two different files. If you want to only download some DASH formats (for example if you are not interested in getting videos with a resolution higher than 1080p), you can add `-f bestvideo[height<=?1080]+bestaudio/best` to your configuration file. Note that if you use youtube-dl to stream to `stdout` (and most likely to pipe it to your media player then), i.e. you explicitly specify output template as `-o -`, youtube-dl still uses `-f best` format selection in order to start content delivery immediately to your player and not to wait until `bestvideo` and `bestaudio` are downloaded and muxed.
 
 If you want to preserve the old format selection behavior (prior to youtube-dl 2015.04.26), i.e. you want to download the best available quality media served as a single file, you should explicitly specify your choice with `-f best`. You may want to add it to the [configuration file](#configuration) in order not to type it every time you run youtube-dl.
 
@@ -669,7 +669,11 @@ $ youtube-dl -f 'best[filesize<50M]'
 
 # Download best format available via direct link over HTTP/HTTPS protocol
 $ youtube-dl -f '(bestvideo+bestaudio/best)[protocol^=http]'
+
+# Download the best video format and the best audio format without merging them
+$ youtube-dl -f 'bestvideo,bestaudio' -o '%(title)s.f%(format_id)s.%(ext)s'
 ```
+Note that in the last example, an output template is recommended as bestvideo and bestaudio may have the same file name.
 
 
 # VIDEO SELECTION
@@ -724,7 +728,7 @@ Add a file exclusion for `youtube-dl.exe` in Windows Defender settings.
 
 YouTube changed their playlist format in March 2014 and later on, so you'll need at least youtube-dl 2014.07.25 to download all YouTube videos.
 
-If you have installed youtube-dl with a package manager, pip, setup.py or a tarball, please use that to update. Note that Ubuntu packages do not seem to get updated anymore. Since we are not affiliated with Ubuntu, there is little we can do. Feel free to [report bugs](https://bugs.launchpad.net/ubuntu/+source/youtube-dl/+filebug) to the [Ubuntu packaging guys](mailto:ubuntu-motu@lists.ubuntu.com?subject=outdated%20version%20of%20youtube-dl) - all they have to do is update the package to a somewhat recent version. See above for a way to update.
+If you have installed youtube-dl with a package manager, pip, setup.py or a tarball, please use that to update. Note that Ubuntu packages do not seem to get updated anymore. Since we are not affiliated with Ubuntu, there is little we can do. Feel free to [report bugs](https://bugs.launchpad.net/ubuntu/+source/youtube-dl/+filebug) to the [Ubuntu packaging people](mailto:ubuntu-motu@lists.ubuntu.com?subject=outdated%20version%20of%20youtube-dl) - all they have to do is update the package to a somewhat recent version. See above for a way to update.
 
 ### I'm getting an error when trying to use output template: `error: using output template conflicts with using title, video ID or auto number`
 
@@ -750,11 +754,11 @@ Videos or video formats streamed via RTMP protocol can only be downloaded when [
 
 ### I have downloaded a video but how can I play it?
 
-Once the video is fully downloaded, use any video player, such as [mpv](https://mpv.io/), [vlc](http://www.videolan.org) or [mplayer](http://www.mplayerhq.hu/).
+Once the video is fully downloaded, use any video player, such as [mpv](https://mpv.io/), [vlc](http://www.videolan.org/) or [mplayer](http://www.mplayerhq.hu/).
 
 ### I extracted a video URL with `-g`, but it does not play on another machine / in my webbrowser.
 
-It depends a lot on the service. In many cases, requests for the video (to download/play it) must come from the same IP address and with the same cookies.  Use the `--cookies` option to write the required cookies into a file, and advise your downloader to read cookies from that file. Some sites also require a common user agent to be used, use `--dump-user-agent` to see the one in use by youtube-dl.
+It depends a lot on the service. In many cases, requests for the video (to download/play it) must come from the same IP address and with the same cookies and/or HTTP headers. Use the `--cookies` option to write the required cookies into a file, and advise your downloader to read cookies from that file. Some sites also require a common user agent to be used, use `--dump-user-agent` to see the one in use by youtube-dl. You can also get necessary cookies and HTTP headers from JSON output obtained with `--dump-json`.
 
 It may be beneficial to use IPv6; in some cases, the restrictions are only applied to IPv4. Some services (sometimes only for a subset of videos) do not restrict the video URL by IP address, cookie, or user-agent, but these are the exception rather than the rule.
 
@@ -832,10 +836,42 @@ Either prepend `http://www.youtube.com/watch?v=` or separate the ID from the opt
 
 ### How do I pass cookies to youtube-dl?
 
-Use the `--cookies` option, for example `--cookies /path/to/cookies/file.txt`. Note that the cookies file must be in Mozilla/Netscape format and the first line of the cookies file must be either `# HTTP Cookie File` or `# Netscape HTTP Cookie File`. Make sure you have correct [newline format](https://en.wikipedia.org/wiki/Newline) in the cookies file and convert newlines if necessary to correspond with your OS, namely `CRLF` (`\r\n`) for Windows, `LF` (`\n`) for Linux and `CR` (`\r`) for Mac OS. `HTTP Error 400: Bad Request` when using `--cookies` is a good sign of invalid newline format.
+Use the `--cookies` option, for example `--cookies /path/to/cookies/file.txt`.
+
+In order to extract cookies from a browser use any conforming browser extension for exporting cookies. For example, [cookies.txt](https://chrome.google.com/webstore/detail/cookiestxt/njabckikapfpffapmjgojcnbfjonfjfg) (for Chrome) or [Export Cookies](https://addons.mozilla.org/en-US/firefox/addon/export-cookies/) (for Firefox).
+
+Note that the cookies file must be in Mozilla/Netscape format and the first line of the cookies file must be either `# HTTP Cookie File` or `# Netscape HTTP Cookie File`. Make sure you have correct [newline format](https://en.wikipedia.org/wiki/Newline) in the cookies file and convert newlines if necessary to correspond with your OS, namely `CRLF` (`\r\n`) for Windows, `LF` (`\n`) for Linux and `CR` (`\r`) for Mac OS. `HTTP Error 400: Bad Request` when using `--cookies` is a good sign of invalid newline format.
 
 Passing cookies to youtube-dl is a good way to workaround login when a particular extractor does not implement it explicitly. Another use case is working around [CAPTCHA](https://en.wikipedia.org/wiki/CAPTCHA) some websites require you to solve in particular cases in order to get access (e.g. YouTube, CloudFlare).
 
+### How do I stream directly to media player?
+
+You will first need to tell youtube-dl to stream media to stdout with `-o -`, and also tell your media player to read from stdin (it must be capable of this for streaming) and then pipe the former to the latter. For example, streaming to [vlc](http://www.videolan.org/) can be achieved with:
+
+    youtube-dl -o - "http://www.youtube.com/watch?v=BaW_jenozKcj" | vlc -
+
+### How do I download only new videos from a playlist?
+
+Use the download-archive feature. With this feature you should initially download the complete playlist with `--download-archive /path/to/download/archive/file.txt`, which will record the identifiers of all the videos in a special file. Each subsequent run with the same `--download-archive` will download only new videos and skip all videos that have been downloaded before. Note that only successful downloads are recorded in the file.
+
+For example, at first,
+
+    youtube-dl --download-archive archive.txt "https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re"
+
+will download the complete `PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re` playlist and create a file `archive.txt`. Each subsequent run will only download new videos if any:
+
+    youtube-dl --download-archive archive.txt "https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re"
+
+### Should I add `--hls-prefer-native` into my config?
+
+When youtube-dl detects an HLS video, it can download it either with the built-in downloader or ffmpeg. Since many HLS streams are slightly invalid and ffmpeg/youtube-dl each handle some invalid cases better than the other, there is an option to switch the downloader if needed.
+
+When youtube-dl knows that one particular downloader works better for a given website, that downloader will be picked. Otherwise, youtube-dl will pick the best downloader for general compatibility, which at the moment happens to be ffmpeg. This choice may change in future versions of youtube-dl, with improvements of the built-in downloader and/or ffmpeg.
+
+In particular, the generic extractor (used when your website is not in the [list of supported sites by youtube-dl](http://rg3.github.io/youtube-dl/supportedsites.html)) cannot mandate one specific downloader.
+
+If you put either `--hls-prefer-native` or `--hls-prefer-ffmpeg` into your configuration, a different subset of videos will fail to download correctly. Instead, it is much better to [file an issue](https://yt-dl.org/bug) or a pull request which details why the native or the ffmpeg HLS downloader is a better choice for your use case.
+
 ### Can you add support for this anime video site, or site which shows current movies for free?
 
 As a matter of policy (as well as legality), youtube-dl does not include support for services that specialize in infringing copyright. As a rule of thumb, if you cannot easily find a video that the service is quite obviously allowed to distribute (i.e. that has been uploaded by the creator, the creator's distributor, or is published under a free license), the service is probably unfit for inclusion to youtube-dl.
@@ -866,7 +902,7 @@ If you want to find out whether a given URL is supported, simply call youtube-dl
 
 # Why do I need to go through that much red tape when filing bugs?
 
-Before we had the issue template, despite our extensive [bug reporting instructions](#bugs), about 80% of the issue reports we got were useless, for instance because people used ancient versions hundreds of releases old, because of simple syntactic errors (not in youtube-dl but in general shell usage), because the problem was alrady reported multiple times before, because people did not actually read an error message, even if it said "please install ffmpeg", because people did not mention the URL they were trying to download and many more simple, easy-to-avoid problems, many of whom were totally unrelated to youtube-dl.
+Before we had the issue template, despite our extensive [bug reporting instructions](#bugs), about 80% of the issue reports we got were useless, for instance because people used ancient versions hundreds of releases old, because of simple syntactic errors (not in youtube-dl but in general shell usage), because the problem was already reported multiple times before, because people did not actually read an error message, even if it said "please install ffmpeg", because people did not mention the URL they were trying to download and many more simple, easy-to-avoid problems, many of which were totally unrelated to youtube-dl.
 
 youtube-dl is an open-source project manned by too few volunteers, so we'd rather spend time fixing bugs where we are certain none of those simple problems apply, and where we can be reasonably confident to be able to reproduce the issue without asking the reporter repeatedly. As such, the output of `youtube-dl -v YOUR_URL_HERE` is really all that's required to file an issue. The issue template also guides you through some basic steps you can do, such as checking that your version of youtube-dl is current.
 
@@ -887,14 +923,14 @@ To run the test, simply invoke your favorite test runner, or execute a test file
 If you want to create a build of youtube-dl yourself, you'll need
 
 * python
-* make (both GNU make and BSD make are supported)
+* make (only GNU make is supported)
 * pandoc
 * zip
 * nosetests
 
 ### Adding support for a new site
 
-If you want to add support for a new site, first of all **make sure** this site is **not dedicated to [copyright infringement](#can-you-add-support-for-this-anime-video-site-or-site-which-shows-current-movies-for-free)**. youtube-dl does **not support** such sites thus pull requests adding support for them **will be rejected**.
+If you want to add support for a new site, first of all **make sure** this site is **not dedicated to [copyright infringement](README.md#can-you-add-support-for-this-anime-video-site-or-site-which-shows-current-movies-for-free)**. youtube-dl does **not support** such sites thus pull requests adding support for them **will be rejected**.
 
 After you have ensured this site is distributing its content legally, you can follow this quick list (assuming your service is called `yourextractor`):
 
@@ -969,19 +1005,19 @@ In any case, thank you very much for your contributions!
 
 This section introduces guidelines for writing idiomatic, robust and future-proof extractor code.
 
-Extractors are very fragile by nature since they depend on the layout of the source data provided by 3rd party media hoster out of your control and this layout tend to change. As an extractor implementer your task is not only to write code that will extract media links and metadata correctly but also to minimize code dependency on source's layout changes and even to make the code foresee potential future changes and be ready for that. This is important because it will allow extractor not to break on minor layout changes thus keeping old youtube-dl versions working. Even though this breakage issue is easily fixed by emitting a new version of youtube-dl with fix incorporated all the previous version become broken in all repositories and distros' packages that may not be so prompt in fetching the update from us. Needless to say some may never receive an update at all that is possible for non rolling release distros.
+Extractors are very fragile by nature since they depend on the layout of the source data provided by 3rd party media hosters out of your control and this layout tends to change. As an extractor implementer your task is not only to write code that will extract media links and metadata correctly but also to minimize dependency on the source's layout and even to make the code foresee potential future changes and be ready for that. This is important because it will allow the extractor not to break on minor layout changes thus keeping old youtube-dl versions working. Even though this breakage issue is easily fixed by emitting a new version of youtube-dl with a fix incorporated, all the previous versions become broken in all repositories and distros' packages that may not be so prompt in fetching the update from us. Needless to say, some non rolling release distros may never receive an update at all.
 
 ### Mandatory and optional metafields
 
-For extraction to work youtube-dl relies on metadata your extractor extracts and provides to youtube-dl expressed by [information dictionary](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L75-L257) or simply *info dict*. Only the following meta fields in *info dict* are considered mandatory for successful extraction process by youtube-dl:
+For extraction to work youtube-dl relies on metadata your extractor extracts and provides to youtube-dl expressed by an [information dictionary](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L75-L257) or simply *info dict*. Only the following meta fields in the *info dict* are considered mandatory for a successful extraction process by youtube-dl:
 
  - `id` (media identifier)
  - `title` (media title)
  - `url` (media download URL) or `formats`
 
-In fact only the last option is technically mandatory (i.e. if you can't figure out the download location of the media the extraction does not make any sense). But by convention youtube-dl also treats `id` and `title` to be mandatory. Thus aforementioned metafields are the critical data the extraction does not make any sense without and if any of them fail to be extracted then extractor is considered completely broken.
+In fact only the last option is technically mandatory (i.e. if you can't figure out the download location of the media the extraction does not make any sense). But by convention youtube-dl also treats `id` and `title` as mandatory. Thus the aforementioned metafields are the critical data that the extraction does not make any sense without and if any of them fail to be extracted then the extractor is considered completely broken.
 
-[Any field](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L149-L257) apart from the aforementioned ones are considered **optional**. That means that extraction should be **tolerate** to situations when sources for these fields can potentially be unavailable (even if they are always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields.
+[Any field](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L149-L257) apart from the aforementioned ones is considered **optional**. That means that extraction should be **tolerant** of situations when sources for these fields can potentially be unavailable (even if they are always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields.
 
 #### Example
 
@@ -1001,7 +1037,7 @@ Assume at this point `meta`'s layout is:
 }
 ```
 
-Assume you want to extract `summary` and put into resulting info dict as `description`. Since `description` is optional metafield you should be ready that this key may be missing from the `meta` dict, so that you should extract it like:
+Assume you want to extract `summary` and put it into the resulting info dict as `description`. Since `description` is an optional metafield you should be ready that this key may be missing from the `meta` dict, so that you should extract it like:
 
 ```python
 description = meta.get('summary')  # correct
@@ -1013,7 +1049,7 @@ and not like:
 description = meta['summary']  # incorrect
 ```
 
-The latter will break extraction process with `KeyError` if `summary` disappears from `meta` at some time later but with former approach extraction will just go ahead with `description` set to `None` that is perfectly fine (remember `None` is equivalent for absence of data). 
+The latter will break extraction process with `KeyError` if `summary` disappears from `meta` at some later time but with the former approach extraction will just go ahead with `description` set to `None` which is perfectly fine (remember `None` is equivalent to the absence of data).
 
 Similarly, you should pass `fatal=False` when extracting optional data from a webpage with `_search_regex`, `_html_search_regex` or similar methods, for instance:
 
@@ -1033,21 +1069,21 @@ description = self._search_regex(
     webpage, 'description', default=None)
 ```
 
-On failure this code will silently continue the extraction with `description` set to `None`. That is useful for metafields that are known to may or may not be present.
+On failure this code will silently continue the extraction with `description` set to `None`. That is useful for metafields that may or may not be present.
  
 ### Provide fallbacks
 
-When extracting metadata try to provide several scenarios for that. For example if `title` is present in several places/sources try extracting from at least some of them. This would make it more future-proof in case some of the sources became unavailable.
+When extracting metadata try to do so from multiple sources. For example if `title` is present in several places, try extracting from at least some of them. This makes it more future-proof in case some of the sources become unavailable.
 
 #### Example
 
-Say `meta` from previous example has a `title` and you are about to extract it. Since `title` is mandatory meta field you should end up with something like:
+Say `meta` from the previous example has a `title` and you are about to extract it. Since `title` is a mandatory meta field you should end up with something like:
 
 ```python
 title = meta['title']
 ```
 
-If `title` disappeares from `meta` in future due to some changes on hoster's side the extraction would fail since `title` is mandatory. That's expected.
+If `title` disappears from `meta` in future due to some changes on the hoster's side the extraction would fail since `title` is mandatory. That's expected.
 
 Assume that you have another source you can extract `title` from, for example `og:title` HTML meta of a `webpage`. In this case you can provide a fallback scenario:
 
@@ -1084,7 +1120,7 @@ title = self._search_regex(
     webpage, 'title', group='title')
 ```
 
-Note how you tolerate potential changes in `style` attribute's value or switch from using double quotes to single for `class` attribute: 
+Note how you tolerate potential changes in the `style` attribute's value or a switch from using double quotes to single quotes for the `class` attribute:
 
 The code definitely should not look like:
 
@@ -1154,7 +1190,7 @@ with youtube_dl.YoutubeDL(ydl_opts) as ydl:
 
 # BUGS
 
-Bugs and suggestions should be reported at: <https://github.com/rg3/youtube-dl/issues>. Unless you were prompted so or there is another pertinent reason (e.g. GitHub fails to accept the bug report), please do not send bug reports via personal email. For discussions, join us in the IRC channel [#youtube-dl](irc://chat.freenode.net/#youtube-dl) on freenode ([webchat](http://webchat.freenode.net/?randomnick=1&channels=youtube-dl)).
+Bugs and suggestions should be reported at: <https://github.com/rg3/youtube-dl/issues>. Unless you were prompted to or there is another pertinent reason (e.g. GitHub fails to accept the bug report), please do not send bug reports via personal email. For discussions, join us in the IRC channel [#youtube-dl](irc://chat.freenode.net/#youtube-dl) on freenode ([webchat](http://webchat.freenode.net/?randomnick=1&channels=youtube-dl)).
 
 **Please include the full output of youtube-dl when run with `-v`**, i.e. **add** `-v` flag to **your command line**, copy the **whole** output and post it in the issue body wrapped in \`\`\` for better formatting. It should look similar to this:
 ```
@@ -1170,7 +1206,7 @@ $ youtube-dl -v <your command line>
 [debug] Proxy map: {}
 ...
 ```
-**Do not post screenshots of verbose log only plain text is acceptable.**
+**Do not post screenshots of verbose logs; only plain text is acceptable.**
 
 The output (including the first lines) contains important debugging information. Issues without the full output are often not reproducible and therefore do not get solved in short order, if ever.
 
@@ -1224,7 +1260,7 @@ Only post features that you (or an incapacitated friend you can personally talk
 
 ###  Is your question about youtube-dl?
 
-It may sound strange, but some bug reports we receive are completely unrelated to youtube-dl and relate to a different or even the reporter's own application. Please make sure that you are actually using youtube-dl. If you are using a UI for youtube-dl, report the bug to the maintainer of the actual application providing the UI. On the other hand, if your UI for youtube-dl fails in some way you believe is related to youtube-dl, by all means, go ahead and report the bug.
+It may sound strange, but some bug reports we receive are completely unrelated to youtube-dl and relate to a different, or even the reporter's own, application. Please make sure that you are actually using youtube-dl. If you are using a UI for youtube-dl, report the bug to the maintainer of the actual application providing the UI. On the other hand, if your UI for youtube-dl fails in some way you believe is related to youtube-dl, by all means, go ahead and report the bug.
 
 # COPYRIGHT
 
index ce68f26f9ca39bd298f5d4149346af686257e042..3d1391334bd38a23c7024192c6c36522acaa5613 100755 (executable)
@@ -25,5 +25,6 @@ def build_completion(opt_parser):
         filled_template = template.replace("{{flags}}", " ".join(opts_flag))
         f.write(filled_template)
 
+
 parser = youtube_dl.parseOpts()[0]
 build_completion(parser)
index 3b8021e74a8149b33753be5df590d2a9115a8305..30716ad8edc917da616a23753db30367458011d8 100644 (file)
@@ -2,11 +2,13 @@
 from __future__ import unicode_literals
 
 import base64
+import io
 import json
 import mimetypes
 import netrc
 import optparse
 import os
+import re
 import sys
 
 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
@@ -90,16 +92,23 @@ class GitHubReleaser(object):
 
 
 def main():
-    parser = optparse.OptionParser(usage='%prog VERSION BUILDPATH')
+    parser = optparse.OptionParser(usage='%prog CHANGELOG VERSION BUILDPATH')
     options, args = parser.parse_args()
-    if len(args) != 2:
+    if len(args) != 3:
         parser.error('Expected a version and a build directory')
 
-    version, build_path = args
+    changelog_file, version, build_path = args
+
+    with io.open(changelog_file, encoding='utf-8') as inf:
+        changelog = inf.read()
+
+    mobj = re.search(r'(?s)version %s\n{2}(.+?)\n{3}' % version, changelog)
+    body = mobj.group(1) if mobj else ''
 
     releaser = GitHubReleaser()
 
-    new_release = releaser.create_release(version, name='youtube-dl %s' % version)
+    new_release = releaser.create_release(
+        version, name='youtube-dl %s' % version, body=body)
     release_id = new_release['id']
 
     for asset in os.listdir(build_path):
index 41629d87d006fbaf4ba90cbb87bf60388fb7f7e5..51d19dd33d3bf5c05fc86f3c63e23c00871fda90 100755 (executable)
@@ -44,5 +44,6 @@ def build_completion(opt_parser):
     with open(FISH_COMPLETION_FILE, 'w') as f:
         f.write(filled_template)
 
+
 parser = youtube_dl.parseOpts()[0]
 build_completion(parser)
index 2e389fc8e742e26b0985f3492835ccb6790cef3e..e3df42cc2da6c99d9104c9bd2bac776af5a61c46 100644 (file)
@@ -23,6 +23,7 @@ def openssl_encode(algo, key, iv):
     out, _ = prog.communicate(secret_msg)
     return out
 
+
 iv = key = [0x20, 0x15] + 14 * [0]
 
 r = openssl_encode('aes-128-cbc', key, iv)
index 503c1372fd3589f45a207d043999a5286f6c5e1e..531c93c7089c1847a7e9018fcda5ca177f68547e 100755 (executable)
@@ -32,5 +32,6 @@ def main():
     with open('supportedsites.html', 'w', encoding='utf-8') as sitesf:
         sitesf.write(template)
 
+
 if __name__ == '__main__':
     main()
index 2e6e6641b8e385beba9a9b742125eb1a4d1e0cc3..c4e5fc1f40e72f987ddedd5c91d3367a8692150c 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 import re
index 5e454a429e46eeb108612690ae2b523ee98f30d5..226d1a5d6644953982db6346a00a21ec45f9b089 100755 (executable)
@@ -28,5 +28,6 @@ def main():
     with io.open(outfile, 'w', encoding='utf-8') as outf:
         outf.write(out)
 
+
 if __name__ == '__main__':
     main()
index 9a79c2bc5a6d57f6de31be45b29807e36bd8e12f..19114d30d1aa59e394e0c35e7ec9446eb4969c56 100644 (file)
@@ -59,6 +59,7 @@ def build_lazy_ie(ie, name):
         s += make_valid_template.format(valid_url=ie._make_valid_url())
     return s
 
+
 # find the correct sorting and add the required base classes so that subclasses
 # can be correctly created
 classes = _ALL_CLASSES[:-1]
index 8cb4a46380253643e6df2370058c433094cf159b..764795bc5b1e560b033c2e9a0c395cecb10b1242 100644 (file)
@@ -41,5 +41,6 @@ def main():
     with io.open(outfile, 'w', encoding='utf-8') as outf:
         outf.write(out)
 
+
 if __name__ == '__main__':
     main()
index ce548739f57f5c52e6c73d46f2095e894c8f940d..f9fe63f1ffd5073b312f22e8f08fb7798fa3f7a4 100644 (file)
@@ -74,5 +74,6 @@ def filter_options(readme):
 
     return ret
 
+
 if __name__ == '__main__':
     main()
index ca6ae1b491215b0bcff9a6b5398f5e6de127a0d7..4db5def5d8534ef73664fc90d00433d90d363bbc 100755 (executable)
@@ -60,6 +60,9 @@ if ! type pandoc >/dev/null 2>/dev/null; then echo 'ERROR: pandoc is missing'; e
 if ! python3 -c 'import rsa' 2>/dev/null; then echo 'ERROR: python3-rsa is missing'; exit 1; fi
 if ! python3 -c 'import wheel' 2>/dev/null; then echo 'ERROR: wheel is missing'; exit 1; fi
 
+read -p "Is ChangeLog up to date? (y/n) " -n 1
+if [[ ! $REPLY =~ ^[Yy]$ ]]; then exit 1; fi
+
 /bin/echo -e "\n### First of all, testing..."
 make clean
 if $skip_tests ; then
@@ -107,7 +110,7 @@ RELEASE_FILES="youtube-dl youtube-dl.exe youtube-dl-$version.tar.gz"
 for f in $RELEASE_FILES; do gpg --passphrase-repeat 5 --detach-sig "build/$version/$f"; done
 
 ROOT=$(pwd)
-python devscripts/create-github-release.py $version "$ROOT/build/$version"
+python devscripts/create-github-release.py ChangeLog $version "$ROOT/build/$version"
 
 ssh ytdl@yt-dl.org "sh html/update_latest.sh $version"
 
index 04728e8e2ce763ca886853061875c59e4f645921..60aaf76cc3297adc6e80984890e33e4267b95c2b 100755 (executable)
@@ -44,5 +44,6 @@ def build_completion(opt_parser):
     with open(ZSH_COMPLETION_FILE, "w") as f:
         f.write(template)
 
+
 parser = youtube_dl.parseOpts()[0]
 build_completion(parser)
index 594ca61a6bf984d173620a3e95eaca28b22cda5a..0aaf1b8fcf8220301d63250e83cb1587b618388c 100644 (file)
@@ -1,4 +1,4 @@
-# -*- coding: utf-8 -*-
+# coding: utf-8
 #
 # youtube-dl documentation build configuration file, created by
 # sphinx-quickstart on Fri Mar 14 21:05:43 2014.
index 189b9301dfc6eb84610305a458f3e16685e7e83a..edb76d9cc9eef838d1146a487e80fbc35a9f5ae7 100644 (file)
  - **5min**
  - **8tracks**
  - **91porn**
+ - **9c9media**
+ - **9c9media:stack**
  - **9gag**
  - **9now.com.au**
  - **abc.net.au**
- - **Abc7News**
+ - **abc.net.au:iview**
  - **abcnews**
  - **abcnews:video**
+ - **abcotvs**: ABC Owned Television Stations
+ - **abcotvs:clips**
  - **AcademicEarth:Course**
  - **acast**
  - **acast:channel**
  - **AdultSwim**
  - **aenetworks**: A+E Networks: A&E, Lifetime, History.com, FYI Network
  - **AfreecaTV**: afreecatv.com
- - **Aftonbladet**
  - **AirMozilla**
  - **AlJazeera**
  - **Allocine**
  - **AlphaPorno**
  - **AMCNetworks**
+ - **anderetijden**: npo.nl and ntr.nl
  - **AnimeOnDemand**
  - **anitube.se**
  - **AnySex**
  - **audiomack**
  - **audiomack:album**
  - **auroravid**: AuroraVid
+ - **AWAAN**
+ - **awaan:live**
+ - **awaan:season**
+ - **awaan:video**
  - **Azubu**
  - **AzubuLive**
  - **BaiduVideo**: 百度视频
  - **bbc.co.uk:article**: BBC articles
  - **bbc.co.uk:iplayer:playlist**
  - **bbc.co.uk:playlist**
- - **BeatportPro**
+ - **Beatport**
  - **Beeg**
  - **BehindKink**
+ - **BellMedia**
  - **Bet**
  - **Bigflix**
  - **Bild**: Bild.de
  - **bt:vestlendingen**: Bergens Tidende - Vestlendingen
  - **BuzzFeed**
  - **BYUtv**
+ - **BYUtvEvent**
  - **Camdemy**
  - **CamdemyFolder**
  - **CamWithHer**
  - **Canvas**
  - **CarambaTV**
  - **CarambaTVPage**
- - **CBC**
- - **CBCPlayer**
+ - **CartoonNetwork**
+ - **cbc.ca**
+ - **cbc.ca:player**
+ - **cbc.ca:watch**
+ - **cbc.ca:watch:video**
  - **CBS**
  - **CBSInteractive**
  - **CBSLocal**
- - **CBSNews**: CBS News
- - **CBSNewsLiveVideo**: CBS News Live Videos
+ - **cbsnews**: CBS News
+ - **cbsnews:livevideo**: CBS News Live Videos
  - **CBSSports**
+ - **CCTV**
  - **CDA**
  - **CeskaTelevize**
  - **channel9**: Channel 9
+ - **CharlieRose**
  - **Chaturbate**
  - **Chilloutzone**
  - **chirbit**
  - **CollegeRama**
  - **ComCarCoff**
  - **ComedyCentral**
+ - **ComedyCentralFullEpisodes**
  - **ComedyCentralShortname**
  - **ComedyCentralTV**
  - **CondeNast**: Condé Nast media group: Allure, Architectural Digest, Ars Technica, Bon Appétit, Brides, Condé Nast, Condé Nast Traveler, Details, Epicurious, GQ, Glamour, Golf Digest, SELF, Teen Vogue, The New Yorker, Vanity Fair, Vogue, W Magazine, WIRED
  - **CSNNE**
  - **CSpan**: C-SPAN
  - **CtsNews**: 華視新聞
- - **CTV**
  - **CTVNews**
  - **culturebox.francetvinfo.fr**
  - **CultureUnplugged**
+ - **curiositystream**
+ - **curiositystream:collection**
  - **CWTV**
  - **DailyMail**
  - **dailymotion**
  - **daum.net:playlist**
  - **daum.net:user**
  - **DBTV**
- - **DCN**
- - **dcn:live**
- - **dcn:season**
- - **dcn:video**
  - **DctpTv**
  - **DeezerPlaylist**
  - **defense.gouv.fr**
  - **EroProfile**
  - **Escapist**
  - **ESPN**
+ - **ESPNArticle**
  - **EsriVideo**
  - **Europa**
  - **EveryonesMixtape**
- - **exfm**: ex.fm
  - **ExpoTV**
  - **ExtremeTube**
  - **EyedoTV**
  - **facebook**
+ - **FacebookPluginsVideo**
  - **faz.net**
  - **fc2**
+ - **fc2:embed**
  - **Fczenit**
  - **features.aol.com**
  - **fernsehkritik.tv**
  - **FootyRoom**
  - **Formula1**
  - **FOX**
+ - **FOX9**
  - **Foxgay**
- - **FoxNews**: Fox News and Fox Business Video
+ - **foxnews**: Fox News and Fox Business Video
+ - **foxnews:article**
+ - **foxnews:insider**
  - **FoxSports**
  - **france2.fr:generation-quoi**
  - **FranceCulture**
  - **Glide**: Glide mobile video messages (glide.me)
  - **Globo**
  - **GloboArticle**
+ - **Go**
  - **GodTube**
  - **GodTV**
  - **Golem**
  - **Groupon**
  - **Hark**
  - **HBO**
+ - **HBOEpisode**
  - **HearThisAt**
  - **Heise**
  - **HellPorno**
  - **Helsinki**: helsinki.fi
  - **HentaiStigma**
  - **HGTV**
+ - **hgtv.com:show**
  - **HistoricFilms**
  - **history:topic**: History.com Topic
  - **hitbox**
  - **HowStuffWorks**
  - **HRTi**
  - **HRTiPlaylist**
+ - **Huajiao**: 花椒直播
  - **HuffPost**: Huffington Post
  - **Hypem**
  - **Iconosquare**
  - **ivi**: ivi.ru
  - **ivi:compilation**: ivi.ru compilations
  - **ivideon**: Ivideon TV
+ - **Iwara**
  - **Izlesene**
+ - **Jamendo**
+ - **JamendoAlbum**
  - **JeuxVideo**
  - **Jove**
  - **jpopsuki.tv**
  - **KarriereVideos**
  - **keek**
  - **KeezMovies**
+ - **Ketnet**
  - **KhanAcademy**
  - **KickStarter**
  - **KonserthusetPlay**
  - **kuwo:song**: 酷我音乐
  - **la7.it**
  - **Laola1Tv**
+ - **LCI**
  - **Lcp**
  - **LcpPlay**
  - **Le**: 乐视网
  - **Learnr**
  - **Lecture2Go**
+ - **LEGO**
  - **Lemonde**
  - **LePlaylist**
  - **LetvCloud**: 乐视云
  - **mailru**: Видео@Mail.Ru
  - **MakersChannel**
  - **MakerTV**
+ - **mangomolo:live**
+ - **mangomolo:video**
  - **MatchTV**
  - **MDR**: MDR.DE and KiKA
  - **media.ccc.de**
  - **Metacritic**
  - **Mgoon**
  - **MGTV**: 芒果TV
+ - **MiaoPai**
  - **Minhateca**
  - **MinistryGrid**
  - **Minoto**
  - **MovieClips**
  - **MovieFap**
  - **Moviezine**
+ - **MovingImage**
  - **MPORA**
  - **MSN**
  - **mtg**: MTG services
- - **MTV**
+ - **mtv**
  - **mtv.de**
+ - **mtv:video**
  - **mtvservices:embedded**
  - **MuenchenTV**: münchen.tv
  - **MusicPlayOn**
  - **NBA**
  - **NBC**
  - **NBCNews**
+ - **NBCOlympics**
  - **NBCSports**
  - **NBCSportsVPlayer**
  - **ndr**: NDR.de - Norddeutscher Rundfunk
  - **NextMediaActionNews**: 蘋果日報 - 動新聞
  - **nfb**: National Film Board of Canada
  - **nfl.com**
+ - **NhkVod**
  - **nhl.com**
  - **nhl.com:news**: NHL news
  - **nhl.com:videocenter**
  - **nhl.com:videocenter:category**: NHL videocenter category
  - **nick.com**
  - **nick.de**
+ - **nicknight**
  - **niconico**: ニコニコ動画
  - **NiconicoPlaylist**
- - **NineCNineMedia**
  - **Nintendo**
  - **njoy**: N-JOY
  - **njoy:embed**
+ - **NobelPrize**
  - **Noco**
  - **Normalboots**
  - **NosVideo**
  - **Nuvid**
  - **NYTimes**
  - **NYTimesArticle**
+ - **NZZ**
  - **ocw.mit.edu**
  - **OdaTV**
  - **Odnoklassniki**
  - **orf:iptv**: iptv.ORF.at
  - **orf:oe1**: Radio Österreich 1
  - **orf:tvthek**: ORF TVthek
+ - **PandaTV**: 熊猫TV
  - **pandora.tv**: 판도라TV
  - **parliamentlive.tv**: UK parliament videos
  - **Patreon**
  - **Pinkbike**
  - **Pladform**
  - **play.fm**
- - **played.to**
  - **PlaysTV**
  - **Playtvak**: Playtvak.cz, iDNES.cz and Lidovky.cz
  - **Playvid**
  - **podomatic**
  - **Pokemon**
  - **PolskieRadio**
+ - **PolskieRadioCategory**
+ - **PornCom**
  - **PornHd**
  - **PornHub**: PornHub and Thumbzilla
  - **PornHubPlaylist**
  - **RDS**: RDS.ca
  - **RedTube**
  - **RegioTV**
+ - **RENTV**
+ - **RENTVArticle**
  - **Restudy**
  - **Reuters**
  - **ReverbNation**
  - **revision3:embed**
  - **RICE**
  - **RingTV**
+ - **RMCDecouverte**
  - **RockstarGames**
  - **RoosterTeeth**
  - **RottenTomatoes**
  - **Screencast**
  - **ScreencastOMatic**
  - **ScreenJunkies**
- - **ScreenwaveMedia**
  - **Seeker**
  - **SenateISVP**
  - **SendtoNews**
  - **ServingSys**
  - **Sexu**
  - **Shahid**
- - **Shared**: shared.sx and vivo.sx
+ - **Shared**: shared.sx
  - **ShareSix**
  - **Sina**
  - **SixPlay**
  - **sr:mediathek**: Saarländischer Rundfunk
  - **SRGSSR**
  - **SRGSSRPlay**: srf.ch, rts.ch, rsi.ch, rtr.ch and swissinfo.ch play sites
- - **SSA**
  - **stanfordoc**: Stanford Open ClassRoom
  - **Steam**
  - **Stitcher**
  - **SWRMediathek**
  - **Syfy**
  - **SztvHu**
+ - **t-online.de**
  - **Tagesschau**
  - **tagesschau:player**
  - **Tass**
+ - **TBS**
  - **TDSLifeway**
  - **teachertube**: teachertube.com videos
  - **teachertube:user:collection**: teachertube.com user and collection videos
  - **TeachingChannel**
  - **Teamcoco**
- - **TeamFour**
+ - **TeamFourStar**
  - **TechTalks**
  - **techtv.mit.edu**
  - **ted**
  - **Telecinco**: telecinco.es, cuatro.com and mediaset.es
  - **Telegraaf**
  - **TeleMB**
+ - **TeleQuebec**
  - **TeleTask**
  - **Telewebion**
  - **TF1**
+ - **TFO**
  - **TheIntercept**
+ - **theoperaplatform**
  - **ThePlatform**
  - **ThePlatformFeed**
  - **TheScene**
  - **TheSixtyOne**
  - **TheStar**
+ - **TheWeatherChannel**
  - **ThisAmericanLife**
  - **ThisAV**
- - **THVideo**
- - **THVideoPlaylist**
+ - **ThisOldHouse**
  - **tinypic**: tinypic.com videos
  - **tlc.de**
  - **TMZ**
  - **ToypicsUser**: Toypics user profile
  - **TrailerAddict** (Currently broken)
  - **Trilulilu**
- - **trollvids**
- - **TruTube**
+ - **TruTV**
  - **Tube8**
  - **TubiTv**
  - **tudou**
  - **TV2Article**
  - **TV3**
  - **TV4**: tv4.se and tv4play.se
+ - **TVANouvelles**
+ - **TVANouvellesArticle**
  - **TVC**
  - **TVCArticle**
  - **tvigle**: Интернет-телевидение Tvigle.ru
  - **tvland.com**
+ - **TVNoe**
  - **tvp**: Telewizja Polska
  - **tvp:embed**: Telewizja Polska
  - **tvp:series**
  - **uplynk:preplay**
  - **Urort**: NRK P3 Urørt
  - **URPlay**
+ - **USANetwork**
  - **USAToday**
  - **ustream**
  - **ustream:channel**
  - **Vimple**: Vimple - one-click video hosting
  - **Vine**
  - **vine:user**
+ - **Vivo**: vivo.sx
  - **vk**: VK
  - **vk:uservideos**: VK - User's Videos
  - **vk:wallpost**
  - **VRT**
  - **vube**: Vube.com
  - **VuClip**
+ - **VyboryMos**
+ - **Vzaar**
  - **Walla**
  - **washingtonpost**
  - **washingtonpost:article**
  - **WatchIndianPorn**: Watch Indian Porn
  - **WDR**
  - **wdr:mobile**
+ - **Webcaster**
+ - **WebcasterFeed**
  - **WebOfStories**
  - **WebOfStoriesPlaylist**
  - **WeiqiTV**: WQTV
  - **wholecloud**: WholeCloud
  - **Wimp**
  - **Wistia**
- - **WNL**
+ - **wnl**: npo.nl and ntr.nl
  - **WorldStarHipHop**
  - **wrzuta.pl**
  - **wrzuta.pl:playlist**
  - **Zapiks**
  - **ZDF**
  - **ZDFChannel**
- - **zingmp3:album**: mp3.zing.vn albums
- - **zingmp3:song**: mp3.zing.vn songs
+ - **zingmp3**: mp3.zing.vn
index 508b27f3707898d07d303cd1ce44b7e4d54b152f..ce6dd1870bc52951d268f96aa4dc68ea6f92e04d 100644 (file)
--- a/setup.py
+++ b/setup.py
@@ -1,5 +1,5 @@
 #!/usr/bin/env python
-# -*- coding: utf-8 -*-
+# coding: utf-8
 
 from __future__ import print_function
 
index a98305c747635c1b1638f761d7bdf9bead353d19..437c7270ee6aeaa8eba588badfb3bf26d79ea37d 100644 (file)
@@ -84,5 +84,6 @@ class TestInfoExtractor(unittest.TestCase):
         self.assertRaises(ExtractorError, self.ie._download_json, uri, None)
         self.assertEqual(self.ie._download_json(uri, None, fatal=False), None)
 
+
 if __name__ == '__main__':
     unittest.main()
index 0dfe25c00165a015338f2732cdaf1bccf769195a..8bf00bea9818f6b91fa7b91e89fcbbf8a6cc0dd3 100644 (file)
@@ -605,6 +605,7 @@ class TestYoutubeDL(unittest.TestCase):
             'extractor': 'TEST',
             'duration': 30,
             'filesize': 10 * 1024,
+            'playlist_id': '42',
         }
         second = {
             'id': '2',
@@ -614,6 +615,7 @@ class TestYoutubeDL(unittest.TestCase):
             'duration': 10,
             'description': 'foo',
             'filesize': 5 * 1024,
+            'playlist_id': '43',
         }
         videos = [first, second]
 
@@ -650,6 +652,10 @@ class TestYoutubeDL(unittest.TestCase):
         res = get_videos(f)
         self.assertEqual(res, ['1'])
 
+        f = match_filter_func('playlist_id = 42')
+        res = get_videos(f)
+        self.assertEqual(res, ['1'])
+
     def test_playlist_items_selection(self):
         entries = [{
             'id': compat_str(i),
index 315a3f5ae6a597662d05f56e97672b4ff93aff10..54078a66d61ad49a05600e9efca48472194f0fa5 100644 (file)
@@ -51,5 +51,6 @@ class TestAES(unittest.TestCase):
         decrypted = (aes_decrypt_text(encrypted, password, 32))
         self.assertEqual(decrypted, self.secret_msg)
 
+
 if __name__ == '__main__':
     unittest.main()
index a3f1c0644f32b180a2b177e76dbea44854b0983e..4639529897967ebc49883e488f5624a038c70c44 100644 (file)
@@ -60,6 +60,7 @@ def _file_md5(fn):
     with open(fn, 'rb') as f:
         return hashlib.md5(f.read()).hexdigest()
 
+
 defs = gettestcases()
 
 
@@ -217,6 +218,7 @@ def generator(test_case):
 
     return test_template
 
+
 # And add them to TestDownload
 for n, test_case in enumerate(defs):
     test_method = generator(test_case)
index 620db080e9bd836c7239a93e86e0944b95f793e0..11661bb68148f4eb229b50c37f67dc744491c7df 100644 (file)
@@ -39,5 +39,6 @@ class TestExecution(unittest.TestCase):
         _, stderr = p.communicate()
         self.assertFalse(stderr)
 
+
 if __name__ == '__main__':
     unittest.main()
index fdc68ccb42c85410788ecb7bcb1eafd802b3a794..7a7a3510ffb46e2791153dff5e4157bb21433056 100644 (file)
@@ -87,7 +87,7 @@ class TestHTTP(unittest.TestCase):
 
         ydl = YoutubeDL({'logger': FakeLogger()})
         r = ydl.extract_info('http://localhost:%d/302' % self.port)
-        self.assertEqual(r['url'], 'http://localhost:%d/vid.mp4' % self.port)
+        self.assertEqual(r['entries'][0]['url'], 'http://localhost:%d/vid.mp4' % self.port)
 
 
 class TestHTTPS(unittest.TestCase):
@@ -111,7 +111,7 @@ class TestHTTPS(unittest.TestCase):
 
         ydl = YoutubeDL({'logger': FakeLogger(), 'nocheckcertificate': True})
         r = ydl.extract_info('https://localhost:%d/video.html' % self.port)
-        self.assertEqual(r['url'], 'https://localhost:%d/vid.mp4' % self.port)
+        self.assertEqual(r['entries'][0]['url'], 'https://localhost:%d/vid.mp4' % self.port)
 
 
 def _build_proxy_handler(name):
@@ -169,5 +169,6 @@ class TestProxy(unittest.TestCase):
         # b'xn--fiq228c' is '中文'.encode('idna')
         self.assertEqual(response, 'normal: http://xn--fiq228c.tw/')
 
+
 if __name__ == '__main__':
     unittest.main()
index 9d95cb60618ae4ee122b46884a2eae6233dffbec..789059dbea38026362caea2be08f9d36796a7b1d 100644 (file)
@@ -43,5 +43,6 @@ class TestIqiyiSDKInterpreter(unittest.TestCase):
         ie._login()
         self.assertTrue('unable to log in:' in logger.messages[0])
 
+
 if __name__ == '__main__':
     unittest.main()
index 63c350b8fa986fc63d70af43a6a0fdcaf5958eed..c24b8ca742acc308ca9c455378564bbac053765d 100644 (file)
@@ -104,6 +104,14 @@ class TestJSInterpreter(unittest.TestCase):
         }''')
         self.assertEqual(jsi.call_function('x'), [20, 20, 30, 40, 50])
 
+    def test_call(self):
+        jsi = JSInterpreter('''
+        function x() { return 2; }
+        function y(a) { return x() + a; }
+        function z() { return y(3); }
+        ''')
+        self.assertEqual(jsi.call_function('z'), 5)
+
 
 if __name__ == '__main__':
     unittest.main()
index 74fcf91c0cb622662d3a7c2d4985377a0ea6ce7b..2e3cd0179db9dd97792fafa695f7e7a043542a38 100644 (file)
@@ -39,6 +39,8 @@ from youtube_dl.utils import (
     is_html,
     js_to_json,
     limit_length,
+    mimetype2ext,
+    month_by_name,
     ohdave_rsa_encrypt,
     OnDemandPagedList,
     orderedSet,
@@ -67,6 +69,7 @@ from youtube_dl.utils import (
     uppercase_escape,
     lowercase_escape,
     url_basename,
+    base_url,
     urlencode_postdata,
     urshift,
     update_url_query,
@@ -290,6 +293,7 @@ class TestUtil(unittest.TestCase):
         self.assertEqual(unified_strdate('25-09-2014'), '20140925')
         self.assertEqual(unified_strdate('27.02.2016 17:30'), '20160227')
         self.assertEqual(unified_strdate('UNKNOWN DATE FORMAT'), None)
+        self.assertEqual(unified_strdate('Feb 7, 2016 at 6:35 pm'), '20160207')
 
     def test_unified_timestamps(self):
         self.assertEqual(unified_timestamp('December 21, 2010'), 1292889600)
@@ -310,6 +314,7 @@ class TestUtil(unittest.TestCase):
         self.assertEqual(unified_timestamp('27.02.2016 17:30'), 1456594200)
         self.assertEqual(unified_timestamp('UNKNOWN DATE FORMAT'), None)
         self.assertEqual(unified_timestamp('May 16, 2016 11:15 PM'), 1463440500)
+        self.assertEqual(unified_timestamp('Feb 7, 2016 at 6:35 pm'), 1454870100)
 
     def test_determine_ext(self):
         self.assertEqual(determine_ext('http://example.com/foo/bar.mp4/?download'), 'mp4')
@@ -433,6 +438,13 @@ class TestUtil(unittest.TestCase):
             url_basename('http://media.w3.org/2010/05/sintel/trailer.mp4'),
             'trailer.mp4')
 
+    def test_base_url(self):
+        self.assertEqual(base_url('http://foo.de/'), 'http://foo.de/')
+        self.assertEqual(base_url('http://foo.de/bar'), 'http://foo.de/')
+        self.assertEqual(base_url('http://foo.de/bar/'), 'http://foo.de/bar/')
+        self.assertEqual(base_url('http://foo.de/bar/baz'), 'http://foo.de/bar/')
+        self.assertEqual(base_url('http://foo.de/bar/baz?x=z/x/c'), 'http://foo.de/bar/')
+
     def test_parse_age_limit(self):
         self.assertEqual(parse_age_limit(None), None)
         self.assertEqual(parse_age_limit(False), None)
@@ -625,6 +637,22 @@ class TestUtil(unittest.TestCase):
             limit_length('foo bar baz asd', 12).startswith('foo bar'))
         self.assertTrue('...' in limit_length('foo bar baz asd', 12))
 
+    def test_mimetype2ext(self):
+        self.assertEqual(mimetype2ext(None), None)
+        self.assertEqual(mimetype2ext('video/x-flv'), 'flv')
+        self.assertEqual(mimetype2ext('application/x-mpegURL'), 'm3u8')
+        self.assertEqual(mimetype2ext('text/vtt'), 'vtt')
+        self.assertEqual(mimetype2ext('text/vtt;charset=utf-8'), 'vtt')
+        self.assertEqual(mimetype2ext('text/html; charset=utf-8'), 'html')
+
+    def test_month_by_name(self):
+        self.assertEqual(month_by_name(None), None)
+        self.assertEqual(month_by_name('December', 'en'), 12)
+        self.assertEqual(month_by_name('décembre', 'fr'), 12)
+        self.assertEqual(month_by_name('December'), 12)
+        self.assertEqual(month_by_name('décembre'), None)
+        self.assertEqual(month_by_name('Unknown', 'unknown'), None)
+
     def test_parse_codecs(self):
         self.assertEqual(parse_codecs(''), {})
         self.assertEqual(parse_codecs('avc1.77.30, mp4a.40.2'), {
@@ -712,6 +740,9 @@ class TestUtil(unittest.TestCase):
         inp = '''{"foo":101}'''
         self.assertEqual(js_to_json(inp), '''{"foo":101}''')
 
+        inp = '''{"duration": "00:01:07"}'''
+        self.assertEqual(js_to_json(inp), '''{"duration": "00:01:07"}''')
+
     def test_js_to_json_edgecases(self):
         on = js_to_json("{abc_def:'1\\'\\\\2\\\\\\'3\"4'}")
         self.assertEqual(json.loads(on), {"abc_def": "1'\\2\\'3\"4"})
@@ -817,7 +848,10 @@ class TestUtil(unittest.TestCase):
         self.assertEqual(parse_filesize('2 MiB'), 2097152)
         self.assertEqual(parse_filesize('5 GB'), 5000000000)
         self.assertEqual(parse_filesize('1.2Tb'), 1200000000000)
+        self.assertEqual(parse_filesize('1.2tb'), 1200000000000)
         self.assertEqual(parse_filesize('1,24 KB'), 1240)
+        self.assertEqual(parse_filesize('1,24 kb'), 1240)
+        self.assertEqual(parse_filesize('8.5 megabytes'), 8500000)
 
     def test_parse_count(self):
         self.assertEqual(parse_count(None), None)
@@ -1041,5 +1075,6 @@ The first line
         self.assertEqual(get_element_by_class('foo', html), 'nice')
         self.assertEqual(get_element_by_class('no-such-class', html), None)
 
+
 if __name__ == '__main__':
     unittest.main()
index 96a66f7a09b0f2173cf1b01f20c81f42985ef1b6..c1465fe8c51d8bf3789606fbf6c61da0deabfa90 100644 (file)
@@ -66,5 +66,6 @@ class TestVerboseOutput(unittest.TestCase):
         self.assertTrue(b'-p' in serr)
         self.assertTrue(b'secret' not in serr)
 
+
 if __name__ == '__main__':
     unittest.main()
index 8de08f2d6d3974bd2d28265c323e7ff76d1317a3..41abdfe3b99eaabf562ebabc222fc50fead77631 100644 (file)
@@ -24,6 +24,7 @@ class YoutubeDL(youtube_dl.YoutubeDL):
         super(YoutubeDL, self).__init__(*args, **kwargs)
         self.to_stderr = self.to_screen
 
+
 params = get_params({
     'writeannotations': True,
     'skip_download': True,
@@ -74,5 +75,6 @@ class TestAnnotations(unittest.TestCase):
     def tearDown(self):
         try_rm(ANNOTATIONS_FILE)
 
+
 if __name__ == '__main__':
     unittest.main()
index af1c454217d0bec66a27a1bdc89c02195bb6274f..7a33dbf88e90f2d901b144759ffa90552787885c 100644 (file)
@@ -66,5 +66,6 @@ class TestYoutubeLists(unittest.TestCase):
         for entry in result['entries']:
             self.assertTrue(entry.get('title'))
 
+
 if __name__ == '__main__':
     unittest.main()
index 060864434fe2ab81839dcde17475e6e9f61db0f2..f0c370eeedc8942abc0b8cd8c10e57b4361d00c2 100644 (file)
@@ -114,6 +114,7 @@ def make_tfunc(url, stype, sig_input, expected_sig):
     test_func.__name__ = str('test_signature_' + stype + '_' + test_id)
     setattr(TestSignature, test_func.__name__, test_func)
 
+
 for test_spec in _TESTS:
     make_tfunc(*test_spec)
 
index e844dc98a5b3915070ffae079395233de7ed04f7..53f20ac2cb1bd16398e160db329004b49d6bf424 100755 (executable)
@@ -1,5 +1,5 @@
 #!/usr/bin/env python
-# -*- coding: utf-8 -*-
+# coding: utf-8
 
 from __future__ import absolute_import, unicode_literals
 
@@ -131,6 +131,9 @@ class YoutubeDL(object):
     username:          Username for authentication purposes.
     password:          Password for authentication purposes.
     videopassword:     Password for accessing a video.
+    ap_mso:            Adobe Pass multiple-system operator identifier.
+    ap_username:       Multiple-system operator account username.
+    ap_password:       Multiple-system operator account password.
     usenetrc:          Use netrc for authentication instead.
     verbose:           Print additional info to stdout.
     quiet:             Do not print messages to stdout.
@@ -1256,8 +1259,10 @@ class YoutubeDL(object):
                 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
         if thumbnails:
             thumbnails.sort(key=lambda t: (
-                t.get('preference'), t.get('width'), t.get('height'),
-                t.get('id'), t.get('url')))
+                t.get('preference') if t.get('preference') is not None else -1,
+                t.get('width') if t.get('width') is not None else -1,
+                t.get('height') if t.get('height') is not None else -1,
+                t.get('id') if t.get('id') is not None else '', t.get('url')))
             for i, t in enumerate(thumbnails):
                 t['url'] = sanitize_url(t['url'])
                 if t.get('width') and t.get('height'):
@@ -1299,7 +1304,7 @@ class YoutubeDL(object):
                 for subtitle_format in subtitle:
                     if subtitle_format.get('url'):
                         subtitle_format['url'] = sanitize_url(subtitle_format['url'])
-                    if 'ext' not in subtitle_format:
+                    if subtitle_format.get('ext') is None:
                         subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
 
         if self.params.get('listsubtitles', False):
@@ -1354,7 +1359,7 @@ class YoutubeDL(object):
                     note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
                 )
             # Automatically determine file extension if missing
-            if 'ext' not in format:
+            if format.get('ext') is None:
                 format['ext'] = determine_ext(format['url']).lower()
             # Automatically determine protocol if missing (useful for format
             # selection purposes)
@@ -1653,7 +1658,7 @@ class YoutubeDL(object):
                         video_ext, audio_ext = audio.get('ext'), video.get('ext')
                         if video_ext and audio_ext:
                             COMPATIBLE_EXTS = (
-                                ('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v'),
+                                ('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma'),
                                 ('webm')
                             )
                             for exts in COMPATIBLE_EXTS:
index a9730292cd6801c1bbd7f68b586e288bad7fe21d..6850d95e1ff359453571a6ac635d6ffa99ae038f 100644 (file)
@@ -1,5 +1,5 @@
 #!/usr/bin/env python
-# -*- coding: utf-8 -*-
+# coding: utf-8
 
 from __future__ import unicode_literals
 
@@ -34,12 +34,14 @@ from .utils import (
     setproctitle,
     std_headers,
     write_string,
+    render_table,
 )
 from .update import update_self
 from .downloader import (
     FileDownloader,
 )
 from .extractor import gen_extractors, list_extractors
+from .extractor.adobepass import MSO_INFO
 from .YoutubeDL import YoutubeDL
 
 
@@ -93,8 +95,7 @@ def _real_main(argv=None):
                 write_string('[debug] Batch file urls: ' + repr(batch_urls) + '\n')
         except IOError:
             sys.exit('ERROR: batch file could not be read')
-    all_urls = batch_urls + args
-    all_urls = [url.strip() for url in all_urls]
+    all_urls = batch_urls + [url.strip() for url in args]  # batch_urls are already striped in read_batch_urls
     _enc = preferredencoding()
     all_urls = [url.decode(_enc, 'ignore') if isinstance(url, bytes) else url for url in all_urls]
 
@@ -118,18 +119,26 @@ def _real_main(argv=None):
                 desc += ' (Example: "%s%s:%s" )' % (ie.SEARCH_KEY, random.choice(_COUNTS), random.choice(_SEARCHES))
             write_string(desc + '\n', out=sys.stdout)
         sys.exit(0)
+    if opts.ap_list_mso:
+        table = [[mso_id, mso_info['name']] for mso_id, mso_info in MSO_INFO.items()]
+        write_string('Supported TV Providers:\n' + render_table(['mso', 'mso name'], table) + '\n', out=sys.stdout)
+        sys.exit(0)
 
     # Conflicting, missing and erroneous options
     if opts.usenetrc and (opts.username is not None or opts.password is not None):
         parser.error('using .netrc conflicts with giving username/password')
     if opts.password is not None and opts.username is None:
         parser.error('account username missing\n')
+    if opts.ap_password is not None and opts.ap_username is None:
+        parser.error('TV Provider account username missing\n')
     if opts.outtmpl is not None and (opts.usetitle or opts.autonumber or opts.useid):
         parser.error('using output template conflicts with using title, video ID or auto number')
     if opts.usetitle and opts.useid:
         parser.error('using title conflicts with using video ID')
     if opts.username is not None and opts.password is None:
         opts.password = compat_getpass('Type account password and press [Return]: ')
+    if opts.ap_username is not None and opts.ap_password is None:
+        opts.ap_password = compat_getpass('Type TV provider account password and press [Return]: ')
     if opts.ratelimit is not None:
         numeric_limit = FileDownloader.parse_bytes(opts.ratelimit)
         if numeric_limit is None:
@@ -155,6 +164,8 @@ def _real_main(argv=None):
             parser.error('max sleep interval must be greater than or equal to min sleep interval')
     else:
         opts.max_sleep_interval = opts.sleep_interval
+    if opts.ap_mso and opts.ap_mso not in MSO_INFO:
+        parser.error('Unsupported TV Provider, use --ap-list-mso to get a list of supported TV Providers')
 
     def parse_retries(retries):
         if retries in ('inf', 'infinite'):
@@ -254,8 +265,6 @@ def _real_main(argv=None):
         postprocessors.append({
             'key': 'FFmpegEmbedSubtitle',
         })
-    if opts.xattrs:
-        postprocessors.append({'key': 'XAttrMetadata'})
     if opts.embedthumbnail:
         already_have_thumbnail = opts.writethumbnail or opts.write_all_thumbnails
         postprocessors.append({
@@ -264,6 +273,10 @@ def _real_main(argv=None):
         })
         if not already_have_thumbnail:
             opts.writethumbnail = True
+    # XAttrMetadataPP should be run after post-processors that may change file
+    # contents
+    if opts.xattrs:
+        postprocessors.append({'key': 'XAttrMetadata'})
     # Please keep ExecAfterDownload towards the bottom as it allows the user to modify the final file in any way.
     # So if the user is able to remove the file before your postprocessor runs it might cause a few problems.
     if opts.exec_cmd:
@@ -271,12 +284,6 @@ def _real_main(argv=None):
             'key': 'ExecAfterDownload',
             'exec_cmd': opts.exec_cmd,
         })
-    if opts.xattr_set_filesize:
-        try:
-            import xattr
-            xattr  # Confuse flake8
-        except ImportError:
-            parser.error('setting filesize xattr requested but python-xattr is not available')
     external_downloader_args = None
     if opts.external_downloader_args:
         external_downloader_args = compat_shlex_split(opts.external_downloader_args)
@@ -293,6 +300,9 @@ def _real_main(argv=None):
         'password': opts.password,
         'twofactor': opts.twofactor,
         'videopassword': opts.videopassword,
+        'ap_mso': opts.ap_mso,
+        'ap_username': opts.ap_username,
+        'ap_password': opts.ap_password,
         'quiet': (opts.quiet or any_getting or any_printing),
         'no_warnings': opts.no_warnings,
         'forceurl': opts.geturl,
@@ -318,6 +328,7 @@ def _real_main(argv=None):
         'nooverwrites': opts.nooverwrites,
         'retries': opts.retries,
         'fragment_retries': opts.fragment_retries,
+        'skip_unavailable_fragments': opts.skip_unavailable_fragments,
         'buffersize': opts.buffersize,
         'noresizebuffer': opts.noresizebuffer,
         'continuedl': opts.continue_dl,
@@ -438,4 +449,5 @@ def main(argv=None):
     except KeyboardInterrupt:
         sys.exit('\nERROR: Interrupted by user')
 
+
 __all__ = ['main', 'YoutubeDL', 'gen_extractors', 'list_extractors']
index a01c367de4f6cf5e6f9ce4d9b86de4991fa859dc..b8ff4548116403dc5166825250fedad65c20f665 100644 (file)
@@ -174,6 +174,7 @@ def aes_decrypt_text(data, password, key_size_bytes):
 
     return plaintext
 
+
 RCON = (0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36)
 SBOX = (0x63, 0x7C, 0x77, 0x7B, 0xF2, 0x6B, 0x6F, 0xC5, 0x30, 0x01, 0x67, 0x2B, 0xFE, 0xD7, 0xAB, 0x76,
         0xCA, 0x82, 0xC9, 0x7D, 0xFA, 0x59, 0x47, 0xF0, 0xAD, 0xD4, 0xA2, 0xAF, 0x9C, 0xA4, 0x72, 0xC0,
@@ -328,4 +329,5 @@ def inc(data):
             break
     return data
 
+
 __all__ = ['aes_encrypt', 'key_expansion', 'aes_ctr_decrypt', 'aes_cbc_decrypt', 'aes_decrypt_text']
index b8aaf5a461c9e3ca2884c748ebb3225a2fd9fe29..83ee7e25747532c61f344aaea921021690669f61 100644 (file)
@@ -2491,6 +2491,7 @@ class _TreeBuilder(etree.TreeBuilder):
     def doctype(self, name, pubid, system):
         pass
 
+
 if sys.version_info[0] >= 3:
     def compat_etree_fromstring(text):
         return etree.XML(text, parser=etree.XMLParser(target=_TreeBuilder()))
@@ -2787,6 +2788,7 @@ def workaround_optparse_bug9161():
             return real_add_option(self, *bargs, **bkwargs)
         optparse.OptionGroup.add_option = _compat_add_option
 
+
 if hasattr(shutil, 'get_terminal_size'):  # Python >= 3.3
     compat_get_terminal_size = shutil.get_terminal_size
 else:
index 817591d97e88606b966b7055026f691faab840dc..16952e359bc19337dc4f8682061d169ff956e264 100644 (file)
@@ -7,6 +7,7 @@ from .http import HttpFD
 from .rtmp import RtmpFD
 from .dash import DashSegmentsFD
 from .rtsp import RtspFD
+from .ism import IsmFD
 from .external import (
     get_external_downloader,
     FFmpegFD,
@@ -24,6 +25,7 @@ PROTOCOL_MAP = {
     'rtsp': RtspFD,
     'f4m': F4mFD,
     'http_dash_segments': DashSegmentsFD,
+    'ism': IsmFD,
 }
 
 
index 8482cbd8423dae254db7efff873588cd8fb10b8a..3dc144b4e19f208d4075d6423ce3278b3a614330 100644 (file)
@@ -346,7 +346,6 @@ class FileDownloader(object):
         min_sleep_interval = self.params.get('sleep_interval')
         if min_sleep_interval:
             max_sleep_interval = self.params.get('max_sleep_interval', min_sleep_interval)
-            print(min_sleep_interval, max_sleep_interval)
             sleep_interval = random.uniform(min_sleep_interval, max_sleep_interval)
             self.to_screen('[download] Sleeping %s seconds...' % sleep_interval)
             time.sleep(sleep_interval)
index 8bbab9dbc596c659db622fe9910d0ae90018a598..8437dde30ca2afe031afb1ff2882ed12ac4b49b5 100644 (file)
@@ -1,7 +1,6 @@
 from __future__ import unicode_literals
 
 import os
-import re
 
 from .fragment import FragmentFD
 from ..compat import compat_urllib_error
@@ -19,32 +18,32 @@ class DashSegmentsFD(FragmentFD):
     FD_NAME = 'dashsegments'
 
     def real_download(self, filename, info_dict):
-        base_url = info_dict['url']
-        segment_urls = [info_dict['segment_urls'][0]] if self.params.get('test', False) else info_dict['segment_urls']
-        initialization_url = info_dict.get('initialization_url')
+        segments = info_dict['fragments'][:1] if self.params.get(
+            'test', False) else info_dict['fragments']
 
         ctx = {
             'filename': filename,
-            'total_frags': len(segment_urls) + (1 if initialization_url else 0),
+            'total_frags': len(segments),
         }
 
         self._prepare_and_start_frag_download(ctx)
 
-        def combine_url(base_url, target_url):
-            if re.match(r'^https?://', target_url):
-                return target_url
-            return '%s%s%s' % (base_url, '' if base_url.endswith('/') else '/', target_url)
-
         segments_filenames = []
 
         fragment_retries = self.params.get('fragment_retries', 0)
+        skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
 
-        def append_url_to_file(target_url, tmp_filename, segment_name):
+        def process_segment(segment, tmp_filename, num):
+            segment_url = segment['url']
+            segment_name = 'Frag%d' % num
             target_filename = '%s-%s' % (tmp_filename, segment_name)
+            # In DASH, the first segment contains necessary headers to
+            # generate a valid MP4 file, so always abort for the first segment
+            fatal = num == 0 or not skip_unavailable_fragments
             count = 0
             while count <= fragment_retries:
                 try:
-                    success = ctx['dl'].download(target_filename, {'url': combine_url(base_url, target_url)})
+                    success = ctx['dl'].download(target_filename, {'url': segment_url})
                     if not success:
                         return False
                     down, target_sanitized = sanitize_open(target_filename, 'rb')
@@ -52,26 +51,27 @@ class DashSegmentsFD(FragmentFD):
                     down.close()
                     segments_filenames.append(target_sanitized)
                     break
-                except (compat_urllib_error.HTTPError, ) as err:
+                except compat_urllib_error.HTTPError as err:
                     # YouTube may often return 404 HTTP error for a fragment causing the
                     # whole download to fail. However if the same fragment is immediately
                     # retried with the same request data this usually succeeds (1-2 attemps
                     # is usually enough) thus allowing to download the whole file successfully.
-                    # So, we will retry all fragments that fail with 404 HTTP error for now.
-                    if err.code != 404:
-                        raise
-                    # Retry fragment
+                    # To be future-proof we will retry all fragments that fail with any
+                    # HTTP error.
                     count += 1
                     if count <= fragment_retries:
-                        self.report_retry_fragment(segment_name, count, fragment_retries)
+                        self.report_retry_fragment(err, segment_name, count, fragment_retries)
             if count > fragment_retries:
+                if not fatal:
+                    self.report_skip_fragment(segment_name)
+                    return True
                 self.report_error('giving up after %s fragment retries' % fragment_retries)
                 return False
+            return True
 
-        if initialization_url:
-            append_url_to_file(initialization_url, ctx['tmpfilename'], 'Init')
-        for i, segment_url in enumerate(segment_urls):
-            append_url_to_file(segment_url, ctx['tmpfilename'], 'Seg%d' % i)
+        for i, segment in enumerate(segments):
+            if not process_segment(segment, ctx['tmpfilename'], i):
+                return False
 
         self._finish_frag_download(ctx)
 
index cf45562217ac0c27b77f3a241f3881d770d800ce..5d3e5d8d3d748d98ea187e8eca4444c5504e07fb 100644 (file)
@@ -220,6 +220,12 @@ class FFmpegFD(ExternalFD):
         if proxy:
             if not re.match(r'^[\da-zA-Z]+://', proxy):
                 proxy = 'http://%s' % proxy
+
+            if proxy.startswith('socks'):
+                self.report_warning(
+                    '%s does not support SOCKS proxies. Downloading is likely to fail. '
+                    'Consider adding --hls-prefer-native to your command.' % self.get_basename())
+
             # Since December 2015 ffmpeg supports -http_proxy option (see
             # http://git.videolan.org/?p=ffmpeg.git;a=commit;h=b4eb1f29ebddd60c41a2eb39f5af701e38e0d3fd)
             # We could switch to the following code if we are able to detect version properly
@@ -287,6 +293,7 @@ class FFmpegFD(ExternalFD):
 class AVconvFD(FFmpegFD):
     pass
 
+
 _BY_NAME = dict(
     (klass.get_basename(), klass)
     for name, klass in globals().items()
index 80c21d40bc88382a64634b0eeb9daa3eaaccc303..688e086eb0536c55ef184ae68fa09a6ffb41462d 100644 (file)
@@ -314,7 +314,8 @@ class F4mFD(FragmentFD):
         man_url = info_dict['url']
         requested_bitrate = info_dict.get('tbr')
         self.to_screen('[%s] Downloading f4m manifest' % self.FD_NAME)
-        urlh = self.ydl.urlopen(man_url)
+
+        urlh = self.ydl.urlopen(self._prepare_url(info_dict, man_url))
         man_url = urlh.geturl()
         # Some manifests may be malformed, e.g. prosiebensat1 generated manifests
         # (see https://github.com/rg3/youtube-dl/issues/6215#issuecomment-121704244
@@ -387,7 +388,10 @@ class F4mFD(FragmentFD):
             url_parsed = base_url_parsed._replace(path=base_url_parsed.path + name, query='&'.join(query))
             frag_filename = '%s-%s' % (ctx['tmpfilename'], name)
             try:
-                success = ctx['dl'].download(frag_filename, {'url': url_parsed.geturl()})
+                success = ctx['dl'].download(frag_filename, {
+                    'url': url_parsed.geturl(),
+                    'http_headers': info_dict.get('http_headers'),
+                })
                 if not success:
                     return False
                 (down, frag_sanitized) = sanitize_open(frag_filename, 'rb')
index ba903ae103a7bc6817378389a34713d9a5550e19..60df627a65dfc589899f009fa5df9ce76a441ae5 100644 (file)
@@ -6,8 +6,10 @@ import time
 from .common import FileDownloader
 from .http import HttpFD
 from ..utils import (
+    error_to_compat_str,
     encodeFilename,
     sanitize_open,
+    sanitized_Request,
 )
 
 
@@ -22,13 +24,23 @@ class FragmentFD(FileDownloader):
 
     Available options:
 
-    fragment_retries:   Number of times to retry a fragment for HTTP error (DASH only)
+    fragment_retries:   Number of times to retry a fragment for HTTP error (DASH
+                        and hlsnative only)
+    skip_unavailable_fragments:
+                        Skip unavailable fragments (DASH and hlsnative only)
     """
 
-    def report_retry_fragment(self, fragment_name, count, retries):
+    def report_retry_fragment(self, err, fragment_name, count, retries):
         self.to_screen(
-            '[download] Got server HTTP error. Retrying fragment %s (attempt %d of %s)...'
-            % (fragment_name, count, self.format_retries(retries)))
+            '[download] Got server HTTP error: %s. Retrying fragment %s (attempt %d of %s)...'
+            % (error_to_compat_str(err), fragment_name, count, self.format_retries(retries)))
+
+    def report_skip_fragment(self, fragment_name):  # print a "[download] Skipping fragment ..." notice via to_screen
+        self.to_screen('[download] Skipping fragment %s...' % fragment_name)
+
+    def _prepare_url(self, info_dict, url):  # wrap url in a request carrying info_dict['http_headers'], when there are any
+        headers = info_dict.get('http_headers')
+        return sanitized_Request(url, None, headers) if headers else url  # url is returned unchanged when no headers are set
 
     def _prepare_and_start_frag_download(self, ctx):
         self._prepare_frag_download(ctx)
index 8d7971e5d7042e5917deaf589cdebac7d488cd10..7373ec05fd0d4a1d983f48668229b21d98977581 100644 (file)
@@ -13,6 +13,7 @@ from .fragment import FragmentFD
 from .external import FFmpegFD
 
 from ..compat import (
+    compat_urllib_error,
     compat_urlparse,
     compat_struct_pack,
 )
@@ -30,7 +31,7 @@ class HlsFD(FragmentFD):
     FD_NAME = 'hlsnative'
 
     @staticmethod
-    def can_download(manifest):
+    def can_download(manifest, info_dict):
         UNSUPPORTED_FEATURES = (
             r'#EXT-X-KEY:METHOD=(?!NONE|AES-128)',  # encrypted streams [1]
             r'#EXT-X-BYTERANGE',  # playlists composed of byte ranges of media files [2]
@@ -52,16 +53,18 @@ class HlsFD(FragmentFD):
         )
         check_results = [not re.search(feature, manifest) for feature in UNSUPPORTED_FEATURES]
         check_results.append(can_decrypt_frag or '#EXT-X-KEY:METHOD=AES-128' not in manifest)
+        check_results.append(not info_dict.get('is_live'))
         return all(check_results)
 
     def real_download(self, filename, info_dict):
         man_url = info_dict['url']
         self.to_screen('[%s] Downloading m3u8 manifest' % self.FD_NAME)
-        manifest = self.ydl.urlopen(man_url).read()
+
+        manifest = self.ydl.urlopen(self._prepare_url(info_dict, man_url)).read()
 
         s = manifest.decode('utf-8', 'ignore')
 
-        if not self.can_download(s):
+        if not self.can_download(s, info_dict):
             self.report_warning(
                 'hlsnative has detected features it does not support, '
                 'extraction will be delegated to ffmpeg')
@@ -83,7 +86,14 @@ class HlsFD(FragmentFD):
 
         self._prepare_and_start_frag_download(ctx)
 
+        fragment_retries = self.params.get('fragment_retries', 0)
+        skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
+        test = self.params.get('test', False)
+
+        extra_query = None
         extra_param_to_segment_url = info_dict.get('extra_param_to_segment_url')
+        if extra_param_to_segment_url:
+            extra_query = compat_urlparse.parse_qs(extra_param_to_segment_url)
         i = 0
         media_sequence = 0
         decrypt_info = {'METHOD': 'NONE'}
@@ -96,15 +106,40 @@ class HlsFD(FragmentFD):
                         line
                         if re.match(r'^https?://', line)
                         else compat_urlparse.urljoin(man_url, line))
-                    frag_filename = '%s-Frag%d' % (ctx['tmpfilename'], i)
-                    if extra_param_to_segment_url:
-                        frag_url = update_url_query(frag_url, extra_param_to_segment_url)
-                    success = ctx['dl'].download(frag_filename, {'url': frag_url})
-                    if not success:
+                    frag_name = 'Frag%d' % i
+                    frag_filename = '%s-%s' % (ctx['tmpfilename'], frag_name)
+                    if extra_query:
+                        frag_url = update_url_query(frag_url, extra_query)
+                    count = 0
+                    while count <= fragment_retries:
+                        try:
+                            success = ctx['dl'].download(frag_filename, {
+                                'url': frag_url,
+                                'http_headers': info_dict.get('http_headers'),
+                            })
+                            if not success:
+                                return False
+                            down, frag_sanitized = sanitize_open(frag_filename, 'rb')
+                            frag_content = down.read()
+                            down.close()
+                            break
+                        except compat_urllib_error.HTTPError as err:
+                            # Unavailable (possibly temporary) fragments may be served.
+                            # First we try to retry then either skip or abort.
+                            # See https://github.com/rg3/youtube-dl/issues/10165,
+                            # https://github.com/rg3/youtube-dl/issues/10448).
+                            count += 1
+                            if count <= fragment_retries:
+                                self.report_retry_fragment(err, frag_name, count, fragment_retries)
+                    if count > fragment_retries:
+                        if skip_unavailable_fragments:
+                            i += 1
+                            media_sequence += 1
+                            self.report_skip_fragment(frag_name)
+                            continue
+                        self.report_error(
+                            'giving up after %s fragment retries' % fragment_retries)
                         return False
-                    down, frag_sanitized = sanitize_open(frag_filename, 'rb')
-                    frag_content = down.read()
-                    down.close()
                     if decrypt_info['METHOD'] == 'AES-128':
                         iv = decrypt_info.get('IV') or compat_struct_pack('>8xq', media_sequence)
                         frag_content = AES.new(
@@ -112,7 +147,7 @@ class HlsFD(FragmentFD):
                     ctx['dest_stream'].write(frag_content)
                     frags_filenames.append(frag_sanitized)
                     # We only download the first fragment during the test
-                    if self.params.get('test', False):
+                    if test:
                         break
                     i += 1
                     media_sequence += 1
@@ -120,12 +155,12 @@ class HlsFD(FragmentFD):
                     decrypt_info = parse_m3u8_attributes(line[11:])
                     if decrypt_info['METHOD'] == 'AES-128':
                         if 'IV' in decrypt_info:
-                            decrypt_info['IV'] = binascii.unhexlify(decrypt_info['IV'][2:])
+                            decrypt_info['IV'] = binascii.unhexlify(decrypt_info['IV'][2:].zfill(32))
                         if not re.match(r'^https?://', decrypt_info['URI']):
                             decrypt_info['URI'] = compat_urlparse.urljoin(
                                 man_url, decrypt_info['URI'])
-                        if extra_param_to_segment_url:
-                            decrypt_info['URI'] = update_url_query(decrypt_info['URI'], extra_param_to_segment_url)
+                        if extra_query:
+                            decrypt_info['URI'] = update_url_query(decrypt_info['URI'], extra_query)
                         decrypt_info['KEY'] = self.ydl.urlopen(decrypt_info['URI']).read()
                 elif line.startswith('#EXT-X-MEDIA-SEQUENCE'):
                     media_sequence = int(line[22:])
index f8b69d186ac5ee93c8402f85bc66e7ed59570118..af405b9509572bfd42bb11bd48bec5300d8105b3 100644 (file)
@@ -13,6 +13,9 @@ from ..utils import (
     encodeFilename,
     sanitize_open,
     sanitized_Request,
+    write_xattr,
+    XAttrMetadataError,
+    XAttrUnavailableError,
 )
 
 
@@ -179,9 +182,8 @@ class HttpFD(FileDownloader):
 
                 if self.params.get('xattr_set_filesize', False) and data_len is not None:
                     try:
-                        import xattr
-                        xattr.setxattr(tmpfilename, 'user.ytdl.filesize', str(data_len))
-                    except(OSError, IOError, ImportError) as err:
+                        write_xattr(tmpfilename, 'user.ytdl.filesize', str(data_len).encode('utf-8'))
+                    except (XAttrUnavailableError, XAttrMetadataError) as err:
                         self.report_error('unable to set filesize xattr: %s' % str(err))
 
             try:
diff --git a/youtube_dl/downloader/ism.py b/youtube_dl/downloader/ism.py
new file mode 100644 (file)
index 0000000..93cac5e
--- /dev/null
@@ -0,0 +1,271 @@
+from __future__ import unicode_literals
+
+import os
+import time
+import struct
+import binascii
+import io
+
+from .fragment import FragmentFD
+from ..compat import compat_urllib_error
+from ..utils import (
+    sanitize_open,
+    encodeFilename,
+)
+
+
+u8 = struct.Struct(b'>B')  # unsigned 8-bit integer
+u88 = struct.Struct(b'>Bx')  # unsigned byte followed by one zero pad byte (8.8 fixed point, integer part only)
+u16 = struct.Struct(b'>H')  # unsigned 16-bit big-endian integer
+u1616 = struct.Struct(b'>Hxx')  # unsigned 16-bit integer followed by two zero pad bytes (16.16 fixed point, integer part only)
+u32 = struct.Struct(b'>I')  # unsigned 32-bit big-endian integer
+u64 = struct.Struct(b'>Q')  # unsigned 64-bit big-endian integer
+
+s88 = struct.Struct(b'>bx')  # signed counterpart of u88
+s16 = struct.Struct(b'>h')  # signed 16-bit big-endian integer
+s1616 = struct.Struct(b'>hxx')  # signed counterpart of u1616
+s32 = struct.Struct(b'>i')  # signed 32-bit big-endian integer
+
+unity_matrix = (s32.pack(0x10000) + s32.pack(0) * 3) * 2 + s32.pack(0x40000000)  # nine 32-bit values: 0x10000, 0, 0, 0, 0x10000, 0, 0, 0, 0x40000000; written as the matrix of mvhd and tkhd
+
+TRACK_ENABLED = 0x1  # the three TRACK_* bits are OR-ed into the tkhd flags
+TRACK_IN_MOVIE = 0x2
+TRACK_IN_PREVIEW = 0x4
+
+SELF_CONTAINED = 0x1  # flag set on the 'url ' data entry box
+
+
+def box(box_type, payload):  # serialize a box: 32-bit big-endian size (8-byte header + payload), then box_type, then payload
+    return u32.pack(8 + len(payload)) + box_type + payload
+
+
+def full_box(box_type, version, flags, payload):  # box whose payload is prefixed with a 1-byte version and 3-byte flags
+    return box(box_type, u8.pack(version) + u32.pack(flags)[1:] + payload)  # [1:] keeps the low 24 bits of flags
+
+
+def write_piff_header(stream, params):
+    """Write the File Type ('ftyp') and Movie ('moov') boxes for the single track described by params to stream."""
+    track_id = params['track_id']  # required
+    fourcc = params['fourcc']  # required; selects the sample entry built below
+    duration = params['duration']  # required
+    timescale = params.get('timescale', 10000000)
+    language = params.get('language', 'und')
+    height = params.get('height', 0)
+    width = params.get('width', 0)
+    is_audio = width == 0 and height == 0  # a track without dimensions is treated as audio
+    creation_time = modification_time = int(time.time())
+
+    ftyp_payload = b'isml'  # major brand
+    ftyp_payload += u32.pack(1)  # minor version
+    ftyp_payload += b'piff' + b'iso2'  # compatible brands
+    stream.write(box(b'ftyp', ftyp_payload))  # File Type Box
+
+    mvhd_payload = u64.pack(creation_time)
+    mvhd_payload += u64.pack(modification_time)
+    mvhd_payload += u32.pack(timescale)
+    mvhd_payload += u64.pack(duration)
+    mvhd_payload += s1616.pack(1)  # rate
+    mvhd_payload += s88.pack(1)  # volume
+    mvhd_payload += u16.pack(0)  # reserved
+    mvhd_payload += u32.pack(0) * 2  # reserved
+    mvhd_payload += unity_matrix
+    mvhd_payload += u32.pack(0) * 6  # pre defined
+    mvhd_payload += u32.pack(0xffffffff)  # next track id
+    moov_payload = full_box(b'mvhd', 1, 0, mvhd_payload)  # Movie Header Box
+
+    tkhd_payload = u64.pack(creation_time)
+    tkhd_payload += u64.pack(modification_time)
+    tkhd_payload += u32.pack(track_id)  # track id
+    tkhd_payload += u32.pack(0)  # reserved
+    tkhd_payload += u64.pack(duration)
+    tkhd_payload += u32.pack(0) * 2  # reserved
+    tkhd_payload += s16.pack(0)  # layer
+    tkhd_payload += s16.pack(0)  # alternate group
+    tkhd_payload += s88.pack(1 if is_audio else 0)  # volume
+    tkhd_payload += u16.pack(0)  # reserved
+    tkhd_payload += unity_matrix
+    tkhd_payload += u1616.pack(width)
+    tkhd_payload += u1616.pack(height)
+    trak_payload = full_box(b'tkhd', 1, TRACK_ENABLED | TRACK_IN_MOVIE | TRACK_IN_PREVIEW, tkhd_payload)  # Track Header Box
+
+    mdhd_payload = u64.pack(creation_time)
+    mdhd_payload += u64.pack(modification_time)
+    mdhd_payload += u32.pack(timescale)
+    mdhd_payload += u64.pack(duration)
+    mdhd_payload += u16.pack(((ord(language[0]) - 0x60) << 10) | ((ord(language[1]) - 0x60) << 5) | (ord(language[2]) - 0x60))
+    mdhd_payload += u16.pack(0)  # pre defined
+    mdia_payload = full_box(b'mdhd', 1, 0, mdhd_payload)  # Media Header Box
+
+    hdlr_payload = u32.pack(0)  # pre defined
+    hdlr_payload += b'soun' if is_audio else b'vide'  # handler type
+    hdlr_payload += u32.pack(0) * 3  # reserved
+    hdlr_payload += (b'Sound' if is_audio else b'Video') + b'Handler\0'  # name
+    mdia_payload += full_box(b'hdlr', 0, 0, hdlr_payload)  # Handler Reference Box
+
+    if is_audio:
+        smhd_payload = s88.pack(0)  # balance
+        smhd_payload += u16.pack(0)  # reserved
+        media_header_box = full_box(b'smhd', 0, 0, smhd_payload)  # Sound Media Header
+    else:
+        vmhd_payload = u16.pack(0)  # graphics mode
+        vmhd_payload += u16.pack(0) * 3  # opcolor
+        media_header_box = full_box(b'vmhd', 0, 1, vmhd_payload)  # Video Media Header
+    minf_payload = media_header_box
+
+    dref_payload = u32.pack(1)  # entry count
+    dref_payload += full_box(b'url ', 0, SELF_CONTAINED, b'')  # Data Entry URL Box
+    dinf_payload = full_box(b'dref', 0, 0, dref_payload)  # Data Reference Box
+    minf_payload += box(b'dinf', dinf_payload)  # Data Information Box
+
+    stsd_payload = u32.pack(1)  # entry count
+
+    sample_entry_payload = u8.pack(0) * 6  # reserved
+    sample_entry_payload += u16.pack(1)  # data reference index
+    if is_audio:
+        sample_entry_payload += u32.pack(0) * 2  # reserved
+        sample_entry_payload += u16.pack(params.get('channels', 2))
+        sample_entry_payload += u16.pack(params.get('bits_per_sample', 16))
+        sample_entry_payload += u16.pack(0)  # pre defined
+        sample_entry_payload += u16.pack(0)  # reserved
+        sample_entry_payload += u1616.pack(params['sampling_rate'])  # required for audio tracks
+
+        if fourcc == 'AACL':
+            sample_entry_box = box(b'mp4a', sample_entry_payload)
+    else:
+        sample_entry_payload += u16.pack(0)  # pre defined
+        sample_entry_payload += u16.pack(0)  # reserved
+        sample_entry_payload += u32.pack(0) * 3  # pre defined
+        sample_entry_payload += u16.pack(width)
+        sample_entry_payload += u16.pack(height)
+        sample_entry_payload += u1616.pack(0x48)  # horiz resolution 72 dpi
+        sample_entry_payload += u1616.pack(0x48)  # vert resolution 72 dpi
+        sample_entry_payload += u32.pack(0)  # reserved
+        sample_entry_payload += u16.pack(1)  # frame count
+        sample_entry_payload += u8.pack(0) * 32  # compressor name
+        sample_entry_payload += u16.pack(0x18)  # depth
+        sample_entry_payload += s16.pack(-1)  # pre defined
+
+        codec_private_data = binascii.unhexlify(params['codec_private_data'])  # hex string, required for video tracks
+        if fourcc in ('H264', 'AVC1'):
+            sps, pps = codec_private_data.split(u32.pack(1))[1:]  # split on 00 00 00 01; exactly one SPS and one PPS expected
+            avcc_payload = u8.pack(1)  # configuration version
+            avcc_payload += sps[1:4]  # avc profile indication + profile compatibility + avc level indication
+            avcc_payload += u8.pack(0xfc | (params.get('nal_unit_length_field', 4) - 1))  # complete representation (1) + reserved (11111) + length size minus one
+            avcc_payload += u8.pack(1)  # reserved (0) + number of sps (0000001)
+            avcc_payload += u16.pack(len(sps))
+            avcc_payload += sps
+            avcc_payload += u8.pack(1)  # number of pps
+            avcc_payload += u16.pack(len(pps))
+            avcc_payload += pps
+            sample_entry_payload += box(b'avcC', avcc_payload)  # AVC Decoder Configuration Record
+            sample_entry_box = box(b'avc1', sample_entry_payload)  # AVC Simple Entry
+    stsd_payload += sample_entry_box  # NOTE: only bound for fourcc 'AACL' (audio) or 'H264'/'AVC1' (video)
+
+    stbl_payload = full_box(b'stsd', 0, 0, stsd_payload)  # Sample Description Box
+
+    stts_payload = u32.pack(0)  # entry count
+    stbl_payload += full_box(b'stts', 0, 0, stts_payload)  # Decoding Time to Sample Box
+
+    stsc_payload = u32.pack(0)  # entry count
+    stbl_payload += full_box(b'stsc', 0, 0, stsc_payload)  # Sample To Chunk Box
+
+    stco_payload = u32.pack(0)  # entry count
+    stbl_payload += full_box(b'stco', 0, 0, stco_payload)  # Chunk Offset Box
+
+    minf_payload += box(b'stbl', stbl_payload)  # Sample Table Box
+
+    mdia_payload += box(b'minf', minf_payload)  # Media Information Box
+
+    trak_payload += box(b'mdia', mdia_payload)  # Media Box
+
+    moov_payload += box(b'trak', trak_payload)  # Track Box
+
+    mehd_payload = u64.pack(duration)
+    mvex_payload = full_box(b'mehd', 1, 0, mehd_payload)  # Movie Extends Header Box
+
+    trex_payload = u32.pack(track_id)  # track id
+    trex_payload += u32.pack(1)  # default sample description index
+    trex_payload += u32.pack(0)  # default sample duration
+    trex_payload += u32.pack(0)  # default sample size
+    trex_payload += u32.pack(0)  # default sample flags
+    mvex_payload += full_box(b'trex', 0, 0, trex_payload)  # Track Extends Box
+
+    moov_payload += box(b'mvex', mvex_payload)  # Movie Extends Box
+    stream.write(box(b'moov', moov_payload))  # Movie Box
+
+
+def extract_box_data(data, box_sequence):
+    """
+    Return the payload of the box reached by following box_sequence.
+
+    data is a bytes buffer holding consecutive boxes, each starting with a
+    32-bit size field (decoded with u32) followed by a 4-byte box type.
+    box_sequence is a non-empty sequence of 4-byte box types naming the
+    path to descend, outermost first, e.g. [b'moof', b'traf', b'tfhd'].
+    The first box of each requested type is the one that is entered.
+
+    Raises ValueError if a box on the path is missing, if a box header is
+    truncated or if a box declares a size smaller than its own header.
+    """
+    # NOTE: u32 and u64 are packer objects defined elsewhere in this file.
+    data_reader = io.BytesIO(data)
+    while True:
+        header = data_reader.read(8)
+        if len(header) < 8:
+            # Ran off the end of the buffer without meeting the wanted box
+            raise ValueError(
+                'box %r not found' % (box_sequence[0],))
+        box_size = u32.unpack(header[:4])[0]
+        box_type = header[4:]
+
+        if box_size == 1:
+            # Size 1 means the real size is in a 64-bit field after the type
+            large_size = data_reader.read(8)
+            if len(large_size) < 8:
+                raise ValueError(
+                    'truncated header in box %r' % (box_type,))
+            payload_size = u64.unpack(large_size)[0] - 16
+        elif box_size == 0:
+            # Size 0 means the box extends to the end of the data
+            payload_size = None
+        else:
+            payload_size = box_size - 8
+
+        if payload_size is not None and payload_size < 0:
+            # Seeking by a negative amount would re-read the same header
+            # forever, so reject the box instead.
+            raise ValueError(
+                'invalid size for box %r' % (box_type,))
+
+        if box_type == box_sequence[0]:
+            if payload_size is None:
+                box_data = data_reader.read()
+            else:
+                box_data = data_reader.read(payload_size)
+            if len(box_sequence) == 1:
+                return box_data
+            return extract_box_data(box_data, box_sequence[1:])
+
+        if payload_size is None:
+            # This box was the last one in the data and it did not match
+            raise ValueError(
+                'box %r not found' % (box_sequence[0],))
+        data_reader.seek(payload_size, 1)
+
+
+class IsmFD(FragmentFD):
+    """
+    Download segments in an ISM manifest
+    """
+
+    FD_NAME = 'ism'
+
+    def real_download(self, filename, info_dict):
+        """
+        Download every fragment in info_dict['fragments'] and append them to
+        the destination stream, preceded by a generated header.
+
+        The header is written once, right before the first fragment that
+        was fetched successfully: the track id is read from that fragment's
+        moof/traf/tfhd box, stored in info_dict['_download_params'] and the
+        params are handed to write_piff_header.
+
+        Honours the 'test' (first fragment only), 'fragment_retries' and
+        'skip_unavailable_fragments' params. Returns True on success, False
+        when a fragment download reports failure or when the retries for a
+        fragment are exhausted and skipping is disabled.
+        """
+        segments = info_dict['fragments'][:1] if self.params.get(
+            'test', False) else info_dict['fragments']
+
+        ctx = {
+            'filename': filename,
+            'total_frags': len(segments),
+        }
+
+        self._prepare_and_start_frag_download(ctx)
+
+        segments_filenames = []
+
+        fragment_retries = self.params.get('fragment_retries', 0)
+        skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
+
+        track_written = False
+        for i, segment in enumerate(segments):
+            segment_url = segment['url']
+            segment_name = 'Frag%d' % i
+            target_filename = '%s-%s' % (ctx['tmpfilename'], segment_name)
+            count = 0
+            while count <= fragment_retries:
+                try:
+                    success = ctx['dl'].download(target_filename, {'url': segment_url})
+                    if not success:
+                        return False
+                    down, target_sanitized = sanitize_open(target_filename, 'rb')
+                    # Close the fragment file even if reading fails, and
+                    # before the header parsing below gets a chance to raise.
+                    try:
+                        down_data = down.read()
+                    finally:
+                        down.close()
+                    if not track_written:
+                        # The track id is taken from bytes 4-8 of the tfhd
+                        # payload of the first downloaded fragment.
+                        tfhd_data = extract_box_data(down_data, [b'moof', b'traf', b'tfhd'])
+                        info_dict['_download_params']['track_id'] = u32.unpack(tfhd_data[4:8])[0]
+                        write_piff_header(ctx['dest_stream'], info_dict['_download_params'])
+                        track_written = True
+                    ctx['dest_stream'].write(down_data)
+                    segments_filenames.append(target_sanitized)
+                    break
+                except compat_urllib_error.HTTPError as err:
+                    # Only HTTP errors are retried; anything else propagates
+                    count += 1
+                    if count <= fragment_retries:
+                        self.report_retry_fragment(err, segment_name, count, fragment_retries)
+            if count > fragment_retries:
+                if skip_unavailable_fragments:
+                    self.report_skip_fragment(segment_name)
+                    continue
+                self.report_error('giving up after %s fragment retries' % fragment_retries)
+                return False
+
+        self._finish_frag_download(ctx)
+
+        # Remove the per-fragment temporary files that were appended
+        for segment_file in segments_filenames:
+            os.remove(encodeFilename(segment_file))
+
+        return True
index b584277be92b5a86fb9e0ac5d95870444d441174..0247cabf9df8a6c61602085dcabe5f139b53420a 100644 (file)
@@ -7,12 +7,13 @@ from ..utils import (
     ExtractorError,
     js_to_json,
     int_or_none,
+    parse_iso8601,
 )
 
 
 class ABCIE(InfoExtractor):
     IE_NAME = 'abc.net.au'
-    _VALID_URL = r'https?://www\.abc\.net\.au/news/(?:[^/]+/){1,2}(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www\.)?abc\.net\.au/news/(?:[^/]+/){1,2}(?P<id>\d+)'
 
     _TESTS = [{
         'url': 'http://www.abc.net.au/news/2014-11-05/australia-to-staff-ebola-treatment-centre-in-sierra-leone/5868334',
@@ -93,3 +94,59 @@ class ABCIE(InfoExtractor):
             'description': self._og_search_description(webpage),
             'thumbnail': self._og_search_thumbnail(webpage),
         }
+
+
+class ABCIViewIE(InfoExtractor):
+    IE_NAME = 'abc.net.au:iview'
+    _VALID_URL = r'https?://iview\.abc\.net\.au/programs/[^/]+/(?P<id>[^/?#]+)'
+
+    # ABC iview programs are normally available for 14 days only.
+    _TESTS = [{
+        'url': 'http://iview.abc.net.au/programs/diaries-of-a-broken-mind/ZX9735A001S00',
+        'md5': 'cde42d728b3b7c2b32b1b94b4a548afc',
+        'info_dict': {
+            'id': 'ZX9735A001S00',
+            'ext': 'mp4',
+            'title': 'Diaries Of A Broken Mind',
+            'description': 'md5:7de3903874b7a1be279fe6b68718fc9e',
+            'upload_date': '20161010',
+            'uploader_id': 'abc2',
+            'timestamp': 1476064920,
+        },
+        'skip': 'Video gone',
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+        # The page embeds the player configuration as a JSON object
+        # assigned to videoParams.
+        video_params = self._parse_json(self._search_regex(
+            r'videoParams\s*=\s*({.+?});', webpage, 'video params'), video_id)
+        title = video_params.get('title') or video_params['seriesTitle']
+
+        # Only the playlist entry of type 'program' is used. Report a proper
+        # extractor error instead of leaking StopIteration when it is absent.
+        stream = next(
+            (s for s in video_params['playlist'] if s.get('type') == 'program'),
+            None)
+        if stream is None:
+            raise ExtractorError('Unable to find a program stream in the playlist')
+
+        formats = self._extract_akamai_formats(stream['hds-unmetered'], video_id)
+        self._sort_formats(formats)
+
+        subtitles = {}
+        # 'captions' may be missing or null, so fall back to an empty dict
+        src_vtt = (stream.get('captions') or {}).get('src-vtt')
+        if src_vtt:
+            subtitles['en'] = [{
+                'url': src_vtt,
+                'ext': 'vtt',
+            }]
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': self._html_search_meta(['og:description', 'twitter:description'], webpage),
+            'thumbnail': self._html_search_meta(['og:image', 'twitter:image:src'], webpage),
+            'duration': int_or_none(video_params.get('eventDuration')),
+            'timestamp': parse_iso8601(video_params.get('pubDate'), ' '),
+            'series': video_params.get('seriesTitle'),
+            # Falls back to the first 7 characters of the video id
+            'series_id': video_params.get('seriesHouseNumber') or video_id[:7],
+            'episode_number': int_or_none(self._html_search_meta('episodeNumber', webpage, default=None)),
+            'episode': self._html_search_meta('episode_title', webpage, default=None),
+            'uploader_id': video_params.get('channel'),
+            'formats': formats,
+            'subtitles': subtitles,
+        }
index b61a6327c46110c6f32d1806db7e791152ea9635..6ae5d9a96ac6919ab1ea1ae906bf510018d5578b 100644 (file)
@@ -12,7 +12,7 @@ from ..compat import compat_urlparse
 
 class AbcNewsVideoIE(AMPIE):
     IE_NAME = 'abcnews:video'
-    _VALID_URL = 'http://abcnews.go.com/[^/]+/video/(?P<display_id>[0-9a-z-]+)-(?P<id>\d+)'
+    _VALID_URL = r'https?://abcnews\.go\.com/[^/]+/video/(?P<display_id>[0-9a-z-]+)-(?P<id>\d+)'
 
     _TESTS = [{
         'url': 'http://abcnews.go.com/ThisWeek/video/week-exclusive-irans-foreign-minister-zarif-20411932',
@@ -49,7 +49,7 @@ class AbcNewsVideoIE(AMPIE):
 
 class AbcNewsIE(InfoExtractor):
     IE_NAME = 'abcnews'
-    _VALID_URL = 'https?://abcnews\.go\.com/(?:[^/]+/)+(?P<display_id>[0-9a-z-]+)/story\?id=(?P<id>\d+)'
+    _VALID_URL = r'https?://abcnews\.go\.com/(?:[^/]+/)+(?P<display_id>[0-9a-z-]+)/story\?id=(?P<id>\d+)'
 
     _TESTS = [{
         'url': 'http://abcnews.go.com/Blotter/News/dramatic-video-rare-death-job-america/story?id=10498713#.UIhwosWHLjY',
similarity index 52%
rename from youtube_dl/extractor/abc7news.py
rename to youtube_dl/extractor/abcotvs.py
index c04949c215d03229909d3024dbe32e376d6c35c3..054bb05964910c3d521eb6615661c1843239290f 100644 (file)
@@ -1,13 +1,19 @@
+# coding: utf-8
 from __future__ import unicode_literals
 
 import re
 
 from .common import InfoExtractor
-from ..utils import parse_iso8601
+from ..utils import (
+    int_or_none,
+    parse_iso8601,
+)
 
 
-class Abc7NewsIE(InfoExtractor):
-    _VALID_URL = r'https?://abc7news\.com(?:/[^/]+/(?P<display_id>[^/]+))?/(?P<id>\d+)'
+class ABCOTVSIE(InfoExtractor):
+    IE_NAME = 'abcotvs'
+    IE_DESC = 'ABC Owned Television Stations'
+    _VALID_URL = r'https?://(?:abc(?:7(?:news|ny|chicago)?|11|13|30)|6abc)\.com(?:/[^/]+/(?P<display_id>[^/]+))?/(?P<id>\d+)'
     _TESTS = [
         {
             'url': 'http://abc7news.com/entertainment/east-bay-museum-celebrates-vintage-synthesizers/472581/',
@@ -15,7 +21,7 @@ class Abc7NewsIE(InfoExtractor):
                 'id': '472581',
                 'display_id': 'east-bay-museum-celebrates-vintage-synthesizers',
                 'ext': 'mp4',
-                'title': 'East Bay museum celebrates history of synthesized music',
+                'title': 'East Bay museum celebrates vintage synthesizers',
                 'description': 'md5:a4f10fb2f2a02565c1749d4adbab4b10',
                 'thumbnail': 're:^https?://.*\.jpg$',
                 'timestamp': 1421123075,
@@ -41,7 +47,7 @@ class Abc7NewsIE(InfoExtractor):
         webpage = self._download_webpage(url, display_id)
 
         m3u8 = self._html_search_meta(
-            'contentURL', webpage, 'm3u8 url', fatal=True)
+            'contentURL', webpage, 'm3u8 url', fatal=True).split('?')[0]
 
         formats = self._extract_m3u8_formats(m3u8, display_id, 'mp4')
         self._sort_formats(formats)
@@ -66,3 +72,41 @@ class Abc7NewsIE(InfoExtractor):
             'uploader': uploader,
             'formats': formats,
         }
+
+
+class ABCOTVSClipsIE(InfoExtractor):
+    IE_NAME = 'abcotvs:clips'
+    _VALID_URL = r'https?://clips\.abcotvs\.com/(?:[^/]+/)*video/(?P<id>\d+)'
+    _TEST = {
+        'url': 'https://clips.abcotvs.com/kabc/video/214814',
+        'info_dict': {
+            'id': '214814',
+            'ext': 'mp4',
+            'title': 'SpaceX launch pad explosion destroys rocket, satellite',
+            'description': 'md5:9f186e5ad8f490f65409965ee9c7be1b',
+            'upload_date': '20160901',
+            'timestamp': 1472756695,
+        },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        },
+    }
+
+    def _real_extract(self, url):
+        clip_id = self._match_id(url)
+
+        # The metadata endpoint returns a 'results' list; the first entry
+        # is the one used.
+        api_url = 'https://clips.abcotvs.com/vogo/video/getByIds?ids=' + clip_id
+        response = self._download_json(api_url, clip_id)
+        clip = response['results'][0]
+        title = clip['title']
+
+        # The query string is dropped from the video URL before it is
+        # handed to the m3u8 format extraction.
+        manifest_url = clip['videoURL'].split('?')[0]
+        formats = self._extract_m3u8_formats(manifest_url, clip_id, 'mp4')
+        self._sort_formats(formats)
+
+        info = {
+            'id': clip_id,
+            'title': title,
+        }
+        info['description'] = clip.get('description')
+        info['thumbnail'] = clip.get('thumbnailURL')
+        info['duration'] = int_or_none(clip.get('duration'))
+        info['timestamp'] = int_or_none(clip.get('pubDate'))
+        info['formats'] = formats
+        return info
index 9e3a3e3620152390bffe996a542697f6f01736af..12eeab271c29f3dae271912969317a67fee826ae 100644 (file)
@@ -1,4 +1,4 @@
-# -*- coding: utf-8 -*-
+# coding: utf-8
 from __future__ import unicode_literals
 
 import re
@@ -6,16 +6,1292 @@ import time
 import xml.etree.ElementTree as etree
 
 from .common import InfoExtractor
+from ..compat import compat_urlparse
 from ..utils import (
     unescapeHTML,
     urlencode_postdata,
     unified_timestamp,
+    ExtractorError,
 )
 
 
+MSO_INFO = {
+    'DTV': {
+        'name': 'DIRECTV',
+        'username_field': 'username',
+        'password_field': 'password',
+    },
+    'Rogers': {
+        'name': 'Rogers',
+        'username_field': 'UserName',
+        'password_field': 'UserPassword',
+    },
+    'Comcast_SSO': {
+        'name': 'Comcast XFINITY',
+        'username_field': 'user',
+        'password_field': 'passwd',
+    },
+    'thr030': {
+        'name': '3 Rivers Communications'
+    },
+    'com140': {
+        'name': 'Access Montana'
+    },
+    'acecommunications': {
+        'name': 'AcenTek'
+    },
+    'acm010': {
+        'name': 'Acme Communications'
+    },
+    'ada020': {
+        'name': 'Adams Cable Service'
+    },
+    'alb020': {
+        'name': 'Albany Mutual Telephone'
+    },
+    'algona': {
+        'name': 'Algona Municipal Utilities'
+    },
+    'allwest': {
+        'name': 'All West Communications'
+    },
+    'all025': {
+        'name': 'Allen\'s Communications'
+    },
+    'spl010': {
+        'name': 'Alliance Communications'
+    },
+    'all070': {
+        'name': 'ALLO Communications'
+    },
+    'alpine': {
+        'name': 'Alpine Communications'
+    },
+    'hun015': {
+        'name': 'American Broadband'
+    },
+    'nwc010': {
+        'name': 'American Broadband Missouri'
+    },
+    'com130-02': {
+        'name': 'American Community Networks'
+    },
+    'com130-01': {
+        'name': 'American Warrior Networks'
+    },
+    'tom020': {
+        'name': 'Amherst Telephone/Tomorrow Valley'
+    },
+    'tvc020': {
+        'name': 'Andycable'
+    },
+    'arkwest': {
+        'name': 'Arkwest Communications'
+    },
+    'art030': {
+        'name': 'Arthur Mutual Telephone Company'
+    },
+    'arvig': {
+        'name': 'Arvig'
+    },
+    'nttcash010': {
+        'name': 'Ashland Home Net'
+    },
+    'astound': {
+        'name': 'Astound (now Wave)'
+    },
+    'dix030': {
+        'name': 'ATC Broadband'
+    },
+    'ara010': {
+        'name': 'ATC Communications'
+    },
+    'she030-02': {
+        'name': 'Ayersville Communications'
+    },
+    'baldwin': {
+        'name': 'Baldwin Lightstream'
+    },
+    'bal040': {
+        'name': 'Ballard TV'
+    },
+    'cit025': {
+        'name': 'Bardstown Cable TV'
+    },
+    'bay030': {
+        'name': 'Bay Country Communications'
+    },
+    'tel095': {
+        'name': 'Beaver Creek Cooperative Telephone'
+    },
+    'bea020': {
+        'name': 'Beaver Valley Cable'
+    },
+    'bee010': {
+        'name': 'Bee Line Cable'
+    },
+    'wir030': {
+        'name': 'Beehive Broadband'
+    },
+    'bra020': {
+        'name': 'BELD'
+    },
+    'bel020': {
+        'name': 'Bellevue Municipal Cable'
+    },
+    'vol040-01': {
+        'name': 'Ben Lomand Connect / BLTV'
+    },
+    'bev010': {
+        'name': 'BEVCOMM'
+    },
+    'big020': {
+        'name': 'Big Sandy Broadband'
+    },
+    'ble020': {
+        'name': 'Bledsoe Telephone Cooperative'
+    },
+    'bvt010': {
+        'name': 'Blue Valley Tele-Communications'
+    },
+    'bra050': {
+        'name': 'Brandenburg Telephone Co.'
+    },
+    'bte010': {
+        'name': 'Bristol Tennessee Essential Services'
+    },
+    'annearundel': {
+        'name': 'Broadstripe'
+    },
+    'btc010': {
+        'name': 'BTC Communications'
+    },
+    'btc040': {
+        'name': 'BTC Vision - Nahunta'
+    },
+    'bul010': {
+        'name': 'Bulloch Telephone Cooperative'
+    },
+    'but010': {
+        'name': 'Butler-Bremer Communications'
+    },
+    'tel160-csp': {
+        'name': 'C Spire SNAP'
+    },
+    'csicable': {
+        'name': 'Cable Services Inc.'
+    },
+    'cableamerica': {
+        'name': 'CableAmerica'
+    },
+    'cab038': {
+        'name': 'CableSouth Media 3'
+    },
+    'weh010-camtel': {
+        'name': 'Cam-Tel Company'
+    },
+    'car030': {
+        'name': 'Cameron Communications'
+    },
+    'canbytel': {
+        'name': 'Canby Telcom'
+    },
+    'crt020': {
+        'name': 'CapRock Tv'
+    },
+    'car050': {
+        'name': 'Carnegie Cable'
+    },
+    'cas': {
+        'name': 'CAS Cable'
+    },
+    'casscomm': {
+        'name': 'CASSCOMM'
+    },
+    'mid180-02': {
+        'name': 'Catalina Broadband Solutions'
+    },
+    'cccomm': {
+        'name': 'CC Communications'
+    },
+    'nttccde010': {
+        'name': 'CDE Lightband'
+    },
+    'cfunet': {
+        'name': 'Cedar Falls Utilities'
+    },
+    'dem010-01': {
+        'name': 'Celect-Bloomer Telephone Area'
+    },
+    'dem010-02': {
+        'name': 'Celect-Bruce Telephone Area'
+    },
+    'dem010-03': {
+        'name': 'Celect-Citizens Connected Area'
+    },
+    'dem010-04': {
+        'name': 'Celect-Elmwood/Spring Valley Area'
+    },
+    'dem010-06': {
+        'name': 'Celect-Mosaic Telecom'
+    },
+    'dem010-05': {
+        'name': 'Celect-West WI Telephone Area'
+    },
+    'net010-02': {
+        'name': 'Cellcom/Nsight Telservices'
+    },
+    'cen100': {
+        'name': 'CentraCom'
+    },
+    'nttccst010': {
+        'name': 'Central Scott / CSTV'
+    },
+    'cha035': {
+        'name': 'Chaparral CableVision'
+    },
+    'cha050': {
+        'name': 'Chariton Valley Communication Corporation, Inc.'
+    },
+    'cha060': {
+        'name': 'Chatmoss Cablevision'
+    },
+    'nttcche010': {
+        'name': 'Cherokee Communications'
+    },
+    'che050': {
+        'name': 'Chesapeake Bay Communications'
+    },
+    'cimtel': {
+        'name': 'Cim-Tel Cable, LLC.'
+    },
+    'cit180': {
+        'name': 'Citizens Cablevision - Floyd, VA'
+    },
+    'cit210': {
+        'name': 'Citizens Cablevision, Inc.'
+    },
+    'cit040': {
+        'name': 'Citizens Fiber'
+    },
+    'cit250': {
+        'name': 'Citizens Mutual'
+    },
+    'war040': {
+        'name': 'Citizens Telephone Corporation'
+    },
+    'wat025': {
+        'name': 'City Of Monroe'
+    },
+    'wadsworth': {
+        'name': 'CityLink'
+    },
+    'nor100': {
+        'name': 'CL Tel'
+    },
+    'cla010': {
+        'name': 'Clarence Telephone and Cedar Communications'
+    },
+    'ser060': {
+        'name': 'Clear Choice Communications'
+    },
+    'tac020': {
+        'name': 'Click! Cable TV'
+    },
+    'war020': {
+        'name': 'CLICK1.NET'
+    },
+    'cml010': {
+        'name': 'CML Telephone Cooperative Association'
+    },
+    'cns': {
+        'name': 'CNS'
+    },
+    'com160': {
+        'name': 'Co-Mo Connect'
+    },
+    'coa020': {
+        'name': 'Coast Communications'
+    },
+    'coa030': {
+        'name': 'Coaxial Cable TV'
+    },
+    'mid055': {
+        'name': 'Cobalt TV (Mid-State Community TV)'
+    },
+    'col070': {
+        'name': 'Columbia Power & Water Systems'
+    },
+    'col080': {
+        'name': 'Columbus Telephone'
+    },
+    'nor105': {
+        'name': 'Communications 1 Cablevision, Inc.'
+    },
+    'com150': {
+        'name': 'Community Cable & Broadband'
+    },
+    'com020': {
+        'name': 'Community Communications Company'
+    },
+    'coy010': {
+        'name': 'commZoom'
+    },
+    'com025': {
+        'name': 'Complete Communication Services'
+    },
+    'cat020': {
+        'name': 'Comporium'
+    },
+    'com071': {
+        'name': 'ComSouth Telesys'
+    },
+    'consolidatedcable': {
+        'name': 'Consolidated'
+    },
+    'conwaycorp': {
+        'name': 'Conway Corporation'
+    },
+    'coo050': {
+        'name': 'Coon Valley Telecommunications Inc'
+    },
+    'coo080': {
+        'name': 'Cooperative Telephone Company'
+    },
+    'cpt010': {
+        'name': 'CP-TEL'
+    },
+    'cra010': {
+        'name': 'Craw-Kan Telephone'
+    },
+    'crestview': {
+        'name': 'Crestview Cable Communications'
+    },
+    'cross': {
+        'name': 'Cross TV'
+    },
+    'cro030': {
+        'name': 'Crosslake Communications'
+    },
+    'ctc040': {
+        'name': 'CTC - Brainerd MN'
+    },
+    'phe030': {
+        'name': 'CTV-Beam - East Alabama'
+    },
+    'cun010': {
+        'name': 'Cunningham Telephone & Cable'
+    },
+    'dpc010': {
+        'name': 'D & P Communications'
+    },
+    'dak030': {
+        'name': 'Dakota Central Telecommunications'
+    },
+    'nttcdel010': {
+        'name': 'Delcambre Telephone LLC'
+    },
+    'tel160-del': {
+        'name': 'Delta Telephone Company'
+    },
+    'sal040': {
+        'name': 'DiamondNet'
+    },
+    'ind060-dc': {
+        'name': 'Direct Communications'
+    },
+    'doy010': {
+        'name': 'Doylestown Cable TV'
+    },
+    'dic010': {
+        'name': 'DRN'
+    },
+    'dtc020': {
+        'name': 'DTC'
+    },
+    'dtc010': {
+        'name': 'DTC Cable (Delhi)'
+    },
+    'dum010': {
+        'name': 'Dumont Telephone Company'
+    },
+    'dun010': {
+        'name': 'Dunkerton Telephone Cooperative'
+    },
+    'cci010': {
+        'name': 'Duo County Telecom'
+    },
+    'eagle': {
+        'name': 'Eagle Communications'
+    },
+    'weh010-east': {
+        'name': 'East Arkansas Cable TV'
+    },
+    'eatel': {
+        'name': 'EATEL Video, LLC'
+    },
+    'ell010': {
+        'name': 'ECTA'
+    },
+    'emerytelcom': {
+        'name': 'Emery Telcom Video LLC'
+    },
+    'nor200': {
+        'name': 'Empire Access'
+    },
+    'endeavor': {
+        'name': 'Endeavor Communications'
+    },
+    'sun045': {
+        'name': 'Enhanced Telecommunications Corporation'
+    },
+    'mid030': {
+        'name': 'enTouch'
+    },
+    'epb020': {
+        'name': 'EPB Smartnet'
+    },
+    'jea010': {
+        'name': 'EPlus Broadband'
+    },
+    'com065': {
+        'name': 'ETC'
+    },
+    'ete010': {
+        'name': 'Etex Communications'
+    },
+    'fbc-tele': {
+        'name': 'F&B Communications'
+    },
+    'fal010': {
+        'name': 'Falcon Broadband'
+    },
+    'fam010': {
+        'name': 'FamilyView CableVision'
+    },
+    'far020': {
+        'name': 'Farmers Mutual Telephone Company'
+    },
+    'fay010': {
+        'name': 'Fayetteville Public Utilities'
+    },
+    'sal060': {
+        'name': 'fibrant'
+    },
+    'fid010': {
+        'name': 'Fidelity Communications'
+    },
+    'for030': {
+        'name': 'FJ Communications'
+    },
+    'fli020': {
+        'name': 'Flint River Communications'
+    },
+    'far030': {
+        'name': 'FMT - Jesup'
+    },
+    'foo010': {
+        'name': 'Foothills Communications'
+    },
+    'for080': {
+        'name': 'Forsyth CableNet'
+    },
+    'fbcomm': {
+        'name': 'Frankfort Plant Board'
+    },
+    'tel160-fra': {
+        'name': 'Franklin Telephone Company'
+    },
+    'nttcftc010': {
+        'name': 'FTC'
+    },
+    'fullchannel': {
+        'name': 'Full Channel, Inc.'
+    },
+    'gar040': {
+        'name': 'Gardonville Cooperative Telephone Association'
+    },
+    'gbt010': {
+        'name': 'GBT Communications, Inc.'
+    },
+    'tec010': {
+        'name': 'Genuine Telecom'
+    },
+    'clr010': {
+        'name': 'Giant Communications'
+    },
+    'gla010': {
+        'name': 'Glasgow EPB'
+    },
+    'gle010': {
+        'name': 'Glenwood Telecommunications'
+    },
+    'gra060': {
+        'name': 'GLW Broadband Inc.'
+    },
+    'goldenwest': {
+        'name': 'Golden West Cablevision'
+    },
+    'vis030': {
+        'name': 'Grantsburg Telcom'
+    },
+    'gpcom': {
+        'name': 'Great Plains Communications'
+    },
+    'gri010': {
+        'name': 'Gridley Cable Inc'
+    },
+    'hbc010': {
+        'name': 'H&B Cable Services'
+    },
+    'hae010': {
+        'name': 'Haefele TV Inc.'
+    },
+    'htc010': {
+        'name': 'Halstad Telephone Company'
+    },
+    'har005': {
+        'name': 'Harlan Municipal Utilities'
+    },
+    'har020': {
+        'name': 'Hart Communications'
+    },
+    'ced010': {
+        'name': 'Hartelco TV'
+    },
+    'hea040': {
+        'name': 'Heart of Iowa Communications Cooperative'
+    },
+    'htc020': {
+        'name': 'Hickory Telephone Company'
+    },
+    'nttchig010': {
+        'name': 'Highland Communication Services'
+    },
+    'hig030': {
+        'name': 'Highland Media'
+    },
+    'spc010': {
+        'name': 'Hilliary Communications'
+    },
+    'hin020': {
+        'name': 'Hinton CATV Co.'
+    },
+    'hometel': {
+        'name': 'HomeTel Entertainment, Inc.'
+    },
+    'hoodcanal': {
+        'name': 'Hood Canal Communications'
+    },
+    'weh010-hope': {
+        'name': 'Hope - Prescott Cable TV'
+    },
+    'horizoncable': {
+        'name': 'Horizon Cable TV, Inc.'
+    },
+    'hor040': {
+        'name': 'Horizon Chillicothe Telephone'
+    },
+    'htc030': {
+        'name': 'HTC Communications Co. - IL'
+    },
+    'htccomm': {
+        'name': 'HTC Communications, Inc. - IA'
+    },
+    'wal005': {
+        'name': 'Huxley Communications'
+    },
+    'imon': {
+        'name': 'ImOn Communications'
+    },
+    'ind040': {
+        'name': 'Independence Telecommunications'
+    },
+    'rrc010': {
+        'name': 'Inland Networks'
+    },
+    'stc020': {
+        'name': 'Innovative Cable TV St Croix'
+    },
+    'car100': {
+        'name': 'Innovative Cable TV St Thomas-St John'
+    },
+    'icc010': {
+        'name': 'Inside Connect Cable'
+    },
+    'int100': {
+        'name': 'Integra Telecom'
+    },
+    'int050': {
+        'name': 'Interstate Telecommunications Coop'
+    },
+    'irv010': {
+        'name': 'Irvine Cable'
+    },
+    'k2c010': {
+        'name': 'K2 Communications'
+    },
+    'kal010': {
+        'name': 'Kalida Telephone Company, Inc.'
+    },
+    'kal030': {
+        'name': 'Kalona Cooperative Telephone Company'
+    },
+    'kmt010': {
+        'name': 'KMTelecom'
+    },
+    'kpu010': {
+        'name': 'KPU Telecommunications'
+    },
+    'kuh010': {
+        'name': 'Kuhn Communications, Inc.'
+    },
+    'lak130': {
+        'name': 'Lakeland Communications'
+    },
+    'lan010': {
+        'name': 'Langco'
+    },
+    'lau020': {
+        'name': 'Laurel Highland Total Communications, Inc.'
+    },
+    'leh010': {
+        'name': 'Lehigh Valley Cooperative Telephone'
+    },
+    'bra010': {
+        'name': 'Limestone Cable/Bracken Cable'
+    },
+    'loc020': {
+        'name': 'LISCO'
+    },
+    'lit020': {
+        'name': 'Litestream'
+    },
+    'tel140': {
+        'name': 'LivCom'
+    },
+    'loc010': {
+        'name': 'LocalTel Communications'
+    },
+    'weh010-longview': {
+        'name': 'Longview - Kilgore Cable TV'
+    },
+    'lon030': {
+        'name': 'Lonsdale Video Ventures, LLC'
+    },
+    'lns010': {
+        'name': 'Lost Nation-Elwood Telephone Co.'
+    },
+    'nttclpc010': {
+        'name': 'LPC Connect'
+    },
+    'lumos': {
+        'name': 'Lumos Networks'
+    },
+    'madison': {
+        'name': 'Madison Communications'
+    },
+    'mad030': {
+        'name': 'Madison County Cable Inc.'
+    },
+    'nttcmah010': {
+        'name': 'Mahaska Communication Group'
+    },
+    'mar010': {
+        'name': 'Marne & Elk Horn Telephone Company'
+    },
+    'mcc040': {
+        'name': 'McClure Telephone Co.'
+    },
+    'mctv': {
+        'name': 'MCTV'
+    },
+    'merrimac': {
+        'name': 'Merrimac Communications Ltd.'
+    },
+    'metronet': {
+        'name': 'Metronet'
+    },
+    'mhtc': {
+        'name': 'MHTC'
+    },
+    'midhudson': {
+        'name': 'Mid-Hudson Cable'
+    },
+    'midrivers': {
+        'name': 'Mid-Rivers Communications'
+    },
+    'mid045': {
+        'name': 'Midstate Communications'
+    },
+    'mil080': {
+        'name': 'Milford Communications'
+    },
+    'min030': {
+        'name': 'MINET'
+    },
+    'nttcmin010': {
+        'name': 'Minford TV'
+    },
+    'san040-02': {
+        'name': 'Mitchell Telecom'
+    },
+    'mlg010': {
+        'name': 'MLGC'
+    },
+    'mon060': {
+        'name': 'Mon-Cre TVE'
+    },
+    'mou110': {
+        'name': 'Mountain Telephone'
+    },
+    'mou050': {
+        'name': 'Mountain Village Cable'
+    },
+    'mtacomm': {
+        'name': 'MTA Communications, LLC'
+    },
+    'mtc010': {
+        'name': 'MTC Cable'
+    },
+    'med040': {
+        'name': 'MTC Technologies'
+    },
+    'man060': {
+        'name': 'MTCC'
+    },
+    'mtc030': {
+        'name': 'MTCO Communications'
+    },
+    'mul050': {
+        'name': 'Mulberry Telecommunications'
+    },
+    'mur010': {
+        'name': 'Murray Electric System'
+    },
+    'musfiber': {
+        'name': 'MUS FiberNET'
+    },
+    'mpw': {
+        'name': 'Muscatine Power & Water'
+    },
+    'nttcsli010': {
+        'name': 'myEVTV.com'
+    },
+    'nor115': {
+        'name': 'NCC'
+    },
+    'nor260': {
+        'name': 'NDTC'
+    },
+    'nctc': {
+        'name': 'Nebraska Central Telecom, Inc.'
+    },
+    'nel020': {
+        'name': 'Nelsonville TV Cable'
+    },
+    'nem010': {
+        'name': 'Nemont'
+    },
+    'new075': {
+        'name': 'New Hope Telephone Cooperative'
+    },
+    'nor240': {
+        'name': 'NICP'
+    },
+    'cic010': {
+        'name': 'NineStar Connect'
+    },
+    'nktelco': {
+        'name': 'NKTelco'
+    },
+    'nortex': {
+        'name': 'Nortex Communications'
+    },
+    'nor140': {
+        'name': 'North Central Telephone Cooperative'
+    },
+    'nor030': {
+        'name': 'Northland Communications'
+    },
+    'nor075': {
+        'name': 'Northwest Communications'
+    },
+    'nor125': {
+        'name': 'Norwood Light Broadband'
+    },
+    'net010': {
+        'name': 'Nsight Telservices'
+    },
+    'dur010': {
+        'name': 'Ntec'
+    },
+    'nts010': {
+        'name': 'NTS Communications'
+    },
+    'new045': {
+        'name': 'NU-Telecom'
+    },
+    'nulink': {
+        'name': 'NuLink'
+    },
+    'jam030': {
+        'name': 'NVC'
+    },
+    'far035': {
+        'name': 'OmniTel Communications'
+    },
+    'onesource': {
+        'name': 'OneSource Communications'
+    },
+    'cit230': {
+        'name': 'Opelika Power Services'
+    },
+    'daltonutilities': {
+        'name': 'OptiLink'
+    },
+    'mid140': {
+        'name': 'OPTURA'
+    },
+    'ote010': {
+        'name': 'OTEC Communication Company'
+    },
+    'cci020': {
+        'name': 'Packerland Broadband'
+    },
+    'pan010': {
+        'name': 'Panora Telco/Guthrie Center Communications'
+    },
+    'otter': {
+        'name': 'Park Region Telephone & Otter Tail Telcom'
+    },
+    'mid050': {
+        'name': 'Partner Communications Cooperative'
+    },
+    'fib010': {
+        'name': 'Pathway'
+    },
+    'paulbunyan': {
+        'name': 'Paul Bunyan Communications'
+    },
+    'pem020': {
+        'name': 'Pembroke Telephone Company'
+    },
+    'mck010': {
+        'name': 'Peoples Rural Telephone Cooperative'
+    },
+    'pul010': {
+        'name': 'PES Energize'
+    },
+    'phi010': {
+        'name': 'Philippi Communications System'
+    },
+    'phonoscope': {
+        'name': 'Phonoscope Cable'
+    },
+    'pin070': {
+        'name': 'Pine Belt Communications, Inc.'
+    },
+    'weh010-pine': {
+        'name': 'Pine Bluff Cable TV'
+    },
+    'pin060': {
+        'name': 'Pineland Telephone Cooperative'
+    },
+    'cam010': {
+        'name': 'Pinpoint Communications'
+    },
+    'pio060': {
+        'name': 'Pioneer Broadband'
+    },
+    'pioncomm': {
+        'name': 'Pioneer Communications'
+    },
+    'pioneer': {
+        'name': 'Pioneer DTV'
+    },
+    'pla020': {
+        'name': 'Plant TiftNet, Inc.'
+    },
+    'par010': {
+        'name': 'PLWC'
+    },
+    'pro035': {
+        'name': 'PMT'
+    },
+    'vik011': {
+        'name': 'Polar Cablevision'
+    },
+    'pottawatomie': {
+        'name': 'Pottawatomie Telephone Co.'
+    },
+    'premiercomm': {
+        'name': 'Premier Communications'
+    },
+    'psc010': {
+        'name': 'PSC'
+    },
+    'pan020': {
+        'name': 'PTCI'
+    },
+    'qco010': {
+        'name': 'QCOL'
+    },
+    'qua010': {
+        'name': 'Quality Cablevision'
+    },
+    'rad010': {
+        'name': 'Radcliffe Telephone Company'
+    },
+    'car040': {
+        'name': 'Rainbow Communications'
+    },
+    'rai030': {
+        'name': 'Rainier Connect'
+    },
+    'ral010': {
+        'name': 'Ralls Technologies'
+    },
+    'rct010': {
+        'name': 'RC Technologies'
+    },
+    'red040': {
+        'name': 'Red River Communications'
+    },
+    'ree010': {
+        'name': 'Reedsburg Utility Commission'
+    },
+    'mol010': {
+        'name': 'Reliance Connects- Oregon'
+    },
+    'res020': {
+        'name': 'Reserve Telecommunications'
+    },
+    'weh010-resort': {
+        'name': 'Resort TV Cable'
+    },
+    'rld010': {
+        'name': 'Richland Grant Telephone Cooperative, Inc.'
+    },
+    'riv030': {
+        'name': 'River Valley Telecommunications Coop'
+    },
+    'rockportcable': {
+        'name': 'Rock Port Cablevision'
+    },
+    'rsf010': {
+        'name': 'RS Fiber'
+    },
+    'rtc': {
+        'name': 'RTC Communication Corp'
+    },
+    'res040': {
+        'name': 'RTC-Reservation Telephone Coop.'
+    },
+    'rte010': {
+        'name': 'RTEC Communications'
+    },
+    'stc010': {
+        'name': 'S&T'
+    },
+    'san020': {
+        'name': 'San Bruno Cable TV'
+    },
+    'san040-01': {
+        'name': 'Santel'
+    },
+    'sav010': {
+        'name': 'SCI Broadband-Savage Communications Inc.'
+    },
+    'sco050': {
+        'name': 'Scottsboro Electric Power Board'
+    },
+    'scr010': {
+        'name': 'Scranton Telephone Company'
+    },
+    'selco': {
+        'name': 'SELCO'
+    },
+    'she010': {
+        'name': 'Shentel'
+    },
+    'she030': {
+        'name': 'Sherwood Mutual Telephone Association, Inc.'
+    },
+    'ind060-ssc': {
+        'name': 'Silver Star Communications'
+    },
+    'sjoberg': {
+        'name': 'Sjoberg\'s Inc.'
+    },
+    'sou025': {
+        'name': 'SKT'
+    },
+    'sky050': {
+        'name': 'SkyBest TV'
+    },
+    'nttcsmi010': {
+        'name': 'Smithville Communications'
+    },
+    'woo010': {
+        'name': 'Solarus'
+    },
+    'sou075': {
+        'name': 'South Central Rural Telephone Cooperative'
+    },
+    'sou065': {
+        'name': 'South Holt Cablevision, Inc.'
+    },
+    'sou035': {
+        'name': 'South Slope Cooperative Communications'
+    },
+    'spa020': {
+        'name': 'Spanish Fork Community Network'
+    },
+    'spe010': {
+        'name': 'Spencer Municipal Utilities'
+    },
+    'spi005': {
+        'name': 'Spillway Communications, Inc.'
+    },
+    'srt010': {
+        'name': 'SRT'
+    },
+    'cccsmc010': {
+        'name': 'St. Maarten Cable TV'
+    },
+    'sta025': {
+        'name': 'Star Communications'
+    },
+    'sco020': {
+        'name': 'STE'
+    },
+    'uin010': {
+        'name': 'STRATA Networks'
+    },
+    'sum010': {
+        'name': 'Sumner Cable TV'
+    },
+    'pie010': {
+        'name': 'Surry TV/PCSI TV'
+    },
+    'swa010': {
+        'name': 'Swayzee Communications'
+    },
+    'sweetwater': {
+        'name': 'Sweetwater Cable Television Co'
+    },
+    'weh010-talequah': {
+        'name': 'Tahlequah Cable TV'
+    },
+    'tct': {
+        'name': 'TCT'
+    },
+    'tel050': {
+        'name': 'Tele-Media Company'
+    },
+    'com050': {
+        'name': 'The Community Agency'
+    },
+    'thr020': {
+        'name': 'Three River'
+    },
+    'cab140': {
+        'name': 'Town & Country Technologies'
+    },
+    'tra010': {
+        'name': 'Trans-Video'
+    },
+    'tre010': {
+        'name': 'Trenton TV Cable Company'
+    },
+    'tcc': {
+        'name': 'Tri County Communications Cooperative'
+    },
+    'tri025': {
+        'name': 'TriCounty Telecom'
+    },
+    'tri110': {
+        'name': 'TrioTel Communications, Inc.'
+    },
+    'tro010': {
+        'name': 'Troy Cablevision, Inc.'
+    },
+    'tsc': {
+        'name': 'TSC'
+    },
+    'cit220': {
+        'name': 'Tullahoma Utilities Board'
+    },
+    'tvc030': {
+        'name': 'TV Cable of Rensselaer'
+    },
+    'tvc015': {
+        'name': 'TVC Cable'
+    },
+    'cab180': {
+        'name': 'TVision'
+    },
+    'twi040': {
+        'name': 'Twin Lakes'
+    },
+    'tvtinc': {
+        'name': 'Twin Valley'
+    },
+    'uis010': {
+        'name': 'Union Telephone Company'
+    },
+    'uni110': {
+        'name': 'United Communications - TN'
+    },
+    'uni120': {
+        'name': 'United Services'
+    },
+    'uss020': {
+        'name': 'US Sonet'
+    },
+    'cab060': {
+        'name': 'USA Communications'
+    },
+    'she005': {
+        'name': 'USA Communications/Shellsburg, IA'
+    },
+    'val040': {
+        'name': 'Valley TeleCom Group'
+    },
+    'val025': {
+        'name': 'Valley Telecommunications'
+    },
+    'val030': {
+        'name': 'Valparaiso Broadband'
+    },
+    'cla050': {
+        'name': 'Vast Broadband'
+    },
+    'sul015': {
+        'name': 'Venture Communications Cooperative, Inc.'
+    },
+    'ver025': {
+        'name': 'Vernon Communications Co-op'
+    },
+    'weh010-vicksburg': {
+        'name': 'Vicksburg Video'
+    },
+    'vis070': {
+        'name': 'Vision Communications'
+    },
+    'volcanotel': {
+        'name': 'Volcano Vision, Inc.'
+    },
+    'vol040-02': {
+        'name': 'VolFirst / BLTV'
+    },
+    'ver070': {
+        'name': 'VTel'
+    },
+    'nttcvtx010': {
+        'name': 'VTX1'
+    },
+    'bci010-02': {
+        'name': 'Vyve Broadband'
+    },
+    'wab020': {
+        'name': 'Wabash Mutual Telephone'
+    },
+    'waitsfield': {
+        'name': 'Waitsfield Cable'
+    },
+    'wal010': {
+        'name': 'Walnut Communications'
+    },
+    'wavebroadband': {
+        'name': 'Wave'
+    },
+    'wav030': {
+        'name': 'Waverly Communications Utility'
+    },
+    'wbi010': {
+        'name': 'WBI'
+    },
+    'web020': {
+        'name': 'Webster-Calhoun Cooperative Telephone Association'
+    },
+    'wes005': {
+        'name': 'West Alabama TV Cable'
+    },
+    'carolinata': {
+        'name': 'West Carolina Communications'
+    },
+    'wct010': {
+        'name': 'West Central Telephone Association'
+    },
+    'wes110': {
+        'name': 'West River Cooperative Telephone Company'
+    },
+    'ani030': {
+        'name': 'WesTel Systems'
+    },
+    'westianet': {
+        'name': 'Western Iowa Networks'
+    },
+    'nttcwhi010': {
+        'name': 'Whidbey Telecom'
+    },
+    'weh010-white': {
+        'name': 'White County Cable TV'
+    },
+    'wes130': {
+        'name': 'Wiatel'
+    },
+    'wik010': {
+        'name': 'Wiktel'
+    },
+    'wil070': {
+        'name': 'Wilkes Communications, Inc./RiverStreet Networks'
+    },
+    'wil015': {
+        'name': 'Wilson Communications'
+    },
+    'win010': {
+        'name': 'Windomnet/SMBS'
+    },
+    'win090': {
+        'name': 'Windstream Cable TV'
+    },
+    'wcta': {
+        'name': 'Winnebago Cooperative Telecom Association'
+    },
+    'wtc010': {
+        'name': 'WTC'
+    },
+    'wil040': {
+        'name': 'WTC Communications, Inc.'
+    },
+    'wya010': {
+        'name': 'Wyandotte Cable'
+    },
+    'hin020-02': {
+        'name': 'X-Stream Services'
+    },
+    'xit010': {
+        'name': 'XIT Communications'
+    },
+    'yel010': {
+        'name': 'Yelcot Communications'
+    },
+    'mid180-01': {
+        'name': 'yondoo'
+    },
+    'cou060': {
+        'name': 'Zito Media'
+    },
+}
+
+
 class AdobePassIE(InfoExtractor):
     _SERVICE_PROVIDER_TEMPLATE = 'https://sp.auth.adobe.com/adobe-services/%s'
     _USER_AGENT = 'Mozilla/5.0 (X11; Linux i686; rv:47.0) Gecko/20100101 Firefox/47.0'
+    _MVPD_CACHE = 'ap-mvpd'
 
     @staticmethod
     def _get_mvpd_resource(provider_id, title, guid, rating):
@@ -37,6 +1313,28 @@ class AdobePassIE(InfoExtractor):
             return self._search_regex(
                 '<%s>(.+?)</%s>' % (tag, tag), xml_str, tag)
 
+        def is_expired(token, date_ele):
+            token_expires = unified_timestamp(re.sub(r'[_ ]GMT', '', xml_text(token, date_ele)))
+            return token_expires and token_expires <= int(time.time())
+
+        def post_form(form_page_res, note, data={}):
+            form_page, urlh = form_page_res
+            post_url = self._html_search_regex(r'<form[^>]+action=(["\'])(?P<url>.+?)\1', form_page, 'post url', group='url')
+            if not re.match(r'https?://', post_url):
+                post_url = compat_urlparse.urljoin(urlh.geturl(), post_url)
+            form_data = self._hidden_inputs(form_page)
+            form_data.update(data)
+            return self._download_webpage_handle(
+                post_url, video_id, note, data=urlencode_postdata(form_data), headers={
+                    'Content-Type': 'application/x-www-form-urlencoded',
+                })
+
+        def raise_mvpd_required():
+            raise ExtractorError(
+                'This video is only available for users of participating TV providers. '
+                'Use --ap-mso to specify Adobe Pass Multiple-system operator Identifier '
+                'and --ap-username and --ap-password or --netrc to provide account credentials.', expected=True)
+
         mvpd_headers = {
             'ap_42': 'anonymous',
             'ap_11': 'Linux i686',
@@ -44,91 +1342,131 @@ class AdobePassIE(InfoExtractor):
             'User-Agent': self._USER_AGENT,
         }
 
-        guid = xml_text(resource, 'guid')
-        requestor_info = self._downloader.cache.load('mvpd', requestor_id) or {}
-        authn_token = requestor_info.get('authn_token')
-        if authn_token:
-            token_expires = unified_timestamp(re.sub(r'[_ ]GMT', '', xml_text(authn_token, 'simpleTokenExpires')))
-            if token_expires and token_expires <= int(time.time()):
+        guid = xml_text(resource, 'guid') if '<' in resource else resource
+        count = 0
+        while count < 2:
+            requestor_info = self._downloader.cache.load(self._MVPD_CACHE, requestor_id) or {}
+            authn_token = requestor_info.get('authn_token')
+            if authn_token and is_expired(authn_token, 'simpleTokenExpires'):
                 authn_token = None
-                requestor_info = {}
-        if not authn_token:
-            # TODO add support for other TV Providers
-            mso_id = 'DTV'
-            username, password = self._get_netrc_login_info(mso_id)
-            if not username or not password:
-                return ''
-
-            def post_form(form_page, note, data={}):
-                post_url = self._html_search_regex(r'<form[^>]+action=(["\'])(?P<url>.+?)\1', form_page, 'post url', group='url')
-                return self._download_webpage(
-                    post_url, video_id, note, data=urlencode_postdata(data or self._hidden_inputs(form_page)), headers={
-                        'Content-Type': 'application/x-www-form-urlencoded',
+            if not authn_token:
+                # TODO add support for other TV Providers
+                mso_id = self._downloader.params.get('ap_mso')
+                if not mso_id:
+                    raise_mvpd_required()
+                username, password = self._get_login_info('ap_username', 'ap_password', mso_id)
+                if not username or not password:
+                    raise_mvpd_required()
+                mso_info = MSO_INFO[mso_id]
+
+                provider_redirect_page_res = self._download_webpage_handle(
+                    self._SERVICE_PROVIDER_TEMPLATE % 'authenticate/saml', video_id,
+                    'Downloading Provider Redirect Page', query={
+                        'noflash': 'true',
+                        'mso_id': mso_id,
+                        'requestor_id': requestor_id,
+                        'no_iframe': 'false',
+                        'domain_name': 'adobe.com',
+                        'redirect_url': url,
                     })
 
-            provider_redirect_page = self._download_webpage(
-                self._SERVICE_PROVIDER_TEMPLATE % 'authenticate/saml', video_id,
-                'Downloading Provider Redirect Page', query={
-                    'noflash': 'true',
-                    'mso_id': mso_id,
-                    'requestor_id': requestor_id,
-                    'no_iframe': 'false',
-                    'domain_name': 'adobe.com',
-                    'redirect_url': url,
-                })
-            provider_login_page = post_form(
-                provider_redirect_page, 'Downloading Provider Login Page')
-            mvpd_confirm_page = post_form(provider_login_page, 'Logging in', {
-                'username': username,
-                'password': password,
+                if mso_id == 'Comcast_SSO':
+                    # Comcast page flow varies by video site and whether you
+                    # are on Comcast's network.
+                    provider_redirect_page, urlh = provider_redirect_page_res
+                    # Check for Comcast auto login
+                    if 'automatically signing you in' in provider_redirect_page:
+                        oauth_redirect_url = self._html_search_regex(
+                            r'window\.location\s*=\s*[\'"]([^\'"]+)',
+                            provider_redirect_page, 'oauth redirect')
+                        # Just need to process the request. No useful data comes back
+                        self._download_webpage(
+                            oauth_redirect_url, video_id, 'Confirming auto login')
+                    else:
+                        if '<form name="signin"' in provider_redirect_page:
+                            # already have the form, just fill it
+                            provider_login_page_res = provider_redirect_page_res
+                        elif 'http-equiv="refresh"' in provider_redirect_page:
+                            # redirects to the login page
+                            oauth_redirect_url = self._html_search_regex(
+                                r'content="0;\s*url=([^\'"]+)',
+                                provider_redirect_page, 'meta refresh redirect')
+                            provider_login_page_res = self._download_webpage_handle(
+                                oauth_redirect_url,
+                                video_id, 'Downloading Provider Login Page')
+                        else:
+                            provider_login_page_res = post_form(
+                                provider_redirect_page_res, 'Downloading Provider Login Page')
+
+                        mvpd_confirm_page_res = post_form(provider_login_page_res, 'Logging in', {
+                            mso_info.get('username_field', 'username'): username,
+                            mso_info.get('password_field', 'password'): password,
+                        })
+                        mvpd_confirm_page, urlh = mvpd_confirm_page_res
+                        if '<button class="submit" value="Resume">Resume</button>' in mvpd_confirm_page:
+                            post_form(mvpd_confirm_page_res, 'Confirming Login')
+
+                else:
+                    # Normal, non-Comcast flow
+                    provider_login_page_res = post_form(
+                        provider_redirect_page_res, 'Downloading Provider Login Page')
+                    mvpd_confirm_page_res = post_form(provider_login_page_res, 'Logging in', {
+                        mso_info.get('username_field', 'username'): username,
+                        mso_info.get('password_field', 'password'): password,
+                    })
+                    if mso_id != 'Rogers':
+                        post_form(mvpd_confirm_page_res, 'Confirming Login')
+
+                session = self._download_webpage(
+                    self._SERVICE_PROVIDER_TEMPLATE % 'session', video_id,
+                    'Retrieving Session', data=urlencode_postdata({
+                        '_method': 'GET',
+                        'requestor_id': requestor_id,
+                    }), headers=mvpd_headers)
+                if '<pendingLogout' in session:
+                    self._downloader.cache.store(self._MVPD_CACHE, requestor_id, {})
+                    count += 1
+                    continue
+                authn_token = unescapeHTML(xml_text(session, 'authnToken'))
+                requestor_info['authn_token'] = authn_token
+                self._downloader.cache.store(self._MVPD_CACHE, requestor_id, requestor_info)
+
+            authz_token = requestor_info.get(guid)
+            if authz_token and is_expired(authz_token, 'simpleTokenTTL'):
+                authz_token = None
+            if not authz_token:
+                authorize = self._download_webpage(
+                    self._SERVICE_PROVIDER_TEMPLATE % 'authorize', video_id,
+                    'Retrieving Authorization Token', data=urlencode_postdata({
+                        'resource_id': resource,
+                        'requestor_id': requestor_id,
+                        'authentication_token': authn_token,
+                        'mso_id': xml_text(authn_token, 'simpleTokenMsoID'),
+                        'userMeta': '1',
+                    }), headers=mvpd_headers)
+                if '<pendingLogout' in authorize:
+                    self._downloader.cache.store(self._MVPD_CACHE, requestor_id, {})
+                    count += 1
+                    continue
+                authz_token = unescapeHTML(xml_text(authorize, 'authzToken'))
+                requestor_info[guid] = authz_token
+                self._downloader.cache.store(self._MVPD_CACHE, requestor_id, requestor_info)
+
+            mvpd_headers.update({
+                'ap_19': xml_text(authn_token, 'simpleSamlNameID'),
+                'ap_23': xml_text(authn_token, 'simpleSamlSessionIndex'),
             })
-            post_form(mvpd_confirm_page, 'Confirming Login')
 
-            session = self._download_webpage(
-                self._SERVICE_PROVIDER_TEMPLATE % 'session', video_id,
-                'Retrieving Session', data=urlencode_postdata({
-                    '_method': 'GET',
-                    'requestor_id': requestor_id,
-                }), headers=mvpd_headers)
-            if '<pendingLogout' in session:
-                self._downloader.cache.store('mvpd', requestor_id, {})
-                return self._extract_mvpd_auth(url, video_id, requestor_id, resource)
-            authn_token = unescapeHTML(xml_text(session, 'authnToken'))
-            requestor_info['authn_token'] = authn_token
-            self._downloader.cache.store('mvpd', requestor_id, requestor_info)
-
-        authz_token = requestor_info.get(guid)
-        if not authz_token:
-            authorize = self._download_webpage(
-                self._SERVICE_PROVIDER_TEMPLATE % 'authorize', video_id,
-                'Retrieving Authorization Token', data=urlencode_postdata({
-                    'resource_id': resource,
+            short_authorize = self._download_webpage(
+                self._SERVICE_PROVIDER_TEMPLATE % 'shortAuthorize',
+                video_id, 'Retrieving Media Token', data=urlencode_postdata({
+                    'authz_token': authz_token,
                     'requestor_id': requestor_id,
-                    'authentication_token': authn_token,
-                    'mso_id': xml_text(authn_token, 'simpleTokenMsoID'),
-                    'userMeta': '1',
+                    'session_guid': xml_text(authn_token, 'simpleTokenAuthenticationGuid'),
+                    'hashed_guid': 'false',
                 }), headers=mvpd_headers)
-            if '<pendingLogout' in authorize:
-                self._downloader.cache.store('mvpd', requestor_id, {})
-                return self._extract_mvpd_auth(url, video_id, requestor_id, resource)
-            authz_token = unescapeHTML(xml_text(authorize, 'authzToken'))
-            requestor_info[guid] = authz_token
-            self._downloader.cache.store('mvpd', requestor_id, requestor_info)
-
-        mvpd_headers.update({
-            'ap_19': xml_text(authn_token, 'simpleSamlNameID'),
-            'ap_23': xml_text(authn_token, 'simpleSamlSessionIndex'),
-        })
-
-        short_authorize = self._download_webpage(
-            self._SERVICE_PROVIDER_TEMPLATE % 'shortAuthorize',
-            video_id, 'Retrieving Media Token', data=urlencode_postdata({
-                'authz_token': authz_token,
-                'requestor_id': requestor_id,
-                'session_guid': xml_text(authn_token, 'simpleTokenAuthenticationGuid'),
-                'hashed_guid': 'false',
-            }), headers=mvpd_headers)
-        if '<pendingLogout' in short_authorize:
-            self._downloader.cache.store('mvpd', requestor_id, {})
-            return self._extract_mvpd_auth(url, video_id, requestor_id, resource)
-        return short_authorize
+            if '<pendingLogout' in short_authorize:
+                self._downloader.cache.store(self._MVPD_CACHE, requestor_id, {})
+                count += 1
+                continue
+            return short_authorize
index 3f7f8c03624a9e25611b239e89cf900b41cbce0e..989505c8232abf53f99d0af594c84e45f8778eb0 100644 (file)
@@ -3,16 +3,14 @@ from __future__ import unicode_literals
 
 import re
 
-from .common import InfoExtractor
+from .turner import TurnerBaseIE
 from ..utils import (
-    determine_ext,
     ExtractorError,
-    float_or_none,
-    xpath_text,
+    int_or_none,
 )
 
 
-class AdultSwimIE(InfoExtractor):
+class AdultSwimIE(TurnerBaseIE):
     _VALID_URL = r'https?://(?:www\.)?adultswim\.com/videos/(?P<is_playlist>playlists/)?(?P<show_path>[^/]+)/(?P<episode_path>[^/?#]+)/?'
 
     _TESTS = [{
@@ -96,7 +94,29 @@ class AdultSwimIE(InfoExtractor):
         'params': {
             # m3u8 download
             'skip_download': True,
-        }
+        },
+        'expected_warnings': ['Unable to download f4m manifest'],
+    }, {
+        'url': 'http://www.adultswim.com/videos/toonami/friday-october-14th-2016/',
+        'info_dict': {
+            'id': 'eYiLsKVgQ6qTC6agD67Sig',
+            'title': 'Toonami - Friday, October 14th, 2016',
+            'description': 'md5:99892c96ffc85e159a428de85c30acde',
+        },
+        'playlist': [{
+            'md5': '',
+            'info_dict': {
+                'id': 'eYiLsKVgQ6qTC6agD67Sig',
+                'ext': 'mp4',
+                'title': 'Toonami - Friday, October 14th, 2016',
+                'description': 'md5:99892c96ffc85e159a428de85c30acde',
+            },
+        }],
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        },
+        'expected_warnings': ['Unable to download f4m manifest'],
     }]
 
     @staticmethod
@@ -148,7 +168,10 @@ class AdultSwimIE(InfoExtractor):
                 if bootstrapped_data.get('slugged_video', {}).get('slug') == episode_path:
                     video_info = bootstrapped_data['slugged_video']
             if not video_info:
-                video_info = bootstrapped_data.get('heroMetadata', {}).get('trailer').get('video')
+                video_info = bootstrapped_data.get(
+                    'heroMetadata', {}).get('trailer', {}).get('video')
+            if not video_info:
+                video_info = bootstrapped_data.get('onlineOriginals', [None])[0]
             if not video_info:
                 raise ExtractorError('Unable to find video info')
 
@@ -161,71 +184,41 @@ class AdultSwimIE(InfoExtractor):
                 segment_ids = [clip['videoPlaybackID'] for clip in video_info['clips']]
             elif video_info.get('videoPlaybackID'):
                 segment_ids = [video_info['videoPlaybackID']]
+            elif video_info.get('id'):
+                segment_ids = [video_info['id']]
             else:
-                raise ExtractorError(
-                    'This video is only available via cable service provider subscription that'
-                    ' is not currently supported. You may want to use --cookies.'
-                    if video_info.get('auth') is True else 'Unable to find stream or clips',
-                    expected=True)
+                if video_info.get('auth') is True:
+                    raise ExtractorError(
+                        'This video is only available via cable service provider subscription that'
+                        ' is not currently supported. You may want to use --cookies.', expected=True)
+                else:
+                    raise ExtractorError('Unable to find stream or clips')
 
         episode_id = video_info['id']
         episode_title = video_info['title']
-        episode_description = video_info['description']
-        episode_duration = video_info.get('duration')
+        episode_description = video_info.get('description')
+        episode_duration = int_or_none(video_info.get('duration'))
+        view_count = int_or_none(video_info.get('views'))
 
         entries = []
         for part_num, segment_id in enumerate(segment_ids):
-            segment_url = 'http://www.adultswim.com/videos/api/v0/assets?id=%s&platform=desktop' % segment_id
-
+            segement_info = self._extract_cvp_info(
+                'http://www.adultswim.com/videos/api/v0/assets?id=%s&platform=desktop' % segment_id,
+                segment_id, {
+                    'secure': {
+                        'media_src': 'http://androidhls-secure.cdn.turner.com/adultswim/big',
+                        'tokenizer_src': 'http://www.adultswim.com/astv/mvpd/processors/services/token_ipadAdobe.do',
+                    },
+                })
             segment_title = '%s - %s' % (show_title, episode_title)
             if len(segment_ids) > 1:
                 segment_title += ' Part %d' % (part_num + 1)
-
-            idoc = self._download_xml(
-                segment_url, segment_title,
-                'Downloading segment information', 'Unable to download segment information')
-
-            segment_duration = float_or_none(
-                xpath_text(idoc, './/trt', 'segment duration').strip())
-
-            formats = []
-            file_els = idoc.findall('.//files/file') or idoc.findall('./files/file')
-
-            unique_urls = []
-            unique_file_els = []
-            for file_el in file_els:
-                media_url = file_el.text
-                if not media_url or determine_ext(media_url) == 'f4m':
-                    continue
-                if file_el.text not in unique_urls:
-                    unique_urls.append(file_el.text)
-                    unique_file_els.append(file_el)
-
-            for file_el in unique_file_els:
-                bitrate = file_el.attrib.get('bitrate')
-                ftype = file_el.attrib.get('type')
-                media_url = file_el.text
-                if determine_ext(media_url) == 'm3u8':
-                    formats.extend(self._extract_m3u8_formats(
-                        media_url, segment_title, 'mp4', preference=0,
-                        m3u8_id='hls', fatal=False))
-                else:
-                    formats.append({
-                        'format_id': '%s_%s' % (bitrate, ftype),
-                        'url': file_el.text.strip(),
-                        # The bitrate may not be a number (for example: 'iphone')
-                        'tbr': int(bitrate) if bitrate.isdigit() else None,
-                    })
-
-            self._sort_formats(formats)
-
-            entries.append({
+            segement_info.update({
                 'id': segment_id,
                 'title': segment_title,
-                'formats': formats,
-                'duration': segment_duration,
-                'description': episode_description
+                'description': episode_description,
             })
+            entries.append(segement_info)
 
         return {
             '_type': 'playlist',
@@ -234,5 +227,6 @@ class AdultSwimIE(InfoExtractor):
             'entries': entries,
             'title': '%s - %s' % (show_title, episode_title),
             'description': episode_description,
-            'duration': episode_duration
+            'duration': episode_duration,
+            'view_count': view_count,
         }
index 518c61f67eb0befa0ce59fb393d10d8ebd4dcc03..75b36699363609876c755d4c120ec195aa81ec3a 100644 (file)
@@ -11,6 +11,7 @@ from ..compat import (
 from ..utils import (
     ExtractorError,
     int_or_none,
+    update_url_query,
     xpath_element,
     xpath_text,
 )
@@ -18,12 +19,18 @@ from ..utils import (
 
 class AfreecaTVIE(InfoExtractor):
     IE_DESC = 'afreecatv.com'
-    _VALID_URL = r'''(?x)^
-        https?://(?:(live|afbbs|www)\.)?afreeca(?:tv)?\.com(?::\d+)?
-        (?:
-            /app/(?:index|read_ucc_bbs)\.cgi|
-            /player/[Pp]layer\.(?:swf|html))
-        \?.*?\bnTitleNo=(?P<id>\d+)'''
+    _VALID_URL = r'''(?x)
+                    https?://
+                        (?:
+                            (?:(?:live|afbbs|www)\.)?afreeca(?:tv)?\.com(?::\d+)?
+                            (?:
+                                /app/(?:index|read_ucc_bbs)\.cgi|
+                                /player/[Pp]layer\.(?:swf|html)
+                            )\?.*?\bnTitleNo=|
+                            vod\.afreecatv\.com/PLAYER/STATION/
+                        )
+                        (?P<id>\d+)
+                    '''
     _TESTS = [{
         'url': 'http://live.afreecatv.com:8079/app/index.cgi?szType=read_ucc_bbs&szBjId=dailyapril&nStationNo=16711924&nBbsNo=18605867&nTitleNo=36164052&szSkin=',
         'md5': 'f72c89fe7ecc14c1b5ce506c4996046e',
@@ -66,6 +73,9 @@ class AfreecaTVIE(InfoExtractor):
     }, {
         'url': 'http://www.afreecatv.com/player/Player.swf?szType=szBjId=djleegoon&nStationNo=11273158&nBbsNo=13161095&nTitleNo=36327652',
         'only_matching': True,
+    }, {
+        'url': 'http://vod.afreecatv.com/PLAYER/STATION/15055030',
+        'only_matching': True,
     }]
 
     @staticmethod
@@ -83,7 +93,9 @@ class AfreecaTVIE(InfoExtractor):
         info_url = compat_urlparse.urlunparse(parsed_url._replace(
             netloc='afbbs.afreecatv.com:8080',
             path='/api/video/get_video_info.php'))
-        video_xml = self._download_xml(info_url, video_id)
+
+        video_xml = self._download_xml(
+            update_url_query(info_url, {'nTitleNo': video_id}), video_id)
 
         if xpath_element(video_xml, './track/video/file') is None:
             raise ExtractorError('Specified AfreecaTV video does not exist',
diff --git a/youtube_dl/extractor/aftonbladet.py b/youtube_dl/extractor/aftonbladet.py
deleted file mode 100644 (file)
index 5766b4f..0000000
+++ /dev/null
@@ -1,64 +0,0 @@
-# encoding: utf-8
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-from ..utils import int_or_none
-
-
-class AftonbladetIE(InfoExtractor):
-    _VALID_URL = r'https?://tv\.aftonbladet\.se/abtv/articles/(?P<id>[0-9]+)'
-    _TEST = {
-        'url': 'http://tv.aftonbladet.se/abtv/articles/36015',
-        'info_dict': {
-            'id': '36015',
-            'ext': 'mp4',
-            'title': 'Vulkanutbrott i rymden - nu släpper NASA bilderna',
-            'description': 'Jupiters måne mest aktiv av alla himlakroppar',
-            'timestamp': 1394142732,
-            'upload_date': '20140306',
-        },
-    }
-
-    def _real_extract(self, url):
-        video_id = self._match_id(url)
-        webpage = self._download_webpage(url, video_id)
-
-        # find internal video meta data
-        meta_url = 'http://aftonbladet-play-metadata.cdn.drvideo.aptoma.no/video/%s.json'
-        player_config = self._parse_json(self._html_search_regex(
-            r'data-player-config="([^"]+)"', webpage, 'player config'), video_id)
-        internal_meta_id = player_config['aptomaVideoId']
-        internal_meta_url = meta_url % internal_meta_id
-        internal_meta_json = self._download_json(
-            internal_meta_url, video_id, 'Downloading video meta data')
-
-        # find internal video formats
-        format_url = 'http://aftonbladet-play.videodata.drvideo.aptoma.no/actions/video/?id=%s'
-        internal_video_id = internal_meta_json['videoId']
-        internal_formats_url = format_url % internal_video_id
-        internal_formats_json = self._download_json(
-            internal_formats_url, video_id, 'Downloading video formats')
-
-        formats = []
-        for fmt in internal_formats_json['formats']['http']['pseudostreaming']['mp4']:
-            p = fmt['paths'][0]
-            formats.append({
-                'url': 'http://%s:%d/%s/%s' % (p['address'], p['port'], p['path'], p['filename']),
-                'ext': 'mp4',
-                'width': int_or_none(fmt.get('width')),
-                'height': int_or_none(fmt.get('height')),
-                'tbr': int_or_none(fmt.get('bitrate')),
-                'protocol': 'http',
-            })
-        self._sort_formats(formats)
-
-        return {
-            'id': video_id,
-            'title': internal_meta_json['title'],
-            'formats': formats,
-            'thumbnail': internal_meta_json.get('imageUrl'),
-            'description': internal_meta_json.get('shortPreamble'),
-            'timestamp': int_or_none(internal_meta_json.get('timePublished')),
-            'duration': int_or_none(internal_meta_json.get('duration')),
-            'view_count': int_or_none(internal_meta_json.get('views')),
-        }
index b081695d8400c0e24d36e84bd8445efa084ed8b3..388e578d569a27bdfd3a7d597d3ebcd5f31ccb94 100644 (file)
@@ -4,7 +4,7 @@ from .common import InfoExtractor
 
 
 class AlJazeeraIE(InfoExtractor):
-    _VALID_URL = r'https?://www\.aljazeera\.com/programmes/.*?/(?P<id>[^/]+)\.html'
+    _VALID_URL = r'https?://(?:www\.)?aljazeera\.com/programmes/.*?/(?P<id>[^/]+)\.html'
 
     _TEST = {
         'url': 'http://www.aljazeera.com/programmes/the-slum/2014/08/deliverance-201482883754237240.html',
index 190bc2cc8730853a23b9025f1849bf234a32e001..517b06def4d2ff690628eece4b1e85e647aea267 100644 (file)
@@ -1,29 +1,26 @@
-# -*- coding: utf-8 -*-
+# coding: utf-8
 from __future__ import unicode_literals
 
-import re
-import json
-
 from .common import InfoExtractor
-from ..compat import compat_str
 from ..utils import (
+    remove_end,
     qualities,
-    unescapeHTML,
-    xpath_element,
+    url_basename,
 )
 
 
 class AllocineIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?allocine\.fr/(?P<typ>article|video|film)/(fichearticle_gen_carticle=|player_gen_cmedia=|fichefilm_gen_cfilm=|video-)(?P<id>[0-9]+)(?:\.html)?'
+    _VALID_URL = r'https?://(?:www\.)?allocine\.fr/(?:article|video|film)/(?:fichearticle_gen_carticle=|player_gen_cmedia=|fichefilm_gen_cfilm=|video-)(?P<id>[0-9]+)(?:\.html)?'
 
     _TESTS = [{
         'url': 'http://www.allocine.fr/article/fichearticle_gen_carticle=18635087.html',
         'md5': '0c9fcf59a841f65635fa300ac43d8269',
         'info_dict': {
             'id': '19546517',
+            'display_id': '18635087',
             'ext': 'mp4',
             'title': 'Astérix - Le Domaine des Dieux Teaser VF',
-            'description': 'md5:abcd09ce503c6560512c14ebfdb720d2',
+            'description': 'md5:4a754271d9c6f16c72629a8a993ee884',
             'thumbnail': 're:http://.*\.jpg',
         },
     }, {
@@ -31,64 +28,82 @@ class AllocineIE(InfoExtractor):
         'md5': 'd0cdce5d2b9522ce279fdfec07ff16e0',
         'info_dict': {
             'id': '19540403',
+            'display_id': '19540403',
             'ext': 'mp4',
             'title': 'Planes 2 Bande-annonce VF',
             'description': 'Regardez la bande annonce du film Planes 2 (Planes 2 Bande-annonce VF). Planes 2, un film de Roberts Gannaway',
             'thumbnail': 're:http://.*\.jpg',
         },
     }, {
-        'url': 'http://www.allocine.fr/film/fichefilm_gen_cfilm=181290.html',
+        'url': 'http://www.allocine.fr/video/player_gen_cmedia=19544709&cfilm=181290.html',
         'md5': '101250fb127ef9ca3d73186ff22a47ce',
         'info_dict': {
             'id': '19544709',
+            'display_id': '19544709',
             'ext': 'mp4',
             'title': 'Dragons 2 - Bande annonce finale VF',
-            'description': 'md5:601d15393ac40f249648ef000720e7e3',
+            'description': 'md5:6cdd2d7c2687d4c6aafe80a35e17267a',
             'thumbnail': 're:http://.*\.jpg',
         },
     }, {
         'url': 'http://www.allocine.fr/video/video-19550147/',
-        'only_matching': True,
+        'md5': '3566c0668c0235e2d224fd8edb389f67',
+        'info_dict': {
+            'id': '19550147',
+            'ext': 'mp4',
+            'title': 'Faux Raccord N°123 - Les gaffes de Cliffhanger',
+            'description': 'md5:bc734b83ffa2d8a12188d9eb48bb6354',
+            'thumbnail': 're:http://.*\.jpg',
+        },
     }]
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        typ = mobj.group('typ')
-        display_id = mobj.group('id')
+        display_id = self._match_id(url)
 
         webpage = self._download_webpage(url, display_id)
 
-        if typ == 'film':
-            video_id = self._search_regex(r'href="/video/player_gen_cmedia=([0-9]+).+"', webpage, 'video id')
-        else:
-            player = self._search_regex(r'data-player=\'([^\']+)\'>', webpage, 'data player', default=None)
-            if player:
-                player_data = json.loads(player)
-                video_id = compat_str(player_data['refMedia'])
-            else:
-                model = self._search_regex(r'data-model="([^"]+)">', webpage, 'data model')
-                model_data = self._parse_json(unescapeHTML(model), display_id)
-                video_id = compat_str(model_data['id'])
+        formats = []
+        quality = qualities(['ld', 'md', 'hd'])
 
-        xml = self._download_xml('http://www.allocine.fr/ws/AcVisiondataV4.ashx?media=%s' % video_id, display_id)
+        model = self._html_search_regex(
+            r'data-model="([^"]+)"', webpage, 'data model', default=None)
+        if model:
+            model_data = self._parse_json(model, display_id)
 
-        video = xpath_element(xml, './/AcVisionVideo').attrib
-        quality = qualities(['ld', 'md', 'hd'])
+            for video_url in model_data['sources'].values():
+                video_id, format_id = url_basename(video_url).split('_')[:2]
+                formats.append({
+                    'format_id': format_id,
+                    'quality': quality(format_id),
+                    'url': video_url,
+                })
 
-        formats = []
-        for k, v in video.items():
-            if re.match(r'.+_path', k):
-                format_id = k.split('_')[0]
+            title = model_data['title']
+        else:
+            video_id = display_id
+            media_data = self._download_json(
+                'http://www.allocine.fr/ws/AcVisiondataV5.ashx?media=%s' % video_id, display_id)
+            for key, value in media_data['video'].items():
+                if not key.endswith('Path'):
+                    continue
+
+                format_id = key[:-len('Path')]
                 formats.append({
                     'format_id': format_id,
                     'quality': quality(format_id),
-                    'url': v,
+                    'url': value,
                 })
+
+            title = remove_end(self._html_search_regex(
+                r'(?s)<title>(.+?)</title>', webpage, 'title'
+            ).strip(), ' - AlloCiné')
+
         self._sort_formats(formats)
 
         return {
             'id': video_id,
-            'title': video['videoTitle'],
+            'display_id': display_id,
+            'title': title,
             'thumbnail': self._og_search_thumbnail(webpage),
             'formats': formats,
             'description': self._og_search_description(webpage),
index c739d2c99167dbd27e44dee326fcb49222029187..87c803e948fd2e04cde6b0b43251d3f804b952a0 100644 (file)
@@ -10,7 +10,7 @@ from ..utils import (
 
 
 class AMCNetworksIE(ThePlatformIE):
-    _VALID_URL = r'https?://(?:www\.)?(?:amc|bbcamerica|ifc|wetv)\.com/(?:movies/|shows/[^/]+/(?:full-episodes/)?season-\d+/episode-\d+(?:-(?:[^/]+/)?|/))(?P<id>[^/?#]+)'
+    _VALID_URL = r'https?://(?:www\.)?(?:amc|bbcamerica|ifc|wetv)\.com/(?:movies/|shows/[^/]+/(?:full-episodes/)?[^/]+/episode-\d+(?:-(?:[^/]+/)?|/))(?P<id>[^/?#]+)'
     _TESTS = [{
         'url': 'http://www.ifc.com/shows/maron/season-04/episode-01/step-1',
         'md5': '',
@@ -28,6 +28,7 @@ class AMCNetworksIE(ThePlatformIE):
             # m3u8 download
             'skip_download': True,
         },
+        'skip': 'Requires TV provider accounts',
     }, {
         'url': 'http://www.bbcamerica.com/shows/the-hunt/full-episodes/season-1/episode-01-the-hardest-challenge',
         'only_matching': True,
@@ -40,6 +41,9 @@ class AMCNetworksIE(ThePlatformIE):
     }, {
         'url': 'http://www.ifc.com/movies/chaos',
         'only_matching': True,
+    }, {
+        'url': 'http://www.bbcamerica.com/shows/doctor-who/full-episodes/the-power-of-the-daleks/episode-01-episode-1-color-version',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
index cb29cf11122f3f53ede969f829af5ea27666b0ff..623f44dceda866f1d4f8c947536effde11758d32 100644 (file)
@@ -157,22 +157,16 @@ class AnvatoIE(InfoExtractor):
             video_data_url, video_id, transform_source=strip_jsonp,
             data=json.dumps(payload).encode('utf-8'))
 
-    def _extract_anvato_videos(self, webpage, video_id):
-        anvplayer_data = self._parse_json(self._html_search_regex(
-            r'<script[^>]+data-anvp=\'([^\']+)\'', webpage,
-            'Anvato player data'), video_id)
-
-        video_id = anvplayer_data['video']
-        access_key = anvplayer_data['accessKey']
-
+    def _get_anvato_videos(self, access_key, video_id):
         video_data = self._get_video_json(access_key, video_id)
 
         formats = []
         for published_url in video_data['published_urls']:
             video_url = published_url['embed_url']
+            media_format = published_url.get('format')
             ext = determine_ext(video_url)
 
-            if ext == 'smil':
+            if ext == 'smil' or media_format == 'smil':
                 formats.extend(self._extract_smil_formats(video_url, video_id))
                 continue
 
@@ -183,7 +177,7 @@ class AnvatoIE(InfoExtractor):
                 'tbr': tbr if tbr != 0 else None,
             }
 
-            if ext == 'm3u8':
+            if ext == 'm3u8' or media_format in ('m3u8', 'm3u8-variant'):
                 # Not using _extract_m3u8_formats here as individual media
                 # playlists are also included in published_urls.
                 if tbr is None:
@@ -194,7 +188,7 @@ class AnvatoIE(InfoExtractor):
                         'format_id': '-'.join(filter(None, ['hls', compat_str(tbr)])),
                         'ext': 'mp4',
                     })
-            elif ext == 'mp3':
+            elif ext == 'mp3' or media_format == 'mp3':
                 a_format['vcodec'] = 'none'
             else:
                 a_format.update({
@@ -218,7 +212,19 @@ class AnvatoIE(InfoExtractor):
             'formats': formats,
             'title': video_data.get('def_title'),
             'description': video_data.get('def_description'),
+            'tags': video_data.get('def_tags', '').split(','),
             'categories': video_data.get('categories'),
             'thumbnail': video_data.get('thumbnail'),
+            'timestamp': int_or_none(video_data.get(
+                'ts_published') or video_data.get('ts_added')),
+            'uploader': video_data.get('mcp_id'),
+            'duration': int_or_none(video_data.get('duration')),
             'subtitles': subtitles,
         }
+
+    def _extract_anvato_videos(self, webpage, video_id):
+        anvplayer_data = self._parse_json(self._html_search_regex(
+            r'<script[^>]+data-anvp=\'([^\']+)\'', webpage,
+            'Anvato player data'), video_id)
+        return self._get_anvato_videos(
+            anvplayer_data['accessKey'], anvplayer_data['video'])
index 07e67dd3393962eedccd2460954711d01f08efdb..35f3656f11d7579a1f67cd0ac6e9c06a37c44917 100644 (file)
@@ -174,11 +174,15 @@ class ARDMediathekIE(InfoExtractor):
 
         webpage = self._download_webpage(url, video_id)
 
-        if '>Der gewünschte Beitrag ist nicht mehr verfügbar.<' in webpage:
-            raise ExtractorError('Video %s is no longer available' % video_id, expected=True)
+        ERRORS = (
+            ('>Leider liegt eine Störung vor.', 'Video %s is unavailable'),
+            ('>Der gewünschte Beitrag ist nicht mehr verfügbar.<',
+             'Video %s is no longer available'),
+        )
 
-        if 'Diese Sendung ist für Jugendliche unter 12 Jahren nicht geeignet. Der Clip ist deshalb nur von 20 bis 6 Uhr verfügbar.' in webpage:
-            raise ExtractorError('This program is only suitable for those aged 12 and older. Video %s is therefore only available between 20 pm and 6 am.' % video_id, expected=True)
+        for pattern, message in ERRORS:
+            if pattern in webpage:
+                raise ExtractorError(message % video_id, expected=True)
 
         if re.search(r'[\?&]rss($|[=&])', url):
             doc = compat_etree_fromstring(webpage.encode('utf-8'))
@@ -238,7 +242,7 @@ class ARDMediathekIE(InfoExtractor):
 
 
 class ARDIE(InfoExtractor):
-    _VALID_URL = '(?P<mainurl>https?://(www\.)?daserste\.de/[^?#]+/videos/(?P<display_id>[^/?#]+)-(?P<id>[0-9]+))\.html'
+    _VALID_URL = r'(?P<mainurl>https?://(www\.)?daserste\.de/[^?#]+/videos/(?P<display_id>[^/?#]+)-(?P<id>[0-9]+))\.html'
     _TEST = {
         'url': 'http://www.daserste.de/information/reportage-dokumentation/dokus/videos/die-story-im-ersten-mission-unter-falscher-flagge-100.html',
         'md5': 'd216c3a86493f9322545e045ddc3eb35',
index e0c5c18045312a064d8663a025a9fdaabb7a28df..69a23e88c5b08738a3ce66cce47215fe58e4fcc0 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 import re
@@ -410,6 +410,22 @@ class ArteTVEmbedIE(ArteTVPlus7IE):
         return self._extract_from_json_url(json_url, video_id, lang)
 
 
+class TheOperaPlatformIE(ArteTVPlus7IE):
+    IE_NAME = 'theoperaplatform'
+    _VALID_URL = r'https?://(?:www\.)?theoperaplatform\.eu/(?P<lang>fr|de|en|es)/(?P<id>[^/?#&]+)'
+
+    _TESTS = [{
+        'url': 'http://www.theoperaplatform.eu/de/opera/verdi-otello',
+        'md5': '970655901fa2e82e04c00b955e9afe7b',
+        'info_dict': {
+            'id': '060338-009-A',
+            'ext': 'mp4',
+            'title': 'Verdi - OTELLO',
+            'upload_date': '20160927',
+        },
+    }]
+
+
 class ArteTVPlaylistIE(ArteTVBaseIE):
     IE_NAME = 'arte.tv:playlist'
     _VALID_URL = r'https?://(?:www\.)?arte\.tv/guide/(?P<lang>fr|de|en|es)/[^#]*#collection/(?P<id>PL-\d+)'
index 2ec2d7092aca0468d122d9ab658f1ef848da7f90..d7d1c6306443b77dd7161b3c07480ad16c14ffa5 100644 (file)
@@ -6,8 +6,8 @@ from ..utils import float_or_none
 
 
 class AudioBoomIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?audioboom\.com/boos/(?P<id>[0-9]+)'
-    _TEST = {
+    _VALID_URL = r'https?://(?:www\.)?audioboom\.com/(?:boos|posts)/(?P<id>[0-9]+)'
+    _TESTS = [{
         'url': 'https://audioboom.com/boos/4279833-3-09-2016-czaban-hour-3?t=0',
         'md5': '63a8d73a055c6ed0f1e51921a10a5a76',
         'info_dict': {
@@ -19,7 +19,10 @@ class AudioBoomIE(InfoExtractor):
             'uploader': 'Steve Czaban',
             'uploader_url': 're:https?://(?:www\.)?audioboom\.com/channel/steveczabanyahoosportsradio',
         }
-    }
+    }, {
+        'url': 'https://audioboom.com/posts/4279833-3-09-2016-czaban-hour-3?t=0',
+        'only_matching': True,
+    }]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
similarity index 51%
rename from youtube_dl/extractor/dcn.py
rename to youtube_dl/extractor/awaan.py
index b8542820a36534fe548aac43ea7f64375396e3ba..a2603bbffef454481143cb253a24cdb0f4909ec1 100644 (file)
@@ -12,74 +12,51 @@ from ..compat import (
 from ..utils import (
     int_or_none,
     parse_iso8601,
-    sanitized_Request,
     smuggle_url,
     unsmuggle_url,
     urlencode_postdata,
 )
 
 
-class DCNIE(InfoExtractor):
+class AWAANIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?show/(?P<show_id>\d+)/[^/]+(?:/(?P<video_id>\d+)/(?P<season_id>\d+))?'
 
     def _real_extract(self, url):
         show_id, video_id, season_id = re.match(self._VALID_URL, url).groups()
         if video_id and int(video_id) > 0:
             return self.url_result(
-                'http://www.dcndigital.ae/media/%s' % video_id, 'DCNVideo')
+                'http://awaan.ae/media/%s' % video_id, 'AWAANVideo')
         elif season_id and int(season_id) > 0:
             return self.url_result(smuggle_url(
-                'http://www.dcndigital.ae/program/season/%s' % season_id,
-                {'show_id': show_id}), 'DCNSeason')
+                'http://awaan.ae/program/season/%s' % season_id,
+                {'show_id': show_id}), 'AWAANSeason')
         else:
             return self.url_result(
-                'http://www.dcndigital.ae/program/%s' % show_id, 'DCNSeason')
+                'http://awaan.ae/program/%s' % show_id, 'AWAANSeason')
 
 
-class DCNBaseIE(InfoExtractor):
-    def _extract_video_info(self, video_data, video_id, is_live):
+class AWAANBaseIE(InfoExtractor):
+    def _parse_video_data(self, video_data, video_id, is_live):
         title = video_data.get('title_en') or video_data['title_ar']
         img = video_data.get('img')
-        thumbnail = 'http://admin.mangomolo.com/analytics/%s' % img if img else None
-        duration = int_or_none(video_data.get('duration'))
-        description = video_data.get('description_en') or video_data.get('description_ar')
-        timestamp = parse_iso8601(video_data.get('create_time'), ' ')
 
         return {
             'id': video_id,
             'title': self._live_title(title) if is_live else title,
-            'description': description,
-            'thumbnail': thumbnail,
-            'duration': duration,
-            'timestamp': timestamp,
+            'description': video_data.get('description_en') or video_data.get('description_ar'),
+            'thumbnail': 'http://admin.mangomolo.com/analytics/%s' % img if img else None,
+            'duration': int_or_none(video_data.get('duration')),
+            'timestamp': parse_iso8601(video_data.get('create_time'), ' '),
             'is_live': is_live,
         }
 
-    def _extract_video_formats(self, webpage, video_id, m3u8_entry_protocol):
-        formats = []
-        format_url_base = 'http' + self._html_search_regex(
-            [
-                r'file\s*:\s*"https?(://[^"]+)/playlist.m3u8',
-                r'<a[^>]+href="rtsp(://[^"]+)"'
-            ], webpage, 'format url')
-        formats.extend(self._extract_mpd_formats(
-            format_url_base + '/manifest.mpd',
-            video_id, mpd_id='dash', fatal=False))
-        formats.extend(self._extract_m3u8_formats(
-            format_url_base + '/playlist.m3u8', video_id, 'mp4',
-            m3u8_entry_protocol, m3u8_id='hls', fatal=False))
-        formats.extend(self._extract_f4m_formats(
-            format_url_base + '/manifest.f4m',
-            video_id, f4m_id='hds', fatal=False))
-        self._sort_formats(formats)
-        return formats
-
-
-class DCNVideoIE(DCNBaseIE):
-    IE_NAME = 'dcn:video'
+
+class AWAANVideoIE(AWAANBaseIE):
+    IE_NAME = 'awaan:video'
     _VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?(?:video(?:/[^/]+)?|media|catchup/[^/]+/[^/]+)/(?P<id>\d+)'
     _TESTS = [{
         'url': 'http://www.dcndigital.ae/#/video/%D8%B1%D8%AD%D9%84%D8%A9-%D8%A7%D9%84%D8%B9%D9%85%D8%B1-%D8%A7%D9%84%D8%AD%D9%84%D9%82%D8%A9-1/17375',
+        'md5': '5f61c33bfc7794315c671a62d43116aa',
         'info_dict':
         {
             'id': '17375',
@@ -89,10 +66,7 @@ class DCNVideoIE(DCNBaseIE):
             'duration': 2041,
             'timestamp': 1227504126,
             'upload_date': '20081124',
-        },
-        'params': {
-            # m3u8 download
-            'skip_download': True,
+            'uploader_id': '71',
         },
     }, {
         'url': 'http://awaan.ae/video/26723981/%D8%AF%D8%A7%D8%B1-%D8%A7%D9%84%D8%B3%D9%84%D8%A7%D9%85:-%D8%AE%D9%8A%D8%B1-%D8%AF%D9%88%D8%B1-%D8%A7%D9%84%D8%A3%D9%86%D8%B5%D8%A7%D8%B1',
@@ -102,54 +76,69 @@ class DCNVideoIE(DCNBaseIE):
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
-        request = sanitized_Request(
+        video_data = self._download_json(
             'http://admin.mangomolo.com/analytics/index.php/plus/video?id=%s' % video_id,
-            headers={'Origin': 'http://www.dcndigital.ae'})
-        video_data = self._download_json(request, video_id)
-        info = self._extract_video_info(video_data, video_id, False)
-
-        webpage = self._download_webpage(
-            'http://admin.mangomolo.com/analytics/index.php/customers/embed/video?' +
-            compat_urllib_parse_urlencode({
-                'id': video_data['id'],
-                'user_id': video_data['user_id'],
-                'signature': video_data['signature'],
-                'countries': 'Q0M=',
-                'filter': 'DENY',
-            }), video_id)
-        info['formats'] = self._extract_video_formats(webpage, video_id, 'm3u8_native')
+            video_id, headers={'Origin': 'http://awaan.ae'})
+        info = self._parse_video_data(video_data, video_id, False)
+
+        embed_url = 'http://admin.mangomolo.com/analytics/index.php/customers/embed/video?' + compat_urllib_parse_urlencode({
+            'id': video_data['id'],
+            'user_id': video_data['user_id'],
+            'signature': video_data['signature'],
+            'countries': 'Q0M=',
+            'filter': 'DENY',
+        })
+        info.update({
+            '_type': 'url_transparent',
+            'url': embed_url,
+            'ie_key': 'MangomoloVideo',
+        })
         return info
 
 
-class DCNLiveIE(DCNBaseIE):
-    IE_NAME = 'dcn:live'
+class AWAANLiveIE(AWAANBaseIE):
+    IE_NAME = 'awaan:live'
     _VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?live/(?P<id>\d+)'
+    _TEST = {
+        'url': 'http://awaan.ae/live/6/dubai-tv',
+        'info_dict': {
+            'id': '6',
+            'ext': 'mp4',
+            'title': 're:Dubai Al Oula [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+            'upload_date': '20150107',
+            'timestamp': 1420588800,
+        },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        },
+    }
 
     def _real_extract(self, url):
         channel_id = self._match_id(url)
 
-        request = sanitized_Request(
+        channel_data = self._download_json(
             'http://admin.mangomolo.com/analytics/index.php/plus/getchanneldetails?channel_id=%s' % channel_id,
-            headers={'Origin': 'http://www.dcndigital.ae'})
-
-        channel_data = self._download_json(request, channel_id)
-        info = self._extract_video_info(channel_data, channel_id, True)
-
-        webpage = self._download_webpage(
-            'http://admin.mangomolo.com/analytics/index.php/customers/embed/index?' +
-            compat_urllib_parse_urlencode({
-                'id': base64.b64encode(channel_data['user_id'].encode()).decode(),
-                'channelid': base64.b64encode(channel_data['id'].encode()).decode(),
-                'signature': channel_data['signature'],
-                'countries': 'Q0M=',
-                'filter': 'DENY',
-            }), channel_id)
-        info['formats'] = self._extract_video_formats(webpage, channel_id, 'm3u8')
+            channel_id, headers={'Origin': 'http://awaan.ae'})
+        info = self._parse_video_data(channel_data, channel_id, True)
+
+        embed_url = 'http://admin.mangomolo.com/analytics/index.php/customers/embed/index?' + compat_urllib_parse_urlencode({
+            'id': base64.b64encode(channel_data['user_id'].encode()).decode(),
+            'channelid': base64.b64encode(channel_data['id'].encode()).decode(),
+            'signature': channel_data['signature'],
+            'countries': 'Q0M=',
+            'filter': 'DENY',
+        })
+        info.update({
+            '_type': 'url_transparent',
+            'url': embed_url,
+            'ie_key': 'MangomoloLive',
+        })
         return info
 
 
-class DCNSeasonIE(InfoExtractor):
-    IE_NAME = 'dcn:season'
+class AWAANSeasonIE(InfoExtractor):
+    IE_NAME = 'awaan:season'
     _VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?program/(?:(?P<show_id>\d+)|season/(?P<season_id>\d+))'
     _TEST = {
         'url': 'http://dcndigital.ae/#/program/205024/%D9%85%D8%AD%D8%A7%D8%B6%D8%B1%D8%A7%D8%AA-%D8%A7%D9%84%D8%B4%D9%8A%D8%AE-%D8%A7%D9%84%D8%B4%D8%B9%D8%B1%D8%A7%D9%88%D9%8A',
@@ -170,21 +159,17 @@ class DCNSeasonIE(InfoExtractor):
             data['season'] = season_id
             show_id = smuggled_data.get('show_id')
             if show_id is None:
-                request = sanitized_Request(
+                season = self._download_json(
                     'http://admin.mangomolo.com/analytics/index.php/plus/season_info?id=%s' % season_id,
-                    headers={'Origin': 'http://www.dcndigital.ae'})
-                season = self._download_json(request, season_id)
+                    season_id, headers={'Origin': 'http://awaan.ae'})
                 show_id = season['id']
         data['show_id'] = show_id
-        request = sanitized_Request(
+        show = self._download_json(
             'http://admin.mangomolo.com/analytics/index.php/plus/show',
-            urlencode_postdata(data),
-            {
-                'Origin': 'http://www.dcndigital.ae',
+            show_id, data=urlencode_postdata(data), headers={
+                'Origin': 'http://awaan.ae',
                 'Content-Type': 'application/x-www-form-urlencoded'
             })
-
-        show = self._download_json(request, show_id)
         if not season_id:
             season_id = show['default_season']
         for season in show['seasons']:
@@ -195,6 +180,6 @@ class DCNSeasonIE(InfoExtractor):
                 for video in show['videos']:
                     video_id = compat_str(video['id'])
                     entries.append(self.url_result(
-                        'http://www.dcndigital.ae/media/%s' % video_id, 'DCNVideo', video_id))
+                        'http://awaan.ae/media/%s' % video_id, 'AWAANVideo', video_id))
 
                 return self.playlist_result(entries, season_id, title)
index a813eb429fe8168c2e4223342fd6540647fe127a..1eebf5dfd48d31654f3612bbb058c0dd7aa9e030 100644 (file)
@@ -11,7 +11,7 @@ from ..utils import (
 
 
 class AzubuIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?azubu\.tv/[^/]+#!/play/(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www\.)?azubu\.(?:tv|uol.com.br)/[^/]+#!/play/(?P<id>\d+)'
     _TESTS = [
         {
             'url': 'http://www.azubu.tv/GSL#!/play/15575/2014-hot6-cup-last-big-match-ro8-day-1',
@@ -103,12 +103,15 @@ class AzubuIE(InfoExtractor):
 
 
 class AzubuLiveIE(InfoExtractor):
-    _VALID_URL = r'https?://www.azubu.tv/(?P<id>[^/]+)$'
+    _VALID_URL = r'https?://(?:www\.)?azubu\.(?:tv|uol.com.br)/(?P<id>[^/]+)$'
 
-    _TEST = {
+    _TESTS = [{
         'url': 'http://www.azubu.tv/MarsTVMDLen',
         'only_matching': True,
-    }
+    }, {
+        'url': 'http://azubu.uol.com.br/adolfz',
+        'only_matching': True,
+    }]
 
     def _real_extract(self, url):
         user = self._match_id(url)
index 991ab0676e6b93a1c64d04f48b3728551bc4ccf0..88c590e98388d5f6058dd71ffb97f4f0254f0c5b 100644 (file)
@@ -1,7 +1,9 @@
 from __future__ import unicode_literals
 
 import json
+import random
 import re
+import time
 
 from .common import InfoExtractor
 from ..compat import (
@@ -12,6 +14,9 @@ from ..utils import (
     ExtractorError,
     float_or_none,
     int_or_none,
+    parse_filesize,
+    unescapeHTML,
+    update_url_query,
 )
 
 
@@ -81,35 +86,68 @@ class BandcampIE(InfoExtractor):
             r'(?ms)var TralbumData = .*?[{,]\s*id: (?P<id>\d+),?$',
             webpage, 'video id')
 
-        download_webpage = self._download_webpage(download_link, video_id, 'Downloading free downloads page')
-        # We get the dictionary of the track from some javascript code
-        all_info = self._parse_json(self._search_regex(
-            r'(?sm)items: (.*?),$', download_webpage, 'items'), video_id)
-        info = all_info[0]
-        # We pick mp3-320 for now, until format selection can be easily implemented.
-        mp3_info = info['downloads']['mp3-320']
-        # If we try to use this url it says the link has expired
-        initial_url = mp3_info['url']
-        m_url = re.match(
-            r'(?P<server>http://(.*?)\.bandcamp\.com)/download/track\?enc=mp3-320&fsig=(?P<fsig>.*?)&id=(?P<id>.*?)&ts=(?P<ts>.*)$',
-            initial_url)
-        # We build the url we will use to get the final track url
-        # This url is build in Bandcamp in the script download_bunde_*.js
-        request_url = '%s/statdownload/track?enc=mp3-320&fsig=%s&id=%s&ts=%s&.rand=665028774616&.vrs=1' % (m_url.group('server'), m_url.group('fsig'), video_id, m_url.group('ts'))
-        final_url_webpage = self._download_webpage(request_url, video_id, 'Requesting download url')
-        # If we could correctly generate the .rand field the url would be
-        # in the "download_url" key
-        final_url = self._proto_relative_url(self._search_regex(
-            r'"retry_url":"(.+?)"', final_url_webpage, 'final video URL'), 'http:')
+        download_webpage = self._download_webpage(
+            download_link, video_id, 'Downloading free downloads page')
+
+        blob = self._parse_json(
+            self._search_regex(
+                r'data-blob=(["\'])(?P<blob>{.+?})\1', download_webpage,
+                'blob', group='blob'),
+            video_id, transform_source=unescapeHTML)
+
+        info = blob['digital_items'][0]
+
+        downloads = info['downloads']
+        track = info['title']
+
+        artist = info.get('artist')
+        title = '%s - %s' % (artist, track) if artist else track
+
+        download_formats = {}
+        for f in blob['download_formats']:
+            name, ext = f.get('name'), f.get('file_extension')
+            if all(isinstance(x, compat_str) for x in (name, ext)):
+                download_formats[name] = ext.strip('.')
+
+        formats = []
+        for format_id, f in downloads.items():
+            format_url = f.get('url')
+            if not format_url:
+                continue
+            # Stat URL generation algorithm is reverse engineered from
+            # download_*_bundle_*.js
+            stat_url = update_url_query(
+                format_url.replace('/download/', '/statdownload/'), {
+                    '.rand': int(time.time() * 1000 * random.random()),
+                })
+            format_id = f.get('encoding_name') or format_id
+            stat = self._download_json(
+                stat_url, video_id, 'Downloading %s JSON' % format_id,
+                transform_source=lambda s: s[s.index('{'):s.rindex('}') + 1],
+                fatal=False)
+            if not stat:
+                continue
+            retry_url = stat.get('retry_url')
+            if not isinstance(retry_url, compat_str):
+                continue
+            formats.append({
+                'url': self._proto_relative_url(retry_url, 'http:'),
+                'ext': download_formats.get(format_id),
+                'format_id': format_id,
+                'format_note': f.get('description'),
+                'filesize': parse_filesize(f.get('size_mb')),
+                'vcodec': 'none',
+            })
+        self._sort_formats(formats)
 
         return {
             'id': video_id,
-            'title': info['title'],
-            'ext': 'mp3',
-            'vcodec': 'none',
-            'url': final_url,
+            'title': title,
             'thumbnail': info.get('thumb_url'),
             'uploader': info.get('artist'),
+            'artist': artist,
+            'track': track,
+            'formats': formats,
         }
 
 
@@ -162,6 +200,15 @@ class BandcampAlbumIE(InfoExtractor):
             'uploader_id': 'dotscale',
         },
         'playlist_mincount': 7,
+    }, {
+        # with escaped quote in title
+        'url': 'https://jstrecords.bandcamp.com/album/entropy-ep',
+        'info_dict': {
+            'title': '"Entropy" EP',
+            'uploader_id': 'jstrecords',
+            'id': 'entropy-ep',
+        },
+        'playlist_mincount': 3,
     }]
 
     def _real_extract(self, url):
@@ -176,8 +223,11 @@ class BandcampAlbumIE(InfoExtractor):
         entries = [
             self.url_result(compat_urlparse.urljoin(url, t_path), ie=BandcampIE.ie_key())
             for t_path in tracks_paths]
-        title = self._search_regex(
-            r'album_title\s*:\s*"(.*?)"', webpage, 'title', fatal=False)
+        title = self._html_search_regex(
+            r'album_title\s*:\s*"((?:\\.|[^"\\])+?)"',
+            webpage, 'title', fatal=False)
+        if title:
+            title = title.replace(r'\"', '"')
         return {
             '_type': 'playlist',
             'uploader_id': uploader_id,
index deb9cc1c0fe2855c77c330f36f06069e8448189f..b17916137ec51808e8c0c869142d37bf083c90e0 100644 (file)
@@ -1028,7 +1028,7 @@ class BBCIE(BBCCoUkIE):
 
 
 class BBCCoUkArticleIE(InfoExtractor):
-    _VALID_URL = r'https?://www.bbc.co.uk/programmes/articles/(?P<id>[a-zA-Z0-9]+)'
+    _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/programmes/articles/(?P<id>[a-zA-Z0-9]+)'
     IE_NAME = 'bbc.co.uk:article'
     IE_DESC = 'BBC articles'
 
similarity index 89%
rename from youtube_dl/extractor/beatportpro.py
rename to youtube_dl/extractor/beatport.py
index 3c7775d3e2762bef7f9424ffa769346dae2d577f..e6070941790b9c6e7cbabba5d11b60dfe512930a 100644 (file)
@@ -8,10 +8,10 @@ from ..compat import compat_str
 from ..utils import int_or_none
 
 
-class BeatportProIE(InfoExtractor):
-    _VALID_URL = r'https?://pro\.beatport\.com/track/(?P<display_id>[^/]+)/(?P<id>[0-9]+)'
+class BeatportIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.|pro\.)?beatport\.com/track/(?P<display_id>[^/]+)/(?P<id>[0-9]+)'
     _TESTS = [{
-        'url': 'https://pro.beatport.com/track/synesthesia-original-mix/5379371',
+        'url': 'https://beatport.com/track/synesthesia-original-mix/5379371',
         'md5': 'b3c34d8639a2f6a7f734382358478887',
         'info_dict': {
             'id': '5379371',
@@ -20,7 +20,7 @@ class BeatportProIE(InfoExtractor):
             'title': 'Froxic - Synesthesia (Original Mix)',
         },
     }, {
-        'url': 'https://pro.beatport.com/track/love-and-war-original-mix/3756896',
+        'url': 'https://beatport.com/track/love-and-war-original-mix/3756896',
         'md5': 'e44c3025dfa38c6577fbaeb43da43514',
         'info_dict': {
             'id': '3756896',
@@ -29,7 +29,7 @@ class BeatportProIE(InfoExtractor):
             'title': 'Wolfgang Gartner - Love & War (Original Mix)',
         },
     }, {
-        'url': 'https://pro.beatport.com/track/birds-original-mix/4991738',
+        'url': 'https://beatport.com/track/birds-original-mix/4991738',
         'md5': 'a1fd8e8046de3950fd039304c186c05f',
         'info_dict': {
             'id': '4991738',
index 956c7680e2ecc46a1df493947ed0be7b973d81b8..b0b7914d89777fcba136a12562f771bf4f2af4d6 100644 (file)
@@ -46,19 +46,19 @@ class BeegIE(InfoExtractor):
                 self._proto_relative_url(cpl_url), video_id,
                 'Downloading cpl JS', fatal=False)
             if cpl:
-                beeg_version = self._search_regex(
-                    r'beeg_version\s*=\s*(\d+)', cpl,
-                    'beeg version', default=None) or self._search_regex(
+                beeg_version = int_or_none(self._search_regex(
+                    r'beeg_version\s*=\s*([^\b]+)', cpl,
+                    'beeg version', default=None)) or self._search_regex(
                     r'/(\d+)\.js', cpl_url, 'beeg version', default=None)
                 beeg_salt = self._search_regex(
-                    r'beeg_salt\s*=\s*(["\'])(?P<beeg_salt>.+?)\1', cpl, 'beeg beeg_salt',
+                    r'beeg_salt\s*=\s*(["\'])(?P<beeg_salt>.+?)\1', cpl, 'beeg salt',
                     default=None, group='beeg_salt')
 
-        beeg_version = beeg_version or '1750'
-        beeg_salt = beeg_salt or 'MIDtGaw96f0N1kMMAM1DE46EC9pmFr'
+        beeg_version = beeg_version or '2000'
+        beeg_salt = beeg_salt or 'pmweAkq8lAYKdfWcFCUj0yoVgoPlinamH5UE1CB3H'
 
         video = self._download_json(
-            'http://api.beeg.com/api/v6/%s/video/%s' % (beeg_version, video_id),
+            'https://api.beeg.com/api/v6/%s/video/%s' % (beeg_version, video_id),
             video_id)
 
         def split(o, e):
diff --git a/youtube_dl/extractor/bellmedia.py b/youtube_dl/extractor/bellmedia.py
new file mode 100644 (file)
index 0000000..32326ed
--- /dev/null
@@ -0,0 +1,75 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+
+class BellMediaIE(InfoExtractor):
+    _VALID_URL = r'''(?x)https?://(?:www\.)?
+        (?P<domain>
+            (?:
+                ctv|
+                tsn|
+                bnn|
+                thecomedynetwork|
+                discovery|
+                discoveryvelocity|
+                sciencechannel|
+                investigationdiscovery|
+                animalplanet|
+                bravo|
+                mtv|
+                space
+            )\.ca|
+            much\.com
+        )/.*?(?:\bvid=|-vid|~|%7E|/(?:episode)?)(?P<id>[0-9]{6})'''
+    _TESTS = [{
+        'url': 'http://www.ctv.ca/video/player?vid=706966',
+        'md5': 'ff2ebbeae0aa2dcc32a830c3fd69b7b0',
+        'info_dict': {
+            'id': '706966',
+            'ext': 'mp4',
+            'title': 'Larry Day and Richard Jutras on the TIFF red carpet of \'Stonewall\'',
+            'description': 'etalk catches up with Larry Day and Richard Jutras on the TIFF red carpet of "Stonewall”.',
+            'upload_date': '20150919',
+            'timestamp': 1442624700,
+        },
+        'expected_warnings': ['HTTP Error 404'],
+    }, {
+        'url': 'http://www.thecomedynetwork.ca/video/player?vid=923582',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.tsn.ca/video/expectations-high-for-milos-raonic-at-us-open~939549',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.bnn.ca/video/berman-s-call-part-two-viewer-questions~939654',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.ctv.ca/YourMorning/Video/S1E6-Monday-August-29-2016-vid938009',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.much.com/shows/atmidnight/episode948007/tuesday-september-13-2016',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.much.com/shows/the-almost-impossible-gameshow/928979/episode-6',
+        'only_matching': True,
+    }]
+    _DOMAINS = {
+        'thecomedynetwork': 'comedy',
+        'discoveryvelocity': 'discvel',
+        'sciencechannel': 'discsci',
+        'investigationdiscovery': 'invdisc',
+        'animalplanet': 'aniplan',
+    }
+
+    def _real_extract(self, url):
+        domain, video_id = re.match(self._VALID_URL, url).groups()
+        domain = domain.split('.')[0]
+        return {
+            '_type': 'url_transparent',
+            'id': video_id,
+            'url': '9c9media:%s_web:%s' % (self._DOMAINS.get(domain, domain), video_id),
+            'ie_key': 'NineCNineMedia',
+        }
index bd3ee2e2eb3822253bb04fceb253d4028448f5f5..1f8ef030380c5fb548d14cc8e944c8dad1fca900 100644 (file)
@@ -2,7 +2,6 @@ from __future__ import unicode_literals
 
 from .mtv import MTVServicesInfoExtractor
 from ..utils import unified_strdate
-from ..compat import compat_urllib_parse_urlencode
 
 
 class BetIE(MTVServicesInfoExtractor):
@@ -53,9 +52,9 @@ class BetIE(MTVServicesInfoExtractor):
     _FEED_URL = "http://feeds.mtvnservices.com/od/feed/bet-mrss-player"
 
     def _get_feed_query(self, uri):
-        return compat_urllib_parse_urlencode({
+        return {
             'uuid': uri,
-        })
+        }
 
     def _extract_mgid(self, webpage):
         return self._search_regex(r'data-uri="([^"]+)', webpage, 'mgid')
index d8eb718212b3d8482f7ac7cf479217da2391de6d..2d174e6f9a81da7412cd58ac316c7b5924dcde78 100644 (file)
@@ -1,33 +1,27 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
-import calendar
-import datetime
+import hashlib
 import re
 
 from .common import InfoExtractor
-from ..compat import (
-    compat_etree_fromstring,
-    compat_str,
-    compat_parse_qs,
-    compat_xml_parse_error,
-)
+from ..compat import compat_parse_qs
 from ..utils import (
-    ExtractorError,
     int_or_none,
     float_or_none,
-    xpath_text,
+    unified_timestamp,
+    urlencode_postdata,
 )
 
 
 class BiliBiliIE(InfoExtractor):
-    _VALID_URL = r'https?://www\.bilibili\.(?:tv|com)/video/av(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www\.|bangumi\.|)bilibili\.(?:tv|com)/(?:video/av|anime/v/)(?P<id>\d+)'
 
-    _TESTS = [{
+    _TEST = {
         'url': 'http://www.bilibili.tv/video/av1074402/',
         'md5': '9fa226fe2b8a9a4d5a69b4c6a183417e',
         'info_dict': {
-            'id': '1554319',
+            'id': '1074402',
             'ext': 'mp4',
             'title': '【金坷垃】金泡沫',
             'description': 'md5:ce18c2a2d2193f0df2917d270f2e5923',
@@ -38,128 +32,70 @@ class BiliBiliIE(InfoExtractor):
             'uploader': '菊子桑',
             'uploader_id': '156160',
         },
-    }, {
-        'url': 'http://www.bilibili.com/video/av1041170/',
-        'info_dict': {
-            'id': '1507019',
-            'ext': 'mp4',
-            'title': '【BD1080P】刀语【诸神&异域】',
-            'description': '这是个神奇的故事~每个人不留弹幕不给走哦~切利哦!~',
-            'timestamp': 1396530060,
-            'upload_date': '20140403',
-            'uploader': '枫叶逝去',
-            'uploader_id': '520116',
-        },
-    }, {
-        'url': 'http://www.bilibili.com/video/av4808130/',
-        'info_dict': {
-            'id': '7802182',
-            'ext': 'mp4',
-            'title': '【长篇】哆啦A梦443【钉铛】',
-            'description': '(2016.05.27)来组合客人的脸吧&amp;amp;寻母六千里锭 抱歉,又轮到周日上班现在才到家 封面www.pixiv.net/member_illust.php?mode=medium&amp;amp;illust_id=56912929',
-            'timestamp': 1464564180,
-            'upload_date': '20160529',
-            'uploader': '喜欢拉面',
-            'uploader_id': '151066',
-        },
-    }, {
-        # Missing upload time
-        'url': 'http://www.bilibili.com/video/av1867637/',
-        'info_dict': {
-            'id': '2880301',
-            'ext': 'mp4',
-            'title': '【HDTV】【喜剧】岳父岳母真难当 (2014)【法国票房冠军】',
-            'description': '一个信奉天主教的法国旧式传统资产阶级家庭中有四个女儿。三个女儿却分别找了阿拉伯、犹太、中国丈夫,老夫老妻唯独期盼剩下未嫁的小女儿能找一个信奉天主教的法国白人,结果没想到小女儿找了一位非裔黑人……【这次应该不会跳帧了】',
-            'uploader': '黑夜为猫',
-            'uploader_id': '610729',
-        },
-        'params': {
-            # Just to test metadata extraction
-            'skip_download': True,
-        },
-        'expected_warnings': ['upload time'],
-    }]
+    }
 
-    # BiliBili blocks keys from time to time. The current key is extracted from
-    # the Android client
-    # TODO: find the sign algorithm used in the flash player
-    _APP_KEY = '86385cdc024c0f6c'
+    _APP_KEY = '6f90a59ac58a4123'
+    _BILIBILI_KEY = '0bfd84cc3940035173f35e6777508326'
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-
+        video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
 
-        params = compat_parse_qs(self._search_regex(
-            [r'EmbedPlayer\([^)]+,\s*"([^"]+)"\)',
-             r'<iframe[^>]+src="https://secure\.bilibili\.com/secure,([^"]+)"'],
-            webpage, 'player parameters'))
-        cid = params['cid'][0]
-
-        info_xml_str = self._download_webpage(
-            'http://interface.bilibili.com/v_cdn_play',
-            cid, query={'appkey': self._APP_KEY, 'cid': cid},
-            note='Downloading video info page')
-
-        err_msg = None
-        durls = None
-        info_xml = None
-        try:
-            info_xml = compat_etree_fromstring(info_xml_str.encode('utf-8'))
-        except compat_xml_parse_error:
-            info_json = self._parse_json(info_xml_str, video_id, fatal=False)
-            err_msg = (info_json or {}).get('error_text')
+        if 'anime/v' not in url:
+            cid = compat_parse_qs(self._search_regex(
+                [r'EmbedPlayer\([^)]+,\s*"([^"]+)"\)',
+                 r'<iframe[^>]+src="https://secure\.bilibili\.com/secure,([^"]+)"'],
+                webpage, 'player parameters'))['cid'][0]
         else:
-            err_msg = xpath_text(info_xml, './message')
+            js = self._download_json(
+                'http://bangumi.bilibili.com/web_api/get_source', video_id,
+                data=urlencode_postdata({'episode_id': video_id}),
+                headers={'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8'})
+            cid = js['result']['cid']
+
+        payload = 'appkey=%s&cid=%s&otype=json&quality=2&type=mp4' % (self._APP_KEY, cid)
+        sign = hashlib.md5((payload + self._BILIBILI_KEY).encode('utf-8')).hexdigest()
 
-        if info_xml is not None:
-            durls = info_xml.findall('./durl')
-        if not durls:
-            if err_msg:
-                raise ExtractorError('%s said: %s' % (self.IE_NAME, err_msg), expected=True)
-            else:
-                raise ExtractorError('No videos found!')
+        video_info = self._download_json(
+            'http://interface.bilibili.com/playurl?%s&sign=%s' % (payload, sign),
+            video_id, note='Downloading video info page')
 
         entries = []
 
-        for durl in durls:
-            size = xpath_text(durl, ['./filesize', './size'])
+        for idx, durl in enumerate(video_info['durl']):
             formats = [{
-                'url': durl.find('./url').text,
-                'filesize': int_or_none(size),
+                'url': durl['url'],
+                'filesize': int_or_none(durl['size']),
             }]
-            for backup_url in durl.findall('./backup_url/url'):
+            for backup_url in durl.get('backup_url', []):
                 formats.append({
-                    'url': backup_url.text,
+                    'url': backup_url,
                     # backup URLs have lower priorities
-                    'preference': -2 if 'hd.mp4' in backup_url.text else -3,
+                    'preference': -2 if 'hd.mp4' in backup_url else -3,
                 })
 
             self._sort_formats(formats)
 
             entries.append({
-                'id': '%s_part%s' % (cid, xpath_text(durl, './order')),
-                'duration': int_or_none(xpath_text(durl, './length'), 1000),
+                'id': '%s_part%s' % (video_id, idx),
+                'duration': float_or_none(durl.get('length'), 1000),
                 'formats': formats,
             })
 
         title = self._html_search_regex('<h1[^>]+title="([^"]+)">', webpage, 'title')
         description = self._html_search_meta('description', webpage)
-        datetime_str = self._html_search_regex(
-            r'<time[^>]+datetime="([^"]+)"', webpage, 'upload time', fatal=False)
-        timestamp = None
-        if datetime_str:
-            timestamp = calendar.timegm(datetime.datetime.strptime(datetime_str, '%Y-%m-%dT%H:%M').timetuple())
+        timestamp = unified_timestamp(self._html_search_regex(
+            r'<time[^>]+datetime="([^"]+)"', webpage, 'upload time', fatal=False))
+        thumbnail = self._html_search_meta(['og:image', 'thumbnailUrl'], webpage)
 
         # TODO 'view_count' requires deobfuscating Javascript
         info = {
-            'id': compat_str(cid),
+            'id': video_id,
             'title': title,
             'description': description,
             'timestamp': timestamp,
-            'thumbnail': self._html_search_meta('thumbnailUrl', webpage),
-            'duration': float_or_none(xpath_text(info_xml, './timelength'), scale=1000),
+            'thumbnail': thumbnail,
+            'duration': float_or_none(video_info.get('timelength'), scale=1000),
         }
 
         uploader_mobj = re.search(
index 6ad45a1e6a30bac2450743de3f0d12a2c9f2b89d..9661ade4f312e5c5e1068a42d3a693dd936fd1d6 100644 (file)
@@ -12,7 +12,7 @@ from ..utils import (
 
 class BpbIE(InfoExtractor):
     IE_DESC = 'Bundeszentrale für politische Bildung'
-    _VALID_URL = r'https?://www\.bpb\.de/mediathek/(?P<id>[0-9]+)/'
+    _VALID_URL = r'https?://(?:www\.)?bpb\.de/mediathek/(?P<id>[0-9]+)/'
 
     _TEST = {
         'url': 'http://www.bpb.de/mediathek/297/joachim-gauck-zu-1989-und-die-erinnerung-an-die-ddr',
index 541c769445796c5388743fd739f0823ca3228fb6..a25d500e478b90c983cd676249fdf6bc676c68fa 100644 (file)
@@ -1,31 +1,74 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
-from .common import InfoExtractor
-from ..utils import smuggle_url
+from .adobepass import AdobePassIE
+from ..utils import (
+    smuggle_url,
+    update_url_query,
+    int_or_none,
+)
 
 
-class BravoTVIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?bravotv\.com/(?:[^/]+/)+videos/(?P<id>[^/?]+)'
-    _TEST = {
+class BravoTVIE(AdobePassIE):
+    _VALID_URL = r'https?://(?:www\.)?bravotv\.com/(?:[^/]+/)+(?P<id>[^/?#]+)'
+    _TESTS = [{
         'url': 'http://www.bravotv.com/last-chance-kitchen/season-5/videos/lck-ep-12-fishy-finale',
-        'md5': 'd60cdf68904e854fac669bd26cccf801',
+        'md5': '9086d0b7ef0ea2aabc4781d75f4e5863',
         'info_dict': {
-            'id': 'LitrBdX64qLn',
+            'id': 'zHyk1_HU_mPy',
             'ext': 'mp4',
-            'title': 'Last Chance Kitchen Returns',
-            'description': 'S13: Last Chance Kitchen Returns for Top Chef Season 13',
-            'timestamp': 1448926740,
-            'upload_date': '20151130',
+            'title': 'LCK Ep 12: Fishy Finale',
+            'description': 'S13/E12: Two eliminated chefs have just 12 minutes to cook up a delicious fish dish.',
             'uploader': 'NBCU-BRAV',
+            'upload_date': '20160302',
+            'timestamp': 1456945320,
         }
-    }
+    }, {
+        'url': 'http://www.bravotv.com/below-deck/season-3/ep-14-reunion-part-1',
+        'only_matching': True,
+    }]
 
     def _real_extract(self, url):
-        video_id = self._match_id(url)
-        webpage = self._download_webpage(url, video_id)
-        account_pid = self._search_regex(r'"account_pid"\s*:\s*"([^"]+)"', webpage, 'account pid')
-        release_pid = self._search_regex(r'"release_pid"\s*:\s*"([^"]+)"', webpage, 'release pid')
-        return self.url_result(smuggle_url(
-            'http://link.theplatform.com/s/%s/%s?mbr=true&switch=progressive' % (account_pid, release_pid),
-            {'force_smil_url': True}), 'ThePlatform', release_pid)
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+        settings = self._parse_json(self._search_regex(
+            r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);', webpage, 'drupal settings'),
+            display_id)
+        info = {}
+        query = {
+            'mbr': 'true',
+        }
+        account_pid, release_pid = [None] * 2
+        tve = settings.get('sharedTVE')
+        if tve:
+            query['manifest'] = 'm3u'
+            account_pid = 'HNK2IC'
+            release_pid = tve['release_pid']
+            if tve.get('entitlement') == 'auth':
+                adobe_pass = settings.get('adobePass', {})
+                resource = self._get_mvpd_resource(
+                    adobe_pass.get('adobePassResourceId', 'bravo'),
+                    tve['title'], release_pid, tve.get('rating'))
+                query['auth'] = self._extract_mvpd_auth(
+                    url, release_pid, adobe_pass.get('adobePassRequestorId', 'bravo'), resource)
+        else:
+            shared_playlist = settings['shared_playlist']
+            account_pid = shared_playlist['account_pid']
+            metadata = shared_playlist['video_metadata'][shared_playlist['default_clip']]
+            release_pid = metadata['release_pid']
+            info.update({
+                'title': metadata['title'],
+                'description': metadata.get('description'),
+                'season_number': int_or_none(metadata.get('season_num')),
+                'episode_number': int_or_none(metadata.get('episode_num')),
+            })
+            query['switch'] = 'progressive'
+        info.update({
+            '_type': 'url_transparent',
+            'id': release_pid,
+            'url': smuggle_url(update_url_query(
+                'http://link.theplatform.com/s/%s/%s' % (account_pid, release_pid),
+                query), {'force_smil_url': True}),
+            'ie_key': 'ThePlatform',
+        })
+        return info
index aeb22be168402fd4e7e134c164339db74f95a0f8..945cf19e8bce0f1f9576d26abc455c9795a250d3 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 import re
@@ -621,15 +621,21 @@ class BrightcoveNewIE(InfoExtractor):
                     'url': text_track['src'],
                 })
 
+        is_live = False
+        duration = float_or_none(json_data.get('duration'), 1000)
+        if duration and duration < 0:
+            is_live = True
+
         return {
             'id': video_id,
-            'title': title,
+            'title': self._live_title(title) if is_live else title,
             'description': clean_html(json_data.get('description')),
             'thumbnail': json_data.get('thumbnail') or json_data.get('poster'),
-            'duration': float_or_none(json_data.get('duration'), 1000),
+            'duration': duration,
             'timestamp': parse_iso8601(json_data.get('published_at')),
             'uploader_id': account_id,
             'formats': formats,
             'subtitles': subtitles,
             'tags': json_data.get('tags', []),
+            'is_live': is_live,
         }
index 3aec601f8e7179570088e1ea5ad1f7b6d30f219d..4be175d7039dd845f7c961af552bc1153b73598e 100644 (file)
@@ -1,6 +1,5 @@
 from __future__ import unicode_literals
 
-import json
 import re
 
 from .common import InfoExtractor
@@ -8,15 +7,15 @@ from ..utils import ExtractorError
 
 
 class BYUtvIE(InfoExtractor):
-    _VALID_URL = r'^https?://(?:www\.)?byutv.org/watch/[0-9a-f-]+/(?P<video_id>[^/?#]+)'
-    _TEST = {
+    _VALID_URL = r'https?://(?:www\.)?byutv\.org/watch/(?!event/)(?P<id>[0-9a-f-]+)(?:/(?P<display_id>[^/?#&]+))?'
+    _TESTS = [{
         'url': 'http://www.byutv.org/watch/6587b9a3-89d2-42a6-a7f7-fd2f81840a7d/studio-c-season-5-episode-5',
-        'md5': '05850eb8c749e2ee05ad5a1c34668493',
         'info_dict': {
-            'id': 'studio-c-season-5-episode-5',
+            'id': '6587b9a3-89d2-42a6-a7f7-fd2f81840a7d',
+            'display_id': 'studio-c-season-5-episode-5',
             'ext': 'mp4',
-            'description': 'md5:e07269172baff037f8e8bf9956bc9747',
             'title': 'Season 5 Episode 5',
+            'description': 'md5:e07269172baff037f8e8bf9956bc9747',
             'thumbnail': 're:^https?://.*\.jpg$',
             'duration': 1486.486,
         },
@@ -24,28 +23,71 @@ class BYUtvIE(InfoExtractor):
             'skip_download': True,
         },
         'add_ie': ['Ooyala'],
-    }
+    }, {
+        'url': 'http://www.byutv.org/watch/6587b9a3-89d2-42a6-a7f7-fd2f81840a7d',
+        'only_matching': True,
+    }]
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('video_id')
+        video_id = mobj.group('id')
+        display_id = mobj.group('display_id') or video_id
 
-        webpage = self._download_webpage(url, video_id)
+        webpage = self._download_webpage(url, display_id)
         episode_code = self._search_regex(
             r'(?s)episode:(.*?\}),\s*\n', webpage, 'episode information')
-        episode_json = re.sub(
-            r'(\n\s+)([a-zA-Z]+):\s+\'(.*?)\'', r'\1"\2": "\3"', episode_code)
-        ep = json.loads(episode_json)
-
-        if ep['providerType'] == 'Ooyala':
-            return {
-                '_type': 'url_transparent',
-                'ie_key': 'Ooyala',
-                'url': 'ooyala:%s' % ep['providerId'],
-                'id': video_id,
-                'title': ep['title'],
-                'description': ep.get('description'),
-                'thumbnail': ep.get('imageThumbnail'),
-            }
-        else:
+
+        ep = self._parse_json(
+            episode_code, display_id, transform_source=lambda s:
+            re.sub(r'(\n\s+)([a-zA-Z]+):\s+\'(.*?)\'', r'\1"\2": "\3"', s))
+
+        if ep['providerType'] != 'Ooyala':
             raise ExtractorError('Unsupported provider %s' % ep['provider'])
+
+        return {
+            '_type': 'url_transparent',
+            'ie_key': 'Ooyala',
+            'url': 'ooyala:%s' % ep['providerId'],
+            'id': video_id,
+            'display_id': display_id,
+            'title': ep['title'],
+            'description': ep.get('description'),
+            'thumbnail': ep.get('imageThumbnail'),
+        }
+
+
+class BYUtvEventIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?byutv\.org/watch/event/(?P<id>[0-9a-f-]+)'
+    _TEST = {
+        'url': 'http://www.byutv.org/watch/event/29941b9b-8bf6-48d2-aebf-7a87add9e34b',
+        'info_dict': {
+            'id': '29941b9b-8bf6-48d2-aebf-7a87add9e34b',
+            'ext': 'mp4',
+            'title': 'Toledo vs. BYU (9/30/16)',
+        },
+        'params': {
+            'skip_download': True,
+        },
+        'add_ie': ['Ooyala'],
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+
+        ooyala_id = self._search_regex(
+            r'providerId\s*:\s*(["\'])(?P<id>(?:(?!\1).)+)\1',
+            webpage, 'ooyala id', group='id')
+
+        title = self._search_regex(
+            r'class=["\']description["\'][^>]*>\s*<h1>([^<]+)</h1>', webpage,
+            'title').strip()
+
+        return {
+            '_type': 'url_transparent',
+            'ie_key': 'Ooyala',
+            'url': 'ooyala:%s' % ooyala_id,
+            'id': video_id,
+            'title': title,
+        }
index 268c34392468e1b145e893a55496ad7bbb728d41..d4e6fbdce029b8267450b9d50d3b41556a47664d 100644 (file)
@@ -112,7 +112,7 @@ class CamdemyIE(InfoExtractor):
 
 
 class CamdemyFolderIE(InfoExtractor):
-    _VALID_URL = r'https?://www.camdemy.com/folder/(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www\.)?camdemy\.com/folder/(?P<id>\d+)'
     _TESTS = [{
         # links with trailing slash
         'url': 'http://www.camdemy.com/folder/450',
index 61463f249f6e4ded3b5f59831d7dba421ef9de9a..1c3c41d26619ec2fa347c4a75093b2a1cf7003a2 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 import re
@@ -6,11 +6,13 @@ import re
 from .common import InfoExtractor
 from ..compat import compat_urllib_parse_urlparse
 from ..utils import (
+    dict_get,
     ExtractorError,
     HEADRequest,
-    unified_strdate,
-    qualities,
     int_or_none,
+    qualities,
+    remove_end,
+    unified_strdate,
 )
 
 
@@ -23,6 +25,7 @@ class CanalplusIE(InfoExtractor):
                                     (?:(?:www|m)\.)?canalplus\.fr|
                                     (?:www\.)?piwiplus\.fr|
                                     (?:www\.)?d8\.tv|
+                                    (?:www\.)?c8\.fr|
                                     (?:www\.)?d17\.tv|
                                     (?:www\.)?itele\.fr
                                 )/(?:(?:[^/]+/)*(?P<display_id>[^/?#&]+))?(?:\?.*\bvid=(?P<vid>\d+))?|
@@ -35,53 +38,53 @@ class CanalplusIE(InfoExtractor):
         'canalplus': 'cplus',
         'piwiplus': 'teletoon',
         'd8': 'd8',
+        'c8': 'd8',
         'd17': 'd17',
         'itele': 'itele',
     }
 
     _TESTS = [{
         'url': 'http://www.canalplus.fr/c-emissions/pid1830-c-zapping.html?vid=1192814',
-        'md5': '41f438a4904f7664b91b4ed0dec969dc',
         'info_dict': {
-            'id': '1192814',
+            'id': '1405510',
+            'display_id': 'pid1830-c-zapping',
             'ext': 'mp4',
-            'title': "L'Année du Zapping 2014 - L'Année du Zapping 2014",
-            'description': "Toute l'année 2014 dans un Zapping exceptionnel !",
-            'upload_date': '20150105',
+            'title': 'Zapping - 02/07/2016',
+            'description': 'Le meilleur de toutes les chaînes, tous les jours',
+            'upload_date': '20160702',
         },
     }, {
         'url': 'http://www.piwiplus.fr/videos-piwi/pid1405-le-labyrinthe-boing-super-ranger.html?vid=1108190',
         'info_dict': {
             'id': '1108190',
-            'ext': 'flv',
-            'title': 'Le labyrinthe - Boing super ranger',
+            'display_id': 'pid1405-le-labyrinthe-boing-super-ranger',
+            'ext': 'mp4',
+            'title': 'BOING SUPER RANGER - Ep : Le labyrinthe',
             'description': 'md5:4cea7a37153be42c1ba2c1d3064376ff',
             'upload_date': '20140724',
         },
         'skip': 'Only works from France',
     }, {
-        'url': 'http://www.d8.tv/d8-docs-mags/pid5198-d8-en-quete-d-actualite.html?vid=1390231',
+        'url': 'http://www.c8.fr/c8-divertissement/ms-touche-pas-a-mon-poste/pid6318-videos-integrales.html',
+        'md5': '4b47b12b4ee43002626b97fad8fb1de5',
         'info_dict': {
-            'id': '1390231',
+            'id': '1420213',
+            'display_id': 'pid6318-videos-integrales',
             'ext': 'mp4',
-            'title': "Vacances pas chères : prix discount ou grosses dépenses ? - En quête d'actualité",
-            'description': 'md5:edb6cf1cb4a1e807b5dd089e1ac8bfc6',
-            'upload_date': '20160512',
-        },
-        'params': {
-            'skip_download': True,
+            'title': 'TPMP ! Même le matin - Les 35H de Baba - 14/10/2016',
+            'description': 'md5:f96736c1b0ffaa96fd5b9e60ad871799',
+            'upload_date': '20161014',
         },
+        'skip': 'Only works from France',
     }, {
-        'url': 'http://www.itele.fr/chroniques/invite-bruce-toussaint/thierry-solere-nicolas-sarkozy-officialisera-sa-candidature-a-la-primaire-quand-il-le-voudra-167224',
+        'url': 'http://www.itele.fr/chroniques/invite-michael-darmon/rachida-dati-nicolas-sarkozy-est-le-plus-en-phase-avec-les-inquietudes-des-francais-171510',
         'info_dict': {
-            'id': '1398334',
+            'id': '1420176',
+            'display_id': 'rachida-dati-nicolas-sarkozy-est-le-plus-en-phase-avec-les-inquietudes-des-francais-171510',
             'ext': 'mp4',
-            'title': "L'invité de Bruce Toussaint du 07/06/2016 - ",
-            'description': 'md5:40ac7c9ad0feaeb6f605bad986f61324',
-            'upload_date': '20160607',
-        },
-        'params': {
-            'skip_download': True,
+            'title': 'L\'invité de Michaël Darmon du 14/10/2016 - ',
+            'description': 'Chaque matin du lundi au vendredi, Michaël Darmon reçoit un invité politique à 8h25.',
+            'upload_date': '20161014',
         },
     }, {
         'url': 'http://m.canalplus.fr/?vid=1398231',
@@ -93,18 +96,17 @@ class CanalplusIE(InfoExtractor):
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.groupdict().get('id') or mobj.groupdict().get('vid')
 
         site_id = self._SITE_ID_MAP[compat_urllib_parse_urlparse(url).netloc.rsplit('.', 2)[-2]]
 
         # Beware, some subclasses do not define an id group
-        display_id = mobj.group('display_id') or video_id
+        display_id = remove_end(dict_get(mobj.groupdict(), ('display_id', 'id', 'vid')), '.html')
 
-        if video_id is None:
-            webpage = self._download_webpage(url, display_id)
-            video_id = self._search_regex(
-                [r'<canal:player[^>]+?videoId=(["\'])(?P<id>\d+)', r'id=["\']canal_video_player(?P<id>\d+)'],
-                webpage, 'video id', group='id')
+        webpage = self._download_webpage(url, display_id)
+        video_id = self._search_regex(
+            [r'<canal:player[^>]+?videoId=(["\'])(?P<id>\d+)',
+             r'id=["\']canal_video_player(?P<id>\d+)'],
+            webpage, 'video id', group='id')
 
         info_url = self._VIDEO_INFO_TEMPLATE % (site_id, video_id)
         video_data = self._download_json(info_url, video_id, 'Downloading video JSON')
index ec6d24d96cac80379e15eba267829f89c7b53df7..d183d5d527fb8ab4163b16fcaffd0aeedbf0dd0c 100644 (file)
@@ -1,11 +1,13 @@
 from __future__ import unicode_literals
 
+import re
+
 from .common import InfoExtractor
 from ..utils import float_or_none
 
 
 class CanvasIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?canvas\.be/video/(?:[^/]+/)*(?P<id>[^/?#&]+)'
+    _VALID_URL = r'https?://(?:www\.)?(?P<site_id>canvas|een)\.be/(?:[^/]+/)*(?P<id>[^/?#&]+)'
     _TESTS = [{
         'url': 'http://www.canvas.be/video/de-afspraak/najaar-2015/de-afspraak-veilt-voor-de-warmste-week',
         'md5': 'ea838375a547ac787d4064d8c7860a6c',
@@ -38,22 +40,42 @@ class CanvasIE(InfoExtractor):
         'params': {
             'skip_download': True,
         }
+    }, {
+        'url': 'https://www.een.be/sorry-voor-alles/herbekijk-sorry-voor-alles',
+        'info_dict': {
+            'id': 'mz-ast-11a587f8-b921-4266-82e2-0bce3e80d07f',
+            'display_id': 'herbekijk-sorry-voor-alles',
+            'ext': 'mp4',
+            'title': 'Herbekijk Sorry voor alles',
+            'description': 'md5:8bb2805df8164e5eb95d6a7a29dc0dd3',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'duration': 3788.06,
+        },
+        'params': {
+            'skip_download': True,
+        }
+    }, {
+        'url': 'https://www.canvas.be/check-point/najaar-2016/de-politie-uw-vriend',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
-        display_id = self._match_id(url)
+        mobj = re.match(self._VALID_URL, url)
+        site_id, display_id = mobj.group('site_id'), mobj.group('id')
 
         webpage = self._download_webpage(url, display_id)
 
-        title = self._search_regex(
+        title = (self._search_regex(
             r'<h1[^>]+class="video__body__header__title"[^>]*>(.+?)</h1>',
-            webpage, 'title', default=None) or self._og_search_title(webpage)
+            webpage, 'title', default=None) or self._og_search_title(
+            webpage)).strip()
 
         video_id = self._html_search_regex(
-            r'data-video=(["\'])(?P<id>.+?)\1', webpage, 'video id', group='id')
+            r'data-video=(["\'])(?P<id>(?:(?!\1).)+)\1', webpage, 'video id', group='id')
 
         data = self._download_json(
-            'https://mediazone.vrt.be/api/v1/canvas/assets/%s' % video_id, display_id)
+            'https://mediazone.vrt.be/api/v1/%s/assets/%s'
+            % (site_id, video_id), display_id)
 
         formats = []
         for target in data['targetUrls']:
index 5797fb95142f556bade163051f4f51a40e32cda7..66c0f900a402664653a846e9b39fc44c1da2853e 100644 (file)
@@ -9,6 +9,8 @@ from ..utils import (
     try_get,
 )
 
+from .videomore import VideomoreIE
+
 
 class CarambaTVIE(InfoExtractor):
     _VALID_URL = r'(?:carambatv:|https?://video1\.carambatv\.ru/v/)(?P<id>\d+)'
@@ -62,14 +64,16 @@ class CarambaTVPageIE(InfoExtractor):
     _VALID_URL = r'https?://carambatv\.ru/(?:[^/]+/)+(?P<id>[^/?#&]+)'
     _TEST = {
         'url': 'http://carambatv.ru/movie/bad-comedian/razborka-v-manile/',
-        'md5': '',
+        'md5': 'a49fb0ec2ad66503eeb46aac237d3c86',
         'info_dict': {
-            'id': '191910501',
-            'ext': 'mp4',
+            'id': '475222',
+            'ext': 'flv',
             'title': '[BadComedian] - Разборка в Маниле (Абсолютный обзор)',
-            'thumbnail': 're:^https?://.*\.jpg$',
-            'duration': 2678.31,
+            'thumbnail': 're:^https?://.*\.jpg',
+            # duration reported by videomore is incorrect
+            'duration': int,
         },
+        'add_ie': [VideomoreIE.ie_key()],
     }
 
     def _real_extract(self, url):
@@ -77,6 +81,16 @@ class CarambaTVPageIE(InfoExtractor):
 
         webpage = self._download_webpage(url, video_id)
 
+        videomore_url = VideomoreIE._extract_url(webpage)
+        if videomore_url:
+            title = self._og_search_title(webpage)
+            return {
+                '_type': 'url_transparent',
+                'url': videomore_url,
+                'ie_key': VideomoreIE.ie_key(),
+                'title': title,
+            }
+
         video_url = self._og_search_property('video:iframe', webpage, default=None)
 
         if not video_url:
diff --git a/youtube_dl/extractor/cartoonnetwork.py b/youtube_dl/extractor/cartoonnetwork.py
new file mode 100644 (file)
index 0000000..086ec90
--- /dev/null
@@ -0,0 +1,42 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .turner import TurnerBaseIE
+
+
+class CartoonNetworkIE(TurnerBaseIE):
+    _VALID_URL = r'https?://(?:www\.)?cartoonnetwork\.com/video/(?:[^/]+/)+(?P<id>[^/?#]+)-(?:clip|episode)\.html'
+    _TEST = {
+        'url': 'http://www.cartoonnetwork.com/video/teen-titans-go/starfire-the-cat-lady-clip.html',
+        'info_dict': {
+            'id': '8a250ab04ed07e6c014ef3f1e2f9016c',
+            'ext': 'mp4',
+            'title': 'Starfire the Cat Lady',
+            'description': 'Robin decides to become a cat so that Starfire will finally love him.',
+        },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        },
+    }
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+        id_type, video_id = re.search(r"_cnglobal\.cvp(Video|Title)Id\s*=\s*'([^']+)';", webpage).groups()
+        query = ('id' if id_type == 'Video' else 'titleId') + '=' + video_id
+        return self._extract_cvp_info(
+            'http://www.cartoonnetwork.com/video-seo-svc/episodeservices/getCvpPlaylist?networkName=CN2&' + query, video_id, {
+                'secure': {
+                    'media_src': 'http://androidhls-secure.cdn.turner.com/toon/big',
+                    'tokenizer_src': 'http://www.cartoonnetwork.com/cntv/mvpd/processors/services/token_ipadAdobe.do',
+                },
+            }, {
+                'url': url,
+                'site_name': 'CartoonNetwork',
+                'auth_required': self._search_regex(
+                    r'_cnglobal\.cvpFullOrPreviewAuth\s*=\s*(true|false);',
+                    webpage, 'auth required', default='false') == 'true',
+            })
index a87e971406b9a45d665d4bfe14e655c851f5a899..d71fddf58a068461cd2d377b31e4c3981d6c2b3d 100644 (file)
@@ -9,10 +9,19 @@ from ..utils import (
     js_to_json,
     smuggle_url,
     try_get,
+    xpath_text,
+    xpath_element,
+    xpath_with_ns,
+    find_xpath_attr,
+    parse_iso8601,
+    parse_age_limit,
+    int_or_none,
+    ExtractorError,
 )
 
 
 class CBCIE(InfoExtractor):
+    IE_NAME = 'cbc.ca'
     _VALID_URL = r'https?://(?:www\.)?cbc\.ca/(?!player/)(?:[^/]+/)+(?P<id>[^/?#]+)'
     _TESTS = [{
         # with mediaId
@@ -114,6 +123,7 @@ class CBCIE(InfoExtractor):
 
 
 class CBCPlayerIE(InfoExtractor):
+    IE_NAME = 'cbc.ca:player'
     _VALID_URL = r'(?:cbcplayer:|https?://(?:www\.)?cbc\.ca/(?:player/play/|i/caffeine/syndicate/\?mediaId=))(?P<id>\d+)'
     _TESTS = [{
         'url': 'http://www.cbc.ca/player/play/2683190193',
@@ -167,3 +177,165 @@ class CBCPlayerIE(InfoExtractor):
                 }),
             'id': video_id,
         }
+
+
+class CBCWatchBaseIE(InfoExtractor):
+    _device_id = None
+    _device_token = None
+    _API_BASE_URL = 'https://api-cbc.cloud.clearleap.com/cloffice/client/'
+    _NS_MAP = {
+        'media': 'http://search.yahoo.com/mrss/',
+        'clearleap': 'http://www.clearleap.com/namespace/clearleap/1.0/',
+    }
+
+    def _call_api(self, path, video_id):
+        url = path if path.startswith('http') else self._API_BASE_URL + path
+        result = self._download_xml(url, video_id, headers={
+            'X-Clearleap-DeviceId': self._device_id,
+            'X-Clearleap-DeviceToken': self._device_token,
+        })
+        error_message = xpath_text(result, 'userMessage') or xpath_text(result, 'systemMessage')
+        if error_message:
+            raise ExtractorError('%s said: %s' % (self.IE_NAME, error_message))
+        return result
+
+    def _real_initialize(self):
+        if not self._device_id or not self._device_token:
+            device = self._downloader.cache.load('cbcwatch', 'device') or {}
+            self._device_id, self._device_token = device.get('id'), device.get('token')
+            if not self._device_id or not self._device_token:
+                result = self._download_xml(
+                    self._API_BASE_URL + 'device/register',
+                    None, data=b'<device><type>web</type></device>')
+                self._device_id = xpath_text(result, 'deviceId', fatal=True)
+                self._device_token = xpath_text(result, 'deviceToken', fatal=True)
+                self._downloader.cache.store(
+                    'cbcwatch', 'device', {
+                        'id': self._device_id,
+                        'token': self._device_token,
+                    })
+
+    def _parse_rss_feed(self, rss):
+        channel = xpath_element(rss, 'channel', fatal=True)
+
+        def _add_ns(path):
+            return xpath_with_ns(path, self._NS_MAP)
+
+        entries = []
+        for item in channel.findall('item'):
+            guid = xpath_text(item, 'guid', fatal=True)
+            title = xpath_text(item, 'title', fatal=True)
+
+            media_group = xpath_element(item, _add_ns('media:group'), fatal=True)
+            content = xpath_element(media_group, _add_ns('media:content'), fatal=True)
+            content_url = content.attrib['url']
+
+            thumbnails = []
+            for thumbnail in media_group.findall(_add_ns('media:thumbnail')):
+                thumbnail_url = thumbnail.get('url')
+                if not thumbnail_url:
+                    continue
+                thumbnails.append({
+                    'id': thumbnail.get('profile'),
+                    'url': thumbnail_url,
+                    'width': int_or_none(thumbnail.get('width')),
+                    'height': int_or_none(thumbnail.get('height')),
+                })
+
+            timestamp = None
+            release_date = find_xpath_attr(
+                item, _add_ns('media:credit'), 'role', 'releaseDate')
+            if release_date is not None:
+                timestamp = parse_iso8601(release_date.text)
+
+            entries.append({
+                '_type': 'url_transparent',
+                'url': content_url,
+                'id': guid,
+                'title': title,
+                'description': xpath_text(item, 'description'),
+                'timestamp': timestamp,
+                'duration': int_or_none(content.get('duration')),
+                'age_limit': parse_age_limit(xpath_text(item, _add_ns('media:rating'))),
+                'episode': xpath_text(item, _add_ns('clearleap:episode')),
+                'episode_number': int_or_none(xpath_text(item, _add_ns('clearleap:episodeInSeason'))),
+                'series': xpath_text(item, _add_ns('clearleap:series')),
+                'season_number': int_or_none(xpath_text(item, _add_ns('clearleap:season'))),
+                'thumbnails': thumbnails,
+                'ie_key': 'CBCWatchVideo',
+            })
+
+        return self.playlist_result(
+            entries, xpath_text(channel, 'guid'),
+            xpath_text(channel, 'title'),
+            xpath_text(channel, 'description'))
+
+
+class CBCWatchVideoIE(CBCWatchBaseIE):
+    IE_NAME = 'cbc.ca:watch:video'
+    _VALID_URL = r'https?://api-cbc\.cloud\.clearleap\.com/cloffice/client/web/play/?\?.*?\bcontentId=(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        result = self._call_api(url, video_id)
+
+        m3u8_url = xpath_text(result, 'url', fatal=True)
+        formats = self._extract_m3u8_formats(re.sub(r'/([^/]+)/[^/?]+\.m3u8', r'/\1/\1.m3u8', m3u8_url), video_id, 'mp4', fatal=False)
+        if len(formats) < 2:
+            formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4')
+        # Despite metadata in m3u8 all video+audio formats are
+        # actually video-only (no audio)
+        for f in formats:
+            if f.get('acodec') != 'none' and f.get('vcodec') != 'none':
+                f['acodec'] = 'none'
+        self._sort_formats(formats)
+
+        info = {
+            'id': video_id,
+            'title': video_id,
+            'formats': formats,
+        }
+
+        rss = xpath_element(result, 'rss')
+        if rss:
+            info.update(self._parse_rss_feed(rss)['entries'][0])
+            del info['url']
+            del info['_type']
+            del info['ie_key']
+        return info
+
+
+class CBCWatchIE(CBCWatchBaseIE):
+    IE_NAME = 'cbc.ca:watch'
+    _VALID_URL = r'https?://watch\.cbc\.ca/(?:[^/]+/)+(?P<id>[0-9a-f-]+)'
+    _TESTS = [{
+        'url': 'http://watch.cbc.ca/doc-zone/season-6/customer-disservice/38e815a-009e3ab12e4',
+        'info_dict': {
+            'id': '38e815a-009e3ab12e4',
+            'ext': 'mp4',
+            'title': 'Customer (Dis)Service',
+            'description': 'md5:8bdd6913a0fe03d4b2a17ebe169c7c87',
+            'upload_date': '20160219',
+            'timestamp': 1455840000,
+        },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+            'format': 'bestvideo',
+        },
+        'skip': 'Geo-restricted to Canada',
+    }, {
+        'url': 'http://watch.cbc.ca/arthur/all/1ed4b385-cd84-49cf-95f0-80f004680057',
+        'info_dict': {
+            'id': '1ed4b385-cd84-49cf-95f0-80f004680057',
+            'title': 'Arthur',
+            'description': 'Arthur, the sweetest 8-year-old aardvark, and his pals solve all kinds of problems with humour, kindness and teamwork.',
+        },
+        'playlist_mincount': 30,
+        'skip': 'Geo-restricted to Canada',
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        rss = self._call_api('web/browse/' + video_id, video_id)
+        return self._parse_rss_feed(rss)
index a23173d6f1a9570225242692ee74d68fc061fb3d..58f258c54b059b09888cf0e26a4718a69c704faa 100644 (file)
@@ -4,6 +4,9 @@ from .theplatform import ThePlatformFeedIE
 from ..utils import (
     int_or_none,
     find_xpath_attr,
+    xpath_element,
+    xpath_text,
+    update_url_query,
 )
 
 
@@ -17,19 +20,6 @@ class CBSBaseIE(ThePlatformFeedIE):
             }]
         } if closed_caption_e is not None and closed_caption_e.attrib.get('value') else []
 
-    def _extract_video_info(self, filter_query, video_id):
-        return self._extract_feed_info(
-            'dJ5BDC', 'VxxJg8Ymh8sE', filter_query, video_id, lambda entry: {
-                'series': entry.get('cbs$SeriesTitle'),
-                'season_number': int_or_none(entry.get('cbs$SeasonNumber')),
-                'episode': entry.get('cbs$EpisodeTitle'),
-                'episode_number': int_or_none(entry.get('cbs$EpisodeNumber')),
-            }, {
-                'StreamPack': {
-                    'manifest': 'm3u',
-                }
-            })
-
 
 class CBSIE(CBSBaseIE):
     _VALID_URL = r'(?:cbs:|https?://(?:www\.)?(?:cbs\.com/shows/[^/]+/video|colbertlateshow\.com/(?:video|podcasts))/)(?P<id>[\w-]+)'
@@ -38,7 +28,6 @@ class CBSIE(CBSBaseIE):
         'url': 'http://www.cbs.com/shows/garth-brooks/video/_u7W953k6la293J7EPTd9oHkSPs6Xn6_/connect-chat-feat-garth-brooks/',
         'info_dict': {
             'id': '_u7W953k6la293J7EPTd9oHkSPs6Xn6_',
-            'display_id': 'connect-chat-feat-garth-brooks',
             'ext': 'mp4',
             'title': 'Connect Chat feat. Garth Brooks',
             'description': 'Connect with country music singer Garth Brooks, as he chats with fans on Wednesday November 27, 2013. Be sure to tune in to Garth Brooks: Live from Las Vegas, Friday November 29, at 9/8c on CBS!',
@@ -47,7 +36,10 @@ class CBSIE(CBSBaseIE):
             'upload_date': '20131127',
             'uploader': 'CBSI-NEW',
         },
-        'expected_warnings': ['Failed to download m3u8 information'],
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        },
         '_skip': 'Blocked outside the US',
     }, {
         'url': 'http://colbertlateshow.com/video/8GmB0oY0McANFvp2aEffk9jZZZ2YyXxy/the-colbeard/',
@@ -56,8 +48,53 @@ class CBSIE(CBSBaseIE):
         'url': 'http://www.colbertlateshow.com/podcasts/dYSwjqPs_X1tvbV_P2FcPWRa_qT6akTC/in-the-bad-room-with-stephen/',
         'only_matching': True,
     }]
-    TP_RELEASE_URL_TEMPLATE = 'http://link.theplatform.com/s/dJ5BDC/%s?mbr=true'
+
+    def _extract_video_info(self, content_id):
+        items_data = self._download_xml(
+            'http://can.cbs.com/thunder/player/videoPlayerService.php',
+            content_id, query={'partner': 'cbs', 'contentId': content_id})
+        video_data = xpath_element(items_data, './/item')
+        title = xpath_text(video_data, 'videoTitle', 'title', True)
+        tp_path = 'dJ5BDC/media/guid/2198311517/%s' % content_id
+        tp_release_url = 'http://link.theplatform.com/s/' + tp_path
+
+        asset_types = []
+        subtitles = {}
+        formats = []
+        for item in items_data.findall('.//item'):
+            asset_type = xpath_text(item, 'assetType')
+            if not asset_type or asset_type in asset_types:
+                continue
+            asset_types.append(asset_type)
+            query = {
+                'mbr': 'true',
+                'assetTypes': asset_type,
+            }
+            if asset_type.startswith('HLS') or asset_type in ('OnceURL', 'StreamPack'):
+                query['formats'] = 'MPEG4,M3U'
+            elif asset_type in ('RTMP', 'WIFI', '3G'):
+                query['formats'] = 'MPEG4,FLV'
+            tp_formats, tp_subtitles = self._extract_theplatform_smil(
+                update_url_query(tp_release_url, query), content_id,
+                'Downloading %s SMIL data' % asset_type)
+            formats.extend(tp_formats)
+            subtitles = self._merge_subtitles(subtitles, tp_subtitles)
+        self._sort_formats(formats)
+
+        info = self._extract_theplatform_metadata(tp_path, content_id)
+        info.update({
+            'id': content_id,
+            'title': title,
+            'series': xpath_text(video_data, 'seriesTitle'),
+            'season_number': int_or_none(xpath_text(video_data, 'seasonNumber')),
+            'episode_number': int_or_none(xpath_text(video_data, 'episodeNumber')),
+            'duration': int_or_none(xpath_text(video_data, 'videoLength'), 1000),
+            'thumbnail': xpath_text(video_data, 'previewImageURL'),
+            'formats': formats,
+            'subtitles': subtitles,
+        })
+        return info
 
     def _real_extract(self, url):
         content_id = self._match_id(url)
-        return self._extract_video_info('byGuid=%s' % content_id, content_id)
+        return self._extract_video_info(content_id)
index 821db20b23052ca71d594c6c05ad705a400129a3..57b18e81d412b20162f60e8d8e44699b76f2e3af 100644 (file)
@@ -63,7 +63,7 @@ class CBSInteractiveIE(ThePlatformIE):
         webpage = self._download_webpage(url, display_id)
 
         data_json = self._html_search_regex(
-            r"data-(?:cnet|zdnet)-video(?:-uvp)?-options='([^']+)'",
+            r"data-(?:cnet|zdnet)-video(?:-uvp(?:js)?)?-options='([^']+)'",
             webpage, 'data json')
         data = self._parse_json(data_json, display_id)
         vdata = data.get('video') or data['videos'][0]
index 4bcd104af7463b1cc4b9c6c88673cafc9ad655e7..8d5f11dd11de8bb85a9f6a2ddc86710a65c56a94 100644 (file)
@@ -4,11 +4,14 @@ from __future__ import unicode_literals
 from .anvato import AnvatoIE
 from .sendtonews import SendtoNewsIE
 from ..compat import compat_urlparse
-from ..utils import unified_timestamp
+from ..utils import (
+    parse_iso8601,
+    unified_timestamp,
+)
 
 
 class CBSLocalIE(AnvatoIE):
-    _VALID_URL = r'https?://[a-z]+\.cbslocal\.com/\d+/\d+/\d+/(?P<id>[0-9a-z-]+)'
+    _VALID_URL = r'https?://[a-z]+\.cbslocal\.com/(?:\d+/\d+/\d+|video)/(?P<id>[0-9a-z-]+)'
 
     _TESTS = [{
         # Anvato backend
@@ -22,6 +25,7 @@ class CBSLocalIE(AnvatoIE):
             'thumbnail': 're:^https?://.*',
             'timestamp': 1463440500,
             'upload_date': '20160516',
+            'uploader': 'CBS',
             'subtitles': {
                 'en': 'mincount:5',
             },
@@ -35,6 +39,7 @@ class CBSLocalIE(AnvatoIE):
                 'Syndication\\Curb.tv',
                 'Content\\News'
             ],
+            'tags': ['CBS 2 News Evening'],
         },
     }, {
         # SendtoNews embed
@@ -47,6 +52,31 @@ class CBSLocalIE(AnvatoIE):
             # m3u8 download
             'skip_download': True,
         },
+    }, {
+        'url': 'http://newyork.cbslocal.com/video/3580809-a-very-blue-anniversary/',
+        'info_dict': {
+            'id': '3580809',
+            'ext': 'mp4',
+            'title': 'A Very Blue Anniversary',
+            'description': 'CBS2’s Cindy Hsu has more.',
+            'thumbnail': 're:^https?://.*',
+            'timestamp': 1479962220,
+            'upload_date': '20161124',
+            'uploader': 'CBS',
+            'subtitles': {
+                'en': 'mincount:5',
+            },
+            'categories': [
+                'Stations\\Spoken Word\\WCBSTV',
+                'Syndication\\AOL',
+                'Syndication\\MSN',
+                'Syndication\\NDN',
+                'Syndication\\Yahoo',
+                'Content\\News',
+                'Content\\News\\Local News',
+            ],
+            'tags': ['CBS 2 News Weekends', 'Cindy Hsu', 'Blue Man Group'],
+        },
     }]
 
     def _real_extract(self, url):
@@ -62,8 +92,11 @@ class CBSLocalIE(AnvatoIE):
         info_dict = self._extract_anvato_videos(webpage, display_id)
 
         time_str = self._html_search_regex(
-            r'class="entry-date">([^<]+)<', webpage, 'released date', fatal=False)
-        timestamp = unified_timestamp(time_str)
+            r'class="entry-date">([^<]+)<', webpage, 'released date', default=None)
+        if time_str:
+            timestamp = unified_timestamp(time_str)
+        else:
+            timestamp = parse_iso8601(self._html_search_meta('uploadDate', webpage))
 
         info_dict.update({
             'display_id': display_id,
index 9d3b75526395ee946ef660c14e0629019e10c1e2..91b0f5fa94c7ba919e01fd097cbdfc71fe6992b4 100644 (file)
@@ -1,14 +1,15 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
-from .cbs import CBSBaseIE
+from .cbs import CBSIE
 from ..utils import (
     parse_duration,
 )
 
 
-class CBSNewsIE(CBSBaseIE):
+class CBSNewsIE(CBSIE):
+    IE_NAME = 'cbsnews'
     IE_DESC = 'CBS News'
     _VALID_URL = r'https?://(?:www\.)?cbsnews\.com/(?:news|videos)/(?P<id>[\da-z_-]+)'
 
@@ -35,7 +36,8 @@ class CBSNewsIE(CBSBaseIE):
                 'ext': 'mp4',
                 'title': 'Fort Hood shooting: Army downplays mental illness as cause of attack',
                 'description': 'md5:4a6983e480542d8b333a947bfc64ddc7',
-                'upload_date': '19700101',
+                'upload_date': '20140404',
+                'timestamp': 1396650660,
                 'uploader': 'CBSI-NEW',
                 'thumbnail': 're:^https?://.*\.jpg$',
                 'duration': 205,
@@ -63,19 +65,20 @@ class CBSNewsIE(CBSBaseIE):
 
         item = video_info['item'] if 'item' in video_info else video_info
         guid = item['mpxRefId']
-        return self._extract_video_info('byGuid=%s' % guid, guid)
+        return self._extract_video_info(guid)
 
 
 class CBSNewsLiveVideoIE(InfoExtractor):
+    IE_NAME = 'cbsnews:livevideo'
     IE_DESC = 'CBS News Live Videos'
-    _VALID_URL = r'https?://(?:www\.)?cbsnews\.com/live/video/(?P<id>[\da-z_-]+)'
+    _VALID_URL = r'https?://(?:www\.)?cbsnews\.com/live/video/(?P<id>[^/?#]+)'
 
     # Live videos get deleted soon. See http://www.cbsnews.com/live/ for the latest examples
     _TEST = {
         'url': 'http://www.cbsnews.com/live/video/clinton-sanders-prepare-to-face-off-in-nh/',
         'info_dict': {
             'id': 'clinton-sanders-prepare-to-face-off-in-nh',
-            'ext': 'flv',
+            'ext': 'mp4',
             'title': 'Clinton, Sanders Prepare To Face Off In NH',
             'duration': 334,
         },
@@ -83,25 +86,22 @@ class CBSNewsLiveVideoIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        video_id = self._match_id(url)
-
-        webpage = self._download_webpage(url, video_id)
+        display_id = self._match_id(url)
 
-        video_info = self._parse_json(self._html_search_regex(
-            r'data-story-obj=\'({.+?})\'', webpage, 'video JSON info'), video_id)['story']
+        video_info = self._download_json(
+            'http://feeds.cbsn.cbsnews.com/rundown/story', display_id, query={
+                'device': 'desktop',
+                'dvr_slug': display_id,
+            })
 
-        hdcore_sign = 'hdcore=3.3.1'
-        f4m_formats = self._extract_f4m_formats(video_info['url'] + '&' + hdcore_sign, video_id)
-        if f4m_formats:
-            for entry in f4m_formats:
-                # URLs without the extra param induce an 404 error
-                entry.update({'extra_param_to_segment_url': hdcore_sign})
-        self._sort_formats(f4m_formats)
+        formats = self._extract_akamai_formats(video_info['url'], display_id)
+        self._sort_formats(formats)
 
         return {
-            'id': video_id,
+            'id': display_id,
+            'display_id': display_id,
             'title': video_info['headline'],
             'thumbnail': video_info.get('thumbnail_url_hd') or video_info.get('thumbnail_url_sd'),
             'duration': parse_duration(video_info.get('segmentDur')),
-            'formats': f4m_formats,
+            'formats': formats,
         }
index 78ca44b024bfb20dc6ce79e4ee51f3472d599711..3a62c840b42bace9993ddb3cb77fc89201b0578e 100644 (file)
@@ -4,7 +4,7 @@ from .cbs import CBSBaseIE
 
 
 class CBSSportsIE(CBSBaseIE):
-    _VALID_URL = r'https?://www\.cbssports\.com/video/player/[^/]+/(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www\.)?cbssports\.com/video/player/[^/]+/(?P<id>\d+)'
 
     _TESTS = [{
         'url': 'http://www.cbssports.com/video/player/videos/708337219968/0/ben-simmons-the-next-lebron?-not-so-fast',
@@ -23,6 +23,9 @@ class CBSSportsIE(CBSBaseIE):
         }
     }]
 
+    def _extract_video_info(self, filter_query, video_id):
+        return self._extract_feed_info('dJ5BDC', 'VxxJg8Ymh8sE', filter_query, video_id)
+
     def _real_extract(self, url):
         video_id = self._match_id(url)
         return self._extract_video_info('byId=%s' % video_id, video_id)
diff --git a/youtube_dl/extractor/cctv.py b/youtube_dl/extractor/cctv.py
new file mode 100644 (file)
index 0000000..72a72cb
--- /dev/null
@@ -0,0 +1,53 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import float_or_none
+
+
+class CCTVIE(InfoExtractor):
+    _VALID_URL = r'''(?x)https?://(?:.+?\.)?
+        (?:
+            cctv\.(?:com|cn)|
+            cntv\.cn
+        )/
+        (?:
+            video/[^/]+/(?P<id>[0-9a-f]{32})|
+            \d{4}/\d{2}/\d{2}/(?P<display_id>VID[0-9A-Za-z]+)
+        )'''
+    _TESTS = [{
+        'url': 'http://english.cntv.cn/2016/09/03/VIDEhnkB5y9AgHyIEVphCEz1160903.shtml',
+        'md5': '819c7b49fc3927d529fb4cd555621823',
+        'info_dict': {
+            'id': '454368eb19ad44a1925bf1eb96140a61',
+            'ext': 'mp4',
+            'title': 'Portrait of Real Current Life 09/03/2016 Modern Inventors Part 1',
+        }
+    }, {
+        'url': 'http://tv.cctv.com/2016/09/07/VIDE5C1FnlX5bUywlrjhxXOV160907.shtml',
+        'only_matching': True,
+    }, {
+        'url': 'http://tv.cntv.cn/video/C39296/95cfac44cabd3ddc4a9438780a4e5c44',
+        'only_matching': True
+    }]
+
+    def _real_extract(self, url):
+        video_id, display_id = re.match(self._VALID_URL, url).groups()
+        if not video_id:
+            webpage = self._download_webpage(url, display_id)
+            video_id = self._search_regex(
+                r'(?:fo\.addVariable\("videoCenterId",\s*|guid\s*=\s*)"([0-9a-f]{32})',
+                webpage, 'video_id')
+        api_data = self._download_json(
+            'http://vdn.apps.cntv.cn/api/getHttpVideoInfo.do?pid=' + video_id, video_id)
+        m3u8_url = re.sub(r'maxbr=\d+&?', '', api_data['hls_url'])
+
+        return {
+            'id': video_id,
+            'title': api_data['title'],
+            'formats': self._extract_m3u8_formats(
+                m3u8_url, video_id, 'mp4', 'm3u8_native', fatal=False),
+            'duration': float_or_none(api_data.get('video', {}).get('totalLength')),
+        }
index 8af318703b0ae9ad57fedb49c7f320288322caa9..e00bdaf66a6d9eb6ac051cc169cabbf02844770b 100755 (executable)
@@ -5,14 +5,16 @@ import re
 
 from .common import InfoExtractor
 from ..utils import (
-    decode_packed_codes,
     ExtractorError,
-    parse_duration
+    float_or_none,
+    int_or_none,
+    parse_duration,
 )
 
 
 class CDAIE(InfoExtractor):
     _VALID_URL = r'https?://(?:(?:www\.)?cda\.pl/video|ebd\.cda\.pl/[0-9]+x[0-9]+)/(?P<id>[0-9a-z]+)'
+    _BASE_URL = 'http://www.cda.pl/'
     _TESTS = [{
         'url': 'http://www.cda.pl/video/5749950c',
         'md5': '6f844bf51b15f31fae165365707ae970',
@@ -21,6 +23,9 @@ class CDAIE(InfoExtractor):
             'ext': 'mp4',
             'height': 720,
             'title': 'Oto dlaczego przed zakrętem należy zwolnić.',
+            'description': 'md5:269ccd135d550da90d1662651fcb9772',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'average_rating': float,
             'duration': 39
         }
     }, {
@@ -30,6 +35,11 @@ class CDAIE(InfoExtractor):
             'id': '57413289',
             'ext': 'mp4',
             'title': 'Lądowanie na lotnisku na Maderze',
+            'description': 'md5:60d76b71186dcce4e0ba6d4bbdb13e1a',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'uploader': 'crash404',
+            'view_count': int,
+            'average_rating': float,
             'duration': 137
         }
     }, {
@@ -39,31 +49,55 @@ class CDAIE(InfoExtractor):
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
-        webpage = self._download_webpage('http://ebd.cda.pl/0x0/' + video_id, video_id)
+        self._set_cookie('cda.pl', 'cda.player', 'html5')
+        webpage = self._download_webpage(
+            self._BASE_URL + '/video/' + video_id, video_id)
 
         if 'Ten film jest dostępny dla użytkowników premium' in webpage:
             raise ExtractorError('This video is only available for premium users.', expected=True)
 
-        title = self._html_search_regex(r'<title>(.+?)</title>', webpage, 'title')
-
         formats = []
 
+        uploader = self._search_regex(r'''(?x)
+            <(span|meta)[^>]+itemprop=(["\'])author\2[^>]*>
+            (?:<\1[^>]*>[^<]*</\1>|(?!</\1>)(?:.|\n))*?
+            <(span|meta)[^>]+itemprop=(["\'])name\4[^>]*>(?P<uploader>[^<]+)</\3>
+        ''', webpage, 'uploader', default=None, group='uploader')
+        view_count = self._search_regex(
+            r'Odsłony:(?:\s|&nbsp;)*([0-9]+)', webpage,
+            'view_count', default=None)
+        average_rating = self._search_regex(
+            r'<(?:span|meta)[^>]+itemprop=(["\'])ratingValue\1[^>]*>(?P<rating_value>[0-9.]+)',
+            webpage, 'rating', fatal=False, group='rating_value')
+
         info_dict = {
             'id': video_id,
-            'title': title,
+            'title': self._og_search_title(webpage),
+            'description': self._og_search_description(webpage),
+            'uploader': uploader,
+            'view_count': int_or_none(view_count),
+            'average_rating': float_or_none(average_rating),
+            'thumbnail': self._og_search_thumbnail(webpage),
             'formats': formats,
             'duration': None,
         }
 
         def extract_format(page, version):
-            unpacked = decode_packed_codes(page)
-            format_url = self._search_regex(
-                r"(?:file|url)\s*:\s*(\\?[\"'])(?P<url>http.+?)\1", unpacked,
-                '%s url' % version, fatal=False, group='url')
-            if not format_url:
+            json_str = self._search_regex(
+                r'player_data=(\\?["\'])(?P<player_data>.+?)\1', page,
+                '%s player_json' % version, fatal=False, group='player_data')
+            if not json_str:
+                return
+            player_data = self._parse_json(
+                json_str, '%s player_data' % version, fatal=False)
+            if not player_data:
+                return
+            video = player_data.get('video')
+            if not video or 'file' not in video:
+                self.report_warning('Unable to extract %s version information' % version)
                 return
             f = {
-                'url': format_url,
+                'url': video['file'],
             }
             m = re.search(
                 r'<a[^>]+data-quality="(?P<format_id>[^"]+)"[^>]+href="[^"]+"[^>]+class="[^"]*quality-btn-active[^"]*">(?P<height>[0-9]+)p',
@@ -75,9 +109,7 @@ class CDAIE(InfoExtractor):
                 })
             info_dict['formats'].append(f)
             if not info_dict['duration']:
-                info_dict['duration'] = parse_duration(self._search_regex(
-                    r"duration\s*:\s*(\\?[\"'])(?P<duration>.+?)\1",
-                    unpacked, 'duration', fatal=False, group='duration'))
+                info_dict['duration'] = parse_duration(video.get('duration'))
 
         extract_format(webpage, 'default')
 
@@ -85,7 +117,8 @@ class CDAIE(InfoExtractor):
                 r'<a[^>]+data-quality="[^"]+"[^>]+href="([^"]+)"[^>]+class="quality-btn"[^>]*>([0-9]+p)',
                 webpage):
             webpage = self._download_webpage(
-                href, video_id, 'Downloading %s version information' % resolution, fatal=False)
+                self._BASE_URL + href, video_id,
+                'Downloading %s version information' % resolution, fatal=False)
             if not webpage:
                 # Manually report warning because empty page is returned when
                 # invalid version is requested.
index 5a58d1777d50297557cae49039df19cbfe15fef0..4ec79d19dd9db6402752ee65d462631985009cbf 100644 (file)
@@ -1,4 +1,4 @@
-# -*- coding: utf-8 -*-
+# coding: utf-8
 from __future__ import unicode_literals
 
 import re
@@ -17,7 +17,7 @@ from ..utils import (
 
 
 class CeskaTelevizeIE(InfoExtractor):
-    _VALID_URL = r'https?://www\.ceskatelevize\.cz/(porady|ivysilani)/(?:[^/]+/)*(?P<id>[^/#?]+)/*(?:[#?].*)?$'
+    _VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/(porady|ivysilani)/(?:[^/]+/)*(?P<id>[^/#?]+)/*(?:[#?].*)?$'
     _TESTS = [{
         'url': 'http://www.ceskatelevize.cz/ivysilani/ivysilani/10441294653-hyde-park-civilizace/214411058091220',
         'info_dict': {
diff --git a/youtube_dl/extractor/charlierose.py b/youtube_dl/extractor/charlierose.py
new file mode 100644 (file)
index 0000000..4bf2cf7
--- /dev/null
@@ -0,0 +1,51 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import remove_end
+
+
+class CharlieRoseIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?charlierose\.com/video(?:s|/player)/(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'https://charlierose.com/videos/27996',
+        'md5': 'fda41d49e67d4ce7c2411fd2c4702e09',
+        'info_dict': {
+            'id': '27996',
+            'ext': 'mp4',
+            'title': 'Remembering Zaha Hadid',
+            'thumbnail': 're:^https?://.*\.jpg\?\d+',
+            'description': 'We revisit past conversations with Zaha Hadid, in memory of the world renowned Iraqi architect.',
+            'subtitles': {
+                'en': [{
+                    'ext': 'vtt',
+                }],
+            },
+        },
+    }, {
+        'url': 'https://charlierose.com/videos/27996',
+        'only_matching': True,
+    }]
+
+    _PLAYER_BASE = 'https://charlierose.com/video/player/%s'
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(self._PLAYER_BASE % video_id, video_id)
+
+        title = remove_end(self._og_search_title(webpage), ' - Charlie Rose')
+
+        info_dict = self._parse_html5_media_entries(
+            self._PLAYER_BASE % video_id, webpage, video_id,
+            m3u8_entry_protocol='m3u8_native')[0]
+
+        self._sort_formats(info_dict['formats'])
+        self._remove_duplicate_formats(info_dict['formats'])
+
+        info_dict.update({
+            'id': video_id,
+            'title': title,
+            'thumbnail': self._og_search_thumbnail(webpage),
+            'description': self._og_search_description(webpage),
+        })
+
+        return info_dict
index b435186523454e0873a8adedda2f80c1a8a6ecbe..f35df143a604695c0b1fe7b0e33d7384192d1d98 100644 (file)
@@ -2,6 +2,7 @@
 from __future__ import unicode_literals
 
 import base64
+import re
 
 from .common import InfoExtractor
 from ..utils import parse_duration
@@ -65,12 +66,11 @@ class ChirbitIE(InfoExtractor):
 
 class ChirbitProfileIE(InfoExtractor):
     IE_NAME = 'chirbit:profile'
-    _VALID_URL = r'https?://(?:www\.)?chirbit.com/(?:rss/)?(?P<id>[^/]+)'
+    _VALID_URL = r'https?://(?:www\.)?chirbit\.com/(?:rss/)?(?P<id>[^/]+)'
     _TEST = {
         'url': 'http://chirbit.com/ScarletBeauty',
         'info_dict': {
             'id': 'ScarletBeauty',
-            'title': 'Chirbits by ScarletBeauty',
         },
         'playlist_mincount': 3,
     }
@@ -78,13 +78,10 @@ class ChirbitProfileIE(InfoExtractor):
     def _real_extract(self, url):
         profile_id = self._match_id(url)
 
-        rss = self._download_xml(
-            'http://chirbit.com/rss/%s' % profile_id, profile_id)
+        webpage = self._download_webpage(url, profile_id)
 
         entries = [
-            self.url_result(audio_url.text, 'Chirbit')
-            for audio_url in rss.findall('./channel/item/link')]
+            self.url_result(self._proto_relative_url('//chirb.it/' + video_id))
+            for _, video_id in re.findall(r'<input[^>]+id=([\'"])copy-btn-(?P<id>[0-9a-zA-Z]+)\1', webpage)]
 
-        title = rss.find('./channel/title').text
-
-        return self.playlist_result(entries, profile_id, title)
+        return self.playlist_result(entries, profile_id)
index 3a47f6fa4e1cdf734670ff64abb9aa4c02c94a6e..bb52e0c6ff75178626f83cd0a6d2de6607e861ad 100644 (file)
@@ -1,3 +1,4 @@
+# coding: utf-8
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
@@ -10,15 +11,15 @@ from ..utils import (
 class ClipfishIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?clipfish\.de/(?:[^/]+/)+video/(?P<id>[0-9]+)'
     _TEST = {
-        'url': 'http://www.clipfish.de/special/game-trailer/video/3966754/fifa-14-e3-2013-trailer/',
-        'md5': '79bc922f3e8a9097b3d68a93780fd475',
+        'url': 'http://www.clipfish.de/special/ugly-americans/video/4343170/s01-e01-ugly-americans-date-in-der-hoelle/',
+        'md5': '720563e467b86374c194bdead08d207d',
         'info_dict': {
-            'id': '3966754',
+            'id': '4343170',
             'ext': 'mp4',
-            'title': 'FIFA 14 - E3 2013 Trailer',
-            'description': 'Video zu FIFA 14: E3 2013 Trailer',
-            'upload_date': '20130611',
-            'duration': 82,
+            'title': 'S01 E01 - Ugly Americans - Date in der Hölle',
+            'description': 'Mark Lilly arbeitet im Sozialdienst der Stadt New York und soll Immigranten bei ihrer Einbürgerung in die USA zur Seite stehen.',
+            'upload_date': '20161005',
+            'duration': 1291,
             'view_count': int,
         }
     }
@@ -50,10 +51,14 @@ class ClipfishIE(InfoExtractor):
                 'tbr': int_or_none(video_info.get('bitrate')),
             })
 
+        descr = video_info.get('descr')
+        if descr:
+            descr = descr.strip()
+
         return {
             'id': video_id,
             'title': video_info['title'],
-            'description': video_info.get('descr'),
+            'description': descr,
             'formats': formats,
             'thumbnail': video_info.get('media_content_thumbnail_large') or video_info.get('media_thumbnail'),
             'duration': int_or_none(video_info.get('media_length')),
index 2fba93543474cd7ebd53848aca62848c32bf7164..f7ee3a8f8ebe4715b2d2a5f4634bc50836cc33f7 100644 (file)
@@ -1,9 +1,6 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
-import json
-import re
-
 from .common import InfoExtractor
 from ..utils import (
     clean_html,
@@ -30,16 +27,14 @@ class ClubicIE(InfoExtractor):
     }]
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
 
         player_url = 'http://player.m6web.fr/v1/player/clubic/%s.html' % video_id
         player_page = self._download_webpage(player_url, video_id)
 
-        config_json = self._search_regex(
+        config = self._parse_json(self._search_regex(
             r'(?m)M6\.Player\.config\s*=\s*(\{.+?\});$', player_page,
-            'configuration')
-        config = json.loads(config_json)
+            'configuration'), video_id)
 
         video_info = config['videoInfo']
         sources = config['sources']
index f24568dcc25740f7814c134f9e58e659e7f11855..7d3e9b0c9ce89fff9b8094f2d86beaa5fb35e7e0 100644 (file)
@@ -6,7 +6,7 @@ from ..utils import ExtractorError
 
 class CMTIE(MTVIE):
     IE_NAME = 'cmt.com'
-    _VALID_URL = r'https?://www\.cmt\.com/(?:videos|shows)/(?:[^/]+/)*(?P<videoid>\d+)'
+    _VALID_URL = r'https?://(?:www\.)?cmt\.com/(?:videos|shows)/(?:[^/]+/)*(?P<videoid>\d+)'
     _FEED_URL = 'http://www.cmt.com/sitewide/apps/player/embed/rss/'
 
     _TESTS = [{
@@ -26,7 +26,7 @@ class CMTIE(MTVIE):
             'id': '1504699',
             'ext': 'mp4',
             'title': 'Still The King Ep. 109 in 3 Minutes',
-            'description': 'Relive or catch up with Still The King by watching this recap of season 1, episode 9. New episodes Sundays 9/8c.',
+            'description': 'Relive or catch up with Still The King by watching this recap of season 1, episode 9.',
             'timestamp': 1469421000.0,
             'upload_date': '20160725',
         },
@@ -42,3 +42,8 @@ class CMTIE(MTVIE):
                 '%s said: video is not available' % cls.IE_NAME, expected=True)
 
         return super(CMTIE, cls)._transform_rtmp_url(rtmp_video_url)
+
+    def _extract_mgid(self, webpage):
+        return self._search_regex(
+            r'MTVN\.VIDEO\.contentUri\s*=\s*([\'"])(?P<mgid>.+?)\1',
+            webpage, 'mgid', group='mgid')
index 53489a14e38399680c8338f4f22a521f7fa6ad45..5fc311f538eb23b0b16e99f6c5623f0db4290b40 100644 (file)
@@ -3,15 +3,12 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..utils import (
-    int_or_none,
-    parse_duration,
-    url_basename,
-)
+from .turner import TurnerBaseIE
+from ..utils import url_basename
 
 
-class CNNIE(InfoExtractor):
-    _VALID_URL = r'''(?x)https?://(?:(?:edition|www)\.)?cnn\.com/video/(?:data/.+?|\?)/
+class CNNIE(TurnerBaseIE):
+    _VALID_URL = r'''(?x)https?://(?:(?P<sub_domain>edition|www|money)\.)?cnn\.com/(?:video/(?:data/.+?|\?)/)?videos?/
         (?P<path>.+?/(?P<title>[^/]+?)(?:\.(?:[a-z\-]+)|(?=&)))'''
 
     _TESTS = [{
@@ -25,6 +22,7 @@ class CNNIE(InfoExtractor):
             'duration': 135,
             'upload_date': '20130609',
         },
+        'expected_warnings': ['Failed to download m3u8 information'],
     }, {
         'url': 'http://edition.cnn.com/video/?/video/us/2013/08/21/sot-student-gives-epic-speech.georgia-institute-of-technology&utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+rss%2Fcnn_topstories+%28RSS%3A+Top+Stories%29',
         'md5': 'b5cc60c60a3477d185af8f19a2a26f4e',
@@ -34,7 +32,8 @@ class CNNIE(InfoExtractor):
             'title': "Student's epic speech stuns new freshmen",
             'description': "A Georgia Tech student welcomes the incoming freshmen with an epic speech backed by music from \"2001: A Space Odyssey.\"",
             'upload_date': '20130821',
-        }
+        },
+        'expected_warnings': ['Failed to download m3u8 information'],
     }, {
         'url': 'http://www.cnn.com/video/data/2.0/video/living/2014/12/22/growing-america-nashville-salemtown-board-episode-1.hln.html',
         'md5': 'f14d02ebd264df951feb2400e2c25a1b',
@@ -44,80 +43,61 @@ class CNNIE(InfoExtractor):
             'title': 'Nashville Ep. 1: Hand crafted skateboards',
             'description': 'md5:e7223a503315c9f150acac52e76de086',
             'upload_date': '20141222',
-        }
+        },
+        'expected_warnings': ['Failed to download m3u8 information'],
+    }, {
+        'url': 'http://money.cnn.com/video/news/2016/08/19/netflix-stunning-stats.cnnmoney/index.html',
+        'md5': '52a515dc1b0f001cd82e4ceda32be9d1',
+        'info_dict': {
+            'id': '/video/news/2016/08/19/netflix-stunning-stats.cnnmoney',
+            'ext': 'mp4',
+            'title': '5 stunning stats about Netflix',
+            'description': 'Did you know that Netflix has more than 80 million members? Here are five facts about the online video distributor that you probably didn\'t know.',
+            'upload_date': '20160819',
+        },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        },
     }, {
         'url': 'http://cnn.com/video/?/video/politics/2015/03/27/pkg-arizona-senator-church-attendance-mandatory.ktvk',
         'only_matching': True,
     }, {
         'url': 'http://cnn.com/video/?/video/us/2015/04/06/dnt-baker-refuses-anti-gay-order.wkmg',
         'only_matching': True,
+    }, {
+        'url': 'http://edition.cnn.com/videos/arts/2016/04/21/olympic-games-cultural-a-z-brazil.cnn',
+        'only_matching': True,
     }]
 
-    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        path = mobj.group('path')
-        page_title = mobj.group('title')
-        info_url = 'http://edition.cnn.com/video/data/3.0/%s/index.xml' % path
-        info = self._download_xml(info_url, page_title)
-
-        formats = []
-        rex = re.compile(r'''(?x)
-            (?P<width>[0-9]+)x(?P<height>[0-9]+)
-            (?:_(?P<bitrate>[0-9]+)k)?
-        ''')
-        for f in info.findall('files/file'):
-            video_url = 'http://ht.cdn.turner.com/cnn/big%s' % (f.text.strip())
-            fdct = {
-                'format_id': f.attrib['bitrate'],
-                'url': video_url,
-            }
-
-            mf = rex.match(f.attrib['bitrate'])
-            if mf:
-                fdct['width'] = int(mf.group('width'))
-                fdct['height'] = int(mf.group('height'))
-                fdct['tbr'] = int_or_none(mf.group('bitrate'))
-            else:
-                mf = rex.search(f.text)
-                if mf:
-                    fdct['width'] = int(mf.group('width'))
-                    fdct['height'] = int(mf.group('height'))
-                    fdct['tbr'] = int_or_none(mf.group('bitrate'))
-                else:
-                    mi = re.match(r'ios_(audio|[0-9]+)$', f.attrib['bitrate'])
-                    if mi:
-                        if mi.group(1) == 'audio':
-                            fdct['vcodec'] = 'none'
-                            fdct['ext'] = 'm4a'
-                        else:
-                            fdct['tbr'] = int(mi.group(1))
-
-            formats.append(fdct)
-
-        self._sort_formats(formats)
-
-        thumbnails = [{
-            'height': int(t.attrib['height']),
-            'width': int(t.attrib['width']),
-            'url': t.text,
-        } for t in info.findall('images/image')]
-
-        metas_el = info.find('metas')
-        upload_date = (
-            metas_el.attrib.get('version') if metas_el is not None else None)
+    _CONFIG = {
+        # http://edition.cnn.com/.element/apps/cvp/3.0/cfg/spider/cnn/expansion/config.xml
+        'edition': {
+            'data_src': 'http://edition.cnn.com/video/data/3.0/video/%s/index.xml',
+            'media_src': 'http://pmd.cdn.turner.com/cnn/big',
+        },
+        # http://money.cnn.com/.element/apps/cvp2/cfg/config.xml
+        'money': {
+            'data_src': 'http://money.cnn.com/video/data/4.0/video/%s.xml',
+            'media_src': 'http://ht3.cdn.turner.com/money/big',
+        },
+    }
 
-        duration_el = info.find('length')
-        duration = parse_duration(duration_el.text)
+    def _extract_timestamp(self, video_data):
+        # TODO: fix timestamp extraction
+        return None
 
-        return {
-            'id': info.attrib['id'],
-            'title': info.find('headline').text,
-            'formats': formats,
-            'thumbnails': thumbnails,
-            'description': info.find('description').text,
-            'duration': duration,
-            'upload_date': upload_date,
-        }
+    def _real_extract(self, url):
+        sub_domain, path, page_title = re.match(self._VALID_URL, url).groups()
+        if sub_domain not in ('money', 'edition'):
+            sub_domain = 'edition'
+        config = self._CONFIG[sub_domain]
+        return self._extract_cvp_info(
+            config['data_src'] % path, page_title, {
+                'default': {
+                    'media_src': config['media_src'],
+                }
+            })
 
 
 class CNNBlogsIE(InfoExtractor):
@@ -132,6 +112,7 @@ class CNNBlogsIE(InfoExtractor):
             'description': 'Glenn Greenwald responds to comments made this week on Capitol Hill that journalists could be criminal accessories.',
             'upload_date': '20140209',
         },
+        'expected_warnings': ['Failed to download m3u8 information'],
         'add_ie': ['CNN'],
     }
 
@@ -146,7 +127,7 @@ class CNNBlogsIE(InfoExtractor):
 
 
 class CNNArticleIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:(?:edition|www)\.)?cnn\.com/(?!video/)'
+    _VALID_URL = r'https?://(?:(?:edition|www)\.)?cnn\.com/(?!videos?/)'
     _TEST = {
         'url': 'http://www.cnn.com/2014/12/21/politics/obama-north-koreas-hack-not-war-but-cyber-vandalism/',
         'md5': '689034c2a3d9c6dc4aa72d65a81efd01',
@@ -154,9 +135,10 @@ class CNNArticleIE(InfoExtractor):
             'id': 'bestoftv/2014/12/21/ip-north-korea-obama.cnn',
             'ext': 'mp4',
             'title': 'Obama: Cyberattack not an act of war',
-            'description': 'md5:51ce6750450603795cad0cdfbd7d05c5',
+            'description': 'md5:0a802a40d2376f60e6b04c8d5bcebc4b',
             'upload_date': '20141221',
         },
+        'expected_warnings': ['Failed to download m3u8 information'],
         'add_ie': ['CNN'],
     }
 
index 747c245c844171958637213b37daec3dd03f3a7e..588aad0d911038229a4a3a97e5c74284f7bafc56 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
index 88346dde7754a124e2b1d88d5ab8291dca4ca632..0239dfd84d776d45e8457d0357fceacb5d1d7467 100644 (file)
@@ -6,7 +6,7 @@ from .common import InfoExtractor
 
 class ComedyCentralIE(MTVServicesInfoExtractor):
     _VALID_URL = r'''(?x)https?://(?:www\.)?cc\.com/
-        (video-clips|episodes|cc-studios|video-collections|full-episodes|shows)
+        (video-clips|episodes|cc-studios|video-collections|shows(?=/[^/]+/(?!full-episodes)))
         /(?P<title>.*)'''
     _FEED_URL = 'http://comedycentral.com/feeds/mrss/'
 
@@ -27,6 +27,40 @@ class ComedyCentralIE(MTVServicesInfoExtractor):
     }]
 
 
+class ComedyCentralFullEpisodesIE(MTVServicesInfoExtractor):
+    _VALID_URL = r'''(?x)https?://(?:www\.)?cc\.com/
+        (?:full-episodes|shows(?=/[^/]+/full-episodes))
+        /(?P<id>[^?]+)'''
+    _FEED_URL = 'http://comedycentral.com/feeds/mrss/'
+
+    _TESTS = [{
+        'url': 'http://www.cc.com/full-episodes/pv391a/the-daily-show-with-trevor-noah-november-28--2016---ryan-speedo-green-season-22-ep-22028',
+        'info_dict': {
+            'description': 'Donald Trump is accused of exploiting his president-elect status for personal gain, Cuban leader Fidel Castro dies, and Ryan Speedo Green discusses "Sing for Your Life."',
+            'title': 'November 28, 2016 - Ryan Speedo Green',
+        },
+        'playlist_count': 4,
+    }, {
+        'url': 'http://www.cc.com/shows/the-daily-show-with-trevor-noah/full-episodes',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        playlist_id = self._match_id(url)
+        webpage = self._download_webpage(url, playlist_id)
+
+        feed_json = self._search_regex(r'var triforceManifestFeed\s*=\s*(\{.+?\});\n', webpage, 'triforce feeed')
+        feed = self._parse_json(feed_json, playlist_id)
+        zones = feed['manifest']['zones']
+
+        video_zone = zones['t2_lc_promo1']
+        feed = self._download_json(video_zone['feed'], playlist_id)
+        mgid = feed['result']['data']['id']
+
+        videos_info = self._get_videos_info(mgid)
+        return videos_info
+
+
 class ToshIE(MTVServicesInfoExtractor):
     IE_DESC = 'Tosh.0'
     _VALID_URL = r'^https?://tosh\.cc\.com/video-(?:clips|collections)/[^/]+/(?P<videotitle>[^/?#]+)'
index 9427ff4499243d5236e5349c7cc98c11b9413fb9..05c51fac9b0b4162fb126cb79a79d871b591ead8 100644 (file)
@@ -21,6 +21,7 @@ from ..compat import (
     compat_os_name,
     compat_str,
     compat_urllib_error,
+    compat_urllib_parse_unquote,
     compat_urllib_parse_urlencode,
     compat_urllib_request,
     compat_urlparse,
@@ -29,6 +30,7 @@ from ..downloader.f4m import remove_encrypted_media
 from ..utils import (
     NO_DEFAULT,
     age_restricted,
+    base_url,
     bug_reports_message,
     clean_html,
     compiled_regex_type,
@@ -87,6 +89,9 @@ class InfoExtractor(object):
 
                     Potential fields:
                     * url        Mandatory. The URL of the video file
+                    * manifest_url
+                                 The URL of the manifest file in case of
+                                 fragmented media (DASH, hls, hds)
                     * ext        Will be calculated from URL if missing
                     * format     A human-readable description of the format
                                  ("mp4 container with h264/opus").
@@ -115,6 +120,11 @@ class InfoExtractor(object):
                                  download, lower-case.
                                  "http", "https", "rtsp", "rtmp", "rtmpe",
                                  "m3u8", "m3u8_native" or "http_dash_segments".
+                    * fragments  A list of fragments of the fragmented media,
+                                 with the following entries:
+                                 * "url" (mandatory) - fragment's URL
+                                 * "duration" (optional, int or float)
+                                 * "filesize" (optional, int)
                     * preference Order number of this format. If this field is
                                  present and not None, the formats get sorted
                                  by this field, regardless of all other values.
@@ -226,7 +236,7 @@ class InfoExtractor(object):
     chapter_id:     Id of the chapter the video belongs to, as a unicode string.
 
     The following fields should only be used when the video is an episode of some
-    series or programme:
+    series, programme or podcast:
 
     series:         Title of the series or programme the video episode belongs to.
     season:         Title of the season the video episode belongs to.
@@ -674,33 +684,36 @@ class InfoExtractor(object):
                     username = info[0]
                     password = info[2]
                 else:
-                    raise netrc.NetrcParseError('No authenticators for %s' % netrc_machine)
+                    raise netrc.NetrcParseError(
+                        'No authenticators for %s' % netrc_machine)
             except (IOError, netrc.NetrcParseError) as err:
-                self._downloader.report_warning('parsing .netrc: %s' % error_to_compat_str(err))
+                self._downloader.report_warning(
+                    'parsing .netrc: %s' % error_to_compat_str(err))
 
-        return (username, password)
+        return username, password
 
-    def _get_login_info(self):
+    def _get_login_info(self, username_option='username', password_option='password', netrc_machine=None):
         """
         Get the login info as (username, password)
-        It will look in the netrc file using the _NETRC_MACHINE value
+        First look for the manually specified credentials using username_option
+        and password_option as keys in params dictionary. If no such credentials
+        available look in the netrc file using the netrc_machine or _NETRC_MACHINE
+        value.
         If there's no info available, return (None, None)
         """
         if self._downloader is None:
             return (None, None)
 
-        username = None
-        password = None
         downloader_params = self._downloader.params
 
         # Attempt to use provided username and password or .netrc data
-        if downloader_params.get('username') is not None:
-            username = downloader_params['username']
-            password = downloader_params['password']
+        if downloader_params.get(username_option) is not None:
+            username = downloader_params[username_option]
+            password = downloader_params[password_option]
         else:
-            username, password = self._get_netrc_login_info()
+            username, password = self._get_netrc_login_info(netrc_machine)
 
-        return (username, password)
+        return username, password
 
     def _get_tfa_info(self, note='two-factor verification code'):
         """
@@ -873,7 +886,7 @@ class InfoExtractor(object):
                         'url': e.get('contentUrl'),
                         'title': unescapeHTML(e.get('name')),
                         'description': unescapeHTML(e.get('description')),
-                        'thumbnail': e.get('thumbnailUrl'),
+                        'thumbnail': e.get('thumbnailUrl') or e.get('thumbnailURL'),
                         'duration': parse_duration(e.get('duration')),
                         'timestamp': unified_timestamp(e.get('uploadDate')),
                         'filesize': float_or_none(e.get('contentSize')),
@@ -888,16 +901,16 @@ class InfoExtractor(object):
     def _hidden_inputs(html):
         html = re.sub(r'<!--(?:(?!<!--).)*-->', '', html)
         hidden_inputs = {}
-        for input in re.findall(r'(?i)<input([^>]+)>', html):
-            if not re.search(r'type=(["\'])(?:hidden|submit)\1', input):
-                continue
-            name = re.search(r'(?:name|id)=(["\'])(?P<value>.+?)\1', input)
-            if not name:
+        for input in re.findall(r'(?i)(<input[^>]+>)', html):
+            attrs = extract_attributes(input)
+            if not input:
                 continue
-            value = re.search(r'value=(["\'])(?P<value>.*?)\1', input)
-            if not value:
+            if attrs.get('type') not in ('hidden', 'submit'):
                 continue
-            hidden_inputs[name.group('value')] = value.group('value')
+            name = attrs.get('name') or attrs.get('id')
+            value = attrs.get('value')
+            if name and value is not None:
+                hidden_inputs[name] = value
         return hidden_inputs
 
     def _form_hidden_inputs(self, form_id, html):
@@ -1088,6 +1101,13 @@ class InfoExtractor(object):
             manifest, ['{http://ns.adobe.com/f4m/1.0}bootstrapInfo', '{http://ns.adobe.com/f4m/2.0}bootstrapInfo'],
             'bootstrap info', default=None)
 
+        vcodec = None
+        mime_type = xpath_text(
+            manifest, ['{http://ns.adobe.com/f4m/1.0}mimeType', '{http://ns.adobe.com/f4m/2.0}mimeType'],
+            'base URL', default=None)
+        if mime_type and mime_type.startswith('audio/'):
+            vcodec = 'none'
+
         for i, media_el in enumerate(media_nodes):
             tbr = int_or_none(media_el.attrib.get('bitrate'))
             width = int_or_none(media_el.attrib.get('width'))
@@ -1128,6 +1148,7 @@ class InfoExtractor(object):
                             'width': f.get('width') or width,
                             'height': f.get('height') or height,
                             'format_id': f.get('format_id') if not tbr else format_id,
+                            'vcodec': vcodec,
                         })
                     formats.extend(f4m_formats)
                     continue
@@ -1139,10 +1160,12 @@ class InfoExtractor(object):
             formats.append({
                 'format_id': format_id,
                 'url': manifest_url,
+                'manifest_url': manifest_url,
                 'ext': 'flv' if bootstrap_info is not None else None,
                 'tbr': tbr,
                 'width': width,
                 'height': height,
+                'vcodec': vcodec,
                 'preference': preference,
             })
         return formats
@@ -1163,13 +1186,6 @@ class InfoExtractor(object):
                               m3u8_id=None, note=None, errnote=None,
                               fatal=True, live=False):
 
-        formats = [self._m3u8_meta_format(m3u8_url, ext, preference, m3u8_id)]
-
-        format_url = lambda u: (
-            u
-            if re.match(r'^https?://', u)
-            else compat_urlparse.urljoin(m3u8_url, u))
-
         res = self._download_webpage_handle(
             m3u8_url, video_id,
             note=note or 'Downloading m3u8 information',
@@ -1180,6 +1196,13 @@ class InfoExtractor(object):
         m3u8_doc, urlh = res
         m3u8_url = urlh.geturl()
 
+        formats = [self._m3u8_meta_format(m3u8_url, ext, preference, m3u8_id)]
+
+        format_url = lambda u: (
+            u
+            if re.match(r'^https?://', u)
+            else compat_urlparse.urljoin(m3u8_url, u))
+
         # We should try extracting formats only from master playlists [1], i.e.
         # playlists that describe available qualities. On the other hand media
         # playlists [2] should be returned as is since they contain just the media
@@ -1201,35 +1224,54 @@ class InfoExtractor(object):
                 'protocol': entry_protocol,
                 'preference': preference,
             }]
-        last_info = None
-        last_media = None
+        last_info = {}
+        last_media = {}
         for line in m3u8_doc.splitlines():
             if line.startswith('#EXT-X-STREAM-INF:'):
                 last_info = parse_m3u8_attributes(line)
             elif line.startswith('#EXT-X-MEDIA:'):
-                last_media = parse_m3u8_attributes(line)
+                media = parse_m3u8_attributes(line)
+                media_type = media.get('TYPE')
+                if media_type in ('VIDEO', 'AUDIO'):
+                    media_url = media.get('URI')
+                    if media_url:
+                        format_id = []
+                        for v in (media.get('GROUP-ID'), media.get('NAME')):
+                            if v:
+                                format_id.append(v)
+                        formats.append({
+                            'format_id': '-'.join(format_id),
+                            'url': format_url(media_url),
+                            'language': media.get('LANGUAGE'),
+                            'vcodec': 'none' if media_type == 'AUDIO' else None,
+                            'ext': ext,
+                            'protocol': entry_protocol,
+                            'preference': preference,
+                        })
+                    else:
+                        # When there is no URI in EXT-X-MEDIA let this tag's
+                        # data be used by regular URI lines below
+                        last_media = media
             elif line.startswith('#') or not line.strip():
                 continue
             else:
-                if last_info is None:
-                    formats.append({'url': format_url(line)})
-                    continue
-                tbr = int_or_none(last_info.get('BANDWIDTH'), scale=1000)
+                tbr = int_or_none(last_info.get('AVERAGE-BANDWIDTH') or last_info.get('BANDWIDTH'), scale=1000)
                 format_id = []
                 if m3u8_id:
                     format_id.append(m3u8_id)
-                last_media_name = last_media.get('NAME') if last_media and last_media.get('TYPE') not in ('SUBTITLES', 'CLOSED-CAPTIONS') else None
                 # Despite specification does not mention NAME attribute for
                 # EXT-X-STREAM-INF it still sometimes may be present
-                stream_name = last_info.get('NAME') or last_media_name
+                stream_name = last_info.get('NAME') or last_media.get('NAME')
                 # Bandwidth of live streams may differ over time thus making
                 # format_id unpredictable. So it's better to keep provided
                 # format_id intact.
                 if not live:
                     format_id.append(stream_name if stream_name else '%d' % (tbr if tbr else len(formats)))
+                manifest_url = format_url(line.strip())
                 f = {
                     'format_id': '-'.join(format_id),
-                    'url': format_url(line.strip()),
+                    'url': manifest_url,
+                    'manifest_url': manifest_url,
                     'tbr': tbr,
                     'ext': ext,
                     'fps': float_or_none(last_info.get('FRAME-RATE')),
@@ -1238,9 +1280,10 @@ class InfoExtractor(object):
                 }
                 resolution = last_info.get('RESOLUTION')
                 if resolution:
-                    width_str, height_str = resolution.split('x')
-                    f['width'] = int(width_str)
-                    f['height'] = int(height_str)
+                    mobj = re.search(r'(?P<width>\d+)[xX](?P<height>\d+)', resolution)
+                    if mobj:
+                        f['width'] = int(mobj.group('width'))
+                        f['height'] = int(mobj.group('height'))
                 # Unified Streaming Platform
                 mobj = re.search(
                     r'audio.*?(?:%3D|=)(\d+)(?:-video.*?(?:%3D|=)(\d+))?', f['url'])
@@ -1252,11 +1295,9 @@ class InfoExtractor(object):
                         'abr': abr,
                     })
                 f.update(parse_codecs(last_info.get('CODECS')))
-                if last_media is not None:
-                    f['m3u8_media'] = last_media
-                    last_media = None
                 formats.append(f)
                 last_info = {}
+                last_media = {}
         return formats
 
     @staticmethod
@@ -1500,12 +1541,13 @@ class InfoExtractor(object):
         if res is False:
             return []
         mpd, urlh = res
-        mpd_base_url = re.match(r'https?://.+/', urlh.geturl()).group()
+        mpd_base_url = base_url(urlh.geturl())
 
         return self._parse_mpd_formats(
-            compat_etree_fromstring(mpd.encode('utf-8')), mpd_id, mpd_base_url, formats_dict=formats_dict)
+            compat_etree_fromstring(mpd.encode('utf-8')), mpd_id, mpd_base_url,
+            formats_dict=formats_dict, mpd_url=mpd_url)
 
-    def _parse_mpd_formats(self, mpd_doc, mpd_id=None, mpd_base_url='', formats_dict={}):
+    def _parse_mpd_formats(self, mpd_doc, mpd_id=None, mpd_base_url='', formats_dict={}, mpd_url=None):
         """
         Parse formats from MPD manifest.
         References:
@@ -1526,42 +1568,52 @@ class InfoExtractor(object):
 
         def extract_multisegment_info(element, ms_parent_info):
             ms_info = ms_parent_info.copy()
+
+            # As per [1, 5.3.9.2.2] SegmentList and SegmentTemplate share some
+            # common attributes and elements.  We will only extract relevant
+            # for us.
+            def extract_common(source):
+                segment_timeline = source.find(_add_ns('SegmentTimeline'))
+                if segment_timeline is not None:
+                    s_e = segment_timeline.findall(_add_ns('S'))
+                    if s_e:
+                        ms_info['total_number'] = 0
+                        ms_info['s'] = []
+                        for s in s_e:
+                            r = int(s.get('r', 0))
+                            ms_info['total_number'] += 1 + r
+                            ms_info['s'].append({
+                                't': int(s.get('t', 0)),
+                                # @d is mandatory (see [1, 5.3.9.6.2, Table 17, page 60])
+                                'd': int(s.attrib['d']),
+                                'r': r,
+                            })
+                start_number = source.get('startNumber')
+                if start_number:
+                    ms_info['start_number'] = int(start_number)
+                timescale = source.get('timescale')
+                if timescale:
+                    ms_info['timescale'] = int(timescale)
+                segment_duration = source.get('duration')
+                if segment_duration:
+                    ms_info['segment_duration'] = int(segment_duration)
+
+            def extract_Initialization(source):
+                initialization = source.find(_add_ns('Initialization'))
+                if initialization is not None:
+                    ms_info['initialization_url'] = initialization.attrib['sourceURL']
+
             segment_list = element.find(_add_ns('SegmentList'))
             if segment_list is not None:
+                extract_common(segment_list)
+                extract_Initialization(segment_list)
                 segment_urls_e = segment_list.findall(_add_ns('SegmentURL'))
                 if segment_urls_e:
                     ms_info['segment_urls'] = [segment.attrib['media'] for segment in segment_urls_e]
-                initialization = segment_list.find(_add_ns('Initialization'))
-                if initialization is not None:
-                    ms_info['initialization_url'] = initialization.attrib['sourceURL']
             else:
                 segment_template = element.find(_add_ns('SegmentTemplate'))
                 if segment_template is not None:
-                    start_number = segment_template.get('startNumber')
-                    if start_number:
-                        ms_info['start_number'] = int(start_number)
-                    segment_timeline = segment_template.find(_add_ns('SegmentTimeline'))
-                    if segment_timeline is not None:
-                        s_e = segment_timeline.findall(_add_ns('S'))
-                        if s_e:
-                            ms_info['total_number'] = 0
-                            ms_info['s'] = []
-                            for s in s_e:
-                                r = int(s.get('r', 0))
-                                ms_info['total_number'] += 1 + r
-                                ms_info['s'].append({
-                                    't': int(s.get('t', 0)),
-                                    # @d is mandatory (see [1, 5.3.9.6.2, Table 17, page 60])
-                                    'd': int(s.attrib['d']),
-                                    'r': r,
-                                })
-                    else:
-                        timescale = segment_template.get('timescale')
-                        if timescale:
-                            ms_info['timescale'] = int(timescale)
-                        segment_duration = segment_template.get('duration')
-                        if segment_duration:
-                            ms_info['segment_duration'] = int(segment_duration)
+                    extract_common(segment_template)
                     media_template = segment_template.get('media')
                     if media_template:
                         ms_info['media_template'] = media_template
@@ -1569,11 +1621,14 @@ class InfoExtractor(object):
                     if initialization:
                         ms_info['initialization_url'] = initialization
                     else:
-                        initialization = segment_template.find(_add_ns('Initialization'))
-                        if initialization is not None:
-                            ms_info['initialization_url'] = initialization.attrib['sourceURL']
+                        extract_Initialization(segment_template)
             return ms_info
 
+        def combine_url(base_url, target_url):
+            if re.match(r'^https?://', target_url):
+                return target_url
+            return '%s%s%s' % (base_url, '' if base_url.endswith('/') else '/', target_url)
+
         mpd_duration = parse_duration(mpd_doc.get('mediaPresentationDuration'))
         formats = []
         for period in mpd_doc.findall(_add_ns('Period')):
@@ -1616,6 +1671,7 @@ class InfoExtractor(object):
                         f = {
                             'format_id': '%s-%s' % (mpd_id, representation_id) if mpd_id else representation_id,
                             'url': base_url,
+                            'manifest_url': mpd_url,
                             'ext': mimetype2ext(mime_type),
                             'width': int_or_none(representation_attrib.get('width')),
                             'height': int_or_none(representation_attrib.get('height')),
@@ -1630,9 +1686,7 @@ class InfoExtractor(object):
                         }
                         representation_ms_info = extract_multisegment_info(representation, adaption_set_ms_info)
                         if 'segment_urls' not in representation_ms_info and 'media_template' in representation_ms_info:
-                            if 'total_number' not in representation_ms_info and 'segment_duration':
-                                segment_duration = float(representation_ms_info['segment_duration']) / float(representation_ms_info['timescale'])
-                                representation_ms_info['total_number'] = int(math.ceil(float(period_duration) / segment_duration))
+
                             media_template = representation_ms_info['media_template']
                             media_template = media_template.replace('$RepresentationID$', representation_id)
                             media_template = re.sub(r'\$(Number|Bandwidth|Time)\$', r'%(\1)d', media_template)
@@ -1641,46 +1695,79 @@ class InfoExtractor(object):
 
                             # As per [1, 5.3.9.4.4, Table 16, page 55] $Number$ and $Time$
                             # can't be used at the same time
-                            if '%(Number' in media_template:
-                                representation_ms_info['segment_urls'] = [
-                                    media_template % {
+                            if '%(Number' in media_template and 's' not in representation_ms_info:
+                                segment_duration = None
+                                if 'total_number' not in representation_ms_info and 'segment_duration':
+                                    segment_duration = float_or_none(representation_ms_info['segment_duration'], representation_ms_info['timescale'])
+                                    representation_ms_info['total_number'] = int(math.ceil(float(period_duration) / segment_duration))
+                                representation_ms_info['fragments'] = [{
+                                    'url': media_template % {
                                         'Number': segment_number,
-                                        'Bandwidth': representation_attrib.get('bandwidth'),
-                                    }
-                                    for segment_number in range(
-                                        representation_ms_info['start_number'],
-                                        representation_ms_info['total_number'] + representation_ms_info['start_number'])]
+                                        'Bandwidth': int_or_none(representation_attrib.get('bandwidth')),
+                                    },
+                                    'duration': segment_duration,
+                                } for segment_number in range(
+                                    representation_ms_info['start_number'],
+                                    representation_ms_info['total_number'] + representation_ms_info['start_number'])]
                             else:
-                                representation_ms_info['segment_urls'] = []
+                                # $Number*$ or $Time$ in media template with S list available
+                                # Example $Number*$: http://www.svtplay.se/klipp/9023742/stopptid-om-bjorn-borg
+                                # Example $Time$: https://play.arkena.com/embed/avp/v2/player/media/b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe/1/129411
+                                representation_ms_info['fragments'] = []
                                 segment_time = 0
+                                segment_d = None
+                                segment_number = representation_ms_info['start_number']
 
                                 def add_segment_url():
-                                    representation_ms_info['segment_urls'].append(
-                                        media_template % {
-                                            'Time': segment_time,
-                                            'Bandwidth': representation_attrib.get('bandwidth'),
-                                        }
-                                    )
+                                    segment_url = media_template % {
+                                        'Time': segment_time,
+                                        'Bandwidth': int_or_none(representation_attrib.get('bandwidth')),
+                                        'Number': segment_number,
+                                    }
+                                    representation_ms_info['fragments'].append({
+                                        'url': segment_url,
+                                        'duration': float_or_none(segment_d, representation_ms_info['timescale']),
+                                    })
 
                                 for num, s in enumerate(representation_ms_info['s']):
                                     segment_time = s.get('t') or segment_time
+                                    segment_d = s['d']
                                     add_segment_url()
+                                    segment_number += 1
                                     for r in range(s.get('r', 0)):
-                                        segment_time += s['d']
+                                        segment_time += segment_d
                                         add_segment_url()
-                                    segment_time += s['d']
-                        if 'segment_urls' in representation_ms_info:
+                                        segment_number += 1
+                                    segment_time += segment_d
+                        elif 'segment_urls' in representation_ms_info and 's' in representation_ms_info:
+                            # No media template
+                            # Example: https://www.youtube.com/watch?v=iXZV5uAYMJI
+                            # or any YouTube dashsegments video
+                            fragments = []
+                            s_num = 0
+                            for segment_url in representation_ms_info['segment_urls']:
+                                s = representation_ms_info['s'][s_num]
+                                for r in range(s.get('r', 0) + 1):
+                                    fragments.append({
+                                        'url': segment_url,
+                                        'duration': float_or_none(s['d'], representation_ms_info['timescale']),
+                                    })
+                            representation_ms_info['fragments'] = fragments
+                        # NB: MPD manifest may contain direct URLs to unfragmented media.
+                        # No fragments key is present in this case.
+                        if 'fragments' in representation_ms_info:
                             f.update({
-                                'segment_urls': representation_ms_info['segment_urls'],
+                                'fragments': [],
                                 'protocol': 'http_dash_segments',
                             })
                             if 'initialization_url' in representation_ms_info:
                                 initialization_url = representation_ms_info['initialization_url'].replace('$RepresentationID$', representation_id)
-                                f.update({
-                                    'initialization_url': initialization_url,
-                                })
                                 if not f.get('url'):
                                     f['url'] = initialization_url
+                                f['fragments'].append({'url': initialization_url})
+                            f['fragments'].extend(representation_ms_info['fragments'])
+                            for fragment in f['fragments']:
+                                fragment['url'] = combine_url(base_url, fragment['url'])
                         try:
                             existing_format = next(
                                 fo for fo in formats
@@ -1695,7 +1782,106 @@ class InfoExtractor(object):
                         self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type)
         return formats
 
-    def _parse_html5_media_entries(self, base_url, webpage):
+    def _extract_ism_formats(self, ism_url, video_id, ism_id=None, note=None, errnote=None, fatal=True):
+        res = self._download_webpage_handle(
+            ism_url, video_id,
+            note=note or 'Downloading ISM manifest',
+            errnote=errnote or 'Failed to download ISM manifest',
+            fatal=fatal)
+        if res is False:
+            return []
+        ism, urlh = res
+
+        return self._parse_ism_formats(
+            compat_etree_fromstring(ism.encode('utf-8')), urlh.geturl(), ism_id)
+
+    def _parse_ism_formats(self, ism_doc, ism_url, ism_id=None):
+        if ism_doc.get('IsLive') == 'TRUE' or ism_doc.find('Protection') is not None:
+            return []
+
+        duration = int(ism_doc.attrib['Duration'])
+        timescale = int_or_none(ism_doc.get('TimeScale')) or 10000000
+
+        formats = []
+        for stream in ism_doc.findall('StreamIndex'):
+            stream_type = stream.get('Type')
+            if stream_type not in ('video', 'audio'):
+                continue
+            url_pattern = stream.attrib['Url']
+            stream_timescale = int_or_none(stream.get('TimeScale')) or timescale
+            stream_name = stream.get('Name')
+            for track in stream.findall('QualityLevel'):
+                fourcc = track.get('FourCC')
+                # TODO: add support for WVC1 and WMAP
+                if fourcc not in ('H264', 'AVC1', 'AACL'):
+                    self.report_warning('%s is not a supported codec' % fourcc)
+                    continue
+                tbr = int(track.attrib['Bitrate']) // 1000
+                width = int_or_none(track.get('MaxWidth'))
+                height = int_or_none(track.get('MaxHeight'))
+                sampling_rate = int_or_none(track.get('SamplingRate'))
+
+                track_url_pattern = re.sub(r'{[Bb]itrate}', track.attrib['Bitrate'], url_pattern)
+                track_url_pattern = compat_urlparse.urljoin(ism_url, track_url_pattern)
+
+                fragments = []
+                fragment_ctx = {
+                    'time': 0,
+                }
+                stream_fragments = stream.findall('c')
+                for stream_fragment_index, stream_fragment in enumerate(stream_fragments):
+                    fragment_ctx['time'] = int_or_none(stream_fragment.get('t')) or fragment_ctx['time']
+                    fragment_repeat = int_or_none(stream_fragment.get('r')) or 1
+                    fragment_ctx['duration'] = int_or_none(stream_fragment.get('d'))
+                    if not fragment_ctx['duration']:
+                        try:
+                            next_fragment_time = int(stream_fragment[stream_fragment_index + 1].attrib['t'])
+                        except IndexError:
+                            next_fragment_time = duration
+                        fragment_ctx['duration'] = (next_fragment_time - fragment_ctx['time']) / fragment_repeat
+                    for _ in range(fragment_repeat):
+                        fragments.append({
+                            'url': re.sub(r'{start[ _]time}', compat_str(fragment_ctx['time']), track_url_pattern),
+                            'duration': fragment_ctx['duration'] / stream_timescale,
+                        })
+                        fragment_ctx['time'] += fragment_ctx['duration']
+
+                format_id = []
+                if ism_id:
+                    format_id.append(ism_id)
+                if stream_name:
+                    format_id.append(stream_name)
+                format_id.append(compat_str(tbr))
+
+                formats.append({
+                    'format_id': '-'.join(format_id),
+                    'url': ism_url,
+                    'manifest_url': ism_url,
+                    'ext': 'ismv' if stream_type == 'video' else 'isma',
+                    'width': width,
+                    'height': height,
+                    'tbr': tbr,
+                    'asr': sampling_rate,
+                    'vcodec': 'none' if stream_type == 'audio' else fourcc,
+                    'acodec': 'none' if stream_type == 'video' else fourcc,
+                    'protocol': 'ism',
+                    'fragments': fragments,
+                    '_download_params': {
+                        'duration': duration,
+                        'timescale': stream_timescale,
+                        'width': width or 0,
+                        'height': height or 0,
+                        'fourcc': fourcc,
+                        'codec_private_data': track.get('CodecPrivateData'),
+                        'sampling_rate': sampling_rate,
+                        'channels': int_or_none(track.get('Channels', 2)),
+                        'bits_per_sample': int_or_none(track.get('BitsPerSample', 16)),
+                        'nal_unit_length_field': int_or_none(track.get('NALUnitLengthField', 4)),
+                    },
+                })
+        return formats
+
+    def _parse_html5_media_entries(self, base_url, webpage, video_id, m3u8_id=None, m3u8_entry_protocol='m3u8'):
         def absolute_url(video_url):
             return compat_urlparse.urljoin(base_url, video_url)
 
@@ -1710,8 +1896,27 @@ class InfoExtractor(object):
                 return f
             return {}
 
+        def _media_formats(src, cur_media_type):
+            full_url = absolute_url(src)
+            if determine_ext(full_url) == 'm3u8':
+                is_plain_url = False
+                formats = self._extract_m3u8_formats(
+                    full_url, video_id, ext='mp4',
+                    entry_protocol=m3u8_entry_protocol, m3u8_id=m3u8_id)
+            else:
+                is_plain_url = True
+                formats = [{
+                    'url': full_url,
+                    'vcodec': 'none' if cur_media_type == 'audio' else None,
+                }]
+            return is_plain_url, formats
+
         entries = []
-        for media_tag, media_type, media_content in re.findall(r'(?s)(<(?P<tag>video|audio)[^>]*>)(.*?)</(?P=tag)>', webpage):
+        media_tags = [(media_tag, media_type, '')
+                      for media_tag, media_type
+                      in re.findall(r'(?s)(<(video|audio)[^>]*/>)', webpage)]
+        media_tags.extend(re.findall(r'(?s)(<(?P<tag>video|audio)[^>]*>)(.*?)</(?P=tag)>', webpage))
+        for media_tag, media_type, media_content in media_tags:
             media_info = {
                 'formats': [],
                 'subtitles': {},
@@ -1719,10 +1924,8 @@ class InfoExtractor(object):
             media_attributes = extract_attributes(media_tag)
             src = media_attributes.get('src')
             if src:
-                media_info['formats'].append({
-                    'url': absolute_url(src),
-                    'vcodec': 'none' if media_type == 'audio' else None,
-                })
+                _, formats = _media_formats(src, media_type)
+                media_info['formats'].extend(formats)
             media_info['thumbnail'] = media_attributes.get('poster')
             if media_content:
                 for source_tag in re.findall(r'<source[^>]+>', media_content):
@@ -1730,16 +1933,17 @@ class InfoExtractor(object):
                     src = source_attributes.get('src')
                     if not src:
                         continue
-                    f = parse_content_type(source_attributes.get('type'))
-                    f.update({
-                        'url': absolute_url(src),
-                        'vcodec': 'none' if media_type == 'audio' else None,
-                    })
-                    media_info['formats'].append(f)
+                    is_plain_url, formats = _media_formats(src, media_type)
+                    if is_plain_url:
+                        f = parse_content_type(source_attributes.get('type'))
+                        f.update(formats[0])
+                        media_info['formats'].append(f)
+                    else:
+                        media_info['formats'].extend(formats)
                 for track_tag in re.findall(r'<track[^>]+>', media_content):
                     track_attributes = extract_attributes(track_tag)
                     kind = track_attributes.get('kind')
-                    if not kind or kind == 'subtitles':
+                    if not kind or kind in ('subtitles', 'captions'):
                         src = track_attributes.get('src')
                         if not src:
                             continue
@@ -1747,10 +1951,70 @@ class InfoExtractor(object):
                         media_info['subtitles'].setdefault(lang, []).append({
                             'url': absolute_url(src),
                         })
-            if media_info['formats']:
+            if media_info['formats'] or media_info['subtitles']:
                 entries.append(media_info)
         return entries
 
+    def _extract_akamai_formats(self, manifest_url, video_id):
+        formats = []
+        hdcore_sign = 'hdcore=3.7.0'
+        f4m_url = re.sub(r'(https?://.+?)/i/', r'\1/z/', manifest_url).replace('/master.m3u8', '/manifest.f4m')
+        if 'hdcore=' not in f4m_url:
+            f4m_url += ('&' if '?' in f4m_url else '?') + hdcore_sign
+        f4m_formats = self._extract_f4m_formats(
+            f4m_url, video_id, f4m_id='hds', fatal=False)
+        for entry in f4m_formats:
+            entry.update({'extra_param_to_segment_url': hdcore_sign})
+        formats.extend(f4m_formats)
+        m3u8_url = re.sub(r'(https?://.+?)/z/', r'\1/i/', manifest_url).replace('/manifest.f4m', '/master.m3u8')
+        formats.extend(self._extract_m3u8_formats(
+            m3u8_url, video_id, 'mp4', 'm3u8_native',
+            m3u8_id='hls', fatal=False))
+        return formats
+
+    def _extract_wowza_formats(self, url, video_id, m3u8_entry_protocol='m3u8_native', skip_protocols=[]):
+        url = re.sub(r'/(?:manifest|playlist|jwplayer)\.(?:m3u8|f4m|mpd|smil)', '', url)
+        url_base = self._search_regex(r'(?:https?|rtmp|rtsp)(://[^?]+)', url, 'format url')
+        http_base_url = 'http' + url_base
+        formats = []
+        if 'm3u8' not in skip_protocols:
+            formats.extend(self._extract_m3u8_formats(
+                http_base_url + '/playlist.m3u8', video_id, 'mp4',
+                m3u8_entry_protocol, m3u8_id='hls', fatal=False))
+        if 'f4m' not in skip_protocols:
+            formats.extend(self._extract_f4m_formats(
+                http_base_url + '/manifest.f4m',
+                video_id, f4m_id='hds', fatal=False))
+        if 'dash' not in skip_protocols:
+            formats.extend(self._extract_mpd_formats(
+                http_base_url + '/manifest.mpd',
+                video_id, mpd_id='dash', fatal=False))
+        if re.search(r'(?:/smil:|\.smil)', url_base):
+            if 'smil' not in skip_protocols:
+                rtmp_formats = self._extract_smil_formats(
+                    http_base_url + '/jwplayer.smil',
+                    video_id, fatal=False)
+                for rtmp_format in rtmp_formats:
+                    rtsp_format = rtmp_format.copy()
+                    rtsp_format['url'] = '%s/%s' % (rtmp_format['url'], rtmp_format['play_path'])
+                    del rtsp_format['play_path']
+                    del rtsp_format['ext']
+                    rtsp_format.update({
+                        'url': rtsp_format['url'].replace('rtmp://', 'rtsp://'),
+                        'format_id': rtmp_format['format_id'].replace('rtmp', 'rtsp'),
+                        'protocol': 'rtsp',
+                    })
+                    formats.extend([rtmp_format, rtsp_format])
+        else:
+            for protocol in ('rtmp', 'rtsp'):
+                if protocol not in skip_protocols:
+                    formats.append({
+                        'url': protocol + url_base,
+                        'format_id': protocol,
+                        'protocol': protocol,
+                    })
+        return formats
+
     def _live_title(self, name):
         """ Generate the title for a live video """
         now = datetime.datetime.now()
@@ -1871,6 +2135,12 @@ class InfoExtractor(object):
             headers['Ytdl-request-proxy'] = geo_verification_proxy
         return headers
 
+    def _generic_id(self, url):
+        return compat_urllib_parse_unquote(os.path.splitext(url.rstrip('/').split('/')[-1])[0])
+
+    def _generic_title(self, url):
+        return compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0])
+
 
 class SearchInfoExtractor(InfoExtractor):
     """
index 5d130a170ed79454e05087cae24ab0d132448b32..d98331a4e400b23389bee55e4b648b31d464b8d6 100644 (file)
@@ -1,13 +1,9 @@
 from __future__ import unicode_literals
 
-import os
-
 from .common import InfoExtractor
 from ..compat import (
-    compat_urllib_parse_unquote,
     compat_urlparse,
 )
-from ..utils import url_basename
 
 
 class RtmpIE(InfoExtractor):
@@ -23,8 +19,8 @@ class RtmpIE(InfoExtractor):
     }]
 
     def _real_extract(self, url):
-        video_id = compat_urllib_parse_unquote(os.path.splitext(url.rstrip('/').split('/')[-1])[0])
-        title = compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0])
+        video_id = self._generic_id(url)
+        title = self._generic_title(url)
         return {
             'id': video_id,
             'title': title,
@@ -34,3 +30,31 @@ class RtmpIE(InfoExtractor):
                 'format_id': compat_urlparse.urlparse(url).scheme,
             }],
         }
+
+
+class MmsIE(InfoExtractor):
+    IE_DESC = False  # Do not list
+    _VALID_URL = r'(?i)mms://.+'
+
+    _TEST = {
+        # Direct MMS link
+        'url': 'mms://kentro.kaist.ac.kr/200907/MilesReid(0709).wmv',
+        'info_dict': {
+            'id': 'MilesReid(0709)',
+            'ext': 'wmv',
+            'title': 'MilesReid(0709)',
+        },
+        'params': {
+            'skip_download': True,  # rtsp downloads, requiring mplayer or mpv
+        },
+    }
+
+    def _real_extract(self, url):
+        video_id = self._generic_id(url)
+        title = self._generic_title(url)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'url': url,
+        }
index 79238cce7a22d040e70c977219a86438bea4dfc7..cc68f1c0082674eaf850c2a0c1e3d6ae0f670d74 100644 (file)
@@ -1,5 +1,5 @@
 # coding: utf-8
-from __future__ import unicode_literals
+from __future__ import unicode_literals, division
 
 from .common import InfoExtractor
 from ..utils import int_or_none
@@ -8,12 +8,22 @@ from ..utils import int_or_none
 class CrackleIE(InfoExtractor):
     _VALID_URL = r'(?:crackle:|https?://(?:www\.)?crackle\.com/(?:playlist/\d+/|(?:[^/]+/)+))(?P<id>\d+)'
     _TEST = {
-        'url': 'http://www.crackle.com/the-art-of-more/2496419',
+        'url': 'http://www.crackle.com/comedians-in-cars-getting-coffee/2498934',
         'info_dict': {
-            'id': '2496419',
+            'id': '2498934',
             'ext': 'mp4',
-            'title': 'Heavy Lies the Head',
-            'description': 'md5:bb56aa0708fe7b9a4861535f15c3abca',
+            'title': 'Everybody Respects A Bloody Nose',
+            'description': 'Jerry is kaffeeklatsching in L.A. with funnyman J.B. Smoove (Saturday Night Live, Real Husbands of Hollywood). They’re headed for brew at 10 Speed Coffee in a 1964 Studebaker Avanti.',
+            'thumbnail': 're:^https?://.*\.jpg',
+            'duration': 906,
+            'series': 'Comedians In Cars Getting Coffee',
+            'season_number': 8,
+            'episode_number': 4,
+            'subtitles': {
+                'en-US': [{
+                    'ext': 'ttml',
+                }]
+            },
         },
         'params': {
             # m3u8 download
@@ -21,12 +31,8 @@ class CrackleIE(InfoExtractor):
         }
     }
 
-    # extracted from http://legacyweb-us.crackle.com/flash/QueryReferrer.ashx
-    _SUBTITLE_SERVER = 'http://web-us-az.crackle.com'
-    _UPLYNK_OWNER_ID = 'e8773f7770a44dbd886eee4fca16a66b'
-    _THUMBNAIL_TEMPLATE = 'http://images-us-am.crackle.com/%stnl_1920x1080.jpg?ts=20140107233116?c=635333335057637614'
-
     # extracted from http://legacyweb-us.crackle.com/flash/ReferrerRedirect.ashx
+    _THUMBNAIL_TEMPLATE = 'http://images-us-am.crackle.com/%stnl_1920x1080.jpg?ts=20140107233116?c=635333335057637614'
     _MEDIA_FILE_SLOTS = {
         'c544.flv': {
             'width': 544,
@@ -48,16 +54,21 @@ class CrackleIE(InfoExtractor):
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
+
+        config_doc = self._download_xml(
+            'http://legacyweb-us.crackle.com/flash/QueryReferrer.ashx?site=16',
+            video_id, 'Downloading config')
+
         item = self._download_xml(
             'http://legacyweb-us.crackle.com/app/revamp/vidwallcache.aspx?flags=-1&fm=%s' % video_id,
             video_id).find('i')
         title = item.attrib['t']
 
-        thumbnail = None
         subtitles = {}
         formats = self._extract_m3u8_formats(
-            'http://content.uplynk.com/ext/%s/%s.m3u8' % (self._UPLYNK_OWNER_ID, video_id),
+            'http://content.uplynk.com/ext/%s/%s.m3u8' % (config_doc.attrib['strUplynkOwnerId'], video_id),
             video_id, 'mp4', m3u8_id='hls', fatal=None)
+        thumbnail = None
         path = item.attrib.get('p')
         if path:
             thumbnail = self._THUMBNAIL_TEMPLATE % path
@@ -76,7 +87,7 @@ class CrackleIE(InfoExtractor):
                     if locale not in subtitles:
                         subtitles[locale] = []
                     subtitles[locale] = [{
-                        'url': '%s/%s%s_%s.xml' % (self._SUBTITLE_SERVER, path, locale, v),
+                        'url': '%s/%s%s_%s.xml' % (config_doc.attrib['strSubtitleServer'], path, locale, v),
                         'ext': 'ttml',
                     }]
         self._sort_formats(formats, ('width', 'height', 'tbr', 'format_id'))
@@ -85,7 +96,7 @@ class CrackleIE(InfoExtractor):
             'id': video_id,
             'title': title,
             'description': item.attrib.get('d'),
-            'duration': int(item.attrib.get('r'), 16) if item.attrib.get('r') else None,
+            'duration': int(item.attrib.get('r'), 16) / 1000 if item.attrib.get('r') else None,
             'series': item.attrib.get('sn'),
             'season_number': int_or_none(item.attrib.get('se')),
             'episode_number': int_or_none(item.attrib.get('ep')),
index dedb810a092618a090641dfaf582939efabf3fc0..cf6a5d6cbe906443b1db592616cd89926860bbdd 100644 (file)
@@ -1,13 +1,11 @@
-# -*- coding: utf-8 -*-
+# coding: utf-8
 from __future__ import unicode_literals
 
-import re
-
 from .common import InfoExtractor
 
 
 class CriterionIE(InfoExtractor):
-    _VALID_URL = r'https?://www\.criterion\.com/films/(?P<id>[0-9]+)-.+'
+    _VALID_URL = r'https?://(?:www\.)?criterion\.com/films/(?P<id>[0-9]+)-.+'
     _TEST = {
         'url': 'http://www.criterion.com/films/184-le-samourai',
         'md5': 'bc51beba55685509883a9a7830919ec3',
@@ -16,20 +14,20 @@ class CriterionIE(InfoExtractor):
             'ext': 'mp4',
             'title': 'Le Samouraï',
             'description': 'md5:a2b4b116326558149bef81f76dcbb93f',
+            'thumbnail': 're:^https?://.*\.jpg$',
         }
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
 
         final_url = self._search_regex(
-            r'so.addVariable\("videoURL", "(.+?)"\)\;', webpage, 'video url')
+            r'so\.addVariable\("videoURL", "(.+?)"\)\;', webpage, 'video url')
         title = self._og_search_title(webpage)
         description = self._html_search_meta('description', webpage)
         thumbnail = self._search_regex(
-            r'so.addVariable\("thumbnailURL", "(.+?)"\)\;',
+            r'so\.addVariable\("thumbnailURL", "(.+?)"\)\;',
             webpage, 'thumbnail url')
 
         return {
index 6d3abb52f735de655e11ce88c70a4b6b9cc287d6..8d5b69f68d3ddb345dc67487db998cf164b2765c 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 import re
@@ -34,22 +34,58 @@ from ..aes import (
 
 
 class CrunchyrollBaseIE(InfoExtractor):
+    _LOGIN_URL = 'https://www.crunchyroll.com/login'
+    _LOGIN_FORM = 'login_form'
     _NETRC_MACHINE = 'crunchyroll'
 
     def _login(self):
         (username, password) = self._get_login_info()
         if username is None:
             return
-        self.report_login()
-        login_url = 'https://www.crunchyroll.com/?a=formhandler'
-        data = urlencode_postdata({
-            'formname': 'RpcApiUser_Login',
-            'name': username,
-            'password': password,
+
+        login_page = self._download_webpage(
+            self._LOGIN_URL, None, 'Downloading login page')
+
+        def is_logged(webpage):
+            return '<title>Redirecting' in webpage
+
+        # Already logged in
+        if is_logged(login_page):
+            return
+
+        login_form_str = self._search_regex(
+            r'(?P<form><form[^>]+?id=(["\'])%s\2[^>]*>)' % self._LOGIN_FORM,
+            login_page, 'login form', group='form')
+
+        post_url = extract_attributes(login_form_str).get('action')
+        if not post_url:
+            post_url = self._LOGIN_URL
+        elif not post_url.startswith('http'):
+            post_url = compat_urlparse.urljoin(self._LOGIN_URL, post_url)
+
+        login_form = self._form_hidden_inputs(self._LOGIN_FORM, login_page)
+
+        login_form.update({
+            'login_form[name]': username,
+            'login_form[password]': password,
         })
-        login_request = sanitized_Request(login_url, data)
-        login_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
-        self._download_webpage(login_request, None, False, 'Wrong login info')
+
+        response = self._download_webpage(
+            post_url, None, 'Logging in', 'Wrong login info',
+            data=urlencode_postdata(login_form),
+            headers={'Content-Type': 'application/x-www-form-urlencoded'})
+
+        # Successful login
+        if is_logged(response):
+            return
+
+        error = self._html_search_regex(
+            '(?s)<ul[^>]+class=["\']messages["\'][^>]*>(.+?)</ul>',
+            response, 'error message', default=None)
+        if error:
+            raise ExtractorError('Unable to login: %s' % error, expected=True)
+
+        raise ExtractorError('Unable to log in')
 
     def _real_initialize(self):
         self._login()
@@ -114,6 +150,7 @@ class CrunchyrollIE(CrunchyrollBaseIE):
             # rtmp
             'skip_download': True,
         },
+        'skip': 'Video gone',
     }, {
         'url': 'http://www.crunchyroll.com/rezero-starting-life-in-another-world-/episode-5-the-morning-of-our-promise-is-still-distant-702409',
         'info_dict': {
@@ -199,7 +236,7 @@ class CrunchyrollIE(CrunchyrollBaseIE):
         output += 'WrapStyle: %s\n' % sub_root.attrib['wrap_style']
         output += 'PlayResX: %s\n' % sub_root.attrib['play_res_x']
         output += 'PlayResY: %s\n' % sub_root.attrib['play_res_y']
-        output += """ScaledBorderAndShadow: yes
+        output += """ScaledBorderAndShadow: no
 
 [V4+ Styles]
 Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
diff --git a/youtube_dl/extractor/ctv.py b/youtube_dl/extractor/ctv.py
deleted file mode 100644 (file)
index 5807fba..0000000
+++ /dev/null
@@ -1,30 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-
-
-class CTVIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?ctv\.ca/video/player\?vid=(?P<id>[0-9.]+)'
-    _TESTS = [{
-        'url': 'http://www.ctv.ca/video/player?vid=706966',
-        'md5': 'ff2ebbeae0aa2dcc32a830c3fd69b7b0',
-        'info_dict': {
-            'id': '706966',
-            'ext': 'mp4',
-            'title': 'Larry Day and Richard Jutras on the TIFF red carpet of \'Stonewall\'',
-            'description': 'etalk catches up with Larry Day and Richard Jutras on the TIFF red carpet of "Stonewall”.',
-            'upload_date': '20150919',
-            'timestamp': 1442624700,
-        },
-        'expected_warnings': ['HTTP Error 404'],
-    }]
-
-    def _real_extract(self, url):
-        video_id = self._match_id(url)
-        return {
-            '_type': 'url_transparent',
-            'id': video_id,
-            'url': '9c9media:ctv_web:%s' % video_id,
-            'ie_key': 'NineCNineMedia',
-        }
index 9c764fe68c57314d8524b2705f8bae7c30520c26..9f26fa5878777d3302383646ad581056f429841a 100644 (file)
@@ -1,9 +1,13 @@
 from __future__ import unicode_literals
 
 import re
+import time
 
 from .common import InfoExtractor
-from ..utils import int_or_none
+from ..utils import (
+    int_or_none,
+    HEADRequest,
+)
 
 
 class CultureUnpluggedIE(InfoExtractor):
@@ -32,6 +36,9 @@ class CultureUnpluggedIE(InfoExtractor):
         video_id = mobj.group('id')
         display_id = mobj.group('display_id') or video_id
 
+        # request setClientTimezone.php to get the PHPSESSID cookie, which is needed to get valid JSON data in the next request
+        self._request_webpage(HEADRequest(
+            'http://www.cultureunplugged.com/setClientTimezone.php?timeOffset=%d' % -(time.timezone / 3600)), display_id)
         movie_data = self._download_json(
             'http://www.cultureunplugged.com/movie-data/cu-%s.json' % video_id, display_id)
 
diff --git a/youtube_dl/extractor/curiositystream.py b/youtube_dl/extractor/curiositystream.py
new file mode 100644 (file)
index 0000000..e3c9946
--- /dev/null
@@ -0,0 +1,120 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    int_or_none,
+    urlencode_postdata,
+    compat_str,
+    ExtractorError,
+)
+
+
+class CuriosityStreamBaseIE(InfoExtractor):
+    _NETRC_MACHINE = 'curiositystream'
+    _auth_token = None
+    _API_BASE_URL = 'https://api.curiositystream.com/v1/'
+
+    def _handle_errors(self, result):
+        error = result.get('error', {}).get('message')
+        if error:
+            if isinstance(error, dict):
+                error = ', '.join(error.values())
+            raise ExtractorError(
+                '%s said: %s' % (self.IE_NAME, error), expected=True)
+
+    def _call_api(self, path, video_id):
+        headers = {}
+        if self._auth_token:
+            headers['X-Auth-Token'] = self._auth_token
+        result = self._download_json(
+            self._API_BASE_URL + path, video_id, headers=headers)
+        self._handle_errors(result)
+        return result['data']
+
+    def _real_initialize(self):
+        (email, password) = self._get_login_info()
+        if email is None:
+            return
+        result = self._download_json(
+            self._API_BASE_URL + 'login', None, data=urlencode_postdata({
+                'email': email,
+                'password': password,
+            }))
+        self._handle_errors(result)
+        self._auth_token = result['message']['auth_token']
+
+    def _extract_media_info(self, media):
+        video_id = compat_str(media['id'])
+        limelight_media_id = media['limelight_media_id']
+        title = media['title']
+
+        subtitles = {}
+        for closed_caption in media.get('closed_captions', []):
+            sub_url = closed_caption.get('file')
+            if not sub_url:
+                continue
+            lang = closed_caption.get('code') or closed_caption.get('language') or 'en'
+            subtitles.setdefault(lang, []).append({
+                'url': sub_url,
+            })
+
+        return {
+            '_type': 'url_transparent',
+            'id': video_id,
+            'url': 'limelight:media:' + limelight_media_id,
+            'title': title,
+            'description': media.get('description'),
+            'thumbnail': media.get('image_large') or media.get('image_medium') or media.get('image_small'),
+            'duration': int_or_none(media.get('duration')),
+            'tags': media.get('tags'),
+            'subtitles': subtitles,
+            'ie_key': 'LimelightMedia',
+        }
+
+
+class CuriosityStreamIE(CuriosityStreamBaseIE):
+    IE_NAME = 'curiositystream'
+    _VALID_URL = r'https?://app\.curiositystream\.com/video/(?P<id>\d+)'
+    _TEST = {
+        'url': 'https://app.curiositystream.com/video/2',
+        'md5': 'a0074c190e6cddaf86900b28d3e9ee7a',
+        'info_dict': {
+            'id': '2',
+            'ext': 'mp4',
+            'title': 'How Did You Develop The Internet?',
+            'description': 'Vint Cerf, Google\'s Chief Internet Evangelist, describes how he and Bob Kahn created the internet.',
+            'timestamp': 1448388615,
+            'upload_date': '20151124',
+        }
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        media = self._call_api('media/' + video_id, video_id)
+        return self._extract_media_info(media)
+
+
+class CuriosityStreamCollectionIE(CuriosityStreamBaseIE):
+    IE_NAME = 'curiositystream:collection'
+    _VALID_URL = r'https?://app\.curiositystream\.com/collection/(?P<id>\d+)'
+    _TEST = {
+        'url': 'https://app.curiositystream.com/collection/2',
+        'info_dict': {
+            'id': '2',
+            'title': 'Curious Minds: The Internet',
+            'description': 'How is the internet shaping our lives in the 21st Century?',
+        },
+        'playlist_mincount': 17,
+    }
+
+    def _real_extract(self, url):
+        collection_id = self._match_id(url)
+        collection = self._call_api(
+            'collections/' + collection_id, collection_id)
+        entries = []
+        for media in collection.get('media', []):
+            entries.append(self._extract_media_info(media))
+        return self.playlist_result(
+            entries, collection_id,
+            collection.get('title'), collection.get('description'))
index 496883d15bbc0b3e1a3503d0545ea1e46c33e6c9..4a3314ea7d4fc2df95543cda554d32a8caf586ac 100644 (file)
@@ -94,7 +94,8 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
                 'title': 'Leanna Decker - Cyber Girl Of The Year Desires Nude [Playboy Plus]',
                 'uploader': 'HotWaves1012',
                 'age_limit': 18,
-            }
+            },
+            'skip': 'video gone',
         },
         # geo-restricted, player v5
         {
@@ -144,7 +145,8 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
         player_v5 = self._search_regex(
             [r'buildPlayer\(({.+?})\);\n',  # See https://github.com/rg3/youtube-dl/issues/7826
              r'playerV5\s*=\s*dmp\.create\([^,]+?,\s*({.+?})\);',
-             r'buildPlayer\(({.+?})\);'],
+             r'buildPlayer\(({.+?})\);',
+             r'var\s+config\s*=\s*({.+?});'],
             webpage, 'player v5', default=None)
         if player_v5:
             player = self._parse_json(player_v5, video_id)
@@ -394,7 +396,7 @@ class DailymotionUserIE(DailymotionPlaylistIE):
 
 
 class DailymotionCloudIE(DailymotionBaseInfoExtractor):
-    _VALID_URL_PREFIX = r'http://api\.dmcloud\.net/(?:player/)?embed/'
+    _VALID_URL_PREFIX = r'https?://api\.dmcloud\.net/(?:player/)?embed/'
     _VALID_URL = r'%s[^/]+/(?P<id>[^/?]+)' % _VALID_URL_PREFIX
     _VALID_EMBED_URL = r'%s[^/]+/[^\'"]+' % _VALID_URL_PREFIX
 
index b5c310ccb8042c7bfa44c6a909ead398fc679dd4..732b4362a96488e67f4b1858f83429a85e877555 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 
 from __future__ import unicode_literals
 
index caff8842e9432f5737f7fee247c943c4a0685920..6d880d43d6507077018f9489749947d83a36f64b 100644 (file)
@@ -38,6 +38,12 @@ class DBTVIE(InfoExtractor):
         'only_matching': True,
     }]
 
+    @staticmethod
+    def _extract_urls(webpage):
+        return [url for _, url in re.findall(
+            r'<iframe[^>]+src=(["\'])((?:https?:)?//(?:www\.)?dbtv\.no/(?:lazy)?player/\d+.*?)\1',
+            webpage)]
+
     def _real_extract(self, url):
         video_id, display_id = re.match(self._VALID_URL, url).groups()
 
index 9099f5046a14ad7c769a6da50d813076f8b9231e..14ba88715887caeb9144e68384417b2e7b518b07 100644 (file)
@@ -1,61 +1,54 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
-from ..compat import compat_str
+from ..utils import unified_strdate
 
 
 class DctpTvIE(InfoExtractor):
-    _VALID_URL = r'https?://www.dctp.tv/(#/)?filme/(?P<id>.+?)/$'
+    _VALID_URL = r'https?://(?:www\.)?dctp\.tv/(#/)?filme/(?P<id>.+?)/$'
     _TEST = {
         'url': 'http://www.dctp.tv/filme/videoinstallation-fuer-eine-kaufhausfassade/',
+        'md5': '174dd4a8a6225cf5655952f969cfbe24',
         'info_dict': {
-            'id': '1324',
+            'id': '95eaa4f33dad413aa17b4ee613cccc6c',
             'display_id': 'videoinstallation-fuer-eine-kaufhausfassade',
-            'ext': 'flv',
-            'title': 'Videoinstallation für eine Kaufhausfassade'
+            'ext': 'mp4',
+            'title': 'Videoinstallation für eine Kaufhausfassade',
+            'description': 'Kurzfilm',
+            'upload_date': '20110407',
+            'thumbnail': 're:^https?://.*\.jpg$',
         },
-        'params': {
-            # rtmp download
-            'skip_download': True,
-        }
     }
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
-        base_url = 'http://dctp-ivms2-restapi.s3.amazonaws.com/'
-        version_json = self._download_json(
-            base_url + 'version.json',
-            video_id, note='Determining file version')
-        version = version_json['version_name']
-        info_json = self._download_json(
-            '{0}{1}/restapi/slugs/{2}.json'.format(base_url, version, video_id),
-            video_id, note='Fetching object ID')
-        object_id = compat_str(info_json['object_id'])
-        meta_json = self._download_json(
-            '{0}{1}/restapi/media/{2}.json'.format(base_url, version, object_id),
-            video_id, note='Downloading metadata')
-        uuid = meta_json['uuid']
-        title = meta_json['title']
-        wide = meta_json['is_wide']
-        if wide:
-            ratio = '16x9'
-        else:
-            ratio = '4x3'
-        play_path = 'mp4:{0}_dctp_0500_{1}.m4v'.format(uuid, ratio)
+        webpage = self._download_webpage(url, video_id)
+
+        object_id = self._html_search_meta('DC.identifier', webpage)
 
         servers_json = self._download_json(
-            'http://www.dctp.tv/streaming_servers/',
+            'http://www.dctp.tv/elastic_streaming_client/get_streaming_server/',
             video_id, note='Downloading server list')
-        url = servers_json[0]['endpoint']
+        server = servers_json[0]['server']
+        m3u8_path = self._search_regex(
+            r'\'([^\'"]+/playlist\.m3u8)"', webpage, 'm3u8 path')
+        formats = self._extract_m3u8_formats(
+            'http://%s%s' % (server, m3u8_path), video_id, ext='mp4',
+            entry_protocol='m3u8_native')
+
+        title = self._og_search_title(webpage)
+        description = self._html_search_meta('DC.description', webpage)
+        upload_date = unified_strdate(
+            self._html_search_meta('DC.date.created', webpage))
+        thumbnail = self._og_search_thumbnail(webpage)
 
         return {
             'id': object_id,
             'title': title,
-            'format': 'rtmp',
-            'url': url,
-            'play_path': play_path,
-            'rtmp_real_time': True,
-            'ext': 'flv',
-            'display_id': video_id
+            'formats': formats,
+            'display_id': video_id,
+            'description': description,
+            'upload_date': upload_date,
+            'thumbnail': thumbnail,
         }
index 65a98d7892816e36608a3350cc00db9d7efd4cb9..bdfe638b4d7bd6fa39a4d22a0718033eb130cfa7 100644 (file)
@@ -13,7 +13,7 @@ from ..utils import (
 
 
 class DemocracynowIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?democracynow.org/(?P<id>[^\?]*)'
+    _VALID_URL = r'https?://(?:www\.)?democracynow\.org/(?P<id>[^\?]*)'
     IE_NAME = 'democracynow'
     _TESTS = [{
         'url': 'http://www.democracynow.org/shows/2015/7/3',
index adb68b96c4033a3ef5cf8dcf3357fa64ed73acb8..c4e83b2c3790670ec7d6c1b7c9cca4e47b4d7779 100644 (file)
@@ -7,11 +7,22 @@ from ..utils import (
     int_or_none,
     parse_age_limit,
     unescapeHTML,
+    ExtractorError,
 )
 
 
 class DiscoveryGoIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?discoverygo\.com/(?:[^/]+/)*(?P<id>[^/?#&]+)'
+    _VALID_URL = r'''(?x)https?://(?:www\.)?(?:
+            discovery|
+            investigationdiscovery|
+            discoverylife|
+            animalplanet|
+            ahctv|
+            destinationamerica|
+            sciencechannel|
+            tlc|
+            velocitychannel
+        )go\.com/(?:[^/]+/)*(?P<id>[^/?#&]+)'''
     _TEST = {
         'url': 'https://www.discoverygo.com/love-at-first-kiss/kiss-first-ask-questions-later/',
         'info_dict': {
@@ -43,7 +54,14 @@ class DiscoveryGoIE(InfoExtractor):
 
         title = video['name']
 
-        stream = video['stream']
+        stream = video.get('stream')
+        if not stream:
+            if video.get('authenticated') is True:
+                raise ExtractorError(
+                    'This video is only available via cable service provider subscription that'
+                    ' is not currently supported. You may want to use --cookies.', expected=True)
+            else:
+                raise ExtractorError('Unable to find stream')
         STREAM_URL_SUFFIX = 'streamUrl'
         formats = []
         for stream_kind in ('', 'hds'):
index e9ca236d4a03c13b1b29b3386535c4262332dab0..1f75352ca945c3e63ddf85e1ec204b5787cafeb6 100644 (file)
@@ -9,22 +9,39 @@ from ..utils import (
 
 class DotsubIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?dotsub\.com/view/(?P<id>[^/]+)'
-    _TEST = {
-        'url': 'http://dotsub.com/view/aed3b8b2-1889-4df5-ae63-ad85f5572f27',
-        'md5': '0914d4d69605090f623b7ac329fea66e',
+    _TESTS = [{
+        'url': 'https://dotsub.com/view/9c63db2a-fa95-4838-8e6e-13deafe47f09',
+        'md5': '21c7ff600f545358134fea762a6d42b6',
         'info_dict': {
-            'id': 'aed3b8b2-1889-4df5-ae63-ad85f5572f27',
+            'id': '9c63db2a-fa95-4838-8e6e-13deafe47f09',
             'ext': 'flv',
-            'title': 'Pyramids of Waste (2010), AKA The Lightbulb Conspiracy - Planned obsolescence documentary',
-            'description': 'md5:699a0f7f50aeec6042cb3b1db2d0d074',
-            'thumbnail': 're:^https?://dotsub.com/media/aed3b8b2-1889-4df5-ae63-ad85f5572f27/p',
-            'duration': 3169,
-            'uploader': '4v4l0n42',
-            'timestamp': 1292248482.625,
-            'upload_date': '20101213',
+            'title': 'MOTIVATION - "It\'s Possible" Best Inspirational Video Ever',
+            'description': 'md5:41af1e273edbbdfe4e216a78b9d34ac6',
+            'thumbnail': 're:^https?://dotsub.com/media/9c63db2a-fa95-4838-8e6e-13deafe47f09/p',
+            'duration': 198,
+            'uploader': 'liuxt',
+            'timestamp': 1385778501.104,
+            'upload_date': '20131130',
             'view_count': int,
         }
-    }
+    }, {
+        'url': 'https://dotsub.com/view/747bcf58-bd59-45b7-8c8c-ac312d084ee6',
+        'md5': '2bb4a83896434d5c26be868c609429a3',
+        'info_dict': {
+            'id': '168006778',
+            'ext': 'mp4',
+            'title': 'Apartments and flats in Raipur the white symphony',
+            'description': 'md5:784d0639e6b7d1bc29530878508e38fe',
+            'thumbnail': 're:^https?://dotsub.com/media/747bcf58-bd59-45b7-8c8c-ac312d084ee6/p',
+            'duration': 290,
+            'timestamp': 1476767794.2809999,
+            'upload_date': '20160525',
+            'uploader': 'parthivi001',
+            'uploader_id': 'user52596202',
+            'view_count': int,
+        },
+        'add_ie': ['Vimeo'],
+    }]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
@@ -37,12 +54,23 @@ class DotsubIE(InfoExtractor):
             webpage = self._download_webpage(url, video_id)
             video_url = self._search_regex(
                 [r'<source[^>]+src="([^"]+)"', r'"file"\s*:\s*\'([^\']+)'],
-                webpage, 'video url')
+                webpage, 'video url', default=None)
+            info_dict = {
+                'id': video_id,
+                'url': video_url,
+                'ext': 'flv',
+            }
 
-        return {
-            'id': video_id,
-            'url': video_url,
-            'ext': 'flv',
+        if not video_url:
+            setup_data = self._parse_json(self._html_search_regex(
+                r'(?s)data-setup=([\'"])(?P<content>(?!\1).+?)\1',
+                webpage, 'setup data', group='content'), video_id)
+            info_dict = {
+                '_type': 'url_transparent',
+                'url': setup_data['src'],
+            }
+
+        info_dict.update({
             'title': info['title'],
             'description': info.get('description'),
             'thumbnail': info.get('screenshotURI'),
@@ -50,4 +78,6 @@ class DotsubIE(InfoExtractor):
             'uploader': info.get('user'),
             'timestamp': float_or_none(info.get('dateCreated'), 1000),
             'view_count': int_or_none(info.get('numberOfViews')),
-        }
+        })
+
+        return info_dict
index ce6962755831a8c6f853271ad49112fee4fcbc63..e366e17e68139288543243667d637544488a6a23 100644 (file)
@@ -3,9 +3,17 @@ from __future__ import unicode_literals
 
 import hashlib
 import time
+import uuid
+
 from .common import InfoExtractor
-from ..utils import (ExtractorError, unescapeHTML)
-from ..compat import (compat_str, compat_basestring)
+from ..compat import (
+    compat_str,
+    compat_urllib_parse_urlencode,
+)
+from ..utils import (
+    ExtractorError,
+    unescapeHTML,
+)
 
 
 class DouyuTVIE(InfoExtractor):
@@ -21,7 +29,6 @@ class DouyuTVIE(InfoExtractor):
             'description': 're:.*m7show@163\.com.*',
             'thumbnail': 're:^https?://.*\.jpg$',
             'uploader': '7师傅',
-            'uploader_id': '431925',
             'is_live': True,
         },
         'params': {
@@ -37,7 +44,6 @@ class DouyuTVIE(InfoExtractor):
             'description': 'md5:746a2f7a253966a06755a912f0acc0d2',
             'thumbnail': 're:^https?://.*\.jpg$',
             'uploader': 'douyu小漠',
-            'uploader_id': '3769985',
             'is_live': True,
         },
         'params': {
@@ -54,7 +60,6 @@ class DouyuTVIE(InfoExtractor):
             'description': 're:.*m7show@163\.com.*',
             'thumbnail': 're:^https?://.*\.jpg$',
             'uploader': '7师傅',
-            'uploader_id': '431925',
             'is_live': True,
         },
         'params': {
@@ -65,6 +70,10 @@ class DouyuTVIE(InfoExtractor):
         'only_matching': True,
     }]
 
+    # Decompile core.swf in webpage by ffdec "Search SWFs in memory". core.swf
+    # is encrypted originally, but ffdec can dump memory to get the decrypted one.
+    _API_KEY = 'A12Svb&%1UUmf@hC'
+
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
@@ -75,74 +84,56 @@ class DouyuTVIE(InfoExtractor):
             room_id = self._html_search_regex(
                 r'"room_id"\s*:\s*(\d+),', page, 'room id')
 
-        config = None
-        # Douyu API sometimes returns error "Unable to load the requested class: eticket_redis_cache"
-        # Retry with different parameters - same parameters cause same errors
-        for i in range(5):
-            prefix = 'room/%s?aid=android&client_sys=android&time=%d' % (
-                room_id, int(time.time()))
-            auth = hashlib.md5((prefix + '1231').encode('ascii')).hexdigest()
-
-            config_page = self._download_webpage(
-                'http://www.douyutv.com/api/v1/%s&auth=%s' % (prefix, auth),
-                video_id)
-            try:
-                config = self._parse_json(config_page, video_id, fatal=False)
-            except ExtractorError:
-                # Wait some time before retrying to get a different time() value
-                self._sleep(1, video_id, msg_template='%(video_id)s: Error occurs. '
-                                                      'Waiting for %(timeout)s seconds before retrying')
-                continue
-            else:
-                break
-        if config is None:
-            raise ExtractorError('Unable to fetch API result')
-
-        data = config['data']
-
-        error_code = config.get('error', 0)
-        if error_code is not 0:
-            error_desc = 'Server reported error %i' % error_code
-            if isinstance(data, (compat_str, compat_basestring)):
-                error_desc += ': ' + data
-            raise ExtractorError(error_desc, expected=True)
+        room = self._download_json(
+            'http://m.douyu.com/html5/live?roomId=%s' % room_id, video_id,
+            note='Downloading room info')['data']
 
-        show_status = data.get('show_status')
         # 1 = live, 2 = offline
-        if show_status == '2':
-            raise ExtractorError(
-                'Live stream is offline', expected=True)
+        if room.get('show_status') == '2':
+            raise ExtractorError('Live stream is offline', expected=True)
+
+        tt = compat_str(int(time.time() / 60))
+        did = uuid.uuid4().hex.upper()
 
-        base_url = data['rtmp_url']
-        live_path = data['rtmp_live']
+        sign_content = ''.join((room_id, did, self._API_KEY, tt))
+        sign = hashlib.md5((sign_content).encode('utf-8')).hexdigest()
 
-        title = self._live_title(unescapeHTML(data['room_name']))
-        description = data.get('show_details')
-        thumbnail = data.get('room_src')
+        flv_data = compat_urllib_parse_urlencode({
+            'cdn': 'ws',
+            'rate': '0',
+            'tt': tt,
+            'did': did,
+            'sign': sign,
+        })
+
+        video_info = self._download_json(
+            'http://www.douyu.com/lapi/live/getPlay/%s' % room_id, video_id,
+            data=flv_data, note='Downloading video info',
+            headers={'Content-Type': 'application/x-www-form-urlencoded'})
+
+        error_code = video_info.get('error', 0)
+        if error_code is not 0:
+            raise ExtractorError(
+                '%s reported error %i' % (self.IE_NAME, error_code),
+                expected=True)
 
-        uploader = data.get('nickname')
-        uploader_id = data.get('owner_uid')
+        base_url = video_info['data']['rtmp_url']
+        live_path = video_info['data']['rtmp_live']
 
-        multi_formats = data.get('rtmp_multi_bitrate')
-        if not isinstance(multi_formats, dict):
-            multi_formats = {}
-        multi_formats['live'] = live_path
+        video_url = '%s/%s' % (base_url, live_path)
 
-        formats = [{
-            'url': '%s/%s' % (base_url, format_path),
-            'format_id': format_id,
-            'preference': 1 if format_id == 'live' else 0,
-        } for format_id, format_path in multi_formats.items()]
-        self._sort_formats(formats)
+        title = self._live_title(unescapeHTML(room['room_name']))
+        description = room.get('notice')
+        thumbnail = room.get('room_src')
+        uploader = room.get('nickname')
 
         return {
             'id': room_id,
             'display_id': video_id,
+            'url': video_url,
             'title': title,
             'description': description,
             'thumbnail': thumbnail,
             'uploader': uploader,
-            'uploader_id': uploader_id,
-            'formats': formats,
             'is_live': True,
         }
index 3b6529f4b108052e3019c8e400bbc2cd0eb5a9a1..c115956121a242920ec8016e8c9f3558c34060c6 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 import itertools
index e8870c4607d4e4f0293f134610ed8f1f8d48e956..22da8e48105e5e8ee81a9cc948c67f6ec7d72eb8 100644 (file)
@@ -10,8 +10,8 @@ from ..utils import (
 
 
 class DrTuberIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?drtuber\.com/video/(?P<id>\d+)/(?P<display_id>[\w-]+)'
-    _TEST = {
+    _VALID_URL = r'https?://(?:www\.)?drtuber\.com/(?:video|embed)/(?P<id>\d+)(?:/(?P<display_id>[\w-]+))?'
+    _TESTS = [{
         'url': 'http://www.drtuber.com/video/1740434/hot-perky-blonde-naked-golf',
         'md5': '93e680cf2536ad0dfb7e74d94a89facd',
         'info_dict': {
@@ -25,20 +25,30 @@ class DrTuberIE(InfoExtractor):
             'thumbnail': 're:https?://.*\.jpg$',
             'age_limit': 18,
         }
-    }
+    }, {
+        'url': 'http://www.drtuber.com/embed/489939',
+        'only_matching': True,
+    }]
+
+    @staticmethod
+    def _extract_urls(webpage):
+        return re.findall(
+            r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?drtuber\.com/embed/\d+)',
+            webpage)
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         video_id = mobj.group('id')
-        display_id = mobj.group('display_id')
+        display_id = mobj.group('display_id') or video_id
 
-        webpage = self._download_webpage(url, display_id)
+        webpage = self._download_webpage(
+            'http://www.drtuber.com/video/%s' % video_id, display_id)
 
         video_url = self._html_search_regex(
             r'<source src="([^"]+)"', webpage, 'video URL')
 
         title = self._html_search_regex(
-            (r'class="title_watch"[^>]*><p>([^<]+)<',
+            (r'class="title_watch"[^>]*><(?:p|h\d+)[^>]*>([^<]+)<',
              r'<p[^>]+class="title_substrate">([^<]+)</p>',
              r'<title>([^<]+) - \d+'),
             webpage, 'title')
index 2d74ff855f1670e0dcb46e35d1875e8e9c9fd144..88d096b307cdf6d484ef6b89253f6cdbcb82deb0 100644 (file)
@@ -4,26 +4,45 @@ from __future__ import unicode_literals
 from .common import InfoExtractor
 from ..utils import (
     ExtractorError,
+    int_or_none,
+    float_or_none,
+    mimetype2ext,
     parse_iso8601,
+    remove_end,
 )
 
 
 class DRTVIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?dr\.dk/tv/se/(?:[^/]+/)*(?P<id>[\da-z-]+)(?:[/#?]|$)'
+    _VALID_URL = r'https?://(?:www\.)?dr\.dk/(?:tv/se|nyheder)/(?:[^/]+/)*(?P<id>[\da-z-]+)(?:[/#?]|$)'
 
-    _TEST = {
-        'url': 'https://www.dr.dk/tv/se/boern/ultra/panisk-paske/panisk-paske-5',
-        'md5': 'dc515a9ab50577fa14cc4e4b0265168f',
+    _TESTS = [{
+        'url': 'https://www.dr.dk/tv/se/boern/ultra/klassen-ultra/klassen-darlig-taber-10',
+        'md5': '25e659cccc9a2ed956110a299fdf5983',
         'info_dict': {
-            'id': 'panisk-paske-5',
+            'id': 'klassen-darlig-taber-10',
             'ext': 'mp4',
-            'title': 'Panisk Påske (5)',
-            'description': 'md5:ca14173c5ab24cd26b0fcc074dff391c',
-            'timestamp': 1426984612,
-            'upload_date': '20150322',
-            'duration': 1455,
+            'title': 'Klassen - Dårlig taber (10)',
+            'description': 'md5:815fe1b7fa656ed80580f31e8b3c79aa',
+            'timestamp': 1471991907,
+            'upload_date': '20160823',
+            'duration': 606.84,
         },
-    }
+        'params': {
+            'skip_download': True,
+        },
+    }, {
+        'url': 'https://www.dr.dk/nyheder/indland/live-christianias-rydning-af-pusher-street-er-i-gang',
+        'md5': '2c37175c718155930f939ef59952474a',
+        'info_dict': {
+            'id': 'christiania-pusher-street-ryddes-drdkrjpo',
+            'ext': 'mp4',
+            'title': 'LIVE Christianias rydning af Pusher Street er i gang',
+            'description': '- Det er det fedeste, der er sket i 20 år, fortæller christianit til DR Nyheder.',
+            'timestamp': 1472800279,
+            'upload_date': '20160902',
+            'duration': 131.4,
+        },
+    }]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
@@ -35,7 +54,8 @@ class DRTVIE(InfoExtractor):
                 'Video %s is not available' % video_id, expected=True)
 
         video_id = self._search_regex(
-            r'data-(?:material-identifier|episode-slug)="([^"]+)"',
+            (r'data-(?:material-identifier|episode-slug)="([^"]+)"',
+                r'data-resource="[^>"]+mu/programcard/expanded/([^"]+)"'),
             webpage, 'video id')
 
         programcard = self._download_json(
@@ -43,9 +63,12 @@ class DRTVIE(InfoExtractor):
             video_id, 'Downloading video JSON')
         data = programcard['Data'][0]
 
-        title = data['Title']
-        description = data['Description']
-        timestamp = parse_iso8601(data['CreatedTime'])
+        title = remove_end(self._og_search_title(
+            webpage, default=None), ' | TV | DR') or data['Title']
+        description = self._og_search_description(
+            webpage, default=None) or data.get('Description')
+
+        timestamp = parse_iso8601(data.get('CreatedTime'))
 
         thumbnail = None
         duration = None
@@ -56,16 +79,18 @@ class DRTVIE(InfoExtractor):
         subtitles = {}
 
         for asset in data['Assets']:
-            if asset['Kind'] == 'Image':
-                thumbnail = asset['Uri']
-            elif asset['Kind'] == 'VideoResource':
-                duration = asset['DurationInMilliseconds'] / 1000.0
-                restricted_to_denmark = asset['RestrictedToDenmark']
-                spoken_subtitles = asset['Target'] == 'SpokenSubtitles'
-                for link in asset['Links']:
-                    uri = link['Uri']
-                    target = link['Target']
-                    format_id = target
+            if asset.get('Kind') == 'Image':
+                thumbnail = asset.get('Uri')
+            elif asset.get('Kind') == 'VideoResource':
+                duration = float_or_none(asset.get('DurationInMilliseconds'), 1000)
+                restricted_to_denmark = asset.get('RestrictedToDenmark')
+                spoken_subtitles = asset.get('Target') == 'SpokenSubtitles'
+                for link in asset.get('Links', []):
+                    uri = link.get('Uri')
+                    if not uri:
+                        continue
+                    target = link.get('Target')
+                    format_id = target or ''
                     preference = None
                     if spoken_subtitles:
                         preference = -1
@@ -76,8 +101,8 @@ class DRTVIE(InfoExtractor):
                             video_id, preference, f4m_id=format_id))
                     elif target == 'HLS':
                         formats.extend(self._extract_m3u8_formats(
-                            uri, video_id, 'mp4', preference=preference,
-                            m3u8_id=format_id))
+                            uri, video_id, 'mp4', entry_protocol='m3u8_native',
+                            preference=preference, m3u8_id=format_id))
                     else:
                         bitrate = link.get('Bitrate')
                         if bitrate:
@@ -85,7 +110,7 @@ class DRTVIE(InfoExtractor):
                         formats.append({
                             'url': uri,
                             'format_id': format_id,
-                            'tbr': bitrate,
+                            'tbr': int_or_none(bitrate),
                             'ext': link.get('FileFormat'),
                         })
                 subtitles_list = asset.get('SubtitlesList')
@@ -94,12 +119,18 @@ class DRTVIE(InfoExtractor):
                         'Danish': 'da',
                     }
                     for subs in subtitles_list:
-                        lang = subs['Language']
-                        subtitles[LANGS.get(lang, lang)] = [{'url': subs['Uri'], 'ext': 'vtt'}]
+                        if not subs.get('Uri'):
+                            continue
+                        lang = subs.get('Language') or 'da'
+                        subtitles.setdefault(LANGS.get(lang, lang), []).append({
+                            'url': subs['Uri'],
+                            'ext': mimetype2ext(subs.get('MimeType')) or 'vtt'
+                        })
 
         if not formats and restricted_to_denmark:
-            raise ExtractorError(
-                'Unfortunately, DR is not allowed to show this program outside Denmark.', expected=True)
+            self.raise_geo_restricted(
+                'Unfortunately, DR is not allowed to show this program outside Denmark.',
+                expected=True)
 
         self._sort_formats(formats)
 
index 12d28d3b9f1e76f84f0f9fa322befd0bfa056f09..c2f593eca201a42f7023cc64d4237b5052fbc722 100644 (file)
@@ -4,11 +4,13 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..compat import compat_HTTPError
+from ..compat import (
+    compat_HTTPError,
+    compat_str,
+)
 from ..utils import (
     ExtractorError,
     int_or_none,
-    url_basename,
 )
 
 
@@ -52,11 +54,24 @@ class EaglePlatformIE(InfoExtractor):
 
     @staticmethod
     def _extract_url(webpage):
+        # Regular iframe embedding
         mobj = re.search(
             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//.+?\.media\.eagleplatform\.com/index/player\?.+?)\1',
             webpage)
         if mobj is not None:
             return mobj.group('url')
+        # Basic usage embedding (see http://dultonmedia.github.io/eplayer/)
+        mobj = re.search(
+            r'''(?xs)
+                    <script[^>]+
+                        src=(?P<q1>["\'])(?:https?:)?//(?P<host>.+?\.media\.eagleplatform\.com)/player/player\.js(?P=q1)
+                    .+?
+                    <div[^>]+
+                        class=(?P<q2>["\'])eagleplayer(?P=q2)[^>]+
+                        data-id=["\'](?P<id>\d+)
+            ''', webpage)
+        if mobj is not None:
+            return 'eagleplatform:%(host)s:%(id)s' % mobj.groupdict()
 
     @staticmethod
     def _handle_error(response):
@@ -64,7 +79,7 @@ class EaglePlatformIE(InfoExtractor):
         if status != 200:
             raise ExtractorError(' '.join(response['errors']), expected=True)
 
-    def _download_json(self, url_or_request, video_id, note='Downloading JSON metadata'):
+    def _download_json(self, url_or_request, video_id, note='Downloading JSON metadata', *args, **kwargs):
         try:
             response = super(EaglePlatformIE, self)._download_json(url_or_request, video_id, note)
         except ExtractorError as ee:
@@ -103,29 +118,38 @@ class EaglePlatformIE(InfoExtractor):
 
         m3u8_url = self._get_video_url(secure_m3u8, video_id, 'Downloading m3u8 JSON')
         m3u8_formats = self._extract_m3u8_formats(
-            m3u8_url, video_id,
-            'mp4', entry_protocol='m3u8_native', m3u8_id='hls')
+            m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native',
+            m3u8_id='hls', fatal=False)
         formats.extend(m3u8_formats)
 
-        mp4_url = self._get_video_url(
+        m3u8_formats_dict = {}
+        for f in m3u8_formats:
+            if f.get('height') is not None:
+                m3u8_formats_dict[f['height']] = f
+
+        mp4_data = self._download_json(
             # Secure mp4 URL is constructed according to Player.prototype.mp4 from
             # http://lentaru.media.eagleplatform.com/player/player.js
-            re.sub(r'm3u8|hlsvod|hls|f4m', 'mp4', secure_m3u8),
-            video_id, 'Downloading mp4 JSON')
-        mp4_url_basename = url_basename(mp4_url)
-        for m3u8_format in m3u8_formats:
-            mobj = re.search('/([^/]+)/index\.m3u8', m3u8_format['url'])
-            if mobj:
-                http_format = m3u8_format.copy()
-                video_url = mp4_url.replace(mp4_url_basename, mobj.group(1))
-                if not self._is_valid_url(video_url, video_id):
+            re.sub(r'm3u8|hlsvod|hls|f4m', 'mp4s', secure_m3u8),
+            video_id, 'Downloading mp4 JSON', fatal=False)
+        if mp4_data:
+            for format_id, format_url in mp4_data.get('data', {}).items():
+                if not isinstance(format_url, compat_str):
                     continue
-                http_format.update({
-                    'url': video_url,
-                    'format_id': m3u8_format['format_id'].replace('hls', 'http'),
-                    'protocol': 'http',
-                })
-                formats.append(http_format)
+                height = int_or_none(format_id)
+                if height is not None and m3u8_formats_dict.get(height):
+                    f = m3u8_formats_dict[height].copy()
+                    f.update({
+                        'format_id': f['format_id'].replace('hls', 'http'),
+                        'protocol': 'http',
+                    })
+                else:
+                    f = {
+                        'format_id': 'http-%s' % format_id,
+                        'height': int_or_none(format_id),
+                    }
+                f['url'] = format_url
+                formats.append(f)
 
         self._sort_formats(formats)
 
index f7339702cad3ed2804fe276b9d1fc6857c368206..443865ad27ba96eea8f78c56d14b72a54bc86389 100644 (file)
@@ -14,7 +14,7 @@ class EinthusanIE(InfoExtractor):
     _TESTS = [
         {
             'url': 'http://www.einthusan.com/movies/watch.php?id=2447',
-            'md5': 'af244f4458cd667205e513d75da5b8b1',
+            'md5': 'd71379996ff5b7f217eca034c34e3461',
             'info_dict': {
                 'id': '2447',
                 'ext': 'mp4',
@@ -25,13 +25,13 @@ class EinthusanIE(InfoExtractor):
         },
         {
             'url': 'http://www.einthusan.com/movies/watch.php?id=1671',
-            'md5': 'ef63c7a803e22315880ed182c10d1c5c',
+            'md5': 'b16a6fd3c67c06eb7c79c8a8615f4213',
             'info_dict': {
                 'id': '1671',
                 'ext': 'mp4',
                 'title': 'Soodhu Kavvuum',
                 'thumbnail': 're:^https?://.*\.jpg$',
-                'description': 'md5:05d8a0c0281a4240d86d76e14f2f4d51',
+                'description': 'md5:b40f2bf7320b4f9414f3780817b2af8c',
             }
         },
     ]
@@ -50,9 +50,11 @@ class EinthusanIE(InfoExtractor):
         video_id = self._search_regex(
             r'data-movieid=["\'](\d+)', webpage, 'video id', default=video_id)
 
-        video_url = self._download_webpage(
+        m3u8_url = self._download_webpage(
             'http://cdn.einthusan.com/geturl/%s/hd/London,Washington,Toronto,Dallas,San,Sydney/'
-            % video_id, video_id)
+            % video_id, video_id, headers={'Referer': url})
+        formats = self._extract_m3u8_formats(
+            m3u8_url, video_id, ext='mp4', entry_protocol='m3u8_native')
 
         description = self._html_search_meta('description', webpage)
         thumbnail = self._html_search_regex(
@@ -64,7 +66,7 @@ class EinthusanIE(InfoExtractor):
         return {
             'id': video_id,
             'title': title,
-            'url': video_url,
+            'formats': formats,
             'thumbnail': thumbnail,
             'description': description,
         }
index 713cb7b329208d3c761b12858cc265b401c16dd0..ee5ead18b0834b7c2e27258b4fc6950fa93ad960 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
index 1cdb11e34804186e05cdca81d978ab944d49b4db..a5820b21e05a721fd654ff8c1d1313eb80239a73 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
index a39e9010d4c732a8bbb9d36f124a99fa2992990a..65635c18b7153ec188437f9c24cbe939c65304d7 100644 (file)
@@ -4,7 +4,7 @@ from .common import InfoExtractor
 
 
 class EngadgetIE(InfoExtractor):
-    _VALID_URL = r'https?://www.engadget.com/video/(?P<id>[^/?#]+)'
+    _VALID_URL = r'https?://(?:www\.)?engadget\.com/video/(?P<id>[^/?#]+)'
 
     _TESTS = [{
         # video with 5min ID
index 66c08bec47d8aa639cf758bb3e083b9772230c76..8795e0ddf5e26f676a421173a0e1fd019cd112cb 100644 (file)
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
-from ..utils import remove_end
+from ..compat import compat_str
+from ..utils import (
+    determine_ext,
+    int_or_none,
+    unified_timestamp,
+)
 
 
 class ESPNIE(InfoExtractor):
-    _VALID_URL = r'https?://espn\.go\.com/(?:[^/]+/)*(?P<id>[^/]+)'
+    _VALID_URL = r'https?://(?:espn\.go|(?:www\.)?espn)\.com/video/clip(?:\?.*?\bid=|/_/id/)(?P<id>\d+)'
     _TESTS = [{
         'url': 'http://espn.go.com/video/clip?id=10365079',
-        'md5': '60e5d097a523e767d06479335d1bdc58',
         'info_dict': {
-            'id': 'FkYWtmazr6Ed8xmvILvKLWjd4QvYZpzG',
+            'id': '10365079',
             'ext': 'mp4',
             'title': '30 for 30 Shorts: Judging Jewell',
-            'description': None,
+            'description': 'md5:39370c2e016cb4ecf498ffe75bef7f0f',
+            'timestamp': 1390936111,
+            'upload_date': '20140128',
         },
         'params': {
             'skip_download': True,
         },
-        'add_ie': ['OoyalaExternal'],
     }, {
         # intl video, from http://www.espnfc.us/video/mls-highlights/150/video/2743663/must-see-moments-best-of-the-mls-season
         'url': 'http://espn.go.com/video/clip?id=2743663',
-        'md5': 'f4ac89b59afc7e2d7dbb049523df6768',
         'info_dict': {
-            'id': '50NDFkeTqRHB0nXBOK-RGdSG5YQPuxHg',
+            'id': '2743663',
             'ext': 'mp4',
             'title': 'Must-See Moments: Best of the MLS season',
+            'description': 'md5:4c2d7232beaea572632bec41004f0aeb',
+            'timestamp': 1449446454,
+            'upload_date': '20151207',
         },
         'params': {
             'skip_download': True,
         },
-        'add_ie': ['OoyalaExternal'],
+        'expected_warnings': ['Unable to download f4m manifest'],
     }, {
+        'url': 'http://www.espn.com/video/clip?id=10365079',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.espn.com/video/clip/_/id/17989860',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        clip = self._download_json(
+            'http://api-app.espn.com/v1/video/clips/%s' % video_id,
+            video_id)['videos'][0]
+
+        title = clip['headline']
+
+        format_urls = set()
+        formats = []
+
+        def traverse_source(source, base_source_id=None):
+            for source_id, source in source.items():
+                if isinstance(source, compat_str):
+                    extract_source(source, base_source_id)
+                elif isinstance(source, dict):
+                    traverse_source(
+                        source,
+                        '%s-%s' % (base_source_id, source_id)
+                        if base_source_id else source_id)
+
+        def extract_source(source_url, source_id=None):
+            if source_url in format_urls:
+                return
+            format_urls.add(source_url)
+            ext = determine_ext(source_url)
+            if ext == 'smil':
+                formats.extend(self._extract_smil_formats(
+                    source_url, video_id, fatal=False))
+            elif ext == 'f4m':
+                formats.extend(self._extract_f4m_formats(
+                    source_url, video_id, f4m_id=source_id, fatal=False))
+            elif ext == 'm3u8':
+                formats.extend(self._extract_m3u8_formats(
+                    source_url, video_id, 'mp4', entry_protocol='m3u8_native',
+                    m3u8_id=source_id, fatal=False))
+            else:
+                formats.append({
+                    'url': source_url,
+                    'format_id': source_id,
+                })
+
+        traverse_source(clip['links']['source'])
+        self._sort_formats(formats)
+
+        description = clip.get('caption') or clip.get('description')
+        thumbnail = clip.get('thumbnail')
+        duration = int_or_none(clip.get('duration'))
+        timestamp = unified_timestamp(clip.get('originalPublishDate'))
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'timestamp': timestamp,
+            'duration': duration,
+            'formats': formats,
+        }
+
+
+class ESPNArticleIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:espn\.go|(?:www\.)?espn)\.com/(?:[^/]+/)*(?P<id>[^/]+)'
+    _TESTS = [{
         'url': 'https://espn.go.com/video/iframe/twitter/?cms=espn&id=10365079',
         'only_matching': True,
     }, {
@@ -49,6 +128,10 @@ class ESPNIE(InfoExtractor):
         'only_matching': True,
     }]
 
+    @classmethod
+    def suitable(cls, url):
+        return False if ESPNIE.suitable(url) else super(ESPNArticleIE, cls).suitable(url)
+
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
@@ -58,23 +141,5 @@ class ESPNIE(InfoExtractor):
             r'class=(["\']).*?video-play-button.*?\1[^>]+data-id=["\'](?P<id>\d+)',
             webpage, 'video id', group='id')
 
-        cms = 'espn'
-        if 'data-source="intl"' in webpage:
-            cms = 'intl'
-        player_url = 'https://espn.go.com/video/iframe/twitter/?id=%s&cms=%s' % (video_id, cms)
-        player = self._download_webpage(
-            player_url, video_id)
-
-        pcode = self._search_regex(
-            r'["\']pcode=([^"\']+)["\']', player, 'pcode')
-
-        title = remove_end(
-            self._og_search_title(webpage),
-            '- ESPN Video').strip()
-
-        return {
-            '_type': 'url_transparent',
-            'url': 'ooyalaexternal:%s:%s:%s' % (cms, video_id, pcode),
-            'ie_key': 'OoyalaExternal',
-            'title': title,
-        }
+        return self.url_result(
+            'http://espn.go.com/video/clip?id=%s' % video_id, ESPNIE.ie_key())
diff --git a/youtube_dl/extractor/exfm.py b/youtube_dl/extractor/exfm.py
deleted file mode 100644 (file)
index 09ed4f2..0000000
+++ /dev/null
@@ -1,58 +0,0 @@
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-
-
-class ExfmIE(InfoExtractor):
-    IE_NAME = 'exfm'
-    IE_DESC = 'ex.fm'
-    _VALID_URL = r'https?://(?:www\.)?ex\.fm/song/(?P<id>[^/]+)'
-    _SOUNDCLOUD_URL = r'http://(?:www\.)?api\.soundcloud\.com/tracks/([^/]+)/stream'
-    _TESTS = [
-        {
-            'url': 'http://ex.fm/song/eh359',
-            'md5': 'e45513df5631e6d760970b14cc0c11e7',
-            'info_dict': {
-                'id': '44216187',
-                'ext': 'mp3',
-                'title': 'Test House "Love Is Not Enough" (Extended Mix) DeadJournalist Exclusive',
-                'uploader': 'deadjournalist',
-                'upload_date': '20120424',
-                'description': 'Test House \"Love Is Not Enough\" (Extended Mix) DeadJournalist Exclusive',
-            },
-            'note': 'Soundcloud song',
-            'skip': 'The site is down too often',
-        },
-        {
-            'url': 'http://ex.fm/song/wddt8',
-            'md5': '966bd70741ac5b8570d8e45bfaed3643',
-            'info_dict': {
-                'id': 'wddt8',
-                'ext': 'mp3',
-                'title': 'Safe and Sound',
-                'uploader': 'Capital Cities',
-            },
-            'skip': 'The site is down too often',
-        },
-    ]
-
-    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        song_id = mobj.group('id')
-        info_url = 'http://ex.fm/api/v3/song/%s' % song_id
-        info = self._download_json(info_url, song_id)['song']
-        song_url = info['url']
-        if re.match(self._SOUNDCLOUD_URL, song_url) is not None:
-            self.to_screen('Soundcloud song detected')
-            return self.url_result(song_url.replace('/stream', ''), 'Soundcloud')
-        return {
-            'id': song_id,
-            'url': song_url,
-            'ext': 'mp3',
-            'title': info['title'],
-            'thumbnail': info['image']['large'],
-            'uploader': info['artist'],
-            'view_count': info['loved_count'],
-        }
index 971c918a419c0609f5dcb50a768d53505252fbc4..ef11962f35035617a589e91cde5db43659099f66 100644 (file)
@@ -8,7 +8,7 @@ from ..utils import (
 
 
 class ExpoTVIE(InfoExtractor):
-    _VALID_URL = r'https?://www\.expotv\.com/videos/[^?#]*/(?P<id>[0-9]+)($|[?#])'
+    _VALID_URL = r'https?://(?:www\.)?expotv\.com/videos/[^?#]*/(?P<id>[0-9]+)($|[?#])'
     _TEST = {
         'url': 'http://www.expotv.com/videos/reviews/3/40/NYX-Butter-lipstick/667916',
         'md5': 'fe1d728c3a813ff78f595bc8b7a707a8',
index 55c639158dda61b53f290d880a47602aa0bb4616..46d007b7d50d7b3916e3dacc897547dfc924446d 100644 (file)
@@ -1,12 +1,18 @@
 # flake8: noqa
 from __future__ import unicode_literals
 
-from .abc import ABCIE
-from .abc7news import Abc7NewsIE
+from .abc import (
+    ABCIE,
+    ABCIViewIE,
+)
 from .abcnews import (
     AbcNewsIE,
     AbcNewsVideoIE,
 )
+from .abcotvs import (
+    ABCOTVSIE,
+    ABCOTVSClipsIE,
+)
 from .academicearth import AcademicEarthCourseIE
 from .acast import (
     ACastIE,
@@ -25,7 +31,6 @@ from .aenetworks import (
     HistoryTopicIE,
 )
 from .afreecatv import AfreecaTVIE
-from .aftonbladet import AftonbladetIE
 from .airmozilla import AirMozillaIE
 from .aljazeera import AlJazeeraIE
 from .alphaporno import AlphaPornoIE
@@ -61,6 +66,7 @@ from .arte import (
     ArteTVDDCIE,
     ArteTVMagazineIE,
     ArteTVEmbedIE,
+    TheOperaPlatformIE,
     ArteTVPlaylistIE,
 )
 from .atresplayer import AtresPlayerIE
@@ -68,6 +74,12 @@ from .atttechchannel import ATTTechChannelIE
 from .audimedia import AudiMediaIE
 from .audioboom import AudioBoomIE
 from .audiomack import AudiomackIE, AudiomackAlbumIE
+from .awaan import (
+    AWAANIE,
+    AWAANVideoIE,
+    AWAANLiveIE,
+    AWAANSeasonIE,
+)
 from .azubu import AzubuIE, AzubuLiveIE
 from .baidu import BaiduVideoIE
 from .bambuser import BambuserIE, BambuserChannelIE
@@ -81,7 +93,8 @@ from .bbc import (
 )
 from .beeg import BeegIE
 from .behindkink import BehindKinkIE
-from .beatportpro import BeatportProIE
+from .bellmedia import BellMediaIE
+from .beatport import BeatportIE
 from .bet import BetIE
 from .bigflix import BigflixIE
 from .bild import BildIE
@@ -104,7 +117,10 @@ from .brightcove import (
     BrightcoveNewIE,
 )
 from .buzzfeed import BuzzFeedIE
-from .byutv import BYUtvIE
+from .byutv import (
+    BYUtvIE,
+    BYUtvEventIE,
+)
 from .c56 import C56IE
 from .camdemy import (
     CamdemyIE,
@@ -118,9 +134,12 @@ from .carambatv import (
     CarambaTVIE,
     CarambaTVPageIE,
 )
+from .cartoonnetwork import CartoonNetworkIE
 from .cbc import (
     CBCIE,
     CBCPlayerIE,
+    CBCWatchVideoIE,
+    CBCWatchIE,
 )
 from .cbs import CBSIE
 from .cbslocal import CBSLocalIE
@@ -131,9 +150,11 @@ from .cbsnews import (
 )
 from .cbssports import CBSSportsIE
 from .ccc import CCCIE
+from .cctv import CCTVIE
 from .cda import CDAIE
 from .ceskatelevize import CeskaTelevizeIE
 from .channel9 import Channel9IE
+from .charlierose import CharlieRoseIE
 from .chaturbate import ChaturbateIE
 from .chilloutzone import ChilloutzoneIE
 from .chirbit import (
@@ -159,6 +180,7 @@ from .cnn import (
 from .coub import CoubIE
 from .collegerama import CollegeRamaIE
 from .comedycentral import (
+    ComedyCentralFullEpisodesIE,
     ComedyCentralIE,
     ComedyCentralShortnameIE,
     ComedyCentralTVIE,
@@ -166,7 +188,10 @@ from .comedycentral import (
 )
 from .comcarcoff import ComCarCoffIE
 from .commonmistakes import CommonMistakesIE, UnicodeBOMIE
-from .commonprotocols import RtmpIE
+from .commonprotocols import (
+    MmsIE,
+    RtmpIE,
+)
 from .condenast import CondeNastIE
 from .cracked import CrackedIE
 from .crackle import CrackleIE
@@ -178,9 +203,12 @@ from .crunchyroll import (
 )
 from .cspan import CSpanIE
 from .ctsnews import CtsNewsIE
-from .ctv import CTVIE
 from .ctvnews import CTVNewsIE
 from .cultureunplugged import CultureUnpluggedIE
+from .curiositystream import (
+    CuriosityStreamIE,
+    CuriosityStreamCollectionIE,
+)
 from .cwtv import CWTVIE
 from .dailymail import DailyMailIE
 from .dailymotion import (
@@ -196,12 +224,6 @@ from .daum import (
     DaumUserIE,
 )
 from .dbtv import DBTVIE
-from .dcn import (
-    DCNIE,
-    DCNVideoIE,
-    DCNLiveIE,
-    DCNSeasonIE,
-)
 from .dctp import DctpTvIE
 from .deezer import DeezerPlaylistIE
 from .democracynow import DemocracynowIE
@@ -246,17 +268,25 @@ from .engadget import EngadgetIE
 from .eporner import EpornerIE
 from .eroprofile import EroProfileIE
 from .escapist import EscapistIE
-from .espn import ESPNIE
+from .espn import (
+    ESPNIE,
+    ESPNArticleIE,
+)
 from .esri import EsriVideoIE
 from .europa import EuropaIE
 from .everyonesmixtape import EveryonesMixtapeIE
-from .exfm import ExfmIE
 from .expotv import ExpoTVIE
 from .extremetube import ExtremeTubeIE
 from .eyedotv import EyedoTVIE
-from .facebook import FacebookIE
+from .facebook import (
+    FacebookIE,
+    FacebookPluginsVideoIE,
+)
 from .faz import FazIE
-from .fc2 import FC2IE
+from .fc2 import (
+    FC2IE,
+    FC2EmbedIE,
+)
 from .fczenit import FczenitIE
 from .firstpost import FirstpostIE
 from .firsttv import FirstTVIE
@@ -270,8 +300,13 @@ from .footyroom import FootyRoomIE
 from .formula1 import Formula1IE
 from .fourtube import FourTubeIE
 from .fox import FOXIE
+from .fox9 import FOX9IE
 from .foxgay import FoxgayIE
-from .foxnews import FoxNewsIE
+from .foxnews import (
+    FoxNewsIE,
+    FoxNewsArticleIE,
+    FoxNewsInsiderIE,
+)
 from .foxsports import FoxSportsIE
 from .franceculture import FranceCultureIE
 from .franceinter import FranceInterIE
@@ -308,6 +343,7 @@ from .globo import (
     GloboIE,
     GloboArticleIE,
 )
+from .go import GoIE
 from .godtube import GodTubeIE
 from .godtv import GodTVIE
 from .golem import GolemIE
@@ -318,13 +354,19 @@ from .goshgay import GoshgayIE
 from .gputechconf import GPUTechConfIE
 from .groupon import GrouponIE
 from .hark import HarkIE
-from .hbo import HBOIE
+from .hbo import (
+    HBOIE,
+    HBOEpisodeIE,
+)
 from .hearthisat import HearThisAtIE
 from .heise import HeiseIE
 from .hellporno import HellPornoIE
 from .helsinki import HelsinkiIE
 from .hentaistigma import HentaiStigmaIE
-from .hgtv import HGTVIE
+from .hgtv import (
+    HGTVIE,
+    HGTVComShowIE,
+)
 from .historicfilms import HistoricFilmsIE
 from .hitbox import HitboxIE, HitboxLiveIE
 from .hornbunny import HornBunnyIE
@@ -336,6 +378,7 @@ from .hrti import (
     HRTiIE,
     HRTiPlaylistIE,
 )
+from .huajiao import HuajiaoIE
 from .huffpost import HuffPostIE
 from .hypem import HypemIE
 from .iconosquare import IconosquareIE
@@ -368,7 +411,12 @@ from .ivi import (
     IviCompilationIE
 )
 from .ivideon import IvideonIE
+from .iwara import IwaraIE
 from .izlesene import IzleseneIE
+from .jamendo import (
+    JamendoIE,
+    JamendoAlbumIE,
+)
 from .jeuxvideo import JeuxVideoIE
 from .jove import JoveIE
 from .jwplatform import JWPlatformIE
@@ -380,6 +428,7 @@ from .kankan import KankanIE
 from .karaoketv import KaraoketvIE
 from .karrierevideos import KarriereVideosIE
 from .keezmovies import KeezMoviesIE
+from .ketnet import KetnetIE
 from .khanacademy import KhanAcademyIE
 from .kickstarter import KickStarterIE
 from .keek import KeekIE
@@ -398,12 +447,14 @@ from .kuwo import (
 )
 from .la7 import LA7IE
 from .laola1tv import Laola1TvIE
+from .lci import LCIIE
 from .lcp import (
     LcpPlayIE,
     LcpIE,
 )
 from .learnr import LearnrIE
 from .lecture2go import Lecture2GoIE
+from .lego import LEGOIE
 from .lemonde import LemondeIE
 from .leeco import (
     LeIE,
@@ -441,6 +492,10 @@ from .macgamestore import MacGameStoreIE
 from .mailru import MailRuIE
 from .makerschannel import MakersChannelIE
 from .makertv import MakerTVIE
+from .mangomolo import (
+    MangomoloVideoIE,
+    MangomoloLiveIE,
+)
 from .matchtv import MatchTVIE
 from .mdr import MDRIE
 from .meta import METAIE
@@ -448,6 +503,7 @@ from .metacafe import MetacafeIE
 from .metacritic import MetacriticIE
 from .mgoon import MgoonIE
 from .mgtv import MGTVIE
+from .miaopai import MiaoPaiIE
 from .microsoftvirtualacademy import (
     MicrosoftVirtualAcademyIE,
     MicrosoftVirtualAcademyCourseIE,
@@ -476,9 +532,11 @@ from .motherless import MotherlessIE
 from .motorsport import MotorsportIE
 from .movieclips import MovieClipsIE
 from .moviezine import MoviezineIE
+from .movingimage import MovingImageIE
 from .msn import MSNIE
 from .mtv import (
     MTVIE,
+    MTVVideoIE,
     MTVServicesEmbeddedIE,
     MTVDEIE,
 )
@@ -501,6 +559,7 @@ from .nbc import (
     CSNNEIE,
     NBCIE,
     NBCNewsIE,
+    NBCOlympicsIE,
     NBCSportsIE,
     NBCSportsVPlayerIE,
 )
@@ -532,6 +591,7 @@ from .nextmedia import (
 )
 from .nfb import NFBIE
 from .nfl import NFLIE
+from .nhk import NhkVodIE
 from .nhl import (
     NHLVideocenterIE,
     NHLNewsIE,
@@ -541,12 +601,17 @@ from .nhl import (
 from .nick import (
     NickIE,
     NickDeIE,
+    NickNightIE,
 )
 from .niconico import NiconicoIE, NiconicoPlaylistIE
-from .ninecninemedia import NineCNineMediaIE
+from .ninecninemedia import (
+    NineCNineMediaStackIE,
+    NineCNineMediaIE,
+)
 from .ninegag import NineGagIE
 from .ninenow import NineNowIE
 from .nintendo import NintendoIE
+from .nobelprize import NobelPrizeIE
 from .noco import NocoIE
 from .normalboots import NormalbootsIE
 from .nosvideo import NosVideoIE
@@ -569,13 +634,14 @@ from .nowtv import (
 )
 from .noz import NozIE
 from .npo import (
+    AndereTijdenIE,
     NPOIE,
     NPOLiveIE,
     NPORadioIE,
     NPORadioFragmentIE,
     SchoolTVIE,
     VPROIE,
-    WNLIE
+    WNLIE,
 )
 from .npr import NprIE
 from .nrk import (
@@ -591,6 +657,7 @@ from .nytimes import (
     NYTimesArticleIE,
 )
 from .nuvid import NuvidIE
+from .nzz import NZZIE
 from .odatv import OdaTVIE
 from .odnoklassniki import OdnoklassnikiIE
 from .oktoberfesttv import OktoberfestTVIE
@@ -611,6 +678,7 @@ from .orf import (
     ORFFM4IE,
     ORFIPTVIE,
 )
+from .pandatv import PandaTVIE
 from .pandoratv import PandoraTVIE
 from .parliamentliveuk import ParliamentLiveUKIE
 from .patreon import PatreonIE
@@ -625,7 +693,6 @@ from .phoenix import PhoenixIE
 from .photobucket import PhotobucketIE
 from .pinkbike import PinkbikeIE
 from .pladform import PladformIE
-from .played import PlayedIE
 from .playfm import PlayFMIE
 from .plays import PlaysTVIE
 from .playtvak import PlaytvakIE
@@ -637,8 +704,12 @@ from .pluralsight import (
 )
 from .podomatic import PodomaticIE
 from .pokemon import PokemonIE
-from .polskieradio import PolskieRadioIE
+from .polskieradio import (
+    PolskieRadioIE,
+    PolskieRadioCategoryIE,
+)
 from .porn91 import Porn91IE
+from .porncom import PornComIE
 from .pornhd import PornHdIE
 from .pornhub import (
     PornHubIE,
@@ -681,6 +752,10 @@ from .rbmaradio import RBMARadioIE
 from .rds import RDSIE
 from .redtube import RedTubeIE
 from .regiotv import RegioTVIE
+from .rentv import (
+    RENTVIE,
+    RENTVArticleIE,
+)
 from .restudy import RestudyIE
 from .reuters import ReutersIE
 from .reverbnation import ReverbNationIE
@@ -690,6 +765,7 @@ from .revision3 import (
 )
 from .rice import RICEIE
 from .ringtv import RingTVIE
+from .rmcdecouverte import RMCDecouverteIE
 from .ro220 import Ro220IE
 from .rockstargames import RockstarGamesIE
 from .roosterteeth import RoosterTeethIE
@@ -729,14 +805,16 @@ from .scivee import SciVeeIE
 from .screencast import ScreencastIE
 from .screencastomatic import ScreencastOMaticIE
 from .screenjunkies import ScreenJunkiesIE
-from .screenwavemedia import ScreenwaveMediaIE, TeamFourIE
 from .seeker import SeekerIE
 from .senateisvp import SenateISVPIE
 from .sendtonews import SendtoNewsIE
 from .servingsys import ServingSysIE
 from .sexu import SexuIE
 from .shahid import ShahidIE
-from .shared import SharedIE
+from .shared import (
+    SharedIE,
+    VivoIE,
+)
 from .sharesix import ShareSixIE
 from .sina import SinaIE
 from .sixplay import SixPlayIE
@@ -792,7 +870,6 @@ from .srgssr import (
     SRGSSRPlayIE,
 )
 from .srmediathek import SRMediathekIE
-from .ssa import SSAIE
 from .stanfordoc import StanfordOpenClassroomIE
 from .steam import SteamIE
 from .streamable import StreamableIE
@@ -812,6 +889,7 @@ from .tagesschau import (
     TagesschauIE,
 )
 from .tass import TassIE
+from .tbs import TBSIE
 from .tdslifeway import TDSLifewayIE
 from .teachertube import (
     TeacherTubeIE,
@@ -819,6 +897,7 @@ from .teachertube import (
 )
 from .teachingchannel import TeachingChannelIE
 from .teamcoco import TeamcocoIE
+from .teamfourstar import TeamFourStarIE
 from .techtalks import TechTalksIE
 from .ted import TEDIE
 from .tele13 import Tele13IE
@@ -826,10 +905,12 @@ from .telebruxelles import TeleBruxellesIE
 from .telecinco import TelecincoIE
 from .telegraaf import TelegraafIE
 from .telemb import TeleMBIE
+from .telequebec import TeleQuebecIE
 from .teletask import TeleTaskIE
 from .telewebion import TelewebionIE
 from .testurl import TestURLIE
 from .tf1 import TF1IE
+from .tfo import TFOIE
 from .theintercept import TheInterceptIE
 from .theplatform import (
     ThePlatformIE,
@@ -838,8 +919,10 @@ from .theplatform import (
 from .thescene import TheSceneIE
 from .thesixtyone import TheSixtyOneIE
 from .thestar import TheStarIE
+from .theweatherchannel import TheWeatherChannelIE
 from .thisamericanlife import ThisAmericanLifeIE
 from .thisav import ThisAVIE
+from .thisoldhouse import ThisOldHouseIE
 from .threeqsdn import ThreeQSDNIE
 from .tinypic import TinyPicIE
 from .tlc import TlcDeIE
@@ -854,16 +937,12 @@ from .tnaflix import (
     MovieFapIE,
 )
 from .toggle import ToggleIE
-from .thvideo import (
-    THVideoIE,
-    THVideoPlaylistIE
-)
+from .tonline import TOnlineIE
 from .toutv import TouTvIE
 from .toypics import ToypicsUserIE, ToypicsIE
 from .traileraddict import TrailerAddictIE
 from .trilulilu import TriluliluIE
-from .trollvids import TrollvidsIE
-from .trutube import TruTubeIE
+from .trutv import TruTVIE
 from .tube8 import Tube8IE
 from .tubitv import TubiTvIE
 from .tudou import (
@@ -887,12 +966,17 @@ from .tv2 import (
 )
 from .tv3 import TV3IE
 from .tv4 import TV4IE
+from .tvanouvelles import (
+    TVANouvellesIE,
+    TVANouvellesArticleIE,
+)
 from .tvc import (
     TVCIE,
     TVCArticleIE,
 )
 from .tvigle import TvigleIE
 from .tvland import TVLandIE
+from .tvnoe import TVNoeIE
 from .tvp import (
     TVPEmbedIE,
     TVPIE,
@@ -937,6 +1021,7 @@ from .uplynk import (
 )
 from .urort import UrortIE
 from .urplay import URPlayIE
+from .usanetwork import USANetworkIE
 from .usatoday import USATodayIE
 from .ustream import UstreamIE, UstreamChannelIE
 from .ustudio import (
@@ -1024,6 +1109,8 @@ from .vporn import VpornIE
 from .vrt import VRTIE
 from .vube import VubeIE
 from .vuclip import VuClipIE
+from .vyborymos import VyboryMosIE
+from .vzaar import VzaarIE
 from .walla import WallaIE
 from .washingtonpost import (
     WashingtonPostIE,
@@ -1035,6 +1122,10 @@ from .wdr import (
     WDRIE,
     WDRMobileIE,
 )
+from .webcaster import (
+    WebcasterIE,
+    WebcasterFeedIE,
+)
 from .webofstories import (
     WebOfStoriesIE,
     WebOfStoriesPlaylistIE,
@@ -1110,7 +1201,4 @@ from .youtube import (
 )
 from .zapiks import ZapiksIE
 from .zdf import ZDFIE, ZDFChannelIE
-from .zingmp3 import (
-    ZingMp3SongIE,
-    ZingMp3AlbumIE,
-)
+from .zingmp3 import ZingMp3IE
index b4fd9334aeb7f3dfc3f45ce2da67e5236d5d4018..445f9438db182d0ced6d48233306a53e56271f9d 100644 (file)
@@ -5,13 +5,12 @@ from .keezmovies import KeezMoviesIE
 
 
 class ExtremeTubeIE(KeezMoviesIE):
-    _VALID_URL = r'https?://(?:www\.)?extremetube\.com/(?:[^/]+/)?video/(?:(?P<display_id>[^/]+)-)(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www\.)?extremetube\.com/(?:[^/]+/)?video/(?P<id>[^/#?&]+)'
     _TESTS = [{
         'url': 'http://www.extremetube.com/video/music-video-14-british-euro-brit-european-cumshots-swallow-652431',
         'md5': '1fb9228f5e3332ec8c057d6ac36f33e0',
         'info_dict': {
-            'id': '652431',
-            'display_id': 'music-video-14-british-euro-brit-european-cumshots-swallow',
+            'id': 'music-video-14-british-euro-brit-european-cumshots-swallow-652431',
             'ext': 'mp4',
             'title': 'Music Video 14 british euro brit european cumshots swallow',
             'uploader': 'unknown',
index 0fb781a733f4c19780ed88f8f5b24c1102b10a44..b4d38e5c258b830e192bcfa2639f2074d9217434 100644 (file)
@@ -1,6 +1,5 @@
 from __future__ import unicode_literals
 
-import json
 import re
 import socket
 
@@ -15,6 +14,7 @@ from ..compat import (
 from ..utils import (
     error_to_compat_str,
     ExtractorError,
+    int_or_none,
     limit_length,
     sanitized_Request,
     urlencode_postdata,
@@ -62,6 +62,8 @@ class FacebookIE(InfoExtractor):
             'ext': 'mp4',
             'title': 're:Did you know Kei Nishikori is the first Asian man to ever reach a Grand Slam',
             'uploader': 'Tennis on Facebook',
+            'upload_date': '20140908',
+            'timestamp': 1410199200,
         }
     }, {
         'note': 'Video without discernible title',
@@ -71,6 +73,8 @@ class FacebookIE(InfoExtractor):
             'ext': 'mp4',
             'title': 'Facebook video #274175099429670',
             'uploader': 'Asif Nawab Butt',
+            'upload_date': '20140506',
+            'timestamp': 1399398998,
         },
         'expected_warnings': [
             'title'
@@ -78,12 +82,14 @@ class FacebookIE(InfoExtractor):
     }, {
         'note': 'Video with DASH manifest',
         'url': 'https://www.facebook.com/video.php?v=957955867617029',
-        'md5': '54706e4db4f5ad58fbad82dde1f1213f',
+        'md5': 'b2c28d528273b323abe5c6ab59f0f030',
         'info_dict': {
             'id': '957955867617029',
             'ext': 'mp4',
             'title': 'When you post epic content on instagram.com/433 8 million followers, this is ...',
             'uploader': 'Demy de Zeeuw',
+            'upload_date': '20160110',
+            'timestamp': 1452431627,
         },
     }, {
         'url': 'https://www.facebook.com/maxlayn/posts/10153807558977570',
@@ -93,7 +99,8 @@ class FacebookIE(InfoExtractor):
             'ext': 'mp4',
             'title': '"What are you doing running in the snow?"',
             'uploader': 'FailArmy',
-        }
+        },
+        'skip': 'Video gone',
     }, {
         'url': 'https://m.facebook.com/story.php?story_fbid=1035862816472149&id=116132035111903',
         'md5': '1deb90b6ac27f7efcf6d747c8a27f5e3',
@@ -103,6 +110,7 @@ class FacebookIE(InfoExtractor):
             'title': 'What the Flock Is Going On In New Zealand  Credit: ViralHog',
             'uploader': 'S. Saint',
         },
+        'skip': 'Video gone',
     }, {
         'note': 'swf params escaped',
         'url': 'https://www.facebook.com/barackobama/posts/10153664894881749',
@@ -112,6 +120,18 @@ class FacebookIE(InfoExtractor):
             'ext': 'mp4',
             'title': 'Facebook video #10153664894881749',
         },
+    }, {
+        # have 1080P, but only up to 720p in swf params
+        'url': 'https://www.facebook.com/cnn/videos/10155529876156509/',
+        'md5': '0d9813160b146b3bc8744e006027fcc6',
+        'info_dict': {
+            'id': '10155529876156509',
+            'ext': 'mp4',
+            'title': 'Holocaust survivor becomes US citizen',
+            'timestamp': 1477818095,
+            'upload_date': '20161030',
+            'uploader': 'CNN',
+        },
     }, {
         'url': 'https://www.facebook.com/video.php?v=10204634152394104',
         'only_matching': True,
@@ -220,43 +240,13 @@ class FacebookIE(InfoExtractor):
 
         video_data = None
 
-        BEFORE = '{swf.addParam(param[0], param[1]);});'
-        AFTER = '.forEach(function(variable) {swf.addVariable(variable[0], variable[1]);});'
-        PATTERN = re.escape(BEFORE) + '(?:\n|\\\\n)(.*?)' + re.escape(AFTER)
-
-        for m in re.findall(PATTERN, webpage):
-            swf_params = m.replace('\\\\', '\\').replace('\\"', '"')
-            data = dict(json.loads(swf_params))
-            params_raw = compat_urllib_parse_unquote(data['params'])
-            video_data_candidate = json.loads(params_raw)['video_data']
-            for _, f in video_data_candidate.items():
-                if not f:
-                    continue
-                if isinstance(f, dict):
-                    f = [f]
-                if not isinstance(f, list):
-                    continue
-                if f[0].get('video_id') == video_id:
-                    video_data = video_data_candidate
-                    break
-            if video_data:
+        server_js_data = self._parse_json(self._search_regex(
+            r'handleServerJS\(({.+})(?:\);|,")', webpage, 'server js data', default='{}'), video_id)
+        for item in server_js_data.get('instances', []):
+            if item[1][0] == 'VideoConfig':
+                video_data = item[2][0]['videoData']
                 break
 
-        def video_data_list2dict(video_data):
-            ret = {}
-            for item in video_data:
-                format_id = item['stream_type']
-                ret.setdefault(format_id, []).append(item)
-            return ret
-
-        if not video_data:
-            server_js_data = self._parse_json(self._search_regex(
-                r'handleServerJS\(({.+})\);', webpage, 'server js data', default='{}'), video_id)
-            for item in server_js_data.get('instances', []):
-                if item[1][0] == 'VideoConfig':
-                    video_data = video_data_list2dict(item[2][0]['videoData'])
-                    break
-
         if not video_data:
             if not fatal_if_no_video:
                 return webpage, False
@@ -269,7 +259,8 @@ class FacebookIE(InfoExtractor):
                 raise ExtractorError('Cannot parse data')
 
         formats = []
-        for format_id, f in video_data.items():
+        for f in video_data:
+            format_id = f['stream_type']
             if f and isinstance(f, dict):
                 f = [f]
             if not f or not isinstance(f, list):
@@ -306,12 +297,16 @@ class FacebookIE(InfoExtractor):
         if not video_title:
             video_title = 'Facebook video #%s' % video_id
         uploader = clean_html(get_element_by_id('fbPhotoPageAuthorName', webpage))
+        timestamp = int_or_none(self._search_regex(
+            r'<abbr[^>]+data-utime=["\'](\d+)', webpage,
+            'timestamp', default=None))
 
         info_dict = {
             'id': video_id,
             'title': video_title,
             'formats': formats,
             'uploader': uploader,
+            'timestamp': timestamp,
         }
 
         return webpage, info_dict
@@ -340,3 +335,32 @@ class FacebookIE(InfoExtractor):
                 self._VIDEO_PAGE_TEMPLATE % video_id,
                 video_id, fatal_if_no_video=True)
             return info_dict
+
+
+class FacebookPluginsVideoIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:[\w-]+\.)?facebook\.com/plugins/video\.php\?.*?\bhref=(?P<id>https.+)'
+
+    _TESTS = [{
+        'url': 'https://www.facebook.com/plugins/video.php?href=https%3A%2F%2Fwww.facebook.com%2Fgov.sg%2Fvideos%2F10154383743583686%2F&show_text=0&width=560',
+        'md5': '5954e92cdfe51fe5782ae9bda7058a07',
+        'info_dict': {
+            'id': '10154383743583686',
+            'ext': 'mp4',
+            'title': 'What to do during the haze?',
+            'uploader': 'Gov.sg',
+            'upload_date': '20160826',
+            'timestamp': 1472184808,
+        },
+        'add_ie': [FacebookIE.ie_key()],
+    }, {
+        'url': 'https://www.facebook.com/plugins/video.php?href=https%3A%2F%2Fwww.facebook.com%2Fvideo.php%3Fv%3D10204634152394104',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.facebook.com/plugins/video.php?href=https://www.facebook.com/gov.sg/videos/10154383743583686/&show_text=0&width=560',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        return self.url_result(
+            compat_urllib_parse_unquote(self._match_id(url)),
+            FacebookIE.ie_key())
index fd535457dc56a589eaf9e062dc40fe5374735020..4bc8fc5127010e1b3ced207da04f8926716cc94d 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
index c7d69ff1f980de46bd4ecce96e4ac301b1f1be59..c032d4d0282cc7907b08ec42de9ac842dd4a34c2 100644 (file)
@@ -1,10 +1,12 @@
-#! -*- coding: utf-8 -*-
+# coding: utf-8
 from __future__ import unicode_literals
 
 import hashlib
+import re
 
 from .common import InfoExtractor
 from ..compat import (
+    compat_parse_qs,
     compat_urllib_request,
     compat_urlparse,
 )
@@ -16,7 +18,7 @@ from ..utils import (
 
 
 class FC2IE(InfoExtractor):
-    _VALID_URL = r'^https?://video\.fc2\.com/(?:[^/]+/)*content/(?P<id>[^/]+)'
+    _VALID_URL = r'^(?:https?://video\.fc2\.com/(?:[^/]+/)*content/|fc2:)(?P<id>[^/]+)'
     IE_NAME = 'fc2'
     _NETRC_MACHINE = 'fc2'
     _TESTS = [{
@@ -75,12 +77,17 @@ class FC2IE(InfoExtractor):
     def _real_extract(self, url):
         video_id = self._match_id(url)
         self._login()
-        webpage = self._download_webpage(url, video_id)
-        self._downloader.cookiejar.clear_session_cookies()  # must clear
-        self._login()
-
-        title = self._og_search_title(webpage)
-        thumbnail = self._og_search_thumbnail(webpage)
+        webpage = None
+        if not url.startswith('fc2:'):
+            webpage = self._download_webpage(url, video_id)
+            self._downloader.cookiejar.clear_session_cookies()  # must clear
+            self._login()
+
+        title = 'FC2 video %s' % video_id
+        thumbnail = None
+        if webpage is not None:
+            title = self._og_search_title(webpage)
+            thumbnail = self._og_search_thumbnail(webpage)
         refer = url.replace('/content/', '/a/content/') if '/a/content/' not in url else url
 
         mimi = hashlib.md5((video_id + '_gGddgPfeaf_gzyr').encode('utf-8')).hexdigest()
@@ -113,3 +120,41 @@ class FC2IE(InfoExtractor):
             'ext': 'flv',
             'thumbnail': thumbnail,
         }
+
+
+class FC2EmbedIE(InfoExtractor):
+    _VALID_URL = r'https?://video\.fc2\.com/flv2\.swf\?(?P<query>.+)'
+    IE_NAME = 'fc2:embed'
+
+    _TEST = {
+        'url': 'http://video.fc2.com/flv2.swf?t=201404182936758512407645&i=20130316kwishtfitaknmcgd76kjd864hso93htfjcnaogz629mcgfs6rbfk0hsycma7shkf85937cbchfygd74&i=201403223kCqB3Ez&d=2625&sj=11&lang=ja&rel=1&from=11&cmt=1&tk=TlRBM09EQTNNekU9&tl=プリズン・ブレイク%20S1-01%20マイケル%20【吹替】',
+        'md5': 'b8aae5334cb691bdb1193a88a6ab5d5a',
+        'info_dict': {
+            'id': '201403223kCqB3Ez',
+            'ext': 'flv',
+            'title': 'プリズン・ブレイク S1-01 マイケル 【吹替】',
+            'thumbnail': 're:^https?://.*\.jpg$',
+        },
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        query = compat_parse_qs(mobj.group('query'))
+
+        video_id = query['i'][-1]
+        title = query.get('tl', ['FC2 video %s' % video_id])[0]
+
+        sj = query.get('sj', [None])[0]
+        thumbnail = None
+        if sj:
+            # See thumbnailImagePath() in ServerConst.as of flv2.swf
+            thumbnail = 'http://video%s-thumbnail.fc2.com/up/pic/%s.jpg' % (
+                sj, '/'.join((video_id[:6], video_id[6:8], video_id[-2], video_id[-1], video_id)))
+
+        return {
+            '_type': 'url_transparent',
+            'ie_key': FC2IE.ie_key(),
+            'url': 'fc2:%s' % video_id,
+            'title': title,
+            'thumbnail': thumbnail,
+        }
index 88bca100763337011a444369543a1479296e097e..6b662cc3cd78e4acf661af473f2374b5ec2af05c 100644 (file)
@@ -1,45 +1,41 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
-from ..compat import compat_xpath
+from ..compat import compat_urlparse
 from ..utils import (
     int_or_none,
     qualities,
     unified_strdate,
-    xpath_attr,
-    xpath_element,
-    xpath_text,
-    xpath_with_ns,
 )
 
 
 class FirstTVIE(InfoExtractor):
     IE_NAME = '1tv'
     IE_DESC = 'Первый канал'
-    _VALID_URL = r'https?://(?:www\.)?1tv\.ru/(?:[^/]+/)+p?(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www\.)?1tv\.ru/(?:[^/]+/)+(?P<id>[^/?#]+)'
 
     _TESTS = [{
-        # single format via video_materials.json API
-        'url': 'http://www.1tv.ru/prj/inprivate/vypusk/35930',
-        'md5': '82a2777648acae812d58b3f5bd42882b',
+        # single format
+        'url': 'http://www.1tv.ru/shows/naedine-so-vsemi/vypuski/gost-lyudmila-senchina-naedine-so-vsemi-vypusk-ot-12-02-2015',
+        'md5': 'a1b6b60d530ebcf8daacf4565762bbaf',
         'info_dict': {
-            'id': '35930',
+            'id': '40049',
             'ext': 'mp4',
             'title': 'Гость Людмила Сенчина. Наедине со всеми. Выпуск от 12.02.2015',
-            'description': 'md5:357933adeede13b202c7c21f91b871b2',
+            'description': 'md5:36a39c1d19618fec57d12efe212a8370',
             'thumbnail': 're:^https?://.*\.(?:jpg|JPG)$',
             'upload_date': '20150212',
             'duration': 2694,
         },
     }, {
-        # multiple formats via video_materials.json API
-        'url': 'http://www.1tv.ru/video_archive/projects/dobroeutro/p113641',
+        # multiple formats
+        'url': 'http://www.1tv.ru/shows/dobroe-utro/pro-zdorove/vesennyaya-allergiya-dobroe-utro-fragment-vypuska-ot-07042016',
         'info_dict': {
-            'id': '113641',
+            'id': '364746',
             'ext': 'mp4',
             'title': 'Весенняя аллергия. Доброе утро. Фрагмент выпуска от 07.04.2016',
-            'description': 'md5:8dcebb3dded0ff20fade39087fd1fee2',
+            'description': 'md5:a242eea0031fd180a4497d52640a9572',
             'thumbnail': 're:^https?://.*\.(?:jpg|JPG)$',
             'upload_date': '20160407',
             'duration': 179,
@@ -48,84 +44,47 @@ class FirstTVIE(InfoExtractor):
         'params': {
             'skip_download': True,
         },
-    }, {
-        # single format only available via ONE_ONLINE_VIDEOS.archive_single_xml API
-        'url': 'http://www.1tv.ru/video_archive/series/f7552/p47038',
-        'md5': '519d306c5b5669761fd8906c39dbee23',
-        'info_dict': {
-            'id': '47038',
-            'ext': 'mp4',
-            'title': '"Побег". Второй сезон. 3 серия',
-            'description': 'md5:3abf8f6b9bce88201c33e9a3d794a00b',
-            'thumbnail': 're:^https?://.*\.(?:jpg|JPG)$',
-            'upload_date': '20120516',
-            'duration': 3080,
-        },
-    }, {
-        'url': 'http://www.1tv.ru/videoarchive/9967',
-        'only_matching': True,
     }]
 
     def _real_extract(self, url):
-        video_id = self._match_id(url)
-
-        # Videos with multiple formats only available via this API
-        video = self._download_json(
-            'http://www.1tv.ru/video_materials.json?legacy_id=%s' % video_id,
-            video_id, fatal=False)
-
-        description, thumbnail, upload_date, duration = [None] * 4
-
-        if video:
-            item = video[0]
-            title = item['title']
-            quality = qualities(('ld', 'sd', 'hd', ))
-            formats = [{
-                'url': f['src'],
-                'format_id': f.get('name'),
-                'quality': quality(f.get('name')),
-            } for f in item['mbr'] if f.get('src')]
-            thumbnail = item.get('poster')
-        else:
-            # Some videos are not available via video_materials.json
-            video = self._download_xml(
-                'http://www.1tv.ru/owa/win/ONE_ONLINE_VIDEOS.archive_single_xml?pid=%s' % video_id,
-                video_id)
-
-            NS_MAP = {
-                'media': 'http://search.yahoo.com/mrss/',
-            }
+        display_id = self._match_id(url)
 
-            item = xpath_element(video, './channel/item', fatal=True)
-            title = xpath_text(item, './title', fatal=True)
-            formats = [{
-                'url': content.attrib['url'],
-            } for content in item.findall(
-                compat_xpath(xpath_with_ns('./media:content', NS_MAP))) if content.attrib.get('url')]
-            thumbnail = xpath_attr(
-                item, xpath_with_ns('./media:thumbnail', NS_MAP), 'url')
+        webpage = self._download_webpage(url, display_id)
+        playlist_url = compat_urlparse.urljoin(url, self._search_regex(
+            r'data-playlist-url="([^"]+)', webpage, 'playlist url'))
 
+        item = self._download_json(playlist_url, display_id)[0]
+        video_id = item['id']
+        quality = qualities(('ld', 'sd', 'hd', ))
+        formats = []
+        for f in item.get('mbr', []):
+            src = f.get('src')
+            if not src:
+                continue
+            fname = f.get('name')
+            formats.append({
+                'url': src,
+                'format_id': fname,
+                'quality': quality(fname),
+            })
         self._sort_formats(formats)
 
-        webpage = self._download_webpage(url, video_id, 'Downloading page', fatal=False)
-        if webpage:
-            title = self._html_search_regex(
-                (r'<div class="tv_translation">\s*<h1><a href="[^"]+">([^<]*)</a>',
-                 r"'title'\s*:\s*'([^']+)'"),
-                webpage, 'title', default=None) or title
-            description = self._html_search_regex(
-                r'<div class="descr">\s*<div>&nbsp;</div>\s*<p>([^<]*)</p></div>',
-                webpage, 'description', default=None) or self._html_search_meta(
-                'description', webpage, 'description')
-            thumbnail = thumbnail or self._og_search_thumbnail(webpage)
-            duration = int_or_none(self._html_search_meta(
-                'video:duration', webpage, 'video duration', fatal=False))
-            upload_date = unified_strdate(self._html_search_meta(
-                'ya:ovs:upload_date', webpage, 'upload date', fatal=False))
+        title = self._html_search_regex(
+            (r'<div class="tv_translation">\s*<h1><a href="[^"]+">([^<]*)</a>',
+             r"'title'\s*:\s*'([^']+)'"),
+            webpage, 'title', default=None) or item['title']
+        description = self._html_search_regex(
+            r'<div class="descr">\s*<div>&nbsp;</div>\s*<p>([^<]*)</p></div>',
+            webpage, 'description', default=None) or self._html_search_meta(
+            'description', webpage, 'description')
+        duration = int_or_none(self._html_search_meta(
+            'video:duration', webpage, 'video duration', fatal=False))
+        upload_date = unified_strdate(self._html_search_meta(
+            'ya:ovs:upload_date', webpage, 'upload date', fatal=False))
 
         return {
             'id': video_id,
-            'thumbnail': thumbnail,
+            'thumbnail': item.get('poster') or self._og_search_thumbnail(webpage),
             'title': title,
             'description': description,
             'upload_date': upload_date,
index 75399fa7d2a3164c67f2d72c24628a861ed77806..b3df93f28fc6471b1c5fe7303415c223042261bc 100644 (file)
@@ -1,4 +1,4 @@
-# -*- coding: utf-8 -*-
+# coding: utf-8
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
index d2503ae2eff3d2e46497bbcba356af11db665452..118325b6d5cd6f29645f94c0d5cc6c719715e400 100644 (file)
@@ -2,25 +2,27 @@
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
+from .streamable import StreamableIE
 
 
 class FootyRoomIE(InfoExtractor):
-    _VALID_URL = r'https?://footyroom\.com/(?P<id>[^/]+)'
+    _VALID_URL = r'https?://footyroom\.com/matches/(?P<id>\d+)'
     _TESTS = [{
-        'url': 'http://footyroom.com/schalke-04-0-2-real-madrid-2015-02/',
+        'url': 'http://footyroom.com/matches/79922154/hull-city-vs-chelsea/review',
         'info_dict': {
-            'id': 'schalke-04-0-2-real-madrid-2015-02',
-            'title': 'Schalke 04 0 – 2 Real Madrid',
+            'id': '79922154',
+            'title': 'VIDEO Hull City 0 - 2 Chelsea',
         },
-        'playlist_count': 3,
-        'skip': 'Video for this match is not available',
+        'playlist_count': 2,
+        'add_ie': [StreamableIE.ie_key()],
     }, {
-        'url': 'http://footyroom.com/georgia-0-2-germany-2015-03/',
+        'url': 'http://footyroom.com/matches/75817984/georgia-vs-germany/review',
         'info_dict': {
-            'id': 'georgia-0-2-germany-2015-03',
-            'title': 'Georgia 0 – 2 Germany',
+            'id': '75817984',
+            'title': 'VIDEO Georgia 0 - 2 Germany',
         },
         'playlist_count': 1,
+        'add_ie': ['Playwire']
     }]
 
     def _real_extract(self, url):
@@ -28,9 +30,8 @@ class FootyRoomIE(InfoExtractor):
 
         webpage = self._download_webpage(url, playlist_id)
 
-        playlist = self._parse_json(
-            self._search_regex(
-                r'VideoSelector\.load\((\[.+?\])\);', webpage, 'video selector'),
+        playlist = self._parse_json(self._search_regex(
+            r'DataStore\.media\s*=\s*([^;]+)', webpage, 'media data'),
             playlist_id)
 
         playlist_title = self._og_search_title(webpage)
@@ -40,11 +41,16 @@ class FootyRoomIE(InfoExtractor):
             payload = video.get('payload')
             if not payload:
                 continue
-            playwire_url = self._search_regex(
+            playwire_url = self._html_search_regex(
                 r'data-config="([^"]+)"', payload,
                 'playwire url', default=None)
             if playwire_url:
                 entries.append(self.url_result(self._proto_relative_url(
                     playwire_url, 'http:'), 'Playwire'))
 
+            streamable_url = StreamableIE._extract_url(payload)
+            if streamable_url:
+                entries.append(self.url_result(
+                    streamable_url, StreamableIE.ie_key()))
+
         return self.playlist_result(entries, playlist_id, playlist_title)
index 8c417ab65b0478025ae92929830d03767448ebfa..fecfc28ae9667c128a7edf2e46bd83f123f46b4f 100644 (file)
@@ -11,9 +11,13 @@ class Formula1IE(InfoExtractor):
         'md5': '8c79e54be72078b26b89e0e111c0502b',
         'info_dict': {
             'id': 'JvYXJpMzE6pArfHWm5ARp5AiUmD-gibV',
-            'ext': 'flv',
+            'ext': 'mp4',
             'title': 'Race highlights - Spain 2016',
         },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        },
         'add_ie': ['Ooyala'],
     }, {
         'url': 'http://www.formula1.com/en/video/2016/5/Race_highlights_-_Spain_2016.html',
index 9f406b17eebfab6248f0a6e0d4722151abd7add9..9f2e5d0652a3266c08e83567a3b0f650ec624720 100644 (file)
@@ -1,14 +1,14 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
-from .common import InfoExtractor
+from .adobepass import AdobePassIE
 from ..utils import (
     smuggle_url,
     update_url_query,
 )
 
 
-class FOXIE(InfoExtractor):
+class FOXIE(AdobePassIE):
     _VALID_URL = r'https?://(?:www\.)?fox\.com/watch/(?P<id>[0-9]+)'
     _TEST = {
         'url': 'http://www.fox.com/watch/255180355939/7684182528',
@@ -30,14 +30,26 @@ class FOXIE(InfoExtractor):
         video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
 
-        release_url = self._parse_json(self._search_regex(
-            r'"fox_pdk_player"\s*:\s*({[^}]+?})', webpage, 'fox_pdk_player'),
-            video_id)['release_url']
+        settings = self._parse_json(self._search_regex(
+            r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);',
+            webpage, 'drupal settings'), video_id)
+        fox_pdk_player = settings['fox_pdk_player']
+        release_url = fox_pdk_player['release_url']
+        query = {
+            'mbr': 'true',
+            'switch': 'http'
+        }
+        if fox_pdk_player.get('access') == 'locked':
+            ap_p = settings['foxAdobePassProvider']
+            rating = ap_p.get('videoRating')
+            if rating == 'n/a':
+                rating = None
+            resource = self._get_mvpd_resource('fbc-fox', None, ap_p['videoGUID'], rating)
+            query['auth'] = self._extract_mvpd_auth(url, video_id, 'fbc-fox', resource)
 
         return {
             '_type': 'url_transparent',
             'ie_key': 'ThePlatform',
-            'url': smuggle_url(update_url_query(
-                release_url, {'switch': 'http'}), {'force_smil_url': True}),
+            'url': smuggle_url(update_url_query(release_url, query), {'force_smil_url': True}),
             'id': video_id,
         }
diff --git a/youtube_dl/extractor/fox9.py b/youtube_dl/extractor/fox9.py
new file mode 100644 (file)
index 0000000..56d9975
--- /dev/null
@@ -0,0 +1,43 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .anvato import AnvatoIE
+from ..utils import js_to_json
+
+
+class FOX9IE(AnvatoIE):
+    _VALID_URL = r'https?://(?:www\.)?fox9\.com/(?:[^/]+/)+(?P<id>\d+)-story'
+    _TESTS = [{
+        'url': 'http://www.fox9.com/news/215123287-story',
+        'md5': 'd6e1b2572c3bab8a849c9103615dd243',
+        'info_dict': {
+            'id': '314473',
+            'ext': 'mp4',
+            'title': 'Bear climbs tree in downtown Duluth',
+            'description': 'md5:6a36bfb5073a411758a752455408ac90',
+            'duration': 51,
+            'timestamp': 1478123580,
+            'upload_date': '20161102',
+            'uploader': 'EPFOX',
+            'categories': ['News', 'Sports'],
+            'tags': ['news', 'video'],
+        },
+    }, {
+        'url': 'http://www.fox9.com/news/investigators/214070684-story',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+
+        video_id = self._parse_json(
+            self._search_regex(
+                r'AnvatoPlaylist\s*\(\s*(\[.+?\])\s*\)\s*;',
+                webpage, 'anvato playlist'),
+            video_id, transform_source=js_to_json)[0]['video']
+
+        return self._get_anvato_videos(
+            'anvato_epfox_app_web_prod_b3373168e12f423f41504f207000188daf88251b',
+            video_id)
index 70c1a815d3121bf048da9510a00abf10dc516126..39174fcecca44b54ce42a174f59f3d14fbec2592 100644 (file)
@@ -1,18 +1,24 @@
 from __future__ import unicode_literals
 
+import itertools
+
 from .common import InfoExtractor
+from ..utils import (
+    get_element_by_id,
+    remove_end,
+)
 
 
 class FoxgayIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?foxgay\.com/videos/(?:\S+-)?(?P<id>\d+)\.shtml'
     _TEST = {
         'url': 'http://foxgay.com/videos/fuck-turkish-style-2582.shtml',
-        'md5': '80d72beab5d04e1655a56ad37afe6841',
+        'md5': '344558ccfea74d33b7adbce22e577f54',
         'info_dict': {
             'id': '2582',
             'ext': 'mp4',
-            'title': 'md5:6122f7ae0fc6b21ebdf59c5e083ce25a',
-            'description': 'md5:5e51dc4405f1fd315f7927daed2ce5cf',
+            'title': 'Fuck Turkish-style',
+            'description': 'md5:6ae2d9486921891efe89231ace13ffdf',
             'age_limit': 18,
             'thumbnail': 're:https?://.*\.jpg$',
         },
@@ -22,27 +28,35 @@ class FoxgayIE(InfoExtractor):
         video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
 
-        title = self._html_search_regex(
-            r'<title>(?P<title>.*?)</title>',
-            webpage, 'title', fatal=False)
-        description = self._html_search_regex(
-            r'<div class="ico_desc"><h2>(?P<description>.*?)</h2>',
-            webpage, 'description', fatal=False)
+        title = remove_end(self._html_search_regex(
+            r'<title>([^<]+)</title>', webpage, 'title'), ' - Foxgay.com')
+        description = get_element_by_id('inf_tit', webpage)
 
+        # The default user-agent with foxgay cookies leads to pages without videos
+        self._downloader.cookiejar.clear('.foxgay.com')
         # Find the URL for the iFrame which contains the actual video.
+        iframe_url = self._html_search_regex(
+            r'<iframe[^>]+src=([\'"])(?P<url>[^\'"]+)\1', webpage,
+            'video frame', group='url')
         iframe = self._download_webpage(
-            self._html_search_regex(r'iframe src="(?P<frame>.*?)"', webpage, 'video frame'),
-            video_id)
-        video_url = self._html_search_regex(
-            r"v_path = '(?P<vid>http://.*?)'", iframe, 'url')
-        thumb_url = self._html_search_regex(
-            r"t_path = '(?P<thumb>http://.*?)'", iframe, 'thumbnail', fatal=False)
+            iframe_url, video_id, headers={'User-Agent': 'curl/7.50.1'},
+            note='Downloading video frame')
+        video_data = self._parse_json(self._search_regex(
+            r'video_data\s*=\s*([^;]+);', iframe, 'video data'), video_id)
+
+        formats = [{
+            'url': source,
+            'height': resolution,
+        } for source, resolution in zip(
+            video_data['sources'], video_data.get('resolutions', itertools.repeat(None)))]
+
+        self._sort_formats(formats)
 
         return {
             'id': video_id,
             'title': title,
-            'url': video_url,
+            'formats': formats,
             'description': description,
-            'thumbnail': thumb_url,
+            'thumbnail': video_data.get('act_vid', {}).get('thumb'),
             'age_limit': 18,
         }
index b04da2415246974c4959c6baa7745e550c0c9fa4..229bcb175789ee78b12ae71dbcca811de69d9b65 100644 (file)
@@ -3,11 +3,13 @@ from __future__ import unicode_literals
 import re
 
 from .amp import AMPIE
+from .common import InfoExtractor
 
 
 class FoxNewsIE(AMPIE):
+    IE_NAME = 'foxnews'
     IE_DESC = 'Fox News and Fox Business Video'
-    _VALID_URL = r'https?://(?P<host>video\.fox(?:news|business)\.com)/v/(?:video-embed\.html\?video_id=)?(?P<id>\d+)'
+    _VALID_URL = r'https?://(?P<host>video\.(?:insider\.)?fox(?:news|business)\.com)/v/(?:video-embed\.html\?video_id=)?(?P<id>\d+)'
     _TESTS = [
         {
             'url': 'http://video.foxnews.com/v/3937480/frozen-in-time/#sp=show-clips',
@@ -49,6 +51,11 @@ class FoxNewsIE(AMPIE):
             'url': 'http://video.foxbusiness.com/v/4442309889001',
             'only_matching': True,
         },
+        {
+            # From http://insider.foxnews.com/2016/08/25/univ-wisconsin-student-group-pushing-silence-certain-words
+            'url': 'http://video.insider.foxnews.com/v/video-embed.html?video_id=5099377331001&autoplay=true&share_url=http://insider.foxnews.com/2016/08/25/univ-wisconsin-student-group-pushing-silence-certain-words&share_title=Student%20Group:%20Saying%20%27Politically%20Correct,%27%20%27Trash%27%20and%20%27Lame%27%20Is%20Offensive&share=true',
+            'only_matching': True,
+        },
     ]
 
     def _real_extract(self, url):
@@ -58,3 +65,76 @@ class FoxNewsIE(AMPIE):
             'http://%s/v/feed/video/%s.js?template=fox' % (host, video_id))
         info['id'] = video_id
         return info
+
+
+class FoxNewsArticleIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?foxnews\.com/(?!v)([^/]+/)+(?P<id>[a-z-]+)'
+    IE_NAME = 'foxnews:article'
+
+    _TEST = {
+        'url': 'http://www.foxnews.com/politics/2016/09/08/buzz-about-bud-clinton-camp-denies-claims-wore-earpiece-at-forum.html',
+        'md5': '62aa5a781b308fdee212ebb6f33ae7ef',
+        'info_dict': {
+            'id': '5116295019001',
+            'ext': 'mp4',
+            'title': 'Trump and Clinton asked to defend positions on Iraq War',
+            'description': 'Veterans react on \'The Kelly File\'',
+            'timestamp': 1473299755,
+            'upload_date': '20160908',
+        },
+    }
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+
+        video_id = self._html_search_regex(
+            r'data-video-id=([\'"])(?P<id>[^\'"]+)\1',
+            webpage, 'video ID', group='id')
+        return self.url_result(
+            'http://video.foxnews.com/v/' + video_id,
+            FoxNewsIE.ie_key())
+
+
+class FoxNewsInsiderIE(InfoExtractor):
+    _VALID_URL = r'https?://insider\.foxnews\.com/([^/]+/)+(?P<id>[a-z-]+)'
+    IE_NAME = 'foxnews:insider'
+
+    _TEST = {
+        'url': 'http://insider.foxnews.com/2016/08/25/univ-wisconsin-student-group-pushing-silence-certain-words',
+        'md5': 'a10c755e582d28120c62749b4feb4c0c',
+        'info_dict': {
+            'id': '5099377331001',
+            'display_id': 'univ-wisconsin-student-group-pushing-silence-certain-words',
+            'ext': 'mp4',
+            'title': 'Student Group: Saying \'Politically Correct,\' \'Trash\' and \'Lame\' Is Offensive',
+            'description': 'Is campus censorship getting out of control?',
+            'timestamp': 1472168725,
+            'upload_date': '20160825',
+            'thumbnail': 're:^https?://.*\.jpg$',
+        },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        },
+        'add_ie': [FoxNewsIE.ie_key()],
+    }
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, display_id)
+
+        embed_url = self._html_search_meta('embedUrl', webpage, 'embed URL')
+
+        title = self._og_search_title(webpage)
+        description = self._og_search_description(webpage)
+
+        return {
+            '_type': 'url_transparent',
+            'ie_key': FoxNewsIE.ie_key(),
+            'url': embed_url,
+            'display_id': display_id,
+            'title': title,
+            'description': description,
+        }
index 186da0d3ba7b0f1241bf761d9b64a3222ed33fec..56048ffc21e8de8810b7e6b10122cc621927fbba 100644 (file)
@@ -29,7 +29,7 @@ class FranceCultureIE(InfoExtractor):
         webpage = self._download_webpage(url, display_id)
 
         video_url = self._search_regex(
-            r'(?s)<div[^>]+class="[^"]*?title-zone-diffusion[^"]*?"[^>]*>.*?<a[^>]+href="([^"]+)"',
+            r'(?s)<div[^>]+class="[^"]*?title-zone-diffusion[^"]*?"[^>]*>.*?<button[^>]+data-asset-source="([^"]+)"',
             webpage, 'video path')
 
         title = self._og_search_title(webpage)
@@ -38,7 +38,7 @@ class FranceCultureIE(InfoExtractor):
             '(?s)<div[^>]+class="date"[^>]*>.*?<span[^>]+class="inner"[^>]*>([^<]+)<',
             webpage, 'upload date', fatal=False))
         thumbnail = self._search_regex(
-            r'(?s)<figure[^>]+itemtype="https://schema.org/ImageObject"[^>]*>.*?<img[^>]+data-pagespeed-(?:lazy|high-res)-src="([^"]+)"',
+            r'(?s)<figure[^>]+itemtype="https://schema.org/ImageObject"[^>]*>.*?<img[^>]+data-dejavu-src="([^"]+)"',
             webpage, 'thumbnail', fatal=False)
         uploader = self._html_search_regex(
             r'(?s)<div id="emission".*?<span class="author">(.*?)</span>',
index 2369f868da4a39b1cf84c7cee6a5830859484082..707b9e00db02104a43e65ed8e0e94a3b2c7211c7 100644 (file)
@@ -2,21 +2,21 @@
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
-from ..utils import int_or_none
+from ..utils import month_by_name
 
 
 class FranceInterIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?franceinter\.fr/player/reecouter\?play=(?P<id>[0-9]+)'
+    _VALID_URL = r'https?://(?:www\.)?franceinter\.fr/emissions/(?P<id>[^?#]+)'
+
     _TEST = {
-        'url': 'http://www.franceinter.fr/player/reecouter?play=793962',
-        'md5': '4764932e466e6f6c79c317d2e74f6884',
+        'url': 'https://www.franceinter.fr/emissions/affaires-sensibles/affaires-sensibles-07-septembre-2016',
+        'md5': '9e54d7bdb6fdc02a841007f8a975c094',
         'info_dict': {
-            'id': '793962',
+            'id': 'affaires-sensibles/affaires-sensibles-07-septembre-2016',
             'ext': 'mp3',
-            'title': 'L’Histoire dans les jeux vidéo',
-            'description': 'md5:7e93ddb4451e7530022792240a3049c7',
-            'timestamp': 1387369800,
-            'upload_date': '20131218',
+            'title': 'Affaire Cahuzac : le contentieux du compte en Suisse',
+            'description': 'md5:401969c5d318c061f86bda1fa359292b',
+            'upload_date': '20160907',
         },
     }
 
@@ -25,23 +25,30 @@ class FranceInterIE(InfoExtractor):
 
         webpage = self._download_webpage(url, video_id)
 
-        path = self._search_regex(
-            r'<a id="player".+?href="([^"]+)"', webpage, 'video url')
-        video_url = 'http://www.franceinter.fr/' + path
-
-        title = self._html_search_regex(
-            r'<span class="title-diffusion">(.+?)</span>', webpage, 'title')
-        description = self._html_search_regex(
-            r'<span class="description">(.*?)</span>',
-            webpage, 'description', fatal=False)
-        timestamp = int_or_none(self._search_regex(
-            r'data-date="(\d+)"', webpage, 'upload date', fatal=False))
+        video_url = self._search_regex(
+            r'(?s)<div[^>]+class=["\']page-diffusion["\'][^>]*>.*?<button[^>]+data-url=(["\'])(?P<url>(?:(?!\1).)+)\1',
+            webpage, 'video url', group='url')
+
+        title = self._og_search_title(webpage)
+        description = self._og_search_description(webpage)
+
+        upload_date_str = self._search_regex(
+            r'class=["\']cover-emission-period["\'][^>]*>[^<]+\s+(\d{1,2}\s+[^\s]+\s+\d{4})<',
+            webpage, 'upload date', fatal=False)
+        if upload_date_str:
+            upload_date_list = upload_date_str.split()
+            upload_date_list.reverse()
+            upload_date_list[1] = '%02d' % (month_by_name(upload_date_list[1], lang='fr') or 0)
+            upload_date_list[2] = '%02d' % int(upload_date_list[2])
+            upload_date = ''.join(upload_date_list)
+        else:
+            upload_date = None
 
         return {
             'id': video_id,
             'title': title,
             'description': description,
-            'timestamp': timestamp,
+            'upload_date': upload_date,
             'formats': [{
                 'url': video_url,
                 'vcodec': 'none',
index 3233f66d5fe2efd8381b125948e4eed3d0e446ce..e7068d1aed9573199211a29a91486bd72e9aecd0 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 
 from __future__ import unicode_literals
 
index 1477708bbec14c38bf0db7801d09d68a22ff1546..0a70ca76351ab310ba394959b717973ec772f52d 100644 (file)
@@ -8,7 +8,7 @@ from .common import InfoExtractor
 
 class FreespeechIE(InfoExtractor):
     IE_NAME = 'freespeech.org'
-    _VALID_URL = r'https://www\.freespeech\.org/video/(?P<title>.+)'
+    _VALID_URL = r'https?://(?:www\.)?freespeech\.org/video/(?P<title>.+)'
     _TEST = {
         'add_ie': ['Youtube'],
         'url': 'https://www.freespeech.org/video/obama-romney-campaign-colorado-ahead-debate-0',
index 8c5ffc9e84cec305e9fc813a6366b360b7e36230..f2928b5fecb68df5429a90ca079faac6faf93bb2 100644 (file)
@@ -28,6 +28,9 @@ class FunnyOrDieIE(InfoExtractor):
             'description': 'Please use this to sell something.  www.jonlajoie.com',
             'thumbnail': 're:^http:.*\.jpg$',
         },
+        'params': {
+            'skip_download': True,
+        },
     }, {
         'url': 'http://www.funnyordie.com/articles/ebf5e34fc8/10-hours-of-walking-in-nyc-as-a-man',
         'only_matching': True,
@@ -51,19 +54,45 @@ class FunnyOrDieIE(InfoExtractor):
 
         formats = []
 
-        formats.extend(self._extract_m3u8_formats(
-            m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
+        m3u8_formats = self._extract_m3u8_formats(
+            m3u8_url, video_id, 'mp4', 'm3u8_native',
+            m3u8_id='hls', fatal=False)
+        source_formats = list(filter(
+            lambda f: f.get('vcodec') != 'none' and f.get('resolution') != 'multiple',
+            m3u8_formats))
 
-        bitrates = [int(bitrate) for bitrate in re.findall(r'[,/]v(\d+)[,/]', m3u8_url)]
+        bitrates = [int(bitrate) for bitrate in re.findall(r'[,/]v(\d+)(?=[,/])', m3u8_url)]
         bitrates.sort()
 
-        for bitrate in bitrates:
-            for link in links:
-                formats.append({
-                    'url': self._proto_relative_url('%s%d.%s' % (link[0], bitrate, link[1])),
-                    'format_id': '%s-%d' % (link[1], bitrate),
-                    'vbr': bitrate,
-                })
+        if source_formats:
+            self._sort_formats(source_formats)
+
+        for bitrate, f in zip(bitrates, source_formats or [{}] * len(bitrates)):
+            for path, ext in links:
+                ff = f.copy()
+                if ff:
+                    if ext != 'mp4':
+                        ff = dict(
+                            [(k, v) for k, v in ff.items()
+                             if k in ('height', 'width', 'format_id')])
+                    ff.update({
+                        'format_id': ff['format_id'].replace('hls', ext),
+                        'ext': ext,
+                        'protocol': 'http',
+                    })
+                else:
+                    ff.update({
+                        'format_id': '%s-%d' % (ext, bitrate),
+                        'vbr': bitrate,
+                    })
+                ff['url'] = self._proto_relative_url(
+                    '%s%d.%s' % (path, bitrate, ext))
+                formats.append(ff)
+        self._check_formats(formats, video_id)
+
+        formats.extend(m3u8_formats)
+        self._sort_formats(
+            formats, field_preference=('height', 'width', 'tbr', 'format_id'))
 
         subtitles = {}
         for src, src_lang in re.findall(r'<track kind="captions" src="([^"]+)" srclang="([^"]+)"', webpage):
index 69058a5835f2bac0d1e56ce0917909df0fb9a92b..55a34604af2cd2bca83ebc2c7957f1f4eb7401f1 100644 (file)
@@ -1,19 +1,15 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
-import re
-
 from .common import InfoExtractor
 from ..utils import (
     int_or_none,
-    parse_duration,
-    str_to_int,
-    unified_strdate,
+    remove_end,
 )
 
 
 class GameStarIE(InfoExtractor):
-    _VALID_URL = r'https?://www\.gamestar\.de/videos/.*,(?P<id>[0-9]+)\.html'
+    _VALID_URL = r'https?://(?:www\.)?gamestar\.de/videos/.*,(?P<id>[0-9]+)\.html'
     _TEST = {
         'url': 'http://www.gamestar.de/videos/trailer,3/hobbit-3-die-schlacht-der-fuenf-heere,76110.html',
         'md5': '96974ecbb7fd8d0d20fca5a00810cea7',
@@ -21,8 +17,9 @@ class GameStarIE(InfoExtractor):
             'id': '76110',
             'ext': 'mp4',
             'title': 'Hobbit 3: Die Schlacht der Fünf Heere - Teaser-Trailer zum dritten Teil',
-            'description': 'Der Teaser-Trailer zu Hobbit 3: Die Schlacht der Fünf Heere zeigt einige Szenen aus dem dritten Teil der Saga und kündigt den vollständigen Trailer an.',
-            'thumbnail': 'http://images.gamestar.de/images/idgwpgsgp/bdb/2494525/600x.jpg',
+            'description': 'Der Teaser-Trailer zu Hobbit 3: Die Schlacht der Fünf Heere zeigt einige Szenen aus dem dritten Teil der Saga und kündigt den...',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'timestamp': 1406542020,
             'upload_date': '20140728',
             'duration': 17
         }
@@ -32,41 +29,27 @@ class GameStarIE(InfoExtractor):
         video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
 
-        og_title = self._og_search_title(webpage)
-        title = re.sub(r'\s*- Video (bei|-) GameStar\.de$', '', og_title)
-
         url = 'http://gamestar.de/_misc/videos/portal/getVideoUrl.cfm?premium=0&videoId=' + video_id
 
-        description = self._og_search_description(webpage).strip()
-
-        thumbnail = self._proto_relative_url(
-            self._og_search_thumbnail(webpage), scheme='http:')
-
-        upload_date = unified_strdate(self._html_search_regex(
-            r'<span style="float:left;font-size:11px;">Datum: ([0-9]+\.[0-9]+\.[0-9]+)&nbsp;&nbsp;',
-            webpage, 'upload_date', fatal=False))
-
-        duration = parse_duration(self._html_search_regex(
-            r'&nbsp;&nbsp;Länge: ([0-9]+:[0-9]+)</span>', webpage, 'duration',
-            fatal=False))
-
-        view_count = str_to_int(self._html_search_regex(
-            r'&nbsp;&nbsp;Zuschauer: ([0-9\.]+)&nbsp;&nbsp;', webpage,
-            'view_count', fatal=False))
+        # TODO: there are multiple ld+json objects in the webpage,
+        # while _search_json_ld finds only the first one
+        json_ld = self._parse_json(self._search_regex(
+            r'(?s)<script[^>]+type=(["\'])application/ld\+json\1[^>]*>(?P<json_ld>[^<]+VideoObject[^<]+)</script>',
+            webpage, 'JSON-LD', group='json_ld'), video_id)
+        info_dict = self._json_ld(json_ld, video_id)
+        info_dict['title'] = remove_end(info_dict['title'], ' - GameStar')
 
+        view_count = json_ld.get('interactionCount')
         comment_count = int_or_none(self._html_search_regex(
-            r'>Kommentieren \(([0-9]+)\)</a>', webpage, 'comment_count',
+            r'([0-9]+) Kommentare</span>', webpage, 'comment_count',
             fatal=False))
 
-        return {
+        info_dict.update({
             'id': video_id,
-            'title': title,
             'url': url,
             'ext': 'mp4',
-            'thumbnail': thumbnail,
-            'description': description,
-            'upload_date': upload_date,
-            'duration': duration,
             'view_count': view_count,
             'comment_count': comment_count
-        }
+        })
+
+        return info_dict
index 197ab95319419d29971979b94464aa191ef04a04..3949c8bf7d5f3088b076b78f321fb6657075aded 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 
 from __future__ import unicode_literals
 
@@ -27,7 +27,6 @@ from ..utils import (
     unified_strdate,
     unsmuggle_url,
     UnsupportedError,
-    url_basename,
     xpath_text,
 )
 from .brightcove import (
@@ -48,6 +47,8 @@ from .svt import SVTIE
 from .pornhub import PornHubIE
 from .xhamster import XHamsterEmbedIE
 from .tnaflix import TNAFlixNetworkEmbedIE
+from .drtuber import DrTuberIE
+from .redtube import RedTubeIE
 from .vimeo import VimeoIE
 from .dailymotion import (
     DailymotionIE,
@@ -55,10 +56,10 @@ from .dailymotion import (
 )
 from .onionstudios import OnionStudiosIE
 from .viewlift import ViewLiftEmbedIE
-from .screenwavemedia import ScreenwaveMediaIE
 from .mtv import MTVServicesEmbeddedIE
 from .pladform import PladformIE
 from .videomore import VideomoreIE
+from .webcaster import WebcasterFeedIE
 from .googledrive import GoogleDriveIE
 from .jwplatform import JWPlatformIE
 from .digiteka import DigitekaIE
@@ -73,6 +74,7 @@ from .eagleplatform import EaglePlatformIE
 from .facebook import FacebookIE
 from .soundcloud import SoundcloudIE
 from .vbox7 import Vbox7IE
+from .dbtv import DBTVIE
 
 
 class GenericIE(InfoExtractor):
@@ -103,7 +105,8 @@ class GenericIE(InfoExtractor):
             },
             'expected_warnings': [
                 'URL could be a direct video link, returning it as such.'
-            ]
+            ],
+            'skip': 'URL invalid',
         },
         # Direct download with broken HEAD
         {
@@ -267,7 +270,8 @@ class GenericIE(InfoExtractor):
             'params': {
                 # m3u8 downloads
                 'skip_download': True,
-            }
+            },
+            'skip': 'video gone',
         },
         # m3u8 served with Content-Type: text/plain
         {
@@ -282,7 +286,8 @@ class GenericIE(InfoExtractor):
             'params': {
                 # m3u8 downloads
                 'skip_download': True,
-            }
+            },
+            'skip': 'video gone',
         },
         # google redirect
         {
@@ -367,6 +372,7 @@ class GenericIE(InfoExtractor):
                 'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.',
             },
             'add_ie': ['BrightcoveLegacy'],
+            'skip': 'video gone',
         },
         {
             'url': 'http://www.championat.com/video/football/v/87/87499.html',
@@ -420,6 +426,7 @@ class GenericIE(InfoExtractor):
             'params': {
                 'skip_download': True,
             },
+            'skip': 'movie expired',
         },
         # embed.ly video
         {
@@ -447,6 +454,8 @@ class GenericIE(InfoExtractor):
                 'title': 'Between Two Ferns with Zach Galifianakis: President Barack Obama',
                 'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.',
             },
+            # HEAD requests lead to endless 301, while GET is OK
+            'expected_warnings': ['301'],
         },
         # RUTV embed
         {
@@ -521,6 +530,9 @@ class GenericIE(InfoExtractor):
                 'title': '[NSFL] [FM15] which pumiscer was this ( vid ) ( alfa as fuck srx )',
             },
             'playlist_mincount': 7,
+            # This forum does not allow <iframe> syntaxes anymore
+            # Now HTML tags are displayed as-is
+            'skip': 'No videos on this page',
         },
         # Embedded TED video
         {
@@ -569,7 +581,8 @@ class GenericIE(InfoExtractor):
             },
             'params': {
                 'skip_download': 'Requires rtmpdump'
-            }
+            },
+            'skip': 'video gone',
         },
         # francetv embed
         {
@@ -1176,16 +1189,6 @@ class GenericIE(InfoExtractor):
                 'duration': 248.667,
             },
         },
-        # ScreenwaveMedia embed
-        {
-            'url': 'http://www.thecinemasnob.com/the-cinema-snob/a-nightmare-on-elm-street-2-freddys-revenge1',
-            'md5': '24ace5baba0d35d55c6810b51f34e9e0',
-            'info_dict': {
-                'id': 'cinemasnob-55d26273809dd',
-                'ext': 'mp4',
-                'title': 'cinemasnob',
-            },
-        },
         # BrightcoveInPageEmbed embed
         {
             'url': 'http://www.geekandsundry.com/tabletop-bonus-wils-final-thoughts-on-dread/',
@@ -1197,20 +1200,6 @@ class GenericIE(InfoExtractor):
                 'duration': 51690,
             },
         },
-        # JWPlayer with M3U8
-        {
-            'url': 'http://ren.tv/novosti/2015-09-25/sluchaynyy-prohozhiy-poymal-avtougonshchika-v-murmanske-video',
-            'info_dict': {
-                'id': 'playlist',
-                'ext': 'mp4',
-                'title': 'Случайный прохожий поймал автоугонщика в Мурманске. ВИДЕО | РЕН ТВ',
-                'uploader': 'ren.tv',
-            },
-            'params': {
-                # m3u8 downloads
-                'skip_download': True,
-            }
-        },
         # Brightcove embed, with no valid 'renditions' but valid 'IOSRenditions'
         # This video can't be played in browsers if Flash disabled and UA set to iPhone, which is actually a false alarm
         {
@@ -1357,6 +1346,11 @@ class GenericIE(InfoExtractor):
             },
             'add_ie': ['Vimeo'],
         },
+        {
+            # generic vimeo embed that requires original URL passed as Referer
+            'url': 'http://racing4everyone.eu/2016/07/30/formula-1-2016-round12-germany/',
+            'only_matching': True,
+        },
         {
             'url': 'https://support.arkena.com/display/PLAY/Ways+to+embed+your+video',
             'md5': 'b96f2f71b359a8ecd05ce4e1daa72365',
@@ -1386,6 +1380,15 @@ class GenericIE(InfoExtractor):
             },
             'add_ie': [Vbox7IE.ie_key()],
         },
+        {
+            # DBTV embeds
+            'url': 'http://www.dagbladet.no/2016/02/23/nyheter/nordlys/ski/troms/ver/43254897/',
+            'info_dict': {
+                'id': '43254897',
+                'title': 'Etter ett års planlegging, klaffet endelig alt: - Jeg måtte ta en liten dans',
+            },
+            'playlist_mincount': 3,
+        },
         # {
         #     # TODO: find another test
         #     # http://schema.org/VideoObject
@@ -1523,7 +1526,7 @@ class GenericIE(InfoExtractor):
             force_videoid = smuggled_data['force_videoid']
             video_id = force_videoid
         else:
-            video_id = compat_urllib_parse_unquote(os.path.splitext(url.rstrip('/').split('/')[-1])[0])
+            video_id = self._generic_id(url)
 
         self.to_screen('%s: Requesting header' % video_id)
 
@@ -1552,7 +1555,7 @@ class GenericIE(InfoExtractor):
 
         info_dict = {
             'id': video_id,
-            'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]),
+            'title': self._generic_title(url),
             'upload_date': unified_strdate(head_response.headers.get('Last-Modified'))
         }
 
@@ -1623,6 +1626,10 @@ class GenericIE(InfoExtractor):
             doc = compat_etree_fromstring(webpage.encode('utf-8'))
             if doc.tag == 'rss':
                 return self._extract_rss(url, video_id, doc)
+            elif doc.tag == 'SmoothStreamingMedia':
+                info_dict['formats'] = self._parse_ism_formats(doc, url)
+                self._sort_formats(info_dict['formats'])
+                return info_dict
             elif re.match(r'^(?:{[^}]+})?smil$', doc.tag):
                 smil = self._parse_smil(doc, url, video_id)
                 self._sort_formats(smil['formats'])
@@ -1631,7 +1638,9 @@ class GenericIE(InfoExtractor):
                 return self.playlist_result(self._parse_xspf(doc, video_id), video_id)
             elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag):
                 info_dict['formats'] = self._parse_mpd_formats(
-                    doc, video_id, mpd_base_url=url.rpartition('/')[0])
+                    doc, video_id,
+                    mpd_base_url=full_response.geturl().rpartition('/')[0],
+                    mpd_url=url)
                 self._sort_formats(info_dict['formats'])
                 return info_dict
             elif re.match(r'^{http://ns\.adobe\.com/f4m/[12]\.0}manifest$', doc.tag):
@@ -1726,9 +1735,9 @@ class GenericIE(InfoExtractor):
         if matches:
             return _playlist_from_matches(matches, ie='RtlNl')
 
-        vimeo_url = VimeoIE._extract_vimeo_url(url, webpage)
-        if vimeo_url is not None:
-            return self.url_result(vimeo_url)
+        vimeo_urls = VimeoIE._extract_urls(url, webpage)
+        if vimeo_urls:
+            return _playlist_from_matches(vimeo_urls, ie=VimeoIE.ie_key())
 
         vid_me_embed_url = self._search_regex(
             r'src=[\'"](https?://vid\.me/[^\'"]+)[\'"]',
@@ -1964,11 +1973,6 @@ class GenericIE(InfoExtractor):
         if sportbox_urls:
             return _playlist_from_matches(sportbox_urls, ie='SportBoxEmbed')
 
-        # Look for embedded PornHub player
-        pornhub_url = PornHubIE._extract_url(webpage)
-        if pornhub_url:
-            return self.url_result(pornhub_url, 'PornHub')
-
         # Look for embedded XHamster player
         xhamster_urls = XHamsterEmbedIE._extract_urls(webpage)
         if xhamster_urls:
@@ -1979,6 +1983,21 @@ class GenericIE(InfoExtractor):
         if tnaflix_urls:
             return _playlist_from_matches(tnaflix_urls, ie=TNAFlixNetworkEmbedIE.ie_key())
 
+        # Look for embedded PornHub player
+        pornhub_urls = PornHubIE._extract_urls(webpage)
+        if pornhub_urls:
+            return _playlist_from_matches(pornhub_urls, ie=PornHubIE.ie_key())
+
+        # Look for embedded DrTuber player
+        drtuber_urls = DrTuberIE._extract_urls(webpage)
+        if drtuber_urls:
+            return _playlist_from_matches(drtuber_urls, ie=DrTuberIE.ie_key())
+
+        # Look for embedded RedTube player
+        redtube_urls = RedTubeIE._extract_urls(webpage)
+        if redtube_urls:
+            return _playlist_from_matches(redtube_urls, ie=RedTubeIE.ie_key())
+
         # Look for embedded Tvigle player
         mobj = re.search(
             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//cloud\.tvigle\.ru/video/.+?)\1', webpage)
@@ -2111,6 +2130,11 @@ class GenericIE(InfoExtractor):
         if videomore_url:
             return self.url_result(videomore_url)
 
+        # Look for Webcaster embeds
+        webcaster_url = WebcasterFeedIE._extract_url(self, webpage)
+        if webcaster_url:
+            return self.url_result(webcaster_url, ie=WebcasterFeedIE.ie_key())
+
         # Look for Playwire embeds
         mobj = re.search(
             r'<script[^>]+data-config=(["\'])(?P<url>(?:https?:)?//config\.playwire\.com/.+?)\1', webpage)
@@ -2177,11 +2201,6 @@ class GenericIE(InfoExtractor):
         if jwplatform_url:
             return self.url_result(jwplatform_url, 'JWPlatform')
 
-        # Look for ScreenwaveMedia embeds
-        mobj = re.search(ScreenwaveMediaIE.EMBED_PATTERN, webpage)
-        if mobj is not None:
-            return self.url_result(unescapeHTML(mobj.group('url')), 'ScreenwaveMedia')
-
         # Look for Digiteka embeds
         digiteka_url = DigitekaIE._extract_url(webpage)
         if digiteka_url:
@@ -2203,6 +2222,16 @@ class GenericIE(InfoExtractor):
             return self.url_result('limelight:%s:%s' % (
                 lm[mobj.group(1)], mobj.group(2)), 'Limelight%s' % mobj.group(1), mobj.group(2))
 
+        mobj = re.search(
+            r'''(?sx)
+                <object[^>]+class=(["\'])LimelightEmbeddedPlayerFlash\1[^>]*>.*?
+                    <param[^>]+
+                        name=(["\'])flashVars\2[^>]+
+                        value=(["\'])(?:(?!\3).)*mediaId=(?P<id>[a-z0-9]{32})
+            ''', webpage)
+        if mobj:
+            return self.url_result('limelight:media:%s' % mobj.group('id'))
+
         # Look for AdobeTVVideo embeds
         mobj = re.search(
             r'<iframe[^>]+src=[\'"]((?:https?:)?//video\.tv\.adobe\.com/v/\d+[^"]+)[\'"]',
@@ -2222,11 +2251,40 @@ class GenericIE(InfoExtractor):
 
         # Look for VODPlatform embeds
         mobj = re.search(
-            r'<iframe[^>]+src=[\'"]((?:https?:)?//(?:www\.)?vod-platform\.net/embed/[^/?#]+)',
+            r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?vod-platform\.net/[eE]mbed/.+?)\1',
             webpage)
         if mobj is not None:
             return self.url_result(
-                self._proto_relative_url(unescapeHTML(mobj.group(1))), 'VODPlatform')
+                self._proto_relative_url(unescapeHTML(mobj.group('url'))), 'VODPlatform')
+
+        # Look for Mangomolo embeds
+        mobj = re.search(
+            r'''(?x)<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?admin\.mangomolo\.com/analytics/index\.php/customers/embed/
+                (?:
+                    video\?.*?\bid=(?P<video_id>\d+)|
+                    index\?.*?\bchannelid=(?P<channel_id>(?:[A-Za-z0-9+/=]|%2B|%2F|%3D)+)
+                ).+?)\1''', webpage)
+        if mobj is not None:
+            info = {
+                '_type': 'url_transparent',
+                'url': self._proto_relative_url(unescapeHTML(mobj.group('url'))),
+                'title': video_title,
+                'description': video_description,
+                'thumbnail': video_thumbnail,
+                'uploader': video_uploader,
+            }
+            video_id = mobj.group('video_id')
+            if video_id:
+                info.update({
+                    'ie_key': 'MangomoloVideo',
+                    'id': video_id,
+                })
+            else:
+                info.update({
+                    'ie_key': 'MangomoloLive',
+                    'id': mobj.group('channel_id'),
+                })
+            return info
 
         # Look for Instagram embeds
         instagram_embed_url = InstagramIE._extract_embed_url(webpage)
@@ -2257,6 +2315,11 @@ class GenericIE(InfoExtractor):
         if vbox7_url:
             return self.url_result(vbox7_url, Vbox7IE.ie_key())
 
+        # Look for DBTV embeds
+        dbtv_urls = DBTVIE._extract_urls(webpage)
+        if dbtv_urls:
+            return _playlist_from_matches(dbtv_urls, ie=DBTVIE.ie_key())
+
         # Looking for http://schema.org/VideoObject
         json_ld = self._search_json_ld(
             webpage, video_id, default={}, expected_type='VideoObject')
@@ -2270,12 +2333,23 @@ class GenericIE(InfoExtractor):
             info_dict.update(json_ld)
             return info_dict
 
+        # Look for HTML5 media
+        entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls')
+        if entries:
+            for entry in entries:
+                entry.update({
+                    'id': video_id,
+                    'title': video_title,
+                })
+                self._sort_formats(entry['formats'])
+            return self.playlist_result(entries)
+
         def check_video(vurl):
             if YoutubeIE.suitable(vurl):
                 return True
             vpath = compat_urlparse.urlparse(vurl).path
             vext = determine_ext(vpath)
-            return '.' in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml')
+            return '.' in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml', 'js')
 
         def filter_video(urls):
             return list(filter(check_video, urls))
@@ -2325,9 +2399,6 @@ class GenericIE(InfoExtractor):
             # We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
             if m_video_type is not None:
                 found = filter_video(re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage))
-        if not found:
-            # HTML5 video
-            found = re.findall(r'(?s)<(?:video|audio)[^<]*(?:>.*?<source[^>]*)?\s+src=["\'](.*?)["\']', webpage)
         if not found:
             REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
             found = re.search(
@@ -2394,6 +2465,21 @@ class GenericIE(InfoExtractor):
                 entry_info_dict['formats'] = self._extract_mpd_formats(video_url, video_id)
             elif ext == 'f4m':
                 entry_info_dict['formats'] = self._extract_f4m_formats(video_url, video_id)
+            elif re.search(r'(?i)\.(?:ism|smil)/manifest', video_url) and video_url != url:
+                # Just matching .ism/manifest is not enough to be reliably sure
+                # whether it's actually an ISM manifest or some other streaming
+                # manifest since there are various streaming URL formats
+                # possible (see [1]) as well as some other shenanigans like
+                # .smil/manifest URLs that actually serve an ISM (see [2]) and
+                # so on.
+                # Thus the most reasonable way to solve this is to delegate
+                # to generic extractor in order to look into the contents of
+                # the manifest itself.
+                # 1. https://azure.microsoft.com/en-us/documentation/articles/media-services-deliver-content-overview/#streaming-url-formats
+                # 2. https://svs.itworkscdn.net/lbcivod/smil:itwfcdn/lbci/170976.smil/Manifest
+                entry_info_dict = self.url_result(
+                    smuggle_url(video_url, {'to_generic': True}),
+                    GenericIE.ie_key())
             else:
                 entry_info_dict['url'] = video_url
 
index 62ff84835c87b28d18ace1afa5eee19f894d198d..f0d951396fdba4f74027e81af629f7c27c253f9a 100644 (file)
@@ -2,7 +2,6 @@
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
-from ..utils import unified_strdate
 
 
 class GlideIE(InfoExtractor):
@@ -14,10 +13,8 @@ class GlideIE(InfoExtractor):
         'info_dict': {
             'id': 'UZF8zlmuQbe4mr+7dCiQ0w==',
             'ext': 'mp4',
-            'title': 'Damon Timm\'s Glide message',
+            'title': "Damon's Glide message",
             'thumbnail': 're:^https?://.*?\.cloudfront\.net/.*\.jpg$',
-            'uploader': 'Damon Timm',
-            'upload_date': '20140919',
         }
     }
 
@@ -27,7 +24,8 @@ class GlideIE(InfoExtractor):
         webpage = self._download_webpage(url, video_id)
 
         title = self._html_search_regex(
-            r'<title>(.+?)</title>', webpage, 'title')
+            r'<title>(.+?)</title>', webpage,
+            'title', default=None) or self._og_search_title(webpage)
         video_url = self._proto_relative_url(self._search_regex(
             r'<source[^>]+src=(["\'])(?P<url>.+?)\1',
             webpage, 'video URL', default=None,
@@ -36,18 +34,10 @@ class GlideIE(InfoExtractor):
             r'<img[^>]+id=["\']video-thumbnail["\'][^>]+src=(["\'])(?P<url>.+?)\1',
             webpage, 'thumbnail url', default=None,
             group='url')) or self._og_search_thumbnail(webpage)
-        uploader = self._search_regex(
-            r'<div[^>]+class=["\']info-name["\'][^>]*>([^<]+)',
-            webpage, 'uploader', fatal=False)
-        upload_date = unified_strdate(self._search_regex(
-            r'<div[^>]+class="info-date"[^>]*>([^<]+)',
-            webpage, 'upload date', fatal=False))
 
         return {
             'id': video_id,
             'title': title,
             'url': video_url,
             'thumbnail': thumbnail,
-            'uploader': uploader,
-            'upload_date': upload_date,
         }
index 3de8356f68ef67e0913fd958995ad1d3e48ac62f..dc7b2661c58a0b35053ea50c7a2c1fa7b093f642 100644 (file)
@@ -2,6 +2,7 @@
 from __future__ import unicode_literals
 
 import random
+import re
 import math
 
 from .common import InfoExtractor
@@ -14,12 +15,13 @@ from ..utils import (
     ExtractorError,
     float_or_none,
     int_or_none,
+    orderedSet,
     str_or_none,
 )
 
 
 class GloboIE(InfoExtractor):
-    _VALID_URL = '(?:globo:|https?://.+?\.globo\.com/(?:[^/]+/)*(?:v/(?:[^/]+/)?|videos/))(?P<id>\d{7,})'
+    _VALID_URL = r'(?:globo:|https?://.+?\.globo\.com/(?:[^/]+/)*(?:v/(?:[^/]+/)?|videos/))(?P<id>\d{7,})'
 
     _API_URL_TEMPLATE = 'http://api.globovideos.com/videos/%s/playlist'
     _SECURITY_URL_TEMPLATE = 'http://security.video.globo.com/videos/%s/hash?player=flash&version=17.0.0.132&resource_id=%s'
@@ -63,6 +65,9 @@ class GloboIE(InfoExtractor):
     }, {
         'url': 'http://canaloff.globo.com/programas/desejar-profundo/videos/4518560.html',
         'only_matching': True,
+    }, {
+        'url': 'globo:3607726',
+        'only_matching': True,
     }]
 
     class MD5(object):
@@ -396,33 +401,41 @@ class GloboIE(InfoExtractor):
 
 
 class GloboArticleIE(InfoExtractor):
-    _VALID_URL = 'https?://.+?\.globo\.com/(?:[^/]+/)*(?P<id>[^/]+)\.html'
+    _VALID_URL = r'https?://.+?\.globo\.com/(?:[^/]+/)*(?P<id>[^/.]+)(?:\.html)?'
 
     _VIDEOID_REGEXES = [
         r'\bdata-video-id=["\'](\d{7,})',
         r'\bdata-player-videosids=["\'](\d{7,})',
-        r'\bvideosIDs\s*:\s*["\'](\d{7,})',
+        r'\bvideosIDs\s*:\s*["\']?(\d{7,})',
         r'\bdata-id=["\'](\d{7,})',
         r'<div[^>]+\bid=["\'](\d{7,})',
     ]
 
     _TESTS = [{
         'url': 'http://g1.globo.com/jornal-nacional/noticia/2014/09/novidade-na-fiscalizacao-de-bagagem-pela-receita-provoca-discussoes.html',
-        'md5': '307fdeae4390ccfe6ba1aa198cf6e72b',
         'info_dict': {
-            'id': '3652183',
-            'ext': 'mp4',
-            'title': 'Receita Federal explica como vai fiscalizar bagagens de quem retorna ao Brasil de avião',
-            'duration': 110.711,
-            'uploader': 'Rede Globo',
-            'uploader_id': '196',
-        }
+            'id': 'novidade-na-fiscalizacao-de-bagagem-pela-receita-provoca-discussoes',
+            'title': 'Novidade na fiscalização de bagagem pela Receita provoca discussões',
+            'description': 'md5:c3c4b4d4c30c32fce460040b1ac46b12',
+        },
+        'playlist_count': 1,
+    }, {
+        'url': 'http://g1.globo.com/pr/parana/noticia/2016/09/mpf-denuncia-lula-marisa-e-mais-seis-na-operacao-lava-jato.html',
+        'info_dict': {
+            'id': 'mpf-denuncia-lula-marisa-e-mais-seis-na-operacao-lava-jato',
+            'title': "Lula era o 'comandante máximo' do esquema da Lava Jato, diz MPF",
+            'description': 'md5:8aa7cc8beda4dc71cc8553e00b77c54c',
+        },
+        'playlist_count': 6,
     }, {
         'url': 'http://gq.globo.com/Prazeres/Poder/noticia/2015/10/all-o-desafio-assista-ao-segundo-capitulo-da-serie.html',
         'only_matching': True,
     }, {
         'url': 'http://gshow.globo.com/programas/tv-xuxa/O-Programa/noticia/2014/01/xuxa-e-junno-namoram-muuuito-em-luau-de-zeze-di-camargo-e-luciano.html',
         'only_matching': True,
+    }, {
+        'url': 'http://oglobo.globo.com/rio/a-amizade-entre-um-entregador-de-farmacia-um-piano-19946271',
+        'only_matching': True,
     }]
 
     @classmethod
@@ -432,5 +445,12 @@ class GloboArticleIE(InfoExtractor):
     def _real_extract(self, url):
         display_id = self._match_id(url)
         webpage = self._download_webpage(url, display_id)
-        video_id = self._search_regex(self._VIDEOID_REGEXES, webpage, 'video id')
-        return self.url_result('globo:%s' % video_id, 'Globo')
+        video_ids = []  # every id matched by any pattern is collected (the removed code kept a single id from _search_regex)
+        for video_regex in self._VIDEOID_REGEXES:
+            video_ids.extend(re.findall(video_regex, webpage))  # each pattern has one capture group, so findall yields the id strings
+        entries = [
+            self.url_result('globo:%s' % video_id, GloboIE.ie_key())
+            for video_id in orderedSet(video_ids)]  # NOTE(review): orderedSet presumably drops duplicates while keeping first-seen order - confirm in utils
+        title = self._og_search_title(webpage, fatal=False)
+        description = self._html_search_meta('description', webpage)
+        return self.playlist_result(entries, display_id, title, description)  # the article slug (display_id) is used as the playlist id
diff --git a/youtube_dl/extractor/go.py b/youtube_dl/extractor/go.py
new file mode 100644 (file)
index 0000000..c7776b1
--- /dev/null
@@ -0,0 +1,122 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    int_or_none,
+    determine_ext,
+    parse_age_limit,
+    urlencode_postdata,
+    ExtractorError,
+)
+
+
+class GoIE(InfoExtractor):  # Extractor for videos on the go.com brand subdomains listed in _BRANDS
+    _BRANDS = {  # brand subdomain -> brand code used in the content API URL and in the entitlement request
+        'abc': '001',
+        'freeform': '002',
+        'watchdisneychannel': '004',
+        'watchdisneyjunior': '008',
+        'watchdisneyxd': '009',
+    }
+    _VALID_URL = r'https?://(?:(?P<sub_domain>%s)\.)?go\.com/(?:[^/]+/)*(?:vdka(?P<id>\w+)|season-\d+/\d+-(?P<display_id>[^/?#]+))' % '|'.join(_BRANDS.keys())  # sub_domain is optional; the path yields either a "vdka<id>" video id or a "season-N/N-<slug>" display_id
+    _TESTS = [{
+        'url': 'http://abc.go.com/shows/castle/video/most-recent/vdka0_g86w5onx',
+        'info_dict': {
+            'id': '0_g86w5onx',
+            'ext': 'mp4',
+            'title': 'Sneak Peek: Language Arts',
+            'description': 'md5:7dcdab3b2d17e5217c953256af964e9c',
+        },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        },
+    }, {
+        'url': 'http://abc.go.com/shows/after-paradise/video/most-recent/vdka3335601',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):  # Resolve the video id, fetch its metadata from the content API and build the info dict
+        sub_domain, video_id, display_id = re.match(self._VALID_URL, url).groups()  # a group is None when it did not take part in the match
+        if not video_id:  # episode-page URL: the id is not in the URL and has to be scraped from the page
+            webpage = self._download_webpage(url, display_id)
+            video_id = self._search_regex(r'data-video-id=["\']VDKA(\w+)', webpage, 'video id')  # captured without the VDKA prefix
+        brand = self._BRANDS[sub_domain]  # NOTE(review): raises KeyError when the URL has no brand subdomain (sub_domain is None)
+        video_data = self._download_json(
+            'http://api.contents.watchabc.go.com/vp2/ws/contents/3000/videos/%s/001/-1/-1/-1/%s/-1/-1.json' % (brand, video_id),
+            video_id)['video'][0]  # only the first entry of the 'video' list is used
+        title = video_data['title']
+
+        formats = []
+        for asset in video_data.get('assets', {}).get('asset', []):
+            asset_url = asset.get('value')
+            if not asset_url:  # assets without a URL are skipped
+                continue
+            format_id = asset.get('format')
+            ext = determine_ext(asset_url)
+            if ext == 'm3u8':
+                video_type = video_data.get('type')
+                if video_type == 'lf':  # 'lf' (presumably "long form" - TODO confirm) videos need a session key from the entitlement service
+                    entitlement = self._download_json(
+                        'https://api.entitlement.watchabc.go.com/vp2/ws-secure/entitlement/2020/authorize.json',
+                        video_id, data=urlencode_postdata({
+                            'video_id': video_data['id'],
+                            'video_type': video_type,
+                            'brand': brand,
+                            'device': '001',
+                        }))
+                    errors = entitlement.get('errors', {}).get('errors', [])
+                    if errors:  # report the service's own messages as an expected ExtractorError
+                        error_message = ', '.join([error['message'] for error in errors])
+                        raise ExtractorError('%s said: %s' % (self.IE_NAME, error_message), expected=True)
+                    asset_url += '?' + entitlement['uplynkData']['sessionKey']  # sessionKey is appended verbatim as the query string
+                formats.extend(self._extract_m3u8_formats(
+                    asset_url, video_id, 'mp4', m3u8_id=format_id or 'hls', fatal=False))
+            else:  # non-m3u8 asset: expose its URL directly as a single format
+                formats.append({
+                    'format_id': format_id,
+                    'url': asset_url,
+                    'ext': ext,
+                })
+        self._sort_formats(formats)
+
+        subtitles = {}
+        for cc in video_data.get('closedcaption', {}).get('src', []):
+            cc_url = cc.get('value')
+            if not cc_url:
+                continue
+            ext = determine_ext(cc_url)
+            if ext == 'xml':  # XML caption files are reported as TTML
+                ext = 'ttml'
+            subtitles.setdefault(cc.get('lang'), []).append({  # grouped by language; a missing 'lang' ends up under the None key
+                'url': cc_url,
+                'ext': ext,
+            })
+
+        thumbnails = []
+        for thumbnail in video_data.get('thumbnails', {}).get('thumbnail', []):
+            thumbnail_url = thumbnail.get('value')
+            if not thumbnail_url:
+                continue
+            thumbnails.append({
+                'url': thumbnail_url,
+                'width': int_or_none(thumbnail.get('width')),
+                'height': int_or_none(thumbnail.get('height')),
+            })
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': video_data.get('longdescription') or video_data.get('description'),
+            'duration': int_or_none(video_data.get('duration', {}).get('value'), 1000),  # presumably milliseconds scaled down to seconds - confirm against int_or_none's second argument
+            'age_limit': parse_age_limit(video_data.get('tvrating', {}).get('rating')),
+            'episode_number': int_or_none(video_data.get('episodenumber')),
+            'series': video_data.get('show', {}).get('title'),
+            'season_number': int_or_none(video_data.get('season', {}).get('num')),
+            'thumbnails': thumbnails,
+            'formats': formats,
+            'subtitles': subtitles,
+        }
index 731bacd673bd57fe82411268c5920a3e9c7447ac..427499b11286f00a8e10e09a8de1d9f84611b5c9 100644 (file)
@@ -10,7 +10,7 @@ from ..utils import unified_strdate
 
 class GooglePlusIE(InfoExtractor):
     IE_DESC = 'Google Plus'
-    _VALID_URL = r'https://plus\.google\.com/(?:[^/]+/)*?posts/(?P<id>\w+)'
+    _VALID_URL = r'https?://plus\.google\.com/(?:[^/]+/)*?posts/(?P<id>\w+)'
     IE_NAME = 'plus.google'
     _TEST = {
         'url': 'https://plus.google.com/u/0/108897254135232129896/posts/ZButuJc6CtH',
index 498304cb2bd9b605d44e67291a2f38bf4481a6f8..5279fa807f6903fa757c552b3e9ad3e013e5b494 100644 (file)
@@ -4,9 +4,6 @@ import itertools
 import re
 
 from .common import SearchInfoExtractor
-from ..compat import (
-    compat_urllib_parse,
-)
 
 
 class GoogleSearchIE(SearchInfoExtractor):
@@ -34,13 +31,16 @@ class GoogleSearchIE(SearchInfoExtractor):
         }
 
         for pagenum in itertools.count():
-            result_url = (
-                'http://www.google.com/search?tbm=vid&q=%s&start=%s&hl=en'
-                % (compat_urllib_parse.quote_plus(query), pagenum * 10))
-
             webpage = self._download_webpage(
-                result_url, 'gvsearch:' + query,
-                note='Downloading result page ' + str(pagenum + 1))
+                'http://www.google.com/search',
+                'gvsearch:' + query,
+                note='Downloading result page %s' % (pagenum + 1),
+                query={
+                    'tbm': 'vid',
+                    'q': query,
+                    'start': pagenum * 10,
+                    'hl': 'en',
+                })
 
             for hit_idx, mobj in enumerate(re.finditer(
                     r'<h3 class="r"><a href="([^"]+)"', webpage)):
index 0c015141fa322465b1476e035f87da223555b211..74e1720ee325da8fb4c011eddec342fe2de62d9b 100644 (file)
@@ -1,4 +1,4 @@
-# -*- coding: utf-8 -*-
+# coding: utf-8
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
@@ -11,7 +11,7 @@ from ..utils import (
 
 
 class GoshgayIE(InfoExtractor):
-    _VALID_URL = r'https?://www\.goshgay\.com/video(?P<id>\d+?)($|/)'
+    _VALID_URL = r'https?://(?:www\.)?goshgay\.com/video(?P<id>\d+?)($|/)'
     _TEST = {
         'url': 'http://www.goshgay.com/video299069/diesel_sfw_xxx_video',
         'md5': '4b6db9a0a333142eb9f15913142b0ed1',
index b6cc15b6fbad25c43fe0699668bd3ec452ed944d..342a6130ea10325d4b7e7ecee7ee86b130e90173 100644 (file)
@@ -1,11 +1,11 @@
-# -*- coding: utf-8 -*-
+# coding: utf-8
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
 
 
 class HarkIE(InfoExtractor):
-    _VALID_URL = r'https?://www\.hark\.com/clips/(?P<id>.+?)-.+'
+    _VALID_URL = r'https?://(?:www\.)?hark\.com/clips/(?P<id>.+?)-.+'
     _TEST = {
         'url': 'http://www.hark.com/clips/mmbzyhkgny-obama-beyond-the-afghan-theater-we-only-target-al-qaeda-on-may-23-2013',
         'md5': '6783a58491b47b92c7c1af5a77d4cbee',
index dad0f3994c93cd0a38a2be52741d7c55ce0e6749..cbf774377b7261c326bd71f5db2d5de8216be5f4 100644 (file)
@@ -12,17 +12,7 @@ from ..utils import (
 )
 
 
-class HBOIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?hbo\.com/video/video\.html\?.*vid=(?P<id>[0-9]+)'
-    _TEST = {
-        'url': 'http://www.hbo.com/video/video.html?autoplay=true&g=u&vid=1437839',
-        'md5': '1c33253f0c7782142c993c0ba62a8753',
-        'info_dict': {
-            'id': '1437839',
-            'ext': 'mp4',
-            'title': 'Ep. 64 Clip: Encryption',
-        }
-    }
+class HBOBaseIE(InfoExtractor):
     _FORMATS_INFO = {
         '1920': {
             'width': 1280,
@@ -50,8 +40,7 @@ class HBOIE(InfoExtractor):
         },
     }
 
-    def _real_extract(self, url):
-        video_id = self._match_id(url)
+    def _extract_from_id(self, video_id):
         video_data = self._download_xml(
             'http://render.lv3.hbo.com/data/content/global/videos/data/%s.xml' % video_id, video_id)
         title = xpath_text(video_data, 'title', 'title', True)
@@ -116,7 +105,60 @@ class HBOIE(InfoExtractor):
         return {
             'id': video_id,
             'title': title,
-            'duration': parse_duration(xpath_element(video_data, 'duration/tv14')),
+            'duration': parse_duration(xpath_text(video_data, 'duration/tv14')),
             'formats': formats,
             'thumbnails': thumbnails,
         }
+
+
+class HBOIE(HBOBaseIE):
+    _VALID_URL = r'https?://(?:www\.)?hbo\.com/video/video\.html\?.*vid=(?P<id>[0-9]+)'
+    _TEST = {
+        'url': 'http://www.hbo.com/video/video.html?autoplay=true&g=u&vid=1437839',
+        'md5': '1c33253f0c7782142c993c0ba62a8753',
+        'info_dict': {
+            'id': '1437839',
+            'ext': 'mp4',
+            'title': 'Ep. 64 Clip: Encryption',
+            'thumbnail': 're:https?://.*\.jpg$',
+            'duration': 1072,
+        }
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        return self._extract_from_id(video_id)
+
+
+class HBOEpisodeIE(HBOBaseIE):
+    _VALID_URL = r'https?://(?:www\.)?hbo\.com/(?!video)([^/]+/)+video/(?P<id>[0-9a-z-]+)\.html'
+
+    _TESTS = [{
+        'url': 'http://www.hbo.com/girls/episodes/5/52-i-love-you-baby/video/ep-52-inside-the-episode.html?autoplay=true',
+        'md5': '689132b253cc0ab7434237fc3a293210',
+        'info_dict': {
+            'id': '1439518',
+            'display_id': 'ep-52-inside-the-episode',
+            'ext': 'mp4',
+            'title': 'Ep. 52: Inside the Episode',
+            'thumbnail': 're:https?://.*\.jpg$',
+            'duration': 240,
+        },
+    }, {
+        'url': 'http://www.hbo.com/game-of-thrones/about/video/season-5-invitation-to-the-set.html?autoplay=true',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, display_id)
+
+        video_id = self._search_regex(
+            r'(?P<q1>[\'"])videoId(?P=q1)\s*:\s*(?P<q2>[\'"])(?P<video_id>\d+)(?P=q2)',
+            webpage, 'video ID', group='video_id')
+
+        info_dict = self._extract_from_id(video_id)
+        info_dict['display_id'] = display_id
+
+        return info_dict
index 7a1c75b655439a953e46f1692cd672c6c27374ef..10da1406787c51fc56d7fb55f120bb6908a5b49a 100644 (file)
@@ -6,12 +6,13 @@ from .common import InfoExtractor
 from ..utils import (
     js_to_json,
     remove_end,
+    determine_ext,
 )
 
 
 class HellPornoIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?hellporno\.com/videos/(?P<id>[^/]+)'
-    _TEST = {
+    _VALID_URL = r'https?://(?:www\.)?hellporno\.(?:com/videos|net/v)/(?P<id>[^/]+)'
+    _TESTS = [{
         'url': 'http://hellporno.com/videos/dixie-is-posing-with-naked-ass-very-erotic/',
         'md5': '1fee339c610d2049699ef2aa699439f1',
         'info_dict': {
@@ -22,7 +23,10 @@ class HellPornoIE(InfoExtractor):
             'thumbnail': 're:https?://.*\.jpg$',
             'age_limit': 18,
         }
-    }
+    }, {
+        'url': 'http://hellporno.net/v/186271/',
+        'only_matching': True,
+    }]
 
     def _real_extract(self, url):
         display_id = self._match_id(url)
@@ -38,7 +42,7 @@ class HellPornoIE(InfoExtractor):
 
         video_id = flashvars.get('video_id')
         thumbnail = flashvars.get('preview_url')
-        ext = flashvars.get('postfix', '.mp4')[1:]
+        ext = determine_ext(flashvars.get('postfix'), 'mp4')
 
         formats = []
         for video_url_key in ['video_url', 'video_alt_url']:
index 93107b3064ebfba513b3aa208556b5822f6cf979..575fb332a055465446fc5db9448313ec793d3258 100644 (file)
@@ -1,4 +1,4 @@
-# -*- coding: utf-8 -*-
+# coding: utf-8
 
 from __future__ import unicode_literals
 
index c3f0733cf7708287918d92e95a3e4179f733f4ef..69543bff2cb3c844b8ebe82d38cd33672e168bbe 100644 (file)
@@ -46,3 +46,34 @@ class HGTVIE(InfoExtractor):
             'episode_number': int_or_none(embed_vars.get('episode')),
             'ie_key': 'ThePlatform',
         }
+
+
+class HGTVComShowIE(InfoExtractor):
+    IE_NAME = 'hgtv.com:show'
+    _VALID_URL = r'https?://(?:www\.)?hgtv\.com/shows/[^/]+/(?P<id>[^/?#&]+)'
+    _TEST = {
+        'url': 'http://www.hgtv.com/shows/flip-or-flop/flip-or-flop-full-episodes-videos',
+        'info_dict': {
+            'id': 'flip-or-flop-full-episodes-videos',
+            'title': 'Flip or Flop Full Episodes',
+        },
+        'playlist_mincount': 15,
+    }
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, display_id)
+
+        config = self._parse_json(
+            self._search_regex(
+                r'(?s)data-module=["\']video["\'][^>]*>.*?<script[^>]+type=["\']text/x-config["\'][^>]*>(.+?)</script',
+                webpage, 'video config'),
+            display_id)['channels'][0]
+
+        entries = [
+            self.url_result(video['releaseUrl'])
+            for video in config['videos'] if video.get('releaseUrl')]
+
+        return self.playlist_result(
+            entries, display_id, config.get('title'), config.get('description'))
index 5b6efb27eedfe0097cc47d96f3f287ab1858e9e8..0615f06af4139acbd3164f5aaac1ab2ede4cdc27 100644 (file)
@@ -1,8 +1,6 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
-import re
-
 from .common import InfoExtractor
 from ..utils import (
     int_or_none,
@@ -14,29 +12,24 @@ class HornBunnyIE(InfoExtractor):
     _VALID_URL = r'http?://(?:www\.)?hornbunny\.com/videos/(?P<title_dash>[a-z-]+)-(?P<id>\d+)\.html'
     _TEST = {
         'url': 'http://hornbunny.com/videos/panty-slut-jerk-off-instruction-5227.html',
-        'md5': '95e40865aedd08eff60272b704852ad7',
+        'md5': 'e20fd862d1894b67564c96f180f43924',
         'info_dict': {
             'id': '5227',
-            'ext': 'flv',
+            'ext': 'mp4',
             'title': 'panty slut jerk off instruction',
             'duration': 550,
             'age_limit': 18,
+            'view_count': int,
+            'thumbnail': 're:^https?://.*\.jpg$',
         }
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-
-        webpage = self._download_webpage(
-            url, video_id, note='Downloading initial webpage')
-        title = self._html_search_regex(
-            r'class="title">(.*?)</h2>', webpage, 'title')
-        redirect_url = self._html_search_regex(
-            r'pg&settings=(.*?)\|0"\);', webpage, 'title')
-        webpage2 = self._download_webpage(redirect_url, video_id)
-        video_url = self._html_search_regex(
-            r'flvMask:(.*?);', webpage2, 'video_url')
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+        title = self._og_search_title(webpage)
+        info_dict = self._parse_html5_media_entries(url, webpage, video_id)[0]
 
         duration = parse_duration(self._search_regex(
             r'<strong>Runtime:</strong>\s*([0-9:]+)</div>',
@@ -45,12 +38,12 @@ class HornBunnyIE(InfoExtractor):
             r'<strong>Views:</strong>\s*(\d+)</div>',
             webpage, 'view count', fatal=False))
 
-        return {
+        info_dict.update({
             'id': video_id,
-            'url': video_url,
             'title': title,
-            'ext': 'flv',
             'duration': duration,
             'view_count': view_count,
             'age_limit': 18,
-        }
+        })
+
+        return info_dict
index 9db5652096acc5ead0cb926791d731d0f6f35565..34163725f8c9562380a3ea30a17780e599f3b0a7 100644 (file)
@@ -12,7 +12,7 @@ from ..utils import (
 
 
 class HotNewHipHopIE(InfoExtractor):
-    _VALID_URL = r'https?://www\.hotnewhiphop\.com/.*\.(?P<id>.*)\.html'
+    _VALID_URL = r'https?://(?:www\.)?hotnewhiphop\.com/.*\.(?P<id>.*)\.html'
     _TEST = {
         'url': 'http://www.hotnewhiphop.com/freddie-gibbs-lay-it-down-song.1435540.html',
         'md5': '2c2cd2f76ef11a9b3b581e8b232f3d96',
diff --git a/youtube_dl/extractor/huajiao.py b/youtube_dl/extractor/huajiao.py
new file mode 100644 (file)
index 0000000..cec0df0
--- /dev/null
@@ -0,0 +1,56 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    parse_duration,
+    parse_iso8601,
+)
+
+
+class HuajiaoIE(InfoExtractor):
+    IE_DESC = '花椒直播'
+    _VALID_URL = r'https?://(?:www\.)?huajiao\.com/l/(?P<id>[0-9]+)'
+    _TEST = {
+        'url': 'http://www.huajiao.com/l/38941232',
+        'md5': 'd08bf9ac98787d24d1e4c0283f2d372d',
+        'info_dict': {
+            'id': '38941232',
+            'ext': 'mp4',
+            'title': '#新人求关注#',
+            'description': 're:.*',
+            'duration': 2424.0,
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'timestamp': 1475866459,
+            'upload_date': '20161007',
+            'uploader': 'Penny_余姿昀',
+            'uploader_id': '75206005',
+        }
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        feed_json = self._search_regex(
+            r'var\s+feed\s*=\s*({.+})', webpage, 'feed json')
+        feed = self._parse_json(feed_json, video_id)
+
+        description = self._html_search_meta(
+            'description', webpage, 'description', fatal=False)
+
+        def get(section, field):
+            return feed.get(section, {}).get(field)
+
+        return {
+            'id': video_id,
+            'title': feed['feed']['formated_title'],
+            'description': description,
+            'duration': parse_duration(get('feed', 'duration')),
+            'thumbnail': get('feed', 'image'),
+            'timestamp': parse_iso8601(feed.get('creatime'), ' '),
+            'uploader': get('author', 'nickname'),
+            'uploader_id': get('author', 'uid'),
+            'formats': self._extract_m3u8_formats(
+                feed['feed']['m3u8'], video_id, 'mp4', 'm3u8_native'),
+        }
index 0acce9f4c2525a62e3d3ad22b16743737dbb5b07..f0fc8d49a4ad50c128d124534fc37141cb510ba6 100644 (file)
@@ -6,6 +6,7 @@ from .common import InfoExtractor
 from ..utils import (
     mimetype2ext,
     qualities,
+    remove_end,
 )
 
 
@@ -19,7 +20,7 @@ class ImdbIE(InfoExtractor):
         'info_dict': {
             'id': '2524815897',
             'ext': 'mp4',
-            'title': 'Ice Age: Continental Drift Trailer (No. 2) - IMDb',
+            'title': 'Ice Age: Continental Drift Trailer (No. 2)',
             'description': 'md5:9061c2219254e5d14e03c25c98e96a81',
         }
     }, {
@@ -83,17 +84,17 @@ class ImdbIE(InfoExtractor):
 
         return {
             'id': video_id,
-            'title': self._og_search_title(webpage),
+            'title': remove_end(self._og_search_title(webpage), ' - IMDb'),
             'formats': formats,
             'description': descr,
-            'thumbnail': format_info['slate'],
+            'thumbnail': format_info.get('slate'),
         }
 
 
 class ImdbListIE(InfoExtractor):
     IE_NAME = 'imdb:list'
     IE_DESC = 'Internet Movie Database lists'
-    _VALID_URL = r'https?://www\.imdb\.com/list/(?P<id>[\da-zA-Z_-]{11})'
+    _VALID_URL = r'https?://(?:www\.)?imdb\.com/list/(?P<id>[\da-zA-Z_-]{11})'
     _TEST = {
         'url': 'http://www.imdb.com/list/JFs9NWw6XI0',
         'info_dict': {
index d23489dcff4464c1294d9c6f5dca30aa480ad3a8..67c24a51c861f4dd9a1da8f790d61469c8e2220c 100644 (file)
@@ -13,7 +13,7 @@ from ..utils import (
 
 
 class ImgurIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:i\.)?imgur\.com/(?:(?:gallery|topic/[^/]+)/)?(?P<id>[a-zA-Z0-9]{6,})(?:[/?#&]+|\.[a-z]+)?$'
+    _VALID_URL = r'https?://(?:i\.)?imgur\.com/(?:(?:gallery|(?:topic|r)/[^/]+)/)?(?P<id>[a-zA-Z0-9]{6,})(?:[/?#&]+|\.[a-z]+)?$'
 
     _TESTS = [{
         'url': 'https://i.imgur.com/A61SaA1.gifv',
@@ -43,6 +43,9 @@ class ImgurIE(InfoExtractor):
     }, {
         'url': 'http://imgur.com/topic/Funny/N8rOudd',
         'only_matching': True,
+    }, {
+        'url': 'http://imgur.com/r/aww/VQcQPhM',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
index 65712abc28c3cc68cab7052ab709b2c1e6500cb5..9544ff9d469c52a932cc3a8fed0dafbed9f4ae83 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 import re
index 8f7f232bea720ce0cfbf3c8e6aa9b38bddb93658..196407b063a9393b94c759be6c8080de9a494277 100644 (file)
@@ -29,6 +29,7 @@ class InstagramIE(InfoExtractor):
             'uploader': 'Naomi Leonor Phan-Quang',
             'like_count': int,
             'comment_count': int,
+            'comments': list,
         },
     }, {
         # missing description
@@ -44,6 +45,7 @@ class InstagramIE(InfoExtractor):
             'uploader': 'Britney Spears',
             'like_count': int,
             'comment_count': int,
+            'comments': list,
         },
         'params': {
             'skip_download': True,
@@ -82,7 +84,7 @@ class InstagramIE(InfoExtractor):
         webpage = self._download_webpage(url, video_id)
 
         (video_url, description, thumbnail, timestamp, uploader,
-         uploader_id, like_count, comment_count) = [None] * 8
+         uploader_id, like_count, comment_count, height, width) = [None] * 10
 
         shared_data = self._parse_json(
             self._search_regex(
@@ -94,6 +96,8 @@ class InstagramIE(InfoExtractor):
                 shared_data, lambda x: x['entry_data']['PostPage'][0]['media'], dict)
             if media:
                 video_url = media.get('video_url')
+                height = int_or_none(media.get('dimensions', {}).get('height'))
+                width = int_or_none(media.get('dimensions', {}).get('width'))
                 description = media.get('caption')
                 thumbnail = media.get('display_src')
                 timestamp = int_or_none(media.get('date'))
@@ -101,10 +105,24 @@ class InstagramIE(InfoExtractor):
                 uploader_id = media.get('owner', {}).get('username')
                 like_count = int_or_none(media.get('likes', {}).get('count'))
                 comment_count = int_or_none(media.get('comments', {}).get('count'))
+                comments = [{
+                    'author': comment.get('user', {}).get('username'),
+                    'author_id': comment.get('user', {}).get('id'),
+                    'id': comment.get('id'),
+                    'text': comment.get('text'),
+                    'timestamp': int_or_none(comment.get('created_at')),
+                } for comment in media.get(
+                    'comments', {}).get('nodes', []) if comment.get('text')]
 
         if not video_url:
             video_url = self._og_search_video_url(webpage, secure=False)
 
+        formats = [{
+            'url': video_url,
+            'width': width,
+            'height': height,
+        }]
+
         if not uploader_id:
             uploader_id = self._search_regex(
                 r'"owner"\s*:\s*{\s*"username"\s*:\s*"(.+?)"',
@@ -121,7 +139,7 @@ class InstagramIE(InfoExtractor):
 
         return {
             'id': video_id,
-            'url': video_url,
+            'formats': formats,
             'ext': 'mp4',
             'title': 'Video by %s' % uploader_id,
             'description': description,
@@ -131,6 +149,7 @@ class InstagramIE(InfoExtractor):
             'uploader': uploader,
             'like_count': like_count,
             'comment_count': comment_count,
+            'comments': comments,
         }
 
 
index 45add007fd99c8bd80f16c1becfb42cf403d45d5..76cc5ec3ee21450f724564ef0c75f9c08931d2f7 100644 (file)
@@ -48,13 +48,23 @@ class InternetVideoArchiveIE(InfoExtractor):
             # There are multiple videos in the playlist whlie only the first one
             # matches the video played in browsers
             video_info = configuration['playlist'][0]
+            title = video_info['title']
 
             formats = []
             for source in video_info['sources']:
                 file_url = source['file']
                 if determine_ext(file_url) == 'm3u8':
-                    formats.extend(self._extract_m3u8_formats(
-                        file_url, video_id, ext='mp4', m3u8_id='hls'))
+                    m3u8_formats = self._extract_m3u8_formats(
+                        file_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)
+                    if m3u8_formats:
+                        formats.extend(m3u8_formats)
+                        file_url = m3u8_formats[0]['url']
+                        formats.extend(self._extract_f4m_formats(
+                            file_url.replace('.m3u8', '.f4m'),
+                            video_id, f4m_id='hds', fatal=False))
+                        formats.extend(self._extract_mpd_formats(
+                            file_url.replace('.m3u8', '.mpd'),
+                            video_id, mpd_id='dash', fatal=False))
                 else:
                     a_format = {
                         'url': file_url,
@@ -70,7 +80,6 @@ class InternetVideoArchiveIE(InfoExtractor):
 
             self._sort_formats(formats)
 
-            title = video_info['title']
             description = video_info.get('description')
             thumbnail = video_info.get('image')
         else:
index 788bbe0d5c44177b5a943da9f9c3c3adf46a77b1..da2cdc656ac90f15a575eceabf33309b084c8f28 100644 (file)
@@ -81,6 +81,9 @@ class IPrimaIE(InfoExtractor):
             for _, src in re.findall(r'src["\']\s*:\s*(["\'])(.+?)\1', playerpage):
                 extract_formats(src)
 
+        if not formats and '>GEO_IP_NOT_ALLOWED<' in playerpage:
+            self.raise_geo_restricted()
+
         self._sort_formats(formats)
 
         return {
index 472d72b4c34fa3305b6b2808be1e45c6da25a60e..7c8cb21c2c5619b4809f5daf8605958a808eccb9 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 import re
@@ -8,7 +8,7 @@ from .common import InfoExtractor
 from ..utils import (
     ExtractorError,
     int_or_none,
-    sanitized_Request,
+    qualities,
 )
 
 
@@ -49,11 +49,27 @@ class IviIE(InfoExtractor):
                 'thumbnail': 're:^https?://.*\.jpg$',
             },
             'skip': 'Only works from Russia',
+        },
+        {
+            # with MP4-HD720 format
+            'url': 'http://www.ivi.ru/watch/146500',
+            'md5': 'd63d35cdbfa1ea61a5eafec7cc523e1e',
+            'info_dict': {
+                'id': '146500',
+                'ext': 'mp4',
+                'title': 'Кукла',
+                'description': 'md5:ffca9372399976a2d260a407cc74cce6',
+                'duration': 5599,
+                'thumbnail': 're:^https?://.*\.jpg$',
+            },
+            'skip': 'Only works from Russia',
         }
     ]
 
     # Sorted by quality
-    _KNOWN_FORMATS = ['MP4-low-mobile', 'MP4-mobile', 'FLV-lo', 'MP4-lo', 'FLV-hi', 'MP4-hi', 'MP4-SHQ']
+    _KNOWN_FORMATS = (
+        'MP4-low-mobile', 'MP4-mobile', 'FLV-lo', 'MP4-lo', 'FLV-hi', 'MP4-hi',
+        'MP4-SHQ', 'MP4-HD720', 'MP4-HD1080')
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
@@ -69,10 +85,9 @@ class IviIE(InfoExtractor):
             ]
         }
 
-        request = sanitized_Request(
-            'http://api.digitalaccess.ru/api/json/', json.dumps(data))
         video_json = self._download_json(
-            request, video_id, 'Downloading video JSON')
+            'http://api.digitalaccess.ru/api/json/', video_id,
+            'Downloading video JSON', data=json.dumps(data))
 
         if 'error' in video_json:
             error = video_json['error']
@@ -84,11 +99,13 @@ class IviIE(InfoExtractor):
 
         result = video_json['result']
 
+        quality = qualities(self._KNOWN_FORMATS)
+
         formats = [{
             'url': x['url'],
-            'format_id': x['content_format'],
-            'preference': self._KNOWN_FORMATS.index(x['content_format']),
-        } for x in result['files'] if x['content_format'] in self._KNOWN_FORMATS]
+            'format_id': x.get('content_format'),
+            'quality': quality(x.get('content_format')),
+        } for x in result['files'] if x.get('url')]
 
         self._sort_formats(formats)
 
@@ -115,7 +132,7 @@ class IviIE(InfoExtractor):
             webpage, 'season number', default=None))
 
         episode_number = int_or_none(self._search_regex(
-            r'<meta[^>]+itemprop="episode"[^>]*>\s*<meta[^>]+itemprop="episodeNumber"[^>]+content="(\d+)',
+            r'[^>]+itemprop="episode"[^>]*>\s*<meta[^>]+itemprop="episodeNumber"[^>]+content="(\d+)',
             webpage, 'episode number', default=None))
 
         description = self._og_search_description(webpage, default=None) or self._html_search_meta(
diff --git a/youtube_dl/extractor/iwara.py b/youtube_dl/extractor/iwara.py
new file mode 100644 (file)
index 0000000..8d7e7f4
--- /dev/null
@@ -0,0 +1,77 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_urllib_parse_urlparse
+from ..utils import remove_end
+
+
+class IwaraIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.|ecchi\.)?iwara\.tv/videos/(?P<id>[a-zA-Z0-9]+)'
+    _TESTS = [{
+        'url': 'http://iwara.tv/videos/amVwUl1EHpAD9RD',
+        'md5': '1d53866b2c514b23ed69e4352fdc9839',
+        'info_dict': {
+            'id': 'amVwUl1EHpAD9RD',
+            'ext': 'mp4',
+            'title': '【MMD R-18】ガールフレンド carry_me_off',
+            'age_limit': 18,
+        },
+    }, {
+        'url': 'http://ecchi.iwara.tv/videos/Vb4yf2yZspkzkBO',
+        'md5': '7e5f1f359cd51a027ba4a7b7710a50f0',
+        'info_dict': {
+            'id': '0B1LvuHnL-sRFNXB1WHNqbGw4SXc',
+            'ext': 'mp4',
+            'title': '[3D Hentai] Kyonyu Ã\x97 Genkai Ã\x97 Emaki Shinobi Girls.mp4',
+            'age_limit': 18,
+        },
+        'add_ie': ['GoogleDrive'],
+    }, {
+        'url': 'http://www.iwara.tv/videos/nawkaumd6ilezzgq',
+        'md5': '1d85f1e5217d2791626cff5ec83bb189',
+        'info_dict': {
+            'id': '6liAP9s2Ojc',
+            'ext': 'mp4',
+            'age_limit': 0,
+            'title': '[MMD] Do It Again Ver.2 [1080p 60FPS] (Motion,Camera,Wav+DL)',
+            'description': 'md5:590c12c0df1443d833fbebe05da8c47a',
+            'upload_date': '20160910',
+            'uploader': 'aMMDsork',
+            'uploader_id': 'UCVOFyOSCyFkXTYYHITtqB7A',
+        },
+        'add_ie': ['Youtube'],
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        webpage, urlh = self._download_webpage_handle(url, video_id)
+
+        hostname = compat_urllib_parse_urlparse(urlh.geturl()).hostname
+        # ecchi is 'sexy' in Japanese
+        age_limit = 18 if hostname.split('.')[0] == 'ecchi' else 0
+
+        entries = self._parse_html5_media_entries(url, webpage, video_id)
+
+        if not entries:
+            iframe_url = self._html_search_regex(
+                r'<iframe[^>]+src=([\'"])(?P<url>[^\'"]+)\1',
+                webpage, 'iframe URL', group='url')
+            return {
+                '_type': 'url_transparent',
+                'url': iframe_url,
+                'age_limit': age_limit,
+            }
+
+        title = remove_end(self._html_search_regex(
+            r'<title>([^<]+)</title>', webpage, 'title'), ' | Iwara')
+
+        info_dict = entries[0]
+        info_dict.update({
+            'id': video_id,
+            'title': title,
+            'age_limit': age_limit,
+        })
+
+        return info_dict
diff --git a/youtube_dl/extractor/jamendo.py b/youtube_dl/extractor/jamendo.py
new file mode 100644 (file)
index 0000000..ee9acac
--- /dev/null
@@ -0,0 +1,107 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from ..compat import compat_urlparse
+from .common import InfoExtractor
+
+
+class JamendoIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?jamendo\.com/track/(?P<id>[0-9]+)/(?P<display_id>[^/?#&]+)'
+    _TEST = {
+        'url': 'https://www.jamendo.com/track/196219/stories-from-emona-i',
+        'md5': '6e9e82ed6db98678f171c25a8ed09ffd',
+        'info_dict': {
+            'id': '196219',
+            'display_id': 'stories-from-emona-i',
+            'ext': 'flac',
+            'title': 'Stories from Emona I',
+            'thumbnail': 're:^https?://.*\.jpg'
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = self._VALID_URL_RE.match(url)
+        track_id = mobj.group('id')
+        display_id = mobj.group('display_id')
+
+        webpage = self._download_webpage(url, display_id)
+
+        title = self._html_search_meta('name', webpage, 'title')
+
+        formats = [{
+            'url': 'https://%s.jamendo.com/?trackid=%s&format=%s&from=app-97dab294'
+                   % (sub_domain, track_id, format_id),
+            'format_id': format_id,
+            'ext': ext,
+            'quality': quality,
+        } for quality, (format_id, sub_domain, ext) in enumerate((
+            ('mp31', 'mp3l', 'mp3'),
+            ('mp32', 'mp3d', 'mp3'),
+            ('ogg1', 'ogg', 'ogg'),
+            ('flac', 'flac', 'flac'),
+        ))]
+        self._sort_formats(formats)
+
+        thumbnail = self._html_search_meta(
+            'image', webpage, 'thumbnail', fatal=False)
+
+        return {
+            'id': track_id,
+            'display_id': display_id,
+            'thumbnail': thumbnail,
+            'title': title,
+            'formats': formats
+        }
+
+
+class JamendoAlbumIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?jamendo\.com/album/(?P<id>[0-9]+)/(?P<display_id>[\w-]+)'
+    _TEST = {
+        'url': 'https://www.jamendo.com/album/121486/duck-on-cover',
+        'info_dict': {
+            'id': '121486',
+            'title': 'Duck On Cover'
+        },
+        'playlist': [{
+            'md5': 'e1a2fcb42bda30dfac990212924149a8',
+            'info_dict': {
+                'id': '1032333',
+                'ext': 'flac',
+                'title': 'Warmachine'
+            }
+        }, {
+            'md5': '1f358d7b2f98edfe90fd55dac0799d50',
+            'info_dict': {
+                'id': '1032330',
+                'ext': 'flac',
+                'title': 'Without Your Ghost'
+            }
+        }],
+        'params': {
+            'playlistend': 2
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = self._VALID_URL_RE.match(url)
+        album_id = mobj.group('id')
+
+        webpage = self._download_webpage(url, mobj.group('display_id'))
+
+        title = self._html_search_meta('name', webpage, 'title')
+
+        entries = [
+            self.url_result(
+                compat_urlparse.urljoin(url, m.group('path')),
+                ie=JamendoIE.ie_key(),
+                video_id=self._search_regex(
+                    r'/track/(\d+)', m.group('path'),
+                    'track id', default=None))
+            for m in re.finditer(
+                r'<a[^>]+href=(["\'])(?P<path>(?:(?!\1).)+)\1[^>]+class=["\'][^>]*js-trackrow-albumpage-link',
+                webpage)
+        ]
+
+        return self.playlist_result(entries, album_id, title)
index 122e2dd8cad8c9fba6d861a80d77752e1b508301..4b5f346d1ef909e286b5c0555ab07a6e20bc11d4 100644 (file)
@@ -1,4 +1,4 @@
-# coding=utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
index ce3126943939063bc65c4c710e10ea61153ac036..5d56e0a28bd55b93153a92446834ba440ad59572 100644 (file)
@@ -9,6 +9,7 @@ from ..utils import (
     determine_ext,
     float_or_none,
     int_or_none,
+    js_to_json,
     mimetype2ext,
 )
 
@@ -19,24 +20,32 @@ class JWPlatformBaseIE(InfoExtractor):
         # TODO: Merge this with JWPlayer-related codes in generic.py
 
         mobj = re.search(
-            'jwplayer\((?P<quote>[\'"])[^\'" ]+(?P=quote)\)\.setup\((?P<options>[^)]+)\)',
+            r'jwplayer\((?P<quote>[\'"])[^\'" ]+(?P=quote)\)\.setup\s*\((?P<options>[^)]+)\)',
             webpage)
         if mobj:
             return mobj.group('options')
 
     def _extract_jwplayer_data(self, webpage, video_id, *args, **kwargs):
         jwplayer_data = self._parse_json(
-            self._find_jwplayer_data(webpage), video_id)
+            self._find_jwplayer_data(webpage), video_id,
+            transform_source=js_to_json)
         return self._parse_jwplayer_data(
             jwplayer_data, video_id, *args, **kwargs)
 
-    def _parse_jwplayer_data(self, jwplayer_data, video_id=None, require_title=True, m3u8_id=None, rtmp_params=None, base_url=None):
+    def _parse_jwplayer_data(self, jwplayer_data, video_id=None, require_title=True,
+                             m3u8_id=None, mpd_id=None, rtmp_params=None, base_url=None):
         # JWPlayer backward compatibility: flattened playlists
         # https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/api/config.js#L81-L96
         if 'playlist' not in jwplayer_data:
             jwplayer_data = {'playlist': [jwplayer_data]}
 
         entries = []
+
+        # JWPlayer backward compatibility: single playlist item
+        # https://github.com/jwplayer/jwplayer/blob/v7.7.0/src/js/playlist/playlist.js#L10
+        if not isinstance(jwplayer_data['playlist'], list):
+            jwplayer_data['playlist'] = [jwplayer_data['playlist']]
+
         for video_data in jwplayer_data['playlist']:
             # JWPlayer backward compatibility: flattened sources
             # https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/playlist/item.js#L29-L35
@@ -55,6 +64,9 @@ class JWPlatformBaseIE(InfoExtractor):
                 if source_type == 'hls' or ext == 'm3u8':
                     formats.extend(self._extract_m3u8_formats(
                         source_url, this_video_id, 'mp4', 'm3u8_native', m3u8_id=m3u8_id, fatal=False))
+                elif ext == 'mpd':
+                    formats.extend(self._extract_mpd_formats(
+                        source_url, this_video_id, mpd_id=mpd_id, fatal=False))
                 # https://github.com/jwplayer/jwplayer/blob/master/src/js/providers/default.js#L67
                 elif source_type.startswith('audio') or ext in ('oga', 'aac', 'mp3', 'mpeg', 'vorbis'):
                     formats.append({
@@ -63,10 +75,17 @@ class JWPlatformBaseIE(InfoExtractor):
                         'ext': ext,
                     })
                 else:
+                    height = int_or_none(source.get('height'))
+                    if height is None:
+                        # Often no height is provided but there is a label in
+                        # format like 1080p.
+                        height = int_or_none(self._search_regex(
+                            r'^(\d{3,})[pP]$', source.get('label') or '',
+                            'height', default=None))
                     a_format = {
                         'url': source_url,
                         'width': int_or_none(source.get('width')),
-                        'height': int_or_none(source.get('height')),
+                        'height': height,
                         'ext': ext,
                     }
                     if source_url.startswith('rtmp'):
index ddf1165ffb021005119622d3f162cbce9c637b55..91bc3a0a7c0af4690cf1a16713de1e76bccaa67a 100644 (file)
@@ -36,6 +36,12 @@ class KalturaIE(InfoExtractor):
                 '''
     _SERVICE_URL = 'http://cdnapi.kaltura.com'
     _SERVICE_BASE = '/api_v3/index.php'
+    # See https://github.com/kaltura/server/blob/master/plugins/content/caption/base/lib/model/enums/CaptionType.php
+    _CAPTION_TYPES = {
+        1: 'srt',
+        2: 'ttml',
+        3: 'vtt',
+    }
     _TESTS = [
         {
             'url': 'kaltura:269692:1_1jc2y3e4',
@@ -67,6 +73,27 @@ class KalturaIE(InfoExtractor):
             # video with subtitles
             'url': 'kaltura:111032:1_cw786r8q',
             'only_matching': True,
+        },
+        {
+            # video with ttml subtitles (no fileExt)
+            'url': 'kaltura:1926081:0_l5ye1133',
+            'info_dict': {
+                'id': '0_l5ye1133',
+                'ext': 'mp4',
+                'title': 'What Can You Do With Python?',
+                'upload_date': '20160221',
+                'uploader_id': 'stork',
+                'thumbnail': 're:^https?://.*/thumbnail/.*',
+                'timestamp': int,
+                'subtitles': {
+                    'en': [{
+                        'ext': 'ttml',
+                    }],
+                },
+            },
+            'params': {
+                'skip_download': True,
+            },
         }
     ]
 
@@ -78,20 +105,20 @@ class KalturaIE(InfoExtractor):
                     kWidget\.(?:thumb)?[Ee]mbed\(
                     \{.*?
                         (?P<q1>['\"])wid(?P=q1)\s*:\s*
-                        (?P<q2>['\"])_?(?P<partner_id>[^'\"]+)(?P=q2),.*?
+                        (?P<q2>['\"])_?(?P<partner_id>(?:(?!(?P=q2)).)+)(?P=q2),.*?
                         (?P<q3>['\"])entry_?[Ii]d(?P=q3)\s*:\s*
-                        (?P<q4>['\"])(?P<id>[^'\"]+)(?P=q4),
+                        (?P<q4>['\"])(?P<id>(?:(?!(?P=q4)).)+)(?P=q4),
                 """, webpage) or
             re.search(
                 r'''(?xs)
                     (?P<q1>["\'])
-                        (?:https?:)?//cdnapi(?:sec)?\.kaltura\.com/.*?(?:p|partner_id)/(?P<partner_id>\d+).*?
+                        (?:https?:)?//cdnapi(?:sec)?\.kaltura\.com/(?:(?!(?P=q1)).)*(?:p|partner_id)/(?P<partner_id>\d+)(?:(?!(?P=q1)).)*
                     (?P=q1).*?
                     (?:
                         entry_?[Ii]d|
                         (?P<q2>["\'])entry_?[Ii]d(?P=q2)
                     )\s*:\s*
-                    (?P<q3>["\'])(?P<id>.+?)(?P=q3)
+                    (?P<q3>["\'])(?P<id>(?:(?!(?P=q3)).)+)(?P=q3)
                 ''', webpage))
         if mobj:
             embed_info = mobj.groupdict()
@@ -122,18 +149,6 @@ class KalturaIE(InfoExtractor):
 
         return data
 
-    def _get_kaltura_signature(self, video_id, partner_id, service_url=None):
-        actions = [{
-            'apiVersion': '3.1',
-            'expiry': 86400,
-            'format': 1,
-            'service': 'session',
-            'action': 'startWidgetSession',
-            'widgetId': '_%s' % partner_id,
-        }]
-        return self._kaltura_api_call(
-            video_id, actions, service_url, note='Downloading Kaltura signature')['ks']
-
     def _get_video_info(self, video_id, partner_id, service_url=None):
         actions = [
             {
@@ -208,6 +223,17 @@ class KalturaIE(InfoExtractor):
                     reference_id)['entryResult']
                 info, flavor_assets = entry_data['meta'], entry_data['contextData']['flavorAssets']
                 entry_id = info['id']
+                # Unfortunately, data returned in kalturaIframePackageData lacks
+                # captions so we will try requesting the complete data using
+                # regular approach since we now know the entry_id
+                try:
+                    _, info, flavor_assets, captions = self._get_video_info(
+                        entry_id, partner_id)
+                except ExtractorError:
+                    # Regular scenario failed but we already have everything
+                    # extracted apart from captions and can process at least
+                    # with this
+                    pass
             else:
                 raise ExtractorError('Invalid URL', expected=True)
             ks = params.get('flashvars[ks]', [None])[0]
@@ -236,8 +262,16 @@ class KalturaIE(InfoExtractor):
             # Continue if asset is not ready
             if f.get('status') != 2:
                 continue
+            # Original format that's not available (e.g. kaltura:1926081:0_c03e1b5g)
+            # skip for now.
+            if f.get('fileExt') == 'chun':
+                continue
             video_url = sign_url(
                 '%s/flavorId/%s' % (data_url, f['id']))
+            # audio-only has no videoCodecId (e.g. kaltura:1926081:0_c03e1b5g
+            # -f mp4-56)
+            vcodec = 'none' if 'videoCodecId' not in f and f.get(
+                'frameRate') == 0 else f.get('videoCodecId')
             formats.append({
                 'format_id': '%(fileExt)s-%(bitrate)s' % f,
                 'ext': f.get('fileExt'),
@@ -245,7 +279,7 @@ class KalturaIE(InfoExtractor):
                 'fps': int_or_none(f.get('frameRate')),
                 'filesize_approx': int_or_none(f.get('size'), invscale=1024),
                 'container': f.get('containerFormat'),
-                'vcodec': f.get('videoCodecId'),
+                'vcodec': vcodec,
                 'height': int_or_none(f.get('height')),
                 'width': int_or_none(f.get('width')),
                 'url': video_url,
@@ -265,9 +299,12 @@ class KalturaIE(InfoExtractor):
                 # Continue if caption is not ready
                 if f.get('status') != 2:
                     continue
+                if not caption.get('id'):
+                    continue
+                caption_format = int_or_none(caption.get('format'))
                 subtitles.setdefault(caption.get('languageCode') or caption.get('language'), []).append({
                     'url': '%s/api_v3/service/caption_captionasset/action/serve/captionAssetId/%s' % (self._SERVICE_URL, caption['id']),
-                    'ext': caption.get('fileExt'),
+                    'ext': caption.get('fileExt') or self._CAPTION_TYPES.get(caption_format) or 'ttml',
                 })
 
         return {
index a6050c4de3e1695ac26bd1a21bab981a52755c21..bfccf89b0fda0be1100764290681a53e022947e0 100644 (file)
@@ -5,7 +5,7 @@ from .common import InfoExtractor
 
 
 class KaraoketvIE(InfoExtractor):
-    _VALID_URL = r'http://www.karaoketv.co.il/[^/]+/(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www\.)?karaoketv\.co\.il/[^/]+/(?P<id>\d+)'
     _TEST = {
         'url': 'http://www.karaoketv.co.il/%D7%A9%D7%99%D7%A8%D7%99_%D7%A7%D7%A8%D7%99%D7%95%D7%A7%D7%99/58356/%D7%90%D7%99%D7%96%D7%95%D7%9F',
         'info_dict': {
index ad2f8a8c8dc880df28d61759dd567fd40f64b3c9..588a4d0ec4eda6e38817b26f192536c40a172f3e 100644 (file)
@@ -39,7 +39,9 @@ class KeezMoviesIE(InfoExtractor):
     def _extract_info(self, url):
         mobj = re.match(self._VALID_URL, url)
         video_id = mobj.group('id')
-        display_id = mobj.group('display_id') or video_id
+        display_id = (mobj.group('display_id')
+                      if 'display_id' in mobj.groupdict()
+                      else None) or mobj.group('id')
 
         webpage = self._download_webpage(
             url, display_id, headers={'Cookie': 'age_verified=1'})
diff --git a/youtube_dl/extractor/ketnet.py b/youtube_dl/extractor/ketnet.py
new file mode 100644 (file)
index 0000000..eb0a160
--- /dev/null
@@ -0,0 +1,78 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class KetnetIE(InfoExtractor):
+    """Extractor for ketnet.be pages that embed a ``playerConfig`` object."""
+
+    _VALID_URL = r'https?://(?:www\.)?ketnet\.be/(?:[^/]+/)*(?P<id>[^/?#&]+)'
+    _TESTS = [{
+        'url': 'https://www.ketnet.be/kijken/zomerse-filmpjes',
+        'md5': 'd907f7b1814ef0fa285c0475d9994ed7',
+        'info_dict': {
+            'id': 'zomerse-filmpjes',
+            'ext': 'mp4',
+            'title': 'Gluur mee op de filmset en op Pennenzakkenrock',
+            'description': 'Gluur mee met Ghost Rockers op de filmset',
+            'thumbnail': 're:^https?://.*\.jpg$',
+        }
+    }, {
+        'url': 'https://www.ketnet.be/kijken/karrewiet/uitzending-8-september-2016',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.ketnet.be/achter-de-schermen/sien-repeteert-voor-stars-for-life',
+        'only_matching': True,
+    }, {
+        # mzsource, geo restricted to Belgium
+        'url': 'https://www.ketnet.be/kijken/nachtwacht/de-bermadoe',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        """Download the page at `url` and build its info dict.
+
+        Metadata and stream URLs are read from the JSON object assigned to
+        ``playerConfig`` in the page.
+        """
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        raw_config = self._search_regex(
+            r'(?s)playerConfig\s*=\s*({.+?})\s*;', webpage, 'player config')
+        config = self._parse_json(raw_config, video_id)
+
+        title = config['title']
+
+        # Stream URLs are looked up under "source" and "mzsource"; a value
+        # that is missing or not a mapping is ignored.
+        formats = []
+        for prefix in ('', 'mz'):
+            streams = config.get('%ssource' % prefix)
+            if isinstance(streams, dict):
+                for format_id, format_url in streams.items():
+                    if format_id == 'hds':
+                        formats.extend(self._extract_f4m_formats(
+                            format_url, video_id, f4m_id=format_id,
+                            fatal=False))
+                    elif format_id == 'hls':
+                        formats.extend(self._extract_m3u8_formats(
+                            format_url, video_id, 'mp4',
+                            entry_protocol='m3u8_native', m3u8_id=format_id,
+                            fatal=False))
+                    else:
+                        formats.append({
+                            'format_id': format_id,
+                            'url': format_url,
+                        })
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'formats': formats,
+            'series': config.get('program'),
+            'episode': config.get('episode'),
+            'description': config.get('description'),
+            'thumbnail': config.get('image'),
+        }
index 9f1ade2e46e8e2905adaa65eeaf2de22bfed8d2c..d4da8f48462f61358c649537cb1a41d47d9e82b1 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
@@ -6,7 +6,7 @@ from ..utils import smuggle_url
 
 
 class KickStarterIE(InfoExtractor):
-    _VALID_URL = r'https?://www\.kickstarter\.com/projects/(?P<id>[^/]*)/.*'
+    _VALID_URL = r'https?://(?:www\.)?kickstarter\.com/projects/(?P<id>[^/]*)/.*'
     _TESTS = [{
         'url': 'https://www.kickstarter.com/projects/1404461844/intersection-the-story-of-josh-grant/description',
         'md5': 'c81addca81327ffa66c642b5d8b08cab',
@@ -37,7 +37,6 @@ class KickStarterIE(InfoExtractor):
             'ext': 'mp4',
             'title': 'Power Drive 2000',
         },
-        'expected_warnings': ['OpenGraph description'],
     }]
 
     def _real_extract(self, url):
@@ -67,6 +66,6 @@ class KickStarterIE(InfoExtractor):
             'id': video_id,
             'url': video_url,
             'title': title,
-            'description': self._og_search_description(webpage),
+            'description': self._og_search_description(webpage, default=None),
             'thumbnail': thumbnail,
         }
index 704bd7b34554af60dfec9b811251f5270cbd1f55..1fda451075e4e0638e0799fc2bb976f21a4bcf8e 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 import re
index 0ae8ebd687034343c364dbc968d90d84f5bc37df..cf8876fa1f2321e7b020e2e773452f82df1bd2f1 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 import json
index 12cc56e444aaa63839664c8e70f82154045041c7..2e66e8cf9d791abe27d908e04e48fd6cd3bfd4dc 100644 (file)
@@ -18,31 +18,20 @@ from ..utils import (
 class KUSIIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?kusi\.com/(?P<path>story/.+|video\?clipId=(?P<clipId>\d+))'
     _TESTS = [{
-        'url': 'http://www.kusi.com/story/31183873/turko-files-case-closed-put-on-hold',
-        'md5': 'f926e7684294cf8cb7bdf8858e1b3988',
+        'url': 'http://www.kusi.com/story/32849881/turko-files-refused-to-help-it-aint-right',
+        'md5': '4e76ce8e53660ce9697d06c0ba6fc47d',
         'info_dict': {
-            'id': '12203019',
+            'id': '12689020',
             'ext': 'mp4',
-            'title': 'Turko Files: Case Closed! & Put On Hold!',
-            'duration': 231.0,
-            'upload_date': '20160210',
-            'timestamp': 1455087571,
+            'title': "Turko Files: Refused to Help, It Ain't Right!",
+            'duration': 223.586,
+            'upload_date': '20160826',
+            'timestamp': 1472233118,
             'thumbnail': 're:^https?://.*\.jpg$'
         },
     }, {
         'url': 'http://kusi.com/video?clipId=12203019',
-        'info_dict': {
-            'id': '12203019',
-            'ext': 'mp4',
-            'title': 'Turko Files: Case Closed! & Put On Hold!',
-            'duration': 231.0,
-            'upload_date': '20160210',
-            'timestamp': 1455087571,
-            'thumbnail': 're:^https?://.*\.jpg$'
-        },
-        'params': {
-            'skip_download': True,  # Same as previous one
-        },
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
index 0eeb9ffeba13d10a6b047331c4923dea3b194f80..63e10125e670b96cf706bb6c1c131ea33377a920 100644 (file)
@@ -59,7 +59,7 @@ class KuwoBaseIE(InfoExtractor):
 class KuwoIE(KuwoBaseIE):
     IE_NAME = 'kuwo:song'
     IE_DESC = '酷我音乐'
-    _VALID_URL = r'https?://www\.kuwo\.cn/yinyue/(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www\.)?kuwo\.cn/yinyue/(?P<id>\d+)'
     _TESTS = [{
         'url': 'http://www.kuwo.cn/yinyue/635632/',
         'info_dict': {
@@ -82,7 +82,7 @@ class KuwoIE(KuwoBaseIE):
             'upload_date': '20150518',
         },
         'params': {
-            'format': 'mp3-320'
+            'format': 'mp3-320',
         },
     }, {
         'url': 'http://www.kuwo.cn/yinyue/3197154?catalog=yueku2016',
@@ -91,10 +91,10 @@ class KuwoIE(KuwoBaseIE):
 
     def _real_extract(self, url):
         song_id = self._match_id(url)
-        webpage = self._download_webpage(
+        webpage, urlh = self._download_webpage_handle(
             url, song_id, note='Download song detail info',
             errnote='Unable to get song detail info')
-        if '对不起,该歌曲由于版权问题已被下线,将返回网站首页' in webpage:
+        if song_id not in urlh.geturl() or '对不起,该歌曲由于版权问题已被下线,将返回网站首页' in webpage:
             raise ExtractorError('this song has been offline because of copyright issues', expected=True)
 
         song_name = self._html_search_regex(
@@ -139,7 +139,7 @@ class KuwoIE(KuwoBaseIE):
 class KuwoAlbumIE(InfoExtractor):
     IE_NAME = 'kuwo:album'
     IE_DESC = '酷我音乐 - 专辑'
-    _VALID_URL = r'https?://www\.kuwo\.cn/album/(?P<id>\d+?)/'
+    _VALID_URL = r'https?://(?:www\.)?kuwo\.cn/album/(?P<id>\d+?)/'
     _TEST = {
         'url': 'http://www.kuwo.cn/album/502294/',
         'info_dict': {
@@ -181,7 +181,7 @@ class KuwoChartIE(InfoExtractor):
         'info_dict': {
             'id': '香港中文龙虎榜',
         },
-        'playlist_mincount': 10,
+        'playlist_mincount': 7,
     }
 
     def _real_extract(self, url):
@@ -200,7 +200,7 @@ class KuwoChartIE(InfoExtractor):
 class KuwoSingerIE(InfoExtractor):
     IE_NAME = 'kuwo:singer'
     IE_DESC = '酷我音乐 - 歌手'
-    _VALID_URL = r'https?://www\.kuwo\.cn/mingxing/(?P<id>[^/]+)'
+    _VALID_URL = r'https?://(?:www\.)?kuwo\.cn/mingxing/(?P<id>[^/]+)'
     _TESTS = [{
         'url': 'http://www.kuwo.cn/mingxing/bruno+mars/',
         'info_dict': {
@@ -296,14 +296,14 @@ class KuwoCategoryIE(InfoExtractor):
 class KuwoMvIE(KuwoBaseIE):
     IE_NAME = 'kuwo:mv'
     IE_DESC = '酷我音乐 - MV'
-    _VALID_URL = r'https?://www\.kuwo\.cn/mv/(?P<id>\d+?)/'
+    _VALID_URL = r'https?://(?:www\.)?kuwo\.cn/mv/(?P<id>\d+?)/'
     _TEST = {
         'url': 'http://www.kuwo.cn/mv/6480076/',
         'info_dict': {
             'id': '6480076',
             'ext': 'mp4',
             'title': 'My HouseMV',
-            'creator': 'PM02:00',
+            'creator': '2PM',
         },
         # In this video, music URLs (anti.s) are blocked outside China and
         # USA, while the MV URL (mvurl) is available globally, so force the MV
diff --git a/youtube_dl/extractor/lci.py b/youtube_dl/extractor/lci.py
new file mode 100644 (file)
index 0000000..af34829
--- /dev/null
@@ -0,0 +1,30 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class LCIIE(InfoExtractor):
+    """Resolve an lci.fr article page to the WAT video referenced in it."""
+
+    _VALID_URL = r'https?://(?:www\.)?lci\.fr/[^/]+/[\w-]+-(?P<id>\d+)\.html'
+    _TEST = {
+        'url': 'http://www.lci.fr/international/etats-unis-a-j-62-hillary-clinton-reste-sans-voix-2001679.html',
+        'md5': '2fdb2538b884d4d695f9bd2bde137e6c',
+        'info_dict': {
+            'id': '13244802',
+            'ext': 'mp4',
+            'title': 'Hillary Clinton et sa quinte de toux, en plein meeting',
+            'description': 'md5:a4363e3a960860132f8124b62f4a01c9',
+        }
+    }
+
+    def _real_extract(self, url):
+        """Return a url_result for the ``wat:<id>`` video found on the page."""
+        page_id = self._match_id(url)
+        html = self._download_webpage(url, page_id)
+        # The numeric WAT id is read from the page's data-watid attribute.
+        wat_id = self._search_regex(
+            r'data-watid=[\'"](\d+)', html, 'wat id')
+        wat_url = 'wat:' + wat_id
+        return self.url_result(wat_url, 'Wat', wat_id)
index e9cc9aa5983967861b08a2d9ee79297ae3a1726e..c48a5aad17ad36324b3cf70956d0ed234ffa522b 100644 (file)
@@ -29,7 +29,7 @@ from ..utils import (
 
 class LeIE(InfoExtractor):
     IE_DESC = '乐视网'
-    _VALID_URL = r'https?://(?:www\.le\.com/ptv/vplay|sports\.le\.com/video)/(?P<id>\d+)\.html'
+    _VALID_URL = r'https?://(?:www\.le\.com/ptv/vplay|(?:sports\.le|(?:www\.)?lesports)\.com/(?:match|video))/(?P<id>\d+)\.html'
 
     _URL_TEMPLATE = 'http://www.le.com/ptv/vplay/%s.html'
 
@@ -73,6 +73,12 @@ class LeIE(InfoExtractor):
     }, {
         'url': 'http://sports.le.com/video/25737697.html',
         'only_matching': True,
+    }, {
+        'url': 'http://www.lesports.com/match/1023203003.html',
+        'only_matching': True,
+    }, {
+        'url': 'http://sports.le.com/match/1023203003.html',
+        'only_matching': True,
     }]
 
     # ror() and calc_time_key() are reversed from a embedded swf file in KLetvPlayer.swf
diff --git a/youtube_dl/extractor/lego.py b/youtube_dl/extractor/lego.py
new file mode 100644 (file)
index 0000000..d3bca64
--- /dev/null
@@ -0,0 +1,147 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+    unescapeHTML,
+    parse_duration,
+    get_element_by_class,
+)
+
+
+class LEGOIE(InfoExtractor):
+    """Extractor for videos on lego.com.
+
+    Formats are built from a media path: an Akamai HLS manifest plus
+    progressive mp4/webm files, one pair per entry in ``_BITRATES``.
+    """
+
+    _VALID_URL = r'https?://(?:www\.)?lego\.com/(?P<locale>[^/]+)/(?:[^/]+/)*videos/(?:[^/]+/)*[^/?#]+-(?P<id>[0-9a-f]+)'
+    _TESTS = [{
+        'url': 'http://www.lego.com/en-us/videos/themes/club/blocumentary-kawaguchi-55492d823b1b4d5e985787fa8c2973b1',
+        'md5': 'f34468f176cfd76488767fc162c405fa',
+        'info_dict': {
+            'id': '55492d823b1b4d5e985787fa8c2973b1',
+            'ext': 'mp4',
+            'title': 'Blocumentary Great Creations: Akiyuki Kawaguchi',
+            'description': 'Blocumentary Great Creations: Akiyuki Kawaguchi',
+        },
+    }, {
+        # geo-restricted but the contentUrl contain a valid url
+        'url': 'http://www.lego.com/nl-nl/videos/themes/nexoknights/episode-20-kingdom-of-heroes-13bdc2299ab24d9685701a915b3d71e7##sp=399',
+        'md5': '4c3fec48a12e40c6e5995abc3d36cc2e',
+        'info_dict': {
+            'id': '13bdc2299ab24d9685701a915b3d71e7',
+            'ext': 'mp4',
+            'title': 'Aflevering 20 - Helden van het koninkrijk',
+            'description': 'md5:8ee499aac26d7fa8bcb0cedb7f9c3941',
+        },
+    }, {
+        # special characters in title
+        'url': 'http://www.lego.com/en-us/starwars/videos/lego-star-wars-force-surprise-9685ee9d12e84ff38e84b4e3d0db533d',
+        'info_dict': {
+            'id': '9685ee9d12e84ff38e84b4e3d0db533d',
+            'ext': 'mp4',
+            'title': 'Force Surprise – LEGO® Star Wars™ Microfighters',
+            'description': 'md5:9c673c96ce6f6271b88563fe9dc56de3',
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }]
+    _BITRATES = [256, 512, 1024, 1536, 2560]
+
+    def _real_extract(self, url):
+        """Return the info dict for the lego.com video page at `url`.
+
+        The media path is taken from the page's ``contentUrl`` meta tag when
+        it matches the CDN layout; otherwise it is rebuilt from the video
+        data embedded in the media player page.
+        """
+        locale, video_id = re.match(self._VALID_URL, url).groups()
+        webpage = self._download_webpage(url, video_id)
+        title = get_element_by_class('video-header', webpage).strip()
+        progressive_base = 'https://lc-mediaplayerns-live-s.legocdn.com/'
+        streaming_base = 'http://legoprod-f.akamaihd.net/'
+        # The meta lookup is non-fatal; substitute '' for a missing value so
+        # the search below yields "no path" rather than failing on None.
+        content_url = self._html_search_meta('contentUrl', webpage) or ''
+        path = self._search_regex(
+            r'(?:https?:)?//[^/]+/(?:[iz]/s/)?public/(.+)_[0-9,]+\.(?:mp4|webm)',
+            content_url, 'video path', default=None)
+        if not path:
+            # Accept absolute as well as protocol-relative iframe URLs.
+            player_url = self._proto_relative_url(self._search_regex(
+                r'<iframe[^>]+src="((?:https?:)?//(?:www\.)?lego\.com/[^/]+/mediaplayer/video/[^"]+)',
+                webpage, 'player url', default=None))
+            if not player_url:
+                base_url = self._proto_relative_url(self._search_regex(
+                    r'data-baseurl="([^"]+)"', webpage, 'base url',
+                    default='http://www.lego.com/%s/mediaplayer/video/' % locale))
+                player_url = base_url + video_id
+            player_webpage = self._download_webpage(player_url, video_id)
+            video_data = self._parse_json(unescapeHTML(self._search_regex(
+                r"video='([^']+)'", player_webpage, 'video data')), video_id)
+            # The player page may override the default CDN base URLs.
+            progressive_base = self._search_regex(
+                r'data-video-progressive-url="([^"]+)"',
+                player_webpage, 'progressive base', default=progressive_base)
+            streaming_base = self._search_regex(
+                r'data-video-streaming-url="([^"]+)"',
+                player_webpage, 'streaming base', default=streaming_base)
+            item_id = video_data['ItemId']
+
+            net_storage_path = video_data.get('NetStoragePath') or '/'.join([item_id[:2], item_id[2:4]])
+            base_path = '_'.join([item_id, video_data['VideoId'], video_data['Locale'], compat_str(video_data['VideoVersion'])])
+            path = '/'.join([net_storage_path, base_path])
+        streaming_path = ','.join(map(compat_str, self._BITRATES))
+
+        formats = self._extract_akamai_formats(
+            '%si/s/public/%s_,%s,.mp4.csmil/master.m3u8' % (streaming_base, path, streaming_path), video_id)
+        m3u8_formats = list(filter(
+            lambda f: f.get('protocol') == 'm3u8_native' and f.get('vcodec') != 'none' and f.get('resolution') != 'multiple',
+            formats))
+        if len(m3u8_formats) == len(self._BITRATES):
+            # One HLS variant per known bitrate: after sorting they are
+            # assumed to line up with _BITRATES, so pair them positionally.
+            self._sort_formats(m3u8_formats)
+            for bitrate, m3u8_format in zip(self._BITRATES, m3u8_formats):
+                progressive_base_url = '%spublic/%s_%d.' % (progressive_base, path, bitrate)
+                mp4_f = m3u8_format.copy()
+                mp4_f.update({
+                    'url': progressive_base_url + 'mp4',
+                    'format_id': m3u8_format['format_id'].replace('hls', 'mp4'),
+                    'protocol': 'http',
+                })
+                web_f = {
+                    'url': progressive_base_url + 'webm',
+                    'format_id': m3u8_format['format_id'].replace('hls', 'webm'),
+                    'width': m3u8_format.get('width'),
+                    'height': m3u8_format.get('height'),
+                    'tbr': m3u8_format.get('tbr'),
+                    'ext': 'webm',
+                }
+                formats.extend([web_f, mp4_f])
+        else:
+            for bitrate in self._BITRATES:
+                # Same extensions as the branch above ('webm', not 'web').
+                for ext in ('webm', 'mp4'):
+                    formats.append({
+                        'format_id': '%s-%s' % (ext, bitrate),
+                        'url': '%spublic/%s_%d.%s' % (progressive_base, path, bitrate, ext),
+                        'tbr': bitrate,
+                        'ext': ext,
+                    })
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': self._html_search_meta('description', webpage),
+            'thumbnail': self._html_search_meta('thumbnail', webpage),
+            'duration': parse_duration(self._html_search_meta('duration', webpage)),
+            'formats': formats,
+        }
index 87120ecd1f40c8011269a5e80b6ab158d3d94df3..afce2010eafadc3ceaab1eaa7d846e5e6360d547 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 import re
index a425bafe30ae5cffdbeb793ef27c4ffdb870dcc9..b7bfa7a6d524e4a5ebd190947b52a369a211e753 100644 (file)
@@ -34,11 +34,12 @@ class LimelightBaseIE(InfoExtractor):
     def _extract_info(self, streams, mobile_urls, properties):
         video_id = properties['media_id']
         formats = []
-
+        urls = []
         for stream in streams:
             stream_url = stream.get('url')
-            if not stream_url or stream.get('drmProtected'):
+            if not stream_url or stream.get('drmProtected') or stream_url in urls:
                 continue
+            urls.append(stream_url)
             ext = determine_ext(stream_url)
             if ext == 'f4m':
                 formats.extend(self._extract_f4m_formats(
@@ -58,9 +59,11 @@ class LimelightBaseIE(InfoExtractor):
                     format_id = 'rtmp'
                     if stream.get('videoBitRate'):
                         format_id += '-%d' % int_or_none(stream['videoBitRate'])
+                    http_url = 'http://cpl.delvenetworks.com/' + rtmp.group('playpath')[4:]
+                    urls.append(http_url)
                     http_fmt = fmt.copy()
                     http_fmt.update({
-                        'url': 'http://%s/%s' % (rtmp.group('host').replace('csl.', 'cpl.'), rtmp.group('playpath')[4:]),
+                        'url': http_url,
                         'format_id': format_id.replace('rtmp', 'http'),
                     })
                     formats.append(http_fmt)
@@ -76,8 +79,9 @@ class LimelightBaseIE(InfoExtractor):
         for mobile_url in mobile_urls:
             media_url = mobile_url.get('mobileUrl')
             format_id = mobile_url.get('targetMediaPlatform')
-            if not media_url or format_id == 'Widevine':
+            if not media_url or format_id in ('Widevine', 'SmoothStreaming') or media_url in urls:
                 continue
+            urls.append(media_url)
             ext = determine_ext(media_url)
             if ext == 'm3u8':
                 formats.extend(self._extract_m3u8_formats(
index 3356d015d9f6876ced6b86734fb436fd5ef1d4d0..ded717cf2823f6b999d310eafe72801ff507daa3 100644 (file)
@@ -2,7 +2,6 @@
 from __future__ import unicode_literals
 
 import json
-import re
 
 from .common import InfoExtractor
 from ..utils import (
@@ -14,7 +13,7 @@ from ..utils import (
 
 
 class LiTVIE(InfoExtractor):
-    _VALID_URL = r'https?://www\.litv\.tv/vod/[^/]+/content\.do\?.*?\bid=(?P<id>[^&]+)'
+    _VALID_URL = r'https?://(?:www\.)?litv\.tv/(?:vod|promo)/[^/]+/(?:content\.do)?\?.*?\b(?:content_)?id=(?P<id>[^&]+)'
 
     _URL_TEMPLATE = 'https://www.litv.tv/vod/%s/content.do?id=%s'
 
@@ -27,6 +26,7 @@ class LiTVIE(InfoExtractor):
         'playlist_count': 50,
     }, {
         'url': 'https://www.litv.tv/vod/drama/content.do?brc_id=root&id=VOD00041610&isUHEnabled=true&autoPlay=1',
+        'md5': '969e343d9244778cb29acec608e53640',
         'info_dict': {
             'id': 'VOD00041610',
             'ext': 'mp4',
@@ -37,13 +37,22 @@ class LiTVIE(InfoExtractor):
         },
         'params': {
             'noplaylist': True,
-            'skip_download': True,  # m3u8 download
+        },
+        'skip': 'Georestricted to Taiwan',
+    }, {
+        'url': 'https://www.litv.tv/promo/miyuezhuan/?content_id=VOD00044841&',
+        'md5': '88322ea132f848d6e3e18b32a832b918',
+        'info_dict': {
+            'id': 'VOD00044841',
+            'ext': 'mp4',
+            'title': '芈月傳第1集 霸星芈月降世楚國',
+            'description': '楚威王二年,太史令唐昧夜觀星象,發現霸星即將現世。王后得知霸星的預言後,想盡辦法不讓孩子順利出生,幸得莒姬相護化解危機。沒想到眾人期待下出生的霸星卻是位公主,楚威王對此失望至極。楚王后命人將女嬰丟棄河中,居然奇蹟似的被少司命像攔下,楚威王認為此女非同凡響,為她取名芈月。',
         },
         'skip': 'Georestricted to Taiwan',
     }]
 
-    def _extract_playlist(self, season_list, video_id, vod_data, view_data, prompt=True):
-        episode_title = view_data['title']
+    def _extract_playlist(self, season_list, video_id, program_info, prompt=True):
+        episode_title = program_info['title']
         content_id = season_list['contentId']
 
         if prompt:
@@ -51,7 +60,7 @@ class LiTVIE(InfoExtractor):
 
         all_episodes = [
             self.url_result(smuggle_url(
-                self._URL_TEMPLATE % (view_data['contentType'], episode['contentId']),
+                self._URL_TEMPLATE % (program_info['contentType'], episode['contentId']),
                 {'force_noplaylist': True}))  # To prevent infinite recursion
             for episode in season_list['episode']]
 
@@ -70,19 +79,15 @@ class LiTVIE(InfoExtractor):
 
         webpage = self._download_webpage(url, video_id)
 
-        view_data = dict(map(lambda t: (t[0], t[2]), re.findall(
-            r'viewData\.([a-zA-Z]+)\s*=\s*(["\'])([^"\']+)\2',
-            webpage)))
-
-        vod_data = self._parse_json(self._search_regex(
-            'var\s+vod\s*=\s*([^;]+)', webpage, 'VOD data', default='{}'),
+        program_info = self._parse_json(self._search_regex(
+            'var\s+programInfo\s*=\s*([^;]+)', webpage, 'VOD data', default='{}'),
             video_id)
 
-        season_list = list(vod_data.get('seasonList', {}).values())
+        season_list = list(program_info.get('seasonList', {}).values())
         if season_list:
             if not noplaylist:
                 return self._extract_playlist(
-                    season_list[0], video_id, vod_data, view_data,
+                    season_list[0], video_id, program_info,
                     prompt=noplaylist_prompt)
 
             if noplaylist_prompt:
@@ -92,14 +97,19 @@ class LiTVIE(InfoExtractor):
         # endpoint gives the same result as the data embedded in the webpage.
         # If georestricted, there are no embedded data, so an extra request is
         # necessary to get the error code
+        if 'assetId' not in program_info:
+            program_info = self._download_json(
+                'https://www.litv.tv/vod/ajax/getProgramInfo', video_id,
+                query={'contentId': video_id},
+                headers={'Accept': 'application/json'})
         video_data = self._parse_json(self._search_regex(
             r'uiHlsUrl\s*=\s*testBackendData\(([^;]+)\);',
             webpage, 'video data', default='{}'), video_id)
         if not video_data:
             payload = {
-                'assetId': view_data['assetId'],
-                'watchDevices': vod_data['watchDevices'],
-                'contentType': view_data['contentType'],
+                'assetId': program_info['assetId'],
+                'watchDevices': program_info['watchDevices'],
+                'contentType': program_info['contentType'],
             }
             video_data = self._download_json(
                 'https://www.litv.tv/vod/getMainUrl', video_id,
@@ -115,16 +125,17 @@ class LiTVIE(InfoExtractor):
             raise ExtractorError('Unexpected result from %s' % self.IE_NAME)
 
         formats = self._extract_m3u8_formats(
-            video_data['fullpath'], video_id, ext='mp4', m3u8_id='hls')
+            video_data['fullpath'], video_id, ext='mp4',
+            entry_protocol='m3u8_native', m3u8_id='hls')
         for a_format in formats:
             # LiTV HLS segments doesn't like compressions
             a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = True
 
-        title = view_data['title'] + view_data.get('secondaryMark', '')
-        description = view_data.get('description')
-        thumbnail = view_data.get('imageFile')
-        categories = [item['name'] for item in vod_data.get('category', [])]
-        episode = int_or_none(view_data.get('episode'))
+        title = program_info['title'] + program_info.get('secondaryMark', '')
+        description = program_info.get('description')
+        thumbnail = program_info.get('imageFile')
+        categories = [item['name'] for item in program_info.get('category', [])]
+        episode = int_or_none(program_info.get('episode'))
 
         return {
             'id': video_id,
index ea0565ac05099aab8c05609aee4140a1b4c2c1c7..b84e4dd6c20415a1f8bf7a562a61f80bbdbd5786 100644 (file)
@@ -54,6 +54,22 @@ class LiveLeakIE(InfoExtractor):
             'title': 'Crazy Hungarian tourist films close call waterspout in Croatia',
             'thumbnail': 're:^https?://.*\.jpg$'
         }
+    }, {
+        # Covers https://github.com/rg3/youtube-dl/pull/10664#issuecomment-247439521
+        'url': 'http://m.liveleak.com/view?i=763_1473349649',
+        'add_ie': ['Youtube'],
+        'info_dict': {
+            'id': '763_1473349649',
+            'ext': 'mp4',
+            'title': 'Reporters and public officials ignore epidemic of black on asian violence in Sacramento | Colin Flaherty',
+            'description': 'Colin being the warrior he is and showing the injustice Asians in Sacramento are being subjected to.',
+            'uploader': 'Ziz',
+            'upload_date': '20160908',
+            'uploader_id': 'UCEbta5E_jqlZmEJsriTEtnw'
+        },
+        'params': {
+            'skip_download': True,
+        },
     }]
 
     @staticmethod
@@ -87,7 +103,7 @@ class LiveLeakIE(InfoExtractor):
             else:
                 # Maybe an embed?
                 embed_url = self._search_regex(
-                    r'<iframe[^>]+src="(http://www.prochan.com/embed\?[^"]+)"',
+                    r'<iframe[^>]+src="(https?://(?:www\.)?(?:prochan|youtube)\.com/embed[^"]+)"',
                     webpage, 'embed URL')
                 return {
                     '_type': 'url_transparent',
@@ -107,6 +123,7 @@ class LiveLeakIE(InfoExtractor):
             'format_note': s.get('label'),
             'url': s['file'],
         } for i, s in enumerate(sources)]
+
         for i, s in enumerate(sources):
             # Removing '.h264_*.mp4' gives the raw video, which is essentially
             # the same video without the LiveLeak logo at the top (see
index 1072405b30c7663d19ddc4df86f858d94952fda5..f5c997ef4c79398734b2bd5feb09fd335eaf5ade 100644 (file)
@@ -1,8 +1,11 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
+import re
+
 from .common import InfoExtractor
 from ..utils import (
+    determine_ext,
     int_or_none,
     parse_duration,
     remove_end,
@@ -12,8 +15,10 @@ from ..utils import (
 class LRTIE(InfoExtractor):
     IE_NAME = 'lrt.lt'
     _VALID_URL = r'https?://(?:www\.)?lrt\.lt/mediateka/irasas/(?P<id>[0-9]+)'
-    _TEST = {
+    _TESTS = [{
+        # m3u8 download
         'url': 'http://www.lrt.lt/mediateka/irasas/54391/',
+        'md5': 'fe44cf7e4ab3198055f2c598fc175cb0',
         'info_dict': {
             'id': '54391',
             'ext': 'mp4',
@@ -23,20 +28,45 @@ class LRTIE(InfoExtractor):
             'view_count': int,
             'like_count': int,
         },
-        'params': {
-            'skip_download': True,  # m3u8 download
+    }, {
+        # direct mp3 download
+        'url': 'http://www.lrt.lt/mediateka/irasas/1013074524/',
+        'md5': '389da8ca3cad0f51d12bed0c844f6a0a',
+        'info_dict': {
+            'id': '1013074524',
+            'ext': 'mp3',
+            'title': 'Kita tema 2016-09-05 15:05',
+            'description': 'md5:1b295a8fc7219ed0d543fc228c931fb5',
+            'duration': 3008,
+            'view_count': int,
+            'like_count': int,
         },
-    }
+    }]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
 
         title = remove_end(self._og_search_title(webpage), ' - LRT')
-        m3u8_url = self._search_regex(
-            r'file\s*:\s*(["\'])(?P<url>.+?)\1\s*\+\s*location\.hash\.substring\(1\)',
-            webpage, 'm3u8 url', group='url')
-        formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4')
+
+        formats = []
+        for _, file_url in re.findall(
+                r'file\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage):
+            ext = determine_ext(file_url)
+            if ext not in ('m3u8', 'mp3'):
+                continue
+            # mp3 served as m3u8 produces stuttered media file
+            if ext == 'm3u8' and '.mp3' in file_url:
+                continue
+            if ext == 'm3u8':
+                formats.extend(self._extract_m3u8_formats(
+                    file_url, video_id, 'mp4', entry_protocol='m3u8_native',
+                    fatal=False))
+            elif ext == 'mp3':
+                formats.append({
+                    'url': file_url,
+                    'vcodec': 'none',
+                })
         self._sort_formats(formats)
 
         thumbnail = self._og_search_thumbnail(webpage)
index a98c4c530ec4d62a18b437d5c17275daec5c3dfb..f4dcfd93fa760878566568636d9c2b864b6c7556 100644 (file)
@@ -94,12 +94,12 @@ class LyndaBaseIE(InfoExtractor):
 class LyndaIE(LyndaBaseIE):
     IE_NAME = 'lynda'
     IE_DESC = 'lynda.com videos'
-    _VALID_URL = r'https?://www\.lynda\.com/(?:[^/]+/[^/]+/\d+|player/embed)/(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www\.)?lynda\.com/(?:[^/]+/[^/]+/(?P<course_id>\d+)|player/embed)/(?P<id>\d+)'
 
     _TIMECODE_REGEX = r'\[(?P<timecode>\d+:\d+:\d+[\.,]\d+)\]'
 
     _TESTS = [{
-        'url': 'http://www.lynda.com/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html',
+        'url': 'https://www.lynda.com/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html',
         # md5 is unstable
         'info_dict': {
             'id': '114408',
@@ -112,19 +112,71 @@ class LyndaIE(LyndaBaseIE):
         'only_matching': True,
     }]
 
+    def _raise_unavailable(self, video_id):
+        self.raise_login_required(
+            'Video %s is only available for members' % video_id)
+
     def _real_extract(self, url):
-        video_id = self._match_id(url)
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        course_id = mobj.group('course_id')
+
+        query = {
+            'videoId': video_id,
+            'type': 'video',
+        }
 
         video = self._download_json(
-            'http://www.lynda.com/ajax/player?videoId=%s&type=video' % video_id,
-            video_id, 'Downloading video JSON')
+            'https://www.lynda.com/ajax/player', video_id,
+            'Downloading video JSON', fatal=False, query=query)
+
+        # Fallback scenario
+        if not video:
+            query['courseId'] = course_id
+
+            play = self._download_json(
+                'https://www.lynda.com/ajax/course/%s/%s/play'
+                % (course_id, video_id), video_id, 'Downloading play JSON')
+
+            if not play:
+                self._raise_unavailable(video_id)
+
+            formats = []
+            for formats_dict in play:
+                urls = formats_dict.get('urls')
+                if not isinstance(urls, dict):
+                    continue
+                cdn = formats_dict.get('name')
+                for format_id, format_url in urls.items():
+                    if not format_url:
+                        continue
+                    formats.append({
+                        'url': format_url,
+                        'format_id': '%s-%s' % (cdn, format_id) if cdn else format_id,
+                        'height': int_or_none(format_id),
+                    })
+            self._sort_formats(formats)
+
+            conviva = self._download_json(
+                'https://www.lynda.com/ajax/player/conviva', video_id,
+                'Downloading conviva JSON', query=query)
+
+            return {
+                'id': video_id,
+                'title': conviva['VideoTitle'],
+                'description': conviva.get('VideoDescription'),
+                'release_year': int_or_none(conviva.get('ReleaseYear')),
+                'duration': int_or_none(conviva.get('Duration')),
+                'creator': conviva.get('Author'),
+                'formats': formats,
+            }
 
         if 'Status' in video:
             raise ExtractorError(
                 'lynda returned error: %s' % video['Message'], expected=True)
 
         if video.get('HasAccess') is False:
-            self.raise_login_required('Video %s is only available for members' % video_id)
+            self._raise_unavailable(video_id)
 
         video_id = compat_str(video.get('ID') or video_id)
         duration = int_or_none(video.get('DurationInSeconds'))
@@ -148,7 +200,7 @@ class LyndaIE(LyndaBaseIE):
             for prioritized_stream_id, prioritized_stream in prioritized_streams.items():
                 formats.extend([{
                     'url': video_url,
-                    'width': int_or_none(format_id),
+                    'height': int_or_none(format_id),
                     'format_id': '%s-%s' % (prioritized_stream_id, format_id),
                 } for format_id, video_url in prioritized_stream.items()])
 
@@ -187,7 +239,7 @@ class LyndaIE(LyndaBaseIE):
             return srt
 
     def _get_subtitles(self, video_id):
-        url = 'http://www.lynda.com/ajax/player?videoId=%s&type=transcript' % video_id
+        url = 'https://www.lynda.com/ajax/player?videoId=%s&type=transcript' % video_id
         subs = self._download_json(url, None, False)
         if subs:
             return {'en': [{'ext': 'srt', 'data': self._fix_subtitles(subs)}]}
@@ -209,7 +261,7 @@ class LyndaCourseIE(LyndaBaseIE):
         course_id = mobj.group('courseid')
 
         course = self._download_json(
-            'http://www.lynda.com/ajax/player?courseId=%s&type=course' % course_id,
+            'https://www.lynda.com/ajax/player?courseId=%s&type=course' % course_id,
             course_id, 'Downloading course JSON')
 
         if course.get('Status') == 'NotFound':
@@ -231,7 +283,7 @@ class LyndaCourseIE(LyndaBaseIE):
                 if video_id:
                     entries.append({
                         '_type': 'url_transparent',
-                        'url': 'http://www.lynda.com/%s/%s-4.html' % (course_path, video_id),
+                        'url': 'https://www.lynda.com/%s/%s-4.html' % (course_path, video_id),
                         'ie_key': LyndaIE.ie_key(),
                         'chapter': chapter.get('Title'),
                         'chapter_number': int_or_none(chapter.get('ChapterIndex')),
index 39d2742c89282c2773ee1aca44ca14f047393bc5..9806875e8d87f2a75e7689fbe0e0fabd6d7eeafe 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
index 3cd4a3a192ce3f6b611f6b3f4f3d928b75c9bba0..43db9929ca805fa7917824cf1bfd466f5721509e 100644 (file)
@@ -7,7 +7,7 @@ from ..utils import ExtractorError
 class MacGameStoreIE(InfoExtractor):
     IE_NAME = 'macgamestore'
     IE_DESC = 'MacGameStore trailers'
-    _VALID_URL = r'https?://www\.macgamestore\.com/mediaviewer\.php\?trailer=(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www\.)?macgamestore\.com/mediaviewer\.php\?trailer=(?P<id>\d+)'
 
     _TEST = {
         'url': 'http://www.macgamestore.com/mediaviewer.php?trailer=2450',
index 9a7098c43c600a3cc3ed697252bc784d9a9cf5b7..f7cc3c83289f1101207c385d5bfed2055c7b7f67 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 import re
diff --git a/youtube_dl/extractor/mangomolo.py b/youtube_dl/extractor/mangomolo.py
new file mode 100644 (file)
index 0000000..1885ac7
--- /dev/null
@@ -0,0 +1,54 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import base64
+
+from .common import InfoExtractor
+from ..compat import compat_urllib_parse_unquote
+from ..utils import (
+    int_or_none,
+)
+
+
+class MangomoloBaseIE(InfoExtractor):
+    def _get_real_id(self, page_id):
+        return page_id
+
+    def _real_extract(self, url):
+        page_id = self._get_real_id(self._match_id(url))
+        webpage = self._download_webpage(url, page_id)
+        hidden_inputs = self._hidden_inputs(webpage)
+        m3u8_entry_protocol = 'm3u8' if self._IS_LIVE else 'm3u8_native'
+
+        format_url = self._html_search_regex(
+            [
+                r'file\s*:\s*"(https?://[^"]+?/playlist.m3u8)',
+                r'<a[^>]+href="(rtsp://[^"]+)"'
+            ], webpage, 'format url')
+        formats = self._extract_wowza_formats(
+            format_url, page_id, m3u8_entry_protocol, ['smil'])
+        self._sort_formats(formats)
+
+        return {
+            'id': page_id,
+            'title': self._live_title(page_id) if self._IS_LIVE else page_id,
+            'uploader_id': hidden_inputs.get('userid'),
+            'duration': int_or_none(hidden_inputs.get('duration')),
+            'is_live': self._IS_LIVE,
+            'formats': formats,
+        }
+
+
+class MangomoloVideoIE(MangomoloBaseIE):
+    IE_NAME = 'mangomolo:video'
+    _VALID_URL = r'https?://admin\.mangomolo\.com/analytics/index\.php/customers/embed/video\?.*?\bid=(?P<id>\d+)'
+    _IS_LIVE = False
+
+
+class MangomoloLiveIE(MangomoloBaseIE):
+    IE_NAME = 'mangomolo:live'
+    _VALID_URL = r'https?://admin\.mangomolo\.com/analytics/index\.php/customers/embed/index\?.*?\bchannelid=(?P<id>(?:[A-Za-z0-9+/=]|%2B|%2F|%3D)+)'
+    _IS_LIVE = True
+
+    def _get_real_id(self, page_id):
+        return base64.b64decode(compat_urllib_parse_unquote(page_id).encode()).decode()
index 444ec0310877e8377f78e88b07fd110ca9e6aa0d..7d468d78bab45ac4a83bd8aa531dfd67b42c6eb6 100644 (file)
@@ -9,7 +9,7 @@ from ..utils import (
 
 
 class MetacriticIE(InfoExtractor):
-    _VALID_URL = r'https?://www\.metacritic\.com/.+?/trailers/(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www\.)?metacritic\.com/.+?/trailers/(?P<id>\d+)'
 
     _TESTS = [{
         'url': 'http://www.metacritic.com/game/playstation-4/infamous-second-son/trailers/3698222',
index 27bdff8b25cb63e628981ad3a8dbcc7f76db7653..e0bb5d208856a121f40f533fcacf3b7bd98d13ea 100644 (file)
@@ -6,7 +6,7 @@ from ..utils import int_or_none
 
 
 class MGTVIE(InfoExtractor):
-    _VALID_URL = r'https?://www\.mgtv\.com/v/(?:[^/]+/)*(?P<id>\d+)\.html'
+    _VALID_URL = r'https?://(?:www\.)?mgtv\.com/v/(?:[^/]+/)*(?P<id>\d+)\.html'
     IE_DESC = '芒果TV'
 
     _TESTS = [{
diff --git a/youtube_dl/extractor/miaopai.py b/youtube_dl/extractor/miaopai.py
new file mode 100644 (file)
index 0000000..f9e35ac
--- /dev/null
@@ -0,0 +1,40 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class MiaoPaiIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?miaopai\.com/show/(?P<id>[-A-Za-z0-9~_]+)'
+    _TEST = {
+        'url': 'http://www.miaopai.com/show/n~0hO7sfV1nBEw4Y29-Hqg__.htm',
+        'md5': '095ed3f1cd96b821add957bdc29f845b',
+        'info_dict': {
+            'id': 'n~0hO7sfV1nBEw4Y29-Hqg__',
+            'ext': 'mp4',
+            'title': '西游记音乐会的秒拍视频',
+            'thumbnail': 're:^https?://.*/n~0hO7sfV1nBEw4Y29-Hqg___m.jpg',
+        }
+    }
+
+    _USER_AGENT_IPAD = 'Mozilla/5.0 (iPad; CPU OS 9_1 like Mac OS X) AppleWebKit/601.1.46 (KHTML, like Gecko) Version/9.0 Mobile/13B143 Safari/601.1'
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(
+            url, video_id, headers={'User-Agent': self._USER_AGENT_IPAD})
+
+        title = self._html_search_regex(
+            r'<title>([^<]+)</title>', webpage, 'title')
+        thumbnail = self._html_search_regex(
+            r'<div[^>]+class=(?P<q1>[\'"]).*\bvideo_img\b.*(?P=q1)[^>]+data-url=(?P<q2>[\'"])(?P<url>[^\'"]+)(?P=q2)',
+            webpage, 'thumbnail', fatal=False, group='url')
+        videos = self._parse_html5_media_entries(url, webpage, video_id)
+        info = videos[0]
+
+        info.update({
+            'id': video_id,
+            'title': title,
+            'thumbnail': thumbnail,
+        })
+        return info
index afd3e98ecbf1cd4d2016550bf715bd0a4b3fe167..8e0aee0e69699ee4065c8e4c8addb4db0e537caf 100644 (file)
@@ -71,12 +71,15 @@ class MicrosoftVirtualAcademyIE(MicrosoftVirtualAcademyBaseIE):
         formats = []
 
         for sources in settings.findall(compat_xpath('.//MediaSources')):
-            if sources.get('videoType') == 'smoothstreaming':
-                continue
+            sources_type = sources.get('videoType')
             for source in sources.findall(compat_xpath('./MediaSource')):
                 video_url = source.text
                 if not video_url or not video_url.startswith('http'):
                     continue
+                if sources_type == 'smoothstreaming':
+                    formats.extend(self._extract_ism_formats(
+                        video_url, video_id, 'mss', fatal=False))
+                    continue
                 video_mode = source.get('videoMode')
                 height = int_or_none(self._search_regex(
                     r'^(\d+)[pP]$', video_mode or '', 'height', default=None))
index e48eba3fa7343bbdf964be583a680affa5ad29fa..10190d5f6e1f3f55b3274855c7614bea62b620e5 100644 (file)
@@ -8,7 +8,7 @@ from ..utils import (
 
 
 class MinistryGridIE(InfoExtractor):
-    _VALID_URL = r'https?://www\.ministrygrid.com/([^/?#]*/)*(?P<id>[^/#?]+)/?(?:$|[?#])'
+    _VALID_URL = r'https?://(?:www\.)?ministrygrid\.com/([^/?#]*/)*(?P<id>[^/#?]+)/?(?:$|[?#])'
 
     _TEST = {
         'url': 'http://www.ministrygrid.com/training-viewer/-/training/t4g-2014-conference/the-gospel-by-numbers-4/the-gospel-by-numbers',
index 937ba0f28bc41799f6489f4e246c9fac8a7c86e2..ec1b4c4fea111ded48f530c7020dd9aabd38dbb8 100644 (file)
@@ -25,10 +25,7 @@ class MioMioIE(InfoExtractor):
             'title': '【SKY】字幕 铠武昭和VS平成 假面骑士大战FEAT战队 魔星字幕组 字幕',
             'duration': 5923,
         },
-        'params': {
-            # The server provides broken file
-            'skip_download': True,
-        }
+        'skip': 'Unable to load videos',
     }, {
         'url': 'http://www.miomio.tv/watch/cc184024/',
         'info_dict': {
@@ -47,16 +44,12 @@ class MioMioIE(InfoExtractor):
         'skip': 'Unable to load videos',
     }, {
         # new 'h5' player
-        'url': 'http://www.miomio.tv/watch/cc273295/',
-        'md5': '',
+        'url': 'http://www.miomio.tv/watch/cc273997/',
+        'md5': '0b27a4b4495055d826813f8c3a6b2070',
         'info_dict': {
-            'id': '273295',
+            'id': '273997',
             'ext': 'mp4',
-            'title': 'アウト×デラックス 20160526',
-        },
-        'params': {
-            # intermittent HTTP 500
-            'skip_download': True,
+            'title': 'マツコの知らない世界【劇的進化SP!ビニール傘&冷凍食品2016】 1_2 - 16 05 31',
         },
     }]
 
@@ -116,7 +109,7 @@ class MioMioIE(InfoExtractor):
             player_webpage = self._download_webpage(
                 player_url, video_id,
                 note='Downloading player webpage', headers={'Referer': url})
-            entries = self._parse_html5_media_entries(player_url, player_webpage)
+            entries = self._parse_html5_media_entries(player_url, player_webpage, video_id)
             http_headers = {'Referer': player_url}
         else:
             http_headers = {'Referer': 'http://www.miomio.tv%s' % mioplayer_path}
index cd169f3616729871fcad9e6c619e67f392f73312..f577836be6dd01c7dc4b315c0295a973fe308a2d 100644 (file)
@@ -1,19 +1,20 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
-import re
+import uuid
 
 from .common import InfoExtractor
 from ..compat import (
+    compat_str,
     compat_urllib_parse_urlencode,
     compat_urlparse,
 )
 from ..utils import (
-    get_element_by_attribute,
     int_or_none,
-    remove_start,
     extract_attributes,
     determine_ext,
+    smuggle_url,
+    parse_duration,
 )
 
 
@@ -72,76 +73,133 @@ class MiTeleBaseIE(InfoExtractor):
         }
 
 
-class MiTeleIE(MiTeleBaseIE):
+class MiTeleIE(InfoExtractor):
     IE_DESC = 'mitele.es'
-    _VALID_URL = r'https?://www\.mitele\.es/(?:[^/]+/){3}(?P<id>[^/]+)/'
+    _VALID_URL = r'https?://(?:www\.)?mitele\.es/(?:[^/]+/)+(?P<id>[^/]+)/player'
 
     _TESTS = [{
-        'url': 'http://www.mitele.es/programas-tv/diario-de/la-redaccion/programa-144/',
-        # MD5 is unstable
+        'url': 'http://www.mitele.es/programas-tv/diario-de/57b0dfb9c715da65618b4afa/player',
         'info_dict': {
-            'id': '0NF1jJnxS1Wu3pHrmvFyw2',
-            'display_id': 'programa-144',
+            'id': '57b0dfb9c715da65618b4afa',
             'ext': 'mp4',
             'title': 'Tor, la web invisible',
             'description': 'md5:3b6fce7eaa41b2d97358726378d9369f',
             'series': 'Diario de',
             'season': 'La redacción',
+            'season_number': 14,
+            'season_id': 'diario_de_t14_11981',
             'episode': 'Programa 144',
+            'episode_number': 3,
             'thumbnail': 're:(?i)^https?://.*\.jpg$',
             'duration': 2913,
         },
+        'add_ie': ['Ooyala'],
     }, {
         # no explicit title
-        'url': 'http://www.mitele.es/programas-tv/cuarto-milenio/temporada-6/programa-226/',
+        'url': 'http://www.mitele.es/programas-tv/cuarto-milenio/57b0de3dc915da14058b4876/player',
         'info_dict': {
-            'id': 'eLZSwoEd1S3pVyUm8lc6F',
-            'display_id': 'programa-226',
+            'id': '57b0de3dc915da14058b4876',
             'ext': 'mp4',
-            'title': 'Cuarto Milenio - Temporada 6 - Programa 226',
-            'description': 'md5:50daf9fadefa4e62d9fc866d0c015701',
+            'title': 'Cuarto Milenio Temporada 6 Programa 226',
+            'description': 'md5:5ff132013f0cd968ffbf1f5f3538a65f',
             'series': 'Cuarto Milenio',
             'season': 'Temporada 6',
+            'season_number': 6,
+            'season_id': 'cuarto_milenio_t06_12715',
             'episode': 'Programa 226',
+            'episode_number': 24,
             'thumbnail': 're:(?i)^https?://.*\.jpg$',
-            'duration': 7312,
+            'duration': 7313,
         },
         'params': {
             'skip_download': True,
         },
+        'add_ie': ['Ooyala'],
+    }, {
+        'url': 'http://www.mitele.es/series-online/la-que-se-avecina/57aac5c1c915da951a8b45ed/player',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
-        display_id = self._match_id(url)
-
-        webpage = self._download_webpage(url, display_id)
-
-        info = self._get_player_info(url, webpage)
-
-        title = self._search_regex(
-            r'class="Destacado-text"[^>]*>\s*<strong>([^<]+)</strong>',
-            webpage, 'title', default=None)
-
-        mobj = re.search(r'''(?sx)
-                            class="Destacado-text"[^>]*>.*?<h1>\s*
-                            <span>(?P<series>[^<]+)</span>\s*
-                            <span>(?P<season>[^<]+)</span>\s*
-                            <span>(?P<episode>[^<]+)</span>''', webpage)
-        series, season, episode = mobj.groups() if mobj else [None] * 3
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        gigya_url = self._search_regex(
+            r'<gigya-api>[^>]*</gigya-api>[^>]*<script\s+src="([^"]*)">[^>]*</script>',
+            webpage, 'gigya', default=None)
+        gigya_sc = self._download_webpage(
+            compat_urlparse.urljoin('http://www.mitele.es/', gigya_url),
+            video_id, 'Downloading gigya script')
+
+        # Get a appKey/uuid for getting the session key
+        appKey_var = self._search_regex(
+            r'value\s*\(\s*["\']appGridApplicationKey["\']\s*,\s*([0-9a-f]+)',
+            gigya_sc, 'appKey variable')
+        appKey = self._search_regex(
+            r'var\s+%s\s*=\s*["\']([0-9a-f]+)' % appKey_var, gigya_sc, 'appKey')
+
+        session_json = self._download_json(
+            'https://appgrid-api.cloud.accedo.tv/session',
+            video_id, 'Downloading session keys', query={
+                'appKey': appKey,
+                'uuid': compat_str(uuid.uuid4()),
+            })
+
+        paths = self._download_json(
+            'https://appgrid-api.cloud.accedo.tv/metadata/general_configuration,%20web_configuration',
+            video_id, 'Downloading paths JSON',
+            query={'sessionKey': compat_str(session_json['sessionKey'])})
+
+        ooyala_s = paths['general_configuration']['api_configuration']['ooyala_search']
+        source = self._download_json(
+            'http://%s%s%s/docs/%s' % (
+                ooyala_s['base_url'], ooyala_s['full_path'],
+                ooyala_s['provider_id'], video_id),
+            video_id, 'Downloading data JSON', query={
+                'include_titles': 'Series,Season',
+                'product_name': 'test',
+                'format': 'full',
+            })['hits']['hits'][0]['_source']
+
+        embedCode = source['offers'][0]['embed_codes'][0]
+        titles = source['localizable_titles'][0]
+
+        title = titles.get('title_medium') or titles['title_long']
+
+        description = titles.get('summary_long') or titles.get('summary_medium')
+
+        def get(key1, key2):
+            value1 = source.get(key1)
+            if not value1 or not isinstance(value1, list):
+                return
+            if not isinstance(value1[0], dict):
+                return
+            return value1[0].get(key2)
+
+        series = get('localizable_titles_series', 'title_medium')
+
+        season = get('localizable_titles_season', 'title_medium')
+        season_number = int_or_none(source.get('season_number'))
+        season_id = source.get('season_id')
+
+        episode = titles.get('title_sort_name')
+        episode_number = int_or_none(source.get('episode_number'))
+
+        duration = parse_duration(get('videos', 'duration'))
 
-        if not title:
-            if mobj:
-                title = '%s - %s - %s' % (series, season, episode)
-            else:
-                title = remove_start(self._search_regex(
-                    r'<title>([^<]+)</title>', webpage, 'title'), 'Ver online ')
-
-        info.update({
-            'display_id': display_id,
+        return {
+            '_type': 'url_transparent',
+            # for some reason only HLS is supported
+            'url': smuggle_url('ooyala:' + embedCode, {'supportedformats': 'm3u8'}),
+            'id': video_id,
             'title': title,
-            'description': get_element_by_attribute('class', 'text', webpage),
+            'description': description,
             'series': series,
             'season': season,
+            'season_number': season_number,
+            'season_id': season_id,
             'episode': episode,
-        })
-        return info
+            'episode_number': episode_number,
+            'duration': duration,
+            'thumbnail': get('images', 'url'),
+        }
index 978d5d5bfeaf5ff64b7279343876a3177c43339a..91ee9c4e95204718cb069fe1dc36908821b7af6d 100644 (file)
@@ -35,7 +35,8 @@ class MoeVideoIE(InfoExtractor):
                 'height': 360,
                 'duration': 179,
                 'filesize': 17822500,
-            }
+            },
+            'skip': 'Video has been removed',
         },
         {
             'url': 'http://playreplay.net/video/77107.7f325710a627383d40540d8e991a',
index 370328b362c2a0661925d054be121a7216dc94c7..c9d1ab64dc36f12940ff6fee6f92524ce4ae3f2e 100644 (file)
@@ -9,7 +9,7 @@ from ..compat import (
 
 class MotorsportIE(InfoExtractor):
     IE_DESC = 'motorsport.com'
-    _VALID_URL = r'https?://www\.motorsport\.com/[^/?#]+/video/(?:[^/?#]+/)(?P<id>[^/]+)/?(?:$|[?#])'
+    _VALID_URL = r'https?://(?:www\.)?motorsport\.com/[^/?#]+/video/(?:[^/?#]+/)(?P<id>[^/]+)/?(?:$|[?#])'
     _TEST = {
         'url': 'http://www.motorsport.com/f1/video/main-gallery/red-bull-racing-2014-rules-explained/',
         'info_dict': {
index d0cb8278e9860591a3bcb6e711998ae204c4962d..30c206f9b61e22d3e029a68979643fc6ee7de635 100644 (file)
@@ -11,7 +11,7 @@ from ..utils import (
 
 
 class MovieClipsIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www.)?movieclips\.com/videos/.+-(?P<id>\d+)(?:\?|$)'
+    _VALID_URL = r'https?://(?:www\.)?movieclips\.com/videos/.+-(?P<id>\d+)(?:\?|$)'
     _TEST = {
         'url': 'http://www.movieclips.com/videos/warcraft-trailer-1-561180739597',
         'md5': '42b5a0352d4933a7bd54f2104f481244',
index f130b75c416ad3fe2e8d4ac3221799d1eb4aa1b9..478e3996743d1eca8434a786b58c4bd799a7dc55 100644 (file)
@@ -1,4 +1,4 @@
-# -*- coding: utf-8 -*-
+# coding: utf-8
 from __future__ import unicode_literals
 
 import re
@@ -7,7 +7,7 @@ from .common import InfoExtractor
 
 
 class MoviezineIE(InfoExtractor):
-    _VALID_URL = r'https?://www\.moviezine\.se/video/(?P<id>[^?#]+)'
+    _VALID_URL = r'https?://(?:www\.)?moviezine\.se/video/(?P<id>[^?#]+)'
 
     _TEST = {
         'url': 'http://www.moviezine.se/video/205866',
similarity index 65%
rename from youtube_dl/extractor/ssa.py
rename to youtube_dl/extractor/movingimage.py
index 54d1843f2200d0cef7fa2e7b192f673d316c5f18..bb789c32edb45e78e9806faaae169af09826135e 100644 (file)
@@ -7,22 +7,19 @@ from ..utils import (
 )
 
 
-class SSAIE(InfoExtractor):
-    _VALID_URL = r'https?://ssa\.nls\.uk/film/(?P<id>\d+)'
+class MovingImageIE(InfoExtractor):
+    _VALID_URL = r'https?://movingimage\.nls\.uk/film/(?P<id>\d+)'
     _TEST = {
-        'url': 'http://ssa.nls.uk/film/3561',
+        'url': 'http://movingimage.nls.uk/film/3561',
+        'md5': '4caa05c2b38453e6f862197571a7be2f',
         'info_dict': {
             'id': '3561',
-            'ext': 'flv',
+            'ext': 'mp4',
             'title': 'SHETLAND WOOL',
             'description': 'md5:c5afca6871ad59b4271e7704fe50ab04',
             'duration': 900,
             'thumbnail': 're:^https?://.*\.jpg$',
         },
-        'params': {
-            # rtmp download
-            'skip_download': True,
-        },
     }
 
     def _real_extract(self, url):
@@ -30,10 +27,9 @@ class SSAIE(InfoExtractor):
 
         webpage = self._download_webpage(url, video_id)
 
-        streamer = self._search_regex(
-            r"'streamer'\s*,\S*'(rtmp[^']+)'", webpage, 'streamer')
-        play_path = self._search_regex(
-            r"'file'\s*,\s*'([^']+)'", webpage, 'file').rpartition('.')[0]
+        formats = self._extract_m3u8_formats(
+            self._html_search_regex(r'file\s*:\s*"([^"]+)"', webpage, 'm3u8 manifest URL'),
+            video_id, ext='mp4', entry_protocol='m3u8_native')
 
         def search_field(field_name, fatal=False):
             return self._search_regex(
@@ -44,13 +40,11 @@ class SSAIE(InfoExtractor):
         description = unescapeHTML(search_field('Description'))
         duration = parse_duration(search_field('Running time'))
         thumbnail = self._search_regex(
-            r"'image'\s*,\s*'([^']+)'", webpage, 'thumbnails', fatal=False)
+            r"image\s*:\s*'([^']+)'", webpage, 'thumbnail', fatal=False)
 
         return {
             'id': video_id,
-            'url': streamer,
-            'play_path': play_path,
-            'ext': 'flv',
+            'formats': formats,
             'title': title,
             'description': description,
             'duration': duration,
index 1ec8e0f50e6d407aa05e358a1809a1633d97876a..d75ce8b3b510b68ca0dfe754d8fcf1741e6cbd9d 100644 (file)
@@ -69,10 +69,9 @@ class MSNIE(InfoExtractor):
             if not format_url:
                 continue
             ext = determine_ext(format_url)
-            # .ism is not yet supported (see
-            # https://github.com/rg3/youtube-dl/issues/8118)
             if ext == 'ism':
-                continue
+                formats.extend(self._extract_ism_formats(
+                    format_url + '/Manifest', display_id, 'mss', fatal=False))
             if 'm3u8' in format_url:
                 # m3u8_native should not be used here until
                 # https://github.com/rg3/youtube-dl/issues/9913 is fixed
index 2f455680ebba41074513f7dd33b76e5c269cb142..03351917e71cdfbfb98ecb329eecad9500b288e4 100644 (file)
@@ -4,7 +4,6 @@ import re
 
 from .common import InfoExtractor
 from ..compat import (
-    compat_urllib_parse_urlencode,
     compat_str,
     compat_xpath,
 )
@@ -14,12 +13,14 @@ from ..utils import (
     fix_xml_ampersands,
     float_or_none,
     HEADRequest,
+    NO_DEFAULT,
+    RegexNotFoundError,
     sanitized_Request,
     strip_or_none,
     timeconvert,
     unescapeHTML,
+    update_url_query,
     url_basename,
-    RegexNotFoundError,
     xpath_text,
 )
 
@@ -36,6 +37,11 @@ class MTVServicesInfoExtractor(InfoExtractor):
     def _id_from_uri(uri):
         return uri.split(':')[-1]
 
+    @staticmethod
+    def _remove_template_parameter(url):
+        # Remove the templates, like &device={device}
+        return re.sub(r'&[^=]*?={.*?}(?=(&|$))', '', url)
+
     # This was originally implemented for ComedyCentral, but it also works here
     @classmethod
     def _transform_rtmp_url(cls, rtmp_video_url):
@@ -117,9 +123,7 @@ class MTVServicesInfoExtractor(InfoExtractor):
         video_id = self._id_from_uri(uri)
         self.report_extraction(video_id)
         content_el = itemdoc.find('%s/%s' % (_media_xml_tag('group'), _media_xml_tag('content')))
-        mediagen_url = content_el.attrib['url']
-        # Remove the templates, like &device={device}
-        mediagen_url = re.sub(r'&[^=]*?={.*?}(?=(&|$))', '', mediagen_url)
+        mediagen_url = self._remove_template_parameter(content_el.attrib['url'])
         if 'acceptMethods' not in mediagen_url:
             mediagen_url += '&' if '?' in mediagen_url else '?'
             mediagen_url += 'acceptMethods=fms'
@@ -178,12 +182,12 @@ class MTVServicesInfoExtractor(InfoExtractor):
         data = {'uri': uri}
         if self._LANG:
             data['lang'] = self._LANG
-        return compat_urllib_parse_urlencode(data)
+        return data
 
     def _get_videos_info(self, uri):
         video_id = self._id_from_uri(uri)
         feed_url = self._get_feed_url(uri)
-        info_url = feed_url + '?' + self._get_feed_query(uri)
+        info_url = update_url_query(feed_url, self._get_feed_query(uri))
         return self._get_videos_info_from_url(info_url, video_id)
 
     def _get_videos_info_from_url(self, url, video_id):
@@ -198,7 +202,7 @@ class MTVServicesInfoExtractor(InfoExtractor):
             [self._get_video_info(item) for item in idoc.findall('.//item')],
             playlist_title=title, playlist_description=description)
 
-    def _extract_mgid(self, webpage):
+    def _extract_mgid(self, webpage, default=NO_DEFAULT):
         try:
             # the url can be http://media.mtvnservices.com/fb/{mgid}.swf
             # or http://media.mtvnservices.com/{mgid}
@@ -218,7 +222,7 @@ class MTVServicesInfoExtractor(InfoExtractor):
             sm4_embed = self._html_search_meta(
                 'sm4:video:embed', webpage, 'sm4 embed', default='')
             mgid = self._search_regex(
-                r'embed/(mgid:.+?)["\'&?/]', sm4_embed, 'mgid')
+                r'embed/(mgid:.+?)["\'&?/]', sm4_embed, 'mgid', default=default)
         return mgid
 
     def _real_extract(self, url):
@@ -256,13 +260,9 @@ class MTVServicesEmbeddedIE(MTVServicesInfoExtractor):
 
     def _get_feed_url(self, uri):
         video_id = self._id_from_uri(uri)
-        site_id = uri.replace(video_id, '')
-        config_url = ('http://media.mtvnservices.com/pmt/e1/players/{0}/'
-                      'context4/context5/config.xml'.format(site_id))
-        config_doc = self._download_xml(config_url, video_id)
-        feed_node = config_doc.find('.//feed')
-        feed_url = feed_node.text.strip().split('?')[0]
-        return feed_url
+        config = self._download_json(
+            'http://media.mtvnservices.com/pmt/e1/access/index.html?uri=%s&configtype=edge' % uri, video_id)
+        return self._remove_template_parameter(config['feedWithQueryParams'])
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
@@ -271,6 +271,29 @@ class MTVServicesEmbeddedIE(MTVServicesInfoExtractor):
 
 
 class MTVIE(MTVServicesInfoExtractor):
+    IE_NAME = 'mtv'
+    _VALID_URL = r'https?://(?:www\.)?mtv\.com/(?:video-clips|full-episodes)/(?P<id>[^/?#.]+)'
+    _FEED_URL = 'http://www.mtv.com/feeds/mrss/'
+
+    _TESTS = [{
+        'url': 'http://www.mtv.com/video-clips/vl8qof/unlocking-the-truth-trailer',
+        'md5': '1edbcdf1e7628e414a8c5dcebca3d32b',
+        'info_dict': {
+            'id': '5e14040d-18a4-47c4-a582-43ff602de88e',
+            'ext': 'mp4',
+            'title': 'Unlocking The Truth|July 18, 2016|1|101|Trailer',
+            'description': '"Unlocking the Truth" premieres August 17th at 11/10c.',
+            'timestamp': 1468846800,
+            'upload_date': '20160718',
+        },
+    }, {
+        'url': 'http://www.mtv.com/full-episodes/94tujl/unlocking-the-truth-gates-of-hell-season-1-ep-101',
+        'only_matching': True,
+    }]
+
+
+class MTVVideoIE(MTVServicesInfoExtractor):
+    IE_NAME = 'mtv:video'
     _VALID_URL = r'''(?x)^https?://
         (?:(?:www\.)?mtv\.com/videos/.+?/(?P<videoid>[0-9]+)/[^/]+$|
            m\.mtv\.com/videos/video\.rbml\?.*?id=(?P<mgid>[^&]+))'''
index 2174e5665778b590055c06255a91c030cb579d29..1854d59a5307a5b22f2efdda08a2b6c944aa8c50 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
index a103e0323a6c62e4b0d283afdb6d4f5662bb1869..fea1caf478b2a862ae3a028b4a80041b734a5e1b 100644 (file)
@@ -9,9 +9,9 @@ from ..utils import (
 
 
 class MwaveIE(InfoExtractor):
-    _VALID_URL = r'https?://mwave\.interest\.me/mnettv/videodetail\.m\?searchVideoDetailVO\.clip_id=(?P<id>[0-9]+)'
+    _VALID_URL = r'https?://mwave\.interest\.me/(?:[^/]+/)?mnettv/videodetail\.m\?searchVideoDetailVO\.clip_id=(?P<id>[0-9]+)'
     _URL_TEMPLATE = 'http://mwave.interest.me/mnettv/videodetail.m?searchVideoDetailVO.clip_id=%s'
-    _TEST = {
+    _TESTS = [{
         'url': 'http://mwave.interest.me/mnettv/videodetail.m?searchVideoDetailVO.clip_id=168859',
         # md5 is unstable
         'info_dict': {
@@ -23,7 +23,10 @@ class MwaveIE(InfoExtractor):
             'duration': 206,
             'view_count': int,
         }
-    }
+    }, {
+        'url': 'http://mwave.interest.me/en/mnettv/videodetail.m?searchVideoDetailVO.clip_id=176199',
+        'only_matching': True,
+    }]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
@@ -60,8 +63,8 @@ class MwaveIE(InfoExtractor):
 
 
 class MwaveMeetGreetIE(InfoExtractor):
-    _VALID_URL = r'https?://mwave\.interest\.me/meetgreet/view/(?P<id>\d+)'
-    _TEST = {
+    _VALID_URL = r'https?://mwave\.interest\.me/(?:[^/]+/)?meetgreet/view/(?P<id>\d+)'
+    _TESTS = [{
         'url': 'http://mwave.interest.me/meetgreet/view/256',
         'info_dict': {
             'id': '173294',
@@ -72,7 +75,10 @@ class MwaveMeetGreetIE(InfoExtractor):
             'duration': 3634,
             'view_count': int,
         }
-    }
+    }, {
+        'url': 'http://mwave.interest.me/en/meetgreet/view/256',
+        'only_matching': True,
+    }]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
index 0d5238d777ad00ab13e84a69474d42b360cdecc1..ab32e632e34375561980f168834443754f606383 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 import re
index 1ca7b1a9e958c221f44c48bced04c314c0957f8c..2afe535b5de0804927f2798850572caf9267b044 100644 (file)
@@ -11,7 +11,7 @@ from ..utils import (
 
 
 class MySpassIE(InfoExtractor):
-    _VALID_URL = r'https?://www\.myspass\.de/.*'
+    _VALID_URL = r'https?://(?:www\.)?myspass\.de/.*'
     _TEST = {
         'url': 'http://www.myspass.de/myspass/shows/tvshows/absolute-mehrheit/Absolute-Mehrheit-vom-17022013-Die-Highlights-Teil-2--/11741/',
         'md5': '0b49f4844a068f8b33f4b7c88405862b',
index 731c245428103b3ea96f5c396b063afadac82702..2117d302d6493e995a522a2726d312d46e76bda2 100644 (file)
@@ -13,7 +13,7 @@ class MyVidsterIE(InfoExtractor):
             'id': '3685814',
             'title': 'md5:7d8427d6d02c4fbcef50fe269980c749',
             'upload_date': '20141027',
-            'uploader_id': 'utkualp',
+            'uploader': 'utkualp',
             'ext': 'mp4',
             'age_limit': 18,
         },
index 1dcf27afef331ceb3b09d6ade66cde65e92e3cd6..b91d865286e47affdc66c138dde9507963d62733 100644 (file)
@@ -4,6 +4,7 @@ import re
 
 from .common import InfoExtractor
 from .adobepass import AdobePassIE
+from .theplatform import ThePlatformIE
 from ..utils import (
     smuggle_url,
     url_basename,
@@ -65,7 +66,7 @@ class NationalGeographicVideoIE(InfoExtractor):
         }
 
 
-class NationalGeographicIE(AdobePassIE):
+class NationalGeographicIE(ThePlatformIE, AdobePassIE):
     IE_NAME = 'natgeo'
     _VALID_URL = r'https?://channel\.nationalgeographic\.com/(?:wild/)?[^/]+/(?:videos|episodes)/(?P<id>[^/?]+)'
 
@@ -110,25 +111,39 @@ class NationalGeographicIE(AdobePassIE):
         release_url = self._search_regex(
             r'video_auth_playlist_url\s*=\s*"([^"]+)"',
             webpage, 'release url')
+        theplatform_path = self._search_regex(r'https?://link.theplatform.com/s/([^?]+)', release_url, 'theplatform path')
+        video_id = theplatform_path.split('/')[-1]
         query = {
             'mbr': 'true',
-            'switch': 'http',
         }
         is_auth = self._search_regex(r'video_is_auth\s*=\s*"([^"]+)"', webpage, 'is auth', fatal=False)
         if is_auth == 'auth':
             auth_resource_id = self._search_regex(
                 r"video_auth_resourceId\s*=\s*'([^']+)'",
                 webpage, 'auth resource id')
-            query['auth'] = self._extract_mvpd_auth(url, display_id, 'natgeo', auth_resource_id)
-
-        return {
-            '_type': 'url_transparent',
-            'ie_key': 'ThePlatform',
-            'url': smuggle_url(
-                update_url_query(release_url, query),
-                {'force_smil_url': True}),
+            query['auth'] = self._extract_mvpd_auth(url, video_id, 'natgeo', auth_resource_id)
+
+        formats = []
+        subtitles = {}
+        for key, value in (('switch', 'http'), ('manifest', 'm3u')):
+            tp_query = query.copy()
+            tp_query.update({
+                key: value,
+            })
+            tp_formats, tp_subtitles = self._extract_theplatform_smil(
+                update_url_query(release_url, tp_query), video_id, 'Downloading %s SMIL data' % value)
+            formats.extend(tp_formats)
+            subtitles = self._merge_subtitles(subtitles, tp_subtitles)
+        self._sort_formats(formats)
+
+        info = self._extract_theplatform_metadata(theplatform_path, display_id)
+        info.update({
+            'id': video_id,
+            'formats': formats,
+            'subtitles': subtitles,
             'display_id': display_id,
-        }
+        })
+        return info
 
 
 class NationalGeographicEpisodeGuideIE(InfoExtractor):
index 0891d2772cd53f9c686fed9e9cd50c77ddc80bb1..055070ff54fd8990c2e58ab1d6df037b19f3a029 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 import re
index d896b0d04810655c1d7c993819b88e7b32029832..53561961c12611eeead082ed662e44a75e38acbf 100644 (file)
@@ -1,25 +1,20 @@
 from __future__ import unicode_literals
 
 import functools
-import os.path
 import re
 
-from .common import InfoExtractor
+from .turner import TurnerBaseIE
 from ..compat import (
     compat_urllib_parse_urlencode,
     compat_urlparse,
 )
 from ..utils import (
-    int_or_none,
     OnDemandPagedList,
-    parse_duration,
     remove_start,
-    xpath_text,
-    xpath_attr,
 )
 
 
-class NBAIE(InfoExtractor):
+class NBAIE(TurnerBaseIE):
     _VALID_URL = r'https?://(?:watch\.|www\.)?nba\.com/(?P<path>(?:[^/]+/)+(?P<id>[^?]*?))/?(?:/index\.html)?(?:\?.*)?$'
     _TESTS = [{
         'url': 'http://www.nba.com/video/games/nets/2012/12/04/0021200253-okc-bkn-recap.nba/index.html',
@@ -44,28 +39,30 @@ class NBAIE(InfoExtractor):
         'url': 'http://watch.nba.com/video/channels/playoffs/2015/05/20/0041400301-cle-atl-recap.nba',
         'md5': 'b2b39b81cf28615ae0c3360a3f9668c4',
         'info_dict': {
-            'id': '0041400301-cle-atl-recap',
+            'id': 'channels/playoffs/2015/05/20/0041400301-cle-atl-recap.nba',
             'ext': 'mp4',
             'title': 'Hawks vs. Cavaliers Game 1',
             'description': 'md5:8094c3498d35a9bd6b1a8c396a071b4d',
             'duration': 228,
             'timestamp': 1432134543,
             'upload_date': '20150520',
-        }
+        },
+        'expected_warnings': ['Unable to download f4m manifest'],
     }, {
         'url': 'http://www.nba.com/clippers/news/doc-rivers-were-not-trading-blake',
         'info_dict': {
-            'id': '1455672027478-Doc_Feb16_720',
+            'id': 'teams/clippers/2016/02/17/1455672027478-Doc_Feb16_720.mov-297324',
             'ext': 'mp4',
             'title': 'Practice: Doc Rivers - 2/16/16',
             'description': 'Head Coach Doc Rivers addresses the media following practice.',
-            'upload_date': '20160217',
+            'upload_date': '20160216',
             'timestamp': 1455672000,
         },
         'params': {
             # m3u8 download
             'skip_download': True,
         },
+        'expected_warnings': ['Unable to download f4m manifest'],
     }, {
         'url': 'http://www.nba.com/timberwolves/wiggins-shootaround#',
         'info_dict': {
@@ -80,7 +77,7 @@ class NBAIE(InfoExtractor):
     }, {
         'url': 'http://www.nba.com/timberwolves/wiggins-shootaround#',
         'info_dict': {
-            'id': 'Wigginsmp4',
+            'id': 'teams/timberwolves/2014/12/12/Wigginsmp4-3462601',
             'ext': 'mp4',
             'title': 'Shootaround Access - Dec. 12 | Andrew Wiggins',
             'description': 'Wolves rookie Andrew Wiggins addresses the media after Friday\'s shootaround.',
@@ -92,6 +89,7 @@ class NBAIE(InfoExtractor):
             # m3u8 download
             'skip_download': True,
         },
+        'expected_warnings': ['Unable to download f4m manifest'],
     }]
 
     _PAGE_SIZE = 30
@@ -145,53 +143,12 @@ class NBAIE(InfoExtractor):
             if path.startswith('video/teams'):
                 path = 'video/channels/proxy/' + path[6:]
 
-        video_info = self._download_xml('http://www.nba.com/%s.xml' % path, video_id)
-        video_id = os.path.splitext(xpath_text(video_info, 'slug'))[0]
-        title = xpath_text(video_info, 'headline')
-        description = xpath_text(video_info, 'description')
-        duration = parse_duration(xpath_text(video_info, 'length'))
-        timestamp = int_or_none(xpath_attr(video_info, 'dateCreated', 'uts'))
-
-        thumbnails = []
-        for image in video_info.find('images'):
-            thumbnails.append({
-                'id': image.attrib.get('cut'),
-                'url': image.text,
-                'width': int_or_none(image.attrib.get('width')),
-                'height': int_or_none(image.attrib.get('height')),
+        return self._extract_cvp_info(
+            'http://www.nba.com/%s.xml' % path, video_id, {
+                'default': {
+                    'media_src': 'http://nba.cdn.turner.com/nba/big',
+                },
+                'm3u8': {
+                    'media_src': 'http://nbavod-f.akamaihd.net',
+                },
             })
-
-        formats = []
-        for video_file in video_info.findall('.//file'):
-            video_url = video_file.text
-            if video_url.startswith('/'):
-                continue
-            if video_url.endswith('.m3u8'):
-                formats.extend(self._extract_m3u8_formats(video_url, video_id, ext='mp4', m3u8_id='hls', fatal=False))
-            elif video_url.endswith('.f4m'):
-                formats.extend(self._extract_f4m_formats(video_url + '?hdcore=3.4.1.1', video_id, f4m_id='hds', fatal=False))
-            else:
-                key = video_file.attrib.get('bitrate')
-                format_info = {
-                    'format_id': key,
-                    'url': video_url,
-                }
-                mobj = re.search(r'(\d+)x(\d+)(?:_(\d+))?', key)
-                if mobj:
-                    format_info.update({
-                        'width': int(mobj.group(1)),
-                        'height': int(mobj.group(2)),
-                        'tbr': int_or_none(mobj.group(3)),
-                    })
-                formats.append(format_info)
-        self._sort_formats(formats)
-
-        return {
-            'id': video_id,
-            'title': title,
-            'description': description,
-            'duration': duration,
-            'timestamp': timestamp,
-            'thumbnails': thumbnails,
-            'formats': formats,
-        }
index f694e210b1dadceb030cb24f6498abe30de5b976..7f1bd9229303ec0390c9d10937374a0cc986790b 100644 (file)
@@ -13,7 +13,7 @@ from ..utils import (
 
 
 class NBCIE(InfoExtractor):
-    _VALID_URL = r'https?://www\.nbc\.com/(?:[^/]+/)+(?P<id>n?\d+)'
+    _VALID_URL = r'https?://(?:www\.)?nbc\.com/(?:[^/]+/)+(?P<id>n?\d+)'
 
     _TESTS = [
         {
@@ -138,7 +138,7 @@ class NBCSportsVPlayerIE(InfoExtractor):
 
 class NBCSportsIE(InfoExtractor):
     # Does not include https because its certificate is invalid
-    _VALID_URL = r'https?://www\.nbcsports\.com//?(?:[^/]+/)+(?P<id>[0-9a-z-]+)'
+    _VALID_URL = r'https?://(?:www\.)?nbcsports\.com//?(?:[^/]+/)+(?P<id>[0-9a-z-]+)'
 
     _TEST = {
         'url': 'http://www.nbcsports.com//college-basketball/ncaab/tom-izzo-michigan-st-has-so-much-respect-duke',
@@ -161,7 +161,7 @@ class NBCSportsIE(InfoExtractor):
 
 
 class CSNNEIE(InfoExtractor):
-    _VALID_URL = r'https?://www\.csnne\.com/video/(?P<id>[0-9a-z-]+)'
+    _VALID_URL = r'https?://(?:www\.)?csnne\.com/video/(?P<id>[0-9a-z-]+)'
 
     _TEST = {
         'url': 'http://www.csnne.com/video/snc-evening-update-wright-named-red-sox-no-5-starter',
@@ -335,3 +335,43 @@ class NBCNewsIE(ThePlatformIE):
                 'url': 'http://feed.theplatform.com/f/2E2eJC/nnd_NBCNews?byId=%s' % video_id,
                 'ie_key': 'ThePlatformFeed',
             }
+
+
+class NBCOlympicsIE(InfoExtractor):
+    _VALID_URL = r'https?://www\.nbcolympics\.com/video/(?P<id>[a-z-]+)'
+
+    _TEST = {
+        # Geo-restricted to US
+        'url': 'http://www.nbcolympics.com/video/justin-roses-son-leo-was-tears-after-his-dad-won-gold',
+        'md5': '54fecf846d05429fbaa18af557ee523a',
+        'info_dict': {
+            'id': 'WjTBzDXx5AUq',
+            'display_id': 'justin-roses-son-leo-was-tears-after-his-dad-won-gold',
+            'ext': 'mp4',
+            'title': 'Rose\'s son Leo was in tears after his dad won gold',
+            'description': 'Olympic gold medalist Justin Rose gets emotional talking to the impact his win in men\'s golf has already had on his children.',
+            'timestamp': 1471274964,
+            'upload_date': '20160815',
+            'uploader': 'NBCU-SPORTS',
+        },
+    }
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, display_id)
+
+        drupal_settings = self._parse_json(self._search_regex(
+            r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);',
+            webpage, 'drupal settings'), display_id)
+
+        iframe_url = drupal_settings['vod']['iframe_url']
+        theplatform_url = iframe_url.replace(
+            'vplayer.nbcolympics.com', 'player.theplatform.com')
+
+        return {
+            '_type': 'url_transparent',
+            'url': theplatform_url,
+            'ie_key': ThePlatformIE.ie_key(),
+            'display_id': display_id,
+        }
index 0cded6b5c3d0bbcb095de8672de70fa81b9f7fd1..e3b0da2e966eb9486ab5307a933c51d74f2a14ba 100644 (file)
@@ -23,7 +23,7 @@ class NDRBaseIE(InfoExtractor):
 class NDRIE(NDRBaseIE):
     IE_NAME = 'ndr'
     IE_DESC = 'NDR.de - Norddeutscher Rundfunk'
-    _VALID_URL = r'https?://www\.ndr\.de/(?:[^/]+/)*(?P<id>[^/?#]+),[\da-z]+\.html'
+    _VALID_URL = r'https?://(?:www\.)?ndr\.de/(?:[^/]+/)*(?P<id>[^/?#]+),[\da-z]+\.html'
     _TESTS = [{
         # httpVideo, same content id
         'url': 'http://www.ndr.de/fernsehen/Party-Poette-und-Parade,hafengeburtstag988.html',
@@ -105,7 +105,7 @@ class NDRIE(NDRBaseIE):
 class NJoyIE(NDRBaseIE):
     IE_NAME = 'njoy'
     IE_DESC = 'N-JOY'
-    _VALID_URL = r'https?://www\.n-joy\.de/(?:[^/]+/)*(?:(?P<display_id>[^/?#]+),)?(?P<id>[\da-z]+)\.html'
+    _VALID_URL = r'https?://(?:www\.)?n-joy\.de/(?:[^/]+/)*(?:(?P<display_id>[^/?#]+),)?(?P<id>[\da-z]+)\.html'
     _TESTS = [{
         # httpVideo, same content id
         'url': 'http://www.n-joy.de/entertainment/comedy/comedy_contest/Benaissa-beim-NDR-Comedy-Contest,comedycontest2480.html',
@@ -238,7 +238,7 @@ class NDREmbedBaseIE(InfoExtractor):
 
 class NDREmbedIE(NDREmbedBaseIE):
     IE_NAME = 'ndr:embed'
-    _VALID_URL = r'https?://www\.ndr\.de/(?:[^/]+/)*(?P<id>[\da-z]+)-(?:player|externalPlayer)\.html'
+    _VALID_URL = r'https?://(?:www\.)?ndr\.de/(?:[^/]+/)*(?P<id>[\da-z]+)-(?:player|externalPlayer)\.html'
     _TESTS = [{
         'url': 'http://www.ndr.de/fernsehen/sendungen/ndr_aktuell/ndraktuell28488-player.html',
         'md5': '8b9306142fe65bbdefb5ce24edb6b0a9',
@@ -332,7 +332,7 @@ class NDREmbedIE(NDREmbedBaseIE):
 
 class NJoyEmbedIE(NDREmbedBaseIE):
     IE_NAME = 'njoy:embed'
-    _VALID_URL = r'https?://www\.n-joy\.de/(?:[^/]+/)*(?P<id>[\da-z]+)-(?:player|externalPlayer)_[^/]+\.html'
+    _VALID_URL = r'https?://(?:www\.)?n-joy\.de/(?:[^/]+/)*(?P<id>[\da-z]+)-(?:player|externalPlayer)_[^/]+\.html'
     _TESTS = [{
         # httpVideo
         'url': 'http://www.n-joy.de/events/reeperbahnfestival/doku948-player_image-bc168e87-5263-4d6d-bd27-bb643005a6de_theme-n-joy.html',
index 7059403239ce19ac8b2861fa4af0dde93c98467b..9bea610c88a4ac48cc1b84ed5c3fae45789d643e 100644 (file)
@@ -1,15 +1,12 @@
 from __future__ import unicode_literals
 
-import json
-import re
-
 from .common import InfoExtractor
 
 
 class NewgroundsIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?newgrounds\.com/(?:audio/listen|portal/view)/(?P<id>[0-9]+)'
     _TESTS = [{
-        'url': 'http://www.newgrounds.com/audio/listen/549479',
+        'url': 'https://www.newgrounds.com/audio/listen/549479',
         'md5': 'fe6033d297591288fa1c1f780386f07a',
         'info_dict': {
             'id': '549479',
@@ -18,7 +15,7 @@ class NewgroundsIE(InfoExtractor):
             'uploader': 'Burn7',
         }
     }, {
-        'url': 'http://www.newgrounds.com/portal/view/673111',
+        'url': 'https://www.newgrounds.com/portal/view/673111',
         'md5': '3394735822aab2478c31b1004fe5e5bc',
         'info_dict': {
             'id': '673111',
@@ -29,24 +26,20 @@ class NewgroundsIE(InfoExtractor):
     }]
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        music_id = mobj.group('id')
-        webpage = self._download_webpage(url, music_id)
+        media_id = self._match_id(url)
+        webpage = self._download_webpage(url, media_id)
 
         title = self._html_search_regex(
             r'<title>([^>]+)</title>', webpage, 'title')
 
         uploader = self._html_search_regex(
-            [r',"artist":"([^"]+)",', r'[\'"]owner[\'"]\s*:\s*[\'"]([^\'"]+)[\'"],'],
-            webpage, 'uploader')
+            r'Author\s*<a[^>]+>([^<]+)', webpage, 'uploader', fatal=False)
 
-        music_url_json_string = self._html_search_regex(
-            r'({"url":"[^"]+"),', webpage, 'music url') + '}'
-        music_url_json = json.loads(music_url_json_string)
-        music_url = music_url_json['url']
+        music_url = self._parse_json(self._search_regex(
+            r'"url":("[^"]+"),', webpage, ''), media_id)
 
         return {
-            'id': music_id,
+            'id': media_id,
             'title': title,
             'url': music_url,
             'uploader': uploader,
index 0092b85ceaa27e9b190a05ea1d4dff299351b6c7..e3f35f1d8b6d526d13bf7b301e57c9570bff3af4 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 import re
index aae7aeeebb8e2adebd2669bcd899caec3432275d..dee9056d39e9bb0076d390054006c6dd4246afae 100644 (file)
@@ -7,7 +7,7 @@ from ..utils import parse_iso8601
 
 class NextMediaIE(InfoExtractor):
     IE_DESC = '蘋果日報'
-    _VALID_URL = r'https?://hk.apple.nextmedia.com/[^/]+/[^/]+/(?P<date>\d+)/(?P<id>\d+)'
+    _VALID_URL = r'https?://hk\.apple\.nextmedia\.com/[^/]+/[^/]+/(?P<date>\d+)/(?P<id>\d+)'
     _TESTS = [{
         'url': 'http://hk.apple.nextmedia.com/realtime/news/20141108/53109199',
         'md5': 'dff9fad7009311c421176d1ac90bfe4f',
@@ -68,7 +68,7 @@ class NextMediaIE(InfoExtractor):
 
 class NextMediaActionNewsIE(NextMediaIE):
     IE_DESC = '蘋果日報 - 動新聞'
-    _VALID_URL = r'https?://hk.dv.nextmedia.com/actionnews/[^/]+/(?P<date>\d+)/(?P<id>\d+)/\d+'
+    _VALID_URL = r'https?://hk\.dv\.nextmedia\.com/actionnews/[^/]+/(?P<date>\d+)/(?P<id>\d+)/\d+'
     _TESTS = [{
         'url': 'http://hk.dv.nextmedia.com/actionnews/hit/20150121/19009428/20061460',
         'md5': '05fce8ffeed7a5e00665d4b7cf0f9201',
@@ -93,7 +93,7 @@ class NextMediaActionNewsIE(NextMediaIE):
 
 class AppleDailyIE(NextMediaIE):
     IE_DESC = '臺灣蘋果日報'
-    _VALID_URL = r'https?://(www|ent).appledaily.com.tw/(?:animation|appledaily|enews|realtimenews)/[^/]+/[^/]+/(?P<date>\d+)/(?P<id>\d+)(/.*)?'
+    _VALID_URL = r'https?://(www|ent)\.appledaily\.com\.tw/(?:animation|appledaily|enews|realtimenews|actionnews)/[^/]+/[^/]+/(?P<date>\d+)/(?P<id>\d+)(/.*)?'
     _TESTS = [{
         'url': 'http://ent.appledaily.com.tw/enews/article/entertainment/20150128/36354694',
         'md5': 'a843ab23d150977cc55ef94f1e2c1e4d',
@@ -154,6 +154,9 @@ class AppleDailyIE(NextMediaIE):
             'description': 'md5:7b859991a6a4fedbdf3dd3b66545c748',
             'upload_date': '20140417',
         },
+    }, {
+        'url': 'http://www.appledaily.com.tw/actionnews/appledaily/7/20161003/960588/',
+        'only_matching': True,
     }]
 
     _URL_PATTERN = r'\{url: \'(.+)\'\}'
index 200874d68e765e43a6b9787473c6d2b5af54cfb2..3930d16f16e4d295e9afeb84f88eb36dc7ffc30b 100644 (file)
@@ -165,7 +165,7 @@ class NFLIE(InfoExtractor):
             group='config'))
         # For articles, the id in the url is not the video id
         video_id = self._search_regex(
-            r'(?:<nflcs:avplayer[^>]+data-content[Ii]d\s*=\s*|content[Ii]d\s*:\s*)(["\'])(?P<id>.+?)\1',
+            r'(?:<nflcs:avplayer[^>]+data-content[Ii]d\s*=\s*|content[Ii]d\s*:\s*)(["\'])(?P<id>(?:(?!\1).)+)\1',
             webpage, 'video id', default=video_id, group='id')
         config = self._download_json(config_url, video_id, 'Downloading player config')
         url_template = NFLIE.prepend_host(
diff --git a/youtube_dl/extractor/nhk.py b/youtube_dl/extractor/nhk.py
new file mode 100644 (file)
index 0000000..5c8cd76
--- /dev/null
@@ -0,0 +1,51 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import ExtractorError
+
+
+class NhkVodIE(InfoExtractor):
+    _VALID_URL = r'https?://www3\.nhk\.or\.jp/nhkworld/en/vod/(?P<id>[^/]+/[^/?#&]+)'
+    _TEST = {
+        # Videos available only for a limited period of time. Visit
+        # http://www3.nhk.or.jp/nhkworld/en/vod/ for working samples.
+        'url': 'http://www3.nhk.or.jp/nhkworld/en/vod/tokyofashion/20160815',
+        'info_dict': {
+            'id': 'A1bnNiNTE6nY3jLllS-BIISfcC_PpvF5',
+            'ext': 'flv',
+            'title': 'TOKYO FASHION EXPRESS - The Kimono as Global Fashion',
+            'description': 'md5:db338ee6ce8204f415b754782f819824',
+            'series': 'TOKYO FASHION EXPRESS',
+            'episode': 'The Kimono as Global Fashion',
+        },
+        'skip': 'Videos available only for a limited period of time',
+    }
+    _API_URL = 'http://api.nhk.or.jp/nhkworld/vodesdlist/v1/all/all/all.json?apikey=EJfK8jdS57GqlupFgAfAAwr573q01y6k'
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        data = self._download_json(self._API_URL, video_id)
+
+        try:
+            episode = next(
+                e for e in data['data']['episodes']
+                if e.get('url') and video_id in e['url'])
+        except StopIteration:
+            raise ExtractorError('Unable to find episode')
+
+        embed_code = episode['vod_id']
+
+        title = episode.get('sub_title_clean') or episode['sub_title']
+        description = episode.get('description_clean') or episode.get('description')
+        series = episode.get('title_clean') or episode.get('title')
+
+        return {
+            '_type': 'url_transparent',
+            'ie_key': 'Ooyala',
+            'url': 'ooyala:%s' % embed_code,
+            'title': '%s - %s' % (series, title) if series and title else title,
+            'description': description,
+            'series': series,
+            'episode': title,
+        }
index b04d2111312d5a9e956762b10a30422ae5a8cd64..62ce800c072d2a316a0c6b8b7479cc89dc29b90d 100644 (file)
@@ -245,7 +245,11 @@ class NHLVideocenterCategoryIE(NHLBaseInfoExtractor):
 
 class NHLIE(InfoExtractor):
     IE_NAME = 'nhl.com'
-    _VALID_URL = r'https?://(?:www\.)?nhl\.com/([^/]+/)*c-(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www\.)?(?P<site>nhl|wch2016)\.com/(?:[^/]+/)*c-(?P<id>\d+)'
+    _SITES_MAP = {
+        'nhl': 'nhl',
+        'wch2016': 'wch',
+    }
     _TESTS = [{
         # type=video
         'url': 'https://www.nhl.com/video/anisimov-cleans-up-mess/t-277752844/c-43663503',
@@ -270,13 +274,32 @@ class NHLIE(InfoExtractor):
             'upload_date': '20160204',
             'timestamp': 1454544904,
         },
+    }, {
+        # Some m3u8 URLs are invalid (https://github.com/rg3/youtube-dl/issues/10713)
+        'url': 'https://www.nhl.com/predators/video/poile-laviolette-on-subban-trade/t-277437416/c-44315003',
+        'md5': '50b2bb47f405121484dda3ccbea25459',
+        'info_dict': {
+            'id': '44315003',
+            'ext': 'mp4',
+            'title': 'Poile, Laviolette on Subban trade',
+            'description': 'General manager David Poile and head coach Peter Laviolette share their thoughts on acquiring P.K. Subban from Montreal (06/29/16)',
+            'timestamp': 1467242866,
+            'upload_date': '20160629',
+        },
+    }, {
+        'url': 'https://www.wch2016.com/video/caneur-best-of-game-2-micd-up/t-281230378/c-44983703',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.wch2016.com/news/3-stars-team-europe-vs-team-canada/c-282195068',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
-        tmp_id = self._match_id(url)
+        mobj = re.match(self._VALID_URL, url)
+        tmp_id, site = mobj.group('id'), mobj.group('site')
         video_data = self._download_json(
-            'https://nhl.bamcontent.com/nhl/id/v1/%s/details/web-v1.json' % tmp_id,
-            tmp_id)
+            'https://nhl.bamcontent.com/%s/id/v1/%s/details/web-v1.json'
+            % (self._SITES_MAP[site], tmp_id), tmp_id)
         if video_data.get('type') == 'article':
             video_data = video_data['media']
 
@@ -290,9 +313,11 @@ class NHLIE(InfoExtractor):
                 continue
             ext = determine_ext(playback_url)
             if ext == 'm3u8':
-                formats.extend(self._extract_m3u8_formats(
+                m3u8_formats = self._extract_m3u8_formats(
                     playback_url, video_id, 'mp4', 'm3u8_native',
-                    m3u8_id=playback.get('name', 'hls'), fatal=False))
+                    m3u8_id=playback.get('name', 'hls'), fatal=False)
+                self._check_formats(m3u8_formats, video_id)
+                formats.extend(m3u8_formats)
             else:
                 height = int_or_none(playback.get('height'))
                 formats.append({
index 9c54846e14a86634d3e6b20deb8e1adb5e46de55..7672845bfd0c6ebbc08ef326f024f4a02bb44a71 100644 (file)
@@ -1,8 +1,9 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
+import re
+
 from .mtv import MTVServicesInfoExtractor
-from ..compat import compat_urllib_parse_urlencode
 from ..utils import update_url_query
 
 
@@ -59,10 +60,10 @@ class NickIE(MTVServicesInfoExtractor):
     }]
 
     def _get_feed_query(self, uri):
-        return compat_urllib_parse_urlencode({
+        return {
             'feed': 'nick_arc_player_prime',
             'mgid': uri,
-        })
+        }
 
     def _extract_mgid(self, webpage):
         return self._search_regex(r'data-contenturi="([^"]+)', webpage, 'mgid')
@@ -70,22 +71,53 @@ class NickIE(MTVServicesInfoExtractor):
 
 class NickDeIE(MTVServicesInfoExtractor):
     IE_NAME = 'nick.de'
-    _VALID_URL = r'https?://(?:www\.)?nick\.de/(?:playlist|shows)/(?:[^/]+/)*(?P<id>[^/?#&]+)'
+    _VALID_URL = r'https?://(?:www\.)?(?P<host>nick\.de|nickelodeon\.(?:nl|at))/(?:playlist|shows)/(?:[^/]+/)*(?P<id>[^/?#&]+)'
     _TESTS = [{
         'url': 'http://www.nick.de/playlist/3773-top-videos/videos/episode/17306-zu-wasser-und-zu-land-rauchende-erdnusse',
         'only_matching': True,
     }, {
         'url': 'http://www.nick.de/shows/342-icarly',
         'only_matching': True,
+    }, {
+        'url': 'http://www.nickelodeon.nl/shows/474-spongebob/videos/17403-een-kijkje-in-de-keuken-met-sandy-van-binnenuit',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.nickelodeon.at/playlist/3773-top-videos/videos/episode/77993-das-letzte-gefecht',
+        'only_matching': True,
     }]
 
+    def _extract_mrss_url(self, webpage, host):
+        return update_url_query(self._search_regex(
+            r'data-mrss=(["\'])(?P<url>http.+?)\1', webpage, 'mrss url', group='url'),
+            {'siteKey': host})
+
     def _real_extract(self, url):
-        video_id = self._match_id(url)
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        host = mobj.group('host')
 
         webpage = self._download_webpage(url, video_id)
 
-        mrss_url = update_url_query(self._search_regex(
-            r'data-mrss=(["\'])(?P<url>http.+?)\1', webpage, 'mrss url', group='url'),
-            {'siteKey': 'nick.de'})
+        mrss_url = self._extract_mrss_url(webpage, host)
 
         return self._get_videos_info_from_url(mrss_url, video_id)
+
+
+class NickNightIE(NickDeIE):
+    IE_NAME = 'nicknight'
+    _VALID_URL = r'https?://(?:www\.)(?P<host>nicknight\.(?:de|at|tv))/(?:playlist|shows)/(?:[^/]+/)*(?P<id>[^/?#&]+)'
+    _TESTS = [{
+        'url': 'http://www.nicknight.at/shows/977-awkward/videos/85987-nimmer-beste-freunde',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.nicknight.at/shows/977-awkward',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.nicknight.at/shows/1900-faking-it',
+        'only_matching': True,
+    }]
+
+    def _extract_mrss_url(self, webpage, *args):
+        return self._search_regex(
+            r'mrss\s*:\s*(["\'])(?P<url>http.+?)\1', webpage,
+            'mrss url', group='url')
index dd75a48afcc9dfa4a728c600c836741785056770..a104e33f8bdea73540779e41db45d92c1249668a 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 import re
@@ -252,7 +252,7 @@ class NiconicoIE(InfoExtractor):
 
 
 class NiconicoPlaylistIE(InfoExtractor):
-    _VALID_URL = r'https?://www\.nicovideo\.jp/mylist/(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www\.)?nicovideo\.jp/mylist/(?P<id>\d+)'
 
     _TEST = {
         'url': 'http://www.nicovideo.jp/mylist/27411728',
index d889245ad6885d9bbd88d0a1b335aa4b5a56f612..ec4d675e277f842172dcbaad49e4cb666c6b10f4 100644 (file)
@@ -4,40 +4,36 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
+from ..compat import compat_str
 from ..utils import (
     parse_iso8601,
-    parse_duration,
-    ExtractorError
+    float_or_none,
+    ExtractorError,
+    int_or_none,
 )
 
 
-class NineCNineMediaIE(InfoExtractor):
-    _VALID_URL = r'9c9media:(?P<destination_code>[^:]+):(?P<id>\d+)'
+class NineCNineMediaBaseIE(InfoExtractor):
+    _API_BASE_TEMPLATE = 'http://capi.9c9media.com/destinations/%s/platforms/desktop/contents/%s/'
+
+
+class NineCNineMediaStackIE(NineCNineMediaBaseIE):
+    IE_NAME = '9c9media:stack'
+    _VALID_URL = r'9c9media:stack:(?P<destination_code>[^:]+):(?P<content_id>\d+):(?P<content_package>\d+):(?P<id>\d+)'
 
     def _real_extract(self, url):
-        destination_code, video_id = re.match(self._VALID_URL, url).groups()
-        api_base_url = 'http://capi.9c9media.com/destinations/%s/platforms/desktop/contents/%s/' % (destination_code, video_id)
-        content = self._download_json(api_base_url, video_id, query={
-            '$include': '[contentpackages]',
-        })
-        title = content['Name']
-        if len(content['ContentPackages']) > 1:
-            raise ExtractorError('multiple content packages')
-        content_package = content['ContentPackages'][0]
-        stacks_base_url = api_base_url + 'contentpackages/%s/stacks/' % content_package['Id']
-        stacks = self._download_json(stacks_base_url, video_id)['Items']
-        if len(stacks) > 1:
-            raise ExtractorError('multiple stacks')
-        stack = stacks[0]
-        stack_base_url = '%s%s/manifest.' % (stacks_base_url, stack['Id'])
+        destination_code, content_id, package_id, stack_id = re.match(self._VALID_URL, url).groups()
+        stack_base_url_template = self._API_BASE_TEMPLATE + 'contentpackages/%s/stacks/%s/manifest.'
+        stack_base_url = stack_base_url_template % (destination_code, content_id, package_id, stack_id)
+
         formats = []
         formats.extend(self._extract_m3u8_formats(
-            stack_base_url + 'm3u8', video_id, 'mp4',
+            stack_base_url + 'm3u8', stack_id, 'mp4',
             'm3u8_native', m3u8_id='hls', fatal=False))
         formats.extend(self._extract_f4m_formats(
-            stack_base_url + 'f4m', video_id,
+            stack_base_url + 'f4m', stack_id,
             f4m_id='hds', fatal=False))
-        mp4_url = self._download_webpage(stack_base_url + 'pd', video_id, fatal=False)
+        mp4_url = self._download_webpage(stack_base_url + 'pd', stack_id, fatal=False)
         if mp4_url:
             formats.append({
                 'url': mp4_url,
@@ -46,10 +42,86 @@ class NineCNineMediaIE(InfoExtractor):
         self._sort_formats(formats)
 
         return {
-            'id': video_id,
-            'title': title,
-            'description': content.get('Desc') or content.get('ShortDesc'),
-            'timestamp': parse_iso8601(content.get('BroadcastDateTime')),
-            'duration': parse_duration(content.get('BroadcastTime')),
+            'id': stack_id,
             'formats': formats,
         }
+
+
+class NineCNineMediaIE(NineCNineMediaBaseIE):
+    IE_NAME = '9c9media'
+    _VALID_URL = r'9c9media:(?P<destination_code>[^:]+):(?P<id>\d+)'
+
+    def _real_extract(self, url):
+        destination_code, content_id = re.match(self._VALID_URL, url).groups()
+        api_base_url = self._API_BASE_TEMPLATE % (destination_code, content_id)
+        content = self._download_json(api_base_url, content_id, query={
+            '$include': '[Media,Season,ContentPackages]',
+        })
+        title = content['Name']
+        if len(content['ContentPackages']) > 1:
+            raise ExtractorError('multiple content packages')
+        content_package = content['ContentPackages'][0]
+        package_id = content_package['Id']
+        content_package_url = api_base_url + 'contentpackages/%s/' % package_id
+        content_package = self._download_json(content_package_url, content_id)
+
+        if content_package.get('Constraints', {}).get('Security', {}).get('Type') == 'adobe-drm':
+            raise ExtractorError('This video is DRM protected.', expected=True)
+
+        stacks = self._download_json(content_package_url + 'stacks/', package_id)['Items']
+        multistacks = len(stacks) > 1
+
+        thumbnails = []
+        for image in content.get('Images', []):
+            image_url = image.get('Url')
+            if not image_url:
+                continue
+            thumbnails.append({
+                'url': image_url,
+                'width': int_or_none(image.get('Width')),
+                'height': int_or_none(image.get('Height')),
+            })
+
+        tags, categories = [], []
+        for source_name, container in (('Tags', tags), ('Genres', categories)):
+            for e in content.get(source_name, []):
+                e_name = e.get('Name')
+                if not e_name:
+                    continue
+                container.append(e_name)
+
+        description = content.get('Desc') or content.get('ShortDesc')
+        season = content.get('Season', {})
+        base_info = {
+            'description': description,
+            'timestamp': parse_iso8601(content.get('BroadcastDateTime')),
+            'episode_number': int_or_none(content.get('Episode')),
+            'season': season.get('Name'),
+            'season_number': season.get('Number'),
+            'season_id': season.get('Id'),
+            'series': content.get('Media', {}).get('Name'),
+            'tags': tags,
+            'categories': categories,
+        }
+
+        entries = []
+        for stack in stacks:
+            stack_id = compat_str(stack['Id'])
+            entry = {
+                '_type': 'url_transparent',
+                'url': '9c9media:stack:%s:%s:%s:%s' % (destination_code, content_id, package_id, stack_id),
+                'id': stack_id,
+                'title': '%s_part%s' % (title, stack['Name']) if multistacks else title,
+                'duration': float_or_none(stack.get('Duration')),
+                'ie_key': 'NineCNineMediaStack',
+            }
+            entry.update(base_info)
+            entries.append(entry)
+
+        return {
+            '_type': 'multi_video',
+            'id': content_id,
+            'title': title,
+            'description': description,
+            'entries': entries,
+        }
index faa5772376cac8c27dacabbf84f6ae1d5000980e..351bea7baecccb520911829ff85db809d80c1eb6 100644 (file)
@@ -44,7 +44,20 @@ class NineNowIE(InfoExtractor):
         page_data = self._parse_json(self._search_regex(
             r'window\.__data\s*=\s*({.*?});', webpage,
             'page data'), display_id)
-        common_data = page_data.get('episode', {}).get('episode') or page_data.get('clip', {}).get('clip')
+
+        for kind in ('episode', 'clip'):
+            current_key = page_data.get(kind, {}).get(
+                'current%sKey' % kind.capitalize())
+            if not current_key:
+                continue
+            cache = page_data.get(kind, {}).get('%sCache' % kind, {})
+            if not cache:
+                continue
+            common_data = (cache.get(current_key) or list(cache.values())[0])[kind]
+            break
+        else:
+            raise ExtractorError('Unable to find video data')
+
         video_data = common_data['video']
 
         if video_data.get('drm'):
diff --git a/youtube_dl/extractor/nobelprize.py b/youtube_dl/extractor/nobelprize.py
new file mode 100644 (file)
index 0000000..4dfdb09
--- /dev/null
@@ -0,0 +1,62 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    js_to_json,
+    mimetype2ext,
+    determine_ext,
+    update_url_query,
+    get_element_by_attribute,
+    int_or_none,
+)
+
+
+class NobelPrizeIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?nobelprize\.org/mediaplayer.*?\bid=(?P<id>\d+)'
+    _TEST = {
+        'url': 'http://www.nobelprize.org/mediaplayer/?id=2636',
+        'md5': '04c81e5714bb36cc4e2232fee1d8157f',
+        'info_dict': {
+            'id': '2636',
+            'ext': 'mp4',
+            'title': 'Announcement of the 2016 Nobel Prize in Physics',
+            'description': 'md5:05beba57f4f5a4bbd4cf2ef28fcff739',
+        }
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+        media = self._parse_json(self._search_regex(
+            r'(?s)var\s*config\s*=\s*({.+?});', webpage,
+            'config'), video_id, js_to_json)['media']
+        title = media['title']
+
+        formats = []
+        for source in media.get('source', []):
+            source_src = source.get('src')
+            if not source_src:
+                continue
+            ext = mimetype2ext(source.get('type')) or determine_ext(source_src)
+            if ext == 'm3u8':
+                formats.extend(self._extract_m3u8_formats(
+                    source_src, video_id, 'mp4', 'm3u8_native',
+                    m3u8_id='hls', fatal=False))
+            elif ext == 'f4m':
+                formats.extend(self._extract_f4m_formats(
+                    update_url_query(source_src, {'hdcore': '3.7.0'}),
+                    video_id, f4m_id='hds', fatal=False))
+            else:
+                formats.append({
+                    'url': source_src,
+                })
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': get_element_by_attribute('itemprop', 'description', webpage),
+            'duration': int_or_none(media.get('duration')),
+            'formats': formats,
+        }
index 06f2bda07dd5db2c54e1e0492f244dbf0fc5a526..70ff2ab3653525664b4f1ae590393ee680a2f6e5 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 import re
index af44c3bb5714bc0079e3d1307782a8ff1fe5ba84..61fe571dfea17a3fab3206eb6eea0b7a2cbb979b 100644 (file)
@@ -1,8 +1,8 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
-from .screenwavemedia import ScreenwaveMediaIE
+from .jwplatform import JWPlatformIE
 
 from ..utils import (
     unified_strdate,
@@ -25,7 +25,7 @@ class NormalbootsIE(InfoExtractor):
             # m3u8 download
             'skip_download': True,
         },
-        'add_ie': ['ScreenwaveMedia'],
+        'add_ie': ['JWPlatform'],
     }
 
     def _real_extract(self, url):
@@ -39,15 +39,13 @@ class NormalbootsIE(InfoExtractor):
             r'<span style="text-transform:uppercase; font-size:inherit;">[A-Za-z]+, (?P<date>.*)</span>',
             webpage, 'date', fatal=False))
 
-        screenwavemedia_url = self._html_search_regex(
-            ScreenwaveMediaIE.EMBED_PATTERN, webpage, 'screenwave URL',
-            group='url')
+        jwplatform_url = JWPlatformIE._extract_url(webpage)
 
         return {
             '_type': 'url_transparent',
             'id': video_id,
-            'url': screenwavemedia_url,
-            'ie_key': ScreenwaveMediaIE.ie_key(),
+            'url': jwplatform_url,
+            'ie_key': JWPlatformIE.ie_key(),
             'title': self._og_search_title(webpage),
             'description': self._og_search_description(webpage),
             'thumbnail': self._og_search_thumbnail(webpage),
index 17671ad398b9e9a8148bceff74db678969d26d3f..103952345aa98ed186515452baf2f945409ffdaa 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 import re
index 74860eb2054e4f685b4b52c89149e49563ffe230..7e53463164b281e84a349a6fc382f5e203f278a4 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 from .brightcove import (
index 87f5675c7ff8b14169291420feb9bcf85edf894d..c91f5846171be2a720523a4531313703d18920fd 100644 (file)
@@ -3,12 +3,15 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
+from ..compat import compat_HTTPError
 from ..utils import (
     fix_xml_ampersands,
+    orderedSet,
     parse_duration,
     qualities,
     strip_jsonp,
     unified_strdate,
+    ExtractorError,
 )
 
 
@@ -180,9 +183,16 @@ class NPOIE(NPOBaseIE):
                     continue
                 streams = format_info.get('streams')
                 if streams:
-                    video_info = self._download_json(
-                        streams[0] + '&type=json',
-                        video_id, 'Downloading %s stream JSON' % format_id)
+                    try:
+                        video_info = self._download_json(
+                            streams[0] + '&type=json',
+                            video_id, 'Downloading %s stream JSON' % format_id)
+                    except ExtractorError as ee:
+                        if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404:
+                            error = (self._parse_json(ee.cause.read().decode(), video_id, fatal=False) or {}).get('errorstring')
+                            if error:
+                                raise ExtractorError(error, expected=True)
+                        raise
                 else:
                     video_info = format_info
                 video_url = video_info.get('url')
@@ -429,7 +439,7 @@ class SchoolTVIE(InfoExtractor):
         display_id = self._match_id(url)
         webpage = self._download_webpage(url, display_id)
         video_id = self._search_regex(
-            r'data-mid=(["\'])(?P<id>.+?)\1', webpage, 'video_id', group='id')
+            r'data-mid=(["\'])(?P<id>(?:(?!\1).)+)\1', webpage, 'video_id', group='id')
         return {
             '_type': 'url_transparent',
             'ie_key': 'NPO',
@@ -438,9 +448,30 @@ class SchoolTVIE(InfoExtractor):
         }
 
 
-class VPROIE(NPOIE):
+class NPOPlaylistBaseIE(NPOIE):
+    def _real_extract(self, url):
+        playlist_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, playlist_id)
+
+        entries = [
+            self.url_result('npo:%s' % video_id if not video_id.startswith('http') else video_id)
+            for video_id in orderedSet(re.findall(self._PLAYLIST_ENTRY_RE, webpage))
+        ]
+
+        playlist_title = self._html_search_regex(
+            self._PLAYLIST_TITLE_RE, webpage, 'playlist title',
+            default=None) or self._og_search_title(webpage)
+
+        return self.playlist_result(entries, playlist_id, playlist_title)
+
+
+class VPROIE(NPOPlaylistBaseIE):
     IE_NAME = 'vpro'
-    _VALID_URL = r'https?://(?:www\.)?(?:tegenlicht\.)?vpro\.nl/(?:[^/]+/){2,}(?P<id>[^/]+)\.html'
+    _VALID_URL = r'https?://(?:www\.)?(?:(?:tegenlicht\.)?vpro|2doc)\.nl/(?:[^/]+/)*(?P<id>[^/]+)\.html'
+    _PLAYLIST_TITLE_RE = (r'<h1[^>]+class=["\'].*?\bmedia-platform-title\b.*?["\'][^>]*>([^<]+)',
+                          r'<h5[^>]+class=["\'].*?\bmedia-platform-subtitle\b.*?["\'][^>]*>([^<]+)')
+    _PLAYLIST_ENTRY_RE = r'data-media-id="([^"]+)"'
 
     _TESTS = [
         {
@@ -453,12 +484,13 @@ class VPROIE(NPOIE):
                 'description': 'md5:52cf4eefbc96fffcbdc06d024147abea',
                 'upload_date': '20130225',
             },
+            'skip': 'Video gone',
         },
         {
             'url': 'http://www.vpro.nl/programmas/2doc/2015/sergio-herman.html',
             'info_dict': {
                 'id': 'sergio-herman',
-                'title': 'Sergio Herman: Fucking perfect',
+                'title': 'sergio herman: fucking perfect',
             },
             'playlist_count': 2,
         },
@@ -467,54 +499,61 @@ class VPROIE(NPOIE):
             'url': 'http://www.vpro.nl/programmas/2doc/2015/education-education.html',
             'info_dict': {
                 'id': 'education-education',
-                'title': '2Doc',
+                'title': 'education education',
+            },
+            'playlist_count': 2,
+        },
+        {
+            'url': 'http://www.2doc.nl/documentaires/series/2doc/2015/oktober/de-tegenprestatie.html',
+            'info_dict': {
+                'id': 'de-tegenprestatie',
+                'title': 'De Tegenprestatie',
             },
             'playlist_count': 2,
+        }, {
+            'url': 'http://www.2doc.nl/speel~VARA_101375237~mh17-het-verdriet-van-nederland~.html',
+            'info_dict': {
+                'id': 'VARA_101375237',
+                'ext': 'm4v',
+                'title': 'MH17: Het verdriet van Nederland',
+                'description': 'md5:09e1a37c1fdb144621e22479691a9f18',
+                'upload_date': '20150716',
+            },
+            'params': {
+                # Skip because of m3u8 download
+                'skip_download': True
+            },
         }
     ]
 
-    def _real_extract(self, url):
-        playlist_id = self._match_id(url)
-
-        webpage = self._download_webpage(url, playlist_id)
-
-        entries = [
-            self.url_result('npo:%s' % video_id if not video_id.startswith('http') else video_id)
-            for video_id in re.findall(r'data-media-id="([^"]+)"', webpage)
-        ]
-
-        playlist_title = self._search_regex(
-            r'<title>\s*([^>]+?)\s*-\s*Teledoc\s*-\s*VPRO\s*</title>',
-            webpage, 'playlist title', default=None) or self._og_search_title(webpage)
-
-        return self.playlist_result(entries, playlist_id, playlist_title)
-
 
-class WNLIE(InfoExtractor):
+class WNLIE(NPOPlaylistBaseIE):
+    IE_NAME = 'wnl'
     _VALID_URL = r'https?://(?:www\.)?omroepwnl\.nl/video/detail/(?P<id>[^/]+)__\d+'
+    _PLAYLIST_TITLE_RE = r'(?s)<h1[^>]+class="subject"[^>]*>(.+?)</h1>'
+    _PLAYLIST_ENTRY_RE = r'<a[^>]+href="([^"]+)"[^>]+class="js-mid"[^>]*>Deel \d+'
 
-    _TEST = {
+    _TESTS = [{
         'url': 'http://www.omroepwnl.nl/video/detail/vandaag-de-dag-6-mei__060515',
         'info_dict': {
             'id': 'vandaag-de-dag-6-mei',
             'title': 'Vandaag de Dag 6 mei',
         },
         'playlist_count': 4,
-    }
-
-    def _real_extract(self, url):
-        playlist_id = self._match_id(url)
-
-        webpage = self._download_webpage(url, playlist_id)
+    }]
 
-        entries = [
-            self.url_result('npo:%s' % video_id, 'NPO')
-            for video_id, part in re.findall(
-                r'<a[^>]+href="([^"]+)"[^>]+class="js-mid"[^>]*>(Deel \d+)', webpage)
-        ]
 
-        playlist_title = self._html_search_regex(
-            r'(?s)<h1[^>]+class="subject"[^>]*>(.+?)</h1>',
-            webpage, 'playlist title')
+class AndereTijdenIE(NPOPlaylistBaseIE):
+    IE_NAME = 'anderetijden'
+    _VALID_URL = r'https?://(?:www\.)?anderetijden\.nl/programma/(?:[^/]+/)+(?P<id>[^/?#&]+)'
+    _PLAYLIST_TITLE_RE = r'(?s)<h1[^>]+class=["\'].*?\bpage-title\b.*?["\'][^>]*>(.+?)</h1>'
+    _PLAYLIST_ENTRY_RE = r'<figure[^>]+class=["\']episode-container episode-page["\'][^>]+data-prid=["\'](.+?)["\']'
 
-        return self.playlist_result(entries, playlist_id, playlist_title)
+    _TESTS = [{
+        'url': 'http://anderetijden.nl/programma/1/Andere-Tijden/aflevering/676/Duitse-soldaten-over-de-Slag-bij-Arnhem',
+        'info_dict': {
+            'id': 'Duitse-soldaten-over-de-Slag-bij-Arnhem',
+            'title': 'Duitse soldaten over de Slag bij Arnhem',
+        },
+        'playlist_count': 3,
+    }]
index 6ded5bd456fa86bf16e1762601889b46f2d68fe9..c89aac63ee90f133074d8ade8b7af23cf020f148 100644 (file)
@@ -1,6 +1,7 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
+import random
 import re
 
 from .common import InfoExtractor
@@ -14,15 +15,24 @@ from ..utils import (
 
 
 class NRKBaseIE(InfoExtractor):
-    def _extract_formats(self, manifest_url, video_id, fatal=True):
-        formats = []
-        formats.extend(self._extract_f4m_formats(
-            manifest_url + '?hdcore=3.5.0&plugin=aasp-3.5.0.151.81',
-            video_id, f4m_id='hds', fatal=fatal))
-        formats.extend(self._extract_m3u8_formats(manifest_url.replace(
-            'akamaihd.net/z/', 'akamaihd.net/i/').replace('/manifest.f4m', '/master.m3u8'),
-            video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=fatal))
-        return formats
+    _faked_ip = None
+
+    def _download_webpage_handle(self, *args, **kwargs):
+        # NRK checks X-Forwarded-For HTTP header in order to figure out the
+        # origin of the client behind proxy. This allows to bypass geo
+        # restriction by faking this header's value to some Norway IP.
+        # We will do so once we encounter any geo restriction error.
+        if self._faked_ip:
+            # NB: str is intentional
+            kwargs.setdefault(str('headers'), {})['X-Forwarded-For'] = self._faked_ip
+        return super(NRKBaseIE, self)._download_webpage_handle(*args, **kwargs)
+
+    def _fake_ip(self):
+        # Use fake IP from 37.191.128.0/17 in order to workaround geo
+        # restriction
+        def octet(lb=0, ub=255):
+            return random.randint(lb, ub)
+        self._faked_ip = '37.191.%d.%d' % (octet(128), octet())
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
@@ -34,6 +44,8 @@ class NRKBaseIE(InfoExtractor):
         title = data.get('fullTitle') or data.get('mainTitle') or data['title']
         video_id = data.get('id') or video_id
 
+        http_headers = {'X-Forwarded-For': self._faked_ip} if self._faked_ip else {}
+
         entries = []
 
         media_assets = data.get('mediaAssets')
@@ -45,7 +57,7 @@ class NRKBaseIE(InfoExtractor):
                 asset_url = asset.get('url')
                 if not asset_url:
                     continue
-                formats = self._extract_formats(asset_url, video_id, fatal=False)
+                formats = self._extract_akamai_formats(asset_url, video_id)
                 if not formats:
                     continue
                 self._sort_formats(formats)
@@ -64,12 +76,13 @@ class NRKBaseIE(InfoExtractor):
                     'duration': duration,
                     'subtitles': subtitles,
                     'formats': formats,
+                    'http_headers': http_headers,
                 })
 
         if not entries:
             media_url = data.get('mediaUrl')
             if media_url:
-                formats = self._extract_formats(media_url, video_id)
+                formats = self._extract_akamai_formats(media_url, video_id)
                 self._sort_formats(formats)
                 duration = parse_duration(data.get('duration'))
                 entries = [{
@@ -80,10 +93,23 @@ class NRKBaseIE(InfoExtractor):
                 }]
 
         if not entries:
-            if data.get('usageRights', {}).get('isGeoBlocked'):
-                raise ExtractorError(
-                    'NRK har ikke rettigheter til å vise dette programmet utenfor Norge',
-                    expected=True)
+            message_type = data.get('messageType', '')
+            # Can be ProgramIsGeoBlocked or ChannelIsGeoBlocked*
+            if 'IsGeoBlocked' in message_type and not self._faked_ip:
+                self.report_warning(
+                    'Video is geo restricted, trying to fake IP')
+                self._fake_ip()
+                return self._real_extract(url)
+
+            MESSAGES = {
+                'ProgramRightsAreNotReady': 'Du kan dessverre ikke se eller høre programmet',
+                'ProgramRightsHasExpired': 'Programmet har gått ut',
+                'ProgramIsGeoBlocked': 'NRK har ikke rettigheter til å vise dette programmet utenfor Norge',
+            }
+            raise ExtractorError(
+                '%s said: %s' % (self.IE_NAME, MESSAGES.get(
+                    message_type, message_type)),
+                expected=True)
 
         conviva = data.get('convivaStatistics') or {}
         series = conviva.get('seriesName') or data.get('seriesTitle')
@@ -123,7 +149,17 @@ class NRKBaseIE(InfoExtractor):
 
 
 class NRKIE(NRKBaseIE):
-    _VALID_URL = r'(?:nrk:|https?://(?:www\.)?nrk\.no/video/PS\*)(?P<id>\d+)'
+    _VALID_URL = r'''(?x)
+                        (?:
+                            nrk:|
+                            https?://
+                                (?:
+                                    (?:www\.)?nrk\.no/video/PS\*|
+                                    v8-psapi\.nrk\.no/mediaelement/
+                                )
+                            )
+                            (?P<id>[^/?#&]+)
+                        '''
     _API_HOST = 'v8.psapi.nrk.no'
     _TESTS = [{
         # video
@@ -147,6 +183,12 @@ class NRKIE(NRKBaseIE):
             'description': 'md5:a621f5cc1bd75c8d5104cb048c6b8568',
             'duration': 20,
         }
+    }, {
+        'url': 'nrk:ecc1b952-96dc-4a98-81b9-5296dc7a98d9',
+        'only_matching': True,
+    }, {
+        'url': 'https://v8-psapi.nrk.no/mediaelement/ecc1b952-96dc-4a98-81b9-5296dc7a98d9',
+        'only_matching': True,
     }]
 
 
index a83e85cb8109ef44468851355f2b522e22fc5831..d28a8154247f75cbc612f7999083cd60275c5a88 100644 (file)
@@ -1,6 +1,8 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
+import re
+
 from .common import InfoExtractor
 from ..compat import compat_urlparse
 from ..utils import (
@@ -40,8 +42,8 @@ class NTVDeIE(InfoExtractor):
         timestamp = int_or_none(info.get('publishedDateAsUnixTimeStamp'))
         vdata = self._parse_json(self._search_regex(
             r'(?s)\$\(\s*"\#player"\s*\)\s*\.data\(\s*"player",\s*(\{.*?\})\);',
-            webpage, 'player data'),
-            video_id, transform_source=js_to_json)
+            webpage, 'player data'), video_id,
+            transform_source=lambda s: js_to_json(re.sub(r'advertising:\s*{[^}]+},', '', s)))
         duration = parse_duration(vdata.get('duration'))
 
         formats = []
index e8702ebcd72633f82a9bc15c55057d67d14d2401..7d7a785ab10e7b71ceb4729a012ebb574c7752d5 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
index ef093dec2201afb7cb24384d7120f6ec00158de4..87fb94d1f583f5b174fe8d9ace84e4791f3afa4e 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
index 681683e86f54e796f1c954de2c0cb374016fe303..2bb77ab249239163d8318a57e8fd0fdb57d2e32a 100644 (file)
@@ -1,26 +1,40 @@
+# coding: utf-8
 from __future__ import unicode_literals
 
+import hmac
+import hashlib
+import base64
+
 from .common import InfoExtractor
 from ..utils import (
+    determine_ext,
     float_or_none,
     int_or_none,
+    js_to_json,
+    mimetype2ext,
     parse_iso8601,
+    remove_start,
 )
 
 
 class NYTimesBaseIE(InfoExtractor):
+    _SECRET = b'pX(2MbU2);4N{7J8)>YwKRJ+/pQ3JkiU2Q^V>mFYv6g6gYvt6v'
+
     def _extract_video_from_id(self, video_id):
-        video_data = self._download_json(
-            'http://www.nytimes.com/svc/video/api/v2/video/%s' % video_id,
-            video_id, 'Downloading video JSON')
+        # Authorization generation algorithm is reverse engineered from `signer` in
+        # http://graphics8.nytimes.com/video/vhs/vhs-2.x.min.js
+        path = '/svc/video/api/v3/video/' + video_id
+        hm = hmac.new(self._SECRET, (path + ':vhs').encode(), hashlib.sha512).hexdigest()
+        video_data = self._download_json('http://www.nytimes.com' + path, video_id, 'Downloading video JSON', headers={
+            'Authorization': 'NYTV ' + base64.b64encode(hm.encode()).decode(),
+            'X-NYTV': 'vhs',
+        }, fatal=False)
+        if not video_data:
+            video_data = self._download_json(
+                'http://www.nytimes.com/svc/video/api/v2/video/' + video_id,
+                video_id, 'Downloading video JSON')
 
         title = video_data['headline']
-        description = video_data.get('summary')
-        duration = float_or_none(video_data.get('duration'), 1000)
-
-        uploader = video_data.get('byline')
-        publication_date = video_data.get('publication_date')
-        timestamp = parse_iso8601(publication_date[:-8]) if publication_date else None
 
         def get_file_size(file_size):
             if isinstance(file_size, int):
@@ -28,35 +42,59 @@ class NYTimesBaseIE(InfoExtractor):
             elif isinstance(file_size, dict):
                 return int(file_size.get('value', 0))
             else:
-                return 0
-
-        formats = [
-            {
-                'url': video['url'],
-                'format_id': video.get('type'),
-                'vcodec': video.get('video_codec'),
-                'width': int_or_none(video.get('width')),
-                'height': int_or_none(video.get('height')),
-                'filesize': get_file_size(video.get('fileSize')),
-            } for video in video_data['renditions'] if video.get('url')
-        ]
+                return None
+
+        urls = []
+        formats = []
+        for video in video_data.get('renditions', []):
+            video_url = video.get('url')
+            format_id = video.get('type')
+            if not video_url or format_id == 'thumbs' or video_url in urls:
+                continue
+            urls.append(video_url)
+            ext = mimetype2ext(video.get('mimetype')) or determine_ext(video_url)
+            if ext == 'm3u8':
+                formats.extend(self._extract_m3u8_formats(
+                    video_url, video_id, 'mp4', 'm3u8_native',
+                    m3u8_id=format_id or 'hls', fatal=False))
+            elif ext == 'mpd':
+                continue
+            #     formats.extend(self._extract_mpd_formats(
+            #         video_url, video_id, format_id or 'dash', fatal=False))
+            else:
+                formats.append({
+                    'url': video_url,
+                    'format_id': format_id,
+                    'vcodec': video.get('videoencoding') or video.get('video_codec'),
+                    'width': int_or_none(video.get('width')),
+                    'height': int_or_none(video.get('height')),
+                    'filesize': get_file_size(video.get('file_size') or video.get('fileSize')),
+                    'tbr': int_or_none(video.get('bitrate'), 1000),
+                    'ext': ext,
+                })
         self._sort_formats(formats)
 
-        thumbnails = [
-            {
-                'url': 'http://www.nytimes.com/%s' % image['url'],
+        thumbnails = []
+        for image in video_data.get('images', []):
+            image_url = image.get('url')
+            if not image_url:
+                continue
+            thumbnails.append({
+                'url': 'http://www.nytimes.com/' + image_url,
                 'width': int_or_none(image.get('width')),
                 'height': int_or_none(image.get('height')),
-            } for image in video_data.get('images', []) if image.get('url')
-        ]
+            })
+
+        publication_date = video_data.get('publication_date')
+        timestamp = parse_iso8601(publication_date[:-8]) if publication_date else None
 
         return {
             'id': video_id,
             'title': title,
-            'description': description,
+            'description': video_data.get('summary'),
             'timestamp': timestamp,
-            'uploader': uploader,
-            'duration': duration,
+            'uploader': video_data.get('byline'),
+            'duration': float_or_none(video_data.get('duration'), 1000),
             'formats': formats,
             'thumbnails': thumbnails,
         }
@@ -67,7 +105,7 @@ class NYTimesIE(NYTimesBaseIE):
 
     _TESTS = [{
         'url': 'http://www.nytimes.com/video/opinion/100000002847155/verbatim-what-is-a-photocopier.html?playlistId=100000001150263',
-        'md5': '18a525a510f942ada2720db5f31644c0',
+        'md5': 'd665342765db043f7e225cff19df0f2d',
         'info_dict': {
             'id': '100000002847155',
             'ext': 'mov',
@@ -103,16 +141,83 @@ class NYTimesArticleIE(NYTimesBaseIE):
             'upload_date': '20150414',
             'uploader': 'Matthew Williams',
         }
+    }, {
+        'url': 'http://www.nytimes.com/2016/10/14/podcasts/revelations-from-the-final-weeks.html',
+        'md5': 'e0d52040cafb07662acf3c9132db3575',
+        'info_dict': {
+            'id': '100000004709062',
+            'title': 'The Run-Up: ‘He Was Like an Octopus’',
+            'ext': 'mp3',
+            'description': 'md5:fb5c6b93b12efc51649b4847fe066ee4',
+            'series': 'The Run-Up',
+            'episode': '‘He Was Like an Octopus’',
+            'episode_number': 20,
+            'duration': 2130,
+        }
+    }, {
+        'url': 'http://www.nytimes.com/2016/10/16/books/review/inside-the-new-york-times-book-review-the-rise-of-hitler.html',
+        'info_dict': {
+            'id': '100000004709479',
+            'title': 'The Rise of Hitler',
+            'ext': 'mp3',
+            'description': 'md5:bce877fd9e3444990cb141875fab0028',
+            'creator': 'Pamela Paul',
+            'duration': 3475,
+        },
+        'params': {
+            'skip_download': True,
+        },
     }, {
         'url': 'http://www.nytimes.com/news/minute/2014/03/17/times-minute-whats-next-in-crimea/?_php=true&_type=blogs&_php=true&_type=blogs&_r=1',
         'only_matching': True,
     }]
 
+    def _extract_podcast_from_json(self, json, page_id, webpage):
+        """Build the info dict for a podcast embedded in an article page.
+
+        `json` is the JavaScript object literal captured from the page; it
+        is run through js_to_json before being parsed.  `page_id` is handed
+        to the parser and doubles as the fallback id.  `webpage` is only
+        consulted for the og:/twitter: description when the track itself
+        carries none.
+        """
+        podcast_audio = self._parse_json(
+            json, page_id, transform_source=js_to_json)
+
+        audio_data = podcast_audio['data']
+        track = audio_data['track']
+
+        # Both entries are mandatory: a missing key raises KeyError here.
+        episode_title = track['title']
+        video_url = track['source']
+
+        description = track.get('description')
+        if not description:
+            description = self._html_search_meta(
+                ['og:description', 'twitter:description'], webpage)
+
+        # A missing 'podcast' object is treated like an empty one.
+        podcast = audio_data.get('podcast', {})
+        podcast_title = podcast.get('title')
+        if podcast_title:
+            title = '%s: %s' % (podcast_title, episode_title)
+        else:
+            title = episode_title
+
+        # The episode number is taken from free text such as "Episode 20".
+        episode_label = podcast.get('episode') or ''
+        episode_number = int_or_none(self._search_regex(
+            r'[Ee]pisode\s+(\d+)', episode_label, 'episode number',
+            default=None))
+
+        # NOTE(review): remove_start() receives None when 'target' is absent;
+        # confirm the helper tolerates that.
+        info = {
+            'id': remove_start(podcast_audio.get('target'), 'FT') or page_id,
+            'url': video_url,
+            'title': title,
+            'description': description,
+            'creator': track.get('credit'),
+            'series': podcast_title,
+            'episode': episode_title,
+            'episode_number': episode_number,
+            'duration': int_or_none(track.get('duration')),
+        }
+        return info
+
     def _real_extract(self, url):
-        video_id = self._match_id(url)
+        page_id = self._match_id(url)
 
-        webpage = self._download_webpage(url, video_id)
+        webpage = self._download_webpage(url, page_id)
 
-        video_id = self._html_search_regex(r'data-videoid="(\d+)"', webpage, 'video id')
+        video_id = self._search_regex(
+            r'data-videoid=["\'](\d+)', webpage, 'video id',
+            default=None, fatal=False)
+        if video_id is not None:
+            return self._extract_video_from_id(video_id)
 
-        return self._extract_video_from_id(video_id)
+        podcast_data = self._search_regex(
+            (r'NYTD\.FlexTypes\.push\s*\(\s*({.+?})\s*\)\s*;\s*</script',
+             r'NYTD\.FlexTypes\.push\s*\(\s*({.+})\s*\)\s*;'),
+            webpage, 'podcast data')
+        return self._extract_podcast_from_json(podcast_data, page_id, webpage)
diff --git a/youtube_dl/extractor/nzz.py b/youtube_dl/extractor/nzz.py
new file mode 100644 (file)
index 0000000..2d352f5
--- /dev/null
@@ -0,0 +1,36 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    extract_attributes,
+)
+
+
+class NZZIE(InfoExtractor):
+    """Playlist extractor for nzz.ch article pages embedding Kaltura players."""
+
+    _VALID_URL = r'https?://(?:www\.)?nzz\.ch/(?:[^/]+/)*[^/?#]+-ld\.(?P<id>\d+)'
+    _TEST = {
+        'url': 'http://www.nzz.ch/zuerich/gymizyte/gymizyte-schreiben-schueler-heute-noch-diktate-ld.9153',
+        'info_dict': {
+            'id': '9153',
+        },
+        'playlist_mincount': 6,
+    }
+
+    def _real_extract(self, url):
+        """Return a playlist with one Kaltura entry per supported player tag."""
+        page_id = self._match_id(url)
+        webpage = self._download_webpage(url, page_id)
+
+        # Only these data-type values are turned into entries.
+        supported_types = ('kaltura_singleArticle',)
+
+        entries = []
+        player_tags = re.findall(
+            r'(<[^>]+class="kalturaPlayer"[^>]*>)', webpage)
+        for tag in player_tags:
+            attrs = extract_attributes(tag)
+            if attrs.get('data-type') in supported_types:
+                kaltura_id = attrs['data-id']
+                entries.append(self.url_result(
+                    'kaltura:1750922:' + kaltura_id, 'Kaltura', kaltura_id))
+            else:
+                # Other player types are reported and skipped.
+                self.report_warning('Unsupported player type')
+
+        return self.playlist_result(entries, page_id)
index 4a41c0542102165334124ea22a99a48473d19e50..50fbbc79c12761449adc70e74a58f0442f5b9cfa 100644 (file)
@@ -1,11 +1,11 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
 
 
 class OktoberfestTVIE(InfoExtractor):
-    _VALID_URL = r'https?://www\.oktoberfest-tv\.de/[^/]+/[^/]+/video/(?P<id>[^/?#]+)'
+    _VALID_URL = r'https?://(?:www\.)?oktoberfest-tv\.de/[^/]+/[^/]+/video/(?P<id>[^/?#]+)'
 
     _TEST = {
         'url': 'http://www.oktoberfest-tv.de/de/kameras/video/hb-zelt',
index fc22ad5eb61b616afa4aaf6895302118fa4f2a50..0a501b3e5b9f0c0bd2c6789e4da302b7610ced28 100644 (file)
@@ -56,8 +56,8 @@ class OnetBaseIE(InfoExtractor):
                         continue
                     ext = determine_ext(video_url)
                     if format_id == 'ism':
-                        # TODO: Support Microsoft Smooth Streaming
-                        continue
+                        formats.extend(self._extract_ism_formats(
+                            video_url, video_id, 'mss', fatal=False))
                     elif ext == 'mpd':
                         formats.extend(self._extract_mpd_formats(
                             video_url, video_id, mpd_id='dash', fatal=False))
@@ -90,7 +90,7 @@ class OnetBaseIE(InfoExtractor):
 
 
 class OnetIE(OnetBaseIE):
-    _VALID_URL = 'https?://(?:www\.)?onet\.tv/[a-z]/[a-z]+/(?P<display_id>[0-9a-z-]+)/(?P<id>[0-9a-z]+)'
+    _VALID_URL = r'https?://(?:www\.)?onet\.tv/[a-z]/[a-z]+/(?P<display_id>[0-9a-z-]+)/(?P<id>[0-9a-z]+)'
     IE_NAME = 'onet.tv'
 
     _TEST = {
index 2038a6ba5001283e786905a23c429d2418762515..c2807d0f61b2ab5134944bd0c79b2030df80d3a1 100644 (file)
@@ -18,7 +18,7 @@ class OoyalaBaseIE(InfoExtractor):
     _CONTENT_TREE_BASE = _PLAYER_BASE + 'player_api/v1/content_tree/'
     _AUTHORIZATION_URL_TEMPLATE = _PLAYER_BASE + 'sas/player_api/v2/authorization/embed_code/%s/%s?'
 
-    def _extract(self, content_tree_url, video_id, domain='example.org'):
+    def _extract(self, content_tree_url, video_id, domain='example.org', supportedformats=None):
         content_tree = self._download_json(content_tree_url, video_id)['content_tree']
         metadata = content_tree[list(content_tree)[0]]
         embed_code = metadata['embed_code']
@@ -29,7 +29,7 @@ class OoyalaBaseIE(InfoExtractor):
             self._AUTHORIZATION_URL_TEMPLATE % (pcode, embed_code) +
             compat_urllib_parse_urlencode({
                 'domain': domain,
-                'supportedFormats': 'mp4,rtmp,m3u8,hds',
+                'supportedFormats': supportedformats or 'mp4,rtmp,m3u8,hds',
             }), video_id)
 
         cur_auth_data = auth_data['authorization_data'][embed_code]
@@ -47,7 +47,7 @@ class OoyalaBaseIE(InfoExtractor):
                 delivery_type = stream['delivery_type']
                 if delivery_type == 'hls' or ext == 'm3u8':
                     formats.extend(self._extract_m3u8_formats(
-                        s_url, embed_code, 'mp4', 'm3u8_native',
+                        re.sub(r'/ip(?:ad|hone)/', '/all/', s_url), embed_code, 'mp4', 'm3u8_native',
                         m3u8_id='hls', fatal=False))
                 elif delivery_type == 'hds' or ext == 'f4m':
                     formats.extend(self._extract_f4m_formats(
@@ -145,8 +145,9 @@ class OoyalaIE(OoyalaBaseIE):
         url, smuggled_data = unsmuggle_url(url, {})
         embed_code = self._match_id(url)
         domain = smuggled_data.get('domain')
+        supportedformats = smuggled_data.get('supportedformats')
         content_tree_url = self._CONTENT_TREE_BASE + 'embed_code/%s/%s' % (embed_code, embed_code)
-        return self._extract(content_tree_url, embed_code, domain)
+        return self._extract(content_tree_url, embed_code, domain, supportedformats)
 
 
 class OoyalaExternalIE(OoyalaBaseIE):
index 4e80ca9ff03100e244b157f70c8ff6897256cd55..7f19b1ba5c3c355977c694334694b51f71a9840c 100644 (file)
@@ -1,19 +1,25 @@
 # coding: utf-8
 from __future__ import unicode_literals, division
 
-import math
+import re
 
 from .common import InfoExtractor
-from ..compat import compat_chr
+from ..compat import (
+    compat_chr,
+    compat_ord,
+)
 from ..utils import (
-    decode_png,
     determine_ext,
     ExtractorError,
 )
+from ..jsinterp import (
+    JSInterpreter,
+    _NAME_RE
+)
 
 
 class OpenloadIE(InfoExtractor):
-    _VALID_URL = r'https://openload.(?:co|io)/(?:f|embed)/(?P<id>[a-zA-Z0-9-_]+)'
+    _VALID_URL = r'https?://openload\.(?:co|io)/(?:f|embed)/(?P<id>[a-zA-Z0-9-_]+)'
 
     _TESTS = [{
         'url': 'https://openload.co/f/kUEfGclsU9o',
@@ -24,6 +30,22 @@ class OpenloadIE(InfoExtractor):
             'title': 'skyrim_no-audio_1080.mp4',
             'thumbnail': 're:^https?://.*\.jpg$',
         },
+    }, {
+        'url': 'https://openload.co/embed/rjC09fkPLYs',
+        'info_dict': {
+            'id': 'rjC09fkPLYs',
+            'ext': 'mp4',
+            'title': 'movie.mp4',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'subtitles': {
+                'en': [{
+                    'ext': 'vtt',
+                }],
+            },
+        },
+        'params': {
+            'skip_download': True,  # test subtitles only
+        },
     }, {
         'url': 'https://openload.co/embed/kUEfGclsU9o/skyrim_no-audio_1080.mp4',
         'only_matching': True,
@@ -40,84 +62,98 @@ class OpenloadIE(InfoExtractor):
         'only_matching': True,
     }]
 
+    def openload_decode(self, txt):
+        """Decode the emoticon-obfuscated script text found on the page.
+
+        Known symbol sequences that are followed by ``+`` are first replaced
+        by the single character they stand for.  Every section that sits
+        between an end token + delimiter and the next end token is then split
+        on the delimiter; each piece is decoded either as an octal character
+        code (all digits) or as a ``uXXXX`` hexadecimal code point.  Pieces
+        matching neither form are reported with a warning and skipped.
+
+        Returns the decoded text as a single string.
+        """
+        # Each key used to appear at most once at runtime anyway: the
+        # original literal repeated the 'c' entry, which is dropped here.
+        symbol_dict = {
+            '(゚Д゚) [゚Θ゚]': '_',
+            '(゚Д゚) [゚ω゚ノ]': 'a',
+            '(゚Д゚) [゚Θ゚ノ]': 'b',
+            '(゚Д゚) [\'c\']': 'c',
+            '(゚Д゚) [゚ー゚ノ]': 'd',
+            '(゚Д゚) [゚Д゚ノ]': 'e',
+            '(゚Д゚) [1]': 'f',
+            '(゚Д゚) [\'o\']': 'o',
+            '(o゚ー゚o)': 'u',
+            '((゚ー゚) + (o^_^o))': '7',
+            '((o^_^o) +(o^_^o) +(c^_^o))': '6',
+            '((゚ー゚) + (゚Θ゚))': '5',
+            '(-~3)': '4',
+            '(-~-~1)': '3',
+            '(-~1)': '2',
+            '(-~0)': '1',
+            '((c^_^o)-(c^_^o))': '0',
+        }
+        delim = '(゚Д゚)[゚ε゚]+'
+        end_token = '(゚Д゚)[゚o゚]'
+        symbols = '|'.join(map(re.escape, symbol_dict.keys()))
+        # Raw string: '\+' and '\s' are regex escapes, not string escapes.
+        txt = re.sub(
+            r'(%s)\+\s?' % symbols, lambda m: symbol_dict[m.group(1)], txt)
+        section_re = r'{0}\+\s?{1}(.*?){0}'.format(
+            re.escape(end_token), re.escape(delim))
+        # Collect characters and join once instead of repeated string +=.
+        decoded = []
+        for aacode in re.findall(section_re, txt):
+            for aachar in aacode.split(delim):
+                if aachar.isdigit():
+                    decoded.append(compat_chr(int(aachar, 8)))
+                    continue
+                m = re.match(r'^u([\da-f]{4})$', aachar)
+                if m:
+                    decoded.append(compat_chr(int(m.group(1), 16)))
+                else:
+                    self.report_warning('Cannot decode: %s' % aachar)
+        return ''.join(decoded)
+
     def _real_extract(self, url):
         video_id = self._match_id(url)
-        webpage = self._download_webpage(url, video_id)
+        webpage = self._download_webpage('https://openload.co/embed/%s/' % video_id, video_id)
 
-        if 'File not found' in webpage:
+        if 'File not found' in webpage or 'deleted by the owner' in webpage:
             raise ExtractorError('File not found', expected=True)
 
-        # The following extraction logic is proposed by @Belderak and @gdkchan
-        # and declared to be used freely in youtube-dl
-        # See https://github.com/rg3/youtube-dl/issues/9706
-
-        numbers_js = self._download_webpage(
-            'https://openload.co/assets/js/obfuscator/n.js', video_id,
-            note='Downloading signature numbers')
-        signums = self._search_regex(
-            r'window\.signatureNumbers\s*=\s*[\'"](?P<data>[a-z]+)[\'"]',
-            numbers_js, 'signature numbers', group='data')
-
-        linkimg_uri = self._search_regex(
-            r'<img[^>]+id="linkimg"[^>]+src="([^"]+)"', webpage, 'link image')
-        linkimg = self._request_webpage(
-            linkimg_uri, video_id, note=False).read()
-
-        width, height, pixels = decode_png(linkimg)
-
-        output = ''
-        for y in range(height):
-            for x in range(width):
-                r, g, b = pixels[y][3 * x:3 * x + 3]
-                if r == 0 and g == 0 and b == 0:
-                    break
-                else:
-                    output += compat_chr(r)
-                    output += compat_chr(g)
-                    output += compat_chr(b)
-
-        img_str_length = len(output) // 200
-        img_str = [[0 for x in range(img_str_length)] for y in range(10)]
-
-        sig_str_length = len(signums) // 260
-        sig_str = [[0 for x in range(sig_str_length)] for y in range(10)]
-
-        for i in range(10):
-            for j in range(img_str_length):
-                begin = i * img_str_length * 20 + j * 20
-                img_str[i][j] = output[begin:begin + 20]
-            for j in range(sig_str_length):
-                begin = i * sig_str_length * 26 + j * 26
-                sig_str[i][j] = signums[begin:begin + 26]
-
-        parts = []
-        # TODO: find better names for str_, chr_ and sum_
-        str_ = ''
-        for i in [2, 3, 5, 7]:
-            str_ = ''
-            sum_ = float(99)
-            for j in range(len(sig_str[i])):
-                for chr_idx in range(len(img_str[i][j])):
-                    if sum_ > float(122):
-                        sum_ = float(98)
-                    chr_ = compat_chr(int(math.floor(sum_)))
-                    if sig_str[i][j][chr_idx] == chr_ and j >= len(str_):
-                        sum_ += float(2.5)
-                        str_ += img_str[i][j][chr_idx]
-            parts.append(str_.replace(',', ''))
-
-        video_url = 'https://openload.co/stream/%s~%s~%s~%s' % (parts[3], parts[1], parts[2], parts[0])
+        # The following decryption algorithm is written by @yokrysty and
+        # declared to be freely used in youtube-dl
+        # See https://github.com/rg3/youtube-dl/issues/10408
+        enc_data = self._html_search_regex(
+            r'<span[^>]*>([^<]+)</span>\s*<span[^>]*>[^<]+</span>\s*<span[^>]+id="streamurl"',
+            webpage, 'encrypted data')
+
+        enc_code = self._html_search_regex(r'<script[^>]+>(゚ω゚[^<]+)</script>',
+                                           webpage, 'encrypted code')
+
+        js_code = self.openload_decode(enc_code)
+        jsi = JSInterpreter(js_code)
+
+        m_offset_fun = self._search_regex(r'slice\(0\s*-\s*(%s)\(\)' % _NAME_RE, js_code, 'javascript offset function')
+        m_diff_fun = self._search_regex(r'charCodeAt\(0\)\s*\+\s*(%s)\(\)' % _NAME_RE, js_code, 'javascript diff function')
+
+        offset = jsi.call_function(m_offset_fun)
+        diff = jsi.call_function(m_diff_fun)
+
+        video_url_chars = []
+
+        for idx, c in enumerate(enc_data):
+            j = compat_ord(c)
+            if j >= 33 and j <= 126:
+                j = ((j + 14) % 94) + 33
+            if idx == len(enc_data) - offset:
+                j += diff
+            video_url_chars += compat_chr(j)
+
+        video_url = 'https://openload.co/stream/%s?mime=true' % ''.join(video_url_chars)
 
         title = self._og_search_title(webpage, default=None) or self._search_regex(
             r'<span[^>]+class=["\']title["\'][^>]*>([^<]+)', webpage,
             'title', default=None) or self._html_search_meta(
             'description', webpage, 'title', fatal=True)
 
-        return {
+        entries = self._parse_html5_media_entries(url, webpage, video_id)
+        subtitles = entries[0]['subtitles'] if entries else None
+
+        info_dict = {
             'id': video_id,
             'title': title,
             'thumbnail': self._og_search_thumbnail(webpage, default=None),
             'url': video_url,
             # Seems all videos have extensions in their titles
             'ext': determine_ext(title),
+            'subtitles': subtitles,
         }
+
+        return info_dict
index 6ae30679a0a226b0d242b2ef773fbab9a90920c5..b4cce7ea9334c7bbaf9e617932189504dcd25121 100644 (file)
@@ -1,28 +1,28 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
-import json
 import re
 import calendar
 import datetime
 
 from .common import InfoExtractor
+from ..compat import compat_str
 from ..utils import (
     HEADRequest,
     unified_strdate,
-    ExtractorError,
     strip_jsonp,
     int_or_none,
     float_or_none,
     determine_ext,
     remove_end,
+    unescapeHTML,
 )
 
 
 class ORFTVthekIE(InfoExtractor):
     IE_NAME = 'orf:tvthek'
     IE_DESC = 'ORF TVthek'
-    _VALID_URL = r'https?://tvthek\.orf\.at/(?:programs/.+?/episodes|topics?/.+?|program/[^/]+)/(?P<id>\d+)'
+    _VALID_URL = r'https?://tvthek\.orf\.at/(?:[^/]+/)+(?P<id>\d+)'
 
     _TESTS = [{
         'url': 'http://tvthek.orf.at/program/Aufgetischt/2745173/Aufgetischt-Mit-der-Steirischen-Tafelrunde/8891389',
@@ -51,26 +51,23 @@ class ORFTVthekIE(InfoExtractor):
             'skip_download': True,  # rtsp downloads
         },
         '_skip': 'Blocked outside of Austria / Germany',
+    }, {
+        'url': 'http://tvthek.orf.at/topic/Fluechtlingskrise/10463081/Heimat-Fremde-Heimat/13879132/Senioren-betreuen-Migrantenkinder/13879141',
+        'skip_download': True,
+    }, {
+        'url': 'http://tvthek.orf.at/profile/Universum/35429',
+        'skip_download': True,
     }]
 
     def _real_extract(self, url):
         playlist_id = self._match_id(url)
         webpage = self._download_webpage(url, playlist_id)
 
-        data_json = self._search_regex(
-            r'initializeAdworx\((.+?)\);\n', webpage, 'video info')
-        all_data = json.loads(data_json)
-
-        def get_segments(all_data):
-            for data in all_data:
-                if data['name'] in (
-                        'Tracker::EPISODE_DETAIL_PAGE_OVER_PROGRAM',
-                        'Tracker::EPISODE_DETAIL_PAGE_OVER_TOPIC'):
-                    return data['values']['segments']
-
-        sdata = get_segments(all_data)
-        if not sdata:
-            raise ExtractorError('Unable to extract segments')
+        data_jsb = self._parse_json(
+            self._search_regex(
+                r'<div[^>]+class=(["\']).*?VideoPlaylist.*?\1[^>]+data-jsb=(["\'])(?P<json>.+?)\2',
+                webpage, 'playlist', group='json'),
+            playlist_id, transform_source=unescapeHTML)['playlist']['videos']
 
         def quality_to_int(s):
             m = re.search('([0-9]+)', s)
@@ -79,8 +76,11 @@ class ORFTVthekIE(InfoExtractor):
             return int(m.group(1))
 
         entries = []
-        for sd in sdata:
-            video_id = sd['id']
+        for sd in data_jsb:
+            video_id, title = sd.get('id'), sd.get('title')
+            if not video_id or not title:
+                continue
+            video_id = compat_str(video_id)
             formats = [{
                 'preference': -10 if fd['delivery'] == 'hls' else None,
                 'format_id': '%s-%s-%s' % (
@@ -88,7 +88,7 @@ class ORFTVthekIE(InfoExtractor):
                 'url': fd['src'],
                 'protocol': fd['protocol'],
                 'quality': quality_to_int(fd['quality']),
-            } for fd in sd['playlist_item_array']['sources']]
+            } for fd in sd['sources']]
 
             # Check for geoblocking.
             # There is a property is_geoprotection, but that's always false
@@ -115,14 +115,24 @@ class ORFTVthekIE(InfoExtractor):
             self._check_formats(formats, video_id)
             self._sort_formats(formats)
 
-            upload_date = unified_strdate(sd['created_date'])
+            subtitles = {}
+            for sub in sd.get('subtitles', []):
+                sub_src = sub.get('src')
+                if not sub_src:
+                    continue
+                subtitles.setdefault(sub.get('lang', 'de-AT'), []).append({
+                    'url': sub_src,
+                })
+
+            upload_date = unified_strdate(sd.get('created_date'))
             entries.append({
                 '_type': 'video',
                 'id': video_id,
-                'title': sd['header'],
+                'title': title,
                 'formats': formats,
+                'subtitles': subtitles,
                 'description': sd.get('description'),
-                'duration': int(sd['duration_in_seconds']),
+                'duration': int_or_none(sd.get('duration_in_seconds')),
                 'upload_date': upload_date,
                 'thumbnail': sd.get('image_full_url'),
             })
diff --git a/youtube_dl/extractor/pandatv.py b/youtube_dl/extractor/pandatv.py
new file mode 100644 (file)
index 0000000..133cc9b
--- /dev/null
@@ -0,0 +1,91 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    qualities,
+)
+
+
+class PandaTVIE(InfoExtractor):
+    """Extractor for live rooms on panda.tv.
+
+    The room API is queried for the stream parameters and the m3u8/flv
+    stream URLs are assembled from them.  Only rooms whose status is '2'
+    (live) are extracted; anything else raises an expected ExtractorError.
+    """
+
+    IE_DESC = '熊猫TV'
+    _VALID_URL = r'http://(?:www\.)?panda\.tv/(?P<id>[0-9]+)'
+    _TEST = {
+        'url': 'http://www.panda.tv/10091',
+        'info_dict': {
+            'id': '10091',
+            'title': 're:.+',
+            'uploader': '囚徒',
+            'ext': 'flv',
+            'is_live': True,
+        },
+        'params': {
+            'skip_download': True,
+        },
+        'skip': 'Live stream is offline',
+    }
+
+    def _real_extract(self, url):
+        """Return the info dict for the live stream of the room in `url`.
+
+        Raises ExtractorError (expected) when the API reports a non-zero
+        errno or when the room is not currently live.
+        """
+        video_id = self._match_id(url)
+
+        config = self._download_json(
+            'http://www.panda.tv/api_room?roomid=%s' % video_id, video_id)
+
+        error_code = config.get('errno', 0)
+        # Compare by value: `is not 0` tests object identity, which only
+        # works by accident of small-int caching.
+        if error_code != 0:
+            raise ExtractorError(
+                '%s returned error %s: %s'
+                % (self.IE_NAME, error_code, config['errmsg']),
+                expected=True)
+
+        data = config['data']
+        video_info = data['videoinfo']
+
+        # 2 = live, 3 = offline
+        if video_info.get('status') != '2':
+            raise ExtractorError(
+                'Live stream is offline', expected=True)
+
+        title = data['roominfo']['name']
+        uploader = data.get('hostinfo', {}).get('name')
+        room_key = video_info['room_key']
+        # Without 'stream_addr' every quality is assumed to be available.
+        stream_addr = video_info.get(
+            'stream_addr', {'OD': '1', 'HD': '1', 'SD': '1'})
+
+        # Reverse engineered from web player swf
+        # (http://s6.pdim.gs/static/07153e425f581151.swf at the moment of
+        # writing).
+        plflag0, plflag1 = video_info['plflag'].split('_')
+        plflag0 = int(plflag0) - 1
+        if plflag1 == '21':
+            plflag0 = 10
+            plflag1 = '4'
+        live_panda = 'live_panda' if plflag0 < 1 else ''
+
+        quality_key = qualities(['OD', 'HD', 'SD'])
+        # Indexed by the value quality_key() returns for a stream key.
+        suffix = ['_small', '_mid', '']
+        formats = []
+        for k, v in stream_addr.items():
+            # Only entries flagged '1' are used.
+            if v != '1':
+                continue
+            quality = quality_key(k)
+            # NOTE(review): `<= 0` presumably also drops the first listed
+            # quality ('OD'), not just unknown keys - confirm this is
+            # intended.
+            if quality <= 0:
+                continue
+            # One HLS and one FLV variant per quality.
+            for pref, (ext, pl) in enumerate((('m3u8', '-hls'), ('flv', ''))):
+                formats.append({
+                    'url': 'http://pl%s%s.live.panda.tv/live_panda/%s%s%s.%s'
+                    % (pl, plflag1, room_key, live_panda, suffix[quality], ext),
+                    'format_id': '%s-%s' % (k, ext),
+                    'quality': quality,
+                    'source_preference': pref,
+                })
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': self._live_title(title),
+            'uploader': uploader,
+            'formats': formats,
+            'is_live': True,
+        }
index 8d49f5c4aff04954e773b9eb575af912130a8401..2b07958bb1f5815a162dadadff4f450f7ea0e97d 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
index 0a423a08f0dd9b746ecf708509a525b4c0bed541..ebdab8db9faa0c8911c53c5764a18456926b6a55 100644 (file)
@@ -1,53 +1,43 @@
 from __future__ import unicode_literals
 
-import re
-
 from .common import InfoExtractor
 
 
 class ParliamentLiveUKIE(InfoExtractor):
     IE_NAME = 'parliamentlive.tv'
     IE_DESC = 'UK parliament videos'
-    _VALID_URL = r'https?://www\.parliamentlive\.tv/Main/Player\.aspx\?(?:[^&]+&)*?meetingId=(?P<id>[0-9]+)'
+    _VALID_URL = r'(?i)https?://(?:www\.)?parliamentlive\.tv/Event/Index/(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
 
-    _TEST = {
-        'url': 'http://www.parliamentlive.tv/Main/Player.aspx?meetingId=15121&player=windowsmedia',
+    _TESTS = [{
+        'url': 'http://parliamentlive.tv/Event/Index/c1e9d44d-fd6c-4263-b50f-97ed26cc998b',
         'info_dict': {
-            'id': '15121',
-            'ext': 'asf',
-            'title': 'hoc home affairs committee, 18 mar 2014.pm',
-            'description': 'md5:033b3acdf83304cd43946b2d5e5798d1',
+            'id': 'c1e9d44d-fd6c-4263-b50f-97ed26cc998b',
+            'ext': 'mp4',
+            'title': 'Home Affairs Committee',
+            'uploader_id': 'FFMPEG-01',
+            'timestamp': 1422696664,
+            'upload_date': '20150131',
         },
-        'params': {
-            'skip_download': True,  # Requires mplayer (mms)
-        }
-    }
+    }, {
+        'url': 'http://parliamentlive.tv/event/index/3f24936f-130f-40bf-9a5d-b3d6479da6a4',
+        'only_matching': True,
+    }]
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-        webpage = self._download_webpage(url, video_id)
-
-        asx_url = self._html_search_regex(
-            r'embed.*?src="([^"]+)" name="MediaPlayer"', webpage,
-            'metadata URL')
-        asx = self._download_xml(asx_url, video_id, 'Downloading ASX metadata')
-        video_url = asx.find('.//REF').attrib['HREF']
-
-        title = self._search_regex(
-            r'''(?x)player\.setClipDetails\(
-                (?:(?:[0-9]+|"[^"]+"),\s*){2}
-                "([^"]+",\s*"[^"]+)"
-                ''',
-            webpage, 'title').replace('", "', ', ')
-        description = self._html_search_regex(
-            r'(?s)<span id="MainContentPlaceHolder_CaptionsBlock_WitnessInfo">(.*?)</span>',
-            webpage, 'description')
-
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(
+            'http://vodplayer.parliamentlive.tv/?mid=' + video_id, video_id)
+        widget_config = self._parse_json(self._search_regex(
+            r'kWidgetConfig\s*=\s*({.+});',
+            webpage, 'kaltura widget config'), video_id)
+        kaltura_url = 'kaltura:%s:%s' % (widget_config['wid'][1:], widget_config['entry_id'])
+        event_title = self._download_json(
+            'http://parliamentlive.tv/Event/GetShareVideo/' + video_id, video_id)['event']['title']
         return {
+            '_type': 'url_transparent',
             'id': video_id,
-            'ext': 'asf',
-            'url': video_url,
-            'title': title,
-            'description': description,
+            'title': event_title,
+            'description': '',
+            'url': kaltura_url,
+            'ie_key': 'Kaltura',
         }
index 22975066516a0d37e74c9c520dd4faf0a68305a6..a6a2c273f240db52c967a12a96484261bd37664a 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
index 75f5884a928cff177bdabfb3db430ed282d0a7a8..0e362302425cbe504b33b90aa1937dc68b9e288a 100644 (file)
@@ -1,6 +1,8 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
+import re
+
 from .common import InfoExtractor
 from ..utils import (
     parse_iso8601,
@@ -8,7 +10,14 @@ from ..utils import (
 )
 
 
-class PeriscopeIE(InfoExtractor):
+class PeriscopeBaseIE(InfoExtractor):
+    def _call_api(self, method, query, item_id):
+        return self._download_json(
+            'https://api.periscope.tv/api/v2/%s' % method,
+            item_id, query=query)
+
+
+class PeriscopeIE(PeriscopeBaseIE):
     IE_DESC = 'Periscope'
     IE_NAME = 'periscope'
     _VALID_URL = r'https?://(?:www\.)?periscope\.tv/[^/]+/(?P<id>[^/?#]+)'
@@ -34,14 +43,18 @@ class PeriscopeIE(InfoExtractor):
         'only_matching': True,
     }]
 
-    def _call_api(self, method, value):
-        return self._download_json(
-            'https://api.periscope.tv/api/v2/%s?broadcast_id=%s' % (method, value), value)
+    @staticmethod
+    def _extract_url(webpage):
+        mobj = re.search(
+            r'<iframe[^>]+src=([\'"])(?P<url>(?:https?:)?//(?:www\.)?periscope\.tv/(?:(?!\1).)+)\1', webpage)
+        if mobj:
+            return mobj.group('url')
 
     def _real_extract(self, url):
         token = self._match_id(url)
 
-        broadcast_data = self._call_api('getBroadcastPublic', token)
+        broadcast_data = self._call_api(
+            'getBroadcastPublic', {'broadcast_id': token}, token)
         broadcast = broadcast_data['broadcast']
         status = broadcast['status']
 
@@ -61,7 +74,8 @@ class PeriscopeIE(InfoExtractor):
             'url': broadcast[image],
         } for image in ('image_url', 'image_url_small') if broadcast.get(image)]
 
-        stream = self._call_api('getAccessPublic', token)
+        stream = self._call_api(
+            'getAccessPublic', {'broadcast_id': token}, token)
 
         formats = []
         for format_id in ('replay', 'rtmp', 'hls', 'https_hls'):
@@ -73,7 +87,7 @@ class PeriscopeIE(InfoExtractor):
                 'ext': 'flv' if format_id == 'rtmp' else 'mp4',
             }
             if format_id != 'rtmp':
-                f['protocol'] = 'm3u8_native' if state == 'ended' else 'm3u8'
+                f['protocol'] = 'm3u8_native' if state in ('ended', 'timed_out') else 'm3u8'
             formats.append(f)
         self._sort_formats(formats)
 
@@ -88,8 +102,8 @@ class PeriscopeIE(InfoExtractor):
         }
 
 
-class PeriscopeUserIE(InfoExtractor):
-    _VALID_URL = r'https?://www\.periscope\.tv/(?P<id>[^/]+)/?$'
+class PeriscopeUserIE(PeriscopeBaseIE):
+    _VALID_URL = r'https?://(?:www\.)?periscope\.tv/(?P<id>[^/]+)/?$'
     IE_DESC = 'Periscope user videos'
     IE_NAME = 'periscope:user'
 
@@ -106,26 +120,34 @@ class PeriscopeUserIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        user_id = self._match_id(url)
+        user_name = self._match_id(url)
 
-        webpage = self._download_webpage(url, user_id)
+        webpage = self._download_webpage(url, user_name)
 
         data_store = self._parse_json(
             unescapeHTML(self._search_regex(
                 r'data-store=(["\'])(?P<data>.+?)\1',
                 webpage, 'data store', default='{}', group='data')),
-            user_id)
+            user_name)
 
-        user = data_store.get('User', {}).get('user', {})
-        title = user.get('display_name') or user.get('username')
-        description = user.get('description')
+        user = list(data_store['UserCache']['users'].values())[0]['user']
+        user_id = user['id']
+        session_id = data_store['SessionToken']['public']['broadcastHistory']['token']['session_id']
+
+        broadcasts = self._call_api(
+            'getUserBroadcastsPublic',
+            {'user_id': user_id, 'session_id': session_id},
+            user_name)['broadcasts']
 
-        broadcast_ids = (data_store.get('UserBroadcastHistory', {}).get('broadcastIds') or
-                         data_store.get('BroadcastCache', {}).get('broadcastIds', []))
+        broadcast_ids = [
+            broadcast['id'] for broadcast in broadcasts if broadcast.get('id')]
+
+        title = user.get('display_name') or user.get('username') or user_name
+        description = user.get('description')
 
         entries = [
             self.url_result(
-                'https://www.periscope.tv/%s/%s' % (user_id, broadcast_id))
+                'https://www.periscope.tv/%s/%s' % (user_name, broadcast_id))
             for broadcast_id in broadcast_ids]
 
         return self.playlist_result(entries, user_id, title, description)
diff --git a/youtube_dl/extractor/played.py b/youtube_dl/extractor/played.py
deleted file mode 100644 (file)
index 57c875e..0000000
+++ /dev/null
@@ -1,60 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-import os.path
-
-from .common import InfoExtractor
-from ..utils import (
-    ExtractorError,
-    sanitized_Request,
-    urlencode_postdata,
-)
-
-
-class PlayedIE(InfoExtractor):
-    IE_NAME = 'played.to'
-    _VALID_URL = r'https?://(?:www\.)?played\.to/(?P<id>[a-zA-Z0-9_-]+)'
-
-    _TEST = {
-        'url': 'http://played.to/j2f2sfiiukgt',
-        'md5': 'c2bd75a368e82980e7257bf500c00637',
-        'info_dict': {
-            'id': 'j2f2sfiiukgt',
-            'ext': 'flv',
-            'title': 'youtube-dl_test_video.mp4',
-        },
-        'skip': 'Removed for copyright infringement.',  # oh wow
-    }
-
-    def _real_extract(self, url):
-        video_id = self._match_id(url)
-        orig_webpage = self._download_webpage(url, video_id)
-
-        m_error = re.search(
-            r'(?s)Reason for deletion:.*?<b class="err"[^>]*>(?P<msg>[^<]+)</b>', orig_webpage)
-        if m_error:
-            raise ExtractorError(m_error.group('msg'), expected=True)
-
-        data = self._hidden_inputs(orig_webpage)
-
-        self._sleep(2, video_id)
-
-        post = urlencode_postdata(data)
-        headers = {
-            b'Content-Type': b'application/x-www-form-urlencoded',
-        }
-        req = sanitized_Request(url, post, headers)
-        webpage = self._download_webpage(
-            req, video_id, note='Downloading video page ...')
-
-        title = os.path.splitext(data['fname'])[0]
-
-        video_url = self._search_regex(
-            r'file: "?(.+?)",', webpage, 'video URL')
-
-        return {
-            'id': video_id,
-            'title': title,
-            'url': video_url,
-        }
index c3c38cf4ac07787e520c7c2c7eac7da1ed2aa8b4..ddfc6f1486c4b49185bf68b3be3ff9ba9e957633 100644 (file)
@@ -8,30 +8,31 @@ from ..utils import int_or_none
 
 
 class PlaysTVIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?plays\.tv/video/(?P<id>[0-9a-f]{18})'
-    _TEST = {
-        'url': 'http://plays.tv/video/56af17f56c95335490/when-you-outplay-the-azir-wall',
+    _VALID_URL = r'https?://(?:www\.)?plays\.tv/(?:video|embeds)/(?P<id>[0-9a-f]{18})'
+    _TESTS = [{
+        'url': 'https://plays.tv/video/56af17f56c95335490/when-you-outplay-the-azir-wall',
         'md5': 'dfeac1198506652b5257a62762cec7bc',
         'info_dict': {
             'id': '56af17f56c95335490',
             'ext': 'mp4',
-            'title': 'When you outplay the Azir wall',
+            'title': 'Bjergsen - When you outplay the Azir wall',
             'description': 'Posted by Bjergsen',
         }
-    }
+    }, {
+        'url': 'https://plays.tv/embeds/56af17f56c95335490',
+        'only_matching': True,
+    }]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
-        webpage = self._download_webpage(url, video_id)
+        webpage = self._download_webpage(
+            'https://plays.tv/video/%s' % video_id, video_id)
+
+        info = self._search_json_ld(webpage, video_id,)
 
-        title = self._og_search_title(webpage)
-        content = self._parse_json(
-            self._search_regex(
-                r'R\.bindContent\(({.+?})\);', webpage,
-                'content'), video_id)['content']
         mpd_url, sources = re.search(
             r'(?s)<video[^>]+data-mpd="([^"]+)"[^>]*>(.+?)</video>',
-            content).groups()
+            webpage).groups()
         formats = self._extract_mpd_formats(
             self._proto_relative_url(mpd_url), video_id, mpd_id='DASH')
         for format_id, height, format_url in re.findall(r'<source\s+res="((\d+)h?)"\s+src="([^"]+)"', sources):
@@ -42,10 +43,11 @@ class PlaysTVIE(InfoExtractor):
             })
         self._sort_formats(formats)
 
-        return {
+        info.update({
             'id': video_id,
-            'title': title,
             'description': self._og_search_description(webpage),
-            'thumbnail': self._og_search_thumbnail(webpage),
+            'thumbnail': info.get('thumbnail') or self._og_search_thumbnail(webpage),
             'formats': formats,
-        }
+        })
+
+        return info
index 78d21929945741f1a1b6e2fd57371650d146ed8a..79c2db08541e93d1d377c53c3e8adc415f4302e2 100644 (file)
@@ -14,7 +14,7 @@ from ..utils import (
 
 
 class PlayvidIE(InfoExtractor):
-    _VALID_URL = r'https?://www\.playvid\.com/watch(\?v=|/)(?P<id>.+?)(?:#|$)'
+    _VALID_URL = r'https?://(?:www\.)?playvid\.com/watch(\?v=|/)(?P<id>.+?)(?:#|$)'
     _TESTS = [{
         'url': 'http://www.playvid.com/watch/RnmBNgtrrJu',
         'md5': 'ffa2f6b2119af359f544388d8c01eb6c',
index 9aab7764559523e3bbc4a82f5b6ab9b71056be59..0ffd41ecd3b73bdaaba3b27cd1638cdf0383103e 100644 (file)
@@ -1,9 +1,9 @@
 from __future__ import unicode_literals
 
-import re
+import collections
 import json
+import os
 import random
-import collections
 
 from .common import InfoExtractor
 from ..compat import (
@@ -11,22 +11,24 @@ from ..compat import (
     compat_urlparse,
 )
 from ..utils import (
+    dict_get,
     ExtractorError,
+    float_or_none,
     int_or_none,
     parse_duration,
     qualities,
-    sanitized_Request,
+    srt_subtitles_timecode,
     urlencode_postdata,
 )
 
 
 class PluralsightBaseIE(InfoExtractor):
-    _API_BASE = 'http://app.pluralsight.com'
+    _API_BASE = 'https://app.pluralsight.com'
 
 
 class PluralsightIE(PluralsightBaseIE):
     IE_NAME = 'pluralsight'
-    _VALID_URL = r'https?://(?:(?:www|app)\.)?pluralsight\.com/training/player\?'
+    _VALID_URL = r'https?://(?:(?:www|app)\.)?pluralsight\.com/(?:training/)?player\?'
     _LOGIN_URL = 'https://app.pluralsight.com/id/'
 
     _NETRC_MACHINE = 'pluralsight'
@@ -48,6 +50,9 @@ class PluralsightIE(PluralsightBaseIE):
         # available without pluralsight account
         'url': 'http://app.pluralsight.com/training/player?author=scott-allen&name=angularjs-get-started-m1-introduction&mode=live&clip=0&course=angularjs-get-started',
         'only_matching': True,
+    }, {
+        'url': 'https://app.pluralsight.com/player?course=ccna-intro-networking&author=ross-bagurdes&name=ccna-intro-networking-m06&clip=0',
+        'only_matching': True,
     }]
 
     def _real_initialize(self):
@@ -75,12 +80,10 @@ class PluralsightIE(PluralsightBaseIE):
         if not post_url.startswith('http'):
             post_url = compat_urlparse.urljoin(self._LOGIN_URL, post_url)
 
-        request = sanitized_Request(
-            post_url, urlencode_postdata(login_form))
-        request.add_header('Content-Type', 'application/x-www-form-urlencoded')
-
         response = self._download_webpage(
-            request, None, 'Logging in as %s' % username)
+            post_url, None, 'Logging in as %s' % username,
+            data=urlencode_postdata(login_form),
+            headers={'Content-Type': 'application/x-www-form-urlencoded'})
 
         error = self._search_regex(
             r'<span[^>]+class="field-validation-error"[^>]*>([^<]+)</span>',
@@ -91,34 +94,78 @@ class PluralsightIE(PluralsightBaseIE):
         if all(p not in response for p in ('__INITIAL_STATE__', '"currentUser"')):
             raise ExtractorError('Unable to log in')
 
+    def _get_subtitles(self, author, clip_id, lang, name, duration, video_id):
+        captions_post = {
+            'a': author,
+            'cn': clip_id,
+            'lc': lang,
+            'm': name,
+        }
+        captions = self._download_json(
+            '%s/player/retrieve-captions' % self._API_BASE, video_id,
+            'Downloading captions JSON', 'Unable to download captions JSON',
+            fatal=False, data=json.dumps(captions_post).encode('utf-8'),
+            headers={'Content-Type': 'application/json;charset=utf-8'})
+        if captions:
+            return {
+                lang: [{
+                    'ext': 'json',
+                    'data': json.dumps(captions),
+                }, {
+                    'ext': 'srt',
+                    'data': self._convert_subtitles(duration, captions),
+                }]
+            }
+
+    @staticmethod
+    def _convert_subtitles(duration, subs):
+        srt = ''
+        TIME_OFFSET_KEYS = ('displayTimeOffset', 'DisplayTimeOffset')
+        TEXT_KEYS = ('text', 'Text')
+        for num, current in enumerate(subs):
+            current = subs[num]
+            start, text = (
+                float_or_none(dict_get(current, TIME_OFFSET_KEYS)),
+                dict_get(current, TEXT_KEYS))
+            if start is None or text is None:
+                continue
+            end = duration if num == len(subs) - 1 else float_or_none(
+                dict_get(subs[num + 1], TIME_OFFSET_KEYS))
+            if end is None:
+                continue
+            srt += os.linesep.join(
+                (
+                    '%d' % num,
+                    '%s --> %s' % (
+                        srt_subtitles_timecode(start),
+                        srt_subtitles_timecode(end)),
+                    text,
+                    os.linesep,
+                ))
+        return srt
+
     def _real_extract(self, url):
         qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
 
         author = qs.get('author', [None])[0]
         name = qs.get('name', [None])[0]
         clip_id = qs.get('clip', [None])[0]
-        course = qs.get('course', [None])[0]
+        course_name = qs.get('course', [None])[0]
 
-        if any(not f for f in (author, name, clip_id, course,)):
+        if any(not f for f in (author, name, clip_id, course_name,)):
             raise ExtractorError('Invalid URL', expected=True)
 
         display_id = '%s-%s' % (name, clip_id)
 
-        webpage = self._download_webpage(url, display_id)
+        parsed_url = compat_urlparse.urlparse(url)
 
-        modules = self._search_regex(
-            r'moduleCollection\s*:\s*new\s+ModuleCollection\((\[.+?\])\s*,\s*\$rootScope\)',
-            webpage, 'modules', default=None)
+        payload_url = compat_urlparse.urlunparse(parsed_url._replace(
+            netloc='app.pluralsight.com', path='player/api/v1/payload'))
 
-        if modules:
-            collection = self._parse_json(modules, display_id)
-        else:
-            # Webpage may be served in different layout (see
-            # https://github.com/rg3/youtube-dl/issues/7607)
-            collection = self._parse_json(
-                self._search_regex(
-                    r'var\s+initialState\s*=\s*({.+?});\n', webpage, 'initial state'),
-                display_id)['course']['modules']
+        course = self._download_json(
+            payload_url, display_id, headers={'Referer': url})['payload']['course']
+
+        collection = course['modules']
 
         module, clip = None, None
 
@@ -138,6 +185,8 @@ class PluralsightIE(PluralsightBaseIE):
         if not clip:
             raise ExtractorError('Unable to resolve clip')
 
+        title = '%s - %s' % (module['title'], clip['title'])
+
         QUALITIES = {
             'low': {'width': 640, 'height': 480},
             'medium': {'width': 848, 'height': 640},
@@ -157,8 +206,7 @@ class PluralsightIE(PluralsightBaseIE):
 
         # Some courses also offer widescreen resolution for high quality (see
         # https://github.com/rg3/youtube-dl/issues/7766)
-        widescreen = True if re.search(
-            r'courseSupportsWidescreenVideoFormats\s*:\s*true', webpage) else False
+        widescreen = course.get('supportsWideScreenVideoFormats') is True
         best_quality = 'high-widescreen' if widescreen else 'high'
         if widescreen:
             for allowed_quality in ALLOWED_QUALITIES:
@@ -187,22 +235,21 @@ class PluralsightIE(PluralsightBaseIE):
             for quality in qualities_:
                 f = QUALITIES[quality].copy()
                 clip_post = {
-                    'a': author,
-                    'cap': 'false',
-                    'cn': clip_id,
-                    'course': course,
-                    'lc': 'en',
-                    'm': name,
-                    'mt': ext,
-                    'q': '%dx%d' % (f['width'], f['height']),
+                    'author': author,
+                    'includeCaptions': False,
+                    'clipIndex': int(clip_id),
+                    'courseName': course_name,
+                    'locale': 'en',
+                    'moduleName': name,
+                    'mediaType': ext,
+                    'quality': '%dx%d' % (f['width'], f['height']),
                 }
-                request = sanitized_Request(
-                    '%s/training/Player/ViewClip' % self._API_BASE,
-                    json.dumps(clip_post).encode('utf-8'))
-                request.add_header('Content-Type', 'application/json;charset=utf-8')
                 format_id = '%s-%s' % (ext, quality)
-                clip_url = self._download_webpage(
-                    request, display_id, 'Downloading %s URL' % format_id, fatal=False)
+                viewclip = self._download_json(
+                    '%s/video/clips/viewclip' % self._API_BASE, display_id,
+                    'Downloading %s viewclip JSON' % format_id, fatal=False,
+                    data=json.dumps(clip_post).encode('utf-8'),
+                    headers={'Content-Type': 'application/json;charset=utf-8'})
 
                 # Pluralsight tracks multiple sequential calls to ViewClip API and start
                 # to return 429 HTTP errors after some time (see
@@ -214,29 +261,44 @@ class PluralsightIE(PluralsightBaseIE):
                     random.randint(2, 5), display_id,
                     '%(video_id)s: Waiting for %(timeout)s seconds to avoid throttling')
 
-                if not clip_url:
+                if not viewclip:
                     continue
-                f.update({
-                    'url': clip_url,
-                    'ext': ext,
-                    'format_id': format_id,
-                    'quality': quality_key(quality),
-                })
-                formats.append(f)
+
+                clip_urls = viewclip.get('urls')
+                if not isinstance(clip_urls, list):
+                    continue
+
+                for clip_url_data in clip_urls:
+                    clip_url = clip_url_data.get('url')
+                    if not clip_url:
+                        continue
+                    cdn = clip_url_data.get('cdn')
+                    clip_f = f.copy()
+                    clip_f.update({
+                        'url': clip_url,
+                        'ext': ext,
+                        'format_id': '%s-%s' % (format_id, cdn) if cdn else format_id,
+                        'quality': quality_key(quality),
+                        'source_preference': int_or_none(clip_url_data.get('rank')),
+                    })
+                    formats.append(clip_f)
+
         self._sort_formats(formats)
 
-        # TODO: captions
-        # http://www.pluralsight.com/training/Player/ViewClip + cap = true
-        # or
-        # http://www.pluralsight.com/training/Player/Captions
-        # { a = author, cn = clip_id, lc = end, m = name }
+        duration = int_or_none(
+            clip.get('duration')) or parse_duration(clip.get('formattedDuration'))
+
+        # TODO: other languages?
+        subtitles = self.extract_subtitles(
+            author, clip_id, 'en', name, duration, display_id)
 
         return {
             'id': clip.get('clipName') or clip['name'],
-            'title': '%s - %s' % (module['title'], clip['title']),
-            'duration': int_or_none(clip.get('duration')) or parse_duration(clip.get('formattedDuration')),
+            'title': title,
+            'duration': duration,
             'creator': author,
-            'formats': formats
+            'formats': formats,
+            'subtitles': subtitles,
         }
 
 
index f559b899f44d216d0f59a5e282de0658cbf4af48..5ff173774a410bf0eba85069f6f1ed33cd583e7b 100644 (file)
@@ -1,14 +1,17 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
+import itertools
 import re
 
 from .common import InfoExtractor
 from ..compat import (
     compat_str,
     compat_urllib_parse_unquote,
+    compat_urlparse
 )
 from ..utils import (
+    extract_attributes,
     int_or_none,
     strip_or_none,
     unified_timestamp,
@@ -97,3 +100,81 @@ class PolskieRadioIE(InfoExtractor):
         description = strip_or_none(self._og_search_description(webpage))
 
         return self.playlist_result(entries, playlist_id, title, description)
+
+
+class PolskieRadioCategoryIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?polskieradio\.pl/\d+(?:,[^/]+)?/(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'http://www.polskieradio.pl/7/5102,HISTORIA-ZYWA',
+        'info_dict': {
+            'id': '5102',
+            'title': 'HISTORIA ŻYWA',
+        },
+        'playlist_mincount': 38,
+    }, {
+        'url': 'http://www.polskieradio.pl/7/4807',
+        'info_dict': {
+            'id': '4807',
+            'title': 'Vademecum 1050. rocznicy Chrztu Polski'
+        },
+        'playlist_mincount': 5
+    }, {
+        'url': 'http://www.polskieradio.pl/7/129,Sygnaly-dnia?ref=source',
+        'only_matching': True
+    }, {
+        'url': 'http://www.polskieradio.pl/37,RedakcjaKatolicka/4143,Kierunek-Krakow',
+        'info_dict': {
+            'id': '4143',
+            'title': 'Kierunek Kraków',
+        },
+        'playlist_mincount': 61
+    }, {
+        'url': 'http://www.polskieradio.pl/10,czworka/214,muzyka',
+        'info_dict': {
+            'id': '214',
+            'title': 'Muzyka',
+        },
+        'playlist_mincount': 61
+    }, {
+        'url': 'http://www.polskieradio.pl/7,Jedynka/5102,HISTORIA-ZYWA',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.polskieradio.pl/8,Dwojka/196,Publicystyka',
+        'only_matching': True,
+    }]
+
+    @classmethod
+    def suitable(cls, url):
+        return False if PolskieRadioIE.suitable(url) else super(PolskieRadioCategoryIE, cls).suitable(url)
+
+    def _entries(self, url, page, category_id):
+        content = page
+        for page_num in itertools.count(2):
+            for a_entry, entry_id in re.findall(
+                    r'(?s)<article[^>]+>.*?(<a[^>]+href=["\']/\d+/\d+/Artykul/(\d+)[^>]+>).*?</article>',
+                    content):
+                entry = extract_attributes(a_entry)
+                href = entry.get('href')
+                if not href:
+                    continue
+                yield self.url_result(
+                    compat_urlparse.urljoin(url, href), PolskieRadioIE.ie_key(),
+                    entry_id, entry.get('title'))
+            mobj = re.search(
+                r'<div[^>]+class=["\']next["\'][^>]*>\s*<a[^>]+href=(["\'])(?P<url>(?:(?!\1).)+)\1',
+                content)
+            if not mobj:
+                break
+            next_url = compat_urlparse.urljoin(url, mobj.group('url'))
+            content = self._download_webpage(
+                next_url, category_id, 'Downloading page %s' % page_num)
+
+    def _real_extract(self, url):
+        category_id = self._match_id(url)
+        webpage = self._download_webpage(url, category_id)
+        title = self._html_search_regex(
+            r'<title>([^<]+) - [^<]+ - [^<]+</title>',
+            webpage, 'title', fatal=False)
+        return self.playlist_result(
+            self._entries(url, webpage, category_id),
+            category_id, title)
index 9894f32620c1692830df023423ae02a6199121b1..073fc3e21db07f05deef1a337aca7685f62b4079 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 from ..compat import (
diff --git a/youtube_dl/extractor/porncom.py b/youtube_dl/extractor/porncom.py
new file mode 100644 (file)
index 0000000..d85e029
--- /dev/null
@@ -0,0 +1,100 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_urlparse
+from ..utils import (
+    int_or_none,
+    js_to_json,
+    parse_filesize,
+    str_to_int,
+)
+
+
+class PornComIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:[a-zA-Z]+\.)?porn\.com/videos/(?:(?P<display_id>[^/]+)-)?(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'http://www.porn.com/videos/teen-grabs-a-dildo-and-fucks-her-pussy-live-on-1hottie-i-rec-2603339',
+        'md5': '3f30ce76267533cd12ba999263156de7',
+        'info_dict': {
+            'id': '2603339',
+            'display_id': 'teen-grabs-a-dildo-and-fucks-her-pussy-live-on-1hottie-i-rec',
+            'ext': 'mp4',
+            'title': 'Teen grabs a dildo and fucks her pussy live on 1hottie, I rec',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'duration': 551,
+            'view_count': int,
+            'age_limit': 18,
+            'categories': list,
+            'tags': list,
+        },
+    }, {
+        'url': 'http://se.porn.com/videos/marsha-may-rides-seth-on-top-of-his-thick-cock-2658067',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        display_id = mobj.group('display_id') or video_id
+
+        webpage = self._download_webpage(url, display_id)
+
+        config = self._parse_json(
+            self._search_regex(
+                r'=\s*({.+?})\s*,\s*[\da-zA-Z_]+\s*=',
+                webpage, 'config', default='{}'),
+            display_id, transform_source=js_to_json, fatal=False)
+
+        if config:
+            title = config['title']
+            formats = [{
+                'url': stream['url'],
+                'format_id': stream.get('id'),
+                'height': int_or_none(self._search_regex(
+                    r'^(\d+)[pP]', stream.get('id') or '', 'height', default=None))
+            } for stream in config['streams'] if stream.get('url')]
+            thumbnail = (compat_urlparse.urljoin(
+                config['thumbCDN'], config['poster'])
+                if config.get('thumbCDN') and config.get('poster') else None)
+            duration = int_or_none(config.get('length'))
+        else:
+            title = self._search_regex(
+                (r'<title>([^<]+)</title>', r'<h1[^>]*>([^<]+)</h1>'),
+                webpage, 'title')
+            formats = [{
+                'url': compat_urlparse.urljoin(url, format_url),
+                'format_id': '%sp' % height,
+                'height': int(height),
+                'filesize_approx': parse_filesize(filesize),
+            } for format_url, height, filesize in re.findall(
+                r'<a[^>]+href="(/download/[^"]+)">MPEG4 (\d+)p<span[^>]*>(\d+\s+[a-zA-Z]+)<',
+                webpage)]
+            thumbnail = None
+            duration = None
+
+        self._sort_formats(formats)
+
+        view_count = str_to_int(self._search_regex(
+            r'class=["\']views["\'][^>]*><p>([\d,.]+)', webpage,
+            'view count', fatal=False))
+
+        def extract_list(kind):
+            s = self._search_regex(
+                r'(?s)<p[^>]*>%s:(.+?)</p>' % kind.capitalize(),
+                webpage, kind, fatal=False)
+            return re.findall(r'<a[^>]+>([^<]+)</a>', s or '')
+
+        return {
+            'id': video_id,
+            'display_id': display_id,
+            'title': title,
+            'thumbnail': thumbnail,
+            'duration': duration,
+            'view_count': view_count,
+            'formats': formats,
+            'age_limit': 18,
+            'categories': extract_list('categories'),
+            'tags': extract_list('tags'),
+        }
index 20976c101d97f53a58b3599ded30f7bfb1d4bcd5..40dbe6967fac2126b7bf6e6a1245768b3c039c8e 100644 (file)
@@ -15,6 +15,7 @@ from ..compat import (
 from ..utils import (
     ExtractorError,
     int_or_none,
+    js_to_json,
     orderedSet,
     sanitized_Request,
     str_to_int,
@@ -32,7 +33,7 @@ class PornHubIE(InfoExtractor):
                             (?:[a-z]+\.)?pornhub\.com/(?:view_video\.php\?viewkey=|embed/)|
                             (?:www\.)?thumbzilla\.com/video/
                         )
-                        (?P<id>[0-9a-z]+)
+                        (?P<id>[\da-z]+)
                     '''
     _TESTS = [{
         'url': 'http://www.pornhub.com/view_video.php?viewkey=648719015',
@@ -48,6 +49,8 @@ class PornHubIE(InfoExtractor):
             'dislike_count': int,
             'comment_count': int,
             'age_limit': 18,
+            'tags': list,
+            'categories': list,
         },
     }, {
         # non-ASCII title
@@ -63,6 +66,8 @@ class PornHubIE(InfoExtractor):
             'dislike_count': int,
             'comment_count': int,
             'age_limit': 18,
+            'tags': list,
+            'categories': list,
         },
         'params': {
             'skip_download': True,
@@ -91,12 +96,11 @@ class PornHubIE(InfoExtractor):
         'only_matching': True,
     }]
 
-    @classmethod
-    def _extract_url(cls, webpage):
-        mobj = re.search(
-            r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?pornhub\.com/embed/\d+)\1', webpage)
-        if mobj:
-            return mobj.group('url')
+    @staticmethod
+    def _extract_urls(webpage):
+        return re.findall(
+            r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?pornhub\.com/embed/[\da-z]+)',
+            webpage)
 
     def _extract_count(self, pattern, webpage, name):
         return str_to_int(self._search_regex(
@@ -183,6 +187,15 @@ class PornHubIE(InfoExtractor):
             })
         self._sort_formats(formats)
 
+        page_params = self._parse_json(self._search_regex(
+            r'page_params\.zoneDetails\[([\'"])[^\'"]+\1\]\s*=\s*(?P<data>{[^}]+})',
+            webpage, 'page parameters', group='data', default='{}'),
+            video_id, transform_source=js_to_json, fatal=False)
+        tags = categories = None
+        if page_params:
+            tags = page_params.get('tags', '').split(',')
+            categories = page_params.get('categories', '').split(',')
+
         return {
             'id': video_id,
             'uploader': video_uploader,
@@ -195,6 +208,8 @@ class PornHubIE(InfoExtractor):
             'comment_count': comment_count,
             'formats': formats,
             'age_limit': 18,
+            'tags': tags,
+            'categories': categories,
         }
 
 
index 6b51e5c5400ee59859eb0d29cb740a31f34f3a96..58f557e3995f25a3787018150c953cb088e4fe81 100644 (file)
@@ -2,7 +2,6 @@
 from __future__ import unicode_literals
 
 import re
-import random
 
 from .common import InfoExtractor
 from ..utils import (
@@ -13,61 +12,69 @@ from ..utils import (
 
 
 class PornoVoisinesIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?pornovoisines\.com/showvideo/(?P<id>\d+)/(?P<display_id>[^/]+)'
-
-    _VIDEO_URL_TEMPLATE = 'http://stream%d.pornovoisines.com' \
-        '/static/media/video/transcoded/%s-640x360-1000-trscded.mp4'
-
-    _SERVER_NUMBERS = (1, 2)
+    _VALID_URL = r'https?://(?:www\.)?pornovoisines\.com/videos/show/(?P<id>\d+)/(?P<display_id>[^/.]+)'
 
     _TEST = {
-        'url': 'http://www.pornovoisines.com/showvideo/1285/recherche-appartement/',
-        'md5': '5ac670803bc12e9e7f9f662ce64cf1d1',
+        'url': 'http://www.pornovoisines.com/videos/show/919/recherche-appartement.html',
+        'md5': '6f8aca6a058592ab49fe701c8ba8317b',
         'info_dict': {
-            'id': '1285',
+            'id': '919',
             'display_id': 'recherche-appartement',
             'ext': 'mp4',
             'title': 'Recherche appartement',
-            'description': 'md5:819ea0b785e2a04667a1a01cdc89594e',
+            'description': 'md5:fe10cb92ae2dd3ed94bb4080d11ff493',
             'thumbnail': 're:^https?://.*\.jpg$',
             'upload_date': '20140925',
             'duration': 120,
             'view_count': int,
             'average_rating': float,
-            'categories': ['Débutantes', 'Scénario', 'Sodomie'],
+            'categories': ['Débutante', 'Débutantes', 'Scénario', 'Sodomie'],
             'age_limit': 18,
+            'subtitles': {
+                'fr': [{
+                    'ext': 'vtt',
+                }]
+            },
         }
     }
 
-    @classmethod
-    def build_video_url(cls, num):
-        return cls._VIDEO_URL_TEMPLATE % (random.choice(cls._SERVER_NUMBERS), num)
-
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         video_id = mobj.group('id')
         display_id = mobj.group('display_id')
 
-        webpage = self._download_webpage(url, video_id)
+        settings_url = self._download_json(
+            'http://www.pornovoisines.com/api/video/%s/getsettingsurl/' % video_id,
+            video_id, note='Getting settings URL')['video_settings_url']
+        settings = self._download_json(settings_url, video_id)['data']
+
+        formats = []
+        for kind, data in settings['variants'].items():
+            if kind == 'HLS':
+                formats.extend(self._extract_m3u8_formats(
+                    data, video_id, ext='mp4', entry_protocol='m3u8_native', m3u8_id='hls'))
+            elif kind == 'MP4':
+                for item in data:
+                    formats.append({
+                        'url': item['url'],
+                        'height': item.get('height'),
+                        'bitrate': item.get('bitrate'),
+                    })
+        self._sort_formats(formats)
 
-        video_url = self.build_video_url(video_id)
+        webpage = self._download_webpage(url, video_id)
 
-        title = self._html_search_regex(
-            r'<h1>(.+?)</h1>', webpage, 'title', flags=re.DOTALL)
-        description = self._html_search_regex(
-            r'<article id="descriptif">(.+?)</article>',
-            webpage, 'description', fatal=False, flags=re.DOTALL)
+        title = self._og_search_title(webpage)
+        description = self._og_search_description(webpage)
 
-        thumbnail = self._search_regex(
-            r'<div id="mediaspace%s">\s*<img src="/?([^"]+)"' % video_id,
-            webpage, 'thumbnail', fatal=False)
-        if thumbnail:
-            thumbnail = 'http://www.pornovoisines.com/%s' % thumbnail
+        # The webpage has a bug - there's no space between "thumb" and src=
+        thumbnail = self._html_search_regex(
+            r'<img[^>]+class=([\'"])thumb\1[^>]*src=([\'"])(?P<url>[^"]+)\2',
+            webpage, 'thumbnail', fatal=False, group='url')
 
         upload_date = unified_strdate(self._search_regex(
-            r'Publié le ([\d-]+)', webpage, 'upload date', fatal=False))
-        duration = int_or_none(self._search_regex(
-            'Durée (\d+)', webpage, 'duration', fatal=False))
+            r'Le\s*<b>([\d/]+)', webpage, 'upload date', fatal=False))
+        duration = settings.get('main', {}).get('duration')
         view_count = int_or_none(self._search_regex(
             r'(\d+) vues', webpage, 'view count', fatal=False))
         average_rating = self._search_regex(
@@ -75,15 +82,19 @@ class PornoVoisinesIE(InfoExtractor):
         if average_rating:
             average_rating = float_or_none(average_rating.replace(',', '.'))
 
-        categories = self._html_search_meta(
-            'keywords', webpage, 'categories', fatal=False)
+        categories = self._html_search_regex(
+            r'(?s)Catégories\s*:\s*<b>(.+?)</b>', webpage, 'categories', fatal=False)
         if categories:
             categories = [category.strip() for category in categories.split(',')]
 
+        subtitles = {'fr': [{
+            'url': subtitle,
+        } for subtitle in settings.get('main', {}).get('vtt_tracks', {}).values()]}
+
         return {
             'id': video_id,
             'display_id': display_id,
-            'url': video_url,
+            'formats': formats,
             'title': title,
             'description': description,
             'thumbnail': thumbnail,
@@ -93,4 +104,5 @@ class PornoVoisinesIE(InfoExtractor):
             'average_rating': average_rating,
             'categories': categories,
             'age_limit': 18,
+            'subtitles': subtitles,
         }
index 202f58673ae4f1dd77caee159f37dc24be9aad64..3c9087f2dfe3caa30c879f4905e857a046fd789c 100644 (file)
@@ -2,13 +2,13 @@ from __future__ import unicode_literals
 
 import re
 
-from .common import InfoExtractor
+from .jwplatform import JWPlatformBaseIE
 from ..utils import (
     str_to_int,
 )
 
 
-class PornoXOIE(InfoExtractor):
+class PornoXOIE(JWPlatformBaseIE):
     _VALID_URL = r'https?://(?:www\.)?pornoxo\.com/videos/(?P<id>\d+)/(?P<display_id>[^/]+)\.html'
     _TEST = {
         'url': 'http://www.pornoxo.com/videos/7564/striptease-from-sexy-secretary.html',
@@ -17,7 +17,8 @@ class PornoXOIE(InfoExtractor):
             'id': '7564',
             'ext': 'flv',
             'title': 'Striptease From Sexy Secretary!',
-            'description': 'Striptease From Sexy Secretary!',
+            'display_id': 'striptease-from-sexy-secretary',
+            'description': 'md5:0ee35252b685b3883f4a1d38332f9980',
             'categories': list,  # NSFW
             'thumbnail': 're:https?://.*\.jpg$',
             'age_limit': 18,
@@ -26,23 +27,14 @@ class PornoXOIE(InfoExtractor):
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id, display_id = mobj.groups()
 
         webpage = self._download_webpage(url, video_id)
-
-        video_url = self._html_search_regex(
-            r'\'file\'\s*:\s*"([^"]+)"', webpage, 'video_url')
+        video_data = self._extract_jwplayer_data(webpage, video_id, require_title=False)
 
         title = self._html_search_regex(
             r'<title>([^<]+)\s*-\s*PornoXO', webpage, 'title')
 
-        description = self._html_search_regex(
-            r'<meta name="description" content="([^"]+)\s*featuring',
-            webpage, 'description', fatal=False)
-
-        thumbnail = self._html_search_regex(
-            r'\'image\'\s*:\s*"([^"]+)"', webpage, 'thumbnail', fatal=False)
-
         view_count = str_to_int(self._html_search_regex(
             r'[vV]iews:\s*([0-9,]+)', webpage, 'view count', fatal=False))
 
@@ -53,13 +45,14 @@ class PornoXOIE(InfoExtractor):
             None if categories_str is None
             else categories_str.split(','))
 
-        return {
+        video_data.update({
             'id': video_id,
-            'url': video_url,
             'title': title,
-            'description': description,
-            'thumbnail': thumbnail,
+            'display_id': display_id,
+            'description': self._html_search_meta('description', webpage),
             'categories': categories,
             'view_count': view_count,
             'age_limit': 18,
-        }
+        })
+
+        return video_data
index f93bd19ff6dde40c87672b4fd18a3f1aab11382e..d40cca06f989b7c99329e1650497a06e9a6390e4 100644 (file)
@@ -7,7 +7,6 @@ from .common import InfoExtractor
 from ..utils import (
     determine_ext,
     ExtractorError,
-    sanitized_Request,
     urlencode_postdata,
 )
 
@@ -15,12 +14,12 @@ from ..utils import (
 class PromptFileIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?promptfile\.com/l/(?P<id>[0-9A-Z\-]+)'
     _TEST = {
-        'url': 'http://www.promptfile.com/l/D21B4746E9-F01462F0FF',
-        'md5': 'd1451b6302da7215485837aaea882c4c',
+        'url': 'http://www.promptfile.com/l/86D1CE8462-576CAAE416',
+        'md5': '5a7e285a26e0d66d9a263fae91bc92ce',
         'info_dict': {
-            'id': 'D21B4746E9-F01462F0FF',
+            'id': '86D1CE8462-576CAAE416',
             'ext': 'mp4',
-            'title': 'Birds.mp4',
+            'title': 'oceans.mp4',
             'thumbnail': 're:^https?://.*\.jpg$',
         }
     }
@@ -33,14 +32,23 @@ class PromptFileIE(InfoExtractor):
             raise ExtractorError('Video %s does not exist' % video_id,
                                  expected=True)
 
+        chash = self._search_regex(
+            r'val\("([^"]*)"\s*\+\s*\$\("#chash"\)', webpage, 'chash')
         fields = self._hidden_inputs(webpage)
-        post = urlencode_postdata(fields)
-        req = sanitized_Request(url, post)
-        req.add_header('Content-type', 'application/x-www-form-urlencoded')
+        keys = list(fields.keys())
+        chash_key = keys[0] if len(keys) == 1 else next(
+            key for key in keys if key.startswith('cha'))
+        fields[chash_key] = chash + fields[chash_key]
+
         webpage = self._download_webpage(
-            req, video_id, 'Downloading video page')
+            url, video_id, 'Downloading video page',
+            data=urlencode_postdata(fields),
+            headers={'Content-type': 'application/x-www-form-urlencoded'})
 
-        url = self._html_search_regex(r'url:\s*\'([^\']+)\'', webpage, 'URL')
+        video_url = self._search_regex(
+            (r'<a[^>]+href=(["\'])(?P<url>(?:(?!\1).)+)\1[^>]*>\s*Download File',
+             r'<a[^>]+href=(["\'])(?P<url>https?://(?:www\.)?promptfile\.com/file/(?:(?!\1).)+)\1'),
+            webpage, 'video url', group='url')
         title = self._html_search_regex(
             r'<span.+title="([^"]+)">', webpage, 'title')
         thumbnail = self._html_search_regex(
@@ -49,7 +57,7 @@ class PromptFileIE(InfoExtractor):
 
         formats = [{
             'format_id': 'sd',
-            'url': url,
+            'url': video_url,
             'ext': determine_ext(title),
         }]
         self._sort_formats(formats)
index c6eee3b72a6e428012644ee7c10caad9be78fa86..7cc07a2ad5b88c51aa9f5d339839fd743727e17e 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 import re
@@ -15,10 +15,124 @@ from ..utils import (
 )
 
 
-class ProSiebenSat1IE(InfoExtractor):
+class ProSiebenSat1BaseIE(InfoExtractor):
+    def _extract_video_info(self, url, clip_id):
+        client_location = url
+
+        video = self._download_json(
+            'http://vas.sim-technik.de/vas/live/v2/videos',
+            clip_id, 'Downloading videos JSON', query={
+                'access_token': self._TOKEN,
+                'client_location': client_location,
+                'client_name': self._CLIENT_NAME,
+                'ids': clip_id,
+            })[0]
+
+        if video.get('is_protected') is True:
+            raise ExtractorError('This video is DRM protected.', expected=True)
+
+        duration = float_or_none(video.get('duration'))
+        source_ids = [compat_str(source['id']) for source in video['sources']]
+
+        client_id = self._SALT[:2] + sha1(''.join([clip_id, self._SALT, self._TOKEN, client_location, self._SALT, self._CLIENT_NAME]).encode('utf-8')).hexdigest()
+
+        sources = self._download_json(
+            'http://vas.sim-technik.de/vas/live/v2/videos/%s/sources' % clip_id,
+            clip_id, 'Downloading sources JSON', query={
+                'access_token': self._TOKEN,
+                'client_id': client_id,
+                'client_location': client_location,
+                'client_name': self._CLIENT_NAME,
+            })
+        server_id = sources['server_id']
+
+        def fix_bitrate(bitrate):
+            bitrate = int_or_none(bitrate)
+            if not bitrate:
+                return None
+            return (bitrate // 1000) if bitrate % 1000 == 0 else bitrate
+
+        formats = []
+        for source_id in source_ids:
+            client_id = self._SALT[:2] + sha1(''.join([self._SALT, clip_id, self._TOKEN, server_id, client_location, source_id, self._SALT, self._CLIENT_NAME]).encode('utf-8')).hexdigest()
+            urls = self._download_json(
+                'http://vas.sim-technik.de/vas/live/v2/videos/%s/sources/url' % clip_id,
+                clip_id, 'Downloading urls JSON', fatal=False, query={
+                    'access_token': self._TOKEN,
+                    'client_id': client_id,
+                    'client_location': client_location,
+                    'client_name': self._CLIENT_NAME,
+                    'server_id': server_id,
+                    'source_ids': source_id,
+                })
+            if not urls:
+                continue
+            if urls.get('status_code') != 0:
+                raise ExtractorError('This video is unavailable', expected=True)
+            urls_sources = urls['sources']
+            if isinstance(urls_sources, dict):
+                urls_sources = urls_sources.values()
+            for source in urls_sources:
+                source_url = source.get('url')
+                if not source_url:
+                    continue
+                protocol = source.get('protocol')
+                mimetype = source.get('mimetype')
+                if mimetype == 'application/f4m+xml' or 'f4mgenerator' in source_url or determine_ext(source_url) == 'f4m':
+                    formats.extend(self._extract_f4m_formats(
+                        source_url, clip_id, f4m_id='hds', fatal=False))
+                elif mimetype == 'application/x-mpegURL':
+                    formats.extend(self._extract_m3u8_formats(
+                        source_url, clip_id, 'mp4', 'm3u8_native',
+                        m3u8_id='hls', fatal=False))
+                else:
+                    tbr = fix_bitrate(source['bitrate'])
+                    if protocol in ('rtmp', 'rtmpe'):
+                        mobj = re.search(r'^(?P<url>rtmpe?://[^/]+)/(?P<path>.+)$', source_url)
+                        if not mobj:
+                            continue
+                        path = mobj.group('path')
+                        mp4colon_index = path.rfind('mp4:')
+                        app = path[:mp4colon_index]
+                        play_path = path[mp4colon_index:]
+                        formats.append({
+                            'url': '%s/%s' % (mobj.group('url'), app),
+                            'app': app,
+                            'play_path': play_path,
+                            'player_url': 'http://livepassdl.conviva.com/hf/ver/2.79.0.17083/LivePassModuleMain.swf',
+                            'page_url': 'http://www.prosieben.de',
+                            'tbr': tbr,
+                            'ext': 'flv',
+                            'format_id': 'rtmp%s' % ('-%d' % tbr if tbr else ''),
+                        })
+                    else:
+                        formats.append({
+                            'url': source_url,
+                            'tbr': tbr,
+                            'format_id': 'http%s' % ('-%d' % tbr if tbr else ''),
+                        })
+        self._sort_formats(formats)
+
+        return {
+            'duration': duration,
+            'formats': formats,
+        }
+
+
+class ProSiebenSat1IE(ProSiebenSat1BaseIE):
     IE_NAME = 'prosiebensat1'
     IE_DESC = 'ProSiebenSat.1 Digital'
-    _VALID_URL = r'https?://(?:www\.)?(?:(?:prosieben|prosiebenmaxx|sixx|sat1|kabeleins|the-voice-of-germany|7tv)\.(?:de|at|ch)|ran\.de|fem\.com)/(?P<id>.+)'
+    _VALID_URL = r'''(?x)
+                    https?://
+                        (?:www\.)?
+                        (?:
+                            (?:
+                                prosieben(?:maxx)?|sixx|sat1(?:gold)?|kabeleins(?:doku)?|the-voice-of-germany|7tv|advopedia
+                            )\.(?:de|at|ch)|
+                            ran\.de|fem\.com|advopedia\.de
+                        )
+                        /(?P<id>.+)
+                    '''
 
     _TESTS = [
         {
@@ -186,8 +300,29 @@ class ProSiebenSat1IE(InfoExtractor):
                 'skip_download': True,
             },
         },
+        {
+            # geo restricted to Germany
+            'url': 'http://www.kabeleinsdoku.de/tv/mayday-alarm-im-cockpit/video/102-notlandung-im-hudson-river-ganze-folge',
+            'only_matching': True,
+        },
+        {
+            # geo restricted to Germany
+            'url': 'http://www.sat1gold.de/tv/edel-starck/video/11-staffel-1-episode-1-partner-wider-willen-ganze-folge',
+            'only_matching': True,
+        },
+        {
+            'url': 'http://www.sat1gold.de/tv/edel-starck/playlist/die-gesamte-1-staffel',
+            'only_matching': True,
+        },
+        {
+            'url': 'http://www.advopedia.de/videos/lenssen-klaert-auf/lenssen-klaert-auf-folge-8-staffel-3-feiertage-und-freie-tage',
+            'only_matching': True,
+        },
     ]
 
+    _TOKEN = 'prosieben'
+    _SALT = '01!8d8F_)r9]4s[qeuXfP%'
+    _CLIENT_NAME = 'kolibri-2.0.19-splec4'
     _CLIPID_REGEXES = [
         r'"clip_id"\s*:\s+"(\d+)"',
         r'clipid: "(\d+)"',
@@ -234,140 +369,48 @@ class ProSiebenSat1IE(InfoExtractor):
     def _extract_clip(self, url, webpage):
         clip_id = self._html_search_regex(
             self._CLIPID_REGEXES, webpage, 'clip id')
-
-        access_token = 'prosieben'
-        client_name = 'kolibri-2.0.19-splec4'
-        client_location = url
-
-        video = self._download_json(
-            'http://vas.sim-technik.de/vas/live/v2/videos',
-            clip_id, 'Downloading videos JSON', query={
-                'access_token': access_token,
-                'client_location': client_location,
-                'client_name': client_name,
-                'ids': clip_id,
-            })[0]
-
-        if video.get('is_protected') is True:
-            raise ExtractorError('This video is DRM protected.', expected=True)
-
-        duration = float_or_none(video.get('duration'))
-        source_ids = [compat_str(source['id']) for source in video['sources']]
-
-        g = '01!8d8F_)r9]4s[qeuXfP%'
-        client_id = g[:2] + sha1(''.join([clip_id, g, access_token, client_location, g, client_name]).encode('utf-8')).hexdigest()
-
-        sources = self._download_json(
-            'http://vas.sim-technik.de/vas/live/v2/videos/%s/sources' % clip_id,
-            clip_id, 'Downloading sources JSON', query={
-                'access_token': access_token,
-                'client_id': client_id,
-                'client_location': client_location,
-                'client_name': client_name,
-            })
-        server_id = sources['server_id']
-
         title = self._html_search_regex(self._TITLE_REGEXES, webpage, 'title')
-
-        def fix_bitrate(bitrate):
-            bitrate = int_or_none(bitrate)
-            if not bitrate:
-                return None
-            return (bitrate // 1000) if bitrate % 1000 == 0 else bitrate
-
-        formats = []
-        for source_id in source_ids:
-            client_id = g[:2] + sha1(''.join([g, clip_id, access_token, server_id, client_location, source_id, g, client_name]).encode('utf-8')).hexdigest()
-            urls = self._download_json(
-                'http://vas.sim-technik.de/vas/live/v2/videos/%s/sources/url' % clip_id,
-                clip_id, 'Downloading urls JSON', fatal=False, query={
-                    'access_token': access_token,
-                    'client_id': client_id,
-                    'client_location': client_location,
-                    'client_name': client_name,
-                    'server_id': server_id,
-                    'source_ids': source_id,
-                })
-            if not urls:
-                continue
-            if urls.get('status_code') != 0:
-                raise ExtractorError('This video is unavailable', expected=True)
-            urls_sources = urls['sources']
-            if isinstance(urls_sources, dict):
-                urls_sources = urls_sources.values()
-            for source in urls_sources:
-                source_url = source.get('url')
-                if not source_url:
-                    continue
-                protocol = source.get('protocol')
-                mimetype = source.get('mimetype')
-                if mimetype == 'application/f4m+xml' or 'f4mgenerator' in source_url or determine_ext(source_url) == 'f4m':
-                    formats.extend(self._extract_f4m_formats(
-                        source_url, clip_id, f4m_id='hds', fatal=False))
-                elif mimetype == 'application/x-mpegURL':
-                    formats.extend(self._extract_m3u8_formats(
-                        source_url, clip_id, 'mp4', 'm3u8_native',
-                        m3u8_id='hls', fatal=False))
-                else:
-                    tbr = fix_bitrate(source['bitrate'])
-                    if protocol in ('rtmp', 'rtmpe'):
-                        mobj = re.search(r'^(?P<url>rtmpe?://[^/]+)/(?P<path>.+)$', source_url)
-                        if not mobj:
-                            continue
-                        path = mobj.group('path')
-                        mp4colon_index = path.rfind('mp4:')
-                        app = path[:mp4colon_index]
-                        play_path = path[mp4colon_index:]
-                        formats.append({
-                            'url': '%s/%s' % (mobj.group('url'), app),
-                            'app': app,
-                            'play_path': play_path,
-                            'player_url': 'http://livepassdl.conviva.com/hf/ver/2.79.0.17083/LivePassModuleMain.swf',
-                            'page_url': 'http://www.prosieben.de',
-                            'tbr': tbr,
-                            'ext': 'flv',
-                            'format_id': 'rtmp%s' % ('-%d' % tbr if tbr else ''),
-                        })
-                    else:
-                        formats.append({
-                            'url': source_url,
-                            'tbr': tbr,
-                            'format_id': 'http%s' % ('-%d' % tbr if tbr else ''),
-                        })
-        self._sort_formats(formats)
-
+        info = self._extract_video_info(url, clip_id)
         description = self._html_search_regex(
             self._DESCRIPTION_REGEXES, webpage, 'description', fatal=False)
         thumbnail = self._og_search_thumbnail(webpage)
         upload_date = unified_strdate(self._html_search_regex(
             self._UPLOAD_DATE_REGEXES, webpage, 'upload date', default=None))
 
-        return {
+        info.update({
             'id': clip_id,
             'title': title,
             'description': description,
             'thumbnail': thumbnail,
             'upload_date': upload_date,
-            'duration': duration,
-            'formats': formats,
-        }
+        })
+        return info
 
     def _extract_playlist(self, url, webpage):
         playlist_id = self._html_search_regex(
             self._PLAYLIST_ID_REGEXES, webpage, 'playlist id')
-        for regex in self._PLAYLIST_CLIP_REGEXES:
-            playlist_clips = re.findall(regex, webpage)
-            if playlist_clips:
-                title = self._html_search_regex(
-                    self._TITLE_REGEXES, webpage, 'title')
-                description = self._html_search_regex(
-                    self._DESCRIPTION_REGEXES, webpage, 'description', fatal=False)
-                entries = [
-                    self.url_result(
-                        re.match('(.+?//.+?)/', url).group(1) + clip_path,
-                        'ProSiebenSat1')
-                    for clip_path in playlist_clips]
-                return self.playlist_result(entries, playlist_id, title, description)
+        playlist = self._parse_json(
+            self._search_regex(
+                'var\s+contentResources\s*=\s*(\[.+?\]);\s*</script',
+                webpage, 'playlist'),
+            playlist_id)
+        entries = []
+        for item in playlist:
+            clip_id = item.get('id') or item.get('upc')
+            if not clip_id:
+                continue
+            info = self._extract_video_info(url, clip_id)
+            info.update({
+                'id': clip_id,
+                'title': item.get('title') or item.get('teaser', {}).get('headline'),
+                'description': item.get('teaser', {}).get('description'),
+                'thumbnail': item.get('poster'),
+                'duration': float_or_none(item.get('duration')),
+                'series': item.get('tvShowTitle'),
+                'uploader': item.get('broadcastPublisher'),
+            })
+            entries.append(info)
+        return self.playlist_result(entries, playlist_id)
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
index fca30e1aae5b35f9ef439fccc8396b5127f79aa9..80091b85f88db2025982d195a54c64e164152881 100644 (file)
@@ -1,88 +1,57 @@
-# -*- coding: utf-8 -*-
+# coding: utf-8
 from __future__ import unicode_literals
 
-from .common import InfoExtractor
+from .prosiebensat1 import ProSiebenSat1BaseIE
 from ..utils import (
-    ExtractorError,
     unified_strdate,
-    int_or_none,
+    parse_duration,
+    compat_str,
 )
 
 
-class Puls4IE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?puls4\.com/video/[^/]+/play/(?P<id>[0-9]+)'
+class Puls4IE(ProSiebenSat1BaseIE):
+    _VALID_URL = r'https?://(?:www\.)?puls4\.com/(?P<id>[^?#&]+)'
     _TESTS = [{
-        'url': 'http://www.puls4.com/video/pro-und-contra/play/2716816',
-        'md5': '49f6a6629747eeec43cef6a46b5df81d',
+        'url': 'http://www.puls4.com/2-minuten-2-millionen/staffel-3/videos/2min2miotalk/Tobias-Homberger-von-myclubs-im-2min2miotalk-118118',
+        'md5': 'fd3c6b0903ac72c9d004f04bc6bb3e03',
         'info_dict': {
-            'id': '2716816',
-            'ext': 'mp4',
-            'title': 'Pro und Contra vom 23.02.2015',
-            'description': 'md5:293e44634d9477a67122489994675db6',
-            'duration': 2989,
-            'upload_date': '20150224',
+            'id': '118118',
+            'ext': 'flv',
+            'title': 'Tobias Homberger von myclubs im #2min2miotalk',
+            'description': 'md5:f9def7c5e8745d6026d8885487d91955',
+            'upload_date': '20160830',
             'uploader': 'PULS_4',
         },
-        'skip': 'Only works from Germany',
     }, {
-        'url': 'http://www.puls4.com/video/kult-spielfilme/play/1298106',
-        'md5': '6a48316c8903ece8dab9b9a7bf7a59ec',
-        'info_dict': {
-            'id': '1298106',
-            'ext': 'mp4',
-            'title': 'Lucky Fritz',
-        },
-        'skip': 'Only works from Germany',
+        'url': 'http://www.puls4.com/pro-und-contra/wer-wird-prasident/Ganze-Folgen/Wer-wird-Praesident.-Norbert-Hofer',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.puls4.com/pro-und-contra/wer-wird-prasident/Ganze-Folgen/Wer-wird-Praesident-Analyse-des-Interviews-mit-Norbert-Hofer-416598',
+        'only_matching': True,
     }]
+    _TOKEN = 'puls4'
+    _SALT = '01!kaNgaiNgah1Ie4AeSha'
+    _CLIENT_NAME = ''
 
     def _real_extract(self, url):
-        video_id = self._match_id(url)
-        webpage = self._download_webpage(url, video_id)
-
-        error_message = self._html_search_regex(
-            r'<div[^>]+class="message-error"[^>]*>(.+?)</div>',
-            webpage, 'error message', default=None)
-        if error_message:
-            raise ExtractorError(
-                '%s returned error: %s' % (self.IE_NAME, error_message), expected=True)
-
-        real_url = self._html_search_regex(
-            r'\"fsk-button\".+?href=\"([^"]+)',
-            webpage, 'fsk_button', default=None)
-        if real_url:
-            webpage = self._download_webpage(real_url, video_id)
-
-        player = self._search_regex(
-            r'p4_video_player(?:_iframe)?\("video_\d+_container"\s*,(.+?)\);\s*\}',
-            webpage, 'player')
-
-        player_json = self._parse_json(
-            '[%s]' % player, video_id,
-            transform_source=lambda s: s.replace('undefined,', ''))
-
-        formats = None
-        result = None
-
-        for v in player_json:
-            if isinstance(v, list) and not formats:
-                formats = [{
-                    'url': f['url'],
-                    'format': 'hd' if f.get('hd') else 'sd',
-                    'width': int_or_none(f.get('size_x')),
-                    'height': int_or_none(f.get('size_y')),
-                    'tbr': int_or_none(f.get('bitrate')),
-                } for f in v]
-                self._sort_formats(formats)
-            elif isinstance(v, dict) and not result:
-                result = {
-                    'id': video_id,
-                    'title': v['videopartname'].strip(),
-                    'description': v.get('videotitle'),
-                    'duration': int_or_none(v.get('videoduration') or v.get('episodeduration')),
-                    'upload_date': unified_strdate(v.get('clipreleasetime')),
-                    'uploader': v.get('channel'),
-                }
-
-        result['formats'] = formats
-
-        return result
+        path = self._match_id(url)
+        content_path = self._download_json(
+            'http://www.puls4.com/api/json-fe/page/' + path, path)['content'][0]['url']
+        media = self._download_json(
+            'http://www.puls4.com' + content_path,
+            content_path)['mediaCurrent']
+        player_content = media['playerContent']
+        info = self._extract_video_info(url, player_content['id'])
+        info.update({
+            'id': compat_str(media['objectId']),
+            'title': player_content['title'],
+            'description': media.get('description'),
+            'thumbnail': media.get('previewLink'),
+            'upload_date': unified_strdate(media.get('date')),
+            'duration': parse_duration(player_content.get('duration')),
+            'episode': player_content.get('episodePartName'),
+            'show': media.get('channel'),
+            'season_id': player_content.get('seasonId'),
+            'uploader': player_content.get('sourceCompany'),
+        })
+        return info
index cc0416cb81eb23ed87d1dae0cdf2573a6df8936a..b8ac93a62c4157ae51335efcadf83ca363272f19 100644 (file)
@@ -1,59 +1,72 @@
 from __future__ import unicode_literals
 
 import re
-import os
 
 from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import int_or_none
 
 
 class PyvideoIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?pyvideo\.org/video/(?P<id>\d+)/(.*)'
-
-    _TESTS = [
-        {
-            'url': 'http://pyvideo.org/video/1737/become-a-logging-expert-in-30-minutes',
-            'md5': '520915673e53a5c5d487c36e0c4d85b5',
-            'info_dict': {
-                'id': '24_4WWkSmNo',
-                'ext': 'webm',
-                'title': 'Become a logging expert in 30 minutes',
-                'description': 'md5:9665350d466c67fb5b1598de379021f7',
-                'upload_date': '20130320',
-                'uploader': 'Next Day Video',
-                'uploader_id': 'NextDayVideo',
-            },
-            'add_ie': ['Youtube'],
+    _VALID_URL = r'https?://(?:www\.)?pyvideo\.org/(?P<category>[^/]+)/(?P<id>[^/?#&.]+)'
+
+    _TESTS = [{
+        'url': 'http://pyvideo.org/pycon-us-2013/become-a-logging-expert-in-30-minutes.html',
+        'info_dict': {
+            'id': 'become-a-logging-expert-in-30-minutes',
         },
-        {
-            'url': 'http://pyvideo.org/video/2542/gloriajw-spotifywitherikbernhardsson182m4v',
-            'md5': '5fe1c7e0a8aa5570330784c847ff6d12',
-            'info_dict': {
-                'id': '2542',
-                'ext': 'm4v',
-                'title': 'Gloriajw-SpotifyWithErikBernhardsson182',
-            },
+        'playlist_count': 2,
+    }, {
+        'url': 'http://pyvideo.org/pygotham-2012/gloriajw-spotifywitherikbernhardsson182m4v.html',
+        'md5': '5fe1c7e0a8aa5570330784c847ff6d12',
+        'info_dict': {
+            'id': '2542',
+            'ext': 'm4v',
+            'title': 'Gloriajw-SpotifyWithErikBernhardsson182.m4v',
         },
-    ]
+    }]
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
+        category = mobj.group('category')
         video_id = mobj.group('id')
 
-        webpage = self._download_webpage(url, video_id)
+        entries = []
 
-        m_youtube = re.search(r'(https?://www\.youtube\.com/watch\?v=.*)', webpage)
-        if m_youtube is not None:
-            return self.url_result(m_youtube.group(1), 'Youtube')
+        data = self._download_json(
+            'https://raw.githubusercontent.com/pyvideo/data/master/%s/videos/%s.json'
+            % (category, video_id), video_id, fatal=False)
 
-        title = self._html_search_regex(
-            r'<div class="section">\s*<h3(?:\s+class="[^"]*"[^>]*)?>([^>]+?)</h3>',
-            webpage, 'title', flags=re.DOTALL)
-        video_url = self._search_regex(
-            [r'<source src="(.*?)"', r'<dt>Download</dt>.*?<a href="(.+?)"'],
-            webpage, 'video url', flags=re.DOTALL)
+        if data:
+            for video in data['videos']:
+                video_url = video.get('url')
+                if video_url:
+                    if video.get('type') == 'youtube':
+                        entries.append(self.url_result(video_url, 'Youtube'))
+                    else:
+                        entries.append({
+                            'id': compat_str(data.get('id') or video_id),
+                            'url': video_url,
+                            'title': data['title'],
+                            'description': data.get('description') or data.get('summary'),
+                            'thumbnail': data.get('thumbnail_url'),
+                            'duration': int_or_none(data.get('duration')),
+                        })
+        else:
+            webpage = self._download_webpage(url, video_id)
+            title = self._og_search_title(webpage)
+            media_urls = self._search_regex(
+                r'(?s)Media URL:(.+?)</li>', webpage, 'media urls')
+            for m in re.finditer(
+                    r'<a[^>]+href=(["\'])(?P<url>http.+?)\1', media_urls):
+                media_url = m.group('url')
+                if re.match(r'https?://www\.youtube\.com/watch\?v=.*', media_url):
+                    entries.append(self.url_result(media_url, 'Youtube'))
+                else:
+                    entries.append({
+                        'id': video_id,
+                        'url': media_url,
+                        'title': title,
+                    })
 
-        return {
-            'id': video_id,
-            'title': os.path.splitext(title)[0],
-            'url': video_url,
-        }
+        return self.playlist_result(entries, video_id)
index ff0af9543c2b5e5527f406958e9ae5ae4d1adbda..37cb9e2c9dded7c9fa6e1e9eeef4ebeccdf9b4a9 100644 (file)
@@ -18,7 +18,7 @@ from ..utils import (
 class QQMusicIE(InfoExtractor):
     IE_NAME = 'qqmusic'
     IE_DESC = 'QQ音乐'
-    _VALID_URL = r'https?://y.qq.com/#type=song&mid=(?P<id>[0-9A-Za-z]+)'
+    _VALID_URL = r'https?://y\.qq\.com/#type=song&mid=(?P<id>[0-9A-Za-z]+)'
     _TESTS = [{
         'url': 'http://y.qq.com/#type=song&mid=004295Et37taLD',
         'md5': '9ce1c1c8445f561506d2e3cfb0255705',
@@ -172,7 +172,7 @@ class QQPlaylistBaseIE(InfoExtractor):
 class QQMusicSingerIE(QQPlaylistBaseIE):
     IE_NAME = 'qqmusic:singer'
     IE_DESC = 'QQ音乐 - 歌手'
-    _VALID_URL = r'https?://y.qq.com/#type=singer&mid=(?P<id>[0-9A-Za-z]+)'
+    _VALID_URL = r'https?://y\.qq\.com/#type=singer&mid=(?P<id>[0-9A-Za-z]+)'
     _TEST = {
         'url': 'http://y.qq.com/#type=singer&mid=001BLpXF2DyJe2',
         'info_dict': {
@@ -217,7 +217,7 @@ class QQMusicSingerIE(QQPlaylistBaseIE):
 class QQMusicAlbumIE(QQPlaylistBaseIE):
     IE_NAME = 'qqmusic:album'
     IE_DESC = 'QQ音乐 - 专辑'
-    _VALID_URL = r'https?://y.qq.com/#type=album&mid=(?P<id>[0-9A-Za-z]+)'
+    _VALID_URL = r'https?://y\.qq\.com/#type=album&mid=(?P<id>[0-9A-Za-z]+)'
 
     _TESTS = [{
         'url': 'http://y.qq.com/#type=album&mid=000gXCTb2AhRR1',
index 0cbb15f086f4b3c747f2da80f8af813f8dbf50f0..0aa8d059bf81dffd28df727650b20aafc49302eb 100644 (file)
@@ -1,4 +1,4 @@
-# -*- coding: utf-8 -*-
+# coding: utf-8
 
 from __future__ import unicode_literals
 
@@ -13,15 +13,15 @@ class RadioBremenIE(InfoExtractor):
     IE_NAME = 'radiobremen'
 
     _TEST = {
-        'url': 'http://www.radiobremen.de/mediathek/index.html?id=114720',
+        'url': 'http://www.radiobremen.de/mediathek/?id=141876',
         'info_dict': {
-            'id': '114720',
+            'id': '141876',
             'ext': 'mp4',
-            'duration': 1685,
+            'duration': 178,
             'width': 512,
-            'title': 'buten un binnen vom 22. Dezember',
+            'title': 'Druck auf Patrick Öztürk',
             'thumbnail': 're:https?://.*\.jpg$',
-            'description': 'Unter anderem mit diesen Themen: 45 Flüchtlinge sind in Worpswede angekommen +++ Freies Internet für alle: Bremer arbeiten an einem flächendeckenden W-Lan-Netzwerk +++ Aktivisten kämpfen für das Unibad +++ So war das Wetter 2014 +++',
+            'description': 'Gegen den SPD-Bürgerschaftsabgeordneten Patrick Öztürk wird wegen Beihilfe zum gewerbsmäßigen Betrug ermittelt. Am Donnerstagabend sollte er dem Vorstand des SPD-Unterbezirks Bremerhaven dazu Rede und Antwort stehen.',
         },
     }
 
index 8ec402646767a22f8a1e7cedbc89eb8576b59804..321917ad0810c6ddfe1d8586ba31570251fe012e 100644 (file)
@@ -13,6 +13,7 @@ from ..utils import (
     xpath_element,
     ExtractorError,
     determine_protocol,
+    unsmuggle_url,
 )
 
 
@@ -35,28 +36,51 @@ class RadioCanadaIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
+        url, smuggled_data = unsmuggle_url(url, {})
         app_code, video_id = re.match(self._VALID_URL, url).groups()
 
-        device_types = ['ipad', 'android']
+        metadata = self._download_xml(
+            'http://api.radio-canada.ca/metaMedia/v1/index.ashx',
+            video_id, note='Downloading metadata XML', query={
+                'appCode': app_code,
+                'idMedia': video_id,
+            })
+
+        def get_meta(name):
+            el = find_xpath_attr(metadata, './/Meta', 'name', name)
+            return el.text if el is not None else None
+
+        if get_meta('protectionType'):
+            raise ExtractorError('This video is DRM protected.', expected=True)
+
+        device_types = ['ipad']
         if app_code != 'toutv':
             device_types.append('flash')
+        if not smuggled_data:
+            device_types.append('android')
 
         formats = []
         # TODO: extract f4m formats
         # f4m formats can be extracted using flashhd device_type but they produce unplayable file
         for device_type in device_types:
-            v_data = self._download_xml(
-                'http://api.radio-canada.ca/validationMedia/v1/Validation.ashx',
-                video_id, note='Downloading %s XML' % device_type, query={
-                    'appCode': app_code,
-                    'idMedia': video_id,
-                    'connectionType': 'broadband',
-                    'multibitrate': 'true',
-                    'deviceType': device_type,
+            validation_url = 'http://api.radio-canada.ca/validationMedia/v1/Validation.ashx'
+            query = {
+                'appCode': app_code,
+                'idMedia': video_id,
+                'connectionType': 'broadband',
+                'multibitrate': 'true',
+                'deviceType': device_type,
+            }
+            if smuggled_data:
+                validation_url = 'https://services.radio-canada.ca/media/validation/v2/'
+                query.update(smuggled_data)
+            else:
+                query.update({
                     # paysJ391wsHjbOJwvCs26toz and bypasslock are used to bypass geo-restriction
                     'paysJ391wsHjbOJwvCs26toz': 'CA',
                     'bypasslock': 'NZt5K62gRqfc',
-                }, fatal=False)
+                })
+            v_data = self._download_xml(validation_url, video_id, note='Downloading %s XML' % device_type, query=query, fatal=False)
             v_url = xpath_text(v_data, 'url')
             if not v_url:
                 continue
@@ -101,16 +125,13 @@ class RadioCanadaIE(InfoExtractor):
                                 f4m_id='hds', fatal=False))
         self._sort_formats(formats)
 
-        metadata = self._download_xml(
-            'http://api.radio-canada.ca/metaMedia/v1/index.ashx',
-            video_id, note='Downloading metadata XML', query={
-                'appCode': app_code,
-                'idMedia': video_id,
-            })
-
-        def get_meta(name):
-            el = find_xpath_attr(metadata, './/Meta', 'name', name)
-            return el.text if el is not None else None
+        subtitles = {}
+        closed_caption_url = get_meta('closedCaption') or get_meta('closedCaptionHTML5')
+        if closed_caption_url:
+            subtitles['fr'] = [{
+                'url': closed_caption_url,
+                'ext': determine_ext(closed_caption_url, 'vtt'),
+            }]
 
         return {
             'id': video_id,
@@ -122,6 +143,7 @@ class RadioCanadaIE(InfoExtractor):
             'season_number': int_or_none('SrcSaison'),
             'episode_number': int_or_none('SrcEpisode'),
             'upload_date': unified_strdate(get_meta('Date')),
+            'subtitles': subtitles,
             'formats': formats,
         }
 
index 721fc3a9e2d2b3431051ea00982f72ae1d98ff65..c367a6ae74f3a7b63dd50f035a2d380f76dc3719 100644 (file)
@@ -1,5 +1,7 @@
 from __future__ import unicode_literals
 
+import re
+
 from .common import InfoExtractor
 from ..utils import (
     ExtractorError,
@@ -10,8 +12,8 @@ from ..utils import (
 
 
 class RedTubeIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?redtube\.com/(?P<id>[0-9]+)'
-    _TEST = {
+    _VALID_URL = r'https?://(?:(?:www\.)?redtube\.com/|embed\.redtube\.com/\?.*?\bid=)(?P<id>[0-9]+)'
+    _TESTS = [{
         'url': 'http://www.redtube.com/66418',
         'md5': '7b8c22b5e7098a3e1c09709df1126d2d',
         'info_dict': {
@@ -23,11 +25,21 @@ class RedTubeIE(InfoExtractor):
             'view_count': int,
             'age_limit': 18,
         }
-    }
+    }, {
+        'url': 'http://embed.redtube.com/?bgcolor=000000&id=1443286',
+        'only_matching': True,
+    }]
+
+    @staticmethod
+    def _extract_urls(webpage):
+        return re.findall(
+            r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//embed\.redtube\.com/\?.*?\bid=\d+)',
+            webpage)
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
-        webpage = self._download_webpage(url, video_id)
+        webpage = self._download_webpage(
+            'http://www.redtube.com/%s' % video_id, video_id)
 
         if any(s in webpage for s in ['video-deleted-info', '>This video has been removed']):
             raise ExtractorError('Video %s has been removed' % video_id, expected=True)
diff --git a/youtube_dl/extractor/rentv.py b/youtube_dl/extractor/rentv.py
new file mode 100644 (file)
index 0000000..422c02c
--- /dev/null
@@ -0,0 +1,76 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from .jwplatform import JWPlatformBaseIE
+from ..compat import compat_str
+
+
+class RENTVIE(JWPlatformBaseIE):
+    _VALID_URL = r'(?:rentv:|https?://(?:www\.)?ren\.tv/(?:player|video/epizod)/)(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'http://ren.tv/video/epizod/118577',
+        'md5': 'd91851bf9af73c0ad9b2cdf76c127fbb',
+        'info_dict': {
+            'id': '118577',
+            'ext': 'mp4',
+            'title': 'Документальный спецпроект: "Промывка мозгов. Технологии XXI века"'
+        }
+    }, {
+        'url': 'http://ren.tv/player/118577',
+        'only_matching': True,
+    }, {
+        'url': 'rentv:118577',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage('http://ren.tv/player/' + video_id, video_id)
+        jw_config = self._parse_json(self._search_regex(
+            r'config\s*=\s*({.+});', webpage, 'jw config'), video_id)
+        return self._parse_jwplayer_data(jw_config, video_id, m3u8_id='hls')
+
+
+class RENTVArticleIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?ren\.tv/novosti/\d{4}-\d{2}-\d{2}/(?P<id>[^/?#]+)'
+    _TESTS = [{
+        'url': 'http://ren.tv/novosti/2016-10-26/video-mikroavtobus-popavshiy-v-dtp-s-gruzovikami-v-podmoskove-prevratilsya-v',
+        'md5': 'ebd63c4680b167693745ab91343df1d6',
+        'info_dict': {
+            'id': '136472',
+            'ext': 'mp4',
+            'title': 'Видео: микроавтобус, попавший в ДТП с грузовиками в Подмосковье, превратился в груду металла',
+            'description': 'Жертвами столкновения двух фур и микроавтобуса, по последним данным, стали семь человек.',
+        }
+    }, {
+        # TODO: invalid m3u8
+        'url': 'http://ren.tv/novosti/2015-09-25/sluchaynyy-prohozhiy-poymal-avtougonshchika-v-murmanske-video',
+        'info_dict': {
+            'id': 'playlist',
+            'ext': 'mp4',
+            'title': 'Случайный прохожий поймал автоугонщика в Мурманске. ВИДЕО | РЕН ТВ',
+            'uploader': 'ren.tv',
+        },
+        'params': {
+            # m3u8 downloads
+            'skip_download': True,
+        },
+        'skip': True,
+    }]
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+        drupal_settings = self._parse_json(self._search_regex(
+            r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);',
+            webpage, 'drupal settings'), display_id)
+
+        entries = []
+        for config_profile in drupal_settings.get('ren_jwplayer', {}).values():
+            media_id = config_profile.get('mediaid')
+            if not media_id:
+                continue
+            media_id = compat_str(media_id)
+            entries.append(self.url_result('rentv:' + media_id, 'RENTV', media_id))
+        return self.playlist_result(entries, display_id)
index 3c6725aeb42945ce7f4e07b49bcd0d629248fcac..4875009e5cafd68867b67393d36d90625e5f29c8 100644 (file)
@@ -1,29 +1,29 @@
 from __future__ import unicode_literals
 
-import re
-
 from .common import InfoExtractor
-from ..utils import str_or_none
+from ..utils import (
+    qualities,
+    str_or_none,
+)
 
 
 class ReverbNationIE(InfoExtractor):
     _VALID_URL = r'^https?://(?:www\.)?reverbnation\.com/.*?/song/(?P<id>\d+).*?$'
     _TESTS = [{
         'url': 'http://www.reverbnation.com/alkilados/song/16965047-mona-lisa',
-        'md5': '3da12ebca28c67c111a7f8b262d3f7a7',
+        'md5': 'c0aaf339bcee189495fdf5a8c8ba8645',
         'info_dict': {
             'id': '16965047',
             'ext': 'mp3',
             'title': 'MONA LISA',
             'uploader': 'ALKILADOS',
             'uploader_id': '216429',
-            'thumbnail': 're:^https://gp1\.wac\.edgecastcdn\.net/.*?\.jpg$'
+            'thumbnail': 're:^https?://.*\.jpg',
         },
     }]
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        song_id = mobj.group('id')
+        song_id = self._match_id(url)
 
         api_res = self._download_json(
             'https://api.reverbnation.com/song/%s' % song_id,
@@ -31,14 +31,23 @@ class ReverbNationIE(InfoExtractor):
             note='Downloading information of song %s' % song_id
         )
 
+        THUMBNAILS = ('thumbnail', 'image')
+        quality = qualities(THUMBNAILS)
+        thumbnails = []
+        for thumb_key in THUMBNAILS:
+            if api_res.get(thumb_key):
+                thumbnails.append({
+                    'url': api_res[thumb_key],
+                    'preference': quality(thumb_key)
+                })
+
         return {
             'id': song_id,
-            'title': api_res.get('name'),
-            'url': api_res.get('url'),
+            'title': api_res['name'],
+            'url': api_res['url'],
             'uploader': api_res.get('artist', {}).get('name'),
             'uploader_id': str_or_none(api_res.get('artist', {}).get('id')),
-            'thumbnail': self._proto_relative_url(
-                api_res.get('image', api_res.get('thumbnail'))),
+            'thumbnails': thumbnails,
             'ext': 'mp3',
             'vcodec': 'none',
         }
diff --git a/youtube_dl/extractor/rmcdecouverte.py b/youtube_dl/extractor/rmcdecouverte.py
new file mode 100644 (file)
index 0000000..2340dae
--- /dev/null
@@ -0,0 +1,39 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from .brightcove import BrightcoveLegacyIE
+from ..compat import (
+    compat_parse_qs,
+    compat_urlparse,
+)
+
+
+class RMCDecouverteIE(InfoExtractor):
+    _VALID_URL = r'https?://rmcdecouverte\.bfmtv\.com/mediaplayer-replay.*?\bid=(?P<id>\d+)'
+
+    _TEST = {
+        'url': 'http://rmcdecouverte.bfmtv.com/mediaplayer-replay/?id=1430&title=LES%20HEROS%20DU%2088e%20ETAGE',
+        'info_dict': {
+            'id': '5111223049001',
+            'ext': 'mp4',
+            'title': ': LES HEROS DU 88e ETAGE',
+            'description': 'Découvrez comment la bravoure de deux hommes dans la Tour Nord du World Trade Center a sauvé  la vie d\'innombrables personnes le 11 septembre 2001.',
+            'uploader_id': '1969646226001',
+            'upload_date': '20160904',
+            'timestamp': 1472951103,
+        },
+        'params': {
+            # rtmp download
+            'skip_download': True,
+        },
+        'skip': 'Only works from France',
+    }
+    BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1969646226001/default_default/index.html?videoId=%s'
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+        brightcove_legacy_url = BrightcoveLegacyIE._extract_brightcove_url(webpage)
+        brightcove_id = compat_parse_qs(compat_urlparse.urlparse(brightcove_legacy_url).query)['@videoPlayer'][0]
+        return self.url_result(self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 'BrightcoveNew', brightcove_id)
index f9cd48790c3b4a92b82bf1880020d53a074b1434..1d404d20aa8b2223c68cada46e4bfe87613eb6ae 100644 (file)
@@ -1,31 +1,32 @@
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
-from ..compat import compat_urlparse
 from .internetvideoarchive import InternetVideoArchiveIE
 
 
 class RottenTomatoesIE(InfoExtractor):
-    _VALID_URL = r'https?://www\.rottentomatoes\.com/m/[^/]+/trailers/(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www\.)?rottentomatoes\.com/m/[^/]+/trailers/(?P<id>\d+)'
 
     _TEST = {
         'url': 'http://www.rottentomatoes.com/m/toy_story_3/trailers/11028566/',
         'info_dict': {
-            'id': '613340',
+            'id': '11028566',
             'ext': 'mp4',
             'title': 'Toy Story 3',
+            'description': 'From the creators of the beloved TOY STORY films, comes a story that will reunite the gang in a whole new way.',
+            'thumbnail': 're:^https?://.*\.jpg$',
         },
     }
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
-        og_video = self._og_search_video_url(webpage)
-        query = compat_urlparse.urlparse(og_video).query
+        iva_id = self._search_regex(r'publishedid=(\d+)', webpage, 'internet video archive id')
 
         return {
             '_type': 'url_transparent',
-            'url': InternetVideoArchiveIE._build_xml_url(query),
+            'url': 'http://video.internetvideoarchive.net/player/6/configuration.ashx?domain=www.videodetective.com&customerid=69249&playerid=641&publishedid=' + iva_id,
             'ie_key': InternetVideoArchiveIE.ie_key(),
+            'id': video_id,
             'title': self._og_search_title(webpage),
         }
index 41638c1d01e2e76398d60ae5ef869d93845a59bc..65284643b4de287c7e77b9e2b571822c5a020606 100644 (file)
@@ -7,7 +7,7 @@ from ..utils import unified_strdate, determine_ext
 
 
 class RoxwelIE(InfoExtractor):
-    _VALID_URL = r'https?://www\.roxwel\.com/player/(?P<filename>.+?)(\.|\?|$)'
+    _VALID_URL = r'https?://(?:www\.)?roxwel\.com/player/(?P<filename>.+?)(\.|\?|$)'
 
     _TEST = {
         'url': 'http://www.roxwel.com/player/passionpittakeawalklive.html',
index de004671d564eb455e45361666fd304f8ca040a6..cb4ee88033ba1d761faac452de724a6c44f08503 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 import re
index 34f9c4a991263572d935a2bfc79194f8594ecb78..6a43b036e924470055aea3910d1c5ea807483fdb 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 import base64
@@ -64,7 +64,7 @@ def _decrypt_url(png):
 class RTVEALaCartaIE(InfoExtractor):
     IE_NAME = 'rtve.es:alacarta'
     IE_DESC = 'RTVE a la carta'
-    _VALID_URL = r'https?://www\.rtve\.es/(m/)?(alacarta/videos|filmoteca)/[^/]+/[^/]+/(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www\.)?rtve\.es/(m/)?(alacarta/videos|filmoteca)/[^/]+/[^/]+/(?P<id>\d+)'
 
     _TESTS = [{
         'url': 'http://www.rtve.es/alacarta/videos/balonmano/o-swiss-cup-masculina-final-espana-suecia/2491869/',
@@ -184,7 +184,7 @@ class RTVEInfantilIE(InfoExtractor):
 class RTVELiveIE(InfoExtractor):
     IE_NAME = 'rtve.es:live'
     IE_DESC = 'RTVE.es live streams'
-    _VALID_URL = r'https?://www\.rtve\.es/directo/(?P<id>[a-zA-Z0-9-]+)'
+    _VALID_URL = r'https?://(?:www\.)?rtve\.es/directo/(?P<id>[a-zA-Z0-9-]+)'
 
     _TESTS = [{
         'url': 'http://www.rtve.es/directo/la-1/',
@@ -226,7 +226,7 @@ class RTVELiveIE(InfoExtractor):
 
 class RTVETelevisionIE(InfoExtractor):
     IE_NAME = 'rtve.es:television'
-    _VALID_URL = r'https?://www\.rtve\.es/television/[^/]+/[^/]+/(?P<id>\d+).shtml'
+    _VALID_URL = r'https?://(?:www\.)?rtve\.es/television/[^/]+/[^/]+/(?P<id>\d+).shtml'
 
     _TEST = {
         'url': 'http://www.rtve.es/television/20160628/revolucion-del-movil/1364141.shtml',
index 38366b784e7fbedce2b2d7de3669db752a6528d0..9a330c1961b75f662caa457fabe231e6aa4bcb8a 100644 (file)
@@ -43,7 +43,7 @@ class RudoIE(JWPlatformBaseIE):
             transform_source=lambda s: js_to_json(re.sub(r'encodeURI\([^)]+\)', '""', s)))
 
         info_dict = self._parse_jwplayer_data(
-            jwplayer_data, video_id, require_title=False, m3u8_id='hls')
+            jwplayer_data, video_id, require_title=False, m3u8_id='hls', mpd_id='dash')
 
         info_dict.update({
             'title': self._og_search_title(webpage),
index 1f7c262993c8ce7e0d602f612fc6316e80052f66..ce631b46c30bcd2eda03c798d61bed616f41e0b4 100644 (file)
@@ -1,4 +1,4 @@
-# -*- coding: utf-8 -*-
+# coding: utf-8
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
index 9ca4ae147cb1e3c430de3abd9fd0927aaee2ed5a..fd1df925ba46bcecf87e192d2331da5e77d0b4bc 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 import re
@@ -88,7 +88,7 @@ class RutubeIE(InfoExtractor):
 class RutubeEmbedIE(InfoExtractor):
     IE_NAME = 'rutube:embed'
     IE_DESC = 'Rutube embedded videos'
-    _VALID_URL = 'https?://rutube\.ru/(?:video|play)/embed/(?P<id>[0-9]+)'
+    _VALID_URL = r'https?://rutube\.ru/(?:video|play)/embed/(?P<id>[0-9]+)'
 
     _TESTS = [{
         'url': 'http://rutube.ru/video/embed/6722881?vk_puid37=&vk_puid38=',
index a2379eb04c2e6744a49f315ebee2a0c9fb0170f6..a5e672c0a674e3461c261e0b6b2ca7ca9435ea30 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 import re
index ffea438cc4645c267c87b54a761394e0c1eca247..6db3e3e9328b754f4a1f0ef149b33489857a3cd5 100644 (file)
@@ -5,6 +5,7 @@ from .common import InfoExtractor
 from ..compat import compat_urllib_parse_urlparse
 from ..utils import (
     determine_ext,
+    ExtractorError,
     int_or_none,
     xpath_attr,
     xpath_text,
@@ -12,7 +13,7 @@ from ..utils import (
 
 
 class RuutuIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?ruutu\.fi/video/(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www\.)?(?:ruutu|supla)\.fi/(?:video|supla)/(?P<id>\d+)'
     _TESTS = [
         {
             'url': 'http://www.ruutu.fi/video/2058907',
@@ -34,12 +35,24 @@ class RuutuIE(InfoExtractor):
                 'id': '2057306',
                 'ext': 'mp4',
                 'title': 'Superpesis: katso koko kausi Ruudussa',
-                'description': 'md5:da2736052fef3b2bd5e0005e63c25eac',
+                'description': 'md5:bfb7336df2a12dc21d18fa696c9f8f23',
                 'thumbnail': 're:^https?://.*\.jpg$',
                 'duration': 40,
                 'age_limit': 0,
             },
         },
+        {
+            'url': 'http://www.supla.fi/supla/2231370',
+            'md5': 'df14e782d49a2c0df03d3be2a54ef949',
+            'info_dict': {
+                'id': '2231370',
+                'ext': 'mp4',
+                'title': 'Osa 1: Mikael Jungner',
+                'description': 'md5:7d90f358c47542e3072ff65d7b1bcffe',
+                'thumbnail': 're:^https?://.*\.jpg$',
+                'age_limit': 0,
+            },
+        },
     ]
 
     def _real_extract(self, url):
@@ -89,6 +102,11 @@ class RuutuIE(InfoExtractor):
                         })
 
         extract_formats(video_xml.find('./Clip'))
+
+        drm = xpath_text(video_xml, './Clip/DRM', default=None)
+        if not formats and drm:
+            raise ExtractorError('This video is DRM protected.', expected=True)
+
         self._sort_formats(formats)
 
         return {
index 08ddbe3c4222879cade93fd9a5433e1d9c4e6e49..c3aec1edde5e9d02efb377fa39941ae01d2f04b4 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 import re
@@ -103,13 +103,13 @@ class SafariIE(SafariBaseIE):
 
         webpage = self._download_webpage(url, video_id)
         reference_id = self._search_regex(
-            r'data-reference-id=(["\'])(?P<id>.+?)\1',
+            r'data-reference-id=(["\'])(?P<id>(?:(?!\1).)+)\1',
             webpage, 'kaltura reference id', group='id')
         partner_id = self._search_regex(
-            r'data-partner-id=(["\'])(?P<id>.+?)\1',
+            r'data-partner-id=(["\'])(?P<id>(?:(?!\1).)+)\1',
             webpage, 'kaltura widget id', group='id')
         ui_id = self._search_regex(
-            r'data-ui-id=(["\'])(?P<id>.+?)\1',
+            r'data-ui-id=(["\'])(?P<id>(?:(?!\1).)+)\1',
             webpage, 'kaltura uiconf id', group='id')
 
         query = {
@@ -157,7 +157,14 @@ class SafariCourseIE(SafariBaseIE):
     IE_NAME = 'safari:course'
     IE_DESC = 'safaribooksonline.com online courses'
 
-    _VALID_URL = r'https?://(?:www\.)?safaribooksonline\.com/(?:library/view/[^/]+|api/v1/book)/(?P<id>[^/]+)/?(?:[#?]|$)'
+    _VALID_URL = r'''(?x)
+                    https?://
+                        (?:
+                            (?:www\.)?safaribooksonline\.com/(?:library/view/[^/]+|api/v1/book)|
+                            techbus\.safaribooksonline\.com
+                        )
+                        /(?P<id>[^/]+)/?(?:[#?]|$)
+                    '''
 
     _TESTS = [{
         'url': 'https://www.safaribooksonline.com/library/view/hadoop-fundamentals-livelessons/9780133392838/',
@@ -170,6 +177,9 @@ class SafariCourseIE(SafariBaseIE):
     }, {
         'url': 'https://www.safaribooksonline.com/api/v1/book/9781449396459/?override_format=json',
         'only_matching': True,
+    }, {
+        'url': 'http://techbus.safaribooksonline.com/9780134426365',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
index 172cc12752d64ce326ba74ced1223628e5fff76a..49a9b313a87a5bf9b80fa8a3b8d78c104722cd5b 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 import re
index 96472fbc44e9a78654ae7c136e9f7e4a31751a13..43131fb7e5ce82d69d25bf639ce6c2bffe35182a 100644 (file)
@@ -1,4 +1,4 @@
-# -*- coding: utf-8 -*-
+# coding: utf-8
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
index 3566317008712d8e378eec36c437cbc39fdc1cdc..ed9de964841e52c1e5753556d6b9e53339ba23c3 100644 (file)
@@ -1,4 +1,4 @@
-# -*- coding: utf-8 -*-
+# coding: utf-8
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
index dd0a6ba19d4ef3b9397af6d977277256cbc0e1e9..02e574cd89a79b2ad76c0eea8c76792b15f48d7b 100644 (file)
@@ -11,7 +11,7 @@ from ..utils import (
 
 
 class ScreenJunkiesIE(InfoExtractor):
-    _VALID_URL = r'https?://www.screenjunkies.com/video/(?P<display_id>[^/]+?)(?:-(?P<id>\d+))?(?:[/?#&]|$)'
+    _VALID_URL = r'https?://(?:www\.)?screenjunkies\.com/video/(?P<display_id>[^/]+?)(?:-(?P<id>\d+))?(?:[/?#&]|$)'
     _TESTS = [{
         'url': 'http://www.screenjunkies.com/video/best-quentin-tarantino-movie-2841915',
         'md5': '5c2b686bec3d43de42bde9ec047536b0',
diff --git a/youtube_dl/extractor/screenwavemedia.py b/youtube_dl/extractor/screenwavemedia.py
deleted file mode 100644 (file)
index 40333c8..0000000
+++ /dev/null
@@ -1,146 +0,0 @@
-# encoding: utf-8
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..utils import (
-    int_or_none,
-    unified_strdate,
-    js_to_json,
-)
-
-
-class ScreenwaveMediaIE(InfoExtractor):
-    _VALID_URL = r'(?:https?:)?//player\d?\.screenwavemedia\.com/(?:play/)?[a-zA-Z]+\.php\?.*\bid=(?P<id>[A-Za-z0-9-]+)'
-    EMBED_PATTERN = r'src=(["\'])(?P<url>(?:https?:)?//player\d?\.screenwavemedia\.com/(?:play/)?[a-zA-Z]+\.php\?.*\bid=.+?)\1'
-    _TESTS = [{
-        'url': 'http://player.screenwavemedia.com/play/play.php?playerdiv=videoarea&companiondiv=squareAd&id=Cinemassacre-19911',
-        'only_matching': True,
-    }]
-
-    def _real_extract(self, url):
-        video_id = self._match_id(url)
-
-        playerdata = self._download_webpage(
-            'http://player.screenwavemedia.com/player.php?id=%s' % video_id,
-            video_id, 'Downloading player webpage')
-
-        vidtitle = self._search_regex(
-            r'\'vidtitle\'\s*:\s*"([^"]+)"', playerdata, 'vidtitle').replace('\\/', '/')
-
-        playerconfig = self._download_webpage(
-            'http://player.screenwavemedia.com/player.js',
-            video_id, 'Downloading playerconfig webpage')
-
-        videoserver = self._search_regex(r'SWMServer\s*=\s*"([\d\.]+)"', playerdata, 'videoserver')
-
-        sources = self._parse_json(
-            js_to_json(
-                re.sub(
-                    r'(?s)/\*.*?\*/', '',
-                    self._search_regex(
-                        r'sources\s*:\s*(\[[^\]]+?\])', playerconfig,
-                        'sources',
-                    ).replace(
-                        "' + thisObj.options.videoserver + '",
-                        videoserver
-                    ).replace(
-                        "' + playerVidId + '",
-                        video_id
-                    )
-                )
-            ),
-            video_id, fatal=False
-        )
-
-        # Fallback to hardcoded sources if JS changes again
-        if not sources:
-            self.report_warning('Falling back to a hardcoded list of streams')
-            sources = [{
-                'file': 'http://%s/vod/%s_%s.mp4' % (videoserver, video_id, format_id),
-                'type': 'mp4',
-                'label': format_label,
-            } for format_id, format_label in (
-                ('low', '144p Low'), ('med', '160p Med'), ('high', '360p High'), ('hd1', '720p HD1'))]
-            sources.append({
-                'file': 'http://%s/vod/smil:%s.smil/playlist.m3u8' % (videoserver, video_id),
-                'type': 'hls',
-            })
-
-        formats = []
-        for source in sources:
-            file_ = source.get('file')
-            if not file_:
-                continue
-            if source.get('type') == 'hls':
-                formats.extend(self._extract_m3u8_formats(file_, video_id, ext='mp4'))
-            else:
-                format_id = self._search_regex(
-                    r'_(.+?)\.[^.]+$', file_, 'format id', default=None)
-                if not self._is_valid_url(file_, video_id, format_id or 'video'):
-                    continue
-                format_label = source.get('label')
-                height = int_or_none(self._search_regex(
-                    r'^(\d+)[pP]', format_label, 'height', default=None))
-                formats.append({
-                    'url': file_,
-                    'format_id': format_id,
-                    'format': format_label,
-                    'ext': source.get('type'),
-                    'height': height,
-                })
-        self._sort_formats(formats, field_preference=('height', 'width', 'tbr', 'format_id'))
-
-        return {
-            'id': video_id,
-            'title': vidtitle,
-            'formats': formats,
-        }
-
-
-class TeamFourIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?teamfourstar\.com/video/(?P<id>[a-z0-9\-]+)/?'
-    _TEST = {
-        'url': 'http://teamfourstar.com/video/a-moment-with-tfs-episode-4/',
-        'info_dict': {
-            'id': 'TeamFourStar-5292a02f20bfa',
-            'ext': 'mp4',
-            'upload_date': '20130401',
-            'description': 'Check out this and more on our website: http://teamfourstar.com\nTFS Store: http://sharkrobot.com/team-four-star\nFollow on Twitter: http://twitter.com/teamfourstar\nLike on FB: http://facebook.com/teamfourstar',
-            'title': 'A Moment With TFS Episode 4',
-        },
-        'params': {
-            # m3u8 download
-            'skip_download': True,
-        },
-    }
-
-    def _real_extract(self, url):
-        display_id = self._match_id(url)
-        webpage = self._download_webpage(url, display_id)
-
-        playerdata_url = self._search_regex(
-            r'src="(http://player\d?\.screenwavemedia\.com/(?:play/)?[a-zA-Z]+\.php\?[^"]*\bid=.+?)"',
-            webpage, 'player data URL')
-
-        video_title = self._html_search_regex(
-            r'<div class="heroheadingtitle">(?P<title>.+?)</div>',
-            webpage, 'title')
-        video_date = unified_strdate(self._html_search_regex(
-            r'<div class="heroheadingdate">(?P<date>.+?)</div>',
-            webpage, 'date', fatal=False))
-        video_description = self._html_search_regex(
-            r'(?s)<div class="postcontent">(?P<description>.+?)</div>',
-            webpage, 'description', fatal=False)
-        video_thumbnail = self._og_search_thumbnail(webpage)
-
-        return {
-            '_type': 'url_transparent',
-            'display_id': display_id,
-            'title': video_title,
-            'description': video_description,
-            'upload_date': video_date,
-            'thumbnail': video_thumbnail,
-            'url': playerdata_url,
-        }
index c5f474dd1d8a5040a5368de7f2aa050658f7a984..35540c082ef2f7c4d6fa9cf9ce8acf404bc33a8c 100644 (file)
@@ -48,7 +48,7 @@ class SenateISVPIE(InfoExtractor):
         ['arch', '', 'http://ussenate-f.akamaihd.net/']
     ]
     _IE_NAME = 'senate.gov'
-    _VALID_URL = r'https?://www\.senate\.gov/isvp/?\?(?P<qs>.+)'
+    _VALID_URL = r'https?://(?:www\.)?senate\.gov/isvp/?\?(?P<qs>.+)'
     _TESTS = [{
         'url': 'http://www.senate.gov/isvp/?comm=judiciary&type=live&stt=&filename=judiciary031715&auto_play=false&wmode=transparent&poster=http%3A%2F%2Fwww.judiciary.senate.gov%2Fthemes%2Fjudiciary%2Fimages%2Fvideo-poster-flash-fit.png',
         'info_dict': {
index ca286abb1f7df74c46a62f995475fa771245f282..62d41e88a1084c58af259176e28a8b2654ccd4ee 100644 (file)
@@ -1,17 +1,24 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
+import re
+import json
+
 from .common import InfoExtractor
+from ..compat import compat_HTTPError
 from ..utils import (
     ExtractorError,
     int_or_none,
     parse_iso8601,
     str_or_none,
+    urlencode_postdata,
+    clean_html,
 )
 
 
 class ShahidIE(InfoExtractor):
-    _VALID_URL = r'https?://shahid\.mbc\.net/ar/episode/(?P<id>\d+)/?'
+    _NETRC_MACHINE = 'shahid'
+    _VALID_URL = r'https?://shahid\.mbc\.net/ar/(?P<type>episode|movie)/(?P<id>\d+)'
     _TESTS = [{
         'url': 'https://shahid.mbc.net/ar/episode/90574/%D8%A7%D9%84%D9%85%D9%84%D9%83-%D8%B9%D8%A8%D8%AF%D8%A7%D9%84%D9%84%D9%87-%D8%A7%D9%84%D8%A5%D9%86%D8%B3%D8%A7%D9%86-%D8%A7%D9%84%D9%85%D9%88%D8%B3%D9%85-1-%D9%83%D9%84%D9%8A%D8%A8-3.html',
         'info_dict': {
@@ -27,18 +34,54 @@ class ShahidIE(InfoExtractor):
             # m3u8 download
             'skip_download': True,
         }
+    }, {
+        'url': 'https://shahid.mbc.net/ar/movie/151746/%D8%A7%D9%84%D9%82%D9%86%D8%A7%D8%B5%D8%A9.html',
+        'only_matching': True
     }, {
         # shahid plus subscriber only
         'url': 'https://shahid.mbc.net/ar/episode/90511/%D9%85%D8%B1%D8%A7%D9%8A%D8%A7-2011-%D8%A7%D9%84%D9%85%D9%88%D8%B3%D9%85-1-%D8%A7%D9%84%D8%AD%D9%84%D9%82%D8%A9-1.html',
         'only_matching': True
     }]
 
-    def _call_api(self, path, video_id, note):
-        data = self._download_json(
-            'http://api.shahid.net/api/v1_1/' + path, video_id, note, query={
-                'apiKey': 'sh@hid0nlin3',
-                'hash': 'b2wMCTHpSmyxGqQjJFOycRmLSex+BpTK/ooxy6vHaqs=',
-            }).get('data', {})
+    def _real_initialize(self):
+        email, password = self._get_login_info()
+        if email is None:
+            return
+
+        try:
+            user_data = self._download_json(
+                'https://shahid.mbc.net/wd/service/users/login',
+                None, 'Logging in', data=json.dumps({
+                    'email': email,
+                    'password': password,
+                    'basic': 'false',
+                }).encode('utf-8'), headers={
+                    'Content-Type': 'application/json; charset=UTF-8',
+                })['user']
+        except ExtractorError as e:
+            if isinstance(e.cause, compat_HTTPError):
+                fail_data = self._parse_json(
+                    e.cause.read().decode('utf-8'), None, fatal=False)
+                if fail_data:
+                    faults = fail_data.get('faults', [])
+                    faults_message = ', '.join([clean_html(fault['userMessage']) for fault in faults if fault.get('userMessage')])
+                    if faults_message:
+                        raise ExtractorError(faults_message, expected=True)
+            raise
+
+        self._download_webpage(
+            'https://shahid.mbc.net/populateContext',
+            None, 'Populate Context', data=urlencode_postdata({
+                'firstName': user_data['firstName'],
+                'lastName': user_data['lastName'],
+                'userName': user_data['email'],
+                'csg_user_name': user_data['email'],
+                'subscriberId': user_data['id'],
+                'sessionId': user_data['sessionId'],
+            }))
+
+    def _get_api_data(self, response):
+        data = response.get('data', {})
 
         error = data.get('error')
         if error:
@@ -49,11 +92,11 @@ class ShahidIE(InfoExtractor):
         return data
 
     def _real_extract(self, url):
-        video_id = self._match_id(url)
+        page_type, video_id = re.match(self._VALID_URL, url).groups()
 
-        player = self._call_api(
-            'Content/Episode/%s' % video_id,
-            video_id, 'Downloading player JSON')
+        player = self._get_api_data(self._download_json(
+            'https://shahid.mbc.net/arContent/getPlayerContent-param-.id-%s.type-player.html' % video_id,
+            video_id, 'Downloading player JSON'))
 
         if player.get('drm'):
             raise ExtractorError('This video is DRM protected.', expected=True)
@@ -61,9 +104,12 @@ class ShahidIE(InfoExtractor):
         formats = self._extract_m3u8_formats(player['url'], video_id, 'mp4')
         self._sort_formats(formats)
 
-        video = self._call_api(
-            'episode/%s' % video_id, video_id,
-            'Downloading video JSON')['episode']
+        video = self._get_api_data(self._download_json(
+            'http://api.shahid.net/api/v1_1/%s/%s' % (page_type, video_id),
+            video_id, 'Downloading video JSON', query={
+                'apiKey': 'sh@hid0nlin3',
+                'hash': 'b2wMCTHpSmyxGqQjJFOycRmLSex+BpTK/ooxy6vHaqs=',
+            }))[page_type]
 
         title = video['title']
         categories = [
index d592dfeb8ed99d45d7c9b8245f5570e06f260a20..89e19e9277f42b69ac6f01b03c78ced00f3ec990 100644 (file)
@@ -10,11 +10,38 @@ from ..utils import (
 )
 
 
-class SharedIE(InfoExtractor):
-    IE_DESC = 'shared.sx and vivo.sx'
-    _VALID_URL = r'https?://(?:shared|vivo)\.sx/(?P<id>[\da-z]{10})'
+class SharedBaseIE(InfoExtractor):
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        webpage, urlh = self._download_webpage_handle(url, video_id)
+
+        if self._FILE_NOT_FOUND in webpage:
+            raise ExtractorError(
+                'Video %s does not exist' % video_id, expected=True)
+
+        video_url = self._extract_video_url(webpage, video_id, url)
+
+        title = base64.b64decode(self._html_search_meta(
+            'full:title', webpage, 'title').encode('utf-8')).decode('utf-8')
+        filesize = int_or_none(self._html_search_meta(
+            'full:size', webpage, 'file size', fatal=False))
+
+        return {
+            'id': video_id,
+            'url': video_url,
+            'ext': 'mp4',
+            'filesize': filesize,
+            'title': title,
+        }
+
+
+class SharedIE(SharedBaseIE):
+    IE_DESC = 'shared.sx'
+    _VALID_URL = r'https?://shared\.sx/(?P<id>[\da-z]{10})'
+    _FILE_NOT_FOUND = '>File does not exist<'
 
-    _TESTS = [{
+    _TEST = {
         'url': 'http://shared.sx/0060718775',
         'md5': '106fefed92a8a2adb8c98e6a0652f49b',
         'info_dict': {
@@ -23,52 +50,47 @@ class SharedIE(InfoExtractor):
             'title': 'Bmp4',
             'filesize': 1720110,
         },
-    }, {
-        'url': 'http://vivo.sx/d7ddda0e78',
-        'md5': '15b3af41be0b4fe01f4df075c2678b2c',
-        'info_dict': {
-            'id': 'd7ddda0e78',
-            'ext': 'mp4',
-            'title': 'Chicken',
-            'filesize': 528031,
-        },
-    }]
-
-    def _real_extract(self, url):
-        video_id = self._match_id(url)
-
-        webpage, urlh = self._download_webpage_handle(url, video_id)
-
-        if '>File does not exist<' in webpage:
-            raise ExtractorError(
-                'Video %s does not exist' % video_id, expected=True)
+    }
 
+    def _extract_video_url(self, webpage, video_id, url):
         download_form = self._hidden_inputs(webpage)
 
         video_page = self._download_webpage(
-            urlh.geturl(), video_id, 'Downloading video page',
+            url, video_id, 'Downloading video page',
             data=urlencode_postdata(download_form),
             headers={
                 'Content-Type': 'application/x-www-form-urlencoded',
-                'Referer': urlh.geturl(),
+                'Referer': url,
             })
 
         video_url = self._html_search_regex(
             r'data-url=(["\'])(?P<url>(?:(?!\1).)+)\1',
             video_page, 'video URL', group='url')
-        title = base64.b64decode(self._html_search_meta(
-            'full:title', webpage, 'title').encode('utf-8')).decode('utf-8')
-        filesize = int_or_none(self._html_search_meta(
-            'full:size', webpage, 'file size', fatal=False))
-        thumbnail = self._html_search_regex(
-            r'data-poster=(["\'])(?P<url>(?:(?!\1).)+)\1',
-            video_page, 'thumbnail', default=None, group='url')
 
-        return {
-            'id': video_id,
-            'url': video_url,
+        return video_url
+
+
+class VivoIE(SharedBaseIE):
+    IE_DESC = 'vivo.sx'
+    _VALID_URL = r'https?://vivo\.sx/(?P<id>[\da-z]{10})'
+    _FILE_NOT_FOUND = '>The file you have requested does not exists or has been removed'
+
+    _TEST = {
+        'url': 'http://vivo.sx/d7ddda0e78',
+        'md5': '15b3af41be0b4fe01f4df075c2678b2c',
+        'info_dict': {
+            'id': 'd7ddda0e78',
             'ext': 'mp4',
-            'filesize': filesize,
-            'title': title,
-            'thumbnail': thumbnail,
-        }
+            'title': 'Chicken',
+            'filesize': 528031,
+        },
+    }
+
+    def _extract_video_url(self, webpage, video_id, *args):
+        return self._parse_json(
+            self._search_regex(
+                r'InitializeStream\s*\(\s*(["\'])(?P<url>(?:(?!\1).)+)\1',
+                webpage, 'stream', group='url'),
+            video_id,
+            transform_source=lambda x: base64.b64decode(
+                x.encode('ascii')).decode('utf-8'))[0]
index 4967c1b7752e4ebfd0c1aac9b0d079c2dc843363..74a1dc672e7725f2f3500284a53ade4ca16c380d 100644 (file)
@@ -14,7 +14,7 @@ from ..utils import (
 
 
 class SlideshareIE(InfoExtractor):
-    _VALID_URL = r'https?://www\.slideshare\.net/[^/]+?/(?P<title>.+?)($|\?)'
+    _VALID_URL = r'https?://(?:www\.)?slideshare\.net/[^/]+?/(?P<title>.+?)($|\?)'
 
     _TEST = {
         'url': 'http://www.slideshare.net/Dataversity/keynote-presentation-managing-scale-and-complexity',
index 7efb29f653b76b25c26d91aac16c6985255ee1d0..18cc7721e142c7493bbebdfcb59f621e3fedaf4f 100644 (file)
@@ -1,7 +1,5 @@
 from __future__ import unicode_literals
 
-import re
-
 from .common import InfoExtractor
 
 
@@ -9,7 +7,7 @@ class SlutloadIE(InfoExtractor):
     _VALID_URL = r'^https?://(?:\w+\.)?slutload\.com/video/[^/]+/(?P<id>[^/]+)/?$'
     _TEST = {
         'url': 'http://www.slutload.com/video/virginie-baisee-en-cam/TD73btpBqSxc/',
-        'md5': '0cf531ae8006b530bd9df947a6a0df77',
+        'md5': '868309628ba00fd488cf516a113fd717',
         'info_dict': {
             'id': 'TD73btpBqSxc',
             'ext': 'mp4',
@@ -20,9 +18,7 @@ class SlutloadIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-
+        video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
 
         video_title = self._html_search_regex(r'<h1><strong>([^<]+)</strong>',
index 1143587868c842704901df4484482e7def3e64cc..def46abda45c5d4899f3c3e5a3fb775592efdfa6 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 import re
index 0d1ab07f86ac4088b4fd1e56e9d1dfaa52514ddd..4819fe5b4b6322cc02e9e1fdd4c128cbe28e55b0 100644 (file)
@@ -5,9 +5,9 @@ import re
 
 from .common import InfoExtractor
 from ..utils import (
-    float_or_none,
-    str_to_int,
     parse_duration,
+    parse_filesize,
+    str_to_int,
 )
 
 
@@ -17,21 +17,24 @@ class SnotrIE(InfoExtractor):
         'url': 'http://www.snotr.com/video/13708/Drone_flying_through_fireworks',
         'info_dict': {
             'id': '13708',
-            'ext': 'flv',
+            'ext': 'mp4',
             'title': 'Drone flying through fireworks!',
-            'duration': 247,
-            'filesize_approx': 98566144,
+            'duration': 248,
+            'filesize_approx': 40700000,
             'description': 'A drone flying through Fourth of July Fireworks',
-        }
+            'thumbnail': 're:^https?://.*\.jpg$',
+        },
+        'expected_warnings': ['description'],
     }, {
         'url': 'http://www.snotr.com/video/530/David_Letteman_-_George_W_Bush_Top_10',
         'info_dict': {
             'id': '530',
-            'ext': 'flv',
+            'ext': 'mp4',
             'title': 'David Letteman - George W. Bush Top 10',
             'duration': 126,
-            'filesize_approx': 8912896,
+            'filesize_approx': 8500000,
             'description': 'The top 10 George W. Bush moments, brought to you by David Letterman!',
+            'thumbnail': 're:^https?://.*\.jpg$',
         }
     }]
 
@@ -43,26 +46,28 @@ class SnotrIE(InfoExtractor):
         title = self._og_search_title(webpage)
 
         description = self._og_search_description(webpage)
-        video_url = 'http://cdn.videos.snotr.com/%s.flv' % video_id
+        info_dict = self._parse_html5_media_entries(
+            url, webpage, video_id, m3u8_entry_protocol='m3u8_native')[0]
 
         view_count = str_to_int(self._html_search_regex(
-            r'<p>\n<strong>Views:</strong>\n([\d,\.]+)</p>',
+            r'<p[^>]*>\s*<strong[^>]*>Views:</strong>\s*<span[^>]*>([\d,\.]+)',
             webpage, 'view count', fatal=False))
 
         duration = parse_duration(self._html_search_regex(
-            r'<p>\n<strong>Length:</strong>\n\s*([0-9:]+).*?</p>',
+            r'<p[^>]*>\s*<strong[^>]*>Length:</strong>\s*<span[^>]*>([\d:]+)',
             webpage, 'duration', fatal=False))
 
-        filesize_approx = float_or_none(self._html_search_regex(
-            r'<p>\n<strong>Filesize:</strong>\n\s*([0-9.]+)\s*megabyte</p>',
-            webpage, 'filesize', fatal=False), invscale=1024 * 1024)
+        filesize_approx = parse_filesize(self._html_search_regex(
+            r'<p[^>]*>\s*<strong[^>]*>Filesize:</strong>\s*<span[^>]*>([^<]+)',
+            webpage, 'filesize', fatal=False))
 
-        return {
+        info_dict.update({
             'id': video_id,
             'description': description,
             'title': title,
-            'url': video_url,
             'view_count': view_count,
             'duration': duration,
             'filesize_approx': filesize_approx,
-        }
+        })
+
+        return info_dict
index 48e2ba2dd16b0df190956c4d5e71b9206d30e531..30760ca06be4b3fc112f3fe0200c74b665d64855 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 import re
index aeae931a20774ff9bc5c3988273a0d008e293954..5a201eaa890347d384849565d98d05ac914ddc39 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 import re
@@ -32,7 +32,7 @@ class SoundcloudIE(InfoExtractor):
     _VALID_URL = r'''(?x)^(?:https?://)?
                     (?:(?:(?:www\.|m\.)?soundcloud\.com/
                             (?P<uploader>[\w\d-]+)/
-                            (?!(?:tracks|sets(?:/[^/?#]+)?|reposts|likes|spotlight)/?(?:$|[?#]))
+                            (?!(?:tracks|sets(?:/.+?)?|reposts|likes|spotlight)/?(?:$|[?#]))
                             (?P<title>[\w\d-]+)/?
                             (?P<token>[^?]+?)?(?:[?].*)?$)
                        |(?:api\.soundcloud\.com/tracks/(?P<track_id>\d+)
@@ -53,6 +53,7 @@ class SoundcloudIE(InfoExtractor):
                 'uploader': 'E.T. ExTerrestrial Music',
                 'title': 'Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1',
                 'duration': 143,
+                'license': 'all-rights-reserved',
             }
         },
         # not streamable song
@@ -66,6 +67,7 @@ class SoundcloudIE(InfoExtractor):
                 'uploader': 'The Royal Concept',
                 'upload_date': '20120521',
                 'duration': 227,
+                'license': 'all-rights-reserved',
             },
             'params': {
                 # rtmp
@@ -84,6 +86,7 @@ class SoundcloudIE(InfoExtractor):
                 'description': 'test chars:  \"\'/\\ä↭',
                 'upload_date': '20131209',
                 'duration': 9,
+                'license': 'all-rights-reserved',
             },
         },
         # private link (alt format)
@@ -98,6 +101,7 @@ class SoundcloudIE(InfoExtractor):
                 'description': 'test chars:  \"\'/\\ä↭',
                 'upload_date': '20131209',
                 'duration': 9,
+                'license': 'all-rights-reserved',
             },
         },
         # downloadable song
@@ -112,11 +116,12 @@ class SoundcloudIE(InfoExtractor):
                 'uploader': 'oddsamples',
                 'upload_date': '20140109',
                 'duration': 17,
+                'license': 'cc-by-sa',
             },
         },
     ]
 
-    _CLIENT_ID = '02gUJC0hH2ct1EGOcYXQIzRFU91c72Ea'
+    _CLIENT_ID = 'fDoItMDbsbZz8dY16ZzARCZmzgHBPotA'
     _IPHONE_CLIENT_ID = '376f225bf427445fc4bfb6b99b72e0bf'
 
     @staticmethod
@@ -138,20 +143,20 @@ class SoundcloudIE(InfoExtractor):
         name = full_title or track_id
         if quiet:
             self.report_extraction(name)
-
-        thumbnail = info['artwork_url']
-        if thumbnail is not None:
+        thumbnail = info.get('artwork_url')
+        if isinstance(thumbnail, compat_str):
             thumbnail = thumbnail.replace('-large', '-t500x500')
         ext = 'mp3'
         result = {
             'id': track_id,
-            'uploader': info['user']['username'],
-            'upload_date': unified_strdate(info['created_at']),
+            'uploader': info.get('user', {}).get('username'),
+            'upload_date': unified_strdate(info.get('created_at')),
             'title': info['title'],
-            'description': info['description'],
+            'description': info.get('description'),
             'thumbnail': thumbnail,
             'duration': int_or_none(info.get('duration'), 1000),
             'webpage_url': info.get('permalink_url'),
+            'license': info.get('license'),
         }
         formats = []
         if info.get('downloadable', False):
@@ -221,7 +226,7 @@ class SoundcloudIE(InfoExtractor):
             raise ExtractorError('Invalid URL: %s' % url)
 
         track_id = mobj.group('track_id')
-        token = None
+
         if track_id is not None:
             info_json_url = 'http://api.soundcloud.com/tracks/' + track_id + '.json?client_id=' + self._CLIENT_ID
             full_title = track_id
@@ -255,7 +260,20 @@ class SoundcloudIE(InfoExtractor):
         return self._extract_info_dict(info, full_title, secret_token=token)
 
 
-class SoundcloudSetIE(SoundcloudIE):
+class SoundcloudPlaylistBaseIE(SoundcloudIE):
+    @staticmethod
+    def _extract_id(e):
+        return compat_str(e['id']) if e.get('id') else None
+
+    def _extract_track_entries(self, tracks):
+        return [
+            self.url_result(
+                track['permalink_url'], SoundcloudIE.ie_key(),
+                video_id=self._extract_id(track))
+            for track in tracks if track.get('permalink_url')]
+
+
+class SoundcloudSetIE(SoundcloudPlaylistBaseIE):
     _VALID_URL = r'https?://(?:(?:www|m)\.)?soundcloud\.com/(?P<uploader>[\w\d-]+)/sets/(?P<slug_title>[\w\d-]+)(?:/(?P<token>[^?/]+))?'
     IE_NAME = 'soundcloud:set'
     _TESTS = [{
@@ -265,6 +283,9 @@ class SoundcloudSetIE(SoundcloudIE):
             'title': 'The Royal Concept EP',
         },
         'playlist_mincount': 6,
+    }, {
+        'url': 'https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep/token',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
@@ -291,7 +312,7 @@ class SoundcloudSetIE(SoundcloudIE):
             msgs = (compat_str(err['error_message']) for err in info['errors'])
             raise ExtractorError('unable to download video webpage: %s' % ','.join(msgs))
 
-        entries = [self.url_result(track['permalink_url'], 'Soundcloud') for track in info['tracks']]
+        entries = self._extract_track_entries(info['tracks'])
 
         return {
             '_type': 'playlist',
@@ -301,7 +322,7 @@ class SoundcloudSetIE(SoundcloudIE):
         }
 
 
-class SoundcloudUserIE(SoundcloudIE):
+class SoundcloudUserIE(SoundcloudPlaylistBaseIE):
     _VALID_URL = r'''(?x)
                         https?://
                             (?:(?:www|m)\.)?soundcloud\.com/
@@ -318,21 +339,21 @@ class SoundcloudUserIE(SoundcloudIE):
             'id': '114582580',
             'title': 'The Akashic Chronicler (All)',
         },
-        'playlist_mincount': 111,
+        'playlist_mincount': 74,
     }, {
         'url': 'https://soundcloud.com/the-akashic-chronicler/tracks',
         'info_dict': {
             'id': '114582580',
             'title': 'The Akashic Chronicler (Tracks)',
         },
-        'playlist_mincount': 50,
+        'playlist_mincount': 37,
     }, {
         'url': 'https://soundcloud.com/the-akashic-chronicler/sets',
         'info_dict': {
             'id': '114582580',
             'title': 'The Akashic Chronicler (Playlists)',
         },
-        'playlist_mincount': 3,
+        'playlist_mincount': 2,
     }, {
         'url': 'https://soundcloud.com/the-akashic-chronicler/reposts',
         'info_dict': {
@@ -351,7 +372,7 @@ class SoundcloudUserIE(SoundcloudIE):
         'url': 'https://soundcloud.com/grynpyret/spotlight',
         'info_dict': {
             'id': '7098329',
-            'title': 'Grynpyret (Spotlight)',
+            'title': 'GRYNPYRET (Spotlight)',
         },
         'playlist_mincount': 1,
     }]
@@ -413,13 +434,14 @@ class SoundcloudUserIE(SoundcloudIE):
                 for cand in candidates:
                     if isinstance(cand, dict):
                         permalink_url = cand.get('permalink_url')
+                        entry_id = self._extract_id(cand)
                         if permalink_url and permalink_url.startswith('http'):
-                            return permalink_url
+                            return permalink_url, entry_id
 
             for e in collection:
-                permalink_url = resolve_permalink_url((e, e.get('track'), e.get('playlist')))
+                permalink_url, entry_id = resolve_permalink_url((e, e.get('track'), e.get('playlist')))
                 if permalink_url:
-                    entries.append(self.url_result(permalink_url))
+                    entries.append(self.url_result(permalink_url, video_id=entry_id))
 
             next_href = response.get('next_href')
             if not next_href:
@@ -439,7 +461,7 @@ class SoundcloudUserIE(SoundcloudIE):
         }
 
 
-class SoundcloudPlaylistIE(SoundcloudIE):
+class SoundcloudPlaylistIE(SoundcloudPlaylistBaseIE):
     _VALID_URL = r'https?://api\.soundcloud\.com/playlists/(?P<id>[0-9]+)(?:/?\?secret_token=(?P<token>[^&]+?))?$'
     IE_NAME = 'soundcloud:playlist'
     _TESTS = [{
@@ -469,7 +491,7 @@ class SoundcloudPlaylistIE(SoundcloudIE):
         data = self._download_json(
             base_url + data, playlist_id, 'Downloading playlist')
 
-        entries = [self.url_result(track['permalink_url'], 'Soundcloud') for track in data['tracks']]
+        entries = self._extract_track_entries(data['tracks'])
 
         return {
             '_type': 'playlist',
index a147f7db19a11c27df55f783f1faf965af8bf644..08f8c5744a84dffda03904afd30d44cac42f2917 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 from .mtv import MTVServicesInfoExtractor
@@ -35,6 +35,7 @@ class SouthParkEsIE(SouthParkIE):
             'description': 'Cartman Consigue Una Sonda Anal',
         },
         'playlist_count': 4,
+        'skip': 'Geo-restricted',
     }]
 
 
index 50433d0f678f27c348031dbe0d6fcc3774d021b7..186d22b7d1608b01bb0a3d45082403e6a58bb05e 100644 (file)
@@ -14,7 +14,7 @@ class SpankBangIE(InfoExtractor):
             'id': '3vvn',
             'ext': 'mp4',
             'title': 'fantasy solo',
-            'description': 'dillion harper masturbates on a bed',
+            'description': 'Watch fantasy solo free HD porn video - 05 minutes - dillion harper masturbates on a bed free adult movies.',
             'thumbnail': 're:^https?://.*\.jpg$',
             'uploader': 'silly2587',
             'age_limit': 18,
@@ -44,12 +44,10 @@ class SpankBangIE(InfoExtractor):
 
         title = self._html_search_regex(
             r'(?s)<h1[^>]*>(.+?)</h1>', webpage, 'title')
-        description = self._search_regex(
-            r'class="desc"[^>]*>([^<]+)',
-            webpage, 'description', default=None)
+        description = self._og_search_description(webpage)
         thumbnail = self._og_search_thumbnail(webpage)
         uploader = self._search_regex(
-            r'class="user"[^>]*>([^<]+)',
+            r'class="user"[^>]*><img[^>]+>([^<]+)',
             webpage, 'uploader', fatal=False)
 
         age_limit = self._rta_search(webpage)
index 3c552807e268bb50a6a7d178e61d0834b0c48a42..ec1b603889754af70d516e70c123be7d2604387a 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 import re
@@ -103,7 +103,7 @@ class SpiegelIE(InfoExtractor):
 
 
 class SpiegelArticleIE(InfoExtractor):
-    _VALID_URL = 'https?://www\.spiegel\.de/(?!video/)[^?#]*?-(?P<id>[0-9]+)\.html'
+    _VALID_URL = r'https?://(?:www\.)?spiegel\.de/(?!video/)[^?#]*?-(?P<id>[0-9]+)\.html'
     IE_NAME = 'Spiegel:Article'
     IE_DESC = 'Articles on spiegel.de'
     _TESTS = [{
index 218785ee4e11045bcbb09416cd3bc6862a757ac0..abfee3ece451dd4cfb4a45bd83fd7e29d2004c00 100644 (file)
@@ -1,5 +1,7 @@
 from __future__ import unicode_literals
 
+import re
+
 from .mtv import MTVServicesInfoExtractor
 
 
@@ -16,6 +18,15 @@ class SpikeIE(MTVServicesInfoExtractor):
             'timestamp': 1388120400,
             'upload_date': '20131227',
         },
+    }, {
+        'url': 'http://www.spike.com/full-episodes/j830qm/lip-sync-battle-joel-mchale-vs-jim-rash-season-2-ep-209',
+        'md5': 'b25c6f16418aefb9ad5a6cae2559321f',
+        'info_dict': {
+            'id': '37ace3a8-1df6-48be-85b8-38df8229e241',
+            'ext': 'mp4',
+            'title': 'Lip Sync Battle|April 28, 2016|2|209|Joel McHale Vs. Jim Rash|Act 1',
+            'description': 'md5:a739ca8f978a7802f67f8016d27ce114',
+        },
     }, {
         'url': 'http://www.spike.com/video-clips/lhtu8m/',
         'only_matching': True,
@@ -32,3 +43,12 @@ class SpikeIE(MTVServicesInfoExtractor):
 
     _FEED_URL = 'http://www.spike.com/feeds/mrss/'
     _MOBILE_TEMPLATE = 'http://m.spike.com/videos/video.rbml?id=%s'
+    _CUSTOM_URL_REGEX = re.compile(r'spikenetworkapp://([^/]+/[-a-fA-F0-9]+)')
+
+    def _extract_mgid(self, webpage):
+        """Return the page's mgid, rebuilding it from a spikenetworkapp:// link when the parent lookup finds none."""
+        found = super(SpikeIE, self)._extract_mgid(webpage, default=None)
+        if found is not None:
+            return found
+        video_type, _, episode_id = self._search_regex(self._CUSTOM_URL_REGEX, webpage, 'episode_id').partition('/')
+        return 'mgid:arc:{0}:spike.com:{1}'.format(video_type, episode_id)
index 409d5030422652e26fff1102c7fee1302f2b07b9..b03272f7a273e8a3726adb03d805bd2a449849bf 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 from .ard import ARDMediathekIE
index 1c61437a45901b91691f1d394f92d272ffd2e333..2c26fa689003c6203399eca293c32c8998636ea5 100644 (file)
@@ -1,6 +1,8 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
+import re
+
 from .common import InfoExtractor
 from ..utils import (
     ExtractorError,
@@ -48,6 +50,14 @@ class StreamableIE(InfoExtractor):
         }
     ]
 
+    @staticmethod
+    def _extract_url(webpage):
+        """Return the src of the first streamable.com <iframe> found in webpage, or None when there is none."""
+        # The negative lookahead on the opening quote has to be re-checked before every character;
+        # written as '(?!..).+' it guarded only the first one and the greedy '.+' ran past the closing quote.
+        mobj = re.search(r'<iframe[^>]+src=(?P<q1>[\'"])(?P<src>(?:https?:)?//streamable\.com/(?:(?!\1).)+)(?P=q1)', webpage)
+        return mobj.group('src') if mobj else None
+
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
index d3d2b7eb7a6fa9db4008365e62e046b83490b064..9e533103c88b93157efdd28d7765a7e9ae961603 100644 (file)
@@ -1,4 +1,4 @@
-# -*- coding: utf-8 -*-
+# coding: utf-8
 from __future__ import unicode_literals
 
 import hashlib
index 1c04dfb7bf757477d134cc7caa223ab47d0800ba..fb0a4b24ef5bf65ff13ca2288395f09540e71d48 100644 (file)
@@ -16,7 +16,7 @@ class SVTBaseIE(InfoExtractor):
     def _extract_video(self, video_info, video_id):
         formats = []
         for vr in video_info['videoReferences']:
-            player_type = vr.get('playerType')
+            player_type = vr.get('playerType') or vr.get('format')
             vurl = vr['url']
             ext = determine_ext(vurl)
             if ext == 'm3u8':
index 58073eefeffc0f3ebc244a6087cad36662940228..6d69f7686b37bd2b39b6362373eadefedef0b932 100644 (file)
@@ -1,4 +1,4 @@
-# -*- coding: utf-8 -*-
+# coding: utf-8
 from __future__ import unicode_literals
 
 import re
index cc81f60036794da7ba9f0f3f584e632aefb557ee..def7e5a2c719e38fab0ec27d33d5abd920031670 100644 (file)
@@ -8,7 +8,7 @@ from ..utils import (
 
 
 class SyfyIE(AdobePassIE):
-    _VALID_URL = r'https?://www\.syfy\.com/(?:[^/]+/)?videos/(?P<id>[^/?#]+)'
+    _VALID_URL = r'https?://(?:www\.)?syfy\.com/(?:[^/]+/)?videos/(?P<id>[^/?#]+)'
     _TESTS = [{
         'url': 'http://www.syfy.com/theinternetruinedmylife/videos/the-internet-ruined-my-life-season-1-trailer',
         'info_dict': {
@@ -31,7 +31,7 @@ class SyfyIE(AdobePassIE):
         display_id = self._match_id(url)
         webpage = self._download_webpage(url, display_id)
         syfy_mpx = list(self._parse_json(self._search_regex(
-            r'jQuery\.extend\([^,]+,\s*({.+})\);', webpage, 'drupal settings'),
+            r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);', webpage, 'drupal settings'),
             display_id)['syfy']['syfy_mpx'].values())[0]
         video_id = syfy_mpx['mpxGUID']
         title = syfy_mpx['episodeTitle']
index f562aa6d386ee891f4ab3a724bef53e20a6cec92..cfad3314642b0412f7fd31995828ee6ba8a6a5b9 100644 (file)
@@ -1,4 +1,4 @@
-# -*- coding: utf-8 -*-
+# coding: utf-8
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
index 136e18f96cadf7bd5701e32b0a3bc7c8767e324e..8670cee28d381de6011e3187db3024bcc40519de 100644 (file)
@@ -1,4 +1,4 @@
-# -*- coding: utf-8 -*-
+# coding: utf-8
 from __future__ import unicode_literals
 
 import re
index c4ef70778b8ac8d2289bbbf5da3bbae5f65c263b..5293393efc219526b61fe04ff12ff25f1d49b33c 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 import json
diff --git a/youtube_dl/extractor/tbs.py b/youtube_dl/extractor/tbs.py
new file mode 100644 (file)
index 0000000..bf93eb8
--- /dev/null
@@ -0,0 +1,56 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .turner import TurnerBaseIE
+from ..utils import extract_attributes
+
+
+class TBSIE(TurnerBaseIE):
+    """Extractor for video pages on tbs.com and tntdrama.com."""
+    _VALID_URL = r'https?://(?:www\.)?(?P<site>tbs|tntdrama)\.com/videos/(?:[^/]+/)+(?P<id>[^/?#]+)\.html'
+    _TESTS = [{
+        'url': 'http://www.tbs.com/videos/people-of-earth/season-1/extras/2007318/theatrical-trailer.html',
+        'md5': '9e61d680e2285066ade7199e6408b2ee',
+        'info_dict': {
+            'id': '2007318',
+            'ext': 'mp4',
+            'title': 'Theatrical Trailer',
+            'description': 'Catch the latest comedy from TBS, People of Earth, premiering Halloween night--Monday, October 31, at 9/8c.',
+        }
+    }, {
+        'url': 'http://www.tntdrama.com/videos/good-behavior/season-1/extras/1538823/you-better-run.html',
+        'md5': 'ce53c6ead5e9f3280b4ad2031a6fab56',
+        'info_dict': {
+            'id': '1538823',
+            'ext': 'mp4',
+            'title': 'You Better Run',
+            'description': 'Letty Raines must figure out what she\'s running toward while running away from her past. Good Behavior premieres November 15 at 9/8c.',
+        }
+    }]
+
+    def _real_extract(self, url):
+        """Read the id="page-video" element's attributes and pass the matching cvpXml URL to _extract_cvp_info."""
+        domain, display_id = re.match(self._VALID_URL, url).groups()
+        site = domain[:3]  # 'tbs' or 'tnt'
+        webpage = self._download_webpage(url, display_id)
+        player_attrs = extract_attributes(self._search_regex(
+            r'(<[^>]+id="page-video"[^>]*>)', webpage, 'video params'))
+        clip_id = player_attrs.get('clipid')
+        query = 'id=' + clip_id if clip_id else 'titleId=' + player_attrs['titleid']
+        return self._extract_cvp_info(
+            'http://www.%s.com/service/cvpXml?%s' % (domain, query), display_id, {
+                'default': {
+                    'media_src': 'http://ht.cdn.turner.com/%s/big' % site,
+                },
+                'secure': {
+                    'media_src': 'http://androidhls-secure.cdn.turner.com/%s/big' % site,
+                    'tokenizer_src': 'http://www.%s.com/video/processors/services/token_ipadAdobe.do' % domain,
+                },
+            }, {
+                'url': url,
+                'site_name': site.upper(),
+                # NOTE(review): the other keys read above are lower-case; confirm extract_attributes keeps this mixed-case name.
+                'auth_required': player_attrs.get('isAuthRequired') != 'false',
+            })
index 82675431f863fded8768241e2ad21c4874f8525d..df5d5556fadf82c8dc680643389fdeccf989793f 100644 (file)
@@ -1,4 +1,4 @@
-# -*- coding: utf-8 -*-
+# coding: utf-8
 from __future__ import unicode_literals
 
 import re
index d14d93e3ab1ae87902dc275e1208964a86b6b840..e89759714e6e3cea3da8a7007df838618f6f1cc1 100644 (file)
@@ -7,7 +7,7 @@ from .ooyala import OoyalaIE
 
 
 class TeachingChannelIE(InfoExtractor):
-    _VALID_URL = r'https?://www\.teachingchannel\.org/videos/(?P<title>.+)'
+    _VALID_URL = r'https?://(?:www\.)?teachingchannel\.org/videos/(?P<title>.+)'
 
     _TEST = {
         'url': 'https://www.teachingchannel.org/videos/teacher-teaming-evolution',
index 79a7789200e34e1e457d9cd69cdabb495e3548c3..75346393b017995098d08136df2cbffad1e1c6bb 100644 (file)
@@ -1,4 +1,4 @@
-# -*- coding: utf-8 -*-
+# coding: utf-8
 from __future__ import unicode_literals
 
 import base64
diff --git a/youtube_dl/extractor/teamfourstar.py b/youtube_dl/extractor/teamfourstar.py
new file mode 100644 (file)
index 0000000..a8c6ed7
--- /dev/null
@@ -0,0 +1,48 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from .jwplatform import JWPlatformIE
+from ..utils import unified_strdate
+
+
+class TeamFourStarIE(InfoExtractor):
+    """Extractor for teamfourstar.com pages that embed a JW Platform player."""
+    _VALID_URL = r'https?://(?:www\.)?teamfourstar\.com/(?P<id>[a-z0-9\-]+)'
+    _TEST = {
+        'url': 'http://teamfourstar.com/tfs-abridged-parody-episode-1-2/',
+        'info_dict': {
+            'id': '0WdZO31W',
+            'title': 'TFS Abridged Parody Episode 1',
+            'description': 'md5:d60bc389588ebab2ee7ad432bda953ae',
+            'ext': 'mp4',
+            'timestamp': 1394168400,
+            'upload_date': '20080508',
+        },
+    }
+
+    def _real_extract(self, url):
+        """Scrape title, date and description from the page and hand playback to the embedded JW Platform URL."""
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+        embed_url = JWPlatformIE._extract_url(webpage)
+
+        title = self._html_search_regex(
+            r'<h1[^>]+class="entry-title"[^>]*>(?P<title>.+?)</h1>',
+            webpage, 'title')
+        raw_date = self._html_search_regex(
+            r'<span[^>]+class="meta-date date updated"[^>]*>(?P<date>.+?)</span>',
+            webpage, 'date', fatal=False)
+        description = self._html_search_regex(
+            r'(?s)<div[^>]+class="content-inner"[^>]*>.*?(?P<description><p>.+?)</div>',
+            webpage, 'description', fatal=False)
+
+        return {
+            '_type': 'url_transparent',
+            'display_id': display_id,
+            'title': title,
+            'description': description,
+            'upload_date': unified_strdate(raw_date),
+            'thumbnail': self._og_search_thumbnail(webpage),
+            'url': embed_url,
+        }
index 16e945d8e624adc51e6a68eab786bdece0a29960..a5b62c717160380c873117e878017f5c3573939a 100644 (file)
@@ -10,9 +10,9 @@ from ..utils import (
 
 
 class TechTalksIE(InfoExtractor):
-    _VALID_URL = r'https?://techtalks\.tv/talks/[^/]*/(?P<id>\d+)/'
+    _VALID_URL = r'https?://techtalks\.tv/talks/(?:[^/]+/)?(?P<id>\d+)'
 
-    _TEST = {
+    _TESTS = [{
         'url': 'http://techtalks.tv/talks/learning-topic-models-going-beyond-svd/57758/',
         'info_dict': {
             'id': '57758',
@@ -38,7 +38,10 @@ class TechTalksIE(InfoExtractor):
             # rtmp download
             'skip_download': True,
         },
-    }
+    }, {
+        'url': 'http://techtalks.tv/talks/57758',
+        'only_matching': True,
+    }]
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
index 2ecfd0405afa27d78f81fb0c4ba604d022798850..d5abfc9e44ec82b492fcd98d9e4429b40c5c05b9 100644 (file)
@@ -6,7 +6,7 @@ from .mitele import MiTeleBaseIE
 
 class TelecincoIE(MiTeleBaseIE):
     IE_DESC = 'telecinco.es, cuatro.com and mediaset.es'
-    _VALID_URL = r'https?://www\.(?:telecinco\.es|cuatro\.com|mediaset\.es)/(?:[^/]+/)+(?P<id>.+?)\.html'
+    _VALID_URL = r'https?://(?:www\.)?(?:telecinco\.es|cuatro\.com|mediaset\.es)/(?:[^/]+/)+(?P<id>.+?)\.html'
 
     _TESTS = [{
         'url': 'http://www.telecinco.es/robinfood/temporada-01/t01xp14/Bacalao-cocochas-pil-pil_0_1876350223.html',
diff --git a/youtube_dl/extractor/telequebec.py b/youtube_dl/extractor/telequebec.py
new file mode 100644 (file)
index 0000000..4043fcb
--- /dev/null
@@ -0,0 +1,36 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import int_or_none
+
+
+class TeleQuebecIE(InfoExtractor):
+    """Extractor for zonevideo.telequebec.tv media pages; metadata comes from the JSON API, playback from Limelight."""
+    _VALID_URL = r'https?://zonevideo\.telequebec\.tv/media/(?P<id>\d+)'
+    _TEST = {
+        'url': 'http://zonevideo.telequebec.tv/media/20984/le-couronnement-de-new-york/couronnement-de-new-york',
+        'md5': 'fe95a0957e5707b1b01f5013e725c90f',
+        'info_dict': {
+            'id': '20984',
+            'ext': 'mp4',
+            'title': 'Le couronnement de New York',
+            'description': 'md5:f5b3d27a689ec6c1486132b2d687d432',
+            'upload_date': '20160220',
+            'timestamp': 1455965438,
+        }
+    }
+
+    def _real_extract(self, url):
+        media_id = self._match_id(url)
+        media_data = self._download_json('https://mnmedias.api.telequebec.tv/api/v2/media/' + media_id, media_id)['media']
+        return {
+            '_type': 'url_transparent',
+            'id': media_id,
+            'url': 'limelight:media:' + media_data['streamInfo']['sourceId'],
+            'title': media_data['title'],
+            # "or [{}]" also covers a key that is present but null or an empty list, so [0] cannot raise
+            'description': (media_data.get('descriptions') or [{}])[0].get('text'),
+            'duration': int_or_none(media_data.get('durationInMilliseconds'), 1000),
+            'ie_key': 'LimelightMedia',
+        }
index 77916c6010de62f3072d6811ddf2935dbae551d7..7786b281371181b8e42378cac766946fdf59b762 100644 (file)
@@ -5,7 +5,7 @@ from .common import InfoExtractor
 
 
 class TelewebionIE(InfoExtractor):
-    _VALID_URL = r'https?://www\.telewebion\.com/#!/episode/(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www\.)?telewebion\.com/#!/episode/(?P<id>\d+)'
 
     _TEST = {
         'url': 'http://www.telewebion.com/#!/episode/1263668/',
diff --git a/youtube_dl/extractor/tfo.py b/youtube_dl/extractor/tfo.py
new file mode 100644 (file)
index 0000000..6f1eeac
--- /dev/null
@@ -0,0 +1,53 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import json
+
+from .common import InfoExtractor
+from ..utils import (
+    HEADRequest,
+    ExtractorError,
+    int_or_none,
+)
+
+
+class TFOIE(InfoExtractor):
+    """Extractor for tfo.org video pages; playback is delegated to the Limelight extractor."""
+    _VALID_URL = r'https?://(?:www\.)?tfo\.org/(?:en|fr)/(?:[^/]+/){2}(?P<id>\d+)'
+    _TEST = {
+        'url': 'http://www.tfo.org/en/universe/tfo-247/100463871/video-game-hackathon',
+        'md5': '47c987d0515561114cf03d1226a9d4c7',
+        'info_dict': {
+            'id': '100463871',
+            'ext': 'mp4',
+            'title': 'Video Game Hackathon',
+            'description': 'md5:558afeba217c6c8d96c60e5421795c07',
+            'upload_date': '20160212',
+            'timestamp': 1455310233,
+        }
+    }
+
+    def _real_extract(self, url):
+        """Query the get_infos API for the product id in the URL and return a url_transparent Limelight result."""
+        video_id = self._match_id(url)
+        # The HEAD response itself is discarded; presumably it is requested for the 'tfo-session' cookie read below.
+        self._request_webpage(HEADRequest('http://www.tfo.org/'), video_id)
+        payload = json.dumps({'product_id': video_id}).encode()
+        session = self._get_cookies('http://www.tfo.org/')['tfo-session'].value
+        infos = self._download_json(
+            'http://www.tfo.org/api/web/video/get_infos', video_id, data=payload, headers={'X-tfo-session': session})
+        if infos.get('success') == 0:
+            raise ExtractorError('%s said: %s' % (self.IE_NAME, infos['msg']), expected=True)
+        video_data = infos['data']
+        return {
+            '_type': 'url_transparent',
+            'id': video_id,
+            'url': 'limelight:media:' + video_data['llid'],
+            'title': video_data['title'],
+            'description': video_data.get('description'),
+            'series': video_data.get('collection'),
+            'season_number': int_or_none(video_data.get('season')),
+            'episode_number': int_or_none(video_data.get('episode')),
+            'duration': int_or_none(video_data.get('duration')),
+            'ie_key': 'LimelightMedia',
+        }
index 8cb3c3669f2af9929e702ce0c2a57606177f18e2..f23b587137a0e471ada57c8a08d2fbaf8ecc9722 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
@@ -11,7 +11,7 @@ from ..utils import (
 
 
 class TheInterceptIE(InfoExtractor):
-    _VALID_URL = r'https://theintercept.com/fieldofvision/(?P<id>[^/?#]+)'
+    _VALID_URL = r'https?://theintercept\.com/fieldofvision/(?P<id>[^/?#]+)'
     _TESTS = [{
         'url': 'https://theintercept.com/fieldofvision/thisisacoup-episode-four-surrender-or-die/',
         'md5': '145f28b41d44aab2f87c0a4ac8ec95bd',
index 23067e8c6510ffec73f4036f122e8d810291efef..cfbf7f4e1562c78ea1d5ae44437694a5325eb70b 100644 (file)
@@ -1,4 +1,4 @@
-# -*- coding: utf-8 -*-
+# coding: utf-8
 from __future__ import unicode_literals
 
 import re
@@ -96,7 +96,7 @@ class ThePlatformBaseIE(OnceIE):
 class ThePlatformIE(ThePlatformBaseIE, AdobePassIE):
     _VALID_URL = r'''(?x)
         (?:https?://(?:link|player)\.theplatform\.com/[sp]/(?P<provider_id>[^/]+)/
-           (?:(?:(?:[^/]+/)+select/)?(?P<media>media/(?:guid/\d+/)?)|(?P<config>(?:[^/\?]+/(?:swf|config)|onsite)/select/))?
+           (?:(?:(?:[^/]+/)+select/)?(?P<media>media/(?:guid/\d+/)?)?|(?P<config>(?:[^/\?]+/(?:swf|config)|onsite)/select/))?
          |theplatform:)(?P<id>[^/\?&]+)'''
 
     _TESTS = [{
@@ -116,6 +116,7 @@ class ThePlatformIE(ThePlatformBaseIE, AdobePassIE):
             # rtmp download
             'skip_download': True,
         },
+        'skip': '404 Not Found',
     }, {
         # from http://www.cnet.com/videos/tesla-model-s-a-second-step-towards-a-cleaner-motoring-future/
         'url': 'http://link.theplatform.com/s/kYEXFC/22d_qsQ6MIRT',
index 3e4e14031a975d8176ffce6706a139318c12d25d..ce1326c03643186b4e1eb58905ef8f9c868588f6 100644 (file)
@@ -7,7 +7,7 @@ from ..utils import qualities
 
 
 class TheSceneIE(InfoExtractor):
-    _VALID_URL = r'https://thescene\.com/watch/[^/]+/(?P<id>[^/#?]+)'
+    _VALID_URL = r'https?://thescene\.com/watch/[^/]+/(?P<id>[^/#?]+)'
 
     _TEST = {
         'url': 'https://thescene.com/watch/vogue/narciso-rodriguez-spring-2013-ready-to-wear',
index ba1380abcbeb3cb2a321b41695517f16f8cd6196..c3f11889471b978f9bdbb29014fb7e9e08aca636 100644 (file)
@@ -2,8 +2,6 @@
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
-from .brightcove import BrightcoveLegacyIE
-from ..compat import compat_parse_qs
 
 
 class TheStarIE(InfoExtractor):
@@ -30,6 +28,9 @@ class TheStarIE(InfoExtractor):
     def _real_extract(self, url):
         display_id = self._match_id(url)
         webpage = self._download_webpage(url, display_id)
-        brightcove_legacy_url = BrightcoveLegacyIE._extract_brightcove_url(webpage)
-        brightcove_id = compat_parse_qs(brightcove_legacy_url)['@videoPlayer'][0]
-        return self.url_result(self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 'BrightcoveNew', brightcove_id)
+        brightcove_id = self._search_regex(
+            r'mainartBrightcoveVideoId["\']?\s*:\s*["\']?(\d+)',
+            webpage, 'brightcove id')
+        return self.url_result(
+            self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id,
+            'BrightcoveNew', brightcove_id)
diff --git a/youtube_dl/extractor/theweatherchannel.py b/youtube_dl/extractor/theweatherchannel.py
new file mode 100644 (file)
index 0000000..c34a49d
--- /dev/null
@@ -0,0 +1,79 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .theplatform import ThePlatformIE
+from ..utils import (
+    determine_ext,
+    parse_duration,
+)
+
+
+class TheWeatherChannelIE(ThePlatformIE):
+    """Extractor for weather.com video pages."""
+    _VALID_URL = r'https?://(?:www\.)?weather\.com/(?:[^/]+/)*video/(?P<id>[^/?#]+)'
+    _TESTS = [{
+        'url': 'https://weather.com/series/great-outdoors/video/ice-climber-is-in-for-a-shock',
+        'md5': 'ab924ac9574e79689c24c6b95e957def',
+        'info_dict': {
+            'id': 'cc82397e-cc3f-4d11-9390-a785add090e8',
+            'ext': 'mp4',
+            'title': 'Ice Climber Is In For A Shock',
+            'description': 'md5:55606ce1378d4c72e6545e160c9d9695',
+            'uploader': 'TWC - Digital (No Distro)',
+            'uploader_id': '6ccd5455-16bb-46f2-9c57-ff858bb9f62c',
+        }
+    }]
+
+    def _real_extract(self, url):
+        """Take the asset UUID from the page's Drupal settings, then build formats and thumbnails from its variants."""
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+        drupal_settings = self._parse_json(self._search_regex(
+            r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);',
+            webpage, 'drupal settings'), display_id)
+        video_id = drupal_settings['twc']['contexts']['node']['uuid']
+        video_data = self._download_json(
+            'https://dsx.weather.com/cms/v4/asset-collection/en_US/' + video_id, video_id)
+        seo_meta = video_data.get('seometa', {})
+        title = video_data.get('title') or seo_meta['title']
+
+        seen_urls = set()
+        thumbnails = []
+        formats = []
+        # Default to {} (not []): the result is iterated with .items(), which a list does not have.
+        for variant_id, variant_url in video_data.get('variants', {}).items():
+            variant_url = variant_url.strip()
+            # skip blank URLs and URLs already handled under another variant id
+            if not variant_url or variant_url in seen_urls:
+                continue
+            seen_urls.add(variant_url)
+            ext = determine_ext(variant_url)
+            if ext == 'jpg':
+                # image variants are collected as thumbnails, not formats
+                thumbnails.append({'url': variant_url, 'id': variant_id})
+            elif ThePlatformIE.suitable(variant_url):
+                tp_formats, _ = self._extract_theplatform_smil(variant_url, video_id)
+                formats.extend(tp_formats)
+            elif ext == 'm3u8':
+                formats.extend(self._extract_m3u8_formats(
+                    variant_url, video_id, 'mp4', 'm3u8_native',
+                    m3u8_id=variant_id, fatal=False))
+            elif ext == 'f4m':
+                formats.extend(self._extract_f4m_formats(
+                    variant_url, video_id, f4m_id=variant_id, fatal=False))
+            else:
+                # any other extension is exposed as a direct-URL format
+                formats.append({'url': variant_url, 'format_id': variant_id})
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'display_id': display_id,
+            'title': title,
+            'description': video_data.get('description') or seo_meta.get('description') or seo_meta.get('og:description'),
+            'duration': parse_duration(video_data.get('duration')),
+            'uploader': video_data.get('providername'),
+            'uploader_id': video_data.get('providerid'),
+            'thumbnails': thumbnails,
+            'formats': formats,
+        }
index 7f323c938762f6ec1337b6dbf6b9c64ec2993dd8..4473a3c773c3d9c4c26361e907769e8bb1ac9fad 100644 (file)
@@ -3,13 +3,13 @@ from __future__ import unicode_literals
 
 import re
 
-from .common import InfoExtractor
-from ..utils import determine_ext
+from .jwplatform import JWPlatformBaseIE
+from ..utils import remove_end
 
 
-class ThisAVIE(InfoExtractor):
+class ThisAVIE(JWPlatformBaseIE):
     _VALID_URL = r'https?://(?:www\.)?thisav\.com/video/(?P<id>[0-9]+)/.*'
-    _TEST = {
+    _TESTS = [{
         'url': 'http://www.thisav.com/video/47734/%98%26sup1%3B%83%9E%83%82---just-fit.html',
         'md5': '0480f1ef3932d901f0e0e719f188f19b',
         'info_dict': {
@@ -19,29 +19,49 @@ class ThisAVIE(InfoExtractor):
             'uploader': 'dj7970',
             'uploader_id': 'dj7970'
         }
-    }
+    }, {
+        'url': 'http://www.thisav.com/video/242352/nerdy-18yo-big-ass-tattoos-and-glasses.html',
+        'md5': 'ba90c076bd0f80203679e5b60bf523ee',
+        'info_dict': {
+            'id': '242352',
+            'ext': 'mp4',
+            'title': 'Nerdy 18yo Big Ass Tattoos and Glasses',
+            'uploader': 'cybersluts',
+            'uploader_id': 'cybersluts',
+        },
+    }]
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
 
         video_id = mobj.group('id')
         webpage = self._download_webpage(url, video_id)
-        title = self._html_search_regex(r'<h1>([^<]*)</h1>', webpage, 'title')
+        title = remove_end(self._html_search_regex(
+            r'<title>([^<]+)</title>', webpage, 'title'),
+            ' - 視頻 - ThisAV.com-世界第一中文成人娛樂網站')
         video_url = self._html_search_regex(
-            r"addVariable\('file','([^']+)'\);", webpage, 'video url')
+            r"addVariable\('file','([^']+)'\);", webpage, 'video url', default=None)
+        if video_url:
+            info_dict = {
+                'formats': [{
+                    'url': video_url,
+                }],
+            }
+        else:
+            info_dict = self._extract_jwplayer_data(
+                webpage, video_id, require_title=False)
         uploader = self._html_search_regex(
             r': <a href="http://www.thisav.com/user/[0-9]+/(?:[^"]+)">([^<]+)</a>',
             webpage, 'uploader name', fatal=False)
         uploader_id = self._html_search_regex(
             r': <a href="http://www.thisav.com/user/[0-9]+/([^"]+)">(?:[^<]+)</a>',
             webpage, 'uploader id', fatal=False)
-        ext = determine_ext(video_url)
 
-        return {
+        info_dict.update({
             'id': video_id,
-            'url': video_url,
             'uploader': uploader,
             'uploader_id': uploader_id,
             'title': title,
-            'ext': ext,
-        }
+        })
+
+        return info_dict
diff --git a/youtube_dl/extractor/thisoldhouse.py b/youtube_dl/extractor/thisoldhouse.py
new file mode 100644 (file)
index 0000000..7629f0d
--- /dev/null
@@ -0,0 +1,32 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class ThisOldHouseIE(InfoExtractor):
+    """Extractor for thisoldhouse.com /watch/ and /how-to/ pages, which reference a JW Platform video id."""
+    _VALID_URL = r'https?://(?:www\.)?thisoldhouse\.com/(?:watch|how-to)/(?P<id>[^/?#]+)'
+    _TESTS = [{
+        'url': 'https://www.thisoldhouse.com/how-to/how-to-build-storage-bench',
+        'md5': '568acf9ca25a639f0c4ff905826b662f',
+        'info_dict': {
+            'id': '2REGtUDQ',
+            'ext': 'mp4',
+            'title': 'How to Build a Storage Bench',
+            'description': 'In the workshop, Tom Silva and Kevin O\'Connor build a storage bench for an entryway.',
+            'timestamp': 1442548800,
+            'upload_date': '20150918',
+        }
+    }, {
+        'url': 'https://www.thisoldhouse.com/watch/arlington-arts-crafts-arts-and-crafts-class-begins',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        """Pull the JW Platform video id out of the page's Drupal settings and delegate to JWPlatform."""
+        display_id = self._match_id(url)
+        page = self._download_webpage(url, display_id)
+        settings_json = self._search_regex(r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);', page, 'drupal settings')
+        video_id = self._parse_json(settings_json, display_id)['jwplatform']['video_id']
+        return self.url_result('jwplatform:' + video_id, 'JWPlatform', video_id)
diff --git a/youtube_dl/extractor/thvideo.py b/youtube_dl/extractor/thvideo.py
deleted file mode 100644 (file)
index 406f4a8..0000000
+++ /dev/null
@@ -1,84 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..utils import (
-    unified_strdate
-)
-
-
-class THVideoIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?thvideo\.tv/(?:v/th|mobile\.php\?cid=)(?P<id>[0-9]+)'
-    _TEST = {
-        'url': 'http://thvideo.tv/v/th1987/',
-        'md5': 'fa107b1f73817e325e9433505a70db50',
-        'info_dict': {
-            'id': '1987',
-            'ext': 'mp4',
-            'title': '【动画】秘封活动记录 ~ The Sealed Esoteric History.分镜稿预览',
-            'display_id': 'th1987',
-            'thumbnail': 'http://thvideo.tv/uploadfile/2014/0722/20140722013459856.jpg',
-            'description': '社团京都幻想剧团的第一个东方二次同人动画作品「秘封活动记录 ~ The Sealed Esoteric History.」 本视频是该动画第一期的分镜草稿...',
-            'upload_date': '20140722'
-        }
-    }
-
-    def _real_extract(self, url):
-        video_id = self._match_id(url)
-
-        # extract download link from mobile player page
-        webpage_player = self._download_webpage(
-            'http://thvideo.tv/mobile.php?cid=%s-0' % (video_id),
-            video_id, note='Downloading video source page')
-        video_url = self._html_search_regex(
-            r'<source src="(.*?)" type', webpage_player, 'video url')
-
-        # extract video info from main page
-        webpage = self._download_webpage(
-            'http://thvideo.tv/v/th%s' % (video_id), video_id)
-        title = self._og_search_title(webpage)
-        display_id = 'th%s' % video_id
-        thumbnail = self._og_search_thumbnail(webpage)
-        description = self._og_search_description(webpage)
-        upload_date = unified_strdate(self._html_search_regex(
-            r'span itemprop="datePublished" content="(.*?)">', webpage,
-            'upload date', fatal=False))
-
-        return {
-            'id': video_id,
-            'ext': 'mp4',
-            'url': video_url,
-            'title': title,
-            'display_id': display_id,
-            'thumbnail': thumbnail,
-            'description': description,
-            'upload_date': upload_date
-        }
-
-
-class THVideoPlaylistIE(InfoExtractor):
-    _VALID_URL = r'http?://(?:www\.)?thvideo\.tv/mylist(?P<id>[0-9]+)'
-    _TEST = {
-        'url': 'http://thvideo.tv/mylist2',
-        'info_dict': {
-            'id': '2',
-            'title': '幻想万華鏡',
-        },
-        'playlist_mincount': 23,
-    }
-
-    def _real_extract(self, url):
-        playlist_id = self._match_id(url)
-
-        webpage = self._download_webpage(url, playlist_id)
-        list_title = self._html_search_regex(
-            r'<h1 class="show_title">(.*?)<b id', webpage, 'playlist title',
-            fatal=False)
-
-        entries = [
-            self.url_result('http://thvideo.tv/v/th' + id, 'THVideo')
-            for id in re.findall(r'<dd><a href="http://thvideo.tv/v/th(\d+)/" target=', webpage)]
-
-        return self.playlist_result(entries, playlist_id, list_title)
index abad3ff64b5e519414615d3dd3cf8da345e9a2f3..fd145ba429fbc94ec5582b6100660f2897b25f5f 100644 (file)
@@ -1,15 +1,19 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
+
 import re
 
 from .common import InfoExtractor
 from .brightcove import BrightcoveLegacyIE
-from ..compat import compat_parse_qs
+from ..compat import (
+    compat_parse_qs,
+    compat_urlparse,
+)
 
 
 class TlcDeIE(InfoExtractor):
     IE_NAME = 'tlc.de'
-    _VALID_URL = r'https?://www\.tlc\.de/(?:[^/]+/)*videos/(?P<title>[^/?#]+)?(?:.*#(?P<id>\d+))?'
+    _VALID_URL = r'https?://(?:www\.)?tlc\.de/(?:[^/]+/)*videos/(?P<title>[^/?#]+)?(?:.*#(?P<id>\d+))?'
 
     _TEST = {
         'url': 'http://www.tlc.de/sendungen/breaking-amish/videos/#3235167922001',
@@ -35,5 +39,5 @@ class TlcDeIE(InfoExtractor):
             title = mobj.group('title')
             webpage = self._download_webpage(url, title)
             brightcove_legacy_url = BrightcoveLegacyIE._extract_brightcove_url(webpage)
-            brightcove_id = compat_parse_qs(brightcove_legacy_url)['@videoPlayer'][0]
+            brightcove_id = compat_parse_qs(compat_urlparse.urlparse(brightcove_legacy_url).query)['@videoPlayer'][0]
         return self.url_result(self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 'BrightcoveNew', brightcove_id)
index 979856e9a6663332a5eed7be8c2f9a30a9495d40..419f9d92eea375e4aeabc0c39f736955b818c921 100644 (file)
@@ -32,12 +32,15 @@ class TMZArticleIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?tmz\.com/\d{4}/\d{2}/\d{2}/(?P<id>[^/]+)/?'
     _TEST = {
         'url': 'http://www.tmz.com/2015/04/19/bobby-brown-bobbi-kristina-awake-video-concert',
-        'md5': 'e482a414a38db73087450e3a6ce69d00',
+        'md5': '3316ff838ae5bb7f642537825e1e90d2',
         'info_dict': {
             'id': '0_6snoelag',
-            'ext': 'mp4',
+            'ext': 'mov',
             'title': 'Bobby Brown Tells Crowd ... Bobbi Kristina is Awake',
             'description': 'Bobby Brown stunned his audience during a concert Saturday night, when he told the crowd, "Bobbi is awake.  She\'s watching me."',
+            'timestamp': 1429467813,
+            'upload_date': '20150419',
+            'uploader_id': 'batchUser',
         }
     }
 
@@ -45,12 +48,9 @@ class TMZArticleIE(InfoExtractor):
         video_id = self._match_id(url)
 
         webpage = self._download_webpage(url, video_id)
-        embedded_video_info_str = self._html_search_regex(
-            r'tmzVideoEmbedV2\("([^)]+)"\);', webpage, 'embedded video info')
-
-        embedded_video_info = self._parse_json(
-            embedded_video_info_str, video_id,
-            transform_source=lambda s: s.replace('\\', ''))
+        embedded_video_info = self._parse_json(self._html_search_regex(
+            r'tmzVideoEmbed\(({.+?})\);', webpage, 'embedded video info'),
+            video_id)
 
         return self.url_result(
             'http://www.tmz.com/videos/%s/' % embedded_video_info['id'])
index 7ddf77767d804faff4097d85d796760bc4473b77..77d56b8ca87306a66c22a7e41c5d01de6bba9cb6 100644 (file)
@@ -10,6 +10,7 @@ from ..utils import (
     int_or_none,
     parse_duration,
     str_to_int,
+    unescapeHTML,
     xpath_text,
 )
 
@@ -80,7 +81,8 @@ class TNAFlixNetworkBaseIE(InfoExtractor):
 
         if not cfg_url:
             inputs = self._hidden_inputs(webpage)
-            cfg_url = 'https://cdn-fck.tnaflix.com/tnaflix/%s.fid?key=%s' % (inputs['vkey'], inputs['nkey'])
+            cfg_url = ('https://cdn-fck.tnaflix.com/tnaflix/%s.fid?key=%s&VID=%s&premium=1&vip=1&alpha'
+                       % (inputs['vkey'], inputs['nkey'], video_id))
 
         cfg_xml = self._download_xml(
             cfg_url, display_id, 'Downloading metadata',
@@ -89,7 +91,7 @@ class TNAFlixNetworkBaseIE(InfoExtractor):
         formats = []
 
         def extract_video_url(vl):
-            return re.sub('speed=\d+', 'speed=', vl.text)
+            return re.sub('speed=\d+', 'speed=', unescapeHTML(vl.text))
 
         video_link = cfg_xml.find('./videoLink')
         if video_link is not None:
@@ -201,7 +203,7 @@ class TNAFlixIE(TNAFlixNetworkBaseIE):
     _TESTS = [{
         # anonymous uploader, no categories
         'url': 'http://www.tnaflix.com/porn-stars/Carmella-Decesare-striptease/video553878',
-        'md5': '7e569419fe6d69543d01e6be22f5f7c4',
+        'md5': 'ecf3498417d09216374fc5907f9c6ec0',
         'info_dict': {
             'id': '553878',
             'display_id': 'Carmella-Decesare-striptease',
@@ -215,11 +217,11 @@ class TNAFlixIE(TNAFlixNetworkBaseIE):
     }, {
         # non-anonymous uploader, categories
         'url': 'https://www.tnaflix.com/teen-porn/Educational-xxx-video/video6538',
-        'md5': 'fcba2636572895aba116171a899a5658',
+        'md5': '0f5d4d490dbfd117b8607054248a07c0',
         'info_dict': {
             'id': '6538',
             'display_id': 'Educational-xxx-video',
-            'ext': 'flv',
+            'ext': 'mp4',
             'title': 'Educational xxx video',
             'description': 'md5:b4fab8f88a8621c8fabd361a173fe5b8',
             'thumbnail': 're:https?://.*\.jpg$',
diff --git a/youtube_dl/extractor/tonline.py b/youtube_dl/extractor/tonline.py
new file mode 100644 (file)
index 0000000..cc11eae
--- /dev/null
@@ -0,0 +1,59 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import int_or_none
+
+
+class TOnlineIE(InfoExtractor):
+    IE_NAME = 't-online.de'
+    _VALID_URL = r'https?://(?:www\.)?t-online\.de/tv/(?:[^/]+/)*id_(?P<id>\d+)'
+    _TEST = {
+        'url': 'http://www.t-online.de/tv/sport/fussball/id_79166266/drittes-remis-zidane-es-muss-etwas-passieren-.html',
+        'md5': '7d94dbdde5f9d77c5accc73c39632c29',
+        'info_dict': {
+            'id': '79166266',
+            'ext': 'mp4',
+            'title': 'Drittes Remis! Zidane: "Es muss etwas passieren"',
+            'description': 'Es läuft nicht rund bei Real Madrid. Das 1:1 gegen den SD Eibar war das dritte Unentschieden in Folge in der Liga.',
+        }
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        video_data = self._download_json(
+            'http://www.t-online.de/tv/id_%s/tid_json_video' % video_id, video_id)
+        title = video_data['subtitle']
+
+        formats = []
+        for asset in video_data.get('assets', []):
+            asset_source = asset.get('source') or asset.get('source2')
+            if not asset_source:
+                continue
+            formats_id = []
+            for field_key in ('type', 'profile'):
+                field_value = asset.get(field_key)
+                if field_value:
+                    formats_id.append(field_value)
+            formats.append({
+                'format_id': '-'.join(formats_id),
+                'url': asset_source,
+            })
+
+        thumbnails = []
+        for image in video_data.get('images', []):
+            image_source = image.get('source')
+            if not image_source:
+                continue
+            thumbnails.append({
+                'url': image_source,
+            })
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': video_data.get('description'),
+            'duration': int_or_none(video_data.get('duration')),
+            'thumbnails': thumbnails,
+            'formats': formats,
+        }
index 54c2d0aa6c0d234f9f747550ab841c409bfbc079..26d770992ab1618c95094513371a01fcf99d1a70 100644 (file)
@@ -2,14 +2,24 @@
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
-from ..utils import int_or_none
+from ..utils import (
+    int_or_none,
+    js_to_json,
+    ExtractorError,
+    urlencode_postdata,
+    extract_attributes,
+    smuggle_url,
+)
 
 
 class TouTvIE(InfoExtractor):
+    _NETRC_MACHINE = 'toutv'
     IE_NAME = 'tou.tv'
-    _VALID_URL = r'https?://ici\.tou\.tv/(?P<id>[a-zA-Z0-9_-]+/S[0-9]+E[0-9]+)'
+    _VALID_URL = r'https?://ici\.tou\.tv/(?P<id>[a-zA-Z0-9_-]+(?:/S[0-9]+E[0-9]+)?)'
+    _access_token = None
+    _claims = None
 
-    _TEST = {
+    _TESTS = [{
         'url': 'http://ici.tou.tv/garfield-tout-court/S2015E17',
         'info_dict': {
             'id': '122017',
@@ -22,18 +32,67 @@ class TouTvIE(InfoExtractor):
             # m3u8 download
             'skip_download': True,
         },
-    }
+        'skip': '404 Not Found',
+    }, {
+        'url': 'http://ici.tou.tv/hackers',
+        'only_matching': True,
+    }]
+
+    def _real_initialize(self):
+        email, password = self._get_login_info()
+        if email is None:
+            return
+        state = 'http://ici.tou.tv//'
+        webpage = self._download_webpage(state, None, 'Downloading homepage')
+        toutvlogin = self._parse_json(self._search_regex(
+            r'(?s)toutvlogin\s*=\s*({.+?});', webpage, 'toutvlogin'), None, js_to_json)
+        authorize_url = toutvlogin['host'] + '/auth/oauth/v2/authorize'
+        login_webpage = self._download_webpage(
+            authorize_url, None, 'Downloading login page', query={
+                'client_id': toutvlogin['clientId'],
+                'redirect_uri': 'https://ici.tou.tv/login/loginCallback',
+                'response_type': 'token',
+                'scope': 'media-drmt openid profile email id.write media-validation.read.privileged',
+                'state': state,
+            })
+        login_form = self._search_regex(
+            r'(?s)(<form[^>]+(?:id|name)="Form-login".+?</form>)', login_webpage, 'login form')
+        form_data = self._hidden_inputs(login_form)
+        form_data.update({
+            'login-email': email,
+            'login-password': password,
+        })
+        post_url = extract_attributes(login_form).get('action') or authorize_url
+        _, urlh = self._download_webpage_handle(
+            post_url, None, 'Logging in', data=urlencode_postdata(form_data))
+        self._access_token = self._search_regex(
+            r'access_token=([\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})',
+            urlh.geturl(), 'access token')
+        self._claims = self._download_json(
+            'https://services.radio-canada.ca/media/validation/v2/getClaims',
+            None, 'Extracting Claims', query={
+                'token': self._access_token,
+                'access_token': self._access_token,
+            })['claims']
 
     def _real_extract(self, url):
         path = self._match_id(url)
         metadata = self._download_json('http://ici.tou.tv/presentation/%s' % path, path)
+        if metadata.get('IsDrm'):
+            raise ExtractorError('This video is DRM protected.', expected=True)
         video_id = metadata['IdMedia']
         details = metadata['Details']
         title = details['OriginalTitle']
+        video_url = 'radiocanada:%s:%s' % (metadata.get('AppCode', 'toutv'), video_id)
+        if self._access_token and self._claims:
+            video_url = smuggle_url(video_url, {
+                'access_token': self._access_token,
+                'claims': self._claims,
+            })
 
         return {
             '_type': 'url_transparent',
-            'url': 'radiocanada:%s:%s' % (metadata.get('AppCode', 'toutv'), video_id),
+            'url': video_url,
             'id': video_id,
             'title': title,
             'thumbnail': details.get('ImageUrl'),
index 2579ba8c67498c91aa117c6853b83f391ccb3ba6..938e05076313cb5b3d3284083d2cc7e699241d21 100644 (file)
@@ -1,4 +1,4 @@
-# -*- coding:utf-8 -*-
+# coding: utf-8
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
diff --git a/youtube_dl/extractor/trollvids.py b/youtube_dl/extractor/trollvids.py
deleted file mode 100644 (file)
index 6577056..0000000
+++ /dev/null
@@ -1,36 +0,0 @@
-# encoding: utf-8
-from __future__ import unicode_literals
-
-import re
-
-from .nuevo import NuevoBaseIE
-
-
-class TrollvidsIE(NuevoBaseIE):
-    _VALID_URL = r'https?://(?:www\.)?trollvids\.com/video/(?P<id>\d+)/(?P<display_id>[^/?#&]+)'
-    IE_NAME = 'trollvids'
-    _TEST = {
-        'url': 'http://trollvids.com/video/2349002/%E3%80%90MMD-R-18%E3%80%91%E3%82%AC%E3%83%BC%E3%83%AB%E3%83%95%E3%83%AC%E3%83%B3%E3%83%89-carrymeoff',
-        'md5': '1d53866b2c514b23ed69e4352fdc9839',
-        'info_dict': {
-            'id': '2349002',
-            'ext': 'mp4',
-            'title': '【MMD R-18】ガールフレンド carry_me_off',
-            'age_limit': 18,
-            'duration': 216.78,
-        },
-    }
-
-    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-        display_id = mobj.group('display_id')
-
-        info = self._extract_nuevo(
-            'http://trollvids.com/nuevo/player/config.php?v=%s' % video_id,
-            video_id)
-        info.update({
-            'display_id': display_id,
-            'age_limit': 18
-        })
-        return info
diff --git a/youtube_dl/extractor/trutube.py b/youtube_dl/extractor/trutube.py
deleted file mode 100644 (file)
index d55e0c5..0000000
+++ /dev/null
@@ -1,26 +0,0 @@
-from __future__ import unicode_literals
-
-from .nuevo import NuevoBaseIE
-
-
-class TruTubeIE(NuevoBaseIE):
-    _VALID_URL = r'https?://(?:www\.)?trutube\.tv/(?:video/|nuevo/player/embed\.php\?v=)(?P<id>\d+)'
-    _TESTS = [{
-        'url': 'http://trutube.tv/video/14880/Ramses-II-Proven-To-Be-A-Red-Headed-Caucasoid-',
-        'md5': 'c5b6e301b0a2040b074746cbeaa26ca1',
-        'info_dict': {
-            'id': '14880',
-            'ext': 'flv',
-            'title': 'Ramses II - Proven To Be A Red Headed Caucasoid',
-            'thumbnail': 're:^http:.*\.jpg$',
-        }
-    }, {
-        'url': 'https://trutube.tv/nuevo/player/embed.php?v=14880',
-        'only_matching': True,
-    }]
-
-    def _real_extract(self, url):
-        video_id = self._match_id(url)
-        return self._extract_nuevo(
-            'https://trutube.tv/nuevo/player/config.php?v=%s' % video_id,
-            video_id)
diff --git a/youtube_dl/extractor/trutv.py b/youtube_dl/extractor/trutv.py
new file mode 100644 (file)
index 0000000..3a57825
--- /dev/null
@@ -0,0 +1,47 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .turner import TurnerBaseIE
+
+
+class TruTVIE(TurnerBaseIE):
+    _VALID_URL = r'https?://(?:www\.)?trutv\.com(?:(?P<path>/shows/[^/]+/videos/[^/?#]+?)\.html|/full-episodes/[^/]+/(?P<id>\d+))'
+    _TEST = {
+        'url': 'http://www.trutv.com/shows/10-things/videos/you-wont-believe-these-sports-bets.html',
+        'md5': '2cdc844f317579fed1a7251b087ff417',
+        'info_dict': {
+            'id': '/shows/10-things/videos/you-wont-believe-these-sports-bets',
+            'ext': 'mp4',
+            'title': 'You Won\'t Believe These Sports Bets',
+            'description': 'Jamie Lee sits down with a bookie to discuss the bizarre world of illegal sports betting.',
+            'upload_date': '20130305',
+        }
+    }
+
+    def _real_extract(self, url):
+        path, video_id = re.match(self._VALID_URL, url).groups()
+        auth_required = False
+        if path:
+            data_src = 'http://www.trutv.com/video/cvp/v2/xml/content.xml?id=%s.xml' % path
+        else:
+            webpage = self._download_webpage(url, video_id)
+            video_id = self._search_regex(
+                r"TTV\.TVE\.episodeId\s*=\s*'([^']+)';",
+                webpage, 'video id', default=video_id)
+            auth_required = self._search_regex(
+                r'TTV\.TVE\.authRequired\s*=\s*(true|false);',
+                webpage, 'auth required', default='false') == 'true'
+            data_src = 'http://www.trutv.com/tveverywhere/services/cvpXML.do?titleId=' + video_id
+        return self._extract_cvp_info(
+            data_src, path, {
+                'secure': {
+                    'media_src': 'http://androidhls-secure.cdn.turner.com/trutv/big',
+                    'tokenizer_src': 'http://www.trutv.com/tveverywhere/processors/services/token_ipadAdobe.do',
+                },
+            }, {
+                'url': url,
+                'site_name': 'truTV',
+                'auth_required': auth_required,
+            })
index 4053f6c2150ff536cce19a7fede105f0fee5d8d8..1853a1104c2b8957793ede25c6296598eb0babc9 100644 (file)
@@ -1,5 +1,7 @@
 from __future__ import unicode_literals
 
+import re
+
 from ..utils import (
     int_or_none,
     str_to_int,
@@ -21,7 +23,9 @@ class Tube8IE(KeezMoviesIE):
             'title': 'Kasia music video',
             'age_limit': 18,
             'duration': 230,
-        }
+            'categories': ['Teen'],
+            'tags': ['dancing'],
+        },
     }, {
         'url': 'http://www.tube8.com/shemale/teen/blonde-cd-gets-kidnapped-by-two-blacks-and-punished-for-being-a-slutty-girl/19569151/',
         'only_matching': True,
@@ -51,6 +55,17 @@ class Tube8IE(KeezMoviesIE):
             r'<span id="allCommentsCount">(\d+)</span>',
             webpage, 'comment count', fatal=False))
 
+        category = self._search_regex(
+            r'Category:\s*</strong>\s*<a[^>]+href=[^>]+>([^<]+)',
+            webpage, 'category', fatal=False)
+        categories = [category] if category else None
+
+        tags_str = self._search_regex(
+            r'(?s)Tags:\s*</strong>(.+?)</(?!a)',
+            webpage, 'tags', fatal=False)
+        tags = [t for t in re.findall(
+            r'<a[^>]+href=[^>]+>([^<]+)', tags_str)] if tags_str else None
+
         info.update({
             'description': description,
             'uploader': uploader,
@@ -58,6 +73,8 @@ class Tube8IE(KeezMoviesIE):
             'like_count': like_count,
             'dislike_count': dislike_count,
             'comment_count': comment_count,
+            'categories': categories,
+            'tags': tags,
         })
 
         return info
index c6572defbcf7a732d58d5fa3003993d063a8be3e..3a37df2e8eb710c68d448b6231850ddc846ec716 100644 (file)
@@ -9,7 +9,6 @@ from ..utils import (
     int_or_none,
     sanitized_Request,
     urlencode_postdata,
-    parse_iso8601,
 )
 
 
@@ -19,17 +18,13 @@ class TubiTvIE(InfoExtractor):
     _NETRC_MACHINE = 'tubitv'
     _TEST = {
         'url': 'http://tubitv.com/video/283829/the_comedian_at_the_friday',
+        'md5': '43ac06be9326f41912dc64ccf7a80320',
         'info_dict': {
             'id': '283829',
             'ext': 'mp4',
             'title': 'The Comedian at The Friday',
             'description': 'A stand up comedian is forced to look at the decisions in his life while on a one week trip to the west coast.',
-            'uploader': 'Indie Rights Films',
-            'upload_date': '20160111',
-            'timestamp': 1452555979,
-        },
-        'params': {
-            'skip_download': 'HLS download',
+            'uploader_id': 'bc168bee0d18dd1cb3b86c68706ab434',
         },
     }
 
@@ -58,19 +53,28 @@ class TubiTvIE(InfoExtractor):
         video_id = self._match_id(url)
         video_data = self._download_json(
             'http://tubitv.com/oz/videos/%s/content' % video_id, video_id)
-        title = video_data['n']
+        title = video_data['title']
 
         formats = self._extract_m3u8_formats(
-            video_data['mh'], video_id, 'mp4', 'm3u8_native')
+            self._proto_relative_url(video_data['url']),
+            video_id, 'mp4', 'm3u8_native')
         self._sort_formats(formats)
 
+        thumbnails = []
+        for thumbnail_url in video_data.get('thumbnails', []):
+            if not thumbnail_url:
+                continue
+            thumbnails.append({
+                'url': self._proto_relative_url(thumbnail_url),
+            })
+
         subtitles = {}
-        for sub in video_data.get('sb', []):
-            sub_url = sub.get('u')
+        for sub in video_data.get('subtitles', []):
+            sub_url = sub.get('url')
             if not sub_url:
                 continue
-            subtitles.setdefault(sub.get('l', 'en'), []).append({
-                'url': sub_url,
+            subtitles.setdefault(sub.get('lang', 'English'), []).append({
+                'url': self._proto_relative_url(sub_url),
             })
 
         return {
@@ -78,9 +82,8 @@ class TubiTvIE(InfoExtractor):
             'title': title,
             'formats': formats,
             'subtitles': subtitles,
-            'thumbnail': video_data.get('ph'),
-            'description': video_data.get('d'),
-            'duration': int_or_none(video_data.get('s')),
-            'timestamp': parse_iso8601(video_data.get('u')),
-            'uploader': video_data.get('on'),
+            'thumbnails': thumbnails,
+            'description': video_data.get('description'),
+            'duration': int_or_none(video_data.get('duration')),
+            'uploader_id': video_data.get('publisher_id'),
         }
index 4d8b57111897f3c936e11f55fcea60d6a6bd30d6..ebe411e12aa5fa44e201dcaefc52e839e5b2d212 100644 (file)
@@ -1,4 +1,4 @@
-# -*- coding: utf-8 -*-
+# coding: utf-8
 from __future__ import unicode_literals
 
 import re
diff --git a/youtube_dl/extractor/turner.py b/youtube_dl/extractor/turner.py
new file mode 100644 (file)
index 0000000..57ffedb
--- /dev/null
@@ -0,0 +1,175 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .adobepass import AdobePassIE
+from ..compat import compat_str
+from ..utils import (
+    xpath_text,
+    int_or_none,
+    determine_ext,
+    parse_duration,
+    xpath_attr,
+    update_url_query,
+    ExtractorError,
+)
+
+
+class TurnerBaseIE(AdobePassIE):
+    def _extract_timestamp(self, video_data):
+        return int_or_none(xpath_attr(video_data, 'dateCreated', 'uts'))
+
+    def _extract_cvp_info(self, data_src, video_id, path_data={}, ap_data={}):
+        video_data = self._download_xml(data_src, video_id)
+        video_id = video_data.attrib['id']
+        title = xpath_text(video_data, 'headline', fatal=True)
+        content_id = xpath_text(video_data, 'contentId') or video_id
+        # rtmp_src = xpath_text(video_data, 'akamai/src')
+        # if rtmp_src:
+        #     splited_rtmp_src = rtmp_src.split(',')
+        #     if len(splited_rtmp_src) == 2:
+        #         rtmp_src = splited_rtmp_src[1]
+        # aifp = xpath_text(video_data, 'akamai/aifp', default='')
+
+        tokens = {}
+        urls = []
+        formats = []
+        rex = re.compile(
+            r'(?P<width>[0-9]+)x(?P<height>[0-9]+)(?:_(?P<bitrate>[0-9]+))?')
+        # Possible formats locations: files/file, files/groupFiles/files
+        # and maybe others
+        for video_file in video_data.findall('.//file'):
+            video_url = video_file.text.strip()
+            if not video_url:
+                continue
+            ext = determine_ext(video_url)
+            if video_url.startswith('/mp4:protected/'):
+                continue
+                # TODO Correct extraction for these files
+                # protected_path_data = path_data.get('protected')
+                # if not protected_path_data or not rtmp_src:
+                #     continue
+                # protected_path = self._search_regex(
+                #     r'/mp4:(.+)\.[a-z0-9]', video_url, 'secure path')
+                # auth = self._download_webpage(
+                #     protected_path_data['tokenizer_src'], query={
+                #         'path': protected_path,
+                #         'videoId': content_id,
+                #         'aifp': aifp,
+                #     })
+                # token = xpath_text(auth, 'token')
+                # if not token:
+                #     continue
+                # video_url = rtmp_src + video_url + '?' + token
+            elif video_url.startswith('/secure/'):
+                secure_path_data = path_data.get('secure')
+                if not secure_path_data:
+                    continue
+                video_url = secure_path_data['media_src'] + video_url
+                secure_path = self._search_regex(r'https?://[^/]+(.+/)', video_url, 'secure path') + '*'
+                token = tokens.get(secure_path)
+                if not token:
+                    query = {
+                        'path': secure_path,
+                        'videoId': content_id,
+                    }
+                    if ap_data.get('auth_required'):
+                        query['accessToken'] = self._extract_mvpd_auth(ap_data['url'], video_id, ap_data['site_name'], ap_data['site_name'])
+                    auth = self._download_xml(
+                        secure_path_data['tokenizer_src'], video_id, query=query)
+                    error_msg = xpath_text(auth, 'error/msg')
+                    if error_msg:
+                        raise ExtractorError(error_msg, expected=True)
+                    token = xpath_text(auth, 'token')
+                    if not token:
+                        continue
+                    tokens[secure_path] = token
+                video_url = video_url + '?hdnea=' + token
+            elif not re.match('https?://', video_url):
+                base_path_data = path_data.get(ext, path_data.get('default', {}))
+                media_src = base_path_data.get('media_src')
+                if not media_src:
+                    continue
+                video_url = media_src + video_url
+            if video_url in urls:
+                continue
+            urls.append(video_url)
+            format_id = video_file.get('bitrate')
+            if ext == 'smil':
+                formats.extend(self._extract_smil_formats(
+                    video_url, video_id, fatal=False))
+            elif ext == 'm3u8':
+                formats.extend(self._extract_m3u8_formats(
+                    video_url, video_id, 'mp4',
+                    m3u8_id=format_id or 'hls', fatal=False))
+            elif ext == 'f4m':
+                formats.extend(self._extract_f4m_formats(
+                    update_url_query(video_url, {'hdcore': '3.7.0'}),
+                    video_id, f4m_id=format_id or 'hds', fatal=False))
+            else:
+                f = {
+                    'format_id': format_id,
+                    'url': video_url,
+                    'ext': ext,
+                }
+                mobj = rex.search(format_id + video_url)
+                if mobj:
+                    f.update({
+                        'width': int(mobj.group('width')),
+                        'height': int(mobj.group('height')),
+                        'tbr': int_or_none(mobj.group('bitrate')),
+                    })
+                elif isinstance(format_id, compat_str):
+                    if format_id.isdigit():
+                        f['tbr'] = int(format_id)
+                    else:
+                        mobj = re.match(r'ios_(audio|[0-9]+)$', format_id)
+                        if mobj:
+                            if mobj.group(1) == 'audio':
+                                f.update({
+                                    'vcodec': 'none',
+                                    'ext': 'm4a',
+                                })
+                            else:
+                                f['tbr'] = int(mobj.group(1))
+                formats.append(f)
+        self._sort_formats(formats)
+
+        subtitles = {}
+        for source in video_data.findall('closedCaptions/source'):
+            for track in source.findall('track'):
+                track_url = track.get('url')
+                if not isinstance(track_url, compat_str) or track_url.endswith('/big'):
+                    continue
+                lang = track.get('lang') or track.get('label') or 'en'
+                subtitles.setdefault(lang, []).append({
+                    'url': track_url,
+                    'ext': {
+                        'scc': 'scc',
+                        'webvtt': 'vtt',
+                        'smptett': 'tt',
+                    }.get(source.get('format'))
+                })
+
+        thumbnails = [{
+            'id': image.get('cut'),
+            'url': image.text,
+            'width': int_or_none(image.get('width')),
+            'height': int_or_none(image.get('height')),
+        } for image in video_data.findall('images/image')]
+
+        return {
+            'id': video_id,
+            'title': title,
+            'formats': formats,
+            'subtitles': subtitles,
+            'thumbnails': thumbnails,
+            'description': xpath_text(video_data, 'description'),
+            'duration': parse_duration(xpath_text(video_data, 'length') or xpath_text(video_data, 'trt')),
+            'timestamp': self._extract_timestamp(video_data),
+            'upload_date': xpath_attr(video_data, 'metas', 'version'),
+            'series': xpath_text(video_data, 'showTitle'),
+            'season_number': int_or_none(xpath_text(video_data, 'seasonNumber')),
+            'episode_number': int_or_none(xpath_text(video_data, 'episodeNumber')),
+        }
index f225ec68448271eabbcca0b63ef367f37e7e908c..bd28267b0cb6a0154133c98f567c24f054b5459a 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 import re
index 343edf20663d172a4e071a77f228fc4d9962003d..5d2d8f13239e6ac5b10f5506143216301e5d4ecf 100644 (file)
@@ -2,9 +2,13 @@
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
+from ..compat import compat_str
 from ..utils import (
     ExtractorError,
+    int_or_none,
     parse_iso8601,
+    try_get,
+    update_url_query,
 )
 
 
@@ -65,36 +69,47 @@ class TV4IE(InfoExtractor):
         video_id = self._match_id(url)
 
         info = self._download_json(
-            'http://www.tv4play.se/player/assets/%s.json' % video_id, video_id, 'Downloading video info JSON')
+            'http://www.tv4play.se/player/assets/%s.json' % video_id,
+            video_id, 'Downloading video info JSON')
 
         # If is_geo_restricted is true, it doesn't necessarily mean we can't download it
-        if info['is_geo_restricted']:
+        if info.get('is_geo_restricted'):
             self.report_warning('This content might not be available in your country due to licensing restrictions.')
-        if info['requires_subscription']:
+        if info.get('requires_subscription'):
             raise ExtractorError('This content requires subscription.', expected=True)
 
-        sources_data = self._download_json(
-            'https://prima.tv4play.se/api/web/asset/%s/play.json?protocol=http&videoFormat=MP4' % video_id, video_id, 'Downloading sources JSON')
-        sources = sources_data['playback']
+        title = info['title']
 
         formats = []
-        for item in sources.get('items', {}).get('item', []):
-            ext, bitrate = item['mediaFormat'], item['bitrate']
-            formats.append({
-                'format_id': '%s_%s' % (ext, bitrate),
-                'tbr': bitrate,
-                'ext': ext,
-                'url': item['url'],
-            })
+        # http formats are linked with unresolvable host
+        for kind in ('hls', ''):
+            data = self._download_json(
+                'https://prima.tv4play.se/api/web/asset/%s/play.json' % video_id,
+                video_id, 'Downloading sources JSON', query={
+                    'protocol': kind,
+                    'videoFormat': 'MP4+WEBVTTS+WEBVTT',
+                })
+            item = try_get(data, lambda x: x['playback']['items']['item'], dict)
+            manifest_url = item.get('url')
+            if not isinstance(manifest_url, compat_str):
+                continue
+            if kind == 'hls':
+                formats.extend(self._extract_m3u8_formats(
+                    manifest_url, video_id, 'mp4', entry_protocol='m3u8_native',
+                    m3u8_id=kind, fatal=False))
+            else:
+                formats.extend(self._extract_f4m_formats(
+                    update_url_query(manifest_url, {'hdcore': '3.8.0'}),
+                    video_id, f4m_id='hds', fatal=False))
         self._sort_formats(formats)
 
         return {
             'id': video_id,
-            'title': info['title'],
+            'title': title,
             'formats': formats,
             'description': info.get('description'),
             'timestamp': parse_iso8601(info.get('broadcast_date_time')),
-            'duration': info.get('duration'),
+            'duration': int_or_none(info.get('duration')),
             'thumbnail': info.get('image'),
-            'is_live': sources.get('live'),
+            'is_live': info.get('is_live') is True,
         }
diff --git a/youtube_dl/extractor/tvanouvelles.py b/youtube_dl/extractor/tvanouvelles.py
new file mode 100644 (file)
index 0000000..1086176
--- /dev/null
@@ -0,0 +1,65 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from .brightcove import BrightcoveNewIE
+
+
+class TVANouvellesIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?tvanouvelles\.ca/videos/(?P<id>\d+)'
+    _TEST = {
+        'url': 'http://www.tvanouvelles.ca/videos/5117035533001',
+        'info_dict': {
+            'id': '5117035533001',
+            'ext': 'mp4',
+            'title': 'L’industrie du taxi dénonce l’entente entre Québec et Uber: explications',
+            'description': 'md5:479653b7c8cf115747bf5118066bd8b3',
+            'uploader_id': '1741764581',
+            'timestamp': 1473352030,
+            'upload_date': '20160908',
+        },
+        'add_ie': ['BrightcoveNew'],
+    }
+    BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1741764581/default_default/index.html?videoId=%s'
+
+    def _real_extract(self, url):
+        brightcove_id = self._match_id(url)
+        return self.url_result(
+            self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id,
+            BrightcoveNewIE.ie_key(), brightcove_id)
+
+
+class TVANouvellesArticleIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?tvanouvelles\.ca/(?:[^/]+/)+(?P<id>[^/?#&]+)'
+    _TEST = {
+        'url': 'http://www.tvanouvelles.ca/2016/11/17/des-policiers-qui-ont-la-meche-un-peu-courte',
+        'info_dict': {
+            'id': 'des-policiers-qui-ont-la-meche-un-peu-courte',
+            'title': 'Des policiers qui ont «la mèche un peu courte»?',
+            'description': 'md5:92d363c8eb0f0f030de9a4a84a90a3a0',
+        },
+        'playlist_mincount': 4,
+    }
+
+    @classmethod
+    def suitable(cls, url):
+        return False if TVANouvellesIE.suitable(url) else super(TVANouvellesArticleIE, cls).suitable(url)
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, display_id)
+
+        entries = [
+            self.url_result(
+                'http://www.tvanouvelles.ca/videos/%s' % mobj.group('id'),
+                ie=TVANouvellesIE.ie_key(), video_id=mobj.group('id'))
+            for mobj in re.finditer(
+                r'data-video-id=(["\'])?(?P<id>\d+)', webpage)]
+
+        title = self._og_search_title(webpage, fatal=False)
+        description = self._og_search_description(webpage)
+
+        return self.playlist_result(entries, display_id, title, description)
index ead4c00c79bb453585b4ba18c67f7535bcc69254..f3817ab288473a01e899f335821c241fe43d0e91 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 import re
index cb76a2a583912d120faa81bcbcb17fa136a95eeb..957cf1ea2666ace07087ffd7d9e94810e87fe1e8 100644 (file)
@@ -6,7 +6,7 @@ from .mtv import MTVServicesInfoExtractor
 
 class TVLandIE(MTVServicesInfoExtractor):
     IE_NAME = 'tvland.com'
-    _VALID_URL = r'https?://(?:www\.)?tvland\.com/(?:video-clips|episodes)/(?P<id>[^/?#.]+)'
+    _VALID_URL = r'https?://(?:www\.)?tvland\.com/(?:video-clips|(?:full-)?episodes)/(?P<id>[^/?#.]+)'
     _FEED_URL = 'http://www.tvland.com/feeds/mrss/'
     _TESTS = [{
         # Geo-restricted. Without a proxy metadata are still there. With a
@@ -28,4 +28,7 @@ class TVLandIE(MTVServicesInfoExtractor):
             'upload_date': '20151228',
             'timestamp': 1451289600,
         },
+    }, {
+        'url': 'http://www.tvland.com/full-episodes/iu0hz6/younger-a-kiss-is-just-a-kiss-season-3-ep-301',
+        'only_matching': True,
     }]
diff --git a/youtube_dl/extractor/tvnoe.py b/youtube_dl/extractor/tvnoe.py
new file mode 100644 (file)
index 0000000..6d5c748
--- /dev/null
@@ -0,0 +1,49 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .jwplatform import JWPlatformBaseIE
+from ..utils import (
+    clean_html,
+    get_element_by_class,
+    js_to_json,
+)
+
+
+class TVNoeIE(JWPlatformBaseIE):
+    _VALID_URL = r'https?://(?:www\.)?tvnoe\.cz/video/(?P<id>[0-9]+)'
+    _TEST = {
+        'url': 'http://www.tvnoe.cz/video/10362',
+        'md5': 'aee983f279aab96ec45ab6e2abb3c2ca',
+        'info_dict': {
+            'id': '10362',
+            'ext': 'mp4',
+            'series': 'Noční univerzita',
+            'title': 'prof. Tomáš Halík, Th.D. - Návrat náboženství a střet civilizací',
+            'description': 'md5:f337bae384e1a531a52c55ebc50fff41',
+        }
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        iframe_url = self._search_regex(
+            r'<iframe[^>]+src="([^"]+)"', webpage, 'iframe URL')
+
+        ifs_page = self._download_webpage(iframe_url, video_id)
+        jwplayer_data = self._parse_json(
+            self._find_jwplayer_data(ifs_page),
+            video_id, transform_source=js_to_json)
+        info_dict = self._parse_jwplayer_data(
+            jwplayer_data, video_id, require_title=False, base_url=iframe_url)
+
+        info_dict.update({
+            'id': video_id,
+            'title': clean_html(get_element_by_class(
+                'field-name-field-podnazev', webpage)),
+            'description': clean_html(get_element_by_class(
+                'field-name-body', webpage)),
+            'series': clean_html(get_element_by_class('title', webpage))
+        })
+
+        return info_dict
index 2abfb78307186a2007e7f375bccb7a26ff0713ca..06ea2b40a759158baa2c561498e5155011f418ec 100644 (file)
@@ -69,7 +69,8 @@ class TVPIE(InfoExtractor):
         webpage = self._download_webpage(url, page_id)
         video_id = self._search_regex([
             r'<iframe[^>]+src="[^"]*?object_id=(\d+)',
-            "object_id\s*:\s*'(\d+)'"], webpage, 'video id')
+            r"object_id\s*:\s*'(\d+)'",
+            r'data-video-id="(\d+)"'], webpage, 'video id', default=page_id)
         return {
             '_type': 'url_transparent',
             'url': 'tvp:' + video_id,
@@ -138,6 +139,9 @@ class TVPEmbedIE(InfoExtractor):
             # formats.extend(self._extract_mpd_formats(
             #     video_url_base + '.ism/video.mpd',
             #     video_id, mpd_id='dash', fatal=False))
+            formats.extend(self._extract_ism_formats(
+                video_url_base + '.ism/Manifest',
+                video_id, 'mss', fatal=False))
             formats.extend(self._extract_f4m_formats(
                 video_url_base + '.ism/video.f4m',
                 video_id, f4m_id='hds', fatal=False))
index 4186e82db170300c6addeb0d0054c75b3d512e48..3eda0a399cf602d8e717c75e392a18e9dac82ec9 100644 (file)
@@ -348,6 +348,29 @@ class ViafreeIE(InfoExtractor):
             'skip_download': True,
         },
         'add_ie': [TVPlayIE.ie_key()],
+    }, {
+        # with relatedClips
+        'url': 'http://www.viafree.se/program/reality/sommaren-med-youtube-stjarnorna/sasong-1/avsnitt-1',
+        'info_dict': {
+            'id': '758770',
+            'ext': 'mp4',
+            'title': 'Sommaren med YouTube-stjärnorna S01E01',
+            'description': 'md5:2bc69dce2c4bb48391e858539bbb0e3f',
+            'series': 'Sommaren med YouTube-stjärnorna',
+            'season': 'Säsong 1',
+            'season_number': 1,
+            'duration': 1326,
+            'timestamp': 1470905572,
+            'upload_date': '20160811',
+        },
+        'params': {
+            'skip_download': True,
+        },
+        'add_ie': [TVPlayIE.ie_key()],
+    }, {
+        # Different og:image URL schema
+        'url': 'http://www.viafree.se/program/reality/sommaren-med-youtube-stjarnorna/sasong-1/avsnitt-2',
+        'only_matching': True,
     }, {
         'url': 'http://www.viafree.no/programmer/underholdning/det-beste-vorspielet/sesong-2/episode-1',
         'only_matching': True,
@@ -365,8 +388,38 @@ class ViafreeIE(InfoExtractor):
 
         webpage = self._download_webpage(url, video_id)
 
-        video_id = self._search_regex(
-            r'currentVideo["\']\s*:\s*.+?["\']id["\']\s*:\s*["\'](?P<id>\d{6,})',
-            webpage, 'video id')
+        data = self._parse_json(
+            self._search_regex(
+                r'(?s)window\.App\s*=\s*({.+?})\s*;\s*</script',
+                webpage, 'data', default='{}'),
+            video_id, transform_source=lambda x: re.sub(
+                r'(?s)function\s+[a-zA-Z_][\da-zA-Z_]*\s*\([^)]*\)\s*{[^}]*}\s*',
+                'null', x), fatal=False)
+
+        video_id = None
+
+        if data:
+            video_id = try_get(
+                data, lambda x: x['context']['dispatcher']['stores'][
+                    'ContentPageProgramStore']['currentVideo']['id'],
+                compat_str)
+
+        # Fallback #1 (extract from og:image URL schema)
+        if not video_id:
+            thumbnail = self._og_search_thumbnail(webpage, default=None)
+            if thumbnail:
+                video_id = self._search_regex(
+                    # Patterns seen:
+                    #  http://cdn.playapi.mtgx.tv/imagecache/600x315/cloud/content-images/inbox/765166/a2e95e5f1d735bab9f309fa345cc3f25.jpg
+                    #  http://cdn.playapi.mtgx.tv/imagecache/600x315/cloud/content-images/seasons/15204/758770/4a5ba509ca8bc043e1ebd1a76131cdf2.jpg
+                    r'https?://[^/]+/imagecache/(?:[^/]+/)+(\d{6,})/',
+                    thumbnail, 'video id', default=None)
+
+        # Fallback #2. Extract from raw JSON string.
+        # May extract wrong video id if relatedClips is present.
+        if not video_id:
+            video_id = self._search_regex(
+                r'currentVideo["\']\s*:\s*.+?["\']id["\']\s*:\s*["\'](\d{6,})',
+                webpage, 'video id')
 
         return self.url_result('mtg:%s' % video_id, TVPlayIE.ie_key())
index 890f551800d011c423636530dfd8d70e4e926290..77414a242d68f2309985235f0418d47c77194417 100644 (file)
@@ -7,6 +7,7 @@ import random
 
 from .common import InfoExtractor
 from ..compat import (
+    compat_HTTPError,
     compat_parse_qs,
     compat_str,
     compat_urllib_parse_urlencode,
@@ -14,13 +15,13 @@ from ..compat import (
     compat_urlparse,
 )
 from ..utils import (
+    clean_html,
     ExtractorError,
     int_or_none,
     js_to_json,
     orderedSet,
     parse_duration,
     parse_iso8601,
-    sanitized_Request,
     urlencode_postdata,
 )
 
@@ -31,6 +32,7 @@ class TwitchBaseIE(InfoExtractor):
     _API_BASE = 'https://api.twitch.tv'
     _USHER_BASE = 'https://usher.ttvnw.net'
     _LOGIN_URL = 'http://www.twitch.tv/login'
+    _CLIENT_ID = 'jzkbprff40iqj646a697cyrvl0zt2m6'
     _NETRC_MACHINE = 'twitch'
 
     def _handle_error(self, response):
@@ -42,16 +44,10 @@ class TwitchBaseIE(InfoExtractor):
                 '%s returned error: %s - %s' % (self.IE_NAME, error, response.get('message')),
                 expected=True)
 
-    def _download_json(self, url, video_id, note='Downloading JSON metadata'):
-        headers = {
-            'Referer': 'http://api.twitch.tv/crossdomain/receiver.html?v=2',
-            'X-Requested-With': 'XMLHttpRequest',
-        }
-        for cookie in self._downloader.cookiejar:
-            if cookie.name == 'api_token':
-                headers['Twitch-Api-Token'] = cookie.value
-        request = sanitized_Request(url, headers=headers)
-        response = super(TwitchBaseIE, self)._download_json(request, video_id, note)
+    def _call_api(self, path, item_id, note):
+        response = self._download_json(
+            '%s/%s' % (self._API_BASE, path), item_id, note,
+            headers={'Client-ID': self._CLIENT_ID})
         self._handle_error(response)
         return response
 
@@ -63,9 +59,17 @@ class TwitchBaseIE(InfoExtractor):
         if username is None:
             return
 
+        def fail(message):
+            raise ExtractorError(
+                'Unable to login. Twitch said: %s' % message, expected=True)
+
         login_page, handle = self._download_webpage_handle(
             self._LOGIN_URL, None, 'Downloading login page')
 
+        # Some TOR nodes and public proxies are blocked completely
+        if 'blacklist_message' in login_page:
+            fail(clean_html(login_page))
+
         login_form = self._hidden_inputs(login_page)
 
         login_form.update({
@@ -82,21 +86,24 @@ class TwitchBaseIE(InfoExtractor):
         if not post_url.startswith('http'):
             post_url = compat_urlparse.urljoin(redirect_url, post_url)
 
-        request = sanitized_Request(
-            post_url, urlencode_postdata(login_form))
-        request.add_header('Referer', redirect_url)
-        response = self._download_webpage(
-            request, None, 'Logging in as %s' % username)
-
-        error_message = self._search_regex(
-            r'<div[^>]+class="subwindow_notice"[^>]*>([^<]+)</div>',
-            response, 'error message', default=None)
-        if error_message:
-            raise ExtractorError(
-                'Unable to login. Twitch said: %s' % error_message, expected=True)
+        headers = {'Referer': redirect_url}
 
-        if '>Reset your password<' in response:
-            self.report_warning('Twitch asks you to reset your password, go to https://secure.twitch.tv/reset/submit')
+        try:
+            response = self._download_json(
+                post_url, None, 'Logging in as %s' % username,
+                data=urlencode_postdata(login_form),
+                headers=headers)
+        except ExtractorError as e:
+            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
+                response = self._parse_json(
+                    e.cause.read().decode('utf-8'), None)
+                fail(response['message'])
+            raise
+
+        if response.get('redirect'):
+            self._download_webpage(
+                response['redirect'], None, 'Downloading login redirect page',
+                headers=headers)
 
     def _prefer_source(self, formats):
         try:
@@ -109,14 +116,14 @@ class TwitchBaseIE(InfoExtractor):
 
 class TwitchItemBaseIE(TwitchBaseIE):
     def _download_info(self, item, item_id):
-        return self._extract_info(self._download_json(
-            '%s/kraken/videos/%s%s' % (self._API_BASE, item, item_id), item_id,
+        return self._extract_info(self._call_api(
+            'kraken/videos/%s%s' % (item, item_id), item_id,
             'Downloading %s info JSON' % self._ITEM_TYPE))
 
     def _extract_media(self, item_id):
         info = self._download_info(self._ITEM_SHORTCUT, item_id)
-        response = self._download_json(
-            '%s/api/videos/%s%s' % (self._API_BASE, self._ITEM_SHORTCUT, item_id), item_id,
+        response = self._call_api(
+            'api/videos/%s%s' % (self._ITEM_SHORTCUT, item_id), item_id,
             'Downloading %s playlist JSON' % self._ITEM_TYPE)
         entries = []
         chunks = response['chunks']
@@ -240,14 +247,15 @@ class TwitchVodIE(TwitchItemBaseIE):
             # m3u8 download
             'skip_download': True,
         },
+        'skip': 'HTTP Error 404: Not Found',
     }]
 
     def _real_extract(self, url):
         item_id = self._match_id(url)
 
         info = self._download_info(self._ITEM_SHORTCUT, item_id)
-        access_token = self._download_json(
-            '%s/api/vods/%s/access_token' % (self._API_BASE, item_id), item_id,
+        access_token = self._call_api(
+            'api/vods/%s/access_token' % item_id, item_id,
             'Downloading %s access token' % self._ITEM_TYPE)
 
         formats = self._extract_m3u8_formats(
@@ -275,12 +283,12 @@ class TwitchVodIE(TwitchItemBaseIE):
 
 
 class TwitchPlaylistBaseIE(TwitchBaseIE):
-    _PLAYLIST_URL = '%s/kraken/channels/%%s/videos/?offset=%%d&limit=%%d' % TwitchBaseIE._API_BASE
+    _PLAYLIST_PATH = 'kraken/channels/%s/videos/?offset=%d&limit=%d'
     _PAGE_LIMIT = 100
 
     def _extract_playlist(self, channel_id):
-        info = self._download_json(
-            '%s/kraken/channels/%s' % (self._API_BASE, channel_id),
+        info = self._call_api(
+            'kraken/channels/%s' % channel_id,
             channel_id, 'Downloading channel info JSON')
         channel_name = info.get('display_name') or info.get('name')
         entries = []
@@ -289,8 +297,8 @@ class TwitchPlaylistBaseIE(TwitchBaseIE):
         broken_paging_detected = False
         counter_override = None
         for counter in itertools.count(1):
-            response = self._download_json(
-                self._PLAYLIST_URL % (channel_id, offset, limit),
+            response = self._call_api(
+                self._PLAYLIST_PATH % (channel_id, offset, limit),
                 channel_id,
                 'Downloading %s videos JSON page %s'
                 % (self._PLAYLIST_TYPE, counter_override or counter))
@@ -345,7 +353,7 @@ class TwitchProfileIE(TwitchPlaylistBaseIE):
 class TwitchPastBroadcastsIE(TwitchPlaylistBaseIE):
     IE_NAME = 'twitch:past_broadcasts'
     _VALID_URL = r'%s/(?P<id>[^/]+)/profile/past_broadcasts/?(?:\#.*)?$' % TwitchBaseIE._VALID_URL_BASE
-    _PLAYLIST_URL = TwitchPlaylistBaseIE._PLAYLIST_URL + '&broadcasts=true'
+    _PLAYLIST_PATH = TwitchPlaylistBaseIE._PLAYLIST_PATH + '&broadcasts=true'
     _PLAYLIST_TYPE = 'past broadcasts'
 
     _TEST = {
@@ -389,15 +397,12 @@ class TwitchStreamIE(TwitchBaseIE):
     def _real_extract(self, url):
         channel_id = self._match_id(url)
 
-        stream = self._download_json(
-            '%s/kraken/streams/%s' % (self._API_BASE, channel_id), channel_id,
+        stream = self._call_api(
+            'kraken/streams/%s?stream_type=all' % channel_id, channel_id,
             'Downloading stream JSON').get('stream')
 
-        # Fallback on profile extraction if stream is offline
         if not stream:
-            return self.url_result(
-                'http://www.twitch.tv/%s/profile' % channel_id,
-                'TwitchProfile', channel_id)
+            raise ExtractorError('%s is offline' % channel_id, expected=True)
 
         # Channel name may be typed if different case than the original channel name
         # (e.g. http://www.twitch.tv/TWITCHPLAYSPOKEMON) that will lead to constructing
@@ -405,13 +410,14 @@ class TwitchStreamIE(TwitchBaseIE):
         # JSON and fallback to lowercase if it's not available.
         channel_id = stream.get('channel', {}).get('name') or channel_id.lower()
 
-        access_token = self._download_json(
-            '%s/api/channels/%s/access_token' % (self._API_BASE, channel_id), channel_id,
+        access_token = self._call_api(
+            'api/channels/%s/access_token' % channel_id, channel_id,
             'Downloading channel access token')
 
         query = {
             'allow_source': 'true',
             'allow_audio_only': 'true',
+            'allow_spectre': 'true',
             'p': random.randint(1000000, 10000000),
             'player': 'twitchweb',
             'segment_preference': '4',
index b7384298619608ab879337326b1e6719962932e3..ac0b221b4f5ab02c33f1776389cba849bf00ae2b 100644 (file)
@@ -4,6 +4,7 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
+from ..compat import compat_urlparse
 from ..utils import (
     determine_ext,
     float_or_none,
@@ -13,6 +14,8 @@ from ..utils import (
     ExtractorError,
 )
 
+from .periscope import PeriscopeIE
+
 
 class TwitterBaseIE(InfoExtractor):
     def _get_vmap_video_url(self, vmap_url, video_id):
@@ -22,7 +25,7 @@ class TwitterBaseIE(InfoExtractor):
 
 class TwitterCardIE(TwitterBaseIE):
     IE_NAME = 'twitter:card'
-    _VALID_URL = r'https?://(?:www\.)?twitter\.com/i/(?:cards/tfw/v1|videos/tweet)/(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www\.)?twitter\.com/i/(?:cards/tfw/v1|videos(?:/tweet)?)/(?P<id>\d+)'
     _TESTS = [
         {
             'url': 'https://twitter.com/i/cards/tfw/v1/560070183650213889',
@@ -48,12 +51,12 @@ class TwitterCardIE(TwitterBaseIE):
         },
         {
             'url': 'https://twitter.com/i/cards/tfw/v1/654001591733886977',
-            'md5': 'd4724ffe6d2437886d004fa5de1043b3',
+            'md5': 'b6d9683dd3f48e340ded81c0e917ad46',
             'info_dict': {
                 'id': 'dq4Oj5quskI',
                 'ext': 'mp4',
                 'title': 'Ubuntu 11.10 Overview',
-                'description': 'Take a quick peek at what\'s new and improved in Ubuntu 11.10.\n\nOnce installed take a look at 10 Things to Do After Installing: http://www.omgubuntu.co.uk/2011/10/10...',
+                'description': 'md5:a831e97fa384863d6e26ce48d1c43376',
                 'upload_date': '20111013',
                 'uploader': 'OMG! Ubuntu!',
                 'uploader_id': 'omgubuntu',
@@ -81,6 +84,9 @@ class TwitterCardIE(TwitterBaseIE):
                 'title': 'Twitter web player',
                 'thumbnail': 're:^https?://.*\.jpg',
             },
+        }, {
+            'url': 'https://twitter.com/i/videos/752274308186120192',
+            'only_matching': True,
         },
     ]
 
@@ -100,12 +106,17 @@ class TwitterCardIE(TwitterBaseIE):
             return self.url_result(iframe_url)
 
         config = self._parse_json(self._html_search_regex(
-            r'data-(?:player-)?config="([^"]+)"', webpage, 'data player config'),
+            r'data-(?:player-)?config="([^"]+)"', webpage,
+            'data player config', default='{}'),
             video_id)
 
         if config.get('source_type') == 'vine':
             return self.url_result(config['player_url'], 'Vine')
 
+        periscope_url = PeriscopeIE._extract_url(webpage)
+        if periscope_url:
+            return self.url_result(periscope_url, PeriscopeIE.ie_key())
+
         def _search_dimensions_in_video_url(a_format, video_url):
             m = re.search(r'/(?P<width>\d+)x(?P<height>\d+)/', video_url)
             if m:
@@ -244,10 +255,10 @@ class TwitterIE(InfoExtractor):
         'info_dict': {
             'id': '700207533655363584',
             'ext': 'mp4',
-            'title': 'Donte The Dumbass - BEAT PROD: @suhmeduh #Damndaniel',
-            'description': 'Donte The Dumbass on Twitter: "BEAT PROD: @suhmeduh  https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ"',
+            'title': 'JG - BEAT PROD: @suhmeduh #Damndaniel',
+            'description': 'JG on Twitter: "BEAT PROD: @suhmeduh  https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ"',
             'thumbnail': 're:^https?://.*\.jpg',
-            'uploader': 'Donte The Dumbass',
+            'uploader': 'JG',
             'uploader_id': 'jaydingeer',
         },
         'params': {
@@ -278,6 +289,18 @@ class TwitterIE(InfoExtractor):
         'params': {
             'skip_download': True,  # requires ffmpeg
         },
+    }, {
+        'url': 'https://twitter.com/OPP_HSD/status/779210622571536384',
+        'info_dict': {
+            'id': '1zqKVVlkqLaKB',
+            'ext': 'mp4',
+            'title': 'Sgt Kerry Schmidt - Ontario Provincial Police - Road rage, mischief, assault, rollover and fire in one occurrence',
+            'upload_date': '20160923',
+            'uploader_id': 'OPP_HSD',
+            'uploader': 'Sgt Kerry Schmidt - Ontario Provincial Police',
+            'timestamp': 1474613214,
+        },
+        'add_ie': ['Periscope'],
     }]
 
     def _real_extract(self, url):
@@ -328,13 +351,22 @@ class TwitterIE(InfoExtractor):
             })
             return info
 
+        twitter_card_url = None
         if 'class="PlayableMedia' in webpage:
+            twitter_card_url = '%s//twitter.com/i/videos/tweet/%s' % (self.http_scheme(), twid)
+        else:
+            twitter_card_iframe_url = self._search_regex(
+                r'data-full-card-iframe-url=([\'"])(?P<url>(?:(?!\1).)+)\1',
+                webpage, 'Twitter card iframe URL', default=None, group='url')
+            if twitter_card_iframe_url:
+                twitter_card_url = compat_urlparse.urljoin(url, twitter_card_iframe_url)
+
+        if twitter_card_url:
             info.update({
                 '_type': 'url_transparent',
                 'ie_key': 'TwitterCard',
-                'url': '%s//twitter.com/i/videos/tweet/%s' % (self.http_scheme(), twid),
+                'url': twitter_card_url,
             })
-
             return info
 
         raise ExtractorError('There\'s no video in this tweet.')
@@ -342,7 +374,7 @@ class TwitterIE(InfoExtractor):
 
 class TwitterAmplifyIE(TwitterBaseIE):
     IE_NAME = 'twitter:amplify'
-    _VALID_URL = 'https?://amp\.twimg\.com/v/(?P<id>[0-9a-f\-]{36})'
+    _VALID_URL = r'https?://amp\.twimg\.com/v/(?P<id>[0-9a-f\-]{36})'
 
     _TEST = {
         'url': 'https://amp.twimg.com/v/0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951',
index 89b86955913587c3c09474fdffaab8ad338bb26a..cce29c6e07134c58425f5486b15ce6d9e017d0d7 100644 (file)
@@ -5,6 +5,7 @@ import re
 from .common import InfoExtractor
 from ..compat import (
     compat_HTTPError,
+    compat_str,
     compat_urllib_request,
     compat_urlparse,
 )
@@ -207,7 +208,7 @@ class UdemyIE(InfoExtractor):
             if youtube_url:
                 return self.url_result(youtube_url, 'Youtube')
 
-        video_id = asset['id']
+        video_id = compat_str(asset['id'])
         thumbnail = asset.get('thumbnail_url') or asset.get('thumbnailUrl')
         duration = float_or_none(asset.get('data', {}).get('duration'))
 
@@ -307,7 +308,7 @@ class UdemyIE(InfoExtractor):
 
 class UdemyCourseIE(UdemyIE):
     IE_NAME = 'udemy:course'
-    _VALID_URL = r'https?://www\.udemy\.com/(?P<id>[^/?#&]+)'
+    _VALID_URL = r'https?://(?:www\.)?udemy\.com/(?P<id>[^/?#&]+)'
     _TESTS = []
 
     @classmethod
index ae529f690d10606f065e31ef68d73745109578f2..2cd22cf8a1afa51403b3b9801ca7dd08c03503a9 100644 (file)
@@ -33,9 +33,7 @@ class UplynkIE(InfoExtractor):
         formats = self._extract_m3u8_formats('http://content.uplynk.com/%s.m3u8' % path, display_id, 'mp4')
         if session_id:
             for f in formats:
-                f['extra_param_to_segment_url'] = {
-                    'pbs': session_id,
-                }
+                f['extra_param_to_segment_url'] = 'pbs=' + session_id
         self._sort_formats(formats)
         asset = self._download_json('http://content.uplynk.com/player/assetinfo/%s.json' % path, display_id)
         if asset.get('error') == 1:
index ce3bf6b023bced2176aa1cdf19cb75e371e290ed..8e6fd4731e38bfad40a11e035a61227736babb0f 100644 (file)
@@ -5,17 +5,20 @@ from .common import InfoExtractor
 
 
 class URPlayIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?urplay\.se/program/(?P<id>[0-9]+)'
-    _TEST = {
+    _VALID_URL = r'https?://(?:www\.)?ur(?:play|skola)\.se/(?:program|Produkter)/(?P<id>[0-9]+)'
+    _TESTS = [{
         'url': 'http://urplay.se/program/190031-tripp-trapp-trad-sovkudde',
-        'md5': '15ca67b63fd8fb320ac2bcd854bad7b6',
+        'md5': 'ad5f0de86f16ca4c8062cd103959a9eb',
         'info_dict': {
             'id': '190031',
             'ext': 'mp4',
             'title': 'Tripp, Trapp, Träd : Sovkudde',
             'description': 'md5:b86bffdae04a7e9379d1d7e5947df1d1',
-        }
-    }
+        },
+    }, {
+        'url': 'http://urskola.se/Produkter/155794-Smasagor-meankieli-Grodan-i-vida-varlden',
+        'only_matching': True,
+    }]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
@@ -27,30 +30,17 @@ class URPlayIE(InfoExtractor):
 
         formats = []
         for quality_attr, quality, preference in (('', 'sd', 0), ('_hd', 'hd', 1)):
-            file_rtmp = urplayer_data.get('file_rtmp' + quality_attr)
-            if file_rtmp:
-                formats.append({
-                    'url': 'rtmp://%s/urplay/mp4:%s' % (host, file_rtmp),
-                    'format_id': quality + '-rtmp',
-                    'ext': 'flv',
-                    'preference': preference,
-                })
             file_http = urplayer_data.get('file_http' + quality_attr) or urplayer_data.get('file_http_sub' + quality_attr)
             if file_http:
-                file_http_base_url = 'http://%s/%s' % (host, file_http)
-                formats.extend(self._extract_f4m_formats(
-                    file_http_base_url + 'manifest.f4m', video_id,
-                    preference, '%s-hds' % quality, fatal=False))
-                formats.extend(self._extract_m3u8_formats(
-                    file_http_base_url + 'playlist.m3u8', video_id, 'mp4',
-                    'm3u8_native', preference, '%s-hls' % quality, fatal=False))
+                formats.extend(self._extract_wowza_formats(
+                    'http://%s/%splaylist.m3u8' % (host, file_http), video_id, skip_protocols=['rtmp', 'rtsp']))
         self._sort_formats(formats)
 
         subtitles = {}
         for subtitle in urplayer_data.get('subtitles', []):
             subtitle_url = subtitle.get('file')
             kind = subtitle.get('kind')
-            if subtitle_url or kind and kind != 'captions':
+            if not subtitle_url or (kind and kind != 'captions'):
                 continue
             subtitles.setdefault(subtitle.get('label', 'Svenska'), []).append({
                 'url': subtitle_url,
diff --git a/youtube_dl/extractor/usanetwork.py b/youtube_dl/extractor/usanetwork.py
new file mode 100644 (file)
index 0000000..8233407
--- /dev/null
@@ -0,0 +1,76 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .adobepass import AdobePassIE
+from ..utils import (
+    extract_attributes,
+    smuggle_url,
+    update_url_query,
+)
+
+
+class USANetworkIE(AdobePassIE):
+    _VALID_URL = r'https?://(?:www\.)?usanetwork\.com/(?:[^/]+/videos|movies)/(?P<id>[^/?#]+)'
+    _TEST = {
+        'url': 'http://www.usanetwork.com/mrrobot/videos/hpe-cybersecurity',
+        'md5': '33c0d2ba381571b414024440d08d57fd',
+        'info_dict': {
+            'id': '3086229',
+            'ext': 'mp4',
+            'title': 'HPE Cybersecurity',
+            'description': 'The more we digitize our world, the more vulnerable we are.',
+            'upload_date': '20160818',
+            'timestamp': 1471535460,
+            'uploader': 'NBCU-USA',
+        },
+    }
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+
+        player_params = extract_attributes(self._search_regex(
+            r'(<div[^>]+data-usa-tve-player-container[^>]*>)', webpage, 'player params'))
+        video_id = player_params['data-mpx-guid']
+        title = player_params['data-episode-title']
+
+        account_pid, path = re.search(
+            r'data-src="(?:https?)?//player\.theplatform\.com/p/([^/]+)/.*?/(media/guid/\d+/\d+)',
+            webpage).groups()
+
+        query = {
+            'mbr': 'true',
+        }
+        if player_params.get('data-is-full-episode') == '1':
+            query['manifest'] = 'm3u'
+
+        if player_params.get('data-entitlement') == 'auth':
+            adobe_pass = {}
+            drupal_settings = self._search_regex(
+                r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);',
+                webpage, 'drupal settings', fatal=False)
+            if drupal_settings:
+                drupal_settings = self._parse_json(drupal_settings, video_id, fatal=False)
+                if drupal_settings:
+                    adobe_pass = drupal_settings.get('adobePass', {})
+            resource = self._get_mvpd_resource(
+                adobe_pass.get('adobePassResourceId', 'usa'),
+                title, video_id, player_params.get('data-episode-rating', 'TV-14'))
+            query['auth'] = self._extract_mvpd_auth(
+                url, video_id, adobe_pass.get('adobePassRequestorId', 'usa'), resource)
+
+        info = self._search_json_ld(webpage, video_id, default={})
+        info.update({
+            '_type': 'url_transparent',
+            'url': smuggle_url(update_url_query(
+                'http://link.theplatform.com/s/%s/%s' % (account_pid, path),
+                query), {'force_smil_url': True}),
+            'id': video_id,
+            'title': title,
+            'series': player_params.get('data-show-title'),
+            'episode': title,
+            'ie_key': 'ThePlatform',
+        })
+        return info
index 54605d863027968a4a15c5358b9f98539c69c4b3..0c06bf36bd5f76cabecc47e699ad56a45ba63a4a 100644 (file)
@@ -1,20 +1,25 @@
 from __future__ import unicode_literals
 
+import random
 import re
 
 from .common import InfoExtractor
 from ..compat import (
+    compat_str,
     compat_urlparse,
 )
 from ..utils import (
+    encode_data_uri,
     ExtractorError,
     int_or_none,
     float_or_none,
+    mimetype2ext,
+    str_or_none,
 )
 
 
 class UstreamIE(InfoExtractor):
-    _VALID_URL = r'https?://www\.ustream\.tv/(?P<type>recorded|embed|embed/recorded)/(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www\.)?ustream\.tv/(?P<type>recorded|embed|embed/recorded)/(?P<id>\d+)'
     IE_NAME = 'ustream'
     _TESTS = [{
         'url': 'http://www.ustream.tv/recorded/20274954',
@@ -47,8 +52,108 @@ class UstreamIE(InfoExtractor):
             'id': '10299409',
         },
         'playlist_count': 3,
+    }, {
+        'url': 'http://www.ustream.tv/recorded/91343263',
+        'info_dict': {
+            'id': '91343263',
+            'ext': 'mp4',
+            'title': 'GitHub Universe - General Session - Day 1',
+            'upload_date': '20160914',
+            'description': 'GitHub Universe - General Session - Day 1',
+            'timestamp': 1473872730,
+            'uploader': 'wa0dnskeqkr',
+            'uploader_id': '38977840',
+        },
+        'params': {
+            'skip_download': True,  # m3u8 download
+        },
     }]
 
+    def _get_stream_info(self, url, video_id, app_id_ver, extra_note=None):
+        def num_to_hex(n):
+            return hex(n)[2:]
+
+        rnd = random.randrange
+
+        if not extra_note:
+            extra_note = ''
+
+        conn_info = self._download_json(
+            'http://r%d-1-%s-recorded-lp-live.ums.ustream.tv/1/ustream' % (rnd(1e8), video_id),
+            video_id, note='Downloading connection info' + extra_note,
+            query={
+                'type': 'viewer',
+                'appId': app_id_ver[0],
+                'appVersion': app_id_ver[1],
+                'rsid': '%s:%s' % (num_to_hex(rnd(1e8)), num_to_hex(rnd(1e8))),
+                'rpin': '_rpin.%d' % rnd(1e15),
+                'referrer': url,
+                'media': video_id,
+                'application': 'recorded',
+            })
+        host = conn_info[0]['args'][0]['host']
+        connection_id = conn_info[0]['args'][0]['connectionId']
+
+        return self._download_json(
+            'http://%s/1/ustream?connectionId=%s' % (host, connection_id),
+            video_id, note='Downloading stream info' + extra_note)
+
+    def _get_streams(self, url, video_id, app_id_ver):
+        # Sometimes the return dict does not have 'stream'
+        for trial_count in range(3):
+            stream_info = self._get_stream_info(
+                url, video_id, app_id_ver,
+                extra_note=' (try %d)' % (trial_count + 1) if trial_count > 0 else '')
+            if 'stream' in stream_info[0]['args'][0]:
+                return stream_info[0]['args'][0]['stream']
+        return []
+
+    def _parse_segmented_mp4(self, dash_stream_info):
+        def resolve_dash_template(template, idx, chunk_hash):
+            return template.replace('%', compat_str(idx), 1).replace('%', chunk_hash)
+
+        formats = []
+        for stream in dash_stream_info['streams']:
+            # Use only one provider to avoid too many formats
+            provider = dash_stream_info['providers'][0]
+            fragments = [{
+                'url': resolve_dash_template(
+                    provider['url'] + stream['initUrl'], 0, dash_stream_info['hashes']['0'])
+            }]
+            for idx in range(dash_stream_info['videoLength'] // dash_stream_info['chunkTime']):
+                fragments.append({
+                    'url': resolve_dash_template(
+                        provider['url'] + stream['segmentUrl'], idx,
+                        dash_stream_info['hashes'][compat_str(idx // 10 * 10)])
+                })
+            content_type = stream['contentType']
+            kind = content_type.split('/')[0]
+            f = {
+                'format_id': '-'.join(filter(None, [
+                    'dash', kind, str_or_none(stream.get('bitrate'))])),
+                'protocol': 'http_dash_segments',
+                # TODO: generate a MPD doc for external players?
+                'url': encode_data_uri(b'<MPD/>', 'text/xml'),
+                'ext': mimetype2ext(content_type),
+                'height': stream.get('height'),
+                'width': stream.get('width'),
+                'fragments': fragments,
+            }
+            if kind == 'video':
+                f.update({
+                    'vcodec': stream.get('codec'),
+                    'acodec': 'none',
+                    'vbr': stream.get('bitrate'),
+                })
+            else:
+                f.update({
+                    'vcodec': 'none',
+                    'acodec': stream.get('codec'),
+                    'abr': stream.get('bitrate'),
+                })
+            formats.append(f)
+        return formats
+
     def _real_extract(self, url):
         m = re.match(self._VALID_URL, url)
         video_id = m.group('id')
@@ -86,7 +191,22 @@ class UstreamIE(InfoExtractor):
             'url': video_url,
             'ext': format_id,
             'filesize': filesize,
-        } for format_id, video_url in video['media_urls'].items()]
+        } for format_id, video_url in video['media_urls'].items() if video_url]
+
+        if not formats:
+            hls_streams = self._get_streams(url, video_id, app_id_ver=(11, 2))
+            if hls_streams:
+                # m3u8_native leads to intermittent ContentTooShortError
+                formats.extend(self._extract_m3u8_formats(
+                    hls_streams[0]['url'], video_id, ext='mp4', m3u8_id='hls'))
+
+            '''
+            # DASH streams handling is incomplete as 'url' is missing
+            dash_streams = self._get_streams(url, video_id, app_id_ver=(3, 1))
+            if dash_streams:
+                formats.extend(self._parse_segmented_mp4(dash_streams))
+            '''
+
         self._sort_formats(formats)
 
         description = video.get('description')
@@ -117,7 +237,7 @@ class UstreamIE(InfoExtractor):
 
 
 class UstreamChannelIE(InfoExtractor):
-    _VALID_URL = r'https?://www\.ustream\.tv/channel/(?P<slug>.+)'
+    _VALID_URL = r'https?://(?:www\.)?ustream\.tv/channel/(?P<slug>.+)'
     IE_NAME = 'ustream:channel'
     _TEST = {
         'url': 'http://www.ustream.tv/channel/channeljapan',
index e1798857364c0d9d9ecc8ff48583880cb5cff697..a1e0851b7424e4c73cd34b72c02f16bc1905b6ce 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 import re
index 2cd617b91ce4a4a7eba0a639c0956dca3e168576..6b9c227db7a8a88e89b2df8efd3e067613bf605a 100644 (file)
@@ -13,7 +13,7 @@ from ..utils import (
 
 
 class VesselIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?vessel\.com/(?:videos|embed)/(?P<id>[0-9a-zA-Z]+)'
+    _VALID_URL = r'https?://(?:www\.)?vessel\.com/(?:videos|embed)/(?P<id>[0-9a-zA-Z-_]+)'
     _API_URL_TEMPLATE = 'https://www.vessel.com/api/view/items/%s'
     _LOGIN_URL = 'https://www.vessel.com/api/account/login'
     _NETRC_MACHINE = 'vessel'
@@ -32,12 +32,18 @@ class VesselIE(InfoExtractor):
     }, {
         'url': 'https://www.vessel.com/embed/G4U7gUJ6a?w=615&h=346',
         'only_matching': True,
+    }, {
+        'url': 'https://www.vessel.com/videos/F01_dsLj1',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.vessel.com/videos/RRX-sir-J',
+        'only_matching': True,
     }]
 
     @staticmethod
     def _extract_urls(webpage):
         return [url for _, url in re.findall(
-            r'<iframe[^>]+src=(["\'])((?:https?:)?//(?:www\.)?vessel\.com/embed/[0-9a-zA-Z]+.*?)\1',
+            r'<iframe[^>]+src=(["\'])((?:https?:)?//(?:www\.)?vessel\.com/embed/[0-9a-zA-Z-_]+.*?)\1',
             webpage)]
 
     @staticmethod
index cb64ae0bd07cdca051eb3aa10550840a296ded85..5ab7168808b10279932ba670165bc8190d5fceb0 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 import re
index 388b4debee27d7331ae7dc351338e3829e539071..d82261e5eec5f3c575bc48f23b23c64aa0355f83 100644 (file)
@@ -31,7 +31,7 @@ class VevoIE(VevoBaseIE):
     (currently used by MTVIE and MySpaceIE)
     '''
     _VALID_URL = r'''(?x)
-        (?:https?://www\.vevo\.com/watch/(?!playlist|genre)(?:[^/]+/(?:[^/]+/)?)?|
+        (?:https?://(?:www\.)?vevo\.com/watch/(?!playlist|genre)(?:[^/]+/(?:[^/]+/)?)?|
            https?://cache\.vevo\.com/m/html/embed\.html\?video=|
            https?://videoplayer\.vevo\.com/embed/embedded\?videoId=|
            vevo:)
@@ -51,7 +51,7 @@ class VevoIE(VevoBaseIE):
             'artist': 'Hurts',
             'genre': 'Pop',
         },
-        'expected_warnings': ['Unable to download SMIL file'],
+        'expected_warnings': ['Unable to download SMIL file', 'Unable to download info'],
     }, {
         'note': 'v3 SMIL format',
         'url': 'http://www.vevo.com/watch/cassadee-pope/i-wish-i-could-break-your-heart/USUV71302923',
@@ -67,7 +67,7 @@ class VevoIE(VevoBaseIE):
             'artist': 'Cassadee Pope',
             'genre': 'Country',
         },
-        'expected_warnings': ['Unable to download SMIL file'],
+        'expected_warnings': ['Unable to download SMIL file', 'Unable to download info'],
     }, {
         'note': 'Age-limited video',
         'url': 'https://www.vevo.com/watch/justin-timberlake/tunnel-vision-explicit/USRV81300282',
@@ -83,7 +83,7 @@ class VevoIE(VevoBaseIE):
             'artist': 'Justin Timberlake',
             'genre': 'Pop',
         },
-        'expected_warnings': ['Unable to download SMIL file'],
+        'expected_warnings': ['Unable to download SMIL file', 'Unable to download info'],
     }, {
         'note': 'No video_info',
         'url': 'http://www.vevo.com/watch/k-camp-1/Till-I-Die/USUV71503000',
@@ -91,15 +91,33 @@ class VevoIE(VevoBaseIE):
         'info_dict': {
             'id': 'USUV71503000',
             'ext': 'mp4',
-            'title': 'K Camp - Till I Die',
+            'title': 'K Camp ft. T.I. - Till I Die',
             'age_limit': 18,
             'timestamp': 1449468000,
             'upload_date': '20151207',
             'uploader': 'K Camp',
             'track': 'Till I Die',
             'artist': 'K Camp',
-            'genre': 'Rap/Hip-Hop',
+            'genre': 'Hip-Hop',
         },
+        'expected_warnings': ['Unable to download SMIL file', 'Unable to download info'],
+    }, {
+        'note': 'Featured test',
+        'url': 'https://www.vevo.com/watch/lemaitre/Wait/USUV71402190',
+        'md5': 'd28675e5e8805035d949dc5cf161071d',
+        'info_dict': {
+            'id': 'USUV71402190',
+            'ext': 'mp4',
+            'title': 'Lemaitre ft. LoLo - Wait',
+            'age_limit': 0,
+            'timestamp': 1413432000,
+            'upload_date': '20141016',
+            'uploader': 'Lemaitre',
+            'track': 'Wait',
+            'artist': 'Lemaitre',
+            'genre': 'Electronic',
+        },
+        'expected_warnings': ['Unable to download SMIL file', 'Unable to download info'],
     }, {
         'note': 'Only available via webpage',
         'url': 'http://www.vevo.com/watch/GBUV71600656',
@@ -242,8 +260,11 @@ class VevoIE(VevoBaseIE):
 
             timestamp = parse_iso8601(video_info.get('releaseDate'))
             artists = video_info.get('artists')
-            if artists:
-                artist = uploader = artists[0]['name']
+            for curr_artist in artists:
+                if curr_artist.get('role') == 'Featured':
+                    featured_artist = curr_artist['name']
+                else:
+                    artist = uploader = curr_artist['name']
             view_count = int_or_none(video_info.get('views', {}).get('total'))
 
             for video_version in video_versions:
@@ -374,7 +395,7 @@ class VevoIE(VevoBaseIE):
 
 
 class VevoPlaylistIE(VevoBaseIE):
-    _VALID_URL = r'https?://www\.vevo\.com/watch/(?P<kind>playlist|genre)/(?P<id>[^/?#&]+)'
+    _VALID_URL = r'https?://(?:www\.)?vevo\.com/watch/(?P<kind>playlist|genre)/(?P<id>[^/?#&]+)'
 
     _TESTS = [{
         'url': 'http://www.vevo.com/watch/playlist/dadbf4e7-b99f-4184-9670-6f0e547b6a29',
index 185756301c3a9b0afe440254d3d2051ca97730d7..3b38ac700296a2eef8c12f0b45406f54785d7684 100644 (file)
@@ -22,6 +22,7 @@ class VGTVIE(XstreamIE):
         'fvn.no/fvntv': 'fvntv',
         'aftenposten.no/webtv': 'aptv',
         'ap.vgtv.no/webtv': 'aptv',
+        'tv.aftonbladet.se/abtv': 'abtv',
     }
 
     _APP_NAME_TO_VENDOR = {
@@ -30,6 +31,7 @@ class VGTVIE(XstreamIE):
         'satv': 'sa',
         'fvntv': 'fvn',
         'aptv': 'ap',
+        'abtv': 'ab',
     }
 
     _VALID_URL = r'''(?x)
@@ -40,7 +42,8 @@ class VGTVIE(XstreamIE):
                     /?
                     (?:
                         \#!/(?:video|live)/|
-                        embed?.*id=
+                        embed?.*id=|
+                        articles/
                     )|
                     (?P<appname>
                         %s
@@ -135,6 +138,14 @@ class VGTVIE(XstreamIE):
             'url': 'http://www.vgtv.no/#!/video/127205/inside-the-mind-of-favela-funk',
             'only_matching': True,
         },
+        {
+            'url': 'http://tv.aftonbladet.se/abtv/articles/36015',
+            'only_matching': True,
+        },
+        {
+            'url': 'abtv:140026',
+            'only_matching': True,
+        }
     ]
 
     def _real_extract(self, url):
index e2b2ce0981cc8767ade2f5ef4c8bc52759b86af3..8a00c8fee17ee84ae0d8d0e1e7360ca67befc8b0 100644 (file)
@@ -1,12 +1,93 @@
+# coding: utf-8
 from __future__ import unicode_literals
 
 import re
+import time
+import hashlib
+import json
 
+from .adobepass import AdobePassIE
 from .common import InfoExtractor
-from ..utils import ExtractorError
+from ..compat import compat_HTTPError
+from ..utils import (
+    int_or_none,
+    parse_age_limit,
+    str_or_none,
+    parse_duration,
+    ExtractorError,
+    extract_attributes,
+)
 
 
-class ViceIE(InfoExtractor):
+class ViceBaseIE(AdobePassIE):
+    def _extract_preplay_video(self, url, webpage):
+        watch_hub_data = extract_attributes(self._search_regex(
+            r'(?s)(<watch-hub\s*.+?</watch-hub>)', webpage, 'watch hub'))
+        video_id = watch_hub_data['vms-id']
+        title = watch_hub_data['video-title']
+
+        query = {}
+        is_locked = watch_hub_data.get('video-locked') == '1'
+        if is_locked:
+            resource = self._get_mvpd_resource(
+                'VICELAND', title, video_id,
+                watch_hub_data.get('video-rating'))
+            query['tvetoken'] = self._extract_mvpd_auth(url, video_id, 'VICELAND', resource)
+
+        # signature generation algorithm is reverse engineered from signatureGenerator in
+        # webpack:///../shared/~/vice-player/dist/js/vice-player.js in
+        # https://www.viceland.com/assets/common/js/web.vendor.bundle.js
+        exp = int(time.time()) + 14400
+        query.update({
+            'exp': exp,
+            'sign': hashlib.sha512(('%s:GET:%d' % (video_id, exp)).encode()).hexdigest(),
+        })
+
+        try:
+            host = 'www.viceland' if is_locked else self._PREPLAY_HOST
+            preplay = self._download_json('https://%s.com/en_us/preplay/%s' % (host, video_id), video_id, query=query)
+        except ExtractorError as e:
+            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
+                error = json.loads(e.cause.read().decode())
+                raise ExtractorError('%s said: %s' % (self.IE_NAME, error['details']), expected=True)
+            raise
+
+        video_data = preplay['video']
+        base = video_data['base']
+        uplynk_preplay_url = preplay['preplayURL']
+        episode = video_data.get('episode', {})
+        channel = video_data.get('channel', {})
+
+        subtitles = {}
+        cc_url = preplay.get('ccURL')
+        if cc_url:
+            subtitles['en'] = [{
+                'url': cc_url,
+            }]
+
+        return {
+            '_type': 'url_transparent',
+            'url': uplynk_preplay_url,
+            'id': video_id,
+            'title': title,
+            'description': base.get('body'),
+            'thumbnail': watch_hub_data.get('cover-image') or watch_hub_data.get('thumbnail'),
+            'duration': parse_duration(video_data.get('video_duration') or watch_hub_data.get('video-duration')),
+            'timestamp': int_or_none(video_data.get('created_at')),
+            'age_limit': parse_age_limit(video_data.get('video_rating')),
+            'series': video_data.get('show_title') or watch_hub_data.get('show-title'),
+            'episode_number': int_or_none(episode.get('episode_number') or watch_hub_data.get('episode')),
+            'episode_id': str_or_none(episode.get('id') or video_data.get('episode_id')),
+            'season_number': int_or_none(watch_hub_data.get('season')),
+            'season_id': str_or_none(episode.get('season_id')),
+            'uploader': channel.get('base', {}).get('title') or watch_hub_data.get('channel-title'),
+            'uploader_id': str_or_none(channel.get('id')),
+            'subtitles': subtitles,
+            'ie_key': 'UplynkPreplay',
+        }
+
+
+class ViceIE(ViceBaseIE):
     _VALID_URL = r'https?://(?:.+?\.)?vice\.com/(?:[^/]+/)?videos?/(?P<id>[^/?#&]+)'
 
     _TESTS = [{
@@ -21,7 +102,7 @@ class ViceIE(InfoExtractor):
         'add_ie': ['Ooyala'],
     }, {
         'url': 'http://www.vice.com/video/how-to-hack-a-car',
-        'md5': '6fb2989a3fed069fb8eab3401fc2d3c9',
+        'md5': 'a7ecf64ee4fa19b916c16f4b56184ae2',
         'info_dict': {
             'id': '3jstaBeXgAs',
             'ext': 'mp4',
@@ -32,6 +113,22 @@ class ViceIE(InfoExtractor):
             'upload_date': '20140529',
         },
         'add_ie': ['Youtube'],
+    }, {
+        'url': 'https://video.vice.com/en_us/video/the-signal-from-tolva/5816510690b70e6c5fd39a56',
+        'md5': '',
+        'info_dict': {
+            'id': '5816510690b70e6c5fd39a56',
+            'ext': 'mp4',
+            'uploader': 'Waypoint',
+            'title': 'The Signal From Tölva',
+            'uploader_id': '57f7d621e05ca860fa9ccaf9',
+            'timestamp': 1477941983938,
+        },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        },
+        'add_ie': ['UplynkPreplay'],
     }, {
         'url': 'https://news.vice.com/video/experimenting-on-animals-inside-the-monkey-lab',
         'only_matching': True,
@@ -42,21 +139,21 @@ class ViceIE(InfoExtractor):
         'url': 'https://munchies.vice.com/en/videos/watch-the-trailer-for-our-new-series-the-pizza-show',
         'only_matching': True,
     }]
+    _PREPLAY_HOST = 'video.vice'
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
-        webpage = self._download_webpage(url, video_id)
-        try:
-            embed_code = self._search_regex(
-                r'embedCode=([^&\'"]+)', webpage,
-                'ooyala embed code', default=None)
-            if embed_code:
-                return self.url_result('ooyala:%s' % embed_code, 'Ooyala')
-            youtube_id = self._search_regex(
-                r'data-youtube-id="([^"]+)"', webpage, 'youtube id')
+        webpage, urlh = self._download_webpage_handle(url, video_id)
+        embed_code = self._search_regex(
+            r'embedCode=([^&\'"]+)', webpage,
+            'ooyala embed code', default=None)
+        if embed_code:
+            return self.url_result('ooyala:%s' % embed_code, 'Ooyala')
+        youtube_id = self._search_regex(
+            r'data-youtube-id="([^"]+)"', webpage, 'youtube id', default=None)
+        if youtube_id:
             return self.url_result(youtube_id, 'Youtube')
-        except ExtractorError:
-            raise ExtractorError('The page doesn\'t contain a video', expected=True)
+        return self._extract_preplay_video(urlh.geturl(), webpage)
 
 
 class ViceShowIE(InfoExtractor):
index 8742b607a4a8dcbbf6b2790fa40ca29353b04de2..0eff055a6e5ce2ced15a618c82cadbf2c6c47a4a 100644 (file)
@@ -1,23 +1,10 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
-import time
-import hashlib
-import json
+from .vice import ViceBaseIE
 
-from .adobepass import AdobePassIE
-from ..compat import compat_HTTPError
-from ..utils import (
-    int_or_none,
-    parse_age_limit,
-    str_or_none,
-    parse_duration,
-    ExtractorError,
-    extract_attributes,
-)
 
-
-class VicelandIE(AdobePassIE):
+class VicelandIE(ViceBaseIE):
     _VALID_URL = r'https?://(?:www\.)?viceland\.com/[^/]+/video/[^/]+/(?P<id>[a-f0-9]+)'
     _TEST = {
         'url': 'https://www.viceland.com/en_us/video/cyberwar-trailer/57608447973ee7705f6fbd4e',
@@ -38,70 +25,9 @@ class VicelandIE(AdobePassIE):
         },
         'add_ie': ['UplynkPreplay'],
     }
+    _PREPLAY_HOST = 'www.viceland'
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
-
         webpage = self._download_webpage(url, video_id)
-        watch_hub_data = extract_attributes(self._search_regex(
-            r'(?s)(<watch-hub\s*.+?</watch-hub>)', webpage, 'watch hub'))
-        video_id = watch_hub_data['vms-id']
-        title = watch_hub_data['video-title']
-
-        query = {}
-        if watch_hub_data.get('video-locked') == '1':
-            resource = self._get_mvpd_resource(
-                'VICELAND', title, video_id,
-                watch_hub_data.get('video-rating'))
-            query['tvetoken'] = self._extract_mvpd_auth(url, video_id, 'VICELAND', resource)
-
-        # signature generation algorithm is reverse engineered from signatureGenerator in
-        # webpack:///../shared/~/vice-player/dist/js/vice-player.js in
-        # https://www.viceland.com/assets/common/js/web.vendor.bundle.js
-        exp = int(time.time()) + 14400
-        query.update({
-            'exp': exp,
-            'sign': hashlib.sha512(('%s:GET:%d' % (video_id, exp)).encode()).hexdigest(),
-        })
-
-        try:
-            preplay = self._download_json('https://www.viceland.com/en_us/preplay/%s' % video_id, video_id, query=query)
-        except ExtractorError as e:
-            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
-                error = json.loads(e.cause.read().decode())
-                raise ExtractorError('%s said: %s' % (self.IE_NAME, error['details']), expected=True)
-            raise
-
-        video_data = preplay['video']
-        base = video_data['base']
-        uplynk_preplay_url = preplay['preplayURL']
-        episode = video_data.get('episode', {})
-        channel = video_data.get('channel', {})
-
-        subtitles = {}
-        cc_url = preplay.get('ccURL')
-        if cc_url:
-            subtitles['en'] = [{
-                'url': cc_url,
-            }]
-
-        return {
-            '_type': 'url_transparent',
-            'url': uplynk_preplay_url,
-            'id': video_id,
-            'title': title,
-            'description': base.get('body'),
-            'thumbnail': watch_hub_data.get('cover-image') or watch_hub_data.get('thumbnail'),
-            'duration': parse_duration(video_data.get('video_duration') or watch_hub_data.get('video-duration')),
-            'timestamp': int_or_none(video_data.get('created_at')),
-            'age_limit': parse_age_limit(video_data.get('video_rating')),
-            'series': video_data.get('show_title') or watch_hub_data.get('show-title'),
-            'episode_number': int_or_none(episode.get('episode_number') or watch_hub_data.get('episode')),
-            'episode_id': str_or_none(episode.get('id') or video_data.get('episode_id')),
-            'season_number': int_or_none(watch_hub_data.get('season')),
-            'season_id': str_or_none(episode.get('season_id')),
-            'uploader': channel.get('base', {}).get('title') or watch_hub_data.get('channel-title'),
-            'uploader_id': str_or_none(channel.get('id')),
-            'subtitles': subtitles,
-            'ie_key': 'UplynkPreplay',
-        }
+        return self._extract_preplay_video(url, webpage)
index 2ed5d964344211c22d2260b1946273772434db8b..a19411a058784fc61db3b764ec882f0b9986323f 100644 (file)
@@ -6,7 +6,7 @@ from .internetvideoarchive import InternetVideoArchiveIE
 
 
 class VideoDetectiveIE(InfoExtractor):
-    _VALID_URL = r'https?://www\.videodetective\.com/[^/]+/[^/]+/(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www\.)?videodetective\.com/[^/]+/[^/]+/(?P<id>\d+)'
 
     _TEST = {
         'url': 'http://www.videodetective.com/movies/kick-ass-2/194487',
index 04e95c66e91eec8368d325d1086ac59dddf7656f..7f25665864c696757903deeb582a64f16eec0d85 100644 (file)
@@ -6,8 +6,7 @@ import re
 from .common import InfoExtractor
 from ..utils import (
     int_or_none,
-    parse_age_limit,
-    parse_iso8601,
+    xpath_element,
     xpath_text,
 )
 
@@ -17,38 +16,32 @@ class VideomoreIE(InfoExtractor):
     _VALID_URL = r'videomore:(?P<sid>\d+)$|https?://videomore\.ru/(?:(?:embed|[^/]+/[^/]+)/|[^/]+\?.*\btrack_id=)(?P<id>\d+)(?:[/?#&]|\.(?:xml|json)|$)'
     _TESTS = [{
         'url': 'http://videomore.ru/kino_v_detalayah/5_sezon/367617',
-        'md5': '70875fbf57a1cd004709920381587185',
+        'md5': '44455a346edc0d509ac5b5a5b531dc35',
         'info_dict': {
             'id': '367617',
             'ext': 'flv',
-            'title': 'В гостях Алексей Чумаков и Юлия Ковальчук',
-            'description': 'В гостях – лучшие романтические комедии года, «Выживший» Иньярриту и «Стив Джобс» Дэнни Бойла.',
+            'title': 'Кино в деталях 5 сезон В гостях Алексей Чумаков и Юлия Ковальчук',
             'series': 'Кино в деталях',
             'episode': 'В гостях Алексей Чумаков и Юлия Ковальчук',
-            'episode_number': None,
-            'season': 'Сезон 2015',
-            'season_number': 5,
             'thumbnail': 're:^https?://.*\.jpg',
             'duration': 2910,
-            'age_limit': 16,
             'view_count': int,
+            'comment_count': int,
+            'age_limit': 16,
         },
     }, {
         'url': 'http://videomore.ru/embed/259974',
         'info_dict': {
             'id': '259974',
             'ext': 'flv',
-            'title': '80 серия',
-            'description': '«Медведей» ждет решающий матч. Макеев выясняет отношения со Стрельцовым. Парни узнают подробности прошлого Макеева.',
+            'title': 'Молодежка 2 сезон 40 серия',
             'series': 'Молодежка',
-            'episode': '80 серия',
-            'episode_number': 40,
-            'season': '2 сезон',
-            'season_number': 2,
+            'episode': '40 серия',
             'thumbnail': 're:^https?://.*\.jpg',
             'duration': 2809,
-            'age_limit': 16,
             'view_count': int,
+            'comment_count': int,
+            'age_limit': 16,
         },
         'params': {
             'skip_download': True,
@@ -58,13 +51,8 @@ class VideomoreIE(InfoExtractor):
         'info_dict': {
             'id': '341073',
             'ext': 'flv',
-            'title': 'Команда проиграла из-за Бакина?',
-            'description': 'Молодежка 3 сезон скоро',
-            'series': 'Молодежка',
+            'title': 'Промо Команда проиграла из-за Бакина?',
             'episode': 'Команда проиграла из-за Бакина?',
-            'episode_number': None,
-            'season': 'Промо',
-            'season_number': 99,
             'thumbnail': 're:^https?://.*\.jpg',
             'duration': 29,
             'age_limit': 16,
@@ -96,8 +84,13 @@ class VideomoreIE(InfoExtractor):
     @staticmethod
     def _extract_url(webpage):
         mobj = re.search(
-            r'<object[^>]+data=(["\'])https?://videomore.ru/player\.swf\?.*config=(?P<url>https?://videomore\.ru/(?:[^/]+/)+\d+\.xml).*\1',
+            r'<object[^>]+data=(["\'])https?://videomore\.ru/player\.swf\?.*config=(?P<url>https?://videomore\.ru/(?:[^/]+/)+\d+\.xml).*\1',
             webpage)
+        if not mobj:
+            mobj = re.search(
+                r'<iframe[^>]+src=([\'"])(?P<url>https?://videomore\.ru/embed/\d+)',
+                webpage)
+
         if mobj:
             return mobj.group('url')
 
@@ -109,43 +102,33 @@ class VideomoreIE(InfoExtractor):
             'http://videomore.ru/video/tracks/%s.xml' % video_id,
             video_id, 'Downloading video XML')
 
-        video_url = xpath_text(video, './/video_url', 'video url', fatal=True)
+        item = xpath_element(video, './/playlist/item', fatal=True)
+
+        title = xpath_text(
+            item, ('./title', './episode_name'), 'title', fatal=True)
+
+        video_url = xpath_text(item, './video_url', 'video url', fatal=True)
         formats = self._extract_f4m_formats(video_url, video_id, f4m_id='hds')
         self._sort_formats(formats)
 
-        data = self._download_json(
-            'http://videomore.ru/video/tracks/%s.json' % video_id,
-            video_id, 'Downloading video JSON')
-
-        title = data.get('title') or data['project_title']
-        description = data.get('description') or data.get('description_raw')
-        timestamp = parse_iso8601(data.get('published_at'))
-        duration = int_or_none(data.get('duration'))
-        view_count = int_or_none(data.get('views'))
-        age_limit = parse_age_limit(data.get('min_age'))
-        thumbnails = [{
-            'url': thumbnail,
-        } for thumbnail in data.get('big_thumbnail_urls', [])]
-
-        series = data.get('project_title')
-        episode = data.get('title')
-        episode_number = int_or_none(data.get('episode_of_season') or None)
-        season = data.get('season_title')
-        season_number = int_or_none(data.get('season_pos') or None)
+        thumbnail = xpath_text(item, './thumbnail_url')
+        duration = int_or_none(xpath_text(item, './duration'))
+        view_count = int_or_none(xpath_text(item, './views'))
+        comment_count = int_or_none(xpath_text(item, './count_comments'))
+        age_limit = int_or_none(xpath_text(item, './min_age'))
+
+        series = xpath_text(item, './project_name')
+        episode = xpath_text(item, './episode_name')
 
         return {
             'id': video_id,
             'title': title,
-            'description': description,
             'series': series,
             'episode': episode,
-            'episode_number': episode_number,
-            'season': season,
-            'season_number': season_number,
-            'thumbnails': thumbnails,
-            'timestamp': timestamp,
+            'thumbnail': thumbnail,
             'duration': duration,
             'view_count': view_count,
+            'comment_count': comment_count,
             'age_limit': age_limit,
             'formats': formats,
         }
index d49cc6cbc567a8a0219f304a52707bd4129d1119..9950c62ad636ee4f03389bef627da4318f019c22 100644 (file)
@@ -1,10 +1,14 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
+import re
+
 from .jwplatform import JWPlatformBaseIE
 from ..utils import (
     decode_packed_codes,
     js_to_json,
+    NO_DEFAULT,
+    PACKED_CODES_RE,
 )
 
 
@@ -35,10 +39,17 @@ class VidziIE(JWPlatformBaseIE):
         title = self._html_search_regex(
             r'(?s)<h2 class="video-title">(.*?)</h2>', webpage, 'title')
 
-        code = decode_packed_codes(webpage).replace('\\\'', '\'')
-        jwplayer_data = self._parse_json(
-            self._search_regex(r'setup\(([^)]+)\)', code, 'jwplayer data'),
-            video_id, transform_source=js_to_json)
+        packed_codes = [mobj.group(0) for mobj in re.finditer(
+            PACKED_CODES_RE, webpage)]
+        for num, pc in enumerate(packed_codes, 1):
+            code = decode_packed_codes(pc).replace('\\\'', '\'')
+            jwplayer_data = self._parse_json(
+                self._search_regex(
+                    r'setup\(([^)]+)\)', code, 'jwplayer data',
+                    default=NO_DEFAULT if num == len(packed_codes) else '{}'),
+                video_id, transform_source=js_to_json)
+            if jwplayer_data:
+                break
 
         info_dict = self._parse_jwplayer_data(jwplayer_data, video_id, require_title=False)
         info_dict['title'] = title
index 6645c6186dbff315e850f22ae793677803cbbf9b..d26fb49b3939728e8a962b2ad3131c71fd223366 100644 (file)
@@ -48,8 +48,8 @@ class VierIE(InfoExtractor):
             [r'data-filename="([^"]+)"', r'"filename"\s*:\s*"([^"]+)"'],
             webpage, 'filename')
 
-        playlist_url = 'http://vod.streamcloud.be/%s/mp4:_definst_/%s.mp4/playlist.m3u8' % (application, filename)
-        formats = self._extract_m3u8_formats(playlist_url, display_id, 'mp4')
+        playlist_url = 'http://vod.streamcloud.be/%s/_definst_/mp4:%s.mp4/playlist.m3u8' % (application, filename)
+        formats = self._extract_wowza_formats(playlist_url, display_id, skip_protocols=['dash'])
         self._sort_formats(formats)
 
         title = self._og_search_title(webpage, default=display_id)
index 4351ac4571935fa3c3ace915c0b97f20e67ec18d..9c48701c1a568589e0a875f35fa800386c4a4058 100644 (file)
@@ -1,11 +1,12 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
-import json
-import time
-import hmac
 import hashlib
+import hmac
 import itertools
+import json
+import re
+import time
 
 from .common import InfoExtractor
 from ..utils import (
@@ -276,10 +277,14 @@ class VikiIE(VikiBaseIE):
             height = int_or_none(self._search_regex(
                 r'^(\d+)[pP]$', format_id, 'height', default=None))
             for protocol, format_dict in stream_dict.items():
+                # rtmps URLs do not seem to work
+                if protocol == 'rtmps':
+                    continue
+                format_url = format_dict['url']
                 if format_id == 'm3u8':
                     m3u8_formats = self._extract_m3u8_formats(
-                        format_dict['url'], video_id, 'mp4',
-                        entry_protocol='m3u8_native', preference=-1,
+                        format_url, video_id, 'mp4',
+                        entry_protocol='m3u8_native',
                         m3u8_id='m3u8-%s' % protocol, fatal=False)
                     # Despite CODECS metadata in m3u8 all video-only formats
                     # are actually video+audio
@@ -287,9 +292,23 @@ class VikiIE(VikiBaseIE):
                         if f.get('acodec') == 'none' and f.get('vcodec') != 'none':
                             f['acodec'] = None
                     formats.extend(m3u8_formats)
+                elif format_url.startswith('rtmp'):
+                    mobj = re.search(
+                        r'^(?P<url>rtmp://[^/]+/(?P<app>.+?))/(?P<playpath>mp4:.+)$',
+                        format_url)
+                    if not mobj:
+                        continue
+                    formats.append({
+                        'format_id': 'rtmp-%s' % format_id,
+                        'ext': 'flv',
+                        'url': mobj.group('url'),
+                        'play_path': mobj.group('playpath'),
+                        'app': mobj.group('app'),
+                        'page_url': url,
+                    })
                 else:
                     formats.append({
-                        'url': format_dict['url'],
+                        'url': format_url,
                         'format_id': '%s-%s' % (format_id, protocol),
                         'height': height,
                     })
index 7e854f3265eac3312f1b63199ce8633a17eb7d04..51c69a80c216889315a4c5fe070572100c13dd36 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 import json
@@ -322,6 +322,22 @@ class VimeoIE(VimeoBaseInfoExtractor):
             },
             'expected_warnings': ['Unable to download JSON metadata'],
         },
+        {
+            # redirects to ondemand extractor and should be passed through it
+            # for successful extraction
+            'url': 'https://vimeo.com/73445910',
+            'info_dict': {
+                'id': '73445910',
+                'ext': 'mp4',
+                'title': 'The Reluctant Revolutionary',
+                'uploader': '10Ft Films',
+                'uploader_url': 're:https?://(?:www\.)?vimeo\.com/tenfootfilms',
+                'uploader_id': 'tenfootfilms',
+            },
+            'params': {
+                'skip_download': True,
+            },
+        },
         {
             'url': 'http://vimeo.com/moogaloop.swf?clip_id=2539741',
             'only_matching': True,
@@ -351,24 +367,32 @@ class VimeoIE(VimeoBaseInfoExtractor):
     ]
 
     @staticmethod
-    def _extract_vimeo_url(url, webpage):
+    def _smuggle_referrer(url, referrer_url):
+        return smuggle_url(url, {'http_headers': {'Referer': referrer_url}})
+
+    @staticmethod
+    def _extract_urls(url, webpage):
+        urls = []
         # Look for embedded (iframe) Vimeo player
-        mobj = re.search(
-            r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//player\.vimeo\.com/video/.+?)\1', webpage)
-        if mobj:
-            player_url = unescapeHTML(mobj.group('url'))
-            surl = smuggle_url(player_url, {'http_headers': {'Referer': url}})
-            return surl
-        # Look for embedded (swf embed) Vimeo player
-        mobj = re.search(
-            r'<embed[^>]+?src="((?:https?:)?//(?:www\.)?vimeo\.com/moogaloop\.swf.+?)"', webpage)
-        if mobj:
-            return mobj.group(1)
-        # Look more for non-standard embedded Vimeo player
-        mobj = re.search(
-            r'<video[^>]+src=(?P<q1>[\'"])(?P<url>(?:https?:)?//(?:www\.)?vimeo\.com/[0-9]+)(?P=q1)', webpage)
-        if mobj:
-            return mobj.group('url')
+        for mobj in re.finditer(
+                r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//player\.vimeo\.com/video/.+?)\1',
+                webpage):
+            urls.append(VimeoIE._smuggle_referrer(unescapeHTML(mobj.group('url')), url))
+        PLAIN_EMBED_RE = (
+            # Look for embedded (swf embed) Vimeo player
+            r'<embed[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?vimeo\.com/moogaloop\.swf.+?)\1',
+            # Look more for non-standard embedded Vimeo player
+            r'<video[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?vimeo\.com/[0-9]+)\1',
+        )
+        for embed_re in PLAIN_EMBED_RE:
+            for mobj in re.finditer(embed_re, webpage):
+                urls.append(mobj.group('url'))
+        return urls
+
+    @staticmethod
+    def _extract_url(url, webpage):
+        urls = VimeoIE._extract_urls(url, webpage)
+        return urls[0] if urls else None
 
     def _verify_player_video_password(self, url, video_id):
         password = self._downloader.params.get('videopassword')
@@ -406,7 +430,12 @@ class VimeoIE(VimeoBaseInfoExtractor):
         # Retrieve video webpage to extract further information
         request = sanitized_Request(url, headers=headers)
         try:
-            webpage = self._download_webpage(request, video_id)
+            webpage, urlh = self._download_webpage_handle(request, video_id)
+            # Some URLs redirect to ondemand and can't be extracted with
+            # this extractor right away, thus they should be passed through
+            # the ondemand extractor (e.g. https://vimeo.com/73445910)
+            if VimeoOndemandIE.suitable(urlh.geturl()):
+                return self.url_result(urlh.geturl(), VimeoOndemandIE.ie_key())
         except ExtractorError as ee:
             if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 403:
                 errmsg = ee.cause.read()
@@ -585,6 +614,20 @@ class VimeoOndemandIE(VimeoBaseInfoExtractor):
             'uploader_url': 're:https?://(?:www\.)?vimeo\.com/gumfilms',
             'uploader_id': 'gumfilms',
         },
+    }, {
+        # requires Referer to be passed along with og:video:url
+        'url': 'https://vimeo.com/ondemand/36938/126682985',
+        'info_dict': {
+            'id': '126682985',
+            'ext': 'mp4',
+            'title': 'Rävlock, rätt läte på rätt plats',
+            'uploader': 'Lindroth & Norin',
+            'uploader_url': 're:https?://(?:www\.)?vimeo\.com/user14430847',
+            'uploader_id': 'user14430847',
+        },
+        'params': {
+            'skip_download': True,
+        },
     }, {
         'url': 'https://vimeo.com/ondemand/nazmaalik',
         'only_matching': True,
@@ -599,7 +642,12 @@ class VimeoOndemandIE(VimeoBaseInfoExtractor):
     def _real_extract(self, url):
         video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
-        return self.url_result(self._og_search_video_url(webpage), VimeoIE.ie_key())
+        return self.url_result(
+            # Some videos require Referer to be passed along with og:video:url
+            # similarly to generic vimeo embeds (e.g.
+            # https://vimeo.com/ondemand/36938/126682985).
+            VimeoIE._smuggle_referrer(self._og_search_video_url(webpage), url),
+            VimeoIE.ie_key())
 
 
 class VimeoChannelIE(VimeoBaseInfoExtractor):
@@ -810,6 +858,7 @@ class VimeoReviewIE(VimeoBaseInfoExtractor):
         'params': {
             'videopassword': 'holygrail',
         },
+        'skip': 'video gone',
     }]
 
     def _real_initialize(self):
@@ -817,9 +866,10 @@ class VimeoReviewIE(VimeoBaseInfoExtractor):
 
     def _get_config_url(self, webpage_url, video_id, video_password_verified=False):
         webpage = self._download_webpage(webpage_url, video_id)
-        config_url = self._html_search_regex(
-            r'data-config-url="([^"]+)"', webpage, 'config URL',
-            default=NO_DEFAULT if video_password_verified else None)
+        data = self._parse_json(self._search_regex(
+            r'window\s*=\s*_extend\(window,\s*({.+?})\);', webpage, 'data',
+            default=NO_DEFAULT if video_password_verified else '{}'), video_id)
+        config_url = data.get('vimeo_esi', {}).get('config', {}).get('configUrl')
         if config_url is None:
             self._verify_video_password(webpage_url, video_id, webpage)
             config_url = self._get_config_url(
index 92321d66e369626c0adfeda6cb4fae282a6f7abb..7fd9b777b4b6bb88cd08e9e625f74f41e8775092 100644 (file)
@@ -28,23 +28,24 @@ class SprutoBaseIE(InfoExtractor):
 
 class VimpleIE(SprutoBaseIE):
     IE_DESC = 'Vimple - one-click video hosting'
-    _VALID_URL = r'https?://(?:player\.vimple\.ru/iframe|vimple\.ru)/(?P<id>[\da-f-]{32,36})'
-    _TESTS = [
-        {
-            'url': 'http://vimple.ru/c0f6b1687dcd4000a97ebe70068039cf',
-            'md5': '2e750a330ed211d3fd41821c6ad9a279',
-            'info_dict': {
-                'id': 'c0f6b168-7dcd-4000-a97e-be70068039cf',
-                'ext': 'mp4',
-                'title': 'Sunset',
-                'duration': 20,
-                'thumbnail': 're:https?://.*?\.jpg',
-            },
-        }, {
-            'url': 'http://player.vimple.ru/iframe/52e1beec-1314-4a83-aeac-c61562eadbf9',
-            'only_matching': True,
-        }
-    ]
+    _VALID_URL = r'https?://(?:player\.vimple\.(?:ru|co)/iframe|vimple\.(?:ru|co))/(?P<id>[\da-f-]{32,36})'
+    _TESTS = [{
+        'url': 'http://vimple.ru/c0f6b1687dcd4000a97ebe70068039cf',
+        'md5': '2e750a330ed211d3fd41821c6ad9a279',
+        'info_dict': {
+            'id': 'c0f6b168-7dcd-4000-a97e-be70068039cf',
+            'ext': 'mp4',
+            'title': 'Sunset',
+            'duration': 20,
+            'thumbnail': 're:https?://.*?\.jpg',
+        },
+    }, {
+        'url': 'http://player.vimple.ru/iframe/52e1beec-1314-4a83-aeac-c61562eadbf9',
+        'only_matching': True,
+    }, {
+        'url': 'http://vimple.co/04506a053f124483b8fb05ed73899f19',
+        'only_matching': True,
+    }]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
index 3ee66e23e22b1d5350148d5cb21c2f55e4885f75..1990e7093acabb2dce11faebfddd220e8d88392b 100644 (file)
@@ -1,8 +1,8 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
+import collections
 import re
-import json
 import sys
 
 from .common import InfoExtractor
@@ -16,15 +16,15 @@ from ..utils import (
     get_element_by_class,
     int_or_none,
     orderedSet,
-    parse_duration,
     remove_start,
     str_to_int,
     unescapeHTML,
-    unified_strdate,
+    unified_timestamp,
     urlencode_postdata,
 )
-from .vimeo import VimeoIE
+from .dailymotion import DailymotionIE
 from .pladform import PladformIE
+from .vimeo import VimeoIE
 
 
 class VKBaseIE(InfoExtractor):
@@ -52,8 +52,9 @@ class VKBaseIE(InfoExtractor):
         # what actually happens.
         # We will workaround this VK issue by resetting the remixlhk cookie to
         # the first one manually.
-        cookies = url_handle.headers.get('Set-Cookie')
-        if cookies:
+        for header, cookies in url_handle.headers.items():
+            if header.lower() != 'set-cookie':
+                continue
             if sys.version_info[0] >= 3:
                 cookies = cookies.encode('iso-8859-1')
             cookies = cookies.decode('utf-8')
@@ -61,6 +62,7 @@ class VKBaseIE(InfoExtractor):
             if remixlhk:
                 value, domain = remixlhk.groups()
                 self._set_cookie(domain, 'remixlhk', value)
+                break
 
         login_page = self._download_webpage(
             'https://login.vk.com/?act=login', None,
@@ -103,6 +105,7 @@ class VKIE(VKBaseIE):
                 'title': 'ProtivoGunz - Хуёвая песня',
                 'uploader': 're:(?:Noize MC|Alexander Ilyashenko).*',
                 'duration': 195,
+                'timestamp': 1329060660,
                 'upload_date': '20120212',
                 'view_count': int,
             },
@@ -116,6 +119,7 @@ class VKIE(VKBaseIE):
                 'uploader': 'Tom Cruise',
                 'title': 'No name',
                 'duration': 9,
+                'timestamp': 1374374880,
                 'upload_date': '20130721',
                 'view_count': int,
             }
@@ -192,6 +196,7 @@ class VKIE(VKBaseIE):
                 'upload_date': '20150709',
                 'view_count': int,
             },
+            'skip': 'Removed',
         },
         {
             # youtube embed
@@ -208,6 +213,23 @@ class VKIE(VKBaseIE):
                 'view_count': int,
             },
         },
+        {
+            # dailymotion embed
+            'url': 'https://vk.com/video-37468416_456239855',
+            'info_dict': {
+                'id': 'k3lz2cmXyRuJQSjGHUv',
+                'ext': 'mp4',
+                'title': 'md5:d52606645c20b0ddbb21655adaa4f56f',
+                'description': 'md5:c651358f03c56f1150b555c26d90a0fd',
+                'uploader': 'AniLibria.Tv',
+                'upload_date': '20160914',
+                'uploader_id': 'x1p5vl5',
+                'timestamp': 1473877246,
+            },
+            'params': {
+                'skip_download': True,
+            },
+        },
         {
             # video key is extra_data not url\d+
             'url': 'http://vk.com/video-110305615_171782105',
@@ -217,10 +239,30 @@ class VKIE(VKBaseIE):
                 'ext': 'mp4',
                 'title': 'S-Dance, репетиции к The way show',
                 'uploader': 'THE WAY SHOW | 17 апреля',
+                'timestamp': 1454870100,
                 'upload_date': '20160207',
                 'view_count': int,
             },
         },
+        {
+            # finished live stream, live_mp4
+            'url': 'https://vk.com/videos-387766?z=video-387766_456242764%2Fpl_-387766_-2',
+            'md5': '90d22d051fccbbe9becfccc615be6791',
+            'info_dict': {
+                'id': '456242764',
+                'ext': 'mp4',
+                'title': 'ИгроМир 2016 — день 1',
+                'uploader': 'Игромания',
+                'duration': 5239,
+                'view_count': int,
+            },
+        },
+        {
+            # live stream, hls and rtmp links, most likely already finished live
+            # stream by the time you are reading this comment
+            'url': 'https://vk.com/video-140332_456239111',
+            'only_matching': True,
+        },
         {
             # removed video, just testing that we match the pattern
             'url': 'http://vk.com/feed?z=video-43215063_166094326%2Fbb50cacd3177146d7a',
@@ -298,7 +340,7 @@ class VKIE(VKBaseIE):
         if youtube_url:
             return self.url_result(youtube_url, 'Youtube')
 
-        vimeo_url = VimeoIE._extract_vimeo_url(url, info_page)
+        vimeo_url = VimeoIE._extract_url(url, info_page)
         if vimeo_url is not None:
             return self.url_result(vimeo_url)
 
@@ -313,6 +355,10 @@ class VKIE(VKBaseIE):
                 m_rutube.group(1).replace('\\', ''))
             return self.url_result(rutube_url)
 
+        dailymotion_urls = DailymotionIE._extract_urls(info_page)
+        if dailymotion_urls:
+            return self.url_result(dailymotion_urls[0], DailymotionIE.ie_key())
+
         m_opts = re.search(r'(?s)var\s+opts\s*=\s*({.+?});', info_page)
         if m_opts:
             m_opts_url = re.search(r"url\s*:\s*'((?!/\b)[^']+)", m_opts.group(1))
@@ -322,45 +368,64 @@ class VKIE(VKBaseIE):
                     opts_url = 'http:' + opts_url
                 return self.url_result(opts_url)
 
-        data_json = self._search_regex(r'var\s+vars\s*=\s*({.+?});', info_page, 'vars')
-        data = json.loads(data_json)
+        # vars does not seem to be served anymore since 24.10.2016
+        data = self._parse_json(
+            self._search_regex(
+                r'var\s+vars\s*=\s*({.+?});', info_page, 'vars', default='{}'),
+            video_id, fatal=False)
+
+        # <!json> is served instead
+        if not data:
+            data = self._parse_json(
+                self._search_regex(
+                    r'<!json>\s*({.+?})\s*<!>', info_page, 'json'),
+                video_id)['player']['params'][0]
+
+        title = unescapeHTML(data['md_title'])
 
-        # Extract upload date
-        upload_date = None
-        mobj = re.search(r'id="mv_date(?:_views)?_wrap"[^>]*>([a-zA-Z]+ [0-9]+), ([0-9]+) at', info_page)
-        if mobj is not None:
-            mobj.group(1) + ' ' + mobj.group(2)
-            upload_date = unified_strdate(mobj.group(1) + ' ' + mobj.group(2))
+        if data.get('live') == 2:
+            title = self._live_title(title)
 
-        view_count = None
-        views = self._html_search_regex(
-            r'"mv_views_count_number"[^>]*>(.+?\bviews?)<',
-            info_page, 'view count', default=None)
-        if views:
-            view_count = str_to_int(self._search_regex(
-                r'([\d,.]+)', views, 'view count', fatal=False))
+        timestamp = unified_timestamp(self._html_search_regex(
+            r'class=["\']mv_info_date[^>]+>([^<]+)(?:<|from)', info_page,
+            'upload date', fatal=False))
+
+        view_count = str_to_int(self._search_regex(
+            r'class=["\']mv_views_count[^>]+>\s*([\d,.]+)',
+            info_page, 'view count', fatal=False))
 
         formats = []
-        for k, v in data.items():
-            if not k.startswith('url') and not k.startswith('cache') and k != 'extra_data' or not v:
+        for format_id, format_url in data.items():
+            if not isinstance(format_url, compat_str) or not format_url.startswith(('http', '//', 'rtmp')):
                 continue
-            height = int_or_none(self._search_regex(
-                r'^(?:url|cache)(\d+)', k, 'height', default=None))
-            formats.append({
-                'format_id': k,
-                'url': v,
-                'height': height,
-            })
+            if format_id.startswith(('url', 'cache')) or format_id in ('extra_data', 'live_mp4'):
+                height = int_or_none(self._search_regex(
+                    r'^(?:url|cache)(\d+)', format_id, 'height', default=None))
+                formats.append({
+                    'format_id': format_id,
+                    'url': format_url,
+                    'height': height,
+                })
+            elif format_id == 'hls':
+                formats.extend(self._extract_m3u8_formats(
+                    format_url, video_id, 'mp4', m3u8_id=format_id,
+                    fatal=False, live=True))
+            elif format_id == 'rtmp':
+                formats.append({
+                    'format_id': format_id,
+                    'url': format_url,
+                    'ext': 'flv',
+                })
         self._sort_formats(formats)
 
         return {
-            'id': compat_str(data['vid']),
+            'id': compat_str(data.get('vid') or video_id),
             'formats': formats,
-            'title': unescapeHTML(data['md_title']),
+            'title': title,
             'thumbnail': data.get('jpg'),
             'uploader': data.get('md_author'),
             'duration': data.get('duration'),
-            'upload_date': upload_date,
+            'timestamp': timestamp,
             'view_count': view_count,
         }
 
@@ -445,6 +510,9 @@ class VKWallPostIE(VKBaseIE):
                 'skip_download': True,
             },
         }],
+        'params': {
+            'usenetrc': True,
+        },
         'skip': 'Requires vk account credentials',
     }, {
         # single YouTube embed, no leading -
@@ -454,6 +522,9 @@ class VKWallPostIE(VKBaseIE):
             'title': 'Sergey Gorbunov - Wall post 85155021_6319',
         },
         'playlist_count': 1,
+        'params': {
+            'usenetrc': True,
+        },
         'skip': 'Requires vk account credentials',
     }, {
         # wall page URL
@@ -481,37 +552,41 @@ class VKWallPostIE(VKBaseIE):
             raise ExtractorError('VK said: %s' % error, expected=True)
 
         description = clean_html(get_element_by_class('wall_post_text', webpage))
-        uploader = clean_html(get_element_by_class(
-            'fw_post_author', webpage)) or self._og_search_description(webpage)
+        uploader = clean_html(get_element_by_class('author', webpage))
         thumbnail = self._og_search_thumbnail(webpage)
 
         entries = []
 
-        for audio in re.finditer(r'''(?sx)
-                            <input[^>]+
-                                id=(?P<q1>["\'])audio_info(?P<id>\d+_\d+).*?(?P=q1)[^>]+
-                                value=(?P<q2>["\'])(?P<url>http.+?)(?P=q2)
-                                .+?
-                            </table>''', webpage):
-            audio_html = audio.group(0)
-            audio_id = audio.group('id')
-            duration = parse_duration(get_element_by_class('duration', audio_html))
-            track = self._html_search_regex(
-                r'<span[^>]+id=["\']title%s[^>]*>([^<]+)' % audio_id,
-                audio_html, 'title', default=None)
-            artist = self._html_search_regex(
-                r'>([^<]+)</a></b>\s*&ndash', audio_html,
-                'artist', default=None)
-            entries.append({
-                'id': audio_id,
-                'url': audio.group('url'),
-                'title': '%s - %s' % (artist, track) if artist and track else audio_id,
-                'thumbnail': thumbnail,
-                'duration': duration,
-                'uploader': uploader,
-                'artist': artist,
-                'track': track,
-            })
+        audio_ids = re.findall(r'data-full-id=["\'](\d+_\d+)', webpage)
+        if audio_ids:
+            al_audio = self._download_webpage(
+                'https://vk.com/al_audio.php', post_id,
+                note='Downloading audio info', fatal=False,
+                data=urlencode_postdata({
+                    'act': 'reload_audio',
+                    'al': '1',
+                    'ids': ','.join(audio_ids)
+                }))
+            if al_audio:
+                Audio = collections.namedtuple(
+                    'Audio', ['id', 'user_id', 'url', 'track', 'artist', 'duration'])
+                audios = self._parse_json(
+                    self._search_regex(
+                        r'<!json>(.+?)<!>', al_audio, 'audios', default='[]'),
+                    post_id, fatal=False, transform_source=unescapeHTML)
+                if isinstance(audios, list):
+                    for audio in audios:
+                        a = Audio._make(audio[:6])
+                        entries.append({
+                            'id': '%s_%s' % (a.user_id, a.id),
+                            'url': a.url,
+                            'title': '%s - %s' % (a.artist, a.track) if a.artist and a.track else a.id,
+                            'thumbnail': thumbnail,
+                            'duration': a.duration,
+                            'uploader': uploader,
+                            'artist': a.artist,
+                            'track': a.track,
+                        })
 
         for video in re.finditer(
                 r'<a[^>]+href=(["\'])(?P<url>/video(?:-?[\d_]+).*?)\1', webpage):
index 8d671cca767d4592a5428f7d3ad855e952df5353..acf9fda487f6143906b8162a158c1cf9f53fec68 100644 (file)
@@ -17,7 +17,7 @@ from ..compat import compat_urllib_parse_urlencode
 class VLiveIE(InfoExtractor):
     IE_NAME = 'vlive'
     _VALID_URL = r'https?://(?:(?:www|m)\.)?vlive\.tv/video/(?P<id>[0-9]+)'
-    _TEST = {
+    _TESTS = [{
         'url': 'http://www.vlive.tv/video/1326',
         'md5': 'cc7314812855ce56de70a06a27314983',
         'info_dict': {
@@ -27,7 +27,20 @@ class VLiveIE(InfoExtractor):
             'creator': "Girl's Day",
             'view_count': int,
         },
-    }
+    }, {
+        'url': 'http://www.vlive.tv/video/16937',
+        'info_dict': {
+            'id': '16937',
+            'ext': 'mp4',
+            'title': '[V LIVE] 첸백시 걍방',
+            'creator': 'EXO',
+            'view_count': int,
+            'subtitles': 'mincount:12',
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
@@ -116,7 +129,7 @@ class VLiveIE(InfoExtractor):
 
         subtitles = {}
         for caption in playinfo.get('captions', {}).get('list', []):
-            lang = dict_get(caption, ('language', 'locale', 'country', 'label'))
+            lang = dict_get(caption, ('locale', 'language', 'country', 'label'))
             if lang and caption.get('source'):
                 subtitles[lang] = [{
                     'ext': 'vtt',
index a938a4007ead91a25ca84b43307ce16e1787e2e6..bbfa6e5f26f6043af52ae168b6e2cebb7463edfc 100644 (file)
@@ -1,4 +1,4 @@
-# -*- coding: utf-8 -*-
+# coding: utf-8
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
@@ -31,7 +31,8 @@ class VodlockerIE(InfoExtractor):
         if any(p in webpage for p in (
                 '>THIS FILE WAS DELETED<',
                 '>File Not Found<',
-                'The file you were looking for could not be found, sorry for any inconvenience.<')):
+                'The file you were looking for could not be found, sorry for any inconvenience.<',
+                '>The file was removed')):
             raise ExtractorError('Video %s does not exist' % video_id, expected=True)
 
         fields = self._hidden_inputs(webpage)
index b49542b162d82cca3f1c11d5a3b2c062b3c34c9f..239644340384b60c8e1a80d40b50cabbd0fd2c9e 100644 (file)
@@ -6,7 +6,7 @@ from ..utils import unescapeHTML
 
 
 class VODPlatformIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?vod-platform\.net/embed/(?P<id>[^/?#]+)'
+    _VALID_URL = r'https?://(?:www\.)?vod-platform\.net/[eE]mbed/(?P<id>[^/?#]+)'
     _TEST = {
         # from http://www.lbcgroup.tv/watch/chapter/29143/52844/%D8%A7%D9%84%D9%86%D8%B5%D8%B1%D8%A9-%D9%81%D9%8A-%D8%B6%D9%8A%D8%A7%D9%81%D8%A9-%D8%A7%D9%84%D9%80-cnn/ar
         'url': 'http://vod-platform.net/embed/RufMcytHDolTH1MuKHY9Fw',
@@ -25,29 +25,8 @@ class VODPlatformIE(InfoExtractor):
         title = unescapeHTML(self._og_search_title(webpage))
         hidden_inputs = self._hidden_inputs(webpage)
 
-        base_url = self._search_regex(
-            '(.*/)(?:playlist.m3u8|manifest.mpd)',
-            hidden_inputs.get('HiddenmyhHlsLink') or hidden_inputs['HiddenmyDashLink'],
-            'base url')
-        formats = self._extract_m3u8_formats(
-            base_url + 'playlist.m3u8', video_id, 'mp4',
-            'm3u8_native', m3u8_id='hls', fatal=False)
-        formats.extend(self._extract_mpd_formats(
-            base_url + 'manifest.mpd', video_id,
-            mpd_id='dash', fatal=False))
-        rtmp_formats = self._extract_smil_formats(
-            base_url + 'jwplayer.smil', video_id, fatal=False)
-        for rtmp_format in rtmp_formats:
-            rtsp_format = rtmp_format.copy()
-            rtsp_format['url'] = '%s/%s' % (rtmp_format['url'], rtmp_format['play_path'])
-            del rtsp_format['play_path']
-            del rtsp_format['ext']
-            rtsp_format.update({
-                'url': rtsp_format['url'].replace('rtmp://', 'rtsp://'),
-                'format_id': rtmp_format['format_id'].replace('rtmp', 'rtsp'),
-                'protocol': 'rtsp',
-            })
-            formats.extend([rtmp_format, rtsp_format])
+        formats = self._extract_wowza_formats(
+            hidden_inputs.get('HiddenmyhHlsLink') or hidden_inputs['HiddenmyDashLink'], video_id, skip_protocols=['f4m', 'smil'])
         self._sort_formats(formats)
 
         return {
index b1b32ad44ecfd796e46219a87ea71caa3587face..f8e33149398bde16115114bb0323d2f286ee9d42 100644 (file)
@@ -9,13 +9,16 @@ class VoxMediaIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?(?:theverge|vox|sbnation|eater|polygon|curbed|racked)\.com/(?:[^/]+/)*(?P<id>[^/?]+)'
     _TESTS = [{
         'url': 'http://www.theverge.com/2014/6/27/5849272/material-world-how-google-discovered-what-software-is-made-of',
-        'md5': '73856edf3e89a711e70d5cf7cb280b37',
         'info_dict': {
             'id': '11eXZobjrG8DCSTgrNjVinU-YmmdYjhe',
             'ext': 'mp4',
             'title': 'Google\'s new material design direction',
             'description': 'md5:2f44f74c4d14a1f800ea73e1c6832ad2',
         },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        },
         'add_ie': ['Ooyala'],
     }, {
         # data-ooyala-id
@@ -31,13 +34,16 @@ class VoxMediaIE(InfoExtractor):
     }, {
         # volume embed
         'url': 'http://www.vox.com/2016/3/31/11336640/mississippi-lgbt-religious-freedom-bill',
-        'md5': '375c483c5080ab8cd85c9c84cfc2d1e4',
         'info_dict': {
             'id': 'wydzk3dDpmRz7PQoXRsTIX6XTkPjYL0b',
             'ext': 'mp4',
             'title': 'The new frontier of LGBTQ civil rights, explained',
             'description': 'md5:0dc58e94a465cbe91d02950f770eb93f',
         },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        },
         'add_ie': ['Ooyala'],
     }, {
         # youtube embed
index bec7ab327008803f8609ea0e78e7d70577556940..00c72e34684f918e68fc859ad6ffb926efa04661 100644 (file)
@@ -5,7 +5,6 @@ import re
 
 from .common import InfoExtractor
 from ..utils import (
-    determine_ext,
     float_or_none,
 )
 
@@ -75,7 +74,6 @@ class VRTIE(InfoExtractor):
         },
         {
             'url': 'http://cobra.canvas.be/cm/cobra/videozone/rubriek/film-videozone/1.2377055',
-            'md5': '',
             'info_dict': {
                 'id': '2377055',
                 'ext': 'mp4',
@@ -119,39 +117,17 @@ class VRTIE(InfoExtractor):
                 video_id, 'mp4', m3u8_id='hls', fatal=False))
 
         if src:
-            if determine_ext(src) == 'm3u8':
-                formats.extend(self._extract_m3u8_formats(
-                    src, video_id, 'mp4', entry_protocol='m3u8_native',
-                    m3u8_id='hls', fatal=False))
-                formats.extend(self._extract_f4m_formats(
-                    src.replace('playlist.m3u8', 'manifest.f4m'),
-                    video_id, f4m_id='hds', fatal=False))
-                if 'data-video-geoblocking="true"' not in webpage:
-                    rtmp_formats = self._extract_smil_formats(
-                        src.replace('playlist.m3u8', 'jwplayer.smil'),
-                        video_id, fatal=False)
-                    formats.extend(rtmp_formats)
-                    for rtmp_format in rtmp_formats:
-                        rtmp_format_c = rtmp_format.copy()
-                        rtmp_format_c['url'] = '%s/%s' % (rtmp_format['url'], rtmp_format['play_path'])
-                        del rtmp_format_c['play_path']
-                        del rtmp_format_c['ext']
-                        http_format = rtmp_format_c.copy()
+            formats = self._extract_wowza_formats(src, video_id)
+            if 'data-video-geoblocking="true"' not in webpage:
+                for f in formats:
+                    if f['url'].startswith('rtsp://'):
+                        http_format = f.copy()
                         http_format.update({
-                            'url': rtmp_format_c['url'].replace('rtmp://', 'http://').replace('vod.', 'download.').replace('/_definst_/', '/').replace('mp4:', ''),
-                            'format_id': rtmp_format['format_id'].replace('rtmp', 'http'),
+                            'url': f['url'].replace('rtsp://', 'http://').replace('vod.', 'download.').replace('/_definst_/', '/').replace('mp4:', ''),
+                            'format_id': f['format_id'].replace('rtsp', 'http'),
                             'protocol': 'http',
                         })
-                        rtsp_format = rtmp_format_c.copy()
-                        rtsp_format.update({
-                            'url': rtsp_format['url'].replace('rtmp://', 'rtsp://'),
-                            'format_id': rtmp_format['format_id'].replace('rtmp', 'rtsp'),
-                            'protocol': 'rtsp',
-                        })
-                        formats.extend([http_format, rtsp_format])
-            else:
-                formats.extend(self._extract_f4m_formats(
-                    '%s/manifest.f4m' % src, video_id, f4m_id='hds', fatal=False))
+                        formats.append(http_format)
 
         if not formats and 'data-video-geoblocking="true"' in webpage:
             self.raise_geo_restricted('This video is only available in Belgium')
index b73da5cd073a6f4803c070b1cde3c4a3879dbaf1..55e087bdb47bff8d01d06e73ff4e9587e7e5eea8 100644 (file)
@@ -17,12 +17,12 @@ class VuClipIE(InfoExtractor):
     _VALID_URL = r'https?://(?:m\.)?vuclip\.com/w\?.*?cid=(?P<id>[0-9]+)'
 
     _TEST = {
-        'url': 'http://m.vuclip.com/w?cid=922692425&fid=70295&z=1010&nvar&frm=index.html',
+        'url': 'http://m.vuclip.com/w?cid=1129900602&bu=8589892792&frm=w&z=34801&op=0&oc=843169247&section=recommend',
         'info_dict': {
-            'id': '922692425',
+            'id': '1129900602',
             'ext': '3gp',
-            'title': 'The Toy Soldiers - Hollywood Movie Trailer',
-            'duration': 177,
+            'title': 'Top 10 TV Convicts',
+            'duration': 733,
         }
     }
 
@@ -54,7 +54,7 @@ class VuClipIE(InfoExtractor):
                 'url': video_url,
             }]
         else:
-            formats = self._parse_html5_media_entries(url, webpage)[0]['formats']
+            formats = self._parse_html5_media_entries(url, webpage, video_id)[0]['formats']
 
         title = remove_end(self._html_search_regex(
             r'<title>(.*?)-\s*Vuclip</title>', webpage, 'title').strip(), ' - Video')
diff --git a/youtube_dl/extractor/vyborymos.py b/youtube_dl/extractor/vyborymos.py
new file mode 100644 (file)
index 0000000..9e703c4
--- /dev/null
@@ -0,0 +1,55 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_str
+
+
+class VyboryMosIE(InfoExtractor):
+    _VALID_URL = r'https?://vybory\.mos\.ru/(?:#precinct/|account/channels\?.*?\bstation_id=)(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'http://vybory.mos.ru/#precinct/13636',
+        'info_dict': {
+            'id': '13636',
+            'ext': 'mp4',
+            'title': 're:^Участковая избирательная комиссия №2231 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+            'description': 'Россия, Москва, улица Введенского, 32А',
+            'is_live': True,
+        },
+        'params': {
+            'skip_download': True,
+        }
+    }, {
+        'url': 'http://vybory.mos.ru/account/channels?station_id=13636',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        station_id = self._match_id(url)
+
+        channels = self._download_json(
+            'http://vybory.mos.ru/account/channels?station_id=%s' % station_id,
+            station_id, 'Downloading channels JSON')
+        # One format per (camera, host) pair; each channel is (sid, hosts, name, _).
+        formats = []
+        for cam_num, (sid, hosts, name, _) in enumerate(channels, 1):
+            for num, host in enumerate(hosts, 1):
+                formats.append({
+                    'url': 'http://%s/master.m3u8?sid=%s' % (host, sid),
+                    'ext': 'mp4',
+                    'format_id': 'camera%d-host%d' % (cam_num, num),
+                    'format_note': '%s, %s' % (name, host),
+                })
+        # Station metadata is optional: a failed non-fatal download must not crash.
+        info = self._download_json(
+            'http://vybory.mos.ru/json/voting_stations/%s/%s.json'
+            % (compat_str(station_id)[:3], station_id),
+            station_id, 'Downloading station JSON', fatal=False) or {}
+
+        return {
+            'id': station_id,
+            'title': self._live_title(info.get('name') or station_id),
+            'description': info.get('address'),
+            'is_live': True,
+            'formats': formats,
+        }
diff --git a/youtube_dl/extractor/vzaar.py b/youtube_dl/extractor/vzaar.py
new file mode 100644 (file)
index 0000000..b270f08
--- /dev/null
@@ -0,0 +1,55 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    int_or_none,
+    float_or_none,
+)
+
+
+class VzaarIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:(?:www|view)\.)?vzaar\.com/(?:videos/)?(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'https://vzaar.com/videos/1152805',
+        'md5': 'bde5ddfeb104a6c56a93a06b04901dbf',
+        'info_dict': {
+            'id': '1152805',
+            'ext': 'mp4',
+            'title': 'sample video (public)',
+        },
+    }, {
+        'url': 'https://view.vzaar.com/27272/player',
+        'md5': '3b50012ac9bbce7f445550d54e0508f2',
+        'info_dict': {
+            'id': '27272',
+            'ext': 'mp3',
+            'title': 'MP3',
+        },
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        video_data = self._download_json(
+            'http://view.vzaar.com/v2/%s/video' % video_id, video_id)
+        source_url = video_data['sourceUrl']
+        # Fields shared by audio and video items; codec/size/ext are added below.
+        info = {
+            'id': video_id,
+            'title': video_data['videoTitle'],
+            'url': source_url,
+            'thumbnail': self._proto_relative_url(video_data.get('poster')),
+            'duration': float_or_none(video_data.get('videoDuration')),
+        }
+        if 'audio' in source_url:  # 'audio' in the URL marks an audio-only item
+            info.update({
+                'vcodec': 'none',
+                'ext': 'mp3',
+            })
+        else:
+            info.update({
+                'width': int_or_none(video_data.get('width')),
+                'height': int_or_none(video_data.get('height')),
+                'ext': 'mp4',
+            })
+        return info
index 9f1b8b4b5f1bbe6f5d34d5e31bcc5596c35543f7..20fef1f04ea776ba21869dfca9e46bd6af591c9f 100644 (file)
@@ -86,38 +86,50 @@ class WatIE(InfoExtractor):
 
         def extract_url(path_template, url_type):
             req_url = 'http://www.wat.tv/get/%s' % (path_template % video_id)
-            head = self._request_webpage(HEADRequest(req_url), video_id, 'Extracting %s url' % url_type)
-            red_url = head.geturl()
-            if req_url == red_url:
-                raise ExtractorError(
-                    '%s said: Sorry, this video is not available from your country.' % self.IE_NAME,
-                    expected=True)
-            return red_url
+            head = self._request_webpage(HEADRequest(req_url), video_id, 'Extracting %s url' % url_type, fatal=False)
+            if head:
+                red_url = head.geturl()
+                if req_url != red_url:
+                    return red_url
+            return None
+
+        def remove_bitrate_limit(manifest_url):
+            return re.sub(r'(?:max|min)_bitrate=\d+&?', '', manifest_url)
 
         formats = []
         try:
-            http_url = extract_url('android5/%s.mp4', 'http')
-            m3u8_url = extract_url('ipad/%s.m3u8', 'm3u8')
-            m3u8_formats = self._extract_m3u8_formats(
-                m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls')
-            formats.extend(m3u8_formats)
-            formats.extend(self._extract_f4m_formats(
-                m3u8_url.replace('ios.', 'web.').replace('.m3u8', '.f4m'),
-                video_id, f4m_id='hds', fatal=False))
-            for m3u8_format in m3u8_formats:
-                vbr, abr = m3u8_format.get('vbr'), m3u8_format.get('abr')
-                if not vbr or not abr:
-                    continue
-                format_id = m3u8_format['format_id'].replace('hls', 'http')
-                fmt_url = re.sub(r'%s-\d+00-\d+' % video_id, '%s-%d00-%d' % (video_id, round(vbr / 100), round(abr)), http_url)
-                if self._is_valid_url(fmt_url, video_id, format_id):
-                    f = m3u8_format.copy()
-                    f.update({
-                        'url': fmt_url,
-                        'format_id': format_id,
-                        'protocol': 'http',
-                    })
-                    formats.append(f)
+            manifest_urls = self._download_json(
+                'http://www.wat.tv/get/webhtml/' + video_id, video_id)
+            m3u8_url = manifest_urls.get('hls')
+            if m3u8_url:
+                m3u8_url = remove_bitrate_limit(m3u8_url)
+                m3u8_formats = self._extract_m3u8_formats(
+                    m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)
+                if m3u8_formats:
+                    formats.extend(m3u8_formats)
+                    formats.extend(self._extract_f4m_formats(
+                        m3u8_url.replace('ios', 'web').replace('.m3u8', '.f4m'),
+                        video_id, f4m_id='hds', fatal=False))
+                    http_url = extract_url('android5/%s.mp4', 'http')
+                    if http_url:
+                        for m3u8_format in m3u8_formats:
+                            vbr, abr = m3u8_format.get('vbr'), m3u8_format.get('abr')
+                            if not vbr or not abr:
+                                continue
+                            format_id = m3u8_format['format_id'].replace('hls', 'http')
+                            fmt_url = re.sub(r'%s-\d+00-\d+' % video_id, '%s-%d00-%d' % (video_id, round(vbr / 100), round(abr)), http_url)
+                            if self._is_valid_url(fmt_url, video_id, format_id):
+                                f = m3u8_format.copy()
+                                f.update({
+                                    'url': fmt_url,
+                                    'format_id': format_id,
+                                    'protocol': 'http',
+                                })
+                                formats.append(f)
+            mpd_url = manifest_urls.get('mpd')
+            if mpd_url:
+                formats.extend(self._extract_mpd_formats(remove_bitrate_limit(
+                    mpd_url), video_id, mpd_id='dash', fatal=False))
             self._sort_formats(formats)
         except ExtractorError:
             abr = 64
index 390f9e8302f392a25c83af2520cb30b854500632..f7e6360a33e8b6d2cc3096232bfa1d2c458ab3c7 100644 (file)
@@ -1,4 +1,4 @@
-# -*- coding: utf-8 -*-
+# coding: utf-8
 from __future__ import unicode_literals
 
 import re
diff --git a/youtube_dl/extractor/webcaster.py b/youtube_dl/extractor/webcaster.py
new file mode 100644 (file)
index 0000000..7486cb3
--- /dev/null
@@ -0,0 +1,102 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    determine_ext,
+    xpath_text,
+)
+
+
+class WebcasterIE(InfoExtractor):
+    _VALID_URL = r'https?://bl\.webcaster\.pro/(?:quote|media)/start/free_(?P<id>[^/]+)'
+    _TESTS = [{
+        # http://video.khl.ru/quotes/393859
+        'url': 'http://bl.webcaster.pro/quote/start/free_c8cefd240aa593681c8d068cff59f407_hd/q393859/eb173f99dd5f558674dae55f4ba6806d/1480289104?sr%3D105%26fa%3D1%26type_id%3D18',
+        'md5': '0c162f67443f30916ff1c89425dcd4cd',
+        'info_dict': {
+            'id': 'c8cefd240aa593681c8d068cff59f407_hd',
+            'ext': 'mp4',
+            'title': 'Сибирь - Нефтехимик. Лучшие моменты первого периода',
+            'thumbnail': 're:^https?://.*\.jpg$',
+        },
+    }, {
+        'url': 'http://bl.webcaster.pro/media/start/free_6246c7a4453ac4c42b4398f840d13100_hd/2_2991109016/e8d0d82587ef435480118f9f9c41db41/4635726126',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        # The matched URL itself is fetched and parsed as XML.
+        video = self._download_xml(url, video_id)
+
+        title = xpath_text(video, './/event_name', 'event name', fatal=True)
+        # Join the non-empty parts, e.g. ('track', None) -> 'track'.
+        def make_id(parts, separator):
+            return separator.join(filter(None, parts))
+        # Scan <track> and <track_noise> elements found under <iphone>.
+        formats = []
+        for format_id in (None, 'noise'):
+            track_tag = make_id(('track', format_id), '_')
+            for track in video.findall('.//iphone/%s' % track_tag):
+                track_url = track.text
+                if not track_url:
+                    continue
+                if determine_ext(track_url) == 'm3u8':
+                    m3u8_formats = self._extract_m3u8_formats(
+                        track_url, video_id, 'mp4',
+                        entry_protocol='m3u8_native',
+                        m3u8_id=make_id(('hls', format_id), '-'), fatal=False)
+                    for f in m3u8_formats:
+                        f.update({
+                            'source_preference': 0 if format_id == 'noise' else 1,
+                            'format_note': track.get('title'),
+                        })
+                    formats.extend(m3u8_formats)
+        self._sort_formats(formats)
+
+        thumbnail = xpath_text(video, './/image', 'thumbnail')
+
+        return {
+            'id': video_id,
+            'title': title,
+            'thumbnail': thumbnail,
+            'formats': formats,
+        }
+
+
+class WebcasterFeedIE(InfoExtractor):
+    _VALID_URL = r'https?://bl\.webcaster\.pro/feed/start/free_(?P<id>[^/]+)'
+    _TEST = {
+        'url': 'http://bl.webcaster.pro/feed/start/free_c8cefd240aa593681c8d068cff59f407_hd/q393859/eb173f99dd5f558674dae55f4ba6806d/1480289104',
+        'only_matching': True,
+    }
+
+    @staticmethod
+    def _extract_url(ie, webpage):
+        mobj = re.search(
+            r'<(?:object|a[^>]+class=["\']webcaster-player["\'])[^>]+data(?:-config)?=(["\']).*?config=(?P<url>https?://bl\.webcaster\.pro/feed/start/free_.*?)(?:[?&]|\1)',
+            webpage)
+        if mobj:
+            return mobj.group('url')
+        for secure in (True, False):
+            video_url = ie._og_search_video_url(
+                webpage, secure=secure, default=None)
+            if video_url:
+                mobj = re.search(
+                    r'config=(?P<url>https?://bl\.webcaster\.pro/feed/start/free_[^?&=]+)',
+                    video_url)
+                if mobj:
+                    return mobj.group('url')
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        feed = self._download_xml(url, video_id)
+        # The target URL is read from <video_hd> or <video>; a missing value is fatal.
+        video_url = xpath_text(
+            feed, ('video_hd', 'video'), 'video url', fatal=True)
+        # Delegate extraction of that URL to WebcasterIE.
+        return self.url_result(video_url, WebcasterIE.ie_key())
index 3dafbeec2c5f7ba0b2e18ec621c67966214d3307..8e09156c26c58b4cc184dbe97e679ee9b8dfa47f 100644 (file)
@@ -6,7 +6,7 @@ from .common import InfoExtractor
 
 class WeiqiTVIE(InfoExtractor):
     IE_DESC = 'WQTV'
-    _VALID_URL = r'https?://www\.weiqitv\.com/index/video_play\?videoId=(?P<id>[A-Za-z0-9]+)'
+    _VALID_URL = r'https?://(?:www\.)?weiqitv\.com/index/video_play\?videoId=(?P<id>[A-Za-z0-9]+)'
 
     _TESTS = [{
         'url': 'http://www.weiqitv.com/index/video_play?videoId=53c744f09874f0e76a8b46f3',
index bdd7097baec16afb2a3c83dbdde0e89ebc713a69..0f53f1bcb85f409a71d77712b006a57146a9d513 100644 (file)
@@ -1,4 +1,4 @@
-# -*- coding: utf-8 -*-
+# coding: utf-8
 from __future__ import unicode_literals
 
 import re
index a83e68b17f53ca06fa4b76af6e3aa560fb23e107..deb7483ae51699df4670675db4622503320f1cbc 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
index b113ab1c4891fdf96898d359cf38779d61b394f8..d9c277bc3cb0221cd926c54a64f95bcec928bd3d 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
index 995aada0d1565ccc76f2fa2c7654e3f53f834d91..de344bad25309c03b1d7378ceb6b3968c2d4c47a 100644 (file)
@@ -124,12 +124,14 @@ class XFileShareIE(InfoExtractor):
             webpage = self._download_webpage(req, video_id, 'Downloading video page')
 
         title = (self._search_regex(
-            [r'style="z-index: [0-9]+;">([^<]+)</span>',
+            (r'style="z-index: [0-9]+;">([^<]+)</span>',
              r'<td nowrap>([^<]+)</td>',
              r'h4-fine[^>]*>([^<]+)<',
              r'>Watch (.+) ',
-             r'<h2 class="video-page-head">([^<]+)</h2>'],
-            webpage, 'title', default=None) or self._og_search_title(webpage)).strip()
+             r'<h2 class="video-page-head">([^<]+)</h2>',
+             r'<h2 style="[^"]*color:#403f3d[^"]*"[^>]*>([^<]+)<'),  # streamin.to
+            webpage, 'title', default=None) or self._og_search_title(
+            webpage, default=None) or video_id).strip()
 
         def extract_video_url(default=NO_DEFAULT):
             return self._search_regex(
index bcb140305559a164f56392dd33eab4d5c7b0bab5..e0a6255dc4df8f2a2bd56ffcf1363089a08e6aea 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
index a66daee46ebc0324152f69eefa2b66d79bfb513d..4b9c1ee9c5222f48c5634184f703baa062cf3ae9 100644 (file)
@@ -1,4 +1,4 @@
-# -*- coding: utf-8 -*-
+# coding: utf-8
 from __future__ import unicode_literals
 
 import base64
index b0679dfb70868f39b1190b8b04696787c02d75b1..4951414e91ffbc34dc83629403c8b64ffb5e5682 100644 (file)
@@ -8,7 +8,6 @@ import re
 from .common import InfoExtractor, SearchInfoExtractor
 from ..compat import (
     compat_urllib_parse,
-    compat_urllib_parse_urlencode,
     compat_urlparse,
 )
 from ..utils import (
@@ -17,9 +16,13 @@ from ..utils import (
     ExtractorError,
     int_or_none,
     mimetype2ext,
+    determine_ext,
 )
 
-from .brightcove import BrightcoveNewIE
+from .brightcove import (
+    BrightcoveLegacyIE,
+    BrightcoveNewIE,
+)
 from .nbc import NBCSportsVPlayerIE
 
 
@@ -39,7 +42,7 @@ class YahooIE(InfoExtractor):
         },
         {
             'url': 'http://screen.yahoo.com/wired/codefellas-s1-ep12-cougar-lies-103000935.html',
-            'md5': 'c3466d2b6d5dd6b9f41ba9ed04c24b23',
+            'md5': '251af144a19ebc4a033e8ba91ac726bb',
             'info_dict': {
                 'id': 'd1dedf8c-d58c-38c3-8963-e899929ae0a9',
                 'ext': 'mp4',
@@ -50,7 +53,7 @@ class YahooIE(InfoExtractor):
         },
         {
             'url': 'https://screen.yahoo.com/community/community-sizzle-reel-203225340.html?format=embed',
-            'md5': '75ffabdb87c16d4ffe8c036dc4d1c136',
+            'md5': '7993e572fac98e044588d0b5260f4352',
             'info_dict': {
                 'id': '4fe78544-8d48-39d8-97cd-13f205d9fcdb',
                 'ext': 'mp4',
@@ -61,7 +64,7 @@ class YahooIE(InfoExtractor):
         },
         {
             'url': 'https://tw.news.yahoo.com/%E6%95%A2%E5%95%8F%E5%B8%82%E9%95%B7%20%E9%BB%83%E7%A7%80%E9%9C%9C%E6%89%B9%E8%B3%B4%E6%B8%85%E5%BE%B7%20%E9%9D%9E%E5%B8%B8%E9%AB%98%E5%82%B2-034024051.html',
-            'md5': '9035d38f88b1782682a3e89f985be5bb',
+            'md5': '45c024bad51e63e9b6f6fad7a43a8c23',
             'info_dict': {
                 'id': 'cac903b3-fcf4-3c14-b632-643ab541712f',
                 'ext': 'mp4',
@@ -72,10 +75,10 @@ class YahooIE(InfoExtractor):
         },
         {
             'url': 'https://uk.screen.yahoo.com/editor-picks/cute-raccoon-freed-drain-using-091756545.html',
-            'md5': '0b51660361f0e27c9789e7037ef76f4b',
+            'md5': '71298482f7c64cbb7fa064e4553ff1c1',
             'info_dict': {
                 'id': 'b3affa53-2e14-3590-852b-0e0db6cd1a58',
-                'ext': 'mp4',
+                'ext': 'webm',
                 'title': 'Cute Raccoon Freed From Drain\u00a0Using Angle Grinder',
                 'description': 'md5:f66c890e1490f4910a9953c941dee944',
                 'duration': 97,
@@ -98,7 +101,7 @@ class YahooIE(InfoExtractor):
                 'id': '154609075',
             },
             'playlist': [{
-                'md5': 'f8e336c6b66f503282e5f719641d6565',
+                'md5': '000887d0dc609bc3a47c974151a40fb8',
                 'info_dict': {
                     'id': 'e624c4bc-3389-34de-9dfc-025f74943409',
                     'ext': 'mp4',
@@ -107,7 +110,7 @@ class YahooIE(InfoExtractor):
                     'duration': 30,
                 },
             }, {
-                'md5': '958bcb90b4d6df71c56312137ee1cd5a',
+                'md5': '81bc74faf10750fe36e4542f9a184c66',
                 'info_dict': {
                     'id': '1fc8ada0-718e-3abe-a450-bf31f246d1a9',
                     'ext': 'mp4',
@@ -139,7 +142,7 @@ class YahooIE(InfoExtractor):
             'skip': 'Domain name in.lifestyle.yahoo.com gone',
         }, {
             'url': 'https://www.yahoo.com/movies/v/true-story-trailer-173000497.html',
-            'md5': 'b17ac378b1134fa44370fb27db09a744',
+            'md5': '2a9752f74cb898af5d1083ea9f661b58',
             'info_dict': {
                 'id': '071c4013-ce30-3a93-a5b2-e0413cd4a9d1',
                 'ext': 'mp4',
@@ -168,7 +171,7 @@ class YahooIE(InfoExtractor):
         }, {
             # Query result is embedded in webpage, but explicit request to video API fails with geo restriction
             'url': 'https://screen.yahoo.com/community/communitary-community-episode-1-ladders-154501237.html',
-            'md5': '1ddbf7c850777548438e5c4f147c7b8c',
+            'md5': '4fbafb9c9b6f07aa8f870629f6671b35',
             'info_dict': {
                 'id': '1f32853c-a271-3eef-8cb6-f6d6872cb504',
                 'ext': 'mp4',
@@ -196,6 +199,33 @@ class YahooIE(InfoExtractor):
                 'description': 'Galactic',
                 'title': 'Dolla Diva (feat. Maggie Koerner)',
             },
+            'skip': 'redirect to https://www.yahoo.com/music',
+        },
+        {
+            # yahoo://article/
+            'url': 'https://www.yahoo.com/movies/video/true-story-trailer-173000497.html',
+            'info_dict': {
+                'id': '071c4013-ce30-3a93-a5b2-e0413cd4a9d1',
+                'ext': 'mp4',
+                'title': "'True Story' Trailer",
+                'description': 'True Story',
+            },
+            'params': {
+                'skip_download': True,
+            },
+        },
+        {
+            # ytwnews://cavideo/
+            'url': 'https://tw.video.yahoo.com/movie-tw/單車天使-中文版預-092316541.html',
+            'info_dict': {
+                'id': 'ba133ff2-0793-3510-b636-59dfe9ff6cff',
+                'ext': 'mp4',
+                'title': '單車天使 - 中文版預',
+                'description': '中文版預',
+            },
+            'params': {
+                'skip_download': True,
+            },
         },
     ]
 
@@ -213,15 +243,7 @@ class YahooIE(InfoExtractor):
         entries = []
         iframe_urls = re.findall(r'<iframe[^>]+src="(/video/.+?-\d+\.html\?format=embed.*?)"', webpage)
         for idx, iframe_url in enumerate(iframe_urls):
-            iframepage = self._download_webpage(
-                host + iframe_url, display_id,
-                note='Downloading iframe webpage for video #%d' % idx)
-            items_json = self._search_regex(
-                r'mediaItems: (\[.+?\])$', iframepage, 'items', flags=re.MULTILINE, default=None)
-            if items_json:
-                items = json.loads(items_json)
-                video_id = items[0]['id']
-                entries.append(self._get_info(video_id, display_id, webpage))
+            entries.append(self.url_result(host + iframe_url, 'Yahoo'))
         if entries:
             return self.playlist_result(entries, page_id)
 
@@ -230,6 +252,11 @@ class YahooIE(InfoExtractor):
         if nbc_sports_url:
             return self.url_result(nbc_sports_url, NBCSportsVPlayerIE.ie_key())
 
+        # Look for Brightcove Legacy Studio embeds
+        bc_url = BrightcoveLegacyIE._extract_brightcove_url(webpage)
+        if bc_url:
+            return self.url_result(bc_url, BrightcoveLegacyIE.ie_key())
+
         # Look for Brightcove New Studio embeds
         bc_url = BrightcoveNewIE._extract_url(webpage)
         if bc_url:
@@ -246,7 +273,9 @@ class YahooIE(InfoExtractor):
             if config:
                 sapi = config.get('models', {}).get('applet_model', {}).get('data', {}).get('sapi')
                 if sapi and 'query' in sapi:
-                    return self._extract_info(display_id, sapi, webpage)
+                    info = self._extract_info(display_id, sapi, webpage)
+                    self._sort_formats(info['formats'])
+                    return info
 
         items_json = self._search_regex(
             r'mediaItems: ({.*?})$', webpage, 'items', flags=re.MULTILINE,
@@ -266,7 +295,8 @@ class YahooIE(InfoExtractor):
                     r'"first_videoid"\s*:\s*"([^"]+)"',
                     r'%s[^}]*"ccm_id"\s*:\s*"([^"]+)"' % re.escape(page_id),
                     r'<article[^>]data-uuid=["\']([^"\']+)',
-                    r'yahoo://article/view\?.*\buuid=([^&"\']+)',
+                    r'<meta[^<>]+yahoo://article/view\?.*\buuid=([^&"\']+)',
+                    r'<meta[^<>]+["\']ytwnews://cavideo/(?:[^/]+/)+([\da-fA-F-]+)[&"\']',
                 ]
                 video_id = self._search_regex(
                     CONTENT_ID_REGEXES, webpage, 'content ID')
@@ -292,15 +322,17 @@ class YahooIE(InfoExtractor):
 
         formats = []
         for s in info['streams']:
+            tbr = int_or_none(s.get('bitrate'))
             format_info = {
                 'width': int_or_none(s.get('width')),
                 'height': int_or_none(s.get('height')),
-                'tbr': int_or_none(s.get('bitrate')),
+                'tbr': tbr,
             }
 
             host = s['host']
             path = s['path']
             if host.startswith('rtmp'):
+                fmt = 'rtmp'
                 format_info.update({
                     'url': host,
                     'play_path': path,
@@ -308,14 +340,18 @@ class YahooIE(InfoExtractor):
                 })
             else:
                 if s.get('format') == 'm3u8_playlist':
-                    format_info['protocol'] = 'm3u8_native'
-                    format_info['ext'] = 'mp4'
+                    fmt = 'hls'
+                    format_info.update({
+                        'protocol': 'm3u8_native',
+                        'ext': 'mp4',
+                    })
+                else:
+                    fmt = format_info['ext'] = determine_ext(path)
                 format_url = compat_urlparse.urljoin(host, path)
                 format_info['url'] = format_url
+            format_info['format_id'] = fmt + ('-%d' % tbr if tbr else '')
             formats.append(format_info)
 
-        self._sort_formats(formats)
-
         closed_captions = self._html_search_regex(
             r'"closedcaptions":(\[[^\]]+\])', webpage, 'closed captions',
             default='[]')
@@ -346,17 +382,25 @@ class YahooIE(InfoExtractor):
     def _get_info(self, video_id, display_id, webpage):
         region = self._search_regex(
             r'\\?"region\\?"\s*:\s*\\?"([^"]+?)\\?"',
-            webpage, 'region', fatal=False, default='US')
-        data = compat_urllib_parse_urlencode({
-            'protocol': 'http',
-            'region': region.upper(),
-        })
-        query_url = (
-            'https://video.media.yql.yahoo.com/v1/video/sapi/streams/'
-            '{id}?{data}'.format(id=video_id, data=data))
-        query_result = self._download_json(
-            query_url, display_id, 'Downloading video info')
-        return self._extract_info(display_id, query_result, webpage)
+            webpage, 'region', fatal=False, default='US').upper()
+        formats = []
+        info = {}
+        for fmt in ('webm', 'mp4'):
+            query_result = self._download_json(
+                'https://video.media.yql.yahoo.com/v1/video/sapi/streams/' + video_id,
+                display_id, 'Downloading %s video info' % fmt, query={
+                    'protocol': 'http',
+                    'region': region,
+                    'format': fmt,
+                })
+            info = self._extract_info(display_id, query_result, webpage)
+            formats.extend(info['formats'])
+        formats.extend(self._extract_m3u8_formats(
+            'http://video.media.yql.yahoo.com/v1/hls/%s?region=%s' % (video_id, region),
+            video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
+        self._sort_formats(formats)
+        info['formats'] = formats
+        return info
 
 
 class YahooSearchIE(SearchInfoExtractor):
index 63bbc06346a04b385c722eaae22d0ff5c41445f4..ef553554736884ea4b43cb424028b5302c6a872e 100644 (file)
@@ -15,7 +15,7 @@ from ..utils import (
 
 class YamIE(InfoExtractor):
     IE_DESC = '蕃薯藤yam天空部落'
-    _VALID_URL = r'https?://mymedia.yam.com/m/(?P<id>\d+)'
+    _VALID_URL = r'https?://mymedia\.yam\.com/m/(?P<id>\d+)'
 
     _TESTS = [{
         # An audio hosted on Yam
index 31e2f9263ada15aa4932b66347b12640b1a46621..b50f34e9bb30e47c679940ca1577ea8cc6683934 100644 (file)
@@ -1,21 +1,16 @@
 from __future__ import unicode_literals
 
-import re
-
 from .common import InfoExtractor
-from ..utils import (
-    ExtractorError,
-)
 
 
 class YouJizzIE(InfoExtractor):
     _VALID_URL = r'https?://(?:\w+\.)?youjizz\.com/videos/(?:[^/#?]+)?-(?P<id>[0-9]+)\.html(?:$|[?#])'
     _TESTS = [{
         'url': 'http://www.youjizz.com/videos/zeichentrick-1-2189178.html',
-        'md5': '07e15fa469ba384c7693fd246905547c',
+        'md5': '78fc1901148284c69af12640e01c6310',
         'info_dict': {
             'id': '2189178',
-            'ext': 'flv',
+            'ext': 'mp4',
             'title': 'Zeichentrick 1',
             'age_limit': 18,
         }
@@ -27,38 +22,18 @@ class YouJizzIE(InfoExtractor):
     def _real_extract(self, url):
         video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
+        # YouJizz's HTML5 player has invalid HTML
+        webpage = webpage.replace('"controls', '" controls')
         age_limit = self._rta_search(webpage)
         video_title = self._html_search_regex(
             r'<title>\s*(.*)\s*</title>', webpage, 'title')
 
-        embed_page_url = self._search_regex(
-            r'(https?://www.youjizz.com/videos/embed/[0-9]+)',
-            webpage, 'embed page')
-        webpage = self._download_webpage(
-            embed_page_url, video_id, note='downloading embed page')
-
-        # Get the video URL
-        m_playlist = re.search(r'so.addVariable\("playlist", ?"(?P<playlist>.+?)"\);', webpage)
-        if m_playlist is not None:
-            playlist_url = m_playlist.group('playlist')
-            playlist_page = self._download_webpage(playlist_url, video_id,
-                                                   'Downloading playlist page')
-            m_levels = list(re.finditer(r'<level bitrate="(\d+?)" file="(.*?)"', playlist_page))
-            if len(m_levels) == 0:
-                raise ExtractorError('Unable to extract video url')
-            videos = [(int(m.group(1)), m.group(2)) for m in m_levels]
-            (_, video_url) = sorted(videos)[0]
-            video_url = video_url.replace('%252F', '%2F')
-        else:
-            video_url = self._search_regex(r'so.addVariable\("file",encodeURIComponent\("(?P<source>[^"]+)"\)\);',
-                                           webpage, 'video URL')
+        info_dict = self._parse_html5_media_entries(url, webpage, video_id)[0]
 
-        return {
+        info_dict.update({
             'id': video_id,
-            'url': video_url,
             'title': video_title,
-            'ext': 'flv',
-            'format': 'flv',
-            'player_url': embed_page_url,
             'age_limit': age_limit,
-        }
+        })
+
+        return info_dict
index 0df2d76ee198d5d6ae1914f078cc96accec2d17e..0265a64a7d3c014001b2d0e81789f0e904b32d62 100644 (file)
@@ -35,7 +35,7 @@ class YouPornIE(InfoExtractor):
             'age_limit': 18,
         },
     }, {
-        # Anonymous User uploader
+        # Unknown uploader
         'url': 'http://www.youporn.com/watch/561726/big-tits-awesome-brunette-on-amazing-webcam-show/?from=related3&al=2&from_id=561726&pos=4',
         'info_dict': {
             'id': '561726',
@@ -44,7 +44,7 @@ class YouPornIE(InfoExtractor):
             'title': 'Big Tits Awesome Brunette On amazing webcam show',
             'description': 'http://sweetlivegirls.com Big Tits Awesome Brunette On amazing webcam show.mp4',
             'thumbnail': 're:^https?://.*\.jpg$',
-            'uploader': 'Anonymous User',
+            'uploader': 'Unknown',
             'upload_date': '20111125',
             'average_rating': int,
             'view_count': int,
@@ -140,17 +140,17 @@ class YouPornIE(InfoExtractor):
             r'>All [Cc]omments? \(([\d,.]+)\)',
             webpage, 'comment count', fatal=False))
 
-        def extract_tag_box(title):
-            tag_box = self._search_regex(
-                (r'<div[^>]+class=["\']tagBoxTitle["\'][^>]*>\s*%s\b.*?</div>\s*'
-                 '<div[^>]+class=["\']tagBoxContent["\']>(.+?)</div>') % re.escape(title),
-                webpage, '%s tag box' % title, default=None)
+        def extract_tag_box(regex, title):
+            tag_box = self._search_regex(regex, webpage, title, default=None)
             if not tag_box:
                 return []
             return re.findall(r'<a[^>]+href=[^>]+>([^<]+)', tag_box)
 
-        categories = extract_tag_box('Category')
-        tags = extract_tag_box('Tags')
+        categories = extract_tag_box(
+            r'(?s)Categories:.*?</[^>]+>(.+?)</div>', 'categories')
+        tags = extract_tag_box(
+            r'(?s)Tags:.*?</div>\s*<div[^>]+class=["\']tagBoxContent["\'][^>]*>(.+?)</div>',
+            'tags')
 
         return {
             'id': video_id,
index 268080ba6c7f5833bd9f5da3201563cd164fa12e..bd24a28389bf847f1e72451e232bfab0807809d8 100644 (file)
@@ -91,36 +91,18 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
         if login_page is False:
             return
 
-        galx = self._search_regex(r'(?s)<input.+?name="GALX".+?value="(.+?)"',
-                                  login_page, 'Login GALX parameter')
+        login_form = self._hidden_inputs(login_page)
 
-        # Log in
-        login_form_strs = {
-            'continue': 'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
+        login_form.update({
+            'checkConnection': 'youtube',
             'Email': username,
-            'GALX': galx,
             'Passwd': password,
-
-            'PersistentCookie': 'yes',
-            '_utf8': '霱',
-            'bgresponse': 'js_disabled',
-            'checkConnection': '',
-            'checkedDomains': 'youtube',
-            'dnConn': '',
-            'pstMsg': '0',
-            'rmShown': '1',
-            'secTok': '',
-            'signIn': 'Sign in',
-            'timeStmp': '',
-            'service': 'youtube',
-            'uilel': '3',
-            'hl': 'en_US',
-        }
+        })
 
         login_results = self._download_webpage(
             self._PASSWORD_CHALLENGE_URL, None,
             note='Logging in', errnote='unable to log in', fatal=False,
-            data=urlencode_postdata(login_form_strs))
+            data=urlencode_postdata(login_form))
         if login_results is False:
             return False
 
@@ -282,7 +264,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                          )
                      )?                                                       # all until now is optional -> you can pass the naked ID
                      ([0-9A-Za-z_-]{11})                                      # here is it! the YouTube video ID
-                     (?!.*?&list=)                                            # combined list/video URLs are handled by the playlist IE
+                     (?!.*?\blist=)                                            # combined list/video URLs are handled by the playlist IE
                      (?(1).+)?                                                # if we found the ID, everything can follow
                      $"""
     _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
@@ -387,7 +369,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
     IE_NAME = 'youtube'
     _TESTS = [
         {
-            'url': 'http://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
+            'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
             'info_dict': {
                 'id': 'BaW_jenozKc',
                 'ext': 'mp4',
@@ -407,7 +389,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             }
         },
         {
-            'url': 'http://www.youtube.com/watch?v=UxxajLWwzqY',
+            'url': 'https://www.youtube.com/watch?v=UxxajLWwzqY',
             'note': 'Test generic use_cipher_signature video (#897)',
             'info_dict': {
                 'id': 'UxxajLWwzqY',
@@ -461,7 +443,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             }
         },
         {
-            'url': 'http://www.youtube.com/watch?v=BaW_jenozKc&v=UxxajLWwzqY',
+            'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=UxxajLWwzqY',
             'note': 'Use the first video ID in the URL',
             'info_dict': {
                 'id': 'BaW_jenozKc',
@@ -483,7 +465,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             },
         },
         {
-            'url': 'http://www.youtube.com/watch?v=a9LDPn-MO4I',
+            'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
             'note': '256k DASH audio (format 141) via DASH manifest',
             'info_dict': {
                 'id': 'a9LDPn-MO4I',
@@ -557,7 +539,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         },
         # Normal age-gate video (No vevo, embed allowed)
         {
-            'url': 'http://youtube.com/watch?v=HtVdAasjOgU',
+            'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
             'info_dict': {
                 'id': 'HtVdAasjOgU',
                 'ext': 'mp4',
@@ -573,7 +555,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         },
         # Age-gate video with encrypted signature
         {
-            'url': 'http://www.youtube.com/watch?v=6kLq3WMV1nU',
+            'url': 'https://www.youtube.com/watch?v=6kLq3WMV1nU',
             'info_dict': {
                 'id': '6kLq3WMV1nU',
                 'ext': 'mp4',
@@ -766,11 +748,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             'skip': 'Not multifeed anymore',
         },
         {
-            'url': 'http://vid.plus/FlRa-iH7PGw',
+            'url': 'https://vid.plus/FlRa-iH7PGw',
             'only_matching': True,
         },
         {
-            'url': 'http://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
+            'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
             'only_matching': True,
         },
         {
@@ -862,6 +844,24 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             # YouTube Red paid video (https://github.com/rg3/youtube-dl/issues/10059)
             'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
             'only_matching': True,
+        },
+        {
+            # Rental video preview
+            'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
+            'info_dict': {
+                'id': 'uGpuVWrhIzE',
+                'ext': 'mp4',
+                'title': 'Piku - Trailer',
+                'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
+                'upload_date': '20150811',
+                'uploader': 'FlixMatrix',
+                'uploader_id': 'FlixMatrixKaravan',
+                'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
+                'license': 'Standard YouTube License',
+            },
+            'params': {
+                'skip_download': True,
+            },
         }
     ]
 
@@ -1272,6 +1272,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                     # Convert to the same format returned by compat_parse_qs
                     video_info = dict((k, [v]) for k, v in args.items())
                     add_dash_mpd(video_info)
+                # Rental video is not rented but preview is available (e.g.
+                # https://www.youtube.com/watch?v=yYr8q0y5Jfg,
+                # https://github.com/rg3/youtube-dl/issues/10532)
+                if not video_info and args.get('ypc_vid'):
+                    return self.url_result(
+                        args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
                 if args.get('livestream') == '1' or args.get('live_playback') == 1:
                     is_live = True
             if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
@@ -1772,11 +1778,14 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
     _VALID_URL = r"""(?x)(?:
                         (?:https?://)?
                         (?:\w+\.)?
-                        youtube\.com/
                         (?:
-                           (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/videoseries)
-                           \? (?:.*?[&;])*? (?:p|a|list)=
-                        |  p/
+                            youtube\.com/
+                            (?:
+                               (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/videoseries)
+                               \? (?:.*?[&;])*? (?:p|a|list)=
+                            |  p/
+                            )|
+                            youtu\.be/[0-9A-Za-z_-]{11}\?.*?\blist=
                         )
                         (
                             (?:PL|LL|EC|UU|FL|RD|UL)?[0-9A-Za-z-_]{10,}
@@ -1787,7 +1796,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
                      |
                         ((?:PL|LL|EC|UU|FL|RD|UL)[0-9A-Za-z-_]{10,})
                      )"""
-    _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
+    _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s&disable_polymer=true'
     _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&amp;[^"]*?index=(?P<index>\d+)(?:[^>]+>(?P<title>[^<]+))?'
     IE_NAME = 'youtube:playlist'
     _TESTS = [{
@@ -1837,7 +1846,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
         'playlist_count': 2,
     }, {
         'note': 'embedded',
-        'url': 'http://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
+        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
         'playlist_count': 4,
         'info_dict': {
             'title': 'JODA15',
@@ -1845,7 +1854,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
         }
     }, {
         'note': 'Embedded SWF player',
-        'url': 'http://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0',
+        'url': 'https://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0',
         'playlist_count': 4,
         'info_dict': {
             'title': 'JODA7',
@@ -1858,7 +1867,53 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
             'title': 'Uploads from Interstellar Movie',
             'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
         },
-        'playlist_mincout': 21,
+        'playlist_mincount': 21,
+    }, {
+        # Playlist URL that does not actually serve a playlist
+        'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
+        'info_dict': {
+            'id': 'FqZTN594JQw',
+            'ext': 'webm',
+            'title': "Smiley's People 01 detective, Adventure Series, Action",
+            'uploader': 'STREEM',
+            'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
+            'uploader_url': 're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
+            'upload_date': '20150526',
+            'license': 'Standard YouTube License',
+            'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
+            'categories': ['People & Blogs'],
+            'tags': list,
+            'like_count': int,
+            'dislike_count': int,
+        },
+        'params': {
+            'skip_download': True,
+        },
+        'add_ie': [YoutubeIE.ie_key()],
+    }, {
+        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
+        'info_dict': {
+            'id': 'yeWKywCrFtk',
+            'ext': 'mp4',
+            'title': 'Small Scale Baler and Braiding Rugs',
+            'uploader': 'Backus-Page House Museum',
+            'uploader_id': 'backuspagemuseum',
+            'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
+            'upload_date': '20161008',
+            'license': 'Standard YouTube License',
+            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
+            'categories': ['Nonprofits & Activism'],
+            'tags': list,
+            'like_count': int,
+            'dislike_count': int,
+        },
+        'params': {
+            'noplaylist': True,
+            'skip_download': True,
+        },
+    }, {
+        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
+        'only_matching': True,
     }]
 
     def _real_initialize(self):
@@ -1919,20 +1974,35 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
 
         playlist_title = self._html_search_regex(
             r'(?s)<h1 class="pl-header-title[^"]*"[^>]*>\s*(.*?)\s*</h1>',
-            page, 'title')
+            page, 'title', default=None)
+
+        has_videos = True
+
+        if not playlist_title:
+            try:
+                # Some playlist URLs don't actually serve a playlist (e.g.
+                # https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4)
+                next(self._entries(page, playlist_id))
+            except StopIteration:
+                has_videos = False
 
-        return self.playlist_result(self._entries(page, playlist_id), playlist_id, playlist_title)
+        return has_videos, self.playlist_result(
+            self._entries(page, playlist_id), playlist_id, playlist_title)
 
     def _check_download_just_video(self, url, playlist_id):
         # Check if it's a video-specific URL
         query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
-        if 'v' in query_dict:
-            video_id = query_dict['v'][0]
+        video_id = query_dict.get('v', [None])[0] or self._search_regex(
+            r'(?:^|//)youtu\.be/([0-9A-Za-z_-]{11})', url,
+            'video id', default=None)
+        if video_id:
             if self._downloader.params.get('noplaylist'):
                 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
-                return self.url_result(video_id, 'Youtube', video_id=video_id)
+                return video_id, self.url_result(video_id, 'Youtube', video_id=video_id)
             else:
                 self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
+                return video_id, None
+        return None, None
 
     def _real_extract(self, url):
         # Extract playlist id
@@ -1941,7 +2011,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
             raise ExtractorError('Invalid URL: %s' % url)
         playlist_id = mobj.group(1) or mobj.group(2)
 
-        video = self._check_download_just_video(url, playlist_id)
+        video_id, video = self._check_download_just_video(url, playlist_id)
         if video:
             return video
 
@@ -1949,7 +2019,15 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
             # Mixes require a custom extraction process
             return self._extract_mix(playlist_id)
 
-        return self._extract_playlist(playlist_id)
+        has_videos, playlist = self._extract_playlist(playlist_id)
+        if has_videos or not video_id:
+            return playlist
+
+        # Some playlist URLs don't actually serve a playlist (see
+        # https://github.com/rg3/youtube-dl/issues/10537).
+        # Fallback to plain video extraction if there is a video id
+        # along with playlist id.
+        return self.url_result(video_id, 'Youtube', video_id=video_id)
 
 
 class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
@@ -2097,11 +2175,11 @@ class YoutubeUserIE(YoutubeChannelIE):
 
 class YoutubeLiveIE(YoutubeBaseInfoExtractor):
     IE_DESC = 'YouTube.com live streams'
-    _VALID_URL = r'(?P<base_url>https?://(?:\w+\.)?youtube\.com/(?:user|channel)/(?P<id>[^/]+))/live'
+    _VALID_URL = r'(?P<base_url>https?://(?:\w+\.)?youtube\.com/(?:(?:user|channel|c)/)?(?P<id>[^/]+))/live'
     IE_NAME = 'youtube:live'
 
     _TESTS = [{
-        'url': 'http://www.youtube.com/user/TheYoungTurks/live',
+        'url': 'https://www.youtube.com/user/TheYoungTurks/live',
         'info_dict': {
             'id': 'a48o2S1cPoo',
             'ext': 'mp4',
@@ -2121,7 +2199,13 @@ class YoutubeLiveIE(YoutubeBaseInfoExtractor):
             'skip_download': True,
         },
     }, {
-        'url': 'http://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
+        'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.youtube.com/TheYoungTurks/live',
         'only_matching': True,
     }]
 
@@ -2146,7 +2230,7 @@ class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
     IE_NAME = 'youtube:playlists'
 
     _TESTS = [{
-        'url': 'http://www.youtube.com/user/ThirstForScience/playlists',
+        'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
         'playlist_mincount': 4,
         'info_dict': {
             'id': 'ThirstForScience',
@@ -2154,7 +2238,7 @@ class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
         },
     }, {
         # with "Load more" button
-        'url': 'http://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
+        'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
         'playlist_mincount': 70,
         'info_dict': {
             'id': 'igorkle1',
@@ -2247,7 +2331,7 @@ class YoutubeSearchURLIE(YoutubePlaylistBaseInfoExtractor):
 
 class YoutubeShowIE(YoutubePlaylistsBaseInfoExtractor):
     IE_DESC = 'YouTube.com (multi-season) shows'
-    _VALID_URL = r'https?://www\.youtube\.com/show/(?P<id>[^?#]*)'
+    _VALID_URL = r'https?://(?:www\.)?youtube\.com/show/(?P<id>[^?#]*)'
     IE_NAME = 'youtube:show'
     _TESTS = [{
         'url': 'https://www.youtube.com/show/airdisasters',
@@ -2316,7 +2400,7 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
 class YoutubeWatchLaterIE(YoutubePlaylistIE):
     IE_NAME = 'youtube:watchlater'
     IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
-    _VALID_URL = r'https?://www\.youtube\.com/(?:feed/watch_later|(?:playlist|watch)\?(?:.+&)?list=WL)|:ytwatchlater'
+    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:feed/watch_later|(?:playlist|watch)\?(?:.+&)?list=WL)|:ytwatchlater'
 
     _TESTS = [{
         'url': 'https://www.youtube.com/playlist?list=WL',
@@ -2327,16 +2411,17 @@ class YoutubeWatchLaterIE(YoutubePlaylistIE):
     }]
 
     def _real_extract(self, url):
-        video = self._check_download_just_video(url, 'WL')
+        _, video = self._check_download_just_video(url, 'WL')
         if video:
             return video
-        return self._extract_playlist('WL')
+        _, playlist = self._extract_playlist('WL')
+        return playlist
 
 
 class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
     IE_NAME = 'youtube:favorites'
     IE_DESC = 'YouTube.com favourite videos, ":ytfav" for short (requires authentication)'
-    _VALID_URL = r'https?://www\.youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
+    _VALID_URL = r'https?://(?:www\.)?youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
     _LOGIN_REQUIRED = True
 
     def _real_extract(self, url):
@@ -2347,21 +2432,21 @@ class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
 
 class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
     IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
-    _VALID_URL = r'https?://www\.youtube\.com/feed/recommended|:ytrec(?:ommended)?'
+    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/recommended|:ytrec(?:ommended)?'
     _FEED_NAME = 'recommended'
     _PLAYLIST_TITLE = 'Youtube Recommended videos'
 
 
 class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
     IE_DESC = 'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
-    _VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
+    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
     _FEED_NAME = 'subscriptions'
     _PLAYLIST_TITLE = 'Youtube Subscriptions'
 
 
 class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
     IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
-    _VALID_URL = 'https?://www\.youtube\.com/feed/history|:ythistory'
+    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/history|:ythistory'
     _FEED_NAME = 'history'
     _PLAYLIST_TITLE = 'Youtube History'
 
@@ -2386,10 +2471,10 @@ class YoutubeTruncatedURLIE(InfoExtractor):
     '''
 
     _TESTS = [{
-        'url': 'http://www.youtube.com/watch?annotation_id=annotation_3951667041',
+        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
         'only_matching': True,
     }, {
-        'url': 'http://www.youtube.com/watch?',
+        'url': 'https://www.youtube.com/watch?',
         'only_matching': True,
     }, {
         'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
@@ -2410,7 +2495,7 @@ class YoutubeTruncatedURLIE(InfoExtractor):
             'Did you forget to quote the URL? Remember that & is a meta '
             'character in most shells, so you want to put the URL in quotes, '
             'like  youtube-dl '
-            '"http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
+            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
             ' or simply  youtube-dl BaW_jenozKc  .',
             expected=True)
 
index 437eecb6737161c9d730bf9a93eaed1bdb541799..0f0e9d0eb9b1ac945934b11a134d143d82b19fb0 100644 (file)
@@ -1,16 +1,20 @@
-# coding=utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 import re
 
 from .common import InfoExtractor
-from ..utils import ExtractorError
+from ..utils import (
+    ExtractorError,
+    int_or_none,
+    update_url_query,
+)
 
 
 class ZingMp3BaseInfoExtractor(InfoExtractor):
 
-    def _extract_item(self, item, fatal=True):
-        error_message = item.find('./errormessage').text
+    def _extract_item(self, item, page_type, fatal=True):
+        error_message = item.get('msg')
         if error_message:
             if not fatal:
                 return
@@ -18,25 +22,48 @@ class ZingMp3BaseInfoExtractor(InfoExtractor):
                 '%s returned error: %s' % (self.IE_NAME, error_message),
                 expected=True)
 
-        title = item.find('./title').text.strip()
-        source = item.find('./source').text
-        extension = item.attrib['type']
-        thumbnail = item.find('./backimage').text
+        formats = []
+        for quality, source_url in zip(item.get('qualities') or item.get('quality', []), item.get('source_list') or item.get('source', [])):
+            if not source_url or source_url == 'require vip':
+                continue
+            if not re.match(r'https?://', source_url):
+                source_url = '//' + source_url
+            source_url = self._proto_relative_url(source_url, 'http:')
+            quality_num = int_or_none(quality)
+            f = {
+                'format_id': quality,
+                'url': source_url,
+            }
+            if page_type == 'video':
+                f.update({
+                    'height': quality_num,
+                    'ext': 'mp4',
+                })
+            else:
+                f.update({
+                    'abr': quality_num,
+                    'ext': 'mp3',
+                })
+            formats.append(f)
+
+        cover = item.get('cover')
 
         return {
-            'title': title,
-            'url': source,
-            'ext': extension,
-            'thumbnail': thumbnail,
+            'title': (item.get('name') or item.get('title')).strip(),
+            'formats': formats,
+            'thumbnail': 'http:/' + cover if cover else None,
+            'artist': item.get('artist'),
         }
 
-    def _extract_player_xml(self, player_xml_url, id, playlist_title=None):
-        player_xml = self._download_xml(player_xml_url, id, 'Downloading Player XML')
-        items = player_xml.findall('./item')
+    def _extract_player_json(self, player_json_url, id, page_type, playlist_title=None):
+        player_json = self._download_json(player_json_url, id, 'Downloading Player JSON')
+        items = player_json['data']
+        if 'item' in items:
+            items = items['item']
 
         if len(items) == 1:
             # one single song
-            data = self._extract_item(items[0])
+            data = self._extract_item(items[0], page_type)
             data['id'] = id
 
             return data
@@ -45,7 +72,7 @@ class ZingMp3BaseInfoExtractor(InfoExtractor):
             entries = []
 
             for i, item in enumerate(items, 1):
-                entry = self._extract_item(item, fatal=False)
+                entry = self._extract_item(item, page_type, fatal=False)
                 if not entry:
                     continue
                 entry['id'] = '%s-%d' % (id, i)
@@ -59,8 +86,8 @@ class ZingMp3BaseInfoExtractor(InfoExtractor):
             }
 
 
-class ZingMp3SongIE(ZingMp3BaseInfoExtractor):
-    _VALID_URL = r'https?://mp3\.zing\.vn/bai-hat/(?P<slug>[^/]+)/(?P<song_id>\w+)\.html'
+class ZingMp3IE(ZingMp3BaseInfoExtractor):
+    _VALID_URL = r'https?://mp3\.zing\.vn/(?:bai-hat|album|playlist|video-clip)/[^/]+/(?P<id>\w+)\.html'
     _TESTS = [{
         'url': 'http://mp3.zing.vn/bai-hat/Xa-Mai-Xa-Bao-Thy/ZWZB9WAB.html',
         'md5': 'ead7ae13693b3205cbc89536a077daed',
@@ -70,51 +97,47 @@ class ZingMp3SongIE(ZingMp3BaseInfoExtractor):
             'ext': 'mp3',
             'thumbnail': 're:^https?://.*\.jpg$',
         },
-    }]
-    IE_NAME = 'zingmp3:song'
-    IE_DESC = 'mp3.zing.vn songs'
-
-    def _real_extract(self, url):
-        matched = re.match(self._VALID_URL, url)
-        slug = matched.group('slug')
-        song_id = matched.group('song_id')
-
-        webpage = self._download_webpage(
-            'http://mp3.zing.vn/bai-hat/%s/%s.html' % (slug, song_id), song_id)
-
-        player_xml_url = self._search_regex(
-            r'&amp;xmlURL=(?P<xml_url>[^&]+)&', webpage, 'player xml url')
-
-        return self._extract_player_xml(player_xml_url, song_id)
-
-
-class ZingMp3AlbumIE(ZingMp3BaseInfoExtractor):
-    _VALID_URL = r'https?://mp3\.zing\.vn/(?:album|playlist)/(?P<slug>[^/]+)/(?P<album_id>\w+)\.html'
-    _TESTS = [{
+    }, {
+        'url': 'http://mp3.zing.vn/video-clip/Let-It-Go-Frozen-OST-Sungha-Jung/ZW6BAEA0.html',
+        'md5': '870295a9cd8045c0e15663565902618d',
+        'info_dict': {
+            'id': 'ZW6BAEA0',
+            'title': 'Let It Go (Frozen OST)',
+            'ext': 'mp4',
+        },
+    }, {
         'url': 'http://mp3.zing.vn/album/Lau-Dai-Tinh-Ai-Bang-Kieu-Minh-Tuyet/ZWZBWDAF.html',
         'info_dict': {
             '_type': 'playlist',
             'id': 'ZWZBWDAF',
-            'title': 'Lâu Đài Tình Ái - Bằng Kiều ft. Minh Tuyết | Album 320 lossless',
+            'title': 'Lâu Đài Tình Ái - Bằng Kiều,Minh Tuyết | Album 320 lossless',
         },
         'playlist_count': 10,
+        'skip': 'removed at the request of the owner',
     }, {
         'url': 'http://mp3.zing.vn/playlist/Duong-Hong-Loan-apollobee/IWCAACCB.html',
         'only_matching': True,
     }]
-    IE_NAME = 'zingmp3:album'
-    IE_DESC = 'mp3.zing.vn albums'
+    IE_NAME = 'zingmp3'
+    IE_DESC = 'mp3.zing.vn'
 
     def _real_extract(self, url):
-        matched = re.match(self._VALID_URL, url)
-        slug = matched.group('slug')
-        album_id = matched.group('album_id')
-
-        webpage = self._download_webpage(
-            'http://mp3.zing.vn/album/%s/%s.html' % (slug, album_id), album_id)
-        player_xml_url = self._search_regex(
-            r'&amp;xmlURL=(?P<xml_url>[^&]+)&', webpage, 'player xml url')
-
-        return self._extract_player_xml(
-            player_xml_url, album_id,
-            playlist_title=self._og_search_title(webpage))
+        page_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, page_id)
+
+        player_json_url = self._search_regex([
+            r'data-xml="([^"]+)',
+            r'&amp;xmlURL=([^&]+)&'
+        ], webpage, 'player xml url')
+
+        playlist_title = None
+        page_type = self._search_regex(r'/(?:html5)?xml/([^/-]+)', player_json_url, 'page type')
+        if page_type == 'video':
+            player_json_url = update_url_query(player_json_url, {'format': 'json'})
+        else:
+            player_json_url = player_json_url.replace('/xml/', '/html5xml/')
+            if page_type == 'album':
+                playlist_title = self._og_search_title(webpage)
+
+        return self._extract_player_json(player_json_url, page_id, page_type, playlist_title)
index 9737f70021d3285a4e8df616467b764de1a91fa2..a8df4aef0a2553222d45b9f38131a2945470d412 100644 (file)
@@ -198,12 +198,12 @@ class JSInterpreter(object):
             return opfunc(x, y)
 
         m = re.match(
-            r'^(?P<func>%s)\((?P<args>[a-zA-Z0-9_$,]+)\)$' % _NAME_RE, expr)
+            r'^(?P<func>%s)\((?P<args>[a-zA-Z0-9_$,]*)\)$' % _NAME_RE, expr)
         if m:
             fname = m.group('func')
             argvals = tuple([
                 int(v) if v.isdigit() else local_vars[v]
-                for v in m.group('args').split(',')])
+                for v in m.group('args').split(',')]) if len(m.group('args')) > 0 else tuple()
             if fname not in self._functions:
                 self._functions[fname] = self.extract_function(fname)
             return self._functions[fname](argvals)
index d32a9e32cd0549a7faacc9ee638a03c0413834d5..53497fbc6f60a945b6350ce36e352a8eb6ef1f2c 100644 (file)
@@ -94,7 +94,7 @@ def parseOpts(overrideArguments=None):
         setattr(parser.values, option.dest, value.split(','))
 
     def _hide_login_info(opts):
-        PRIVATE_OPTS = ['-p', '--password', '-u', '--username', '--video-password']
+        PRIVATE_OPTS = ['-p', '--password', '-u', '--username', '--video-password', '--ap-password', '--ap-username']
         eqre = re.compile('^(?P<key>' + ('|'.join(re.escape(po) for po in PRIVATE_OPTS)) + ')=.+$')
 
         def _scrub_eq(o):
@@ -351,6 +351,24 @@ def parseOpts(overrideArguments=None):
         dest='videopassword', metavar='PASSWORD',
         help='Video password (vimeo, smotri, youku)')
 
+    adobe_pass = optparse.OptionGroup(parser, 'Adobe Pass Options')
+    adobe_pass.add_option(
+        '--ap-mso',
+        dest='ap_mso', metavar='MSO',
+        help='Adobe Pass multiple-system operator (TV provider) identifier, use --ap-list-mso for a list of available MSOs')
+    adobe_pass.add_option(
+        '--ap-username',
+        dest='ap_username', metavar='USERNAME',
+        help='Multiple-system operator account login')
+    adobe_pass.add_option(
+        '--ap-password',
+        dest='ap_password', metavar='PASSWORD',
+        help='Multiple-system operator account password. If this option is left out, youtube-dl will ask interactively.')
+    adobe_pass.add_option(
+        '--ap-list-mso',
+        action='store_true', dest='ap_list_mso', default=False,
+        help='List all supported multiple-system operators')
+
     video_format = optparse.OptionGroup(parser, 'Video Format Options')
     video_format.add_option(
         '-f', '--format',
@@ -423,7 +441,15 @@ def parseOpts(overrideArguments=None):
     downloader.add_option(
         '--fragment-retries',
         dest='fragment_retries', metavar='RETRIES', default=10,
-        help='Number of retries for a fragment (default is %default), or "infinite" (DASH only)')
+        help='Number of retries for a fragment (default is %default), or "infinite" (DASH and hlsnative only)')
+    downloader.add_option(
+        '--skip-unavailable-fragments',
+        action='store_true', dest='skip_unavailable_fragments', default=True,
+        help='Skip unavailable fragments (DASH and hlsnative only)')
+    general.add_option(
+        '--abort-on-unavailable-fragment',
+        action='store_false', dest='skip_unavailable_fragments',
+        help='Abort downloading when some fragment is not available')
     downloader.add_option(
         '--buffer-size',
         dest='buffersize', metavar='SIZE', default='1024',
@@ -628,22 +654,7 @@ def parseOpts(overrideArguments=None):
     filesystem.add_option(
         '-o', '--output',
         dest='outtmpl', metavar='TEMPLATE',
-        help=('Output filename template. Use %(title)s to get the title, '
-              '%(uploader)s for the uploader name, %(uploader_id)s for the uploader nickname if different, '
-              '%(autonumber)s to get an automatically incremented number, '
-              '%(ext)s for the filename extension, '
-              '%(format)s for the format description (like "22 - 1280x720" or "HD"), '
-              '%(format_id)s for the unique id of the format (like YouTube\'s itags: "137"), '
-              '%(upload_date)s for the upload date (YYYYMMDD), '
-              '%(extractor)s for the provider (youtube, metacafe, etc), '
-              '%(id)s for the video id, '
-              '%(playlist_title)s, %(playlist_id)s, or %(playlist)s (=title if present, ID otherwise) for the playlist the video is in, '
-              '%(playlist_index)s for the position in the playlist. '
-              '%(height)s and %(width)s for the width and height of the video format. '
-              '%(resolution)s for a textual description of the resolution of the video format. '
-              '%% for a literal percent. '
-              'Use - to output to stdout. Can also be used to download to a different directory, '
-              'for example with -o \'/my/downloads/%(uploader)s/%(title)s-%(id)s.%(ext)s\' .'))
+        help=('Output filename template, see the "OUTPUT TEMPLATE" for all the info'))
     filesystem.add_option(
         '--autonumber-size',
         dest='autonumber_size', metavar='NUMBER',
@@ -820,6 +831,7 @@ def parseOpts(overrideArguments=None):
     parser.add_option_group(video_format)
     parser.add_option_group(subtitles)
     parser.add_option_group(authentication)
+    parser.add_option_group(adobe_pass)
     parser.add_option_group(postproc)
 
     if overrideArguments is not None:
index 3bad5a266b6d51aaf0c92224a94986957da230f2..e606a58de886533fb5239b9bb958fbff9606a4ee 100644 (file)
@@ -1,4 +1,4 @@
-# -*- coding: utf-8 -*-
+# coding: utf-8
 from __future__ import unicode_literals
 
 
@@ -40,7 +40,7 @@ class EmbedThumbnailPP(FFmpegPostProcessor):
                 'Skipping embedding the thumbnail because the file is missing.')
             return [], info
 
-        if info['ext'] in ('mp3', 'mkv'):
+        if info['ext'] == 'mp3':
             options = [
                 '-c', 'copy', '-map', '0', '-map', '1',
                 '-metadata:s:v', 'title="Album cover"', '-metadata:s:v', 'comment="Cover (Front)"']
index fa99b0c2a6aa4d39fbffdcbafd4926431794c55b..1881f4849e23c749d51da2e45d655ed4e6a68314 100644 (file)
@@ -139,6 +139,30 @@ class FFmpegPostProcessor(PostProcessor):
     def probe_executable(self):
         return self._paths[self.probe_basename]
 
+    def get_audio_codec(self, path):
+        if not self.probe_available:
+            raise PostProcessingError('ffprobe or avprobe not found. Please install one.')
+        try:
+            cmd = [
+                encodeFilename(self.probe_executable, True),
+                encodeArgument('-show_streams'),
+                encodeFilename(self._ffmpeg_filename_argument(path), True)]
+            if self._downloader.params.get('verbose', False):
+                self._downloader.to_screen('[debug] %s command line: %s' % (self.basename, shell_quote(cmd)))
+            handle = subprocess.Popen(cmd, stderr=compat_subprocess_get_DEVNULL(), stdout=subprocess.PIPE, stdin=subprocess.PIPE)
+            output = handle.communicate()[0]
+            if handle.wait() != 0:
+                return None
+        except (IOError, OSError):
+            return None
+        audio_codec = None
+        for line in output.decode('ascii', 'ignore').split('\n'):
+            if line.startswith('codec_name='):
+                audio_codec = line.split('=')[1].strip()
+            elif line.strip() == 'codec_type=audio' and audio_codec is not None:
+                return audio_codec
+        return None
+
     def run_ffmpeg_multiple_files(self, input_paths, out_path, opts):
         self.check_version()
 
@@ -188,31 +212,6 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
         self._preferredquality = preferredquality
         self._nopostoverwrites = nopostoverwrites
 
-    def get_audio_codec(self, path):
-
-        if not self.probe_available:
-            raise PostProcessingError('ffprobe or avprobe not found. Please install one.')
-        try:
-            cmd = [
-                encodeFilename(self.probe_executable, True),
-                encodeArgument('-show_streams'),
-                encodeFilename(self._ffmpeg_filename_argument(path), True)]
-            if self._downloader.params.get('verbose', False):
-                self._downloader.to_screen('[debug] %s command line: %s' % (self.basename, shell_quote(cmd)))
-            handle = subprocess.Popen(cmd, stderr=compat_subprocess_get_DEVNULL(), stdout=subprocess.PIPE, stdin=subprocess.PIPE)
-            output = handle.communicate()[0]
-            if handle.wait() != 0:
-                return None
-        except (IOError, OSError):
-            return None
-        audio_codec = None
-        for line in output.decode('ascii', 'ignore').split('\n'):
-            if line.startswith('codec_name='):
-                audio_codec = line.split('=')[1].strip()
-            elif line.strip() == 'codec_type=audio' and audio_codec is not None:
-                return audio_codec
-        return None
-
     def run_ffmpeg(self, path, out_path, codec, more_opts):
         if codec is None:
             acodec_opts = []
@@ -280,6 +279,9 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
         prefix, sep, ext = path.rpartition('.')  # not os.path.splitext, since the latter does not work on unicode in all setups
         new_path = prefix + sep + extension
 
+        information['filepath'] = new_path
+        information['ext'] = extension
+
         # If we download foo.mp3 and convert it to... foo.mp3, then don't delete foo.mp3, silly.
         if (new_path == path or
                 (self._nopostoverwrites and os.path.exists(encodeFilename(new_path)))):
@@ -301,9 +303,6 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
                 new_path, time.time(), information['filetime'],
                 errnote='Cannot update utime of audio file')
 
-        information['filepath'] = new_path
-        information['ext'] = extension
-
         return [path], information
 
 
@@ -504,15 +503,15 @@ class FFmpegFixupM4aPP(FFmpegPostProcessor):
 class FFmpegFixupM3u8PP(FFmpegPostProcessor):
     def run(self, info):
         filename = info['filepath']
-        temp_filename = prepend_extension(filename, 'temp')
+        if self.get_audio_codec(filename) == 'aac':
+            temp_filename = prepend_extension(filename, 'temp')
 
-        options = ['-c', 'copy', '-f', 'mp4', '-bsf:a', 'aac_adtstoasc']
-        self._downloader.to_screen('[ffmpeg] Fixing malformated aac bitstream in "%s"' % filename)
-        self.run_ffmpeg(filename, temp_filename, options)
-
-        os.remove(encodeFilename(filename))
-        os.rename(encodeFilename(temp_filename), encodeFilename(filename))
+            options = ['-c', 'copy', '-f', 'mp4', '-bsf:a', 'aac_adtstoasc']
+            self._downloader.to_screen('[ffmpeg] Fixing malformated aac bitstream in "%s"' % filename)
+            self.run_ffmpeg(filename, temp_filename, options)
 
+            os.remove(encodeFilename(filename))
+            os.rename(encodeFilename(temp_filename), encodeFilename(filename))
         return [], info
 
 
index e39ca60aa08326b6f05814ff800bb09c75755e48..fbdfa02acc88ff8ba82684a2e5545aebe3fce5da 100644 (file)
@@ -1,37 +1,15 @@
 from __future__ import unicode_literals
 
-import os
-import subprocess
-import sys
-import errno
-
 from .common import PostProcessor
 from ..compat import compat_os_name
 from ..utils import (
-    check_executable,
     hyphenate_date,
-    version_tuple,
-    PostProcessingError,
-    encodeArgument,
-    encodeFilename,
+    write_xattr,
+    XAttrMetadataError,
+    XAttrUnavailableError,
 )
 
 
-class XAttrMetadataError(PostProcessingError):
-    def __init__(self, code=None, msg='Unknown error'):
-        super(XAttrMetadataError, self).__init__(msg)
-        self.code = code
-
-        # Parsing code and msg
-        if (self.code in (errno.ENOSPC, errno.EDQUOT) or
-                'No space left' in self.msg or 'Disk quota excedded' in self.msg):
-            self.reason = 'NO_SPACE'
-        elif self.code == errno.E2BIG or 'Argument list too long' in self.msg:
-            self.reason = 'VALUE_TOO_LONG'
-        else:
-            self.reason = 'NOT_SUPPORTED'
-
-
 class XAttrMetadataPP(PostProcessor):
 
     #
@@ -48,88 +26,6 @@ class XAttrMetadataPP(PostProcessor):
     def run(self, info):
         """ Set extended attributes on downloaded file (if xattr support is found). """
 
-        # This mess below finds the best xattr tool for the job and creates a
-        # "write_xattr" function.
-        try:
-            # try the pyxattr module...
-            import xattr
-
-            # Unicode arguments are not supported in python-pyxattr until
-            # version 0.5.0
-            # See https://github.com/rg3/youtube-dl/issues/5498
-            pyxattr_required_version = '0.5.0'
-            if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version):
-                self._downloader.report_warning(
-                    'python-pyxattr is detected but is too old. '
-                    'youtube-dl requires %s or above while your version is %s. '
-                    'Falling back to other xattr implementations' % (
-                        pyxattr_required_version, xattr.__version__))
-
-                raise ImportError
-
-            def write_xattr(path, key, value):
-                try:
-                    xattr.set(path, key, value)
-                except EnvironmentError as e:
-                    raise XAttrMetadataError(e.errno, e.strerror)
-
-        except ImportError:
-            if compat_os_name == 'nt':
-                # Write xattrs to NTFS Alternate Data Streams:
-                # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
-                def write_xattr(path, key, value):
-                    assert ':' not in key
-                    assert os.path.exists(path)
-
-                    ads_fn = path + ':' + key
-                    try:
-                        with open(ads_fn, 'wb') as f:
-                            f.write(value)
-                    except EnvironmentError as e:
-                        raise XAttrMetadataError(e.errno, e.strerror)
-            else:
-                user_has_setfattr = check_executable('setfattr', ['--version'])
-                user_has_xattr = check_executable('xattr', ['-h'])
-
-                if user_has_setfattr or user_has_xattr:
-
-                    def write_xattr(path, key, value):
-                        value = value.decode('utf-8')
-                        if user_has_setfattr:
-                            executable = 'setfattr'
-                            opts = ['-n', key, '-v', value]
-                        elif user_has_xattr:
-                            executable = 'xattr'
-                            opts = ['-w', key, value]
-
-                        cmd = ([encodeFilename(executable, True)] +
-                               [encodeArgument(o) for o in opts] +
-                               [encodeFilename(path, True)])
-
-                        try:
-                            p = subprocess.Popen(
-                                cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
-                        except EnvironmentError as e:
-                            raise XAttrMetadataError(e.errno, e.strerror)
-                        stdout, stderr = p.communicate()
-                        stderr = stderr.decode('utf-8', 'replace')
-                        if p.returncode != 0:
-                            raise XAttrMetadataError(p.returncode, stderr)
-
-                else:
-                    # On Unix, and can't find pyxattr, setfattr, or xattr.
-                    if sys.platform.startswith('linux'):
-                        self._downloader.report_error(
-                            "Couldn't find a tool to set the xattrs. "
-                            "Install either the python 'pyxattr' or 'xattr' "
-                            "modules, or the GNU 'attr' package "
-                            "(which contains the 'setfattr' tool).")
-                    else:
-                        self._downloader.report_error(
-                            "Couldn't find a tool to set the xattrs. "
-                            "Install either the python 'xattr' module, "
-                            "or the 'xattr' binary.")
-
         # Write the metadata to the file's xattrs
         self._downloader.to_screen('[metadata] Writing metadata to file\'s xattrs')
 
@@ -159,6 +55,10 @@ class XAttrMetadataPP(PostProcessor):
 
             return [], info
 
+        except XAttrUnavailableError as e:
+            self._downloader.report_error(str(e))
+            return [], info
+
         except XAttrMetadataError as e:
             if e.reason == 'NO_SPACE':
                 self._downloader.report_warning(
index 104807242bd3b0f35e0423faa096540be29d0a45..63d19b3a5214221afa71a6d43bde36a39c13cd4b 100644 (file)
@@ -103,6 +103,7 @@ class ProxyType(object):
     SOCKS4A = 1
     SOCKS5 = 2
 
+
 Proxy = collections.namedtuple('Proxy', (
     'type', 'host', 'port', 'username', 'password', 'remote_dns'))
 
index 7cf490aa43a878b3c377bea0b173c7a2b170c2c7..0c71585753134e93fba8d8de5cee003d31f050c9 100644 (file)
@@ -115,6 +115,8 @@ def _u30(reader):
     res = _read_int(reader)
     assert res & 0xf0000000 == 0
     return res
+
+
 _u32 = _read_int
 
 
@@ -176,6 +178,7 @@ class _Undefined(object):
         return 'undefined'
     __repr__ = __str__
 
+
 undefined = _Undefined()
 
 
index b3b687a314681de17547104f7f99e883a312c322..9595bcf9f120ea4d24133e3f7399e637d14ac035 100644 (file)
@@ -1,5 +1,5 @@
 #!/usr/bin/env python
-# -*- coding: utf-8 -*-
+# coding: utf-8
 
 from __future__ import unicode_literals
 
@@ -42,6 +42,7 @@ from .compat import (
     compat_html_entities_html5,
     compat_http_client,
     compat_kwargs,
+    compat_os_name,
     compat_parse_qs,
     compat_shlex_quote,
     compat_socket_create_connection,
@@ -91,6 +92,13 @@ ENGLISH_MONTH_NAMES = [
     'January', 'February', 'March', 'April', 'May', 'June',
     'July', 'August', 'September', 'October', 'November', 'December']
 
+MONTH_NAMES = {
+    'en': ENGLISH_MONTH_NAMES,
+    'fr': [
+        'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
+        'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
+}
+
 KNOWN_EXTENSIONS = (
     'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
     'flv', 'f4v', 'f4a', 'f4b',
@@ -134,6 +142,8 @@ DATE_FORMATS = (
     '%Y-%m-%dT%H:%M:%S',
     '%Y-%m-%dT%H:%M:%S.%f',
     '%Y-%m-%dT%H:%M',
+    '%b %d %Y at %H:%M',
+    '%b %d %Y at %H:%M:%S',
 )
 
 DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS)
@@ -155,6 +165,8 @@ DATE_FORMATS_MONTH_FIRST.extend([
     '%m/%d/%Y %H:%M:%S',
 ])
 
+PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
+
 
 def preferredencoding():
     """Get preferred encoding.
@@ -768,6 +780,26 @@ class ContentTooShortError(Exception):
         self.expected = expected
 
 
+class XAttrMetadataError(Exception):
+    def __init__(self, code=None, msg='Unknown error'):
+        super(XAttrMetadataError, self).__init__(msg)
+        self.code = code
+        self.msg = msg
+
+        # Parsing code and msg
+        if (self.code in (errno.ENOSPC, errno.EDQUOT) or
+                'No space left' in self.msg or 'Disk quota excedded' in self.msg):
+            self.reason = 'NO_SPACE'
+        elif self.code == errno.E2BIG or 'Argument list too long' in self.msg:
+            self.reason = 'VALUE_TOO_LONG'
+        else:
+            self.reason = 'NOT_SUPPORTED'
+
+
+class XAttrUnavailableError(Exception):
+    pass
+
+
 def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
     # Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting
     # expected HTTP responses to meet HTTP/1.0 or later (see also
@@ -1504,38 +1536,63 @@ def parse_filesize(s):
     _UNIT_TABLE = {
         'B': 1,
         'b': 1,
+        'bytes': 1,
         'KiB': 1024,
         'KB': 1000,
         'kB': 1024,
         'Kb': 1000,
+        'kb': 1000,
+        'kilobytes': 1000,
+        'kibibytes': 1024,
         'MiB': 1024 ** 2,
         'MB': 1000 ** 2,
         'mB': 1024 ** 2,
         'Mb': 1000 ** 2,
+        'mb': 1000 ** 2,
+        'megabytes': 1000 ** 2,
+        'mebibytes': 1024 ** 2,
         'GiB': 1024 ** 3,
         'GB': 1000 ** 3,
         'gB': 1024 ** 3,
         'Gb': 1000 ** 3,
+        'gb': 1000 ** 3,
+        'gigabytes': 1000 ** 3,
+        'gibibytes': 1024 ** 3,
         'TiB': 1024 ** 4,
         'TB': 1000 ** 4,
         'tB': 1024 ** 4,
         'Tb': 1000 ** 4,
+        'tb': 1000 ** 4,
+        'terabytes': 1000 ** 4,
+        'tebibytes': 1024 ** 4,
         'PiB': 1024 ** 5,
         'PB': 1000 ** 5,
         'pB': 1024 ** 5,
         'Pb': 1000 ** 5,
+        'pb': 1000 ** 5,
+        'petabytes': 1000 ** 5,
+        'pebibytes': 1024 ** 5,
         'EiB': 1024 ** 6,
         'EB': 1000 ** 6,
         'eB': 1024 ** 6,
         'Eb': 1000 ** 6,
+        'eb': 1000 ** 6,
+        'exabytes': 1000 ** 6,
+        'exbibytes': 1024 ** 6,
         'ZiB': 1024 ** 7,
         'ZB': 1000 ** 7,
         'zB': 1024 ** 7,
         'Zb': 1000 ** 7,
+        'zb': 1000 ** 7,
+        'zettabytes': 1000 ** 7,
+        'zebibytes': 1024 ** 7,
         'YiB': 1024 ** 8,
         'YB': 1000 ** 8,
         'yB': 1024 ** 8,
         'Yb': 1000 ** 8,
+        'yb': 1000 ** 8,
+        'yottabytes': 1000 ** 8,
+        'yobibytes': 1024 ** 8,
     }
 
     return lookup_unit_table(_UNIT_TABLE, s)
@@ -1562,11 +1619,13 @@ def parse_count(s):
     return lookup_unit_table(_UNIT_TABLE, s)
 
 
-def month_by_name(name):
+def month_by_name(name, lang='en'):
     """ Return the number of a month by (locale-independently) English name """
 
+    month_names = MONTH_NAMES.get(lang, MONTH_NAMES['en'])
+
     try:
-        return ENGLISH_MONTH_NAMES.index(name) + 1
+        return month_names.index(name) + 1
     except ValueError:
         return None
 
@@ -1632,6 +1691,10 @@ def url_basename(url):
     return path.strip('/').split('/')[-1]
 
 
+def base_url(url):
+    return re.match(r'https?://[^?#&]+/', url).group()
+
+
 class HEADRequest(compat_urllib_request.Request):
     def get_method(self):
         return 'HEAD'
@@ -1759,8 +1822,12 @@ def get_exe_version(exe, args=['--version'],
     """ Returns the version of the specified executable,
     or False if the executable is not present """
     try:
+        # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
+        # SIGTTOU if youtube-dl is run in the background.
+        # See https://github.com/rg3/youtube-dl/issues/955#issuecomment-209789656
         out, _ = subprocess.Popen(
             [encodeArgument(exe)] + args,
+            stdin=subprocess.PIPE,
             stdout=subprocess.PIPE, stderr=subprocess.STDOUT).communicate()
     except OSError:
         return False
@@ -2030,14 +2097,14 @@ def js_to_json(code):
             }.get(m.group(0), m.group(0)), v[1:-1])
 
         INTEGER_TABLE = (
-            (r'^0[xX][0-9a-fA-F]+', 16),
-            (r'^0+[0-7]+', 8),
+            (r'^(0[xX][0-9a-fA-F]+)\s*:?$', 16),
+            (r'^(0+[0-7]+)\s*:?$', 8),
         )
 
         for regex, base in INTEGER_TABLE:
             im = re.match(regex, v)
             if im:
-                i = int(im.group(0), base)
+                i = int(im.group(1), base)
                 return '"%d":' % i if v.endswith(':') else '%d' % i
 
         return '"%s"' % v
@@ -2123,7 +2190,7 @@ def mimetype2ext(mt):
         return ext
 
     _, _, res = mt.rpartition('/')
-    res = res.lower()
+    res = res.split(';')[0].strip().lower()
 
     return {
         '3gpp': '3gp',
@@ -2143,6 +2210,7 @@ def mimetype2ext(mt):
         'f4m+xml': 'f4m',
         'hds+xml': 'f4m',
         'vnd.ms-sstr+xml': 'ism',
+        'quicktime': 'mov',
     }.get(res, res)
 
 
@@ -2158,7 +2226,7 @@ def parse_codecs(codecs_str):
         if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v'):
             if not vcodec:
                 vcodec = full_codec
-        elif codec in ('mp4a', 'opus', 'vorbis', 'mp3', 'aac'):
+        elif codec in ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3'):
             if not acodec:
                 acodec = full_codec
         else:
@@ -2281,11 +2349,18 @@ def _match_one(filter_part, dct):
     m = operator_rex.search(filter_part)
     if m:
         op = COMPARISON_OPERATORS[m.group('op')]
-        if m.group('strval') is not None:
+        actual_value = dct.get(m.group('key'))
+        if (m.group('strval') is not None or
+            # If the original field is a string and matching comparisonvalue is
+            # a number we should respect the origin of the original field
+            # and process comparison value as a string (see
+            # https://github.com/rg3/youtube-dl/issues/11082).
+            actual_value is not None and m.group('intval') is not None and
+                isinstance(actual_value, compat_str)):
             if m.group('op') not in ('=', '!='):
                 raise ValueError(
                     'Operator %s does not support string values!' % m.group('op'))
-            comparison_value = m.group('strval')
+            comparison_value = m.group('strval') or m.group('intval')
         else:
             try:
                 comparison_value = int(m.group('intval'))
@@ -2297,7 +2372,6 @@ def _match_one(filter_part, dct):
                     raise ValueError(
                         'Invalid integer value %r in filter part %r' % (
                             m.group('intval'), filter_part))
-        actual_value = dct.get(m.group('key'))
         if actual_value is None:
             return m.group('none_inclusive')
         return op(actual_value, comparison_value)
@@ -2959,9 +3033,7 @@ def encode_base_n(num, n, table=None):
 
 
 def decode_packed_codes(code):
-    mobj = re.search(
-        r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)",
-        code)
+    mobj = re.search(PACKED_CODES_RE, code)
     obfucasted_code, base, count, symbols = mobj.groups()
     base = int(base)
     count = int(count)
@@ -3096,3 +3168,87 @@ def decode_png(png_data):
             current_row.append(color)
 
     return width, height, pixels
+
+
+def write_xattr(path, key, value):
+    # This mess below finds the best xattr tool for the job
+    try:
+        # try the pyxattr module...
+        import xattr
+
+        if hasattr(xattr, 'set'):  # pyxattr
+            # Unicode arguments are not supported in python-pyxattr until
+            # version 0.5.0
+            # See https://github.com/rg3/youtube-dl/issues/5498
+            pyxattr_required_version = '0.5.0'
+            if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version):
+                # TODO: fallback to CLI tools
+                raise XAttrUnavailableError(
+                    'python-pyxattr is detected but is too old. '
+                    'youtube-dl requires %s or above while your version is %s. '
+                    'Falling back to other xattr implementations' % (
+                        pyxattr_required_version, xattr.__version__))
+
+            setxattr = xattr.set
+        else:  # xattr
+            setxattr = xattr.setxattr
+
+        try:
+            setxattr(path, key, value)
+        except EnvironmentError as e:
+            raise XAttrMetadataError(e.errno, e.strerror)
+
+    except ImportError:
+        if compat_os_name == 'nt':
+            # Write xattrs to NTFS Alternate Data Streams:
+            # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
+            assert ':' not in key
+            assert os.path.exists(path)
+
+            ads_fn = path + ':' + key
+            try:
+                with open(ads_fn, 'wb') as f:
+                    f.write(value)
+            except EnvironmentError as e:
+                raise XAttrMetadataError(e.errno, e.strerror)
+        else:
+            user_has_setfattr = check_executable('setfattr', ['--version'])
+            user_has_xattr = check_executable('xattr', ['-h'])
+
+            if user_has_setfattr or user_has_xattr:
+
+                value = value.decode('utf-8')
+                if user_has_setfattr:
+                    executable = 'setfattr'
+                    opts = ['-n', key, '-v', value]
+                elif user_has_xattr:
+                    executable = 'xattr'
+                    opts = ['-w', key, value]
+
+                cmd = ([encodeFilename(executable, True)] +
+                       [encodeArgument(o) for o in opts] +
+                       [encodeFilename(path, True)])
+
+                try:
+                    p = subprocess.Popen(
+                        cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
+                except EnvironmentError as e:
+                    raise XAttrMetadataError(e.errno, e.strerror)
+                stdout, stderr = p.communicate()
+                stderr = stderr.decode('utf-8', 'replace')
+                if p.returncode != 0:
+                    raise XAttrMetadataError(p.returncode, stderr)
+
+            else:
+                # On Unix, and can't find pyxattr, setfattr, or xattr.
+                if sys.platform.startswith('linux'):
+                    raise XAttrUnavailableError(
+                        "Couldn't find a tool to set the xattrs. "
+                        "Install either the python 'pyxattr' or 'xattr' "
+                        "modules, or the GNU 'attr' package "
+                        "(which contains the 'setfattr' tool).")
+                else:
+                    raise XAttrUnavailableError(
+                        "Couldn't find a tool to set the xattrs. "
+                        "Install either the python 'xattr' module, "
+                        "or the 'xattr' binary.")
index cf59501177dfde41c61dcd6d3d7418b3b4468c7a..1acb630af245e1288773118670821d846bde5bdd 100644 (file)
@@ -1,3 +1,3 @@
 from __future__ import unicode_literals
 
-__version__ = '2016.08.17'
+__version__ = '2016.12.01'